llmist 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
  #!/usr/bin/env node
- import "./chunk-ZDNV7DDO.js";
+ import "./chunk-IHSZUAYN.js";
  import {
  AgentBuilder,
  BaseGadget,
@@ -10,14 +10,20 @@ import {
  LLMMessageBuilder,
  LLMist,
  MODEL_ALIASES,
+ audioFromBuffer,
  createGadget,
  createLogger,
+ detectAudioMimeType,
+ detectImageMimeType,
+ extractText,
+ imageFromBuffer,
  init_builder,
  init_client,
  init_constants,
  init_create_gadget,
  init_exceptions,
  init_gadget,
+ init_input_content,
  init_logger,
  init_messages,
  init_model_shortcuts,
@@ -26,8 +32,9 @@ import {
  init_schema_validator,
  resolveModel,
  schemaToJSONSchema,
+ text,
  validateGadgetSchema
- } from "./chunk-GANXNBIZ.js";
+ } from "./chunk-YHS2DYXP.js";

  // src/cli/constants.ts
  var CLI_NAME = "llmist";
@@ -36,7 +43,10 @@ var COMMANDS = {
  complete: "complete",
  agent: "agent",
  models: "models",
- gadget: "gadget"
+ gadget: "gadget",
+ image: "image",
+ speech: "speech",
+ vision: "vision"
  };
  var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
  var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -57,7 +67,20 @@ var OPTION_FLAGS = {
  docker: "--docker",
  dockerRo: "--docker-ro",
  noDocker: "--no-docker",
- dockerDev: "--docker-dev"
+ dockerDev: "--docker-dev",
+ // Multimodal input options
+ inputImage: "--image <path>",
+ inputAudio: "--audio <path>",
+ // Image generation options
+ imageSize: "--size <size>",
+ imageQuality: "--quality <quality>",
+ imageCount: "-n, --count <number>",
+ imageOutput: "-o, --output <path>",
+ // Speech generation options
+ voice: "--voice <name>",
+ speechFormat: "--format <format>",
+ speechSpeed: "--speed <value>",
+ speechOutput: "-o, --output <path>"
  };
  var OPTION_DESCRIPTIONS = {
  model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -73,10 +96,23 @@ var OPTION_DESCRIPTIONS = {
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
  noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
  quiet: "Suppress all output except content (text and TellUser messages).",
+ // Multimodal input descriptions
+ inputImage: "Image file to include with the prompt (vision models).",
+ inputAudio: "Audio file to include with the prompt (Gemini only).",
  docker: "Run agent in a Docker sandbox container for security isolation.",
  dockerRo: "Run in Docker with current directory mounted read-only.",
  noDocker: "Disable Docker sandboxing (override config).",
- dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+ dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+ // Image generation descriptions
+ imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+ imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+ imageCount: "Number of images to generate (model dependent, usually 1-4).",
+ imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+ // Speech generation descriptions
+ voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+ speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+ speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+ speechOutput: "Output path for audio file. Defaults to stdout if not specified."
  };
  var SUMMARY_PREFIX = "[llmist]";

@@ -86,7 +122,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
  // package.json
  var package_default = {
  name: "llmist",
- version: "2.2.0",
+ version: "2.5.0",
  description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
  type: "module",
  main: "dist/index.cjs",
@@ -226,6 +262,7 @@ function isAbortError(error) {
  }

  // src/cli/agent-command.ts
+ init_input_content();
  init_registry();
  init_constants();

@@ -550,6 +587,75 @@ var finish = createGadget({
  });
  var builtinGadgets = [askUser, tellUser, finish];

+ // src/cli/file-utils.ts
+ init_input_content();
+ import { readFile, stat } from "node:fs/promises";
+ import { resolve as resolve2 } from "node:path";
+ var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
+ function formatFileSize(bytes) {
+ if (bytes < 1024) return `${bytes} bytes`;
+ if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+ if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+ return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
+ }
+ async function checkFileSize(absolutePath, filePath, maxSize) {
+ const stats = await stat(absolutePath);
+ if (stats.size > maxSize) {
+ throw new Error(
+ `File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
+ );
+ }
+ }
+ async function readImageFile(filePath, options = {}) {
+ const absolutePath = resolve2(filePath);
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+ let buffer;
+ try {
+ await checkFileSize(absolutePath, filePath, maxFileSize);
+ buffer = await readFile(absolutePath);
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ throw new Error(`Failed to read image file "${filePath}": ${message}`);
+ }
+ const mimeType = detectImageMimeType(buffer);
+ if (!mimeType) {
+ throw new Error(
+ `File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
+ );
+ }
+ return imageFromBuffer(buffer, mimeType);
+ }
+ async function readAudioFile(filePath, options = {}) {
+ const absolutePath = resolve2(filePath);
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+ let buffer;
+ try {
+ await checkFileSize(absolutePath, filePath, maxFileSize);
+ buffer = await readFile(absolutePath);
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ throw new Error(`Failed to read audio file "${filePath}": ${message}`);
+ }
+ const mimeType = detectAudioMimeType(buffer);
+ if (!mimeType) {
+ throw new Error(
+ `File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
+ );
+ }
+ return audioFromBuffer(buffer, mimeType);
+ }
+ async function readFileBuffer(filePath, options = {}) {
+ const absolutePath = resolve2(filePath);
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+ try {
+ await checkFileSize(absolutePath, filePath, maxFileSize);
+ return await readFile(absolutePath);
+ } catch (error) {
+ const message = error instanceof Error ? error.message : String(error);
+ throw new Error(`Failed to read file "${filePath}": ${message}`);
+ }
+ }
+
  // src/cli/gadgets.ts
  init_gadget();
  import fs5 from "node:fs";
@@ -713,7 +819,7 @@ ${formattedList}`;
  // src/cli/builtins/filesystem/read-file.ts
  import fs3 from "node:fs";
  import { z as z3 } from "zod";
- var readFile = createGadget({
+ var readFile2 = createGadget({
  name: "ReadFile",
  description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
  schema: z3.object({
@@ -984,7 +1090,7 @@ error: ${message}`;
  // src/cli/builtins/index.ts
  var builtinGadgetRegistry = {
  ListDirectory: listDirectory,
- ReadFile: readFile,
+ ReadFile: readFile2,
  WriteFile: writeFile,
  EditFile: editFile,
  RunCommand: runCommand
@@ -1119,6 +1225,7 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
  }

  // src/cli/llm-logging.ts
+ init_messages();
  import { mkdir, writeFile as writeFile2 } from "node:fs/promises";
  import { homedir } from "node:os";
  import { join } from "node:path";
@@ -1136,7 +1243,7 @@ function formatLlmRequest(messages) {
  const lines = [];
  for (const msg of messages) {
  lines.push(`=== ${msg.role.toUpperCase()} ===`);
- lines.push(msg.content ?? "");
+ lines.push(msg.content ? extractText(msg.content) : "");
  lines.push("");
  }
  return lines.join("\n");
@@ -1210,9 +1317,9 @@ function ensureMarkedConfigured() {
  markedConfigured = true;
  }
  }
- function renderMarkdown(text) {
+ function renderMarkdown(text2) {
  ensureMarkedConfigured();
- let rendered = marked.parse(text);
+ let rendered = marked.parse(text2);
  rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => chalk3.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => chalk3.italic(content));
  return rendered.trimEnd();
  }
@@ -1226,8 +1333,8 @@ function createRainbowSeparator() {
  }
  return result;
  }
- function renderMarkdownWithSeparators(text) {
- const rendered = renderMarkdown(text);
+ function renderMarkdownWithSeparators(text2) {
+ const rendered = renderMarkdown(text2);
  const separator = createRainbowSeparator();
  return `
  ${separator}
@@ -1395,12 +1502,12 @@ var StreamPrinter = class {
  *
  * @param text - Text to write
  */
- write(text) {
- if (!text) {
+ write(text2) {
+ if (!text2) {
  return;
  }
- this.target.write(text);
- this.endedWithNewline = text.endsWith("\n");
+ this.target.write(text2);
+ this.endedWithNewline = text2.endsWith("\n");
  }
  /**
  * Ensures output ends with a newline by writing one if needed.
@@ -1879,7 +1986,7 @@ function addCompleteOptions(cmd, defaults) {
  OPTION_DESCRIPTIONS.maxTokens,
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
  defaults?.["max-tokens"]
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
  }
  function addAgentOptions(cmd, defaults) {
  const gadgetAccumulator = (value, previous = []) => [
@@ -1903,7 +2010,7 @@ function addAgentOptions(cmd, defaults) {
  OPTION_FLAGS.noBuiltinInteraction,
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
  defaults?.["builtin-interaction"] !== false
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
  }
  function configToCompleteOptions(config) {
  const result = {};
@@ -2108,6 +2215,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  "docker-cwd-permission"
  // Override CWD mount permission for this profile
  ]);
+ var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+ "model",
+ "size",
+ "quality",
+ "count",
+ "output",
+ "quiet"
+ ]);
+ var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+ "model",
+ "voice",
+ "format",
+ "speed",
+ "output",
+ "quiet"
+ ]);
  var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
  ...COMPLETE_CONFIG_KEYS,
  ...AGENT_CONFIG_KEYS,
@@ -2368,6 +2491,75 @@ function validateAgentConfig(raw, section) {
  }
  return result;
  }
+ function validateImageConfig(raw, section) {
+ if (typeof raw !== "object" || raw === null) {
+ throw new ConfigError(`[${section}] must be a table`);
+ }
+ const rawObj = raw;
+ for (const key of Object.keys(rawObj)) {
+ if (!IMAGE_CONFIG_KEYS.has(key)) {
+ throw new ConfigError(`[${section}].${key} is not a valid option`);
+ }
+ }
+ const result = {};
+ if ("model" in rawObj) {
+ result.model = validateString(rawObj.model, "model", section);
+ }
+ if ("size" in rawObj) {
+ result.size = validateString(rawObj.size, "size", section);
+ }
+ if ("quality" in rawObj) {
+ result.quality = validateString(rawObj.quality, "quality", section);
+ }
+ if ("count" in rawObj) {
+ result.count = validateNumber(rawObj.count, "count", section, {
+ integer: true,
+ min: 1,
+ max: 10
+ });
+ }
+ if ("output" in rawObj) {
+ result.output = validateString(rawObj.output, "output", section);
+ }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
+ return result;
+ }
+ function validateSpeechConfig(raw, section) {
+ if (typeof raw !== "object" || raw === null) {
+ throw new ConfigError(`[${section}] must be a table`);
+ }
+ const rawObj = raw;
+ for (const key of Object.keys(rawObj)) {
+ if (!SPEECH_CONFIG_KEYS.has(key)) {
+ throw new ConfigError(`[${section}].${key} is not a valid option`);
+ }
+ }
+ const result = {};
+ if ("model" in rawObj) {
+ result.model = validateString(rawObj.model, "model", section);
+ }
+ if ("voice" in rawObj) {
+ result.voice = validateString(rawObj.voice, "voice", section);
+ }
+ if ("format" in rawObj) {
+ result.format = validateString(rawObj.format, "format", section);
+ }
+ if ("speed" in rawObj) {
+ result.speed = validateNumber(rawObj.speed, "speed", section, {
+ min: 0.25,
+ max: 4
+ });
+ }
+ if ("output" in rawObj) {
+ result.output = validateString(rawObj.output, "output", section);
+ }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
+ return result;
+ }
  function validateStringOrBoolean(value, field, section) {
  if (typeof value === "string" || typeof value === "boolean") {
  return value;
@@ -2490,6 +2682,10 @@ function validateConfig(raw, configPath) {
  result.complete = validateCompleteConfig(value, key);
  } else if (key === "agent") {
  result.agent = validateAgentConfig(value, key);
+ } else if (key === "image") {
+ result.image = validateImageConfig(value, key);
+ } else if (key === "speech") {
+ result.speech = validateSpeechConfig(value, key);
  } else if (key === "prompts") {
  result.prompts = validatePromptsConfig(value, key);
  } else if (key === "docker") {
@@ -2534,7 +2730,7 @@ function loadConfig() {
  return resolveTemplatesInConfig(inherited, configPath);
  }
  function getCustomCommandNames(config) {
- const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+ const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
  return Object.keys(config).filter((key) => !reserved.has(key));
  }
  function resolveTemplatesInConfig(config, configPath) {
@@ -3528,8 +3724,8 @@ Denied: ${result.reason ?? "by user"}`
  builder.withTextOnlyHandler("acknowledge");
  builder.withTextWithGadgetsHandler({
  gadgetName: "TellUser",
- parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
- resultMapping: (text) => `\u2139\uFE0F ${text}`
+ parameterMapping: (text2) => ({ message: text2, done: false, type: "info" }),
+ resultMapping: (text2) => `\u2139\uFE0F ${text2}`
  });
  builder.withTrailingMessage(
  (ctx) => [
@@ -3538,7 +3734,19 @@ Denied: ${result.reason ?? "by user"}`
  "Maximize efficiency by batching independent operations in a single response."
  ].join(" ")
  );
- const agent = builder.ask(prompt);
+ let agent;
+ if (options.image || options.audio) {
+ const parts = [text(prompt)];
+ if (options.image) {
+ parts.push(await readImageFile(options.image));
+ }
+ if (options.audio) {
+ parts.push(await readAudioFile(options.audio));
+ }
+ agent = builder.askWithContent(parts);
+ } else {
+ agent = builder.ask(prompt);
+ }
  let textBuffer = "";
  const flushTextBuffer = () => {
  if (textBuffer) {
@@ -3613,6 +3821,7 @@ function registerAgentCommand(program, env, config) {
  }

  // src/cli/complete-command.ts
+ init_input_content();
  init_messages();
  init_model_shortcuts();
  init_constants();
@@ -3624,7 +3833,18 @@ async function executeComplete(promptArg, options, env) {
  if (options.system) {
  builder.addSystem(options.system);
  }
- builder.addUser(prompt);
+ if (options.image || options.audio) {
+ const parts = [text(prompt)];
+ if (options.image) {
+ parts.push(await readImageFile(options.image));
+ }
+ if (options.audio) {
+ parts.push(await readAudioFile(options.audio));
+ }
+ builder.addUserMultimodal(parts);
+ } else {
+ builder.addUser(prompt);
+ }
  const messages = builder.build();
  const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
  let llmSessionDir;
@@ -4129,19 +4349,118 @@ function registerGadgetCommand(program, env) {
  );
  }

+ // src/cli/image-command.ts
+ import { writeFileSync as writeFileSync2 } from "node:fs";
+ var DEFAULT_IMAGE_MODEL = "dall-e-3";
+ async function executeImage(promptArg, options, env) {
+ const prompt = await resolvePrompt(promptArg, env);
+ const client = env.createClient();
+ const model = options.model;
+ const n = options.count ? Number.parseInt(options.count, 10) : 1;
+ const stderrTTY = env.stderr.isTTY === true;
+ if (!options.quiet && stderrTTY) {
+ env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+ `);
+ }
+ const result = await client.image.generate({
+ model,
+ prompt,
+ size: options.size,
+ quality: options.quality,
+ n,
+ responseFormat: options.output ? "b64_json" : "url"
+ });
+ if (options.output) {
+ const imageData = result.images[0];
+ if (imageData.b64Json) {
+ const buffer = Buffer.from(imageData.b64Json, "base64");
+ writeFileSync2(options.output, buffer);
+ if (!options.quiet) {
+ env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+ `);
+ }
+ } else if (imageData.url) {
+ env.stdout.write(`${imageData.url}
+ `);
+ }
+ } else {
+ for (const image of result.images) {
+ if (image.url) {
+ env.stdout.write(`${image.url}
+ `);
+ } else if (image.b64Json) {
+ env.stdout.write(image.b64Json);
+ }
+ }
+ }
+ if (!options.quiet && stderrTTY) {
+ const parts = [
+ `${result.images.length} image(s)`,
+ `size: ${result.usage.size}`,
+ `quality: ${result.usage.quality}`
+ ];
+ if (result.cost !== void 0) {
+ parts.push(`cost: ${formatCost(result.cost)}`);
+ }
+ env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+ `);
+ }
+ }
+ function registerImageCommand(program, env, config) {
+ program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+ OPTION_FLAGS.model,
+ OPTION_DESCRIPTIONS.model,
+ config?.model ?? DEFAULT_IMAGE_MODEL
+ ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+ (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+ );
+ }
+
  // src/cli/models-command.ts
  import chalk8 from "chalk";
  init_model_shortcuts();
  async function handleModelsCommand(options, env) {
  const client = env.createClient();
- const models = client.modelRegistry.listModels(options.provider);
+ const showText = options.all || options.text || !options.image && !options.speech;
+ const showImage = options.all || options.image;
+ const showSpeech = options.all || options.speech;
+ const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+ const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+ const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
  if (options.format === "json") {
- renderJSON(models, env.stdout);
+ renderJSON(textModels, imageModels, speechModels, env.stdout);
  } else {
- renderTable(models, options.verbose || false, env.stdout);
+ renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
+ }
+ }
+ function renderAllTables(textModels, imageModels, speechModels, verbose, stream) {
+ const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+ if (!hasAnyModels) {
+ stream.write(chalk8.yellow("\nNo models found matching the specified criteria.\n\n"));
+ return;
+ }
+ stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
+ stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+ if (textModels.length > 0) {
+ renderTextTable(textModels, verbose, stream);
+ }
+ if (imageModels.length > 0) {
+ renderImageTable(imageModels, verbose, stream);
+ }
+ if (speechModels.length > 0) {
+ renderSpeechTable(speechModels, verbose, stream);
+ }
+ if (textModels.length > 0) {
+ stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
+ stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
+ const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+ for (const [shortcut, fullName] of shortcuts) {
+ stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
+ }
+ stream.write("\n");
  }
  }
- function renderTable(models, verbose, stream) {
+ function renderTextTable(models, verbose, stream) {
  const grouped = /* @__PURE__ */ new Map();
  for (const model of models) {
  const provider = model.provider;
@@ -4150,13 +4469,13 @@ function renderTable(models, verbose, stream) {
  }
  grouped.get(provider).push(model);
  }
- stream.write(chalk8.bold.cyan("\nAvailable Models\n"));
- stream.write(chalk8.cyan("=".repeat(80)) + "\n\n");
+ stream.write(chalk8.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+ stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
  const providers = Array.from(grouped.keys()).sort();
  for (const provider of providers) {
  const providerModels = grouped.get(provider);
  const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
- stream.write(chalk8.bold.yellow(`${providerName} Models
+ stream.write(chalk8.bold.yellow(`${providerName}
  `));
  if (verbose) {
  renderVerboseTable(providerModels, stream);
@@ -4165,13 +4484,6 @@ function renderTable(models, verbose, stream) {
  }
  stream.write("\n");
  }
- stream.write(chalk8.bold.magenta("Model Shortcuts\n"));
- stream.write(chalk8.dim("\u2500".repeat(80)) + "\n");
- const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
- for (const [shortcut, fullName] of shortcuts) {
- stream.write(chalk8.cyan(` ${shortcut.padEnd(15)}`) + chalk8.dim(" \u2192 ") + chalk8.white(fullName) + "\n");
- }
- stream.write("\n");
  }
  function renderCompactTable(models, stream) {
  const idWidth = 25;
@@ -4248,9 +4560,171 @@ function renderVerboseTable(models, stream) {
  }
  stream.write("\n");
  }
- function renderJSON(models, stream) {
- const output = {
- models: models.map((model) => ({
+ function renderImageTable(models, verbose, stream) {
+ stream.write(chalk8.bold.green("\u{1F3A8} Image Generation Models\n"));
+ stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+ const grouped = /* @__PURE__ */ new Map();
+ for (const model of models) {
+ if (!grouped.has(model.provider)) {
+ grouped.set(model.provider, []);
+ }
+ grouped.get(model.provider).push(model);
+ }
+ for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+ const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+ stream.write(chalk8.bold.yellow(`${providerName}
+ `));
+ if (verbose) {
+ for (const model of providerModels) {
+ stream.write(chalk8.bold.green(`
+ ${model.modelId}
+ `));
+ stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+ stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+ `);
+ stream.write(` ${chalk8.dim("Sizes:")} ${chalk8.yellow(model.supportedSizes.join(", "))}
+ `);
+ if (model.supportedQualities) {
+ stream.write(` ${chalk8.dim("Qualities:")} ${chalk8.yellow(model.supportedQualities.join(", "))}
+ `);
+ }
+ stream.write(` ${chalk8.dim("Max Images:")} ${chalk8.yellow(model.maxImages.toString())}
+ `);
+ stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatImagePrice(model))}
+ `);
+ if (model.features) {
+ const features = [];
+ if (model.features.textRendering) features.push("text-rendering");
+ if (model.features.transparency) features.push("transparency");
+ if (model.features.conversational) features.push("conversational");
+ if (features.length > 0) {
+ stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+ `);
+ }
+ }
+ }
+ } else {
+ const idWidth = 32;
+ const nameWidth = 25;
+ const sizesWidth = 20;
+ const priceWidth = 15;
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+ stream.write(
+ chalk8.bold(
+ "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
+ ) + "\n"
+ );
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+ for (const model of providerModels) {
+ const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
+ stream.write(
+ chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(sizes.padEnd(sizesWidth)) + " " + chalk8.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
+ );
+ }
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+ }
+ stream.write("\n");
+ }
+ }
+ function renderSpeechTable(models, verbose, stream) {
+ stream.write(chalk8.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
+ stream.write(chalk8.dim("\u2500".repeat(80)) + "\n\n");
+ const grouped = /* @__PURE__ */ new Map();
+ for (const model of models) {
+ if (!grouped.has(model.provider)) {
+ grouped.set(model.provider, []);
+ }
+ grouped.get(model.provider).push(model);
+ }
+ for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+ const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+ stream.write(chalk8.bold.yellow(`${providerName}
+ `));
+ if (verbose) {
+ for (const model of providerModels) {
+ stream.write(chalk8.bold.green(`
+ ${model.modelId}
+ `));
+ stream.write(chalk8.dim(" " + "\u2500".repeat(60)) + "\n");
+ stream.write(` ${chalk8.dim("Name:")} ${chalk8.white(model.displayName)}
+ `);
+ stream.write(` ${chalk8.dim("Voices:")} ${chalk8.yellow(model.voices.length.toString())} voices
+ `);
+ if (model.voices.length <= 6) {
+ stream.write(` ${chalk8.dim(model.voices.join(", "))}
+ `);
+ } else {
+ stream.write(` ${chalk8.dim(model.voices.slice(0, 6).join(", ") + "...")}
+ `);
+ }
+ stream.write(` ${chalk8.dim("Formats:")} ${chalk8.yellow(model.formats.join(", "))}
+ `);
+ stream.write(` ${chalk8.dim("Max Input:")} ${chalk8.yellow(model.maxInputLength.toString())} chars
+ `);
+ stream.write(` ${chalk8.dim("Pricing:")} ${chalk8.cyan(formatSpeechPrice(model))}
+ `);
+ if (model.features) {
+ const features = [];
+ if (model.features.multiSpeaker) features.push("multi-speaker");
+ if (model.features.voiceInstructions) features.push("voice-instructions");
+ if (model.features.languages) features.push(`${model.features.languages} languages`);
+ if (features.length > 0) {
+ stream.write(` ${chalk8.dim("Features:")} ${chalk8.blue(features.join(", "))}
+ `);
+ }
+ }
+ }
+ } else {
+ const idWidth = 30;
+ const nameWidth = 28;
+ const voicesWidth = 12;
+ const priceWidth = 18;
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+ stream.write(
+ chalk8.bold(
+ "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
+ ) + "\n"
+ );
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+ for (const model of providerModels) {
+ stream.write(
+ chalk8.green(model.modelId.padEnd(idWidth)) + " " + chalk8.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + chalk8.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + chalk8.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
+ );
+ }
+ stream.write(chalk8.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+ }
+ stream.write("\n");
+ }
+ }
+ function formatImagePrice(model) {
+ if (model.pricing.perImage !== void 0) {
+ return `$${model.pricing.perImage.toFixed(2)}/img`;
+ }
+ if (model.pricing.bySize) {
+ const prices = Object.values(model.pricing.bySize);
+ const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+ const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+ if (minPrice === maxPrice) {
+ return `$${minPrice.toFixed(2)}/img`;
+ }
+ return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
+ }
+ return "varies";
+ }
+ function formatSpeechPrice(model) {
+ if (model.pricing.perCharacter !== void 0) {
+ const perMillion = model.pricing.perCharacter * 1e6;
+ return `$${perMillion.toFixed(0)}/1M chars`;
+ }
+ if (model.pricing.perMinute !== void 0) {
+ return `~$${model.pricing.perMinute.toFixed(2)}/min`;
+ }
+ return "varies";
+ }
+ function renderJSON(textModels, imageModels, speechModels, stream) {
+ const output = {};
+ if (textModels.length > 0) {
+ output.textModels = textModels.map((model) => ({
  provider: model.provider,
  modelId: model.modelId,
  displayName: model.displayName,
@@ -4266,9 +4740,33 @@ function renderJSON(models, stream) {
  knowledgeCutoff: model.knowledgeCutoff,
  features: model.features,
  metadata: model.metadata
- })),
- shortcuts: MODEL_ALIASES
- };
+ }));
+ output.shortcuts = MODEL_ALIASES;
+ }
+ if (imageModels.length > 0) {
+ output.imageModels = imageModels.map((model) => ({
+ provider: model.provider,
+ modelId: model.modelId,
+ displayName: model.displayName,
+ supportedSizes: model.supportedSizes,
+ supportedQualities: model.supportedQualities,
+ maxImages: model.maxImages,
+ pricing: model.pricing,
+ features: model.features
+ }));
+ }
+ if (speechModels.length > 0) {
+ output.speechModels = speechModels.map((model) => ({
+ provider: model.provider,
+ modelId: model.modelId,
+ displayName: model.displayName,
+ voices: model.voices,
+ formats: model.formats,
+ maxInputLength: model.maxInputLength,
+ pricing: model.pricing,
+ features: model.features
+ }));
+ }
  stream.write(JSON.stringify(output, null, 2) + "\n");
  }
  function formatTokens2(count) {
@@ -4281,7 +4779,7 @@ function formatTokens2(count) {
  }
  }
  function registerModelsCommand(program, env) {
- program.command(COMMANDS.models).description("List all available LLM models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).action(
+ program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
  (options) => executeAction(
  () => handleModelsCommand(options, env),
  env
@@ -4289,6 +4787,96 @@ function registerModelsCommand(program, env) {
  );
  }

+ // src/cli/speech-command.ts
+ import { writeFileSync as writeFileSync3 } from "node:fs";
+ var DEFAULT_SPEECH_MODEL = "tts-1";
+ var DEFAULT_VOICE = "nova";
+ async function executeSpeech(textArg, options, env) {
+ const text2 = await resolvePrompt(textArg, env);
+ const client = env.createClient();
+ const model = options.model;
+ const voice = options.voice ?? DEFAULT_VOICE;
+ const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
+ const stderrTTY = env.stderr.isTTY === true;
+ if (!options.quiet && stderrTTY) {
+ env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
+ `);
+ }
+ const result = await client.speech.generate({
+ model,
+ input: text2,
+ voice,
+ responseFormat: options.format,
+ speed
+ });
+ const audioBuffer = Buffer.from(result.audio);
+ if (options.output) {
+ writeFileSync3(options.output, audioBuffer);
+ if (!options.quiet) {
+ env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
+ `);
+ }
+ } else {
+ env.stdout.write(audioBuffer);
+ }
+ if (!options.quiet && stderrTTY) {
+ const parts = [
+ `${result.usage.characterCount} characters`,
+ `format: ${result.format}`
+ ];
+ if (result.cost !== void 0) {
+ parts.push(`cost: ${formatCost(result.cost)}`);
+ }
+ env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+ `);
+ }
+ }
+ function registerSpeechCommand(program, env, config) {
+ program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
+ OPTION_FLAGS.model,
+ OPTION_DESCRIPTIONS.model,
+ config?.model ?? DEFAULT_SPEECH_MODEL
+ ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+ (text2, options) => executeAction(() => executeSpeech(text2, options, env), env)
+ );
+ }
+
+ // src/cli/vision-command.ts
+ init_model_shortcuts();
+ async function executeVision(imagePath, options, env) {
+ const client = env.createClient();
+ const model = resolveModel(options.model);
+ const imageBuffer = await readFileBuffer(imagePath);
+ const prompt = options.prompt ?? "Describe this image in detail.";
+ const stderrTTY = env.stderr.isTTY === true;
+ if (!options.quiet && stderrTTY) {
+ env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
+ `);
+ }
+ const result = await client.vision.analyze({
+ model,
+ image: imageBuffer,
+ prompt,
+ maxTokens: options.maxTokens
+ });
+ env.stdout.write(result);
+ env.stdout.write("\n");
+ }
+ function registerVisionCommand(program, env) {
+ program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
+ OPTION_FLAGS.model,
+ OPTION_DESCRIPTIONS.model,
+ "gpt-4o"
+ // Default to a vision-capable model
+ ).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
+ OPTION_FLAGS.maxTokens,
+ OPTION_DESCRIPTIONS.maxTokens,
+ createNumericParser({ label: "Max tokens", integer: true, min: 1 })
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
+ (imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
+ );
+ }
+
  // src/cli/environment.ts
  init_client();
  init_logger();
@@ -4334,7 +4922,7 @@ function createLoggerFactory(config) {
  }
  function createPromptFunction(stdin, stdout) {
  return (question) => {
- return new Promise((resolve2) => {
+ return new Promise((resolve3) => {
  const rl = readline.createInterface({
  input: stdin,
  output: stdout
@@ -4349,7 +4937,7 @@ function createPromptFunction(stdin, stdout) {
  `);
  rl.question(chalk9.green.bold("You: "), (answer) => {
  rl.close();
- resolve2(answer);
+ resolve3(answer);
  });
  });
  };
@@ -4440,6 +5028,9 @@ function createProgram(env, config) {
  });
  registerCompleteCommand(program, env, config?.complete);
  registerAgentCommand(program, env, config?.agent);
+ registerImageCommand(program, env, config?.image);
+ registerSpeechCommand(program, env, config?.speech);
+ registerVisionCommand(program, env);
  registerModelsCommand(program, env);
  registerGadgetCommand(program, env);
  if (config) {