npm - llmist - Versions diffs - 2.4.0 → 2.5.0 - Mend

llmist 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +7 -0
package/dist/{chunk-QFRVTS5F.js → chunk-IHSZUAYN.js} +4 -2
package/dist/chunk-IHSZUAYN.js.map +1 -0
package/dist/{chunk-6ZDUWO6N.js → chunk-YHS2DYXP.js} +1781 -528
package/dist/chunk-YHS2DYXP.js.map +1 -0
package/dist/cli.cjs +1218 -151
package/dist/cli.cjs.map +1 -1
package/dist/cli.js +172 -26
package/dist/cli.js.map +1 -1
package/dist/index.cjs +1393 -124
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +93 -20
package/dist/index.d.ts +93 -20
package/dist/index.js +34 -2
package/dist/{mock-stream-BQcC2VCP.d.cts → mock-stream-ga4KIiwX.d.cts} +714 -12
package/dist/{mock-stream-BQcC2VCP.d.ts → mock-stream-ga4KIiwX.d.ts} +714 -12
package/dist/testing/index.cjs +1713 -508
package/dist/testing/index.cjs.map +1 -1
package/dist/testing/index.d.cts +2 -2
package/dist/testing/index.d.ts +2 -2
package/dist/testing/index.js +1 -1
package/package.json +1 -1
package/dist/chunk-6ZDUWO6N.js.map +0 -1
package/dist/chunk-QFRVTS5F.js.map +0 -1

package/dist/cli.js CHANGED Viewed

@@ -1,5 +1,5 @@
 #!/usr/bin/env node
-import "./chunk-QFRVTS5F.js";
+import "./chunk-IHSZUAYN.js";
 import {
   AgentBuilder,
   BaseGadget,
@@ -10,14 +10,20 @@ import {
   LLMMessageBuilder,
   LLMist,
   MODEL_ALIASES,
+  audioFromBuffer,
   createGadget,
   createLogger,
+  detectAudioMimeType,
+  detectImageMimeType,
+  extractText,
+  imageFromBuffer,
   init_builder,
   init_client,
   init_constants,
   init_create_gadget,
   init_exceptions,
   init_gadget,
+  init_input_content,
   init_logger,
   init_messages,
   init_model_shortcuts,
@@ -26,8 +32,9 @@ import {
   init_schema_validator,
   resolveModel,
   schemaToJSONSchema,
+  text,
   validateGadgetSchema
-} from "./chunk-6ZDUWO6N.js";
+} from "./chunk-YHS2DYXP.js";
 // src/cli/constants.ts
 var CLI_NAME = "llmist";
@@ -38,7 +45,8 @@ var COMMANDS = {
   models: "models",
   gadget: "gadget",
   image: "image",
-  speech: "speech"
+  speech: "speech",
+  vision: "vision"
 };
 var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
 var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -60,6 +68,9 @@ var OPTION_FLAGS = {
   dockerRo: "--docker-ro",
   noDocker: "--no-docker",
   dockerDev: "--docker-dev",
+  // Multimodal input options
+  inputImage: "--image <path>",
+  inputAudio: "--audio <path>",
   // Image generation options
   imageSize: "--size <size>",
   imageQuality: "--quality <quality>",
@@ -85,6 +96,9 @@ var OPTION_DESCRIPTIONS = {
   noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
   noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
   quiet: "Suppress all output except content (text and TellUser messages).",
+  // Multimodal input descriptions
+  inputImage: "Image file to include with the prompt (vision models).",
+  inputAudio: "Audio file to include with the prompt (Gemini only).",
   docker: "Run agent in a Docker sandbox container for security isolation.",
   dockerRo: "Run in Docker with current directory mounted read-only.",
   noDocker: "Disable Docker sandboxing (override config).",
@@ -108,7 +122,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
 // package.json
 var package_default = {
   name: "llmist",
-  version: "2.4.0",
+  version: "2.5.0",
   description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
   type: "module",
   main: "dist/index.cjs",
@@ -248,6 +262,7 @@ function isAbortError(error) {
 }
 // src/cli/agent-command.ts
+init_input_content();
 init_registry();
 init_constants();
@@ -572,6 +587,75 @@ var finish = createGadget({
 });
 var builtinGadgets = [askUser, tellUser, finish];
+// src/cli/file-utils.ts
+init_input_content();
+import { readFile, stat } from "node:fs/promises";
+import { resolve as resolve2 } from "node:path";
+var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
+function formatFileSize(bytes) {
+  if (bytes < 1024) return `${bytes} bytes`;
+  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+  if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+  return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
+}
+async function checkFileSize(absolutePath, filePath, maxSize) {
+  const stats = await stat(absolutePath);
+  if (stats.size > maxSize) {
+    throw new Error(
+      `File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
+    );
+  }
+}
+async function readImageFile(filePath, options = {}) {
+  const absolutePath = resolve2(filePath);
+  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+  let buffer;
+  try {
+    await checkFileSize(absolutePath, filePath, maxFileSize);
+    buffer = await readFile(absolutePath);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new Error(`Failed to read image file "${filePath}": ${message}`);
+  }
+  const mimeType = detectImageMimeType(buffer);
+  if (!mimeType) {
+    throw new Error(
+      `File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
+    );
+  }
+  return imageFromBuffer(buffer, mimeType);
+}
+async function readAudioFile(filePath, options = {}) {
+  const absolutePath = resolve2(filePath);
+  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+  let buffer;
+  try {
+    await checkFileSize(absolutePath, filePath, maxFileSize);
+    buffer = await readFile(absolutePath);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new Error(`Failed to read audio file "${filePath}": ${message}`);
+  }
+  const mimeType = detectAudioMimeType(buffer);
+  if (!mimeType) {
+    throw new Error(
+      `File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
+    );
+  }
+  return audioFromBuffer(buffer, mimeType);
+}
+async function readFileBuffer(filePath, options = {}) {
+  const absolutePath = resolve2(filePath);
+  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+  try {
+    await checkFileSize(absolutePath, filePath, maxFileSize);
+    return await readFile(absolutePath);
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    throw new Error(`Failed to read file "${filePath}": ${message}`);
+  }
+}
 // src/cli/gadgets.ts
 init_gadget();
 import fs5 from "node:fs";
@@ -735,7 +819,7 @@ ${formattedList}`;
 // src/cli/builtins/filesystem/read-file.ts
 import fs3 from "node:fs";
 import { z as z3 } from "zod";
-var readFile = createGadget({
+var readFile2 = createGadget({
   name: "ReadFile",
   description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
   schema: z3.object({
@@ -1006,7 +1090,7 @@ error: ${message}`;
 // src/cli/builtins/index.ts
 var builtinGadgetRegistry = {
   ListDirectory: listDirectory,
-  ReadFile: readFile,
+  ReadFile: readFile2,
   WriteFile: writeFile,
   EditFile: editFile,
   RunCommand: runCommand
@@ -1141,6 +1225,7 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
 }
 // src/cli/llm-logging.ts
+init_messages();
 import { mkdir, writeFile as writeFile2 } from "node:fs/promises";
 import { homedir } from "node:os";
 import { join } from "node:path";
@@ -1158,7 +1243,7 @@ function formatLlmRequest(messages) {
   const lines = [];
   for (const msg of messages) {
     lines.push(`=== ${msg.role.toUpperCase()} ===`);
-    lines.push(msg.content ?? "");
+    lines.push(msg.content ? extractText(msg.content) : "");
     lines.push("");
   }
   return lines.join("\n");
@@ -1232,9 +1317,9 @@ function ensureMarkedConfigured() {
     markedConfigured = true;
   }
 }
-function renderMarkdown(text) {
+function renderMarkdown(text2) {
   ensureMarkedConfigured();
-  let rendered = marked.parse(text);
+  let rendered = marked.parse(text2);
   rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => chalk3.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => chalk3.italic(content));
   return rendered.trimEnd();
 }
@@ -1248,8 +1333,8 @@ function createRainbowSeparator() {
   }
   return result;
 }
-function renderMarkdownWithSeparators(text) {
-  const rendered = renderMarkdown(text);
+function renderMarkdownWithSeparators(text2) {
+  const rendered = renderMarkdown(text2);
   const separator = createRainbowSeparator();
   return `
 ${separator}
@@ -1417,12 +1502,12 @@ var StreamPrinter = class {
    *
    * @param text - Text to write
    */
-  write(text) {
-    if (!text) {
+  write(text2) {
+    if (!text2) {
       return;
     }
-    this.target.write(text);
-    this.endedWithNewline = text.endsWith("\n");
+    this.target.write(text2);
+    this.endedWithNewline = text2.endsWith("\n");
   }
   /**
    * Ensures output ends with a newline by writing one if needed.
@@ -1901,7 +1986,7 @@ function addCompleteOptions(cmd, defaults) {
     OPTION_DESCRIPTIONS.maxTokens,
     createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
     defaults?.["max-tokens"]
-  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
 }
 function addAgentOptions(cmd, defaults) {
   const gadgetAccumulator = (value, previous = []) => [
@@ -1925,7 +2010,7 @@ function addAgentOptions(cmd, defaults) {
     OPTION_FLAGS.noBuiltinInteraction,
     OPTION_DESCRIPTIONS.noBuiltinInteraction,
     defaults?.["builtin-interaction"] !== false
-  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
 }
 function configToCompleteOptions(config) {
   const result = {};
@@ -3639,8 +3724,8 @@ Denied: ${result.reason ?? "by user"}`
   builder.withTextOnlyHandler("acknowledge");
   builder.withTextWithGadgetsHandler({
     gadgetName: "TellUser",
-    parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
-    resultMapping: (text) => `\u2139\uFE0F  ${text}`
+    parameterMapping: (text2) => ({ message: text2, done: false, type: "info" }),
+    resultMapping: (text2) => `\u2139\uFE0F  ${text2}`
   });
   builder.withTrailingMessage(
     (ctx) => [
@@ -3649,7 +3734,19 @@ Denied: ${result.reason ?? "by user"}`
       "Maximize efficiency by batching independent operations in a single response."
     ].join(" ")
   );
-  const agent = builder.ask(prompt);
+  let agent;
+  if (options.image || options.audio) {
+    const parts = [text(prompt)];
+    if (options.image) {
+      parts.push(await readImageFile(options.image));
+    }
+    if (options.audio) {
+      parts.push(await readAudioFile(options.audio));
+    }
+    agent = builder.askWithContent(parts);
+  } else {
+    agent = builder.ask(prompt);
+  }
   let textBuffer = "";
   const flushTextBuffer = () => {
     if (textBuffer) {
@@ -3724,6 +3821,7 @@ function registerAgentCommand(program, env, config) {
 }
 // src/cli/complete-command.ts
+init_input_content();
 init_messages();
 init_model_shortcuts();
 init_constants();
@@ -3735,7 +3833,18 @@ async function executeComplete(promptArg, options, env) {
   if (options.system) {
     builder.addSystem(options.system);
   }
-  builder.addUser(prompt);
+  if (options.image || options.audio) {
+    const parts = [text(prompt)];
+    if (options.image) {
+      parts.push(await readImageFile(options.image));
+    }
+    if (options.audio) {
+      parts.push(await readAudioFile(options.audio));
+    }
+    builder.addUserMultimodal(parts);
+  } else {
+    builder.addUser(prompt);
+  }
   const messages = builder.build();
   const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
   let llmSessionDir;
@@ -4683,7 +4792,7 @@ import { writeFileSync as writeFileSync3 } from "node:fs";
 var DEFAULT_SPEECH_MODEL = "tts-1";
 var DEFAULT_VOICE = "nova";
 async function executeSpeech(textArg, options, env) {
-  const text = await resolvePrompt(textArg, env);
+  const text2 = await resolvePrompt(textArg, env);
   const client = env.createClient();
   const model = options.model;
   const voice = options.voice ?? DEFAULT_VOICE;
@@ -4695,7 +4804,7 @@ async function executeSpeech(textArg, options, env) {
   }
   const result = await client.speech.generate({
     model,
-    input: text,
+    input: text2,
     voice,
     responseFormat: options.format,
     speed
@@ -4728,7 +4837,43 @@ function registerSpeechCommand(program, env, config) {
     OPTION_DESCRIPTIONS.model,
     config?.model ?? DEFAULT_SPEECH_MODEL
   ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
-    (text, options) => executeAction(() => executeSpeech(text, options, env), env)
+    (text2, options) => executeAction(() => executeSpeech(text2, options, env), env)
+  );
+}
+// src/cli/vision-command.ts
+init_model_shortcuts();
+async function executeVision(imagePath, options, env) {
+  const client = env.createClient();
+  const model = resolveModel(options.model);
+  const imageBuffer = await readFileBuffer(imagePath);
+  const prompt = options.prompt ?? "Describe this image in detail.";
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
+`);
+  }
+  const result = await client.vision.analyze({
+    model,
+    image: imageBuffer,
+    prompt,
+    maxTokens: options.maxTokens
+  });
+  env.stdout.write(result);
+  env.stdout.write("\n");
+}
+function registerVisionCommand(program, env) {
+  program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    "gpt-4o"
+    // Default to a vision-capable model
+  ).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
+    OPTION_FLAGS.maxTokens,
+    OPTION_DESCRIPTIONS.maxTokens,
+    createNumericParser({ label: "Max tokens", integer: true, min: 1 })
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
+    (imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
   );
 }
@@ -4777,7 +4922,7 @@ function createLoggerFactory(config) {
 }
 function createPromptFunction(stdin, stdout) {
   return (question) => {
-    return new Promise((resolve2) => {
+    return new Promise((resolve3) => {
       const rl = readline.createInterface({
         input: stdin,
         output: stdout
@@ -4792,7 +4937,7 @@ function createPromptFunction(stdin, stdout) {
 `);
       rl.question(chalk9.green.bold("You: "), (answer) => {
         rl.close();
-        resolve2(answer);
+        resolve3(answer);
       });
     });
   };
@@ -4885,6 +5030,7 @@ function createProgram(env, config) {
   registerAgentCommand(program, env, config?.agent);
   registerImageCommand(program, env, config?.image);
   registerSpeechCommand(program, env, config?.speech);
+  registerVisionCommand(program, env);
   registerModelsCommand(program, env);
   registerGadgetCommand(program, env);
   if (config) {