llmist 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env node
2
- import "./chunk-QFRVTS5F.js";
2
+ import "./chunk-IHSZUAYN.js";
3
3
  import {
4
4
  AgentBuilder,
5
5
  BaseGadget,
@@ -10,14 +10,20 @@ import {
10
10
  LLMMessageBuilder,
11
11
  LLMist,
12
12
  MODEL_ALIASES,
13
+ audioFromBuffer,
13
14
  createGadget,
14
15
  createLogger,
16
+ detectAudioMimeType,
17
+ detectImageMimeType,
18
+ extractText,
19
+ imageFromBuffer,
15
20
  init_builder,
16
21
  init_client,
17
22
  init_constants,
18
23
  init_create_gadget,
19
24
  init_exceptions,
20
25
  init_gadget,
26
+ init_input_content,
21
27
  init_logger,
22
28
  init_messages,
23
29
  init_model_shortcuts,
@@ -26,8 +32,9 @@ import {
26
32
  init_schema_validator,
27
33
  resolveModel,
28
34
  schemaToJSONSchema,
35
+ text,
29
36
  validateGadgetSchema
30
- } from "./chunk-6ZDUWO6N.js";
37
+ } from "./chunk-YHS2DYXP.js";
31
38
 
32
39
  // src/cli/constants.ts
33
40
  var CLI_NAME = "llmist";
@@ -38,7 +45,8 @@ var COMMANDS = {
38
45
  models: "models",
39
46
  gadget: "gadget",
40
47
  image: "image",
41
- speech: "speech"
48
+ speech: "speech",
49
+ vision: "vision"
42
50
  };
43
51
  var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
44
52
  var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -60,6 +68,9 @@ var OPTION_FLAGS = {
60
68
  dockerRo: "--docker-ro",
61
69
  noDocker: "--no-docker",
62
70
  dockerDev: "--docker-dev",
71
+ // Multimodal input options
72
+ inputImage: "--image <path>",
73
+ inputAudio: "--audio <path>",
63
74
  // Image generation options
64
75
  imageSize: "--size <size>",
65
76
  imageQuality: "--quality <quality>",
@@ -85,6 +96,9 @@ var OPTION_DESCRIPTIONS = {
85
96
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
86
97
  noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
87
98
  quiet: "Suppress all output except content (text and TellUser messages).",
99
+ // Multimodal input descriptions
100
+ inputImage: "Image file to include with the prompt (vision models).",
101
+ inputAudio: "Audio file to include with the prompt (Gemini only).",
88
102
  docker: "Run agent in a Docker sandbox container for security isolation.",
89
103
  dockerRo: "Run in Docker with current directory mounted read-only.",
90
104
  noDocker: "Disable Docker sandboxing (override config).",
@@ -108,7 +122,7 @@ import { Command, InvalidArgumentError as InvalidArgumentError2 } from "commande
108
122
  // package.json
109
123
  var package_default = {
110
124
  name: "llmist",
111
- version: "2.4.0",
125
+ version: "2.5.0",
112
126
  description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
113
127
  type: "module",
114
128
  main: "dist/index.cjs",
@@ -248,6 +262,7 @@ function isAbortError(error) {
248
262
  }
249
263
 
250
264
  // src/cli/agent-command.ts
265
+ init_input_content();
251
266
  init_registry();
252
267
  init_constants();
253
268
 
@@ -572,6 +587,75 @@ var finish = createGadget({
572
587
  });
573
588
  var builtinGadgets = [askUser, tellUser, finish];
574
589
 
590
+ // src/cli/file-utils.ts
591
+ init_input_content();
592
+ import { readFile, stat } from "node:fs/promises";
593
+ import { resolve as resolve2 } from "node:path";
594
+ var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
595
/**
 * Formats a byte count as a short, human-readable size string.
 *
 * Counts below 1024 are printed verbatim in bytes. Larger counts are scaled
 * by powers of 1024 to KB, MB or GB and printed with one decimal place.
 * GB is the largest unit, so bigger values stay in GB.
 *
 * A value whose one-decimal rounding would read "1024.0" is promoted to the
 * next unit, so 1048575 bytes prints as "1.0 MB" rather than "1024.0 KB".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} For example "512 bytes", "1.5 KB", "50.0 MB" or "3.0 GB".
 */
function formatFileSize(bytes) {
  if (bytes < 1024) return `${bytes} bytes`;
  const units = ["KB", "MB", "GB"];
  let value = bytes / 1024;
  let unitIndex = 0;
  // Compare the rounded value, not the raw one: 1023.96 rounds to "1024.0",
  // which belongs in the next unit.
  while (unitIndex < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024;
    unitIndex += 1;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
}
601
/**
 * Rejects when the file at `absolutePath` is larger than `maxSize` bytes.
 *
 * The size comes from `stat`; the file content is not read here.
 *
 * @param {string} absolutePath - Path passed to `stat`.
 * @param {string} filePath - Path as supplied by the caller; used only in the error message.
 * @param {number} maxSize - Largest accepted size in bytes.
 * @returns {Promise<void>} Resolves when the file is within the limit.
 * @throws {Error} When the file exceeds `maxSize`; `stat` failures propagate unchanged.
 */
async function checkFileSize(absolutePath, filePath, maxSize) {
  const { size } = await stat(absolutePath);
  // Guard written as a negated ">" so a missing maxSize behaves as in the plain comparison.
  if (!(size > maxSize)) {
    return;
  }
  const actual = formatFileSize(size);
  const limit = formatFileSize(maxSize);
  throw new Error(
    `File "${filePath}" is too large (${actual}). Maximum allowed size is ${limit}. Consider compressing the file or using a smaller version.`
  );
}
609
/**
 * Reads an image file from disk and converts it with `imageFromBuffer`.
 *
 * The file size is checked against the limit first, then the file is read in
 * full and its MIME type is detected from the bytes via `detectImageMimeType`.
 *
 * @param {string} filePath - Path to the image; resolved to an absolute path.
 * @param {{ maxFileSize?: number }} [options] - `maxFileSize` overrides
 *   `DEFAULT_MAX_FILE_SIZE` (bytes).
 * @returns {Promise<*>} Whatever `imageFromBuffer(buffer, mimeType)` returns.
 * @throws {Error} `Failed to read image file ...` when the size check or the
 *   read fails; the underlying error is attached as `cause`.
 * @throws {Error} When the content is not detected as a supported image format.
 */
async function readImageFile(filePath, options = {}) {
  const absolutePath = resolve2(filePath);
  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
  let buffer;
  try {
    await checkFileSize(absolutePath, filePath, maxFileSize);
    buffer = await readFile(absolutePath);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable so callers can still inspect
    // `cause.code` (ENOENT, EACCES, ...) and the original stack.
    throw new Error(`Failed to read image file "${filePath}": ${message}`, { cause: error });
  }
  const mimeType = detectImageMimeType(buffer);
  if (!mimeType) {
    throw new Error(
      `File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
    );
  }
  return imageFromBuffer(buffer, mimeType);
}
628
/**
 * Reads an audio file from disk and converts it with `audioFromBuffer`.
 *
 * The file size is checked against the limit first, then the file is read in
 * full and its MIME type is detected from the bytes via `detectAudioMimeType`.
 *
 * @param {string} filePath - Path to the audio file; resolved to an absolute path.
 * @param {{ maxFileSize?: number }} [options] - `maxFileSize` overrides
 *   `DEFAULT_MAX_FILE_SIZE` (bytes).
 * @returns {Promise<*>} Whatever `audioFromBuffer(buffer, mimeType)` returns.
 * @throws {Error} `Failed to read audio file ...` when the size check or the
 *   read fails; the underlying error is attached as `cause`.
 * @throws {Error} When the content is not detected as a supported audio format.
 */
async function readAudioFile(filePath, options = {}) {
  const absolutePath = resolve2(filePath);
  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
  let buffer;
  try {
    await checkFileSize(absolutePath, filePath, maxFileSize);
    buffer = await readFile(absolutePath);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable so callers can still inspect
    // `cause.code` (ENOENT, EACCES, ...) and the original stack.
    throw new Error(`Failed to read audio file "${filePath}": ${message}`, { cause: error });
  }
  const mimeType = detectAudioMimeType(buffer);
  if (!mimeType) {
    throw new Error(
      `File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
    );
  }
  return audioFromBuffer(buffer, mimeType);
}
647
/**
 * Reads a file into memory after checking it against a size limit.
 *
 * No format detection is performed; the raw result of `readFile` is returned.
 *
 * @param {string} filePath - Path to the file; resolved to an absolute path.
 * @param {{ maxFileSize?: number }} [options] - `maxFileSize` overrides
 *   `DEFAULT_MAX_FILE_SIZE` (bytes).
 * @returns {Promise<Buffer>} File content (`readFile` is called without an encoding).
 * @throws {Error} `Failed to read file ...` when the size check or the read
 *   fails; the underlying error is attached as `cause`.
 */
async function readFileBuffer(filePath, options = {}) {
  const absolutePath = resolve2(filePath);
  const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
  try {
    await checkFileSize(absolutePath, filePath, maxFileSize);
    return await readFile(absolutePath);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    // Keep the original error reachable so callers can still inspect
    // `cause.code` (ENOENT, EACCES, ...) and the original stack.
    throw new Error(`Failed to read file "${filePath}": ${message}`, { cause: error });
  }
}
658
+
575
659
  // src/cli/gadgets.ts
576
660
  init_gadget();
577
661
  import fs5 from "node:fs";
@@ -735,7 +819,7 @@ ${formattedList}`;
735
819
  // src/cli/builtins/filesystem/read-file.ts
736
820
  import fs3 from "node:fs";
737
821
  import { z as z3 } from "zod";
738
- var readFile = createGadget({
822
+ var readFile2 = createGadget({
739
823
  name: "ReadFile",
740
824
  description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
741
825
  schema: z3.object({
@@ -1006,7 +1090,7 @@ error: ${message}`;
1006
1090
  // src/cli/builtins/index.ts
1007
1091
  var builtinGadgetRegistry = {
1008
1092
  ListDirectory: listDirectory,
1009
- ReadFile: readFile,
1093
+ ReadFile: readFile2,
1010
1094
  WriteFile: writeFile,
1011
1095
  EditFile: editFile,
1012
1096
  RunCommand: runCommand
@@ -1141,6 +1225,7 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
1141
1225
  }
1142
1226
 
1143
1227
  // src/cli/llm-logging.ts
1228
+ init_messages();
1144
1229
  import { mkdir, writeFile as writeFile2 } from "node:fs/promises";
1145
1230
  import { homedir } from "node:os";
1146
1231
  import { join } from "node:path";
@@ -1158,7 +1243,7 @@ function formatLlmRequest(messages) {
1158
1243
  const lines = [];
1159
1244
  for (const msg of messages) {
1160
1245
  lines.push(`=== ${msg.role.toUpperCase()} ===`);
1161
- lines.push(msg.content ?? "");
1246
+ lines.push(msg.content ? extractText(msg.content) : "");
1162
1247
  lines.push("");
1163
1248
  }
1164
1249
  return lines.join("\n");
@@ -1232,9 +1317,9 @@ function ensureMarkedConfigured() {
1232
1317
  markedConfigured = true;
1233
1318
  }
1234
1319
  }
1235
- function renderMarkdown(text) {
1320
+ function renderMarkdown(text2) {
1236
1321
  ensureMarkedConfigured();
1237
- let rendered = marked.parse(text);
1322
+ let rendered = marked.parse(text2);
1238
1323
  rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => chalk3.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => chalk3.italic(content));
1239
1324
  return rendered.trimEnd();
1240
1325
  }
@@ -1248,8 +1333,8 @@ function createRainbowSeparator() {
1248
1333
  }
1249
1334
  return result;
1250
1335
  }
1251
- function renderMarkdownWithSeparators(text) {
1252
- const rendered = renderMarkdown(text);
1336
+ function renderMarkdownWithSeparators(text2) {
1337
+ const rendered = renderMarkdown(text2);
1253
1338
  const separator = createRainbowSeparator();
1254
1339
  return `
1255
1340
  ${separator}
@@ -1417,12 +1502,12 @@ var StreamPrinter = class {
1417
1502
  *
1418
1503
  * @param text - Text to write
1419
1504
  */
1420
- write(text) {
1421
- if (!text) {
1505
+ write(text2) {
1506
+ if (!text2) {
1422
1507
  return;
1423
1508
  }
1424
- this.target.write(text);
1425
- this.endedWithNewline = text.endsWith("\n");
1509
+ this.target.write(text2);
1510
+ this.endedWithNewline = text2.endsWith("\n");
1426
1511
  }
1427
1512
  /**
1428
1513
  * Ensures output ends with a newline by writing one if needed.
@@ -1901,7 +1986,7 @@ function addCompleteOptions(cmd, defaults) {
1901
1986
  OPTION_DESCRIPTIONS.maxTokens,
1902
1987
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
1903
1988
  defaults?.["max-tokens"]
1904
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
1989
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
1905
1990
  }
1906
1991
  function addAgentOptions(cmd, defaults) {
1907
1992
  const gadgetAccumulator = (value, previous = []) => [
@@ -1925,7 +2010,7 @@ function addAgentOptions(cmd, defaults) {
1925
2010
  OPTION_FLAGS.noBuiltinInteraction,
1926
2011
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
1927
2012
  defaults?.["builtin-interaction"] !== false
1928
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
2013
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
1929
2014
  }
1930
2015
  function configToCompleteOptions(config) {
1931
2016
  const result = {};
@@ -3639,8 +3724,8 @@ Denied: ${result.reason ?? "by user"}`
3639
3724
  builder.withTextOnlyHandler("acknowledge");
3640
3725
  builder.withTextWithGadgetsHandler({
3641
3726
  gadgetName: "TellUser",
3642
- parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
3643
- resultMapping: (text) => `\u2139\uFE0F ${text}`
3727
+ parameterMapping: (text2) => ({ message: text2, done: false, type: "info" }),
3728
+ resultMapping: (text2) => `\u2139\uFE0F ${text2}`
3644
3729
  });
3645
3730
  builder.withTrailingMessage(
3646
3731
  (ctx) => [
@@ -3649,7 +3734,19 @@ Denied: ${result.reason ?? "by user"}`
3649
3734
  "Maximize efficiency by batching independent operations in a single response."
3650
3735
  ].join(" ")
3651
3736
  );
3652
- const agent = builder.ask(prompt);
3737
+ let agent;
3738
+ if (options.image || options.audio) {
3739
+ const parts = [text(prompt)];
3740
+ if (options.image) {
3741
+ parts.push(await readImageFile(options.image));
3742
+ }
3743
+ if (options.audio) {
3744
+ parts.push(await readAudioFile(options.audio));
3745
+ }
3746
+ agent = builder.askWithContent(parts);
3747
+ } else {
3748
+ agent = builder.ask(prompt);
3749
+ }
3653
3750
  let textBuffer = "";
3654
3751
  const flushTextBuffer = () => {
3655
3752
  if (textBuffer) {
@@ -3724,6 +3821,7 @@ function registerAgentCommand(program, env, config) {
3724
3821
  }
3725
3822
 
3726
3823
  // src/cli/complete-command.ts
3824
+ init_input_content();
3727
3825
  init_messages();
3728
3826
  init_model_shortcuts();
3729
3827
  init_constants();
@@ -3735,7 +3833,18 @@ async function executeComplete(promptArg, options, env) {
3735
3833
  if (options.system) {
3736
3834
  builder.addSystem(options.system);
3737
3835
  }
3738
- builder.addUser(prompt);
3836
+ if (options.image || options.audio) {
3837
+ const parts = [text(prompt)];
3838
+ if (options.image) {
3839
+ parts.push(await readImageFile(options.image));
3840
+ }
3841
+ if (options.audio) {
3842
+ parts.push(await readAudioFile(options.audio));
3843
+ }
3844
+ builder.addUserMultimodal(parts);
3845
+ } else {
3846
+ builder.addUser(prompt);
3847
+ }
3739
3848
  const messages = builder.build();
3740
3849
  const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
3741
3850
  let llmSessionDir;
@@ -4683,7 +4792,7 @@ import { writeFileSync as writeFileSync3 } from "node:fs";
4683
4792
  var DEFAULT_SPEECH_MODEL = "tts-1";
4684
4793
  var DEFAULT_VOICE = "nova";
4685
4794
  async function executeSpeech(textArg, options, env) {
4686
- const text = await resolvePrompt(textArg, env);
4795
+ const text2 = await resolvePrompt(textArg, env);
4687
4796
  const client = env.createClient();
4688
4797
  const model = options.model;
4689
4798
  const voice = options.voice ?? DEFAULT_VOICE;
@@ -4695,7 +4804,7 @@ async function executeSpeech(textArg, options, env) {
4695
4804
  }
4696
4805
  const result = await client.speech.generate({
4697
4806
  model,
4698
- input: text,
4807
+ input: text2,
4699
4808
  voice,
4700
4809
  responseFormat: options.format,
4701
4810
  speed
@@ -4728,7 +4837,43 @@ function registerSpeechCommand(program, env, config) {
4728
4837
  OPTION_DESCRIPTIONS.model,
4729
4838
  config?.model ?? DEFAULT_SPEECH_MODEL
4730
4839
  ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
4731
- (text, options) => executeAction(() => executeSpeech(text, options, env), env)
4840
+ (text2, options) => executeAction(() => executeSpeech(text2, options, env), env)
4841
+ );
4842
+ }
4843
+
4844
+ // src/cli/vision-command.ts
4845
+ init_model_shortcuts();
4846
/**
 * Runs the `vision` command: sends an image file to a model and prints the answer.
 *
 * Creates a client from `env`, resolves the model name, loads the image via
 * `readFileBuffer`, and calls `client.vision.analyze`. A progress line goes to
 * stderr only when stderr is a TTY and `options.quiet` is not set. The result
 * is written to stdout followed by a newline.
 *
 * @param {string} imagePath - Path of the image file to analyze.
 * @param {object} options - Parsed CLI options; reads `model`, `prompt`,
 *   `maxTokens` and `quiet`.
 * @param {object} env - CLI environment; uses `createClient()`, `stderr` and `stdout`.
 * @returns {Promise<void>}
 */
async function executeVision(imagePath, options, env) {
  const visionClient = env.createClient();
  const modelId = resolveModel(options.model);
  const imageData = await readFileBuffer(imagePath);
  const analysisPrompt = options.prompt ?? "Describe this image in detail.";

  const isInteractive = env.stderr.isTTY === true;
  const shouldAnnounce = !options.quiet && isInteractive;
  if (shouldAnnounce) {
    env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${modelId}...\n`);
  }

  const request = {
    model: modelId,
    image: imageData,
    prompt: analysisPrompt,
    maxTokens: options.maxTokens
  };
  const analysis = await visionClient.vision.analyze(request);

  env.stdout.write(analysis);
  env.stdout.write("\n");
}
4865
/**
 * Registers the `vision` subcommand on the CLI program.
 *
 * The command takes one required `<image>` argument plus the model, prompt,
 * max-tokens and quiet options, and dispatches to `executeVision` through
 * `executeAction`.
 *
 * @param {object} program - Commander program to register the command on.
 * @param {object} env - CLI environment passed to `executeAction` and `executeVision`.
 * @returns {void}
 */
function registerVisionCommand(program, env) {
  const runVision = (imagePath, options) =>
    executeAction(() => executeVision(imagePath, options, env), env);

  // NOTE(review): relies on Commander's builder methods returning the command
  // itself, as the original single fluent chain did.
  const visionCommand = program
    .command(COMMANDS.vision ?? "vision")
    .description("Analyze an image using vision-capable models")
    .argument("<image>", "Path to image file to analyze");

  // Default to a vision-capable model
  visionCommand.option(OPTION_FLAGS.model, OPTION_DESCRIPTIONS.model, "gpt-4o");
  visionCommand.option(
    "-p, --prompt <prompt>",
    "Analysis prompt describing what to extract or describe"
  );
  visionCommand.option(
    OPTION_FLAGS.maxTokens,
    OPTION_DESCRIPTIONS.maxTokens,
    createNumericParser({ label: "Max tokens", integer: true, min: 1 })
  );
  visionCommand.option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet);
  visionCommand.action(runVision);
}
4734
4879
 
@@ -4777,7 +4922,7 @@ function createLoggerFactory(config) {
4777
4922
  }
4778
4923
  function createPromptFunction(stdin, stdout) {
4779
4924
  return (question) => {
4780
- return new Promise((resolve2) => {
4925
+ return new Promise((resolve3) => {
4781
4926
  const rl = readline.createInterface({
4782
4927
  input: stdin,
4783
4928
  output: stdout
@@ -4792,7 +4937,7 @@ function createPromptFunction(stdin, stdout) {
4792
4937
  `);
4793
4938
  rl.question(chalk9.green.bold("You: "), (answer) => {
4794
4939
  rl.close();
4795
- resolve2(answer);
4940
+ resolve3(answer);
4796
4941
  });
4797
4942
  });
4798
4943
  };
@@ -4885,6 +5030,7 @@ function createProgram(env, config) {
4885
5030
  registerAgentCommand(program, env, config?.agent);
4886
5031
  registerImageCommand(program, env, config?.image);
4887
5032
  registerSpeechCommand(program, env, config?.speech);
5033
+ registerVisionCommand(program, env);
4888
5034
  registerModelsCommand(program, env);
4889
5035
  registerGadgetCommand(program, env);
4890
5036
  if (config) {