llmist 2.4.0 → 2.5.0

This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
package/dist/cli.cjs CHANGED
@@ -46,6 +46,137 @@ var init_constants = __esm({
  }
  });

+ // src/core/input-content.ts
+ function text(content) {
+ return { type: "text", text: content };
+ }
+ function imageFromUrl(url) {
+ return {
+ type: "image",
+ source: { type: "url", url }
+ };
+ }
+ function detectImageMimeType(data) {
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
+ for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+ if (bytes.length >= magic.length) {
+ let matches = true;
+ for (let i = 0; i < magic.length; i++) {
+ if (bytes[i] !== magic[i]) {
+ matches = false;
+ break;
+ }
+ }
+ if (matches) {
+ if (mimeType === "image/webp") {
+ if (bytes.length >= 12) {
+ const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+ if (!webpMarker) continue;
+ }
+ }
+ return mimeType;
+ }
+ }
+ }
+ return null;
+ }
+ function detectAudioMimeType(data) {
+ const bytes = data instanceof Buffer ? data : Buffer.from(data);
+ for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+ if (bytes.length >= magic.length) {
+ let matches = true;
+ for (let i = 0; i < magic.length; i++) {
+ if (bytes[i] !== magic[i]) {
+ matches = false;
+ break;
+ }
+ }
+ if (matches) {
+ if (mimeType === "audio/wav") {
+ if (bytes.length >= 12) {
+ const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+ if (!waveMarker) continue;
+ }
+ }
+ return mimeType;
+ }
+ }
+ }
+ return null;
+ }
+ function toBase64(data) {
+ if (typeof data === "string") {
+ return data;
+ }
+ return Buffer.from(data).toString("base64");
+ }
+ function imageFromBuffer(buffer, mediaType) {
+ const detectedType = mediaType ?? detectImageMimeType(buffer);
+ if (!detectedType) {
+ throw new Error(
+ "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+ );
+ }
+ return {
+ type: "image",
+ source: {
+ type: "base64",
+ mediaType: detectedType,
+ data: toBase64(buffer)
+ }
+ };
+ }
+ function audioFromBuffer(buffer, mediaType) {
+ const detectedType = mediaType ?? detectAudioMimeType(buffer);
+ if (!detectedType) {
+ throw new Error(
+ "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+ );
+ }
+ return {
+ type: "audio",
+ source: {
+ type: "base64",
+ mediaType: detectedType,
+ data: toBase64(buffer)
+ }
+ };
+ }
+ function isDataUrl(input) {
+ return input.startsWith("data:");
+ }
+ function parseDataUrl(url) {
+ const match = url.match(/^data:([^;]+);base64,(.+)$/);
+ if (!match) return null;
+ return { mimeType: match[1], data: match[2] };
+ }
+ var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+ var init_input_content = __esm({
+ "src/core/input-content.ts"() {
+ "use strict";
+ IMAGE_MAGIC_BYTES = [
+ { bytes: [255, 216, 255], mimeType: "image/jpeg" },
+ { bytes: [137, 80, 78, 71], mimeType: "image/png" },
+ { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+ // WebP starts with RIFF....WEBP
+ { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+ ];
+ AUDIO_MAGIC_BYTES = [
+ // MP3 frame sync
+ { bytes: [255, 251], mimeType: "audio/mp3" },
+ { bytes: [255, 250], mimeType: "audio/mp3" },
+ // ID3 tag (MP3)
+ { bytes: [73, 68, 51], mimeType: "audio/mp3" },
+ // OGG
+ { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+ // WAV (RIFF)
+ { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+ // WebM
+ { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
+ ];
+ }
+ });
+
  // src/core/model-shortcuts.ts
  function isKnownModelPattern(model) {
  const normalized = model.toLowerCase();
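The helpers above identify formats by magic bytes (JPEG, PNG, GIF, WebP for images; MP3, OGG, WAV, WebM for audio) and fall back to an explicit media type when detection fails. A minimal usage sketch, assuming the `text` and `imageFromBuffer` exports documented later in this diff and an illustrative file path:

import { readFile } from "fs/promises";
import { text, imageFromBuffer } from "llmist";

// The MIME type is inferred from the PNG magic bytes [137, 80, 78, 71];
// pass "image/png" as a second argument to skip detection.
const parts = [
  text("Describe this diagram."),
  imageFromBuffer(await readFile("diagram.png")),
];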
@@ -375,7 +506,9 @@ var init_prompt_config = __esm({
  rules: () => [
  "Output ONLY plain text with the exact markers - never use function/tool calling",
  "You can invoke multiple gadgets in a single response",
- "For dependent gadgets, invoke the first one and wait for the result"
+ "Gadgets without dependencies execute immediately (in parallel if multiple)",
+ "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+ "If any dependency fails, dependent gadgets are automatically skipped"
  ],
  customExamples: null
  };
@@ -383,11 +516,24 @@ var init_prompt_config = __esm({
  });

  // src/core/messages.ts
+ function normalizeContent(content) {
+ if (typeof content === "string") {
+ return [{ type: "text", text: content }];
+ }
+ return content;
+ }
+ function extractText(content) {
+ if (typeof content === "string") {
+ return content;
+ }
+ return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+ }
  var LLMMessageBuilder;
  var init_messages = __esm({
  "src/core/messages.ts"() {
  "use strict";
  init_constants();
+ init_input_content();
  init_prompt_config();
  LLMMessageBuilder = class {
  messages = [];
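normalizeContent and extractText are the two canonical views of message content (parts array vs. plain text); illustrative results based on the implementations above:

normalizeContent("hi");
// -> [{ type: "text", text: "hi" }]

extractText([
  { type: "text", text: "What is " },
  { type: "image", source: { type: "url", url: "https://example.com/x.png" } },
  { type: "text", text: "this?" },
]);
// -> "What is this?" (non-text parts are dropped)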
@@ -489,6 +635,10 @@ CRITICAL: ${criticalUsage}
  parts.push(`
  1. Start marker: ${this.startPrefix}gadget_name`);
  parts.push(`
+ With ID: ${this.startPrefix}gadget_name:my_id`);
+ parts.push(`
+ With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+ parts.push(`
  2. ${formatDescription}`);
  parts.push(`
  3. End marker: ${this.endPrefix}`);
@@ -538,6 +688,25 @@ ${this.endPrefix}`;
  EXAMPLE (Multiple Gadgets):

  ${multipleExample}`);
+ const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+ ${this.argPrefix}url
+ https://api.example.com/users
+ ${this.endPrefix}
+ ${this.startPrefix}fetch_data:fetch_2
+ ${this.argPrefix}url
+ https://api.example.com/orders
+ ${this.endPrefix}
+ ${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+ ${this.argPrefix}format
+ json
+ ${this.endPrefix}`;
+ parts.push(`
+
+ EXAMPLE (With Dependencies):
+ merge_1 waits for fetch_1 AND fetch_2 to complete.
+ If either fails, merge_1 is automatically skipped.
+
+ ${dependencyExample}`);
  parts.push(`

  BLOCK FORMAT SYNTAX:
@@ -588,6 +757,25 @@ Produces: { "items": ["first", "second"] }`);
  }
  return parts.join("");
  }
+ /**
+ * Add a user message.
+ * Content can be a string (text only) or an array of content parts (multimodal).
+ *
+ * @param content - Message content
+ * @param metadata - Optional metadata
+ *
+ * @example
+ * ```typescript
+ * // Text only
+ * builder.addUser("Hello!");
+ *
+ * // Multimodal
+ * builder.addUser([
+ * text("What's in this image?"),
+ * imageFromBuffer(imageData),
+ * ]);
+ * ```
+ */
  addUser(content, metadata) {
  this.messages.push({ role: "user", content, metadata });
  return this;
@@ -596,6 +784,104 @@ Produces: { "items": ["first", "second"] }`);
  this.messages.push({ role: "assistant", content, metadata });
  return this;
  }
+ /**
+ * Add a user message with an image attachment.
+ *
+ * @param textContent - Text prompt
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithImage(
+ * "What's in this image?",
+ * await fs.readFile("photo.jpg"),
+ * "image/jpeg" // Optional - auto-detected
+ * );
+ * ```
+ */
+ addUserWithImage(textContent, imageData, mimeType) {
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+ if (!detectedMime) {
+ throw new Error(
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+ );
+ }
+ const content = [
+ text(textContent),
+ {
+ type: "image",
+ source: {
+ type: "base64",
+ mediaType: detectedMime,
+ data: toBase64(imageBuffer)
+ }
+ }
+ ];
+ this.messages.push({ role: "user", content });
+ return this;
+ }
+ /**
+ * Add a user message with an image URL (OpenAI only).
+ *
+ * @param textContent - Text prompt
+ * @param imageUrl - URL to the image
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithImageUrl(
+ * "What's in this image?",
+ * "https://example.com/image.jpg"
+ * );
+ * ```
+ */
+ addUserWithImageUrl(textContent, imageUrl) {
+ const content = [text(textContent), imageFromUrl(imageUrl)];
+ this.messages.push({ role: "user", content });
+ return this;
+ }
+ /**
+ * Add a user message with an audio attachment (Gemini only).
+ *
+ * @param textContent - Text prompt
+ * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ *
+ * @example
+ * ```typescript
+ * builder.addUserWithAudio(
+ * "Transcribe this audio",
+ * await fs.readFile("recording.mp3"),
+ * "audio/mp3" // Optional - auto-detected
+ * );
+ * ```
+ */
+ addUserWithAudio(textContent, audioData, mimeType) {
+ const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+ const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+ this.messages.push({ role: "user", content });
+ return this;
+ }
+ /**
+ * Add a user message with multiple content parts.
+ * Provides full flexibility for complex multimodal messages.
+ *
+ * @param parts - Array of content parts
+ *
+ * @example
+ * ```typescript
+ * builder.addUserMultimodal([
+ * text("Compare these images:"),
+ * imageFromBuffer(image1),
+ * imageFromBuffer(image2),
+ * ]);
+ * ```
+ */
+ addUserMultimodal(parts) {
+ this.messages.push({ role: "user", content: parts });
+ return this;
+ }
  addGadgetCall(gadget, parameters, result) {
  const paramStr = this.formatBlockParameters(parameters, "");
  this.messages.push({
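The new methods keep the builder chainable, so multimodal turns mix freely with plain text; a minimal sketch (the file path is illustrative):

import { readFile } from "fs/promises";

const messages = new LLMMessageBuilder()
  .addUserWithImage("What's in this photo?", await readFile("photo.jpg"))
  .addAssistant("A street scene at dusk.")
  .addUserWithImageUrl("And this one?", "https://example.com/image.jpg")
  .build();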
@@ -1914,7 +2200,7 @@ var init_conversation_manager = __esm({
  if (msg.role === "user") {
  this.historyBuilder.addUser(msg.content);
  } else if (msg.role === "assistant") {
- this.historyBuilder.addAssistant(msg.content);
+ this.historyBuilder.addAssistant(extractText(msg.content));
  }
  }
  }
@@ -1935,8 +2221,10 @@ async function runWithHandlers(agentGenerator, handlers) {
  if (handlers.onGadgetCall) {
  await handlers.onGadgetCall({
  gadgetName: event.call.gadgetName,
+ invocationId: event.call.invocationId,
  parameters: event.call.parameters,
- parametersRaw: event.call.parametersRaw
+ parametersRaw: event.call.parametersRaw,
+ dependencies: event.call.dependencies
  });
  }
  break;
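onGadgetCall handlers now receive the invocation ID and dependency list alongside the parameters; a sketch of a logging handler under these assumptions:

await runWithHandlers(agent.run(), {
  async onGadgetCall({ gadgetName, invocationId, parameters, dependencies }) {
    // dependencies is [] for gadgets that execute immediately
    const waits = dependencies.length > 0 ? ` (waits on ${dependencies.join(", ")})` : "";
    console.log(`${invocationId}: ${gadgetName}${waits}`, parameters);
  },
});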
@@ -2783,15 +3071,37 @@ var init_parser = __esm({
  return segment.trim().length > 0 ? segment : void 0;
  }
  /**
- * Parse gadget name, handling both old format (name:invocationId) and new format (just name).
- * For new format, generates a unique invocation ID.
+ * Parse gadget name with optional invocation ID and dependencies.
+ *
+ * Supported formats:
+ * - `GadgetName` - Auto-generate ID, no dependencies
+ * - `GadgetName:my_id` - Explicit ID, no dependencies
+ * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+ *
+ * Dependencies must be comma-separated invocation IDs.
  */
  parseGadgetName(gadgetName) {
- if (gadgetName.includes(":")) {
- const parts = gadgetName.split(":");
- return { actualName: parts[0], invocationId: parts[1] };
+ const parts = gadgetName.split(":");
+ if (parts.length === 1) {
+ return {
+ actualName: parts[0],
+ invocationId: `gadget_${++globalInvocationCounter}`,
+ dependencies: []
+ };
+ } else if (parts.length === 2) {
+ return {
+ actualName: parts[0],
+ invocationId: parts[1].trim(),
+ dependencies: []
+ };
+ } else {
+ const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+ return {
+ actualName: parts[0],
+ invocationId: parts[1].trim(),
+ dependencies: deps
+ };
  }
- return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
  }
  /**
  * Extract the error message from a parse error.
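The three accepted marker forms parse as follows (the auto-generated counter value is illustrative):

parseGadgetName("fetch_data");
// -> { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }

parseGadgetName("fetch_data:fetch_1");
// -> { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parseGadgetName("merge_data:merge_1:fetch_1,fetch_2");
// -> { actualName: "merge_data", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"] }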
@@ -2827,39 +3137,20 @@ var init_parser = __esm({
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
  if (metadataEndIndex === -1) break;
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
  const contentStartIndex = metadataEndIndex + 1;
  let partEndIndex;
  let endMarkerLength = 0;
- if (gadgetName.includes(":")) {
- const oldEndMarker = `${this.endPrefix + actualGadgetName}:${invocationId}`;
- partEndIndex = this.buffer.indexOf(oldEndMarker, contentStartIndex);
- if (partEndIndex === -1) break;
- endMarkerLength = oldEndMarker.length;
+ const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+ const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+ if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+ partEndIndex = nextStartPos;
+ endMarkerLength = 0;
+ } else if (endPos !== -1) {
+ partEndIndex = endPos;
+ endMarkerLength = this.endPrefix.length;
  } else {
- const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
- let validEndPos = -1;
- let searchPos = contentStartIndex;
- while (true) {
- const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
- if (endPos === -1) break;
- const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
- if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
- validEndPos = endPos;
- break;
- } else {
- searchPos = endPos + this.endPrefix.length;
- }
- }
- if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
- partEndIndex = nextStartPos;
- endMarkerLength = 0;
- } else if (validEndPos !== -1) {
- partEndIndex = validEndPos;
- endMarkerLength = this.endPrefix.length;
- } else {
- break;
- }
+ break;
  }
  const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2870,7 +3161,8 @@ var init_parser = __esm({
  invocationId,
  parametersRaw,
  parameters,
- parseError
+ parseError,
+ dependencies
  }
  };
  startIndex = partEndIndex + endMarkerLength;
@@ -2893,7 +3185,7 @@ var init_parser = __esm({
  const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
  if (metadataEndIndex !== -1) {
  const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
- const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+ const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
  const contentStartIndex = metadataEndIndex + 1;
  const parametersRaw = this.buffer.substring(contentStartIndex).trim();
  const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2904,7 +3196,8 @@ var init_parser = __esm({
  invocationId,
  parametersRaw,
  parameters,
- parseError
+ parseError,
+ dependencies
  }
  };
  return;
@@ -3274,6 +3567,13 @@ var init_stream_processor = __esm({
  accumulatedText = "";
  shouldStopExecution = false;
  observerFailureCount = 0;
+ // Dependency tracking for gadget execution DAG
+ /** Gadgets waiting for their dependencies to complete */
+ pendingGadgets = /* @__PURE__ */ new Map();
+ /** Completed gadget results, keyed by invocation ID */
+ completedResults = /* @__PURE__ */ new Map();
+ /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+ failedInvocations = /* @__PURE__ */ new Set();
  constructor(options) {
  this.iteration = options.iteration;
  this.registry = options.registry;
@@ -3374,6 +3674,16 @@ var init_stream_processor = __esm({
  }
  }
  }
+ const finalPendingEvents = await this.processPendingGadgets();
+ outputs.push(...finalPendingEvents);
+ if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+ didExecuteGadgets = true;
+ }
+ for (const evt of finalPendingEvents) {
+ if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+ shouldBreakLoop = true;
+ }
+ }
  }
  let finalMessage = this.accumulatedText;
  if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3425,7 +3735,11 @@ var init_stream_processor = __esm({
  return [{ type: "text", content }];
  }
  /**
- * Process a gadget call through the full lifecycle.
+ * Process a gadget call through the full lifecycle, handling dependencies.
+ *
+ * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+ * Gadgets with unsatisfied dependencies are queued for later execution.
+ * After each execution, pending gadgets are checked to see if they can now run.
  */
  async processGadgetCall(call) {
  if (this.shouldStopExecution) {
@@ -3436,6 +3750,53 @@ var init_stream_processor = __esm({
  }
  const events = [];
  events.push({ type: "gadget_call", call });
+ if (call.dependencies.length > 0) {
+ if (call.dependencies.includes(call.invocationId)) {
+ this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId
+ });
+ this.failedInvocations.add(call.invocationId);
+ const skipEvent = {
+ type: "gadget_skipped",
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ parameters: call.parameters ?? {},
+ failedDependency: call.invocationId,
+ failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+ };
+ events.push(skipEvent);
+ return events;
+ }
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+ if (failedDep) {
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
+ events.push(...skipEvents);
+ return events;
+ }
+ const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+ if (unsatisfied.length > 0) {
+ this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ waitingOn: unsatisfied
+ });
+ this.pendingGadgets.set(call.invocationId, call);
+ return events;
+ }
+ }
+ const executeEvents = await this.executeGadgetWithHooks(call);
+ events.push(...executeEvents);
+ const triggeredEvents = await this.processPendingGadgets();
+ events.push(...triggeredEvents);
+ return events;
+ }
+ /**
+ * Execute a gadget through the full hook lifecycle.
+ * This is the core execution logic, extracted from processGadgetCall.
+ */
+ async executeGadgetWithHooks(call) {
+ const events = [];
  if (call.parseError) {
  this.logger.warn("Gadget has parse error", {
  gadgetName: call.gadgetName,
@@ -3566,6 +3927,10 @@ var init_stream_processor = __esm({
  });
  }
  await this.runObserversInParallel(completeObservers);
+ this.completedResults.set(result.invocationId, result);
+ if (result.error) {
+ this.failedInvocations.add(result.invocationId);
+ }
  events.push({ type: "gadget_result", result });
  if (result.error) {
  const errorType = this.determineErrorType(call, result);
@@ -3581,6 +3946,162 @@ var init_stream_processor = __esm({
  }
  return events;
  }
+ /**
+ * Handle a gadget that cannot execute because a dependency failed.
+ * Calls the onDependencySkipped controller to allow customization.
+ */
+ async handleFailedDependency(call, failedDep) {
+ const events = [];
+ const depResult = this.completedResults.get(failedDep);
+ const depError = depResult?.error ?? "Dependency failed";
+ let action = { action: "skip" };
+ if (this.hooks.controllers?.onDependencySkipped) {
+ const context = {
+ iteration: this.iteration,
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ parameters: call.parameters ?? {},
+ failedDependency: failedDep,
+ failedDependencyError: depError,
+ logger: this.logger
+ };
+ action = await this.hooks.controllers.onDependencySkipped(context);
+ }
+ if (action.action === "skip") {
+ this.failedInvocations.add(call.invocationId);
+ const skipEvent = {
+ type: "gadget_skipped",
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ parameters: call.parameters ?? {},
+ failedDependency: failedDep,
+ failedDependencyError: depError
+ };
+ events.push(skipEvent);
+ if (this.hooks.observers?.onGadgetSkipped) {
+ const observeContext = {
+ iteration: this.iteration,
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ parameters: call.parameters ?? {},
+ failedDependency: failedDep,
+ failedDependencyError: depError,
+ logger: this.logger
+ };
+ await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+ }
+ this.logger.info("Gadget skipped due to failed dependency", {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ failedDependency: failedDep
+ });
+ } else if (action.action === "execute_anyway") {
+ this.logger.info("Executing gadget despite failed dependency (controller override)", {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ failedDependency: failedDep
+ });
+ const executeEvents = await this.executeGadgetWithHooks(call);
+ events.push(...executeEvents);
+ } else if (action.action === "use_fallback") {
+ const fallbackResult = {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ parameters: call.parameters ?? {},
+ result: action.fallbackResult,
+ executionTimeMs: 0
+ };
+ this.completedResults.set(call.invocationId, fallbackResult);
+ events.push({ type: "gadget_result", result: fallbackResult });
+ this.logger.info("Using fallback result for gadget with failed dependency", {
+ gadgetName: call.gadgetName,
+ invocationId: call.invocationId,
+ failedDependency: failedDep
+ });
+ }
+ return events;
+ }
+ /**
+ * Process pending gadgets whose dependencies are now satisfied.
+ * Executes ready gadgets in parallel and continues until no more can be triggered.
+ */
+ async processPendingGadgets() {
+ const events = [];
+ let progress = true;
+ while (progress && this.pendingGadgets.size > 0) {
+ progress = false;
+ const readyToExecute = [];
+ const readyToSkip = [];
+ for (const [invocationId, call] of this.pendingGadgets) {
+ const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+ if (failedDep) {
+ readyToSkip.push({ call, failedDep });
+ continue;
+ }
+ const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+ if (allSatisfied) {
+ readyToExecute.push(call);
+ }
+ }
+ for (const { call, failedDep } of readyToSkip) {
+ this.pendingGadgets.delete(call.invocationId);
+ const skipEvents = await this.handleFailedDependency(call, failedDep);
+ events.push(...skipEvents);
+ progress = true;
+ }
+ if (readyToExecute.length > 0) {
+ this.logger.debug("Executing ready gadgets in parallel", {
+ count: readyToExecute.length,
+ invocationIds: readyToExecute.map((c) => c.invocationId)
+ });
+ for (const call of readyToExecute) {
+ this.pendingGadgets.delete(call.invocationId);
+ }
+ const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+ const results = await Promise.all(executePromises);
+ for (const executeEvents of results) {
+ events.push(...executeEvents);
+ }
+ progress = true;
+ }
+ }
+ if (this.pendingGadgets.size > 0) {
+ const pendingIds = new Set(this.pendingGadgets.keys());
+ for (const [invocationId, call] of this.pendingGadgets) {
+ const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+ const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+ const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+ let errorMessage;
+ let logLevel = "warn";
+ if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+ errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+ logLevel = "error";
+ } else if (circularDeps.length > 0) {
+ errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+ } else {
+ errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+ }
+ this.logger[logLevel]("Gadget has unresolvable dependencies", {
+ gadgetName: call.gadgetName,
+ invocationId,
+ circularDependencies: circularDeps,
+ missingDependencies: trulyMissingDeps
+ });
+ this.failedInvocations.add(invocationId);
+ const skipEvent = {
+ type: "gadget_skipped",
+ gadgetName: call.gadgetName,
+ invocationId,
+ parameters: call.parameters ?? {},
+ failedDependency: missingDeps[0],
+ failedDependencyError: errorMessage
+ };
+ events.push(skipEvent);
+ }
+ this.pendingGadgets.clear();
+ }
+ return events;
+ }
  /**
  * Safely execute an observer, catching and logging any errors.
  * Observers are non-critical, so errors are logged but don't crash the system.
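The onDependencySkipped controller chooses among the three actions handled above: skip (the default), execute_anyway, or use_fallback. A minimal sketch of a hooks object, assuming the surrounding agent wiring:

const hooks = {
  controllers: {
    async onDependencySkipped(ctx) {
      ctx.logger.warn(`${ctx.invocationId} lost dependency ${ctx.failedDependency}`);
      if (ctx.gadgetName === "merge_data") {
        // Substitute a result instead of skipping the dependent gadget.
        return { action: "use_fallback", fallbackResult: "[]" };
      }
      return { action: "skip" };
    },
  },
};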
@@ -4018,9 +4539,9 @@ var init_agent = __esm({
  if (msg.role === "user") {
  this.conversation.addUserMessage(msg.content);
  } else if (msg.role === "assistant") {
- this.conversation.addAssistantMessage(msg.content);
+ this.conversation.addAssistantMessage(extractText(msg.content));
  } else if (msg.role === "system") {
- this.conversation.addUserMessage(`[System] ${msg.content}`);
+ this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
  }
  }
  }
@@ -4599,6 +5120,7 @@ var init_anthropic = __esm({
  "src/providers/anthropic.ts"() {
  "use strict";
  import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+ init_messages();
  init_anthropic_models();
  init_base_provider();
  init_constants2();
@@ -4637,7 +5159,7 @@ var init_anthropic = __esm({
  const systemMessages = messages.filter((message) => message.role === "system");
  const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
  type: "text",
- text: m.content,
+ text: extractText(m.content),
  // Add cache_control to the LAST system message block
  ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
  })) : void 0;
@@ -4650,14 +5172,10 @@ var init_anthropic = __esm({
  );
  const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
- content: [
- {
- type: "text",
- text: message.content,
- // Add cache_control to the LAST user message
- ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
- }
- ]
+ content: this.convertToAnthropicContent(
+ message.content,
+ message.role === "user" && index === lastUserIndex
+ )
  }));
  const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
  const payload = {
@@ -4673,6 +5191,52 @@ var init_anthropic = __esm({
  };
  return payload;
  }
+ /**
+ * Convert llmist content to Anthropic's content block format.
+ * Handles text, images (base64 only), and applies cache_control.
+ */
+ convertToAnthropicContent(content, addCacheControl) {
+ const parts = normalizeContent(content);
+ return parts.map((part, index) => {
+ const isLastPart = index === parts.length - 1;
+ const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+ if (part.type === "text") {
+ return {
+ type: "text",
+ text: part.text,
+ ...cacheControl
+ };
+ }
+ if (part.type === "image") {
+ return this.convertImagePart(part, cacheControl);
+ }
+ if (part.type === "audio") {
+ throw new Error(
+ "Anthropic does not support audio input. Use Google Gemini for audio processing."
+ );
+ }
+ throw new Error(`Unsupported content type: ${part.type}`);
+ });
+ }
+ /**
+ * Convert an image content part to Anthropic's image block format.
+ */
+ convertImagePart(part, cacheControl) {
+ if (part.source.type === "url") {
+ throw new Error(
+ "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+ );
+ }
+ return {
+ type: "image",
+ source: {
+ type: "base64",
+ media_type: part.source.mediaType,
+ data: part.source.data
+ },
+ ...cacheControl
+ };
+ }
  async executeStreamRequest(payload, signal) {
  const client = this.client;
  const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
@@ -4755,17 +5319,12 @@ var init_anthropic = __esm({
  async countTokens(messages, descriptor, _spec) {
  const client = this.client;
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+ const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
  const conversation = messages.filter(
  (message) => message.role !== "system"
  ).map((message) => ({
  role: message.role,
- content: [
- {
- type: "text",
- text: message.content
- }
- ]
+ content: this.convertToAnthropicContent(message.content, false)
  }));
  try {
  const response = await client.messages.countTokens({
@@ -4779,8 +5338,19 @@ var init_anthropic = __esm({
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
  error
  );
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
+ let totalChars = 0;
+ let imageCount = 0;
+ for (const msg of messages) {
+ const parts = normalizeContent(msg.content);
+ for (const part of parts) {
+ if (part.type === "text") {
+ totalChars += part.text.length;
+ } else if (part.type === "image") {
+ imageCount++;
+ }
+ }
+ }
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
  }
  }
  };
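The fallback estimate divides text characters by FALLBACK_CHARS_PER_TOKEN and adds a flat 1000 tokens per image. Assuming the constant is 4 (its value is defined elsewhere in the bundle), a worked example:

// 8000 chars of text plus 2 images:
// ceil(8000 / 4) + 2 * 1000 = 2000 + 2000 = 4000 tokens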
@@ -5309,6 +5879,7 @@ var init_gemini = __esm({
  "src/providers/gemini.ts"() {
  "use strict";
  import_genai = require("@google/genai");
+ init_messages();
  init_base_provider();
  init_constants2();
  init_gemini_image_models();
@@ -5478,7 +6049,7 @@ var init_gemini = __esm({
  };
  return {
  model: descriptor.name,
- contents: this.convertContentsForNewSDK(contents),
+ contents,
  config
  };
  }
@@ -5513,18 +6084,25 @@ var init_gemini = __esm({
  if (message.role === "system") {
  expandedMessages.push({
  role: "user",
- content: message.content
+ content: extractText(message.content)
  });
  expandedMessages.push({
  role: "assistant",
  content: "Understood."
  });
  } else {
- expandedMessages.push(message);
+ expandedMessages.push({
+ role: message.role,
+ content: message.content
+ });
  }
  }
  return this.mergeConsecutiveMessages(expandedMessages);
  }
+ /**
+ * Merge consecutive messages with the same role (required by Gemini).
+ * Handles multimodal content by converting to Gemini's part format.
+ */
  mergeConsecutiveMessages(messages) {
  if (messages.length === 0) {
  return [];
@@ -5533,15 +6111,16 @@ var init_gemini = __esm({
  let currentGroup = null;
  for (const message of messages) {
  const geminiRole = GEMINI_ROLE_MAP[message.role];
+ const geminiParts = this.convertToGeminiParts(message.content);
  if (currentGroup && currentGroup.role === geminiRole) {
- currentGroup.parts.push({ text: message.content });
+ currentGroup.parts.push(...geminiParts);
  } else {
  if (currentGroup) {
  result.push(currentGroup);
  }
  currentGroup = {
  role: geminiRole,
- parts: [{ text: message.content }]
+ parts: geminiParts
  };
  }
  }
@@ -5550,11 +6129,39 @@ var init_gemini = __esm({
  }
  return result;
  }
- convertContentsForNewSDK(contents) {
- return contents.map((content) => ({
- role: content.role,
- parts: content.parts.map((part) => ({ text: part.text }))
- }));
+ /**
+ * Convert llmist content to Gemini's part format.
+ * Handles text, images, and audio (Gemini supports all three).
+ */
+ convertToGeminiParts(content) {
+ const parts = normalizeContent(content);
+ return parts.map((part) => {
+ if (part.type === "text") {
+ return { text: part.text };
+ }
+ if (part.type === "image") {
+ if (part.source.type === "url") {
+ throw new Error(
+ "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+ );
+ }
+ return {
+ inlineData: {
+ mimeType: part.source.mediaType,
+ data: part.source.data
+ }
+ };
+ }
+ if (part.type === "audio") {
+ return {
+ inlineData: {
+ mimeType: part.source.mediaType,
+ data: part.source.data
+ }
+ };
+ }
+ throw new Error(`Unsupported content type: ${part.type}`);
+ });
  }
  buildGenerationConfig(options) {
  const config = {};
@@ -5575,9 +6182,9 @@ var init_gemini = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  for await (const chunk of stream2) {
- const text = this.extractText(chunk);
- if (text) {
- yield { text, rawEvent: chunk };
+ const text3 = this.extractText(chunk);
+ if (text3) {
+ yield { text: text3, rawEvent: chunk };
  }
  const finishReason = this.extractFinishReason(chunk);
  const usage = this.extractUsage(chunk);
@@ -5638,7 +6245,7 @@ var init_gemini = __esm({
  try {
  const response = await client.models.countTokens({
  model: descriptor.name,
- contents: this.convertContentsForNewSDK(contents)
+ contents
  // Note: systemInstruction not used - it's not supported by countTokens()
  // and would cause a 2100% token counting error
  });
@@ -5648,8 +6255,19 @@ var init_gemini = __esm({
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
  error
  );
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
+ let totalChars = 0;
+ let mediaCount = 0;
+ for (const msg of messages) {
+ const parts = normalizeContent(msg.content);
+ for (const part of parts) {
+ if (part.type === "text") {
+ totalChars += part.text.length;
+ } else if (part.type === "image" || part.type === "audio") {
+ mediaCount++;
+ }
+ }
+ }
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
  }
  }
  };
@@ -6292,6 +6910,7 @@ var init_openai = __esm({
  "use strict";
  import_openai = __toESM(require("openai"), 1);
  import_tiktoken = require("tiktoken");
+ init_messages();
  init_base_provider();
  init_constants2();
  init_openai_image_models();
@@ -6399,11 +7018,7 @@ var init_openai = __esm({
  const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
  return {
  model: descriptor.name,
- messages: messages.map((message) => ({
- role: ROLE_MAP[message.role],
- content: message.content,
- name: message.name
- })),
+ messages: messages.map((message) => this.convertToOpenAIMessage(message)),
  // Only set max_completion_tokens if explicitly provided
  // Otherwise let the API use "as much as fits" in the context window
  ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -6415,6 +7030,77 @@ var init_openai = __esm({
  ...shouldIncludeTemperature ? { temperature } : {}
  };
  }
+ /**
+ * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+ * Handles role-specific content type requirements:
+ * - system/assistant: string content only
+ * - user: string or multimodal array content
+ */
+ convertToOpenAIMessage(message) {
+ const role = ROLE_MAP[message.role];
+ if (role === "user") {
+ const content = this.convertToOpenAIContent(message.content);
+ return {
+ role: "user",
+ content,
+ ...message.name ? { name: message.name } : {}
+ };
+ }
+ const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+ if (role === "system") {
+ return {
+ role: "system",
+ content: textContent,
+ ...message.name ? { name: message.name } : {}
+ };
+ }
+ return {
+ role: "assistant",
+ content: textContent,
+ ...message.name ? { name: message.name } : {}
+ };
+ }
+ /**
+ * Convert llmist content to OpenAI's content format.
+ * Optimizes by returning string for text-only content, array for multimodal.
+ */
+ convertToOpenAIContent(content) {
+ if (typeof content === "string") {
+ return content;
+ }
+ return content.map((part) => {
+ if (part.type === "text") {
+ return { type: "text", text: part.text };
+ }
+ if (part.type === "image") {
+ return this.convertImagePart(part);
+ }
+ if (part.type === "audio") {
+ throw new Error(
+ "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+ );
+ }
+ throw new Error(`Unsupported content type: ${part.type}`);
+ });
+ }
+ /**
+ * Convert an image content part to OpenAI's image_url format.
+ * Supports both URLs and base64 data URLs.
+ */
+ convertImagePart(part) {
+ if (part.source.type === "url") {
+ return {
+ type: "image_url",
+ image_url: { url: part.source.url }
+ };
+ }
+ return {
+ type: "image_url",
+ image_url: {
+ url: `data:${part.source.mediaType};base64,${part.source.data}`
+ }
+ };
+ }
  async executeStreamRequest(payload, signal) {
  const client = this.client;
  const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
@@ -6423,9 +7109,9 @@ var init_openai = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  for await (const chunk of stream2) {
- const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
- if (text) {
- yield { text, rawEvent: chunk };
+ const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+ if (text3) {
+ yield { text: text3, rawEvent: chunk };
  }
  const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
  const usage = chunk.usage ? {
@@ -6473,17 +7159,26 @@ var init_openai = __esm({
  }
  try {
  let tokenCount = 0;
+ let imageCount = 0;
  for (const message of messages) {
  tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
  const roleText = ROLE_MAP[message.role];
  tokenCount += encoding.encode(roleText).length;
- tokenCount += encoding.encode(message.content ?? "").length;
+ const textContent = extractText(message.content);
+ tokenCount += encoding.encode(textContent).length;
+ const parts = normalizeContent(message.content);
+ for (const part of parts) {
+ if (part.type === "image") {
+ imageCount++;
+ }
+ }
  if (message.name) {
  tokenCount += encoding.encode(message.name).length;
  tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
  }
  }
  tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+ tokenCount += imageCount * 765;
  return tokenCount;
  } finally {
  encoding.free();
@@ -6493,8 +7188,19 @@ var init_openai = __esm({
  `Token counting failed for ${descriptor.name}, using fallback estimation:`,
  error
  );
- const totalChars = messages.reduce((sum, msg) => sum + (msg.content?.length ?? 0), 0);
- return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN);
+ let totalChars = 0;
+ let imageCount = 0;
+ for (const msg of messages) {
+ const parts = normalizeContent(msg.content);
+ for (const part of parts) {
+ if (part.type === "text") {
+ totalChars += part.text.length;
+ } else if (part.type === "image") {
+ imageCount++;
+ }
+ }
+ }
+ return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
  }
  }
  };
@@ -6917,6 +7623,138 @@ var init_text = __esm({
  }
  });

+ // src/core/namespaces/vision.ts
+ var VisionNamespace;
+ var init_vision = __esm({
+ "src/core/namespaces/vision.ts"() {
+ "use strict";
+ init_input_content();
+ init_messages();
+ VisionNamespace = class {
+ constructor(client) {
+ this.client = client;
+ }
+ /**
+ * Build a message builder with the image content attached.
+ * Handles URLs, data URLs, base64 strings, and binary buffers.
+ */
+ buildImageMessage(options) {
+ const builder = new LLMMessageBuilder();
+ if (options.systemPrompt) {
+ builder.addSystem(options.systemPrompt);
+ }
+ if (typeof options.image === "string") {
+ if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+ builder.addUserWithImageUrl(options.prompt, options.image);
+ } else if (isDataUrl(options.image)) {
+ const parsed = parseDataUrl(options.image);
+ if (!parsed) {
+ throw new Error("Invalid data URL format");
+ }
+ builder.addUserWithImage(
+ options.prompt,
+ parsed.data,
+ parsed.mimeType
+ );
+ } else {
+ const buffer = Buffer.from(options.image, "base64");
+ builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+ }
+ } else {
+ builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+ }
+ return builder;
+ }
+ /**
+ * Stream the response and collect text and usage information.
+ */
+ async streamAndCollect(options, builder) {
+ let response = "";
+ let finalUsage;
+ for await (const chunk of this.client.stream({
+ model: options.model,
+ messages: builder.build(),
+ maxTokens: options.maxTokens,
+ temperature: options.temperature
+ })) {
+ response += chunk.text;
+ if (chunk.usage) {
+ finalUsage = {
+ inputTokens: chunk.usage.inputTokens,
+ outputTokens: chunk.usage.outputTokens,
+ totalTokens: chunk.usage.totalTokens
+ };
+ }
+ }
+ return { text: response.trim(), usage: finalUsage };
+ }
+ /**
+ * Analyze an image with a vision-capable model.
+ * Returns the analysis as a string.
+ *
+ * @param options - Vision analysis options
+ * @returns Promise resolving to the analysis text
+ * @throws Error if the image format is unsupported or model doesn't support vision
+ *
+ * @example
+ * ```typescript
+ * // From file
+ * const result = await llmist.vision.analyze({
+ * model: "gpt-4o",
+ * image: await fs.readFile("photo.jpg"),
+ * prompt: "What's in this image?",
+ * });
+ *
+ * // From URL (OpenAI only)
+ * const result = await llmist.vision.analyze({
+ * model: "gpt-4o",
+ * image: "https://example.com/image.jpg",
+ * prompt: "Describe this image",
+ * });
+ * ```
+ */
+ async analyze(options) {
+ const builder = this.buildImageMessage(options);
+ const { text: text3 } = await this.streamAndCollect(options, builder);
+ return text3;
+ }
+ /**
+ * Analyze an image and return detailed result with usage info.
+ *
+ * @param options - Vision analysis options
+ * @returns Promise resolving to the analysis result with usage info
+ */
+ async analyzeWithUsage(options) {
+ const builder = this.buildImageMessage(options);
+ const { text: text3, usage } = await this.streamAndCollect(options, builder);
+ return {
+ text: text3,
+ model: options.model,
+ usage
+ };
+ }
+ /**
+ * Check if a model supports vision/image input.
+ *
+ * @param modelId - Model ID to check
+ * @returns True if the model supports vision
+ */
+ supportsModel(modelId) {
+ const spec = this.client.modelRegistry.getModelSpec(modelId);
+ return spec?.features?.vision === true;
+ }
+ /**
+ * List all models that support vision.
+ *
+ * @returns Array of model IDs that support vision
+ */
+ listModels() {
+ return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+ }
+ };
+ }
+ });
+
  // src/core/options.ts
  var ModelIdentifierParser;
  var init_options = __esm({
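Besides analyze() and analyzeWithUsage(), the namespace exposes capability checks; a minimal sketch, assuming a default-configured client:

const llmist = new LLMist();

if (llmist.vision.supportsModel("gpt-4o")) {
  const answer = await llmist.vision.analyze({
    model: "gpt-4o",
    image: "https://example.com/image.jpg",
    prompt: "Describe this image",
  });
  console.log(answer);
}

console.log(llmist.vision.listModels()); // all vision-capable model IDs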
@@ -6961,6 +7799,7 @@ var init_client = __esm({
  init_image();
  init_speech();
  init_text();
+ init_vision();
  init_options();
  init_quick_methods();
  LLMist = class _LLMist {
@@ -6972,6 +7811,7 @@ var init_client = __esm({
  text;
  image;
  speech;
+ vision;
  constructor(...args) {
  let adapters = [];
  let defaultProvider;
@@ -7022,6 +7862,7 @@ var init_client = __esm({
  this.text = new TextNamespace(this);
  this.image = new ImageNamespace(this.adapters, this.defaultProvider);
  this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+ this.vision = new VisionNamespace(this);
  }
  stream(options) {
  const descriptor = this.parser.parse(options.model);
@@ -7206,6 +8047,7 @@ var init_builder = __esm({
  "src/agent/builder.ts"() {
  "use strict";
  init_constants();
+ init_input_content();
  init_model_shortcuts();
  init_registry();
  init_agent();
@@ -7853,13 +8695,17 @@ ${endPrefix}`
  * }
  * ```
  */
- ask(userPrompt) {
+ /**
+ * Build AgentOptions with the given user prompt.
+ * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+ */
+ buildAgentOptions(userPrompt) {
  if (!this.client) {
  const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
  this.client = new LLMistClass();
  }
  const registry = GadgetRegistry.from(this.gadgets);
- const options = {
+ return {
  client: this.client,
  model: this.model ?? "openai:gpt-5-nano",
  systemPrompt: this.systemPrompt,
@@ -7885,6 +8731,83 @@ ${endPrefix}`
  compactionConfig: this.compactionConfig,
  signal: this.signal
  };
+ }
+ ask(userPrompt) {
+ const options = this.buildAgentOptions(userPrompt);
+ return new Agent(AGENT_INTERNAL_KEY, options);
+ }
+ /**
+ * Build and create the agent with a multimodal user prompt (text + image).
+ * Returns the Agent instance ready to run.
+ *
+ * @param textPrompt - Text prompt describing what to do with the image
+ * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+ * @param mimeType - Optional MIME type (auto-detected if not provided)
+ * @returns Configured Agent instance
+ *
+ * @example
+ * ```typescript
+ * const agent = LLMist.createAgent()
+ * .withModel("gpt-4o")
+ * .withSystem("You analyze images")
+ * .askWithImage(
+ * "What's in this image?",
+ * await fs.readFile("photo.jpg")
+ * );
+ *
+ * for await (const event of agent.run()) {
+ * // handle events
+ * }
+ * ```
+ */
+ askWithImage(textPrompt, imageData, mimeType) {
+ const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+ const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+ if (!detectedMime) {
+ throw new Error(
+ "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+ );
+ }
+ const userContent = [
+ text(textPrompt),
+ {
+ type: "image",
+ source: {
+ type: "base64",
+ mediaType: detectedMime,
+ data: toBase64(imageBuffer)
+ }
+ }
+ ];
+ const options = this.buildAgentOptions(userContent);
+ return new Agent(AGENT_INTERNAL_KEY, options);
+ }
+ /**
+ * Build and return an Agent configured with multimodal content.
+ * More flexible than askWithImage - accepts any combination of content parts.
+ *
+ * @param content - Array of content parts (text, images, audio)
+ * @returns A configured Agent ready for execution
+ *
+ * @example
+ * ```typescript
+ * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+ *
+ * const agent = LLMist.createAgent()
+ * .withModel("gemini:gemini-2.5-flash")
+ * .askWithContent([
+ * text("Describe this image and transcribe the audio:"),
+ * imageFromBuffer(imageData),
+ * audioFromBuffer(audioData),
+ * ]);
+ *
+ * for await (const event of agent.run()) {
+ * // handle events
+ * }
+ * ```
+ */
+ askWithContent(content) {
+ const options = this.buildAgentOptions(content);
  return new Agent(AGENT_INTERNAL_KEY, options);
  }
  /**
@@ -8004,7 +8927,8 @@ var COMMANDS = {
  models: "models",
  gadget: "gadget",
  image: "image",
- speech: "speech"
+ speech: "speech",
+ vision: "vision"
  };
  var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
  var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -8026,6 +8950,9 @@ var OPTION_FLAGS = {
  dockerRo: "--docker-ro",
  noDocker: "--no-docker",
  dockerDev: "--docker-dev",
+ // Multimodal input options
+ inputImage: "--image <path>",
+ inputAudio: "--audio <path>",
  // Image generation options
  imageSize: "--size <size>",
  imageQuality: "--quality <quality>",
@@ -8051,6 +8978,9 @@ var OPTION_DESCRIPTIONS = {
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
  noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
  quiet: "Suppress all output except content (text and TellUser messages).",
+ // Multimodal input descriptions
+ inputImage: "Image file to include with the prompt (vision models).",
+ inputAudio: "Audio file to include with the prompt (Gemini only).",
  docker: "Run agent in a Docker sandbox container for security isolation.",
  dockerRo: "Run in Docker with current directory mounted read-only.",
  noDocker: "Disable Docker sandboxing (override config).",
@@ -8074,7 +9004,7 @@ var import_commander2 = require("commander");
  // package.json
  var package_default = {
  name: "llmist",
- version: "2.4.0",
+ version: "2.5.0",
  description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
  type: "module",
  main: "dist/index.cjs",
@@ -8196,7 +9126,7 @@ var package_default = {
8196
9126
  };
8197
9127
 
8198
9128
  // src/cli/agent-command.ts
8199
- var import_promises3 = require("readline/promises");
9129
+ var import_promises4 = require("readline/promises");
8200
9130
  var import_chalk5 = __toESM(require("chalk"), 1);
8201
9131
  init_builder();
8202
9132
 
@@ -8214,6 +9144,7 @@ function isAbortError(error) {
8214
9144
  }
8215
9145
 
8216
9146
  // src/cli/agent-command.ts
9147
+ init_input_content();
8217
9148
  init_registry();
8218
9149
  init_constants2();
8219
9150
 
@@ -8538,15 +9469,84 @@ var finish = createGadget({
8538
9469
  });
8539
9470
  var builtinGadgets = [askUser, tellUser, finish];
8540
9471
 
9472
+ // src/cli/file-utils.ts
9473
+ var import_promises2 = require("fs/promises");
9474
+ var import_node_path3 = require("path");
9475
+ init_input_content();
9476
+ var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
9477
+ function formatFileSize(bytes) {
9478
+ if (bytes < 1024) return `${bytes} bytes`;
9479
+ if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
9480
+ if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
9481
+ return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
9482
+ }
9483
+ async function checkFileSize(absolutePath, filePath, maxSize) {
9484
+ const stats = await (0, import_promises2.stat)(absolutePath);
9485
+ if (stats.size > maxSize) {
9486
+ throw new Error(
9487
+ `File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
9488
+ );
9489
+ }
9490
+ }
9491
+ async function readImageFile(filePath, options = {}) {
9492
+ const absolutePath = (0, import_node_path3.resolve)(filePath);
9493
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
9494
+ let buffer;
9495
+ try {
9496
+ await checkFileSize(absolutePath, filePath, maxFileSize);
9497
+ buffer = await (0, import_promises2.readFile)(absolutePath);
9498
+ } catch (error) {
9499
+ const message = error instanceof Error ? error.message : String(error);
9500
+ throw new Error(`Failed to read image file "${filePath}": ${message}`);
9501
+ }
9502
+ const mimeType = detectImageMimeType(buffer);
9503
+ if (!mimeType) {
9504
+ throw new Error(
9505
+ `File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
9506
+ );
9507
+ }
9508
+ return imageFromBuffer(buffer, mimeType);
9509
+ }
9510
+ async function readAudioFile(filePath, options = {}) {
9511
+ const absolutePath = (0, import_node_path3.resolve)(filePath);
9512
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
9513
+ let buffer;
9514
+ try {
9515
+ await checkFileSize(absolutePath, filePath, maxFileSize);
9516
+ buffer = await (0, import_promises2.readFile)(absolutePath);
9517
+ } catch (error) {
9518
+ const message = error instanceof Error ? error.message : String(error);
9519
+ throw new Error(`Failed to read audio file "${filePath}": ${message}`);
9520
+ }
9521
+ const mimeType = detectAudioMimeType(buffer);
9522
+ if (!mimeType) {
9523
+ throw new Error(
9524
+ `File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
9525
+ );
9526
+ }
9527
+ return audioFromBuffer(buffer, mimeType);
9528
+ }
9529
+ async function readFileBuffer(filePath, options = {}) {
9530
+ const absolutePath = (0, import_node_path3.resolve)(filePath);
9531
+ const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
9532
+ try {
9533
+ await checkFileSize(absolutePath, filePath, maxFileSize);
9534
+ return await (0, import_promises2.readFile)(absolutePath);
9535
+ } catch (error) {
9536
+ const message = error instanceof Error ? error.message : String(error);
9537
+ throw new Error(`Failed to read file "${filePath}": ${message}`);
9538
+ }
9539
+ }
9540
+
8541
9541
  // src/cli/gadgets.ts
8542
9542
  var import_node_fs7 = __toESM(require("fs"), 1);
8543
- var import_node_path6 = __toESM(require("path"), 1);
9543
+ var import_node_path7 = __toESM(require("path"), 1);
8544
9544
  var import_node_url = require("url");
8545
9545
  init_gadget();
8546
9546
 
8547
9547
  // src/cli/builtins/filesystem/list-directory.ts
8548
9548
  var import_node_fs4 = __toESM(require("fs"), 1);
8549
- var import_node_path4 = __toESM(require("path"), 1);
9549
+ var import_node_path5 = __toESM(require("path"), 1);
8550
9550
  var import_zod4 = require("zod");
8551
9551
 
8552
9552
  // src/index.ts
@@ -8570,6 +9570,7 @@ init_prompt_config();
8570
9570
 
8571
9571
  // src/index.ts
8572
9572
  init_client();
9573
+ init_input_content();
8573
9574
  init_messages();
8574
9575
  init_model_registry();
8575
9576
  init_model_shortcuts();
@@ -8600,6 +9601,10 @@ init_logger();
8600
9601
  // src/testing/mock-stream.ts
8601
9602
  init_constants();
8602
9603
 
9604
+ // src/testing/mock-builder.ts
9605
+ init_input_content();
9606
+ init_messages();
9607
+
8603
9608
  // src/testing/mock-client.ts
8604
9609
  init_client();
8605
9610
 
@@ -8611,7 +9616,7 @@ var import_node_stream = require("stream");
8611
9616
 
8612
9617
  // src/cli/builtins/filesystem/utils.ts
8613
9618
  var import_node_fs3 = __toESM(require("fs"), 1);
8614
- var import_node_path3 = __toESM(require("path"), 1);
9619
+ var import_node_path4 = __toESM(require("path"), 1);
8615
9620
  var PathSandboxException = class extends Error {
8616
9621
  constructor(inputPath, reason) {
8617
9622
  super(`Path access denied: ${inputPath}. ${reason}`);
@@ -8620,7 +9625,7 @@ var PathSandboxException = class extends Error {
8620
9625
  };
8621
9626
  function validatePathIsWithinCwd(inputPath) {
8622
9627
  const cwd = process.cwd();
8623
- const resolvedPath = import_node_path3.default.resolve(cwd, inputPath);
9628
+ const resolvedPath = import_node_path4.default.resolve(cwd, inputPath);
8624
9629
  let finalPath;
8625
9630
  try {
8626
9631
  finalPath = import_node_fs3.default.realpathSync(resolvedPath);
@@ -8632,7 +9637,7 @@ function validatePathIsWithinCwd(inputPath) {
8632
9637
  throw error;
8633
9638
  }
8634
9639
  }
8635
- const cwdWithSep = cwd + import_node_path3.default.sep;
9640
+ const cwdWithSep = cwd + import_node_path4.default.sep;
8636
9641
  if (!finalPath.startsWith(cwdWithSep) && finalPath !== cwd) {
8637
9642
  throw new PathSandboxException(inputPath, "Path is outside the current working directory");
8638
9643
  }
@@ -8645,8 +9650,8 @@ function listFiles(dirPath, basePath = dirPath, maxDepth = 1, currentDepth = 1)
8645
9650
  try {
8646
9651
  const items = import_node_fs4.default.readdirSync(dirPath);
8647
9652
  for (const item of items) {
8648
- const fullPath = import_node_path4.default.join(dirPath, item);
8649
- const relativePath = import_node_path4.default.relative(basePath, fullPath);
9653
+ const fullPath = import_node_path5.default.join(dirPath, item);
9654
+ const relativePath = import_node_path5.default.relative(basePath, fullPath);
8650
9655
  try {
8651
9656
  const stats = import_node_fs4.default.lstatSync(fullPath);
8652
9657
  let type;
@@ -8761,7 +9766,7 @@ ${formattedList}`;
8761
9766
  // src/cli/builtins/filesystem/read-file.ts
8762
9767
  var import_node_fs5 = __toESM(require("fs"), 1);
8763
9768
  var import_zod5 = require("zod");
8764
- var readFile = createGadget({
9769
+ var readFile2 = createGadget({
8765
9770
  name: "ReadFile",
8766
9771
  description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
8767
9772
  schema: import_zod5.z.object({
@@ -8790,7 +9795,7 @@ ${content}`;
8790
9795
 
8791
9796
  // src/cli/builtins/filesystem/write-file.ts
8792
9797
  var import_node_fs6 = __toESM(require("fs"), 1);
8793
- var import_node_path5 = __toESM(require("path"), 1);
9798
+ var import_node_path6 = __toESM(require("path"), 1);
8794
9799
  var import_zod6 = require("zod");
8795
9800
  var writeFile = createGadget({
8796
9801
  name: "WriteFile",
@@ -8825,7 +9830,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
8825
9830
  ],
8826
9831
  execute: ({ filePath, content }) => {
8827
9832
  const validatedPath = validatePathIsWithinCwd(filePath);
8828
- const parentDir = import_node_path5.default.dirname(validatedPath);
9833
+ const parentDir = import_node_path6.default.dirname(validatedPath);
8829
9834
  let createdDir = false;
8830
9835
  if (!import_node_fs6.default.existsSync(parentDir)) {
8831
9836
  validatePathIsWithinCwd(parentDir);
@@ -8834,7 +9839,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
8834
9839
  }
8835
9840
  import_node_fs6.default.writeFileSync(validatedPath, content, "utf-8");
8836
9841
  const bytesWritten = Buffer.byteLength(content, "utf-8");
8837
- const dirNote = createdDir ? ` (created directory: ${import_node_path5.default.dirname(filePath)})` : "";
9842
+ const dirNote = createdDir ? ` (created directory: ${import_node_path6.default.dirname(filePath)})` : "";
8838
9843
  return `path=${filePath}
8839
9844
 
8840
9845
  Wrote ${bytesWritten} bytes${dirNote}`;
@@ -9032,7 +10037,7 @@ error: ${message}`;
9032
10037
  // src/cli/builtins/index.ts
9033
10038
  var builtinGadgetRegistry = {
9034
10039
  ListDirectory: listDirectory,
9035
- ReadFile: readFile,
10040
+ ReadFile: readFile2,
9036
10041
  WriteFile: writeFile,
9037
10042
  EditFile: editFile,
9038
10043
  RunCommand: runCommand
@@ -9069,10 +10074,10 @@ function expandHomePath(input) {
9069
10074
  if (!home) {
9070
10075
  return input;
9071
10076
  }
9072
- return import_node_path6.default.join(home, input.slice(1));
10077
+ return import_node_path7.default.join(home, input.slice(1));
9073
10078
  }
9074
10079
  function isFileLikeSpecifier(specifier) {
9075
- return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(import_node_path6.default.sep);
10080
+ return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(import_node_path7.default.sep);
9076
10081
  }
9077
10082
  function tryResolveBuiltin(specifier) {
9078
10083
  if (specifier.startsWith(BUILTIN_PREFIX)) {
@@ -9095,7 +10100,7 @@ function resolveGadgetSpecifier(specifier, cwd) {
9095
10100
  return specifier;
9096
10101
  }
9097
10102
  const expanded = expandHomePath(specifier);
9098
- const resolvedPath = import_node_path6.default.resolve(cwd, expanded);
10103
+ const resolvedPath = import_node_path7.default.resolve(cwd, expanded);
9099
10104
  if (!import_node_fs7.default.existsSync(resolvedPath)) {
9100
10105
  throw new Error(`Gadget module not found at ${resolvedPath}`);
9101
10106
  }
@@ -9167,13 +10172,14 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
9167
10172
  }
9168
10173
 
9169
10174
  // src/cli/llm-logging.ts
9170
- var import_promises2 = require("fs/promises");
10175
+ var import_promises3 = require("fs/promises");
9171
10176
  var import_node_os = require("os");
9172
- var import_node_path7 = require("path");
9173
- var DEFAULT_LLM_LOG_DIR = (0, import_node_path7.join)((0, import_node_os.homedir)(), ".llmist", "logs");
10177
+ var import_node_path8 = require("path");
10178
+ init_messages();
10179
+ var DEFAULT_LLM_LOG_DIR = (0, import_node_path8.join)((0, import_node_os.homedir)(), ".llmist", "logs");
9174
10180
  function resolveLogDir(option, subdir) {
9175
10181
  if (option === true) {
9176
- return (0, import_node_path7.join)(DEFAULT_LLM_LOG_DIR, subdir);
10182
+ return (0, import_node_path8.join)(DEFAULT_LLM_LOG_DIR, subdir);
9177
10183
  }
9178
10184
  if (typeof option === "string") {
9179
10185
  return option;
@@ -9184,14 +10190,14 @@ function formatLlmRequest(messages) {
9184
10190
  const lines = [];
9185
10191
  for (const msg of messages) {
9186
10192
  lines.push(`=== ${msg.role.toUpperCase()} ===`);
9187
- lines.push(msg.content ?? "");
10193
+ lines.push(msg.content ? extractText(msg.content) : "");
9188
10194
  lines.push("");
9189
10195
  }
9190
10196
  return lines.join("\n");
9191
10197
  }
9192
10198
  async function writeLogFile(dir, filename, content) {
9193
- await (0, import_promises2.mkdir)(dir, { recursive: true });
9194
- await (0, import_promises2.writeFile)((0, import_node_path7.join)(dir, filename), content, "utf-8");
10199
+ await (0, import_promises3.mkdir)(dir, { recursive: true });
10200
+ await (0, import_promises3.writeFile)((0, import_node_path8.join)(dir, filename), content, "utf-8");
9195
10201
  }
9196
10202
  function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
9197
10203
  const pad = (n) => n.toString().padStart(2, "0");
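
`formatLlmRequest` now routes `msg.content` through `extractText`, since after this release content may be an array of parts rather than a plain string. The implementation of `extractText` is not visible in this diff; a plausible shape, offered purely as an assumption:

```typescript
type ContentPart = { type: "text"; text: string } | { type: string };

// Hypothetical: keep the text parts for the log, drop binary parts.
function extractText(content: string | ContentPart[]): string {
  if (typeof content === "string") return content;
  return content
    .filter((part): part is { type: "text"; text: string } => part.type === "text")
    .map((part) => part.text)
    .join("\n");
}
```
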
@@ -9205,9 +10211,9 @@ function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
9205
10211
  }
9206
10212
  async function createSessionDir(baseDir) {
9207
10213
  const timestamp = formatSessionTimestamp();
9208
- const sessionDir = (0, import_node_path7.join)(baseDir, timestamp);
10214
+ const sessionDir = (0, import_node_path8.join)(baseDir, timestamp);
9209
10215
  try {
9210
- await (0, import_promises2.mkdir)(sessionDir, { recursive: true });
10216
+ await (0, import_promises3.mkdir)(sessionDir, { recursive: true });
9211
10217
  return sessionDir;
9212
10218
  } catch (error) {
9213
10219
  console.warn(`[llmist] Failed to create log session directory: ${sessionDir}`, error);
@@ -9258,9 +10264,9 @@ function ensureMarkedConfigured() {
9258
10264
  markedConfigured = true;
9259
10265
  }
9260
10266
  }
9261
- function renderMarkdown(text) {
10267
+ function renderMarkdown(text3) {
9262
10268
  ensureMarkedConfigured();
9263
- let rendered = import_marked.marked.parse(text);
10269
+ let rendered = import_marked.marked.parse(text3);
9264
10270
  rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => import_chalk3.default.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => import_chalk3.default.italic(content));
9265
10271
  return rendered.trimEnd();
9266
10272
  }
@@ -9274,8 +10280,8 @@ function createRainbowSeparator() {
9274
10280
  }
9275
10281
  return result;
9276
10282
  }
9277
- function renderMarkdownWithSeparators(text) {
9278
- const rendered = renderMarkdown(text);
10283
+ function renderMarkdownWithSeparators(text3) {
10284
+ const rendered = renderMarkdown(text3);
9279
10285
  const separator = createRainbowSeparator();
9280
10286
  return `
9281
10287
  ${separator}
@@ -9443,12 +10449,12 @@ var StreamPrinter = class {
9443
10449
  *
9444
10450
  * @param text - Text to write
9445
10451
  */
9446
- write(text) {
9447
- if (!text) {
10452
+ write(text3) {
10453
+ if (!text3) {
9448
10454
  return;
9449
10455
  }
9450
- this.target.write(text);
9451
- this.endedWithNewline = text.endsWith("\n");
10456
+ this.target.write(text3);
10457
+ this.endedWithNewline = text3.endsWith("\n");
9452
10458
  }
9453
10459
  /**
9454
10460
  * Ensures output ends with a newline by writing one if needed.
@@ -9927,7 +10933,7 @@ function addCompleteOptions(cmd, defaults) {
9927
10933
  OPTION_DESCRIPTIONS.maxTokens,
9928
10934
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
9929
10935
  defaults?.["max-tokens"]
9930
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
10936
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
9931
10937
  }
9932
10938
  function addAgentOptions(cmd, defaults) {
9933
10939
  const gadgetAccumulator = (value, previous = []) => [
@@ -9951,7 +10957,7 @@ function addAgentOptions(cmd, defaults) {
9951
10957
  OPTION_FLAGS.noBuiltinInteraction,
9952
10958
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
9953
10959
  defaults?.["builtin-interaction"] !== false
9954
- ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
10960
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
9955
10961
  }
9956
10962
  function configToCompleteOptions(config) {
9957
10963
  const result = {};
@@ -10018,7 +11024,7 @@ var DEV_SOURCE_MOUNT_TARGET = "/llmist-src";
10018
11024
  // src/cli/config.ts
10019
11025
  var import_node_fs8 = require("fs");
10020
11026
  var import_node_os2 = require("os");
10021
- var import_node_path8 = require("path");
11027
+ var import_node_path9 = require("path");
10022
11028
  var import_js_toml = require("js-toml");
10023
11029
 
10024
11030
  // src/cli/templates.ts
@@ -10179,7 +11185,7 @@ var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
10179
11185
  "description"
10180
11186
  ]);
10181
11187
  function getConfigPath() {
10182
- return (0, import_node_path8.join)((0, import_node_os2.homedir)(), ".llmist", "cli.toml");
11188
+ return (0, import_node_path9.join)((0, import_node_os2.homedir)(), ".llmist", "cli.toml");
10183
11189
  }
10184
11190
  var ConfigError = class extends Error {
10185
11191
  constructor(message, path5) {
@@ -11026,8 +12032,8 @@ function computeDockerfileHash(dockerfile) {
11026
12032
  // src/cli/docker/image-manager.ts
11027
12033
  var import_node_fs9 = require("fs");
11028
12034
  var import_node_os3 = require("os");
11029
- var import_node_path9 = require("path");
11030
- var CACHE_DIR = (0, import_node_path9.join)((0, import_node_os3.homedir)(), ".llmist", "docker-cache");
12035
+ var import_node_path10 = require("path");
12036
+ var CACHE_DIR = (0, import_node_path10.join)((0, import_node_os3.homedir)(), ".llmist", "docker-cache");
11031
12037
  var HASH_FILE = "image-hash.json";
11032
12038
  function ensureCacheDir() {
11033
12039
  if (!(0, import_node_fs9.existsSync)(CACHE_DIR)) {
@@ -11035,7 +12041,7 @@ function ensureCacheDir() {
11035
12041
  }
11036
12042
  }
11037
12043
  function getCachedHash(imageName) {
11038
- const hashPath = (0, import_node_path9.join)(CACHE_DIR, HASH_FILE);
12044
+ const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
11039
12045
  if (!(0, import_node_fs9.existsSync)(hashPath)) {
11040
12046
  return void 0;
11041
12047
  }
@@ -11049,7 +12055,7 @@ function getCachedHash(imageName) {
11049
12055
  }
11050
12056
  function setCachedHash(imageName, hash) {
11051
12057
  ensureCacheDir();
11052
- const hashPath = (0, import_node_path9.join)(CACHE_DIR, HASH_FILE);
12058
+ const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
11053
12059
  let cache = {};
11054
12060
  if ((0, import_node_fs9.existsSync)(hashPath)) {
11055
12061
  try {
@@ -11075,7 +12081,7 @@ var DockerBuildError = class extends Error {
11075
12081
  };
11076
12082
  async function buildImage(imageName, dockerfile) {
11077
12083
  ensureCacheDir();
11078
- const dockerfilePath = (0, import_node_path9.join)(CACHE_DIR, "Dockerfile");
12084
+ const dockerfilePath = (0, import_node_path10.join)(CACHE_DIR, "Dockerfile");
11079
12085
  (0, import_node_fs9.writeFileSync)(dockerfilePath, dockerfile);
11080
12086
  const proc = Bun.spawn(
11081
12087
  ["docker", "build", "-t", imageName, "-f", dockerfilePath, CACHE_DIR],
@@ -11110,7 +12116,7 @@ async function ensureImage(imageName = DEFAULT_IMAGE_NAME, dockerfile) {
11110
12116
 
11111
12117
  // src/cli/docker/docker-wrapper.ts
11112
12118
  var import_node_fs10 = require("fs");
11113
- var import_node_path10 = require("path");
12119
+ var import_node_path11 = require("path");
11114
12120
  var import_node_os4 = require("os");
11115
12121
  var DockerUnavailableError = class extends Error {
11116
12122
  constructor() {
@@ -11156,9 +12162,9 @@ function autoDetectDevSource() {
11156
12162
  if (!scriptPath || !scriptPath.endsWith("src/cli.ts")) {
11157
12163
  return void 0;
11158
12164
  }
11159
- const srcDir = (0, import_node_path10.dirname)(scriptPath);
11160
- const projectDir = (0, import_node_path10.dirname)(srcDir);
11161
- const packageJsonPath = (0, import_node_path10.join)(projectDir, "package.json");
12165
+ const srcDir = (0, import_node_path11.dirname)(scriptPath);
12166
+ const projectDir = (0, import_node_path11.dirname)(srcDir);
12167
+ const packageJsonPath = (0, import_node_path11.join)(projectDir, "package.json");
11162
12168
  if (!(0, import_node_fs10.existsSync)(packageJsonPath)) {
11163
12169
  return void 0;
11164
12170
  }
@@ -11307,7 +12313,7 @@ function createHumanInputHandler(env, progress, keyboard) {
11307
12313
  keyboard.cleanupEsc();
11308
12314
  keyboard.cleanupEsc = null;
11309
12315
  }
11310
- const rl = (0, import_promises3.createInterface)({ input: env.stdin, output: env.stdout });
12316
+ const rl = (0, import_promises4.createInterface)({ input: env.stdin, output: env.stdout });
11311
12317
  try {
11312
12318
  const questionLine = question.trim() ? `
11313
12319
  ${renderMarkdownWithSeparators(question.trim())}` : "";
@@ -11665,8 +12671,8 @@ Denied: ${result.reason ?? "by user"}`
11665
12671
  builder.withTextOnlyHandler("acknowledge");
11666
12672
  builder.withTextWithGadgetsHandler({
11667
12673
  gadgetName: "TellUser",
11668
- parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
11669
- resultMapping: (text) => `\u2139\uFE0F ${text}`
12674
+ parameterMapping: (text3) => ({ message: text3, done: false, type: "info" }),
12675
+ resultMapping: (text3) => `\u2139\uFE0F ${text3}`
11670
12676
  });
11671
12677
  builder.withTrailingMessage(
11672
12678
  (ctx) => [
@@ -11675,7 +12681,19 @@ Denied: ${result.reason ?? "by user"}`
11675
12681
  "Maximize efficiency by batching independent operations in a single response."
11676
12682
  ].join(" ")
11677
12683
  );
11678
- const agent = builder.ask(prompt);
12684
+ let agent;
12685
+ if (options.image || options.audio) {
12686
+ const parts = [text(prompt)];
12687
+ if (options.image) {
12688
+ parts.push(await readImageFile(options.image));
12689
+ }
12690
+ if (options.audio) {
12691
+ parts.push(await readAudioFile(options.audio));
12692
+ }
12693
+ agent = builder.askWithContent(parts);
12694
+ } else {
12695
+ agent = builder.ask(prompt);
12696
+ }
11679
12697
  let textBuffer = "";
11680
12698
  const flushTextBuffer = () => {
11681
12699
  if (textBuffer) {
@@ -11750,6 +12768,7 @@ function registerAgentCommand(program, env, config) {
11750
12768
  }
11751
12769
 
11752
12770
  // src/cli/complete-command.ts
12771
+ init_input_content();
11753
12772
  init_messages();
11754
12773
  init_model_shortcuts();
11755
12774
  init_constants2();
@@ -11761,7 +12780,18 @@ async function executeComplete(promptArg, options, env) {
11761
12780
  if (options.system) {
11762
12781
  builder.addSystem(options.system);
11763
12782
  }
11764
- builder.addUser(prompt);
12783
+ if (options.image || options.audio) {
12784
+ const parts = [text(prompt)];
12785
+ if (options.image) {
12786
+ parts.push(await readImageFile(options.image));
12787
+ }
12788
+ if (options.audio) {
12789
+ parts.push(await readAudioFile(options.audio));
12790
+ }
12791
+ builder.addUserMultimodal(parts);
12792
+ } else {
12793
+ builder.addUser(prompt);
12794
+ }
11765
12795
  const messages = builder.build();
11766
12796
  const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
11767
12797
  let llmSessionDir;
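
The complete command gets the same branch: with `--image` or `--audio` present, the prompt becomes a parts array attached via `addUserMultimodal` rather than `addUser`, so a one-shot multimodal completion can presumably be run as, e.g., `llmist complete --image chart.png "Summarize this chart"`.
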
@@ -11836,7 +12866,7 @@ init_schema_to_json();
11836
12866
  init_schema_validator();
11837
12867
 
11838
12868
  // src/cli/gadget-prompts.ts
11839
- var import_promises4 = require("readline/promises");
12869
+ var import_promises5 = require("readline/promises");
11840
12870
  var import_chalk6 = __toESM(require("chalk"), 1);
11841
12871
  init_schema_to_json();
11842
12872
  async function promptForParameters(schema, ctx) {
@@ -11847,7 +12877,7 @@ async function promptForParameters(schema, ctx) {
11847
12877
  if (!jsonSchema.properties || Object.keys(jsonSchema.properties).length === 0) {
11848
12878
  return {};
11849
12879
  }
11850
- const rl = (0, import_promises4.createInterface)({ input: ctx.stdin, output: ctx.stdout });
12880
+ const rl = (0, import_promises5.createInterface)({ input: ctx.stdin, output: ctx.stdout });
11851
12881
  const params = {};
11852
12882
  try {
11853
12883
  for (const [key, prop] of Object.entries(jsonSchema.properties)) {
@@ -12709,7 +13739,7 @@ var import_node_fs12 = require("fs");
12709
13739
  var DEFAULT_SPEECH_MODEL = "tts-1";
12710
13740
  var DEFAULT_VOICE = "nova";
12711
13741
  async function executeSpeech(textArg, options, env) {
12712
- const text = await resolvePrompt(textArg, env);
13742
+ const text3 = await resolvePrompt(textArg, env);
12713
13743
  const client = env.createClient();
12714
13744
  const model = options.model;
12715
13745
  const voice = options.voice ?? DEFAULT_VOICE;
@@ -12721,7 +13751,7 @@ async function executeSpeech(textArg, options, env) {
12721
13751
  }
12722
13752
  const result = await client.speech.generate({
12723
13753
  model,
12724
- input: text,
13754
+ input: text3,
12725
13755
  voice,
12726
13756
  responseFormat: options.format,
12727
13757
  speed
@@ -12754,7 +13784,43 @@ function registerSpeechCommand(program, env, config) {
12754
13784
  OPTION_DESCRIPTIONS.model,
12755
13785
  config?.model ?? DEFAULT_SPEECH_MODEL
12756
13786
  ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
12757
- (text, options) => executeAction(() => executeSpeech(text, options, env), env)
13787
+ (text3, options) => executeAction(() => executeSpeech(text3, options, env), env)
13788
+ );
13789
+ }
13790
+
13791
+ // src/cli/vision-command.ts
13792
+ init_model_shortcuts();
13793
+ async function executeVision(imagePath, options, env) {
13794
+ const client = env.createClient();
13795
+ const model = resolveModel(options.model);
13796
+ const imageBuffer = await readFileBuffer(imagePath);
13797
+ const prompt = options.prompt ?? "Describe this image in detail.";
13798
+ const stderrTTY = env.stderr.isTTY === true;
13799
+ if (!options.quiet && stderrTTY) {
13800
+ env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
13801
+ `);
13802
+ }
13803
+ const result = await client.vision.analyze({
13804
+ model,
13805
+ image: imageBuffer,
13806
+ prompt,
13807
+ maxTokens: options.maxTokens
13808
+ });
13809
+ env.stdout.write(result);
13810
+ env.stdout.write("\n");
13811
+ }
13812
+ function registerVisionCommand(program, env) {
13813
+ program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
13814
+ OPTION_FLAGS.model,
13815
+ OPTION_DESCRIPTIONS.model,
13816
+ "gpt-4o"
13817
+ // Default to a vision-capable model
13818
+ ).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
13819
+ OPTION_FLAGS.maxTokens,
13820
+ OPTION_DESCRIPTIONS.maxTokens,
13821
+ createNumericParser({ label: "Max tokens", integer: true, min: 1 })
13822
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
13823
+ (imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
12758
13824
  );
12759
13825
  }
12760
13826
 
@@ -12803,7 +13869,7 @@ function createLoggerFactory(config) {
12803
13869
  }
12804
13870
  function createPromptFunction(stdin, stdout) {
12805
13871
  return (question) => {
12806
- return new Promise((resolve2) => {
13872
+ return new Promise((resolve3) => {
12807
13873
  const rl = import_node_readline.default.createInterface({
12808
13874
  input: stdin,
12809
13875
  output: stdout
@@ -12818,7 +13884,7 @@ function createPromptFunction(stdin, stdout) {
12818
13884
  `);
12819
13885
  rl.question(import_chalk9.default.green.bold("You: "), (answer) => {
12820
13886
  rl.close();
12821
- resolve2(answer);
13887
+ resolve3(answer);
12822
13888
  });
12823
13889
  });
12824
13890
  };
@@ -12911,6 +13977,7 @@ function createProgram(env, config) {
12911
13977
  registerAgentCommand(program, env, config?.agent);
12912
13978
  registerImageCommand(program, env, config?.image);
12913
13979
  registerSpeechCommand(program, env, config?.speech);
13980
+ registerVisionCommand(program, env);
12914
13981
  registerModelsCommand(program, env);
12915
13982
  registerGadgetCommand(program, env);
12916
13983
  if (config) {