llmist 2.4.0 → 2.5.0
This diff shows the publicly released contents of the two package versions as published to their registry. It is provided for informational purposes only.
- package/README.md +7 -0
- package/dist/{chunk-QFRVTS5F.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-6ZDUWO6N.js → chunk-YHS2DYXP.js} +1781 -528
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +1218 -151
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +172 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1393 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +93 -20
- package/dist/index.d.ts +93 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-BQcC2VCP.d.cts → mock-stream-ga4KIiwX.d.cts} +714 -12
- package/dist/{mock-stream-BQcC2VCP.d.ts → mock-stream-ga4KIiwX.d.ts} +714 -12
- package/dist/testing/index.cjs +1713 -508
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-6ZDUWO6N.js.map +0 -1
- package/dist/chunk-QFRVTS5F.js.map +0 -1
package/dist/cli.cjs
CHANGED
@@ -46,6 +46,137 @@ var init_constants = __esm({
   }
 });
 
+// src/core/input-content.ts
+function text(content) {
+  return { type: "text", text: content };
+}
+function imageFromUrl(url) {
+  return {
+    type: "image",
+    source: { type: "url", url }
+  };
+}
+function detectImageMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "image/webp") {
+          if (bytes.length >= 12) {
+            const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+            if (!webpMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function detectAudioMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "audio/wav") {
+          if (bytes.length >= 12) {
+            const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+            if (!waveMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function toBase64(data) {
+  if (typeof data === "string") {
+    return data;
+  }
+  return Buffer.from(data).toString("base64");
+}
+function imageFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectImageMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "image",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function audioFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectAudioMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "audio",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function isDataUrl(input) {
+  return input.startsWith("data:");
+}
+function parseDataUrl(url) {
+  const match = url.match(/^data:([^;]+);base64,(.+)$/);
+  if (!match) return null;
+  return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+  "src/core/input-content.ts"() {
+    "use strict";
+    IMAGE_MAGIC_BYTES = [
+      { bytes: [255, 216, 255], mimeType: "image/jpeg" },
+      { bytes: [137, 80, 78, 71], mimeType: "image/png" },
+      { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+      // WebP starts with RIFF....WEBP
+      { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+    ];
+    AUDIO_MAGIC_BYTES = [
+      // MP3 frame sync
+      { bytes: [255, 251], mimeType: "audio/mp3" },
+      { bytes: [255, 250], mimeType: "audio/mp3" },
+      // ID3 tag (MP3)
+      { bytes: [73, 68, 51], mimeType: "audio/mp3" },
+      // OGG
+      { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+      // WAV (RIFF)
+      { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+      // WebM
+      { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
+    ];
+  }
+});
+
 // src/core/model-shortcuts.ts
 function isKnownModelPattern(model) {
   const normalized = model.toLowerCase();
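The new input-content module identifies image and audio payloads by sniffing leading magic bytes, and falls back to an explicit `mediaType` when nothing matches; because WebP and WAV share the RIFF prefix, bytes 8–11 are also checked for the `WEBP`/`WAVE` marker. A minimal TypeScript sketch of the same sniffing technique (table trimmed to two entries, helper name hypothetical):

```ts
// Minimal sketch of the magic-byte sniffing above (Node.js Buffer API).
// The table mirrors two entries of IMAGE_MAGIC_BYTES; sniffImageMime is a made-up name.
const IMAGE_MAGIC: Array<{ bytes: number[]; mimeType: string }> = [
  { bytes: [0xff, 0xd8, 0xff], mimeType: "image/jpeg" },       // 255, 216, 255
  { bytes: [0x89, 0x50, 0x4e, 0x47], mimeType: "image/png" },  // 137, 80, 78, 71
];

function sniffImageMime(data: Buffer): string | null {
  for (const { bytes, mimeType } of IMAGE_MAGIC) {
    if (data.length >= bytes.length && bytes.every((b, i) => data[i] === b)) {
      return mimeType;
    }
  }
  return null; // caller must pass mediaType explicitly, as imageFromBuffer requires
}

// sniffImageMime(Buffer.from([0xff, 0xd8, 0xff, 0xe0])) === "image/jpeg"
```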
@@ -375,7 +506,9 @@ var init_prompt_config = __esm({
       rules: () => [
         "Output ONLY plain text with the exact markers - never use function/tool calling",
         "You can invoke multiple gadgets in a single response",
-        "
+        "Gadgets without dependencies execute immediately (in parallel if multiple)",
+        "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+        "If any dependency fails, dependent gadgets are automatically skipped"
       ],
       customExamples: null
     };
@@ -383,11 +516,24 @@ var init_prompt_config = __esm({
 });
 
 // src/core/messages.ts
+function normalizeContent(content) {
+  if (typeof content === "string") {
+    return [{ type: "text", text: content }];
+  }
+  return content;
+}
+function extractText(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+}
 var LLMMessageBuilder;
 var init_messages = __esm({
   "src/core/messages.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_prompt_config();
     LLMMessageBuilder = class {
       messages = [];
@@ -489,6 +635,10 @@ CRITICAL: ${criticalUsage}
         parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
         parts.push(`
+With ID: ${this.startPrefix}gadget_name:my_id`);
+        parts.push(`
+With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+        parts.push(`
 2. ${formatDescription}`);
         parts.push(`
 3. End marker: ${this.endPrefix}`);
@@ -538,6 +688,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+        const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+        parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
         parts.push(`
 
 BLOCK FORMAT SYNTAX:
@@ -588,6 +757,25 @@ Produces: { "items": ["first", "second"] }`);
         }
         return parts.join("");
       }
+      /**
+       * Add a user message.
+       * Content can be a string (text only) or an array of content parts (multimodal).
+       *
+       * @param content - Message content
+       * @param metadata - Optional metadata
+       *
+       * @example
+       * ```typescript
+       * // Text only
+       * builder.addUser("Hello!");
+       *
+       * // Multimodal
+       * builder.addUser([
+       *   text("What's in this image?"),
+       *   imageFromBuffer(imageData),
+       * ]);
+       * ```
+       */
       addUser(content, metadata) {
         this.messages.push({ role: "user", content, metadata });
         return this;
@@ -596,6 +784,104 @@ Produces: { "items": ["first", "second"] }`);
         this.messages.push({ role: "assistant", content, metadata });
         return this;
       }
+      /**
+       * Add a user message with an image attachment.
+       *
+       * @param textContent - Text prompt
+       * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImage(
+       *   "What's in this image?",
+       *   await fs.readFile("photo.jpg"),
+       *   "image/jpeg" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithImage(textContent, imageData, mimeType) {
+        const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+        const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+        if (!detectedMime) {
+          throw new Error(
+            "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+          );
+        }
+        const content = [
+          text(textContent),
+          {
+            type: "image",
+            source: {
+              type: "base64",
+              mediaType: detectedMime,
+              data: toBase64(imageBuffer)
+            }
+          }
+        ];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an image URL (OpenAI only).
+       *
+       * @param textContent - Text prompt
+       * @param imageUrl - URL to the image
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImageUrl(
+       *   "What's in this image?",
+       *   "https://example.com/image.jpg"
+       * );
+       * ```
+       */
+      addUserWithImageUrl(textContent, imageUrl) {
+        const content = [text(textContent), imageFromUrl(imageUrl)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an audio attachment (Gemini only).
+       *
+       * @param textContent - Text prompt
+       * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithAudio(
+       *   "Transcribe this audio",
+       *   await fs.readFile("recording.mp3"),
+       *   "audio/mp3" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithAudio(textContent, audioData, mimeType) {
+        const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+        const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with multiple content parts.
+       * Provides full flexibility for complex multimodal messages.
+       *
+       * @param parts - Array of content parts
+       *
+       * @example
+       * ```typescript
+       * builder.addUserMultimodal([
+       *   text("Compare these images:"),
+       *   imageFromBuffer(image1),
+       *   imageFromBuffer(image2),
+       * ]);
+       * ```
+       */
+      addUserMultimodal(parts) {
+        this.messages.push({ role: "user", content: parts });
+        return this;
+      }
       addGadgetCall(gadget, parameters, result) {
         const paramStr = this.formatBlockParameters(parameters, "");
         this.messages.push({
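Taken together, the new builder methods mirror the JSDoc examples above. A short usage sketch, assuming `LLMMessageBuilder`, `text`, and `imageFromBuffer` are importable from the package root (the dist bundle shown here inlines them, so the import path is an assumption):

```ts
import { promises as fs } from "node:fs";
// Import path is an assumption; the names come from the hunks above.
import { LLMMessageBuilder, text, imageFromBuffer } from "llmist";

async function buildMessages() {
  const builder = new LLMMessageBuilder();
  // MIME type is auto-detected from the magic bytes; pass it explicitly on failure.
  builder.addUserWithImage("What's in this image?", await fs.readFile("photo.jpg"));
  builder.addUserMultimodal([
    text("Compare these images:"),
    imageFromBuffer(await fs.readFile("a.png")),
    imageFromBuffer(await fs.readFile("b.png")),
  ]);
  return builder.build(); // build() appears in the vision namespace hunk further down
}
```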
@@ -1914,7 +2200,7 @@ var init_conversation_manager = __esm({
         if (msg.role === "user") {
           this.historyBuilder.addUser(msg.content);
         } else if (msg.role === "assistant") {
-          this.historyBuilder.addAssistant(msg.content);
+          this.historyBuilder.addAssistant(extractText(msg.content));
         }
       }
     }
@@ -1935,8 +2221,10 @@ async function runWithHandlers(agentGenerator, handlers) {
       if (handlers.onGadgetCall) {
         await handlers.onGadgetCall({
           gadgetName: event.call.gadgetName,
+          invocationId: event.call.invocationId,
           parameters: event.call.parameters,
-          parametersRaw: event.call.parametersRaw
+          parametersRaw: event.call.parametersRaw,
+          dependencies: event.call.dependencies
         });
       }
       break;
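Handlers passed to `runWithHandlers` now receive the invocation ID and declared dependencies on each gadget call. A sketch of a handler consuming the new fields (the shape follows the call site above; the field types are assumptions):

```ts
// Hypothetical handler; only the field names are taken from the hunk above.
const handlers = {
  async onGadgetCall(call: {
    gadgetName: string;
    invocationId: string;
    parameters: unknown;
    parametersRaw: string;
    dependencies: string[];
  }) {
    console.log(
      `${call.invocationId} -> ${call.gadgetName}`,
      call.dependencies.length
        ? `waits on ${call.dependencies.join(", ")}`
        : "runs immediately"
    );
  },
};
```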
@@ -2783,15 +3071,37 @@ var init_parser = __esm({
         return segment.trim().length > 0 ? segment : void 0;
       }
       /**
-       * Parse gadget name
-       *
+       * Parse gadget name with optional invocation ID and dependencies.
+       *
+       * Supported formats:
+       * - `GadgetName` - Auto-generate ID, no dependencies
+       * - `GadgetName:my_id` - Explicit ID, no dependencies
+       * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+       *
+       * Dependencies must be comma-separated invocation IDs.
        */
       parseGadgetName(gadgetName) {
-
-
-        return {
+        const parts = gadgetName.split(":");
+        if (parts.length === 1) {
+          return {
+            actualName: parts[0],
+            invocationId: `gadget_${++globalInvocationCounter}`,
+            dependencies: []
+          };
+        } else if (parts.length === 2) {
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: []
+          };
+        } else {
+          const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: deps
+          };
         }
-        return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
       }
       /**
        * Extract the error message from a parse error.
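A standalone re-implementation of the three header shapes, matching the parsing logic above (a local counter stands in for `globalInvocationCounter`):

```ts
// Sketch of the name:id:deps header parsing; not the library's exported API.
let counter = 0;

function parseGadgetName(header: string): {
  actualName: string;
  invocationId: string;
  dependencies: string[];
} {
  const parts = header.split(":");
  if (parts.length === 1) {
    // Bare name: auto-generate an invocation ID.
    return { actualName: parts[0], invocationId: `gadget_${++counter}`, dependencies: [] };
  }
  const dependencies =
    parts.length > 2
      ? parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0)
      : [];
  return { actualName: parts[0], invocationId: parts[1].trim(), dependencies };
}

// parseGadgetName("merge_data:merge_1:fetch_1,fetch_2")
//   -> { actualName: "merge_data", invocationId: "merge_1",
//        dependencies: ["fetch_1", "fetch_2"] }
```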
@@ -2827,39 +3137,20 @@ var init_parser = __esm({
         const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
         if (metadataEndIndex === -1) break;
         const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-        const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+        const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
         const contentStartIndex = metadataEndIndex + 1;
         let partEndIndex;
         let endMarkerLength = 0;
-
-
-
-
-        endMarkerLength =
+        const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+        const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+        if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+          partEndIndex = nextStartPos;
+          endMarkerLength = 0;
+        } else if (endPos !== -1) {
+          partEndIndex = endPos;
+          endMarkerLength = this.endPrefix.length;
         } else {
-
-          let validEndPos = -1;
-          let searchPos = contentStartIndex;
-          while (true) {
-            const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-            if (endPos === -1) break;
-            const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-            if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-              validEndPos = endPos;
-              break;
-            } else {
-              searchPos = endPos + this.endPrefix.length;
-            }
-          }
-          if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-            partEndIndex = nextStartPos;
-            endMarkerLength = 0;
-          } else if (validEndPos !== -1) {
-            partEndIndex = validEndPos;
-            endMarkerLength = this.endPrefix.length;
-          } else {
-            break;
-          }
+          break;
         }
         const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
         const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2870,7 +3161,8 @@ var init_parser = __esm({
               invocationId,
               parametersRaw,
               parameters,
-              parseError
+              parseError,
+              dependencies
             }
           };
           startIndex = partEndIndex + endMarkerLength;
@@ -2893,7 +3185,7 @@ var init_parser = __esm({
       const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
       if (metadataEndIndex !== -1) {
         const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-        const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+        const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
         const contentStartIndex = metadataEndIndex + 1;
         const parametersRaw = this.buffer.substring(contentStartIndex).trim();
         const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2904,7 +3196,8 @@ var init_parser = __esm({
             invocationId,
             parametersRaw,
             parameters,
-            parseError
+            parseError,
+            dependencies
           }
         };
         return;
@@ -3274,6 +3567,13 @@ var init_stream_processor = __esm({
       accumulatedText = "";
       shouldStopExecution = false;
       observerFailureCount = 0;
+      // Dependency tracking for gadget execution DAG
+      /** Gadgets waiting for their dependencies to complete */
+      pendingGadgets = /* @__PURE__ */ new Map();
+      /** Completed gadget results, keyed by invocation ID */
+      completedResults = /* @__PURE__ */ new Map();
+      /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+      failedInvocations = /* @__PURE__ */ new Set();
       constructor(options) {
         this.iteration = options.iteration;
        this.registry = options.registry;
@@ -3374,6 +3674,16 @@ var init_stream_processor = __esm({
             }
           }
         }
+        const finalPendingEvents = await this.processPendingGadgets();
+        outputs.push(...finalPendingEvents);
+        if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+          didExecuteGadgets = true;
+        }
+        for (const evt of finalPendingEvents) {
+          if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+            shouldBreakLoop = true;
+          }
+        }
       }
       let finalMessage = this.accumulatedText;
       if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3425,7 +3735,11 @@ var init_stream_processor = __esm({
         return [{ type: "text", content }];
       }
       /**
-       * Process a gadget call through the full lifecycle.
+       * Process a gadget call through the full lifecycle, handling dependencies.
+       *
+       * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+       * Gadgets with unsatisfied dependencies are queued for later execution.
+       * After each execution, pending gadgets are checked to see if they can now run.
        */
       async processGadgetCall(call) {
         if (this.shouldStopExecution) {
@@ -3436,6 +3750,53 @@ var init_stream_processor = __esm({
         }
         const events = [];
         events.push({ type: "gadget_call", call });
+        if (call.dependencies.length > 0) {
+          if (call.dependencies.includes(call.invocationId)) {
+            this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId
+            });
+            this.failedInvocations.add(call.invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: call.invocationId,
+              failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+            };
+            events.push(skipEvent);
+            return events;
+          }
+          const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+          if (failedDep) {
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            return events;
+          }
+          const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+          if (unsatisfied.length > 0) {
+            this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              waitingOn: unsatisfied
+            });
+            this.pendingGadgets.set(call.invocationId, call);
+            return events;
+          }
+        }
+        const executeEvents = await this.executeGadgetWithHooks(call);
+        events.push(...executeEvents);
+        const triggeredEvents = await this.processPendingGadgets();
+        events.push(...triggeredEvents);
+        return events;
+      }
+      /**
+       * Execute a gadget through the full hook lifecycle.
+       * This is the core execution logic, extracted from processGadgetCall.
+       */
+      async executeGadgetWithHooks(call) {
+        const events = [];
         if (call.parseError) {
           this.logger.warn("Gadget has parse error", {
             gadgetName: call.gadgetName,
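The scheduling policy above reduces to: run immediately when every dependency has completed, queue when some are still outstanding, and skip when any dependency failed or the gadget depends on itself. A compact model of that decision (not the library's code):

```ts
// Illustrative classification only; names are hypothetical.
type Call = { id: string; deps: string[] };

function classify(
  call: Call,
  completed: Set<string>,
  failed: Set<string>
): "skip" | "queue" | "run" {
  if (call.deps.includes(call.id)) return "skip";           // self-referential
  if (call.deps.some((d) => failed.has(d))) return "skip";  // a dependency failed
  if (call.deps.some((d) => !completed.has(d))) return "queue"; // still waiting
  return "run";
}
```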
@@ -3566,6 +3927,10 @@ var init_stream_processor = __esm({
           });
         }
         await this.runObserversInParallel(completeObservers);
+        this.completedResults.set(result.invocationId, result);
+        if (result.error) {
+          this.failedInvocations.add(result.invocationId);
+        }
         events.push({ type: "gadget_result", result });
         if (result.error) {
           const errorType = this.determineErrorType(call, result);
@@ -3581,6 +3946,162 @@ var init_stream_processor = __esm({
         }
         return events;
       }
+      /**
+       * Handle a gadget that cannot execute because a dependency failed.
+       * Calls the onDependencySkipped controller to allow customization.
+       */
+      async handleFailedDependency(call, failedDep) {
+        const events = [];
+        const depResult = this.completedResults.get(failedDep);
+        const depError = depResult?.error ?? "Dependency failed";
+        let action = { action: "skip" };
+        if (this.hooks.controllers?.onDependencySkipped) {
+          const context = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          action = await this.hooks.controllers.onDependencySkipped(context);
+        }
+        if (action.action === "skip") {
+          this.failedInvocations.add(call.invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError
+          };
+          events.push(skipEvent);
+          if (this.hooks.observers?.onGadgetSkipped) {
+            const observeContext = {
+              iteration: this.iteration,
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: failedDep,
+              failedDependencyError: depError,
+              logger: this.logger
+            };
+            await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+          }
+          this.logger.info("Gadget skipped due to failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        } else if (action.action === "execute_anyway") {
+          this.logger.info("Executing gadget despite failed dependency (controller override)", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+          const executeEvents = await this.executeGadgetWithHooks(call);
+          events.push(...executeEvents);
+        } else if (action.action === "use_fallback") {
+          const fallbackResult = {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            result: action.fallbackResult,
+            executionTimeMs: 0
+          };
+          this.completedResults.set(call.invocationId, fallbackResult);
+          events.push({ type: "gadget_result", result: fallbackResult });
+          this.logger.info("Using fallback result for gadget with failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        }
+        return events;
+      }
+      /**
+       * Process pending gadgets whose dependencies are now satisfied.
+       * Executes ready gadgets in parallel and continues until no more can be triggered.
+       */
+      async processPendingGadgets() {
+        const events = [];
+        let progress = true;
+        while (progress && this.pendingGadgets.size > 0) {
+          progress = false;
+          const readyToExecute = [];
+          const readyToSkip = [];
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+            if (failedDep) {
+              readyToSkip.push({ call, failedDep });
+              continue;
+            }
+            const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+            if (allSatisfied) {
+              readyToExecute.push(call);
+            }
+          }
+          for (const { call, failedDep } of readyToSkip) {
+            this.pendingGadgets.delete(call.invocationId);
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            progress = true;
+          }
+          if (readyToExecute.length > 0) {
+            this.logger.debug("Executing ready gadgets in parallel", {
+              count: readyToExecute.length,
+              invocationIds: readyToExecute.map((c) => c.invocationId)
+            });
+            for (const call of readyToExecute) {
+              this.pendingGadgets.delete(call.invocationId);
+            }
+            const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+            const results = await Promise.all(executePromises);
+            for (const executeEvents of results) {
+              events.push(...executeEvents);
+            }
+            progress = true;
+          }
+        }
+        if (this.pendingGadgets.size > 0) {
+          const pendingIds = new Set(this.pendingGadgets.keys());
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+            const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+            const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+            let errorMessage;
+            let logLevel = "warn";
+            if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+              errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+              logLevel = "error";
+            } else if (circularDeps.length > 0) {
+              errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+            } else {
+              errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+            }
+            this.logger[logLevel]("Gadget has unresolvable dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId,
+              circularDependencies: circularDeps,
+              missingDependencies: trulyMissingDeps
+            });
+            this.failedInvocations.add(invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: missingDeps[0],
+              failedDependencyError: errorMessage
+            };
+            events.push(skipEvent);
+          }
+          this.pendingGadgets.clear();
+        }
+        return events;
+      }
       /**
        * Safely execute an observer, catching and logging any errors.
        * Observers are non-critical, so errors are logged but don't crash the system.
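The `onDependencySkipped` controller can override the default skip with `execute_anyway` or `use_fallback`. A sketch of a controller exercising all three actions (the context field types are assumed from the object built above; the gadget names are invented):

```ts
// Hypothetical hooks object; only the action names and context fields
// come from the hunk above.
const hooks = {
  controllers: {
    async onDependencySkipped(ctx: {
      gadgetName: string;
      invocationId: string;
      failedDependency: string;
      failedDependencyError: string;
    }) {
      if (ctx.gadgetName === "merge_data") {
        // Substitute an empty result instead of skipping the merge.
        return { action: "use_fallback", fallbackResult: "[]" };
      }
      if (ctx.gadgetName === "log_summary") {
        return { action: "execute_anyway" }; // best-effort gadget
      }
      return { action: "skip" }; // default behavior
    },
  },
};
```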
@@ -4018,9 +4539,9 @@ var init_agent = __esm({
         if (msg.role === "user") {
           this.conversation.addUserMessage(msg.content);
         } else if (msg.role === "assistant") {
-          this.conversation.addAssistantMessage(msg.content);
+          this.conversation.addAssistantMessage(extractText(msg.content));
         } else if (msg.role === "system") {
-          this.conversation.addUserMessage(`[System] ${msg.content}`);
+          this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
         }
       }
     }
@@ -4599,6 +5120,7 @@ var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
     import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+    init_messages();
     init_anthropic_models();
     init_base_provider();
     init_constants2();
@@ -4637,7 +5159,7 @@ var init_anthropic = __esm({
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
           type: "text",
-          text: m.content,
+          text: extractText(m.content),
           // Add cache_control to the LAST system message block
           ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
         })) : void 0;
@@ -4650,14 +5172,10 @@ var init_anthropic = __esm({
         );
         const conversation = nonSystemMessages.map((message, index) => ({
           role: message.role,
-          content:
-
-
-
-            // Add cache_control to the LAST user message
-            ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-          }
-          ]
+          content: this.convertToAnthropicContent(
+            message.content,
+            message.role === "user" && index === lastUserIndex
+          )
         }));
         const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
         const payload = {
@@ -4673,6 +5191,52 @@ var init_anthropic = __esm({
         };
         return payload;
       }
+      /**
+       * Convert llmist content to Anthropic's content block format.
+       * Handles text, images (base64 only), and applies cache_control.
+       */
+      convertToAnthropicContent(content, addCacheControl) {
+        const parts = normalizeContent(content);
+        return parts.map((part, index) => {
+          const isLastPart = index === parts.length - 1;
+          const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+          if (part.type === "text") {
+            return {
+              type: "text",
+              text: part.text,
+              ...cacheControl
+            };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part, cacheControl);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "Anthropic does not support audio input. Use Google Gemini for audio processing."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
+      /**
+       * Convert an image content part to Anthropic's image block format.
+       */
+      convertImagePart(part, cacheControl) {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+          );
+        }
+        return {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: part.source.mediaType,
+            data: part.source.data
+          },
+          ...cacheControl
+        };
+      }
       async executeStreamRequest(payload, signal) {
         const client = this.client;
         const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
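The Anthropic conversion enforces the provider's constraints: images must be base64 (URLs throw), audio is unsupported, and `cache_control` rides on the last block. A trimmed sketch of the per-part mapping, with the part type written out:

```ts
// Illustrative only; the real method also threads cache_control through.
type Part =
  | { type: "text"; text: string }
  | {
      type: "image";
      source:
        | { type: "base64"; mediaType: string; data: string }
        | { type: "url"; url: string };
    };

function toAnthropicBlock(part: Part) {
  if (part.type === "text") return { type: "text", text: part.text };
  if (part.source.type === "url") {
    throw new Error("Anthropic requires base64 image data, not URLs");
  }
  return {
    type: "image",
    source: { type: "base64", media_type: part.source.mediaType, data: part.source.data },
  };
}
```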
@@ -4755,17 +5319,12 @@ var init_anthropic = __esm({
       async countTokens(messages, descriptor, _spec) {
         const client = this.client;
         const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+        const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
         const conversation = messages.filter(
           (message) => message.role !== "system"
         ).map((message) => ({
           role: message.role,
-          content:
-          {
-            type: "text",
-            text: message.content
-          }
-          ]
+          content: this.convertToAnthropicContent(message.content, false)
         }));
         try {
           const response = await client.messages.countTokens({
|
|
|
4779
5338
|
`Token counting failed for ${descriptor.name}, using fallback estimation:`,
|
|
4780
5339
|
error
|
|
4781
5340
|
);
|
|
4782
|
-
|
|
4783
|
-
|
|
5341
|
+
let totalChars = 0;
|
|
5342
|
+
let imageCount = 0;
|
|
5343
|
+
for (const msg of messages) {
|
|
5344
|
+
const parts = normalizeContent(msg.content);
|
|
5345
|
+
for (const part of parts) {
|
|
5346
|
+
if (part.type === "text") {
|
|
5347
|
+
totalChars += part.text.length;
|
|
5348
|
+
} else if (part.type === "image") {
|
|
5349
|
+
imageCount++;
|
|
5350
|
+
}
|
|
5351
|
+
}
|
|
5352
|
+
}
|
|
5353
|
+
return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
|
|
4784
5354
|
}
|
|
4785
5355
|
}
|
|
4786
5356
|
};
|
|
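When the token-counting API call fails, the provider now falls back to a character-based estimate plus a flat per-image charge: 1,000 tokens per image for Anthropic here, 258 per media part for Gemini and 765 per image for OpenAI in the later hunks. A worked example, assuming `FALLBACK_CHARS_PER_TOKEN` is 4 (its value is not shown in this diff):

```ts
// Worked instance of the fallback formula above; the constant's value is assumed.
const FALLBACK_CHARS_PER_TOKEN = 4;
const totalChars = 1000; // text characters across all parts
const imageCount = 1;    // each image charged a flat 1000 tokens (Anthropic)
const estimate = Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1000;
// estimate === 1250
```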
@@ -5309,6 +5879,7 @@ var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     "use strict";
     import_genai = require("@google/genai");
+    init_messages();
     init_base_provider();
     init_constants2();
     init_gemini_image_models();
@@ -5478,7 +6049,7 @@ var init_gemini = __esm({
       };
       return {
         model: descriptor.name,
-        contents
+        contents,
         config
       };
     }
@@ -5513,18 +6084,25 @@ var init_gemini = __esm({
       if (message.role === "system") {
         expandedMessages.push({
           role: "user",
-          content: message.content
+          content: extractText(message.content)
         });
         expandedMessages.push({
           role: "assistant",
           content: "Understood."
         });
       } else {
-        expandedMessages.push(
+        expandedMessages.push({
+          role: message.role,
+          content: message.content
+        });
       }
     }
     return this.mergeConsecutiveMessages(expandedMessages);
   }
+  /**
+   * Merge consecutive messages with the same role (required by Gemini).
+   * Handles multimodal content by converting to Gemini's part format.
+   */
   mergeConsecutiveMessages(messages) {
     if (messages.length === 0) {
       return [];
@@ -5533,15 +6111,16 @@ var init_gemini = __esm({
     let currentGroup = null;
     for (const message of messages) {
       const geminiRole = GEMINI_ROLE_MAP[message.role];
+      const geminiParts = this.convertToGeminiParts(message.content);
       if (currentGroup && currentGroup.role === geminiRole) {
-        currentGroup.parts.push(
+        currentGroup.parts.push(...geminiParts);
       } else {
         if (currentGroup) {
           result.push(currentGroup);
         }
         currentGroup = {
           role: geminiRole,
-          parts:
+          parts: geminiParts
         };
       }
     }
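Gemini requires strictly alternating roles, so consecutive messages with the same mapped role are folded into one entry by concatenating their parts. A minimal sketch of that merge (types simplified):

```ts
// Standalone model of the consecutive-role merge; not the library's code.
type GeminiContent = { role: string; parts: Array<{ text?: string }> };

function mergeConsecutive(items: GeminiContent[]): GeminiContent[] {
  const out: GeminiContent[] = [];
  for (const item of items) {
    const last = out[out.length - 1];
    if (last && last.role === item.role) {
      last.parts.push(...item.parts); // fold into the previous group
    } else {
      out.push({ role: item.role, parts: [...item.parts] });
    }
  }
  return out;
}
```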
@@ -5550,11 +6129,39 @@ var init_gemini = __esm({
     }
     return result;
   }
-
-
-
-
-
+  /**
+   * Convert llmist content to Gemini's part format.
+   * Handles text, images, and audio (Gemini supports all three).
+   */
+  convertToGeminiParts(content) {
+    const parts = normalizeContent(content);
+    return parts.map((part) => {
+      if (part.type === "text") {
+        return { text: part.text };
+      }
+      if (part.type === "image") {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+          );
+        }
+        return {
+          inlineData: {
+            mimeType: part.source.mediaType,
+            data: part.source.data
+          }
+        };
+      }
+      if (part.type === "audio") {
+        return {
+          inlineData: {
+            mimeType: part.source.mediaType,
+            data: part.source.data
+          }
+        };
+      }
+      throw new Error(`Unsupported content type: ${part.type}`);
+    });
+  }
   buildGenerationConfig(options) {
     const config = {};
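Gemini takes inline media as `{ inlineData: { mimeType, data } }` with base64 data, and the conversion above rejects image URLs. Illustrative part values (the base64 payload is a placeholder):

```ts
const base64Audio = "<base64-encoded mp3 bytes>"; // placeholder, not real data
const textPart = { text: "Transcribe this recording" };
const audioPart = { inlineData: { mimeType: "audio/mp3", data: base64Audio } };
```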
@@ -5575,9 +6182,9 @@ var init_gemini = __esm({
   async *wrapStream(iterable) {
     const stream2 = iterable;
     for await (const chunk of stream2) {
-      const
-      if (
-      yield { text, rawEvent: chunk };
+      const text3 = this.extractText(chunk);
+      if (text3) {
+        yield { text: text3, rawEvent: chunk };
       }
       const finishReason = this.extractFinishReason(chunk);
       const usage = this.extractUsage(chunk);
@@ -5638,7 +6245,7 @@ var init_gemini = __esm({
     try {
       const response = await client.models.countTokens({
         model: descriptor.name,
-        contents
+        contents
         // Note: systemInstruction not used - it's not supported by countTokens()
        // and would cause a 2100% token counting error
       });
@@ -5648,8 +6255,19 @@ var init_gemini = __esm({
       `Token counting failed for ${descriptor.name}, using fallback estimation:`,
       error
     );
-
-
+    let totalChars = 0;
+    let mediaCount = 0;
+    for (const msg of messages) {
+      const parts = normalizeContent(msg.content);
+      for (const part of parts) {
+        if (part.type === "text") {
+          totalChars += part.text.length;
+        } else if (part.type === "image" || part.type === "audio") {
+          mediaCount++;
+        }
+      }
+    }
+    return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
   }
 }
 };
@@ -6292,6 +6910,7 @@ var init_openai = __esm({
     "use strict";
     import_openai = __toESM(require("openai"), 1);
     import_tiktoken = require("tiktoken");
+    init_messages();
     init_base_provider();
     init_constants2();
     init_openai_image_models();
@@ -6399,11 +7018,7 @@ var init_openai = __esm({
       const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
       return {
         model: descriptor.name,
-        messages: messages.map((message) => (
-          role: ROLE_MAP[message.role],
-          content: message.content,
-          name: message.name
-        })),
+        messages: messages.map((message) => this.convertToOpenAIMessage(message)),
         // Only set max_completion_tokens if explicitly provided
         // Otherwise let the API use "as much as fits" in the context window
         ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -6415,6 +7030,77 @@ var init_openai = __esm({
         ...shouldIncludeTemperature ? { temperature } : {}
       };
     }
+    /**
+     * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+     * Handles role-specific content type requirements:
+     * - system/assistant: string content only
+     * - user: string or multimodal array content
+     */
+    convertToOpenAIMessage(message) {
+      const role = ROLE_MAP[message.role];
+      if (role === "user") {
+        const content = this.convertToOpenAIContent(message.content);
+        return {
+          role: "user",
+          content,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+      if (role === "system") {
+        return {
+          role: "system",
+          content: textContent,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      return {
+        role: "assistant",
+        content: textContent,
+        ...message.name ? { name: message.name } : {}
+      };
+    }
+    /**
+     * Convert llmist content to OpenAI's content format.
+     * Optimizes by returning string for text-only content, array for multimodal.
+     */
+    convertToOpenAIContent(content) {
+      if (typeof content === "string") {
+        return content;
+      }
+      return content.map((part) => {
+        if (part.type === "text") {
+          return { type: "text", text: part.text };
+        }
+        if (part.type === "image") {
+          return this.convertImagePart(part);
+        }
+        if (part.type === "audio") {
+          throw new Error(
+            "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+          );
+        }
+        throw new Error(`Unsupported content type: ${part.type}`);
+      });
+    }
+    /**
+     * Convert an image content part to OpenAI's image_url format.
+     * Supports both URLs and base64 data URLs.
+     */
+    convertImagePart(part) {
+      if (part.source.type === "url") {
+        return {
+          type: "image_url",
+          image_url: { url: part.source.url }
+        };
+      }
+      return {
+        type: "image_url",
+        image_url: {
+          url: `data:${part.source.mediaType};base64,${part.source.data}`
+        }
+      };
+    }
     async executeStreamRequest(payload, signal) {
       const client = this.client;
       const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
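Unlike Anthropic and Gemini, OpenAI accepts both remote image URLs and base64 data; the conversion above embeds base64 images as `data:` URLs. A minimal sketch of that mapping:

```ts
// Illustrative mapping matching convertImagePart above.
function toOpenAIImagePart(source:
  | { type: "url"; url: string }
  | { type: "base64"; mediaType: string; data: string }) {
  const url =
    source.type === "url"
      ? source.url
      : `data:${source.mediaType};base64,${source.data}`; // RFC 2397 data URL
  return { type: "image_url", image_url: { url } };
}
```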
@@ -6423,9 +7109,9 @@ var init_openai = __esm({
   async *wrapStream(iterable) {
     const stream2 = iterable;
     for await (const chunk of stream2) {
-      const
-      if (
-      yield { text, rawEvent: chunk };
+      const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+      if (text3) {
+        yield { text: text3, rawEvent: chunk };
       }
       const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
       const usage = chunk.usage ? {
@@ -6473,17 +7159,26 @@ var init_openai = __esm({
     }
     try {
       let tokenCount = 0;
+      let imageCount = 0;
       for (const message of messages) {
         tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
         const roleText = ROLE_MAP[message.role];
         tokenCount += encoding.encode(roleText).length;
-
+        const textContent = extractText(message.content);
+        tokenCount += encoding.encode(textContent).length;
+        const parts = normalizeContent(message.content);
+        for (const part of parts) {
+          if (part.type === "image") {
+            imageCount++;
+          }
+        }
         if (message.name) {
           tokenCount += encoding.encode(message.name).length;
           tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
         }
       }
       tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+      tokenCount += imageCount * 765;
       return tokenCount;
     } finally {
       encoding.free();
@@ -6493,8 +7188,19 @@ var init_openai = __esm({
       `Token counting failed for ${descriptor.name}, using fallback estimation:`,
       error
     );
-
-
+    let totalChars = 0;
+    let imageCount = 0;
+    for (const msg of messages) {
+      const parts = normalizeContent(msg.content);
+      for (const part of parts) {
+        if (part.type === "text") {
+          totalChars += part.text.length;
+        } else if (part.type === "image") {
+          imageCount++;
+        }
+      }
+    }
+    return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
   }
 }
 };
@@ -6917,6 +7623,138 @@ var init_text = __esm({
|
|
|
6917
7623
|
}
|
|
6918
7624
|
});
|
|
6919
7625
|
|
|
7626
|
+
// src/core/namespaces/vision.ts
|
|
7627
|
+
var VisionNamespace;
|
|
7628
|
+
var init_vision = __esm({
|
|
7629
|
+
"src/core/namespaces/vision.ts"() {
|
|
7630
|
+
"use strict";
|
|
7631
|
+
init_input_content();
|
|
7632
|
+
init_messages();
|
|
7633
|
+
VisionNamespace = class {
|
|
7634
|
+
constructor(client) {
|
|
7635
|
+
this.client = client;
|
|
7636
|
+
}
|
|
7637
|
+
/**
|
|
7638
|
+
* Build a message builder with the image content attached.
|
|
7639
|
+
* Handles URLs, data URLs, base64 strings, and binary buffers.
|
|
7640
|
+
*/
|
|
7641
|
+
buildImageMessage(options) {
|
|
7642
|
+
const builder = new LLMMessageBuilder();
|
|
7643
|
+
if (options.systemPrompt) {
|
|
7644
|
+
builder.addSystem(options.systemPrompt);
|
|
7645
|
+
}
|
|
7646
|
+
if (typeof options.image === "string") {
|
|
7647
|
+
if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
|
|
7648
|
+
builder.addUserWithImageUrl(options.prompt, options.image);
|
|
7649
|
+
} else if (isDataUrl(options.image)) {
|
|
7650
|
+
const parsed = parseDataUrl(options.image);
|
|
7651
|
+
if (!parsed) {
|
|
7652
|
+
throw new Error("Invalid data URL format");
|
|
7653
|
+
}
|
|
7654
|
+
builder.addUserWithImage(
|
|
7655
|
+
options.prompt,
|
|
7656
|
+
parsed.data,
|
|
7657
|
+
+parsed.mimeType
+);
+} else {
+const buffer = Buffer.from(options.image, "base64");
+builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+}
+} else {
+builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+}
+return builder;
+}
+/**
+* Stream the response and collect text and usage information.
+*/
+async streamAndCollect(options, builder) {
+let response = "";
+let finalUsage;
+for await (const chunk of this.client.stream({
+model: options.model,
+messages: builder.build(),
+maxTokens: options.maxTokens,
+temperature: options.temperature
+})) {
+response += chunk.text;
+if (chunk.usage) {
+finalUsage = {
+inputTokens: chunk.usage.inputTokens,
+outputTokens: chunk.usage.outputTokens,
+totalTokens: chunk.usage.totalTokens
+};
+}
+}
+return { text: response.trim(), usage: finalUsage };
+}
+/**
+* Analyze an image with a vision-capable model.
+* Returns the analysis as a string.
+*
+* @param options - Vision analysis options
+* @returns Promise resolving to the analysis text
+* @throws Error if the image format is unsupported or model doesn't support vision
+*
+* @example
+* ```typescript
+* // From file
+* const result = await llmist.vision.analyze({
+* model: "gpt-4o",
+* image: await fs.readFile("photo.jpg"),
+* prompt: "What's in this image?",
+* });
+*
+* // From URL (OpenAI only)
+* const result = await llmist.vision.analyze({
+* model: "gpt-4o",
+* image: "https://example.com/image.jpg",
+* prompt: "Describe this image",
+* });
+* ```
+*/
+async analyze(options) {
+const builder = this.buildImageMessage(options);
+const { text: text3 } = await this.streamAndCollect(options, builder);
+return text3;
+}
+/**
+* Analyze an image and return detailed result with usage info.
+*
+* @param options - Vision analysis options
+* @returns Promise resolving to the analysis result with usage info
+*/
+async analyzeWithUsage(options) {
+const builder = this.buildImageMessage(options);
+const { text: text3, usage } = await this.streamAndCollect(options, builder);
+return {
+text: text3,
+model: options.model,
+usage
+};
+}
+/**
+* Check if a model supports vision/image input.
+*
+* @param modelId - Model ID to check
+* @returns True if the model supports vision
+*/
+supportsModel(modelId) {
+const spec = this.client.modelRegistry.getModelSpec(modelId);
+return spec?.features?.vision === true;
+}
+/**
+* List all models that support vision.
+*
+* @returns Array of model IDs that support vision
+*/
+listModels() {
+return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+}
+};
+}
+});
+
// src/core/options.ts
var ModelIdentifierParser;
var init_options = __esm({
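Note: taken together, the hunk above finishes the new `VisionNamespace`: `analyze()` returns the collected text, `analyzeWithUsage()` adds token accounting, and `supportsModel()`/`listModels()` consult the registry's `features.vision` flag. A minimal usage sketch based only on the signatures added above — the model id and file name are placeholders, and the `LLMist` import assumes the package's main export:

```typescript
import { readFile } from "node:fs/promises";
import { LLMist } from "llmist";

const llmist = new LLMist();

// Guard on capability first; analyze() streams internally and returns trimmed text.
if (llmist.vision.supportsModel("gpt-4o")) {
  const result = await llmist.vision.analyzeWithUsage({
    model: "gpt-4o",
    image: await readFile("photo.jpg"), // Buffer input; URL strings are OpenAI-only per the JSDoc
    prompt: "What's in this image?",
  });
  console.log(result.text, result.usage?.totalTokens);
}
```

Since `streamAndCollect` trims the accumulated stream, `result.text` carries no trailing whitespace.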
@@ -6961,6 +7799,7 @@ var init_client = __esm({
init_image();
init_speech();
init_text();
+init_vision();
init_options();
init_quick_methods();
LLMist = class _LLMist {
@@ -6972,6 +7811,7 @@ var init_client = __esm({
text;
image;
speech;
+vision;
constructor(...args) {
let adapters = [];
let defaultProvider;
@@ -7022,6 +7862,7 @@ var init_client = __esm({
this.text = new TextNamespace(this);
this.image = new ImageNamespace(this.adapters, this.defaultProvider);
this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+this.vision = new VisionNamespace(this);
}
stream(options) {
const descriptor = this.parser.parse(options.model);
@@ -7206,6 +8047,7 @@ var init_builder = __esm({
"src/agent/builder.ts"() {
"use strict";
init_constants();
+init_input_content();
init_model_shortcuts();
init_registry();
init_agent();
@@ -7853,13 +8695,17 @@ ${endPrefix}`
* }
* ```
*/
-
+/**
+* Build AgentOptions with the given user prompt.
+* Centralizes options construction for ask(), askWithImage(), and askWithContent().
+*/
+buildAgentOptions(userPrompt) {
if (!this.client) {
const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
this.client = new LLMistClass();
}
const registry = GadgetRegistry.from(this.gadgets);
-
+return {
client: this.client,
model: this.model ?? "openai:gpt-5-nano",
systemPrompt: this.systemPrompt,
@@ -7885,6 +8731,83 @@ ${endPrefix}`
compactionConfig: this.compactionConfig,
signal: this.signal
};
+}
+ask(userPrompt) {
+const options = this.buildAgentOptions(userPrompt);
+return new Agent(AGENT_INTERNAL_KEY, options);
+}
+/**
+* Build and create the agent with a multimodal user prompt (text + image).
+* Returns the Agent instance ready to run.
+*
+* @param textPrompt - Text prompt describing what to do with the image
+* @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+* @returns Configured Agent instance
+*
+* @example
+* ```typescript
+* const agent = LLMist.createAgent()
+* .withModel("gpt-4o")
+* .withSystem("You analyze images")
+* .askWithImage(
+* "What's in this image?",
+* await fs.readFile("photo.jpg")
+* );
+*
+* for await (const event of agent.run()) {
+* // handle events
+* }
+* ```
+*/
+askWithImage(textPrompt, imageData, mimeType) {
+const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+if (!detectedMime) {
+throw new Error(
+"Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+);
+}
+const userContent = [
+text(textPrompt),
+{
+type: "image",
+source: {
+type: "base64",
+mediaType: detectedMime,
+data: toBase64(imageBuffer)
+}
+}
+];
+const options = this.buildAgentOptions(userContent);
+return new Agent(AGENT_INTERNAL_KEY, options);
+}
+/**
+* Build and return an Agent configured with multimodal content.
+* More flexible than askWithImage - accepts any combination of content parts.
+*
+* @param content - Array of content parts (text, images, audio)
+* @returns A configured Agent ready for execution
+*
+* @example
+* ```typescript
+* import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+*
+* const agent = LLMist.createAgent()
+* .withModel("gemini:gemini-2.5-flash")
+* .askWithContent([
+* text("Describe this image and transcribe the audio:"),
+* imageFromBuffer(imageData),
+* audioFromBuffer(audioData),
+* ]);
+*
+* for await (const event of agent.run()) {
+* // handle events
+* }
+* ```
+*/
+askWithContent(content) {
+const options = this.buildAgentOptions(content);
return new Agent(AGENT_INTERNAL_KEY, options);
}
/**
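Note: all three builder entry points (`ask`, `askWithImage`, `askWithContent`) now funnel through the shared `buildAgentOptions`, so they pick up the same default model, system prompt, and compaction config. One behavior worth calling out in `askWithImage` above: MIME detection runs on the decoded bytes, so formats outside JPEG/PNG/GIF/WebP must come with an explicit `mimeType` or the call throws. A sketch — file names and the `bmpBase64` value are placeholders:

```typescript
import { readFile } from "node:fs/promises";
import { LLMist } from "llmist";

// Magic-byte detection covers JPEG, PNG, GIF, and WebP buffers:
const pngAgent = LLMist.createAgent()
  .withModel("gpt-4o")
  .askWithImage("Describe this photo", await readFile("photo.png"));

// Other formats need the MIME type passed explicitly, or detection fails and throws:
declare const bmpBase64: string; // hypothetical base64-encoded BMP
const bmpAgent = LLMist.createAgent()
  .withModel("gpt-4o")
  .askWithImage("Describe this bitmap", bmpBase64, "image/bmp");
```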
@@ -8004,7 +8927,8 @@ var COMMANDS = {
models: "models",
gadget: "gadget",
image: "image",
-speech: "speech"
+speech: "speech",
+vision: "vision"
};
var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -8026,6 +8950,9 @@ var OPTION_FLAGS = {
dockerRo: "--docker-ro",
noDocker: "--no-docker",
dockerDev: "--docker-dev",
+// Multimodal input options
+inputImage: "--image <path>",
+inputAudio: "--audio <path>",
// Image generation options
imageSize: "--size <size>",
imageQuality: "--quality <quality>",
@@ -8051,6 +8978,9 @@ var OPTION_DESCRIPTIONS = {
noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
quiet: "Suppress all output except content (text and TellUser messages).",
+// Multimodal input descriptions
+inputImage: "Image file to include with the prompt (vision models).",
+inputAudio: "Audio file to include with the prompt (Gemini only).",
docker: "Run agent in a Docker sandbox container for security isolation.",
dockerRo: "Run in Docker with current directory mounted read-only.",
noDocker: "Disable Docker sandboxing (override config).",
@@ -8074,7 +9004,7 @@ var import_commander2 = require("commander");
// package.json
var package_default = {
name: "llmist",
-version: "2.4.0",
+version: "2.5.0",
description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
type: "module",
main: "dist/index.cjs",
@@ -8196,7 +9126,7 @@ var package_default = {
};

// src/cli/agent-command.ts
-var
+var import_promises4 = require("readline/promises");
var import_chalk5 = __toESM(require("chalk"), 1);
init_builder();

@@ -8214,6 +9144,7 @@ function isAbortError(error) {
}

// src/cli/agent-command.ts
+init_input_content();
init_registry();
init_constants2();

@@ -8538,15 +9469,84 @@ var finish = createGadget({
});
var builtinGadgets = [askUser, tellUser, finish];

+// src/cli/file-utils.ts
+var import_promises2 = require("fs/promises");
+var import_node_path3 = require("path");
+init_input_content();
+var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
+function formatFileSize(bytes) {
+if (bytes < 1024) return `${bytes} bytes`;
+if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
+}
+async function checkFileSize(absolutePath, filePath, maxSize) {
+const stats = await (0, import_promises2.stat)(absolutePath);
+if (stats.size > maxSize) {
+throw new Error(
+`File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
+);
+}
+}
+async function readImageFile(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+let buffer;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+buffer = await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read image file "${filePath}": ${message}`);
+}
+const mimeType = detectImageMimeType(buffer);
+if (!mimeType) {
+throw new Error(
+`File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
+);
+}
+return imageFromBuffer(buffer, mimeType);
+}
+async function readAudioFile(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+let buffer;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+buffer = await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read audio file "${filePath}": ${message}`);
+}
+const mimeType = detectAudioMimeType(buffer);
+if (!mimeType) {
+throw new Error(
+`File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
+);
+}
+return audioFromBuffer(buffer, mimeType);
+}
+async function readFileBuffer(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+return await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read file "${filePath}": ${message}`);
+}
+}
+
// src/cli/gadgets.ts
var import_node_fs7 = __toESM(require("fs"), 1);
-var
+var import_node_path7 = __toESM(require("path"), 1);
var import_node_url = require("url");
init_gadget();

// src/cli/builtins/filesystem/list-directory.ts
var import_node_fs4 = __toESM(require("fs"), 1);
-var
+var import_node_path5 = __toESM(require("path"), 1);
var import_zod4 = require("zod");

// src/index.ts
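Note: the three readers in the new `file-utils.ts` share one shape — `stat` for the size cap, read, then a typed error — and all default to the same 50 MiB limit (`50 * 1024 * 1024` = 52,428,800 bytes), checked before any bytes are read. `readImageFile` and `readAudioFile` additionally sniff magic bytes; `readFileBuffer` skips the sniff. Expected `formatFileSize` outputs, derived from the code above:

```typescript
formatFileSize(512);      // "512 bytes"
formatFileSize(2048);     // "2.0 KB"
formatFileSize(52428800); // "50.0 MB" — the DEFAULT_MAX_FILE_SIZE cap
```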
@@ -8570,6 +9570,7 @@ init_prompt_config();

// src/index.ts
init_client();
+init_input_content();
init_messages();
init_model_registry();
init_model_shortcuts();
@@ -8600,6 +9601,10 @@ init_logger();
// src/testing/mock-stream.ts
init_constants();

+// src/testing/mock-builder.ts
+init_input_content();
+init_messages();
+
// src/testing/mock-client.ts
init_client();

@@ -8611,7 +9616,7 @@ var import_node_stream = require("stream");

// src/cli/builtins/filesystem/utils.ts
var import_node_fs3 = __toESM(require("fs"), 1);
-var
+var import_node_path4 = __toESM(require("path"), 1);
var PathSandboxException = class extends Error {
constructor(inputPath, reason) {
super(`Path access denied: ${inputPath}. ${reason}`);
@@ -8620,7 +9625,7 @@ var PathSandboxException = class extends Error {
};
function validatePathIsWithinCwd(inputPath) {
const cwd = process.cwd();
-const resolvedPath =
+const resolvedPath = import_node_path4.default.resolve(cwd, inputPath);
let finalPath;
try {
finalPath = import_node_fs3.default.realpathSync(resolvedPath);
@@ -8632,7 +9637,7 @@ function validatePathIsWithinCwd(inputPath) {
throw error;
}
}
-const cwdWithSep = cwd +
+const cwdWithSep = cwd + import_node_path4.default.sep;
if (!finalPath.startsWith(cwdWithSep) && finalPath !== cwd) {
throw new PathSandboxException(inputPath, "Path is outside the current working directory");
}
@@ -8645,8 +9650,8 @@ function listFiles(dirPath, basePath = dirPath, maxDepth = 1, currentDepth = 1)
try {
const items = import_node_fs4.default.readdirSync(dirPath);
for (const item of items) {
-const fullPath =
-const relativePath =
+const fullPath = import_node_path5.default.join(dirPath, item);
+const relativePath = import_node_path5.default.relative(basePath, fullPath);
try {
const stats = import_node_fs4.default.lstatSync(fullPath);
let type;
@@ -8761,7 +9766,7 @@ ${formattedList}`;
// src/cli/builtins/filesystem/read-file.ts
var import_node_fs5 = __toESM(require("fs"), 1);
var import_zod5 = require("zod");
-var
+var readFile2 = createGadget({
name: "ReadFile",
description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
schema: import_zod5.z.object({
@@ -8790,7 +9795,7 @@ ${content}`;

// src/cli/builtins/filesystem/write-file.ts
var import_node_fs6 = __toESM(require("fs"), 1);
-var
+var import_node_path6 = __toESM(require("path"), 1);
var import_zod6 = require("zod");
var writeFile = createGadget({
name: "WriteFile",
@@ -8825,7 +9830,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
],
execute: ({ filePath, content }) => {
const validatedPath = validatePathIsWithinCwd(filePath);
-const parentDir =
+const parentDir = import_node_path6.default.dirname(validatedPath);
let createdDir = false;
if (!import_node_fs6.default.existsSync(parentDir)) {
validatePathIsWithinCwd(parentDir);
@@ -8834,7 +9839,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
}
import_node_fs6.default.writeFileSync(validatedPath, content, "utf-8");
const bytesWritten = Buffer.byteLength(content, "utf-8");
-const dirNote = createdDir ? ` (created directory: ${
+const dirNote = createdDir ? ` (created directory: ${import_node_path6.default.dirname(filePath)})` : "";
return `path=${filePath}

Wrote ${bytesWritten} bytes${dirNote}`;
@@ -9032,7 +10037,7 @@ error: ${message}`;
// src/cli/builtins/index.ts
var builtinGadgetRegistry = {
ListDirectory: listDirectory,
-ReadFile:
+ReadFile: readFile2,
WriteFile: writeFile,
EditFile: editFile,
RunCommand: runCommand
@@ -9069,10 +10074,10 @@ function expandHomePath(input) {
if (!home) {
return input;
}
-return
+return import_node_path7.default.join(home, input.slice(1));
}
function isFileLikeSpecifier(specifier) {
-return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(
+return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(import_node_path7.default.sep);
}
function tryResolveBuiltin(specifier) {
if (specifier.startsWith(BUILTIN_PREFIX)) {
@@ -9095,7 +10100,7 @@ function resolveGadgetSpecifier(specifier, cwd) {
return specifier;
}
const expanded = expandHomePath(specifier);
-const resolvedPath =
+const resolvedPath = import_node_path7.default.resolve(cwd, expanded);
if (!import_node_fs7.default.existsSync(resolvedPath)) {
throw new Error(`Gadget module not found at ${resolvedPath}`);
}
@@ -9167,13 +10172,14 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
}

// src/cli/llm-logging.ts
-var
+var import_promises3 = require("fs/promises");
var import_node_os = require("os");
-var
-
+var import_node_path8 = require("path");
+init_messages();
+var DEFAULT_LLM_LOG_DIR = (0, import_node_path8.join)((0, import_node_os.homedir)(), ".llmist", "logs");
function resolveLogDir(option, subdir) {
if (option === true) {
-return (0,
+return (0, import_node_path8.join)(DEFAULT_LLM_LOG_DIR, subdir);
}
if (typeof option === "string") {
return option;
@@ -9184,14 +10190,14 @@ function formatLlmRequest(messages) {
const lines = [];
for (const msg of messages) {
lines.push(`=== ${msg.role.toUpperCase()} ===`);
-lines.push(msg.content
+lines.push(msg.content ? extractText(msg.content) : "");
lines.push("");
}
return lines.join("\n");
}
async function writeLogFile(dir, filename, content) {
-await (0,
-await (0,
+await (0, import_promises3.mkdir)(dir, { recursive: true });
+await (0, import_promises3.writeFile)((0, import_node_path8.join)(dir, filename), content, "utf-8");
}
function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
const pad = (n) => n.toString().padStart(2, "0");
@@ -9205,9 +10211,9 @@ function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
}
async function createSessionDir(baseDir) {
const timestamp = formatSessionTimestamp();
-const sessionDir = (0,
+const sessionDir = (0, import_node_path8.join)(baseDir, timestamp);
try {
-await (0,
+await (0, import_promises3.mkdir)(sessionDir, { recursive: true });
return sessionDir;
} catch (error) {
console.warn(`[llmist] Failed to create log session directory: ${sessionDir}`, error);
@@ -9258,9 +10264,9 @@ function ensureMarkedConfigured() {
markedConfigured = true;
}
}
-function renderMarkdown(
+function renderMarkdown(text3) {
ensureMarkedConfigured();
-let rendered = import_marked.marked.parse(
+let rendered = import_marked.marked.parse(text3);
rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => import_chalk3.default.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => import_chalk3.default.italic(content));
return rendered.trimEnd();
}
@@ -9274,8 +10280,8 @@ function createRainbowSeparator() {
}
return result;
}
-function renderMarkdownWithSeparators(
-const rendered = renderMarkdown(
+function renderMarkdownWithSeparators(text3) {
+const rendered = renderMarkdown(text3);
const separator = createRainbowSeparator();
return `
${separator}
@@ -9443,12 +10449,12 @@ var StreamPrinter = class {
*
* @param text - Text to write
*/
-write(
-if (!
+write(text3) {
+if (!text3) {
return;
}
-this.target.write(
-this.endedWithNewline =
+this.target.write(text3);
+this.endedWithNewline = text3.endsWith("\n");
}
/**
* Ensures output ends with a newline by writing one if needed.
@@ -9927,7 +10933,7 @@ function addCompleteOptions(cmd, defaults) {
OPTION_DESCRIPTIONS.maxTokens,
createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
defaults?.["max-tokens"]
-).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
+).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
}
function addAgentOptions(cmd, defaults) {
const gadgetAccumulator = (value, previous = []) => [
@@ -9951,7 +10957,7 @@ function addAgentOptions(cmd, defaults) {
OPTION_FLAGS.noBuiltinInteraction,
OPTION_DESCRIPTIONS.noBuiltinInteraction,
defaults?.["builtin-interaction"] !== false
-).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
+).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
}
function configToCompleteOptions(config) {
const result = {};
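Note: with these two one-line changes, both the complete and agent commands accept the multimodal flags declared in `OPTION_FLAGS` earlier (`--image <path>`, `--audio <path>`). Unlike the neighboring options they are registered without a config-file default, so an invocation like `llmist complete --image photo.jpg "What is this?"` should work with no TOML changes (the invocation shape is illustrative; the positional prompt argument is declared outside this hunk).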
@@ -10018,7 +11024,7 @@ var DEV_SOURCE_MOUNT_TARGET = "/llmist-src";
// src/cli/config.ts
var import_node_fs8 = require("fs");
var import_node_os2 = require("os");
-var
+var import_node_path9 = require("path");
var import_js_toml = require("js-toml");

// src/cli/templates.ts
@@ -10179,7 +11185,7 @@ var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
"description"
]);
function getConfigPath() {
-return (0,
+return (0, import_node_path9.join)((0, import_node_os2.homedir)(), ".llmist", "cli.toml");
}
var ConfigError = class extends Error {
constructor(message, path5) {
@@ -11026,8 +12032,8 @@ function computeDockerfileHash(dockerfile) {
// src/cli/docker/image-manager.ts
var import_node_fs9 = require("fs");
var import_node_os3 = require("os");
-var
-var CACHE_DIR = (0,
+var import_node_path10 = require("path");
+var CACHE_DIR = (0, import_node_path10.join)((0, import_node_os3.homedir)(), ".llmist", "docker-cache");
var HASH_FILE = "image-hash.json";
function ensureCacheDir() {
if (!(0, import_node_fs9.existsSync)(CACHE_DIR)) {
@@ -11035,7 +12041,7 @@ function ensureCacheDir() {
}
}
function getCachedHash(imageName) {
-const hashPath = (0,
+const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
if (!(0, import_node_fs9.existsSync)(hashPath)) {
return void 0;
}
@@ -11049,7 +12055,7 @@ function getCachedHash(imageName) {
}
function setCachedHash(imageName, hash) {
ensureCacheDir();
-const hashPath = (0,
+const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
let cache = {};
if ((0, import_node_fs9.existsSync)(hashPath)) {
try {
@@ -11075,7 +12081,7 @@ var DockerBuildError = class extends Error {
};
async function buildImage(imageName, dockerfile) {
ensureCacheDir();
-const dockerfilePath = (0,
+const dockerfilePath = (0, import_node_path10.join)(CACHE_DIR, "Dockerfile");
(0, import_node_fs9.writeFileSync)(dockerfilePath, dockerfile);
const proc = Bun.spawn(
["docker", "build", "-t", imageName, "-f", dockerfilePath, CACHE_DIR],
@@ -11110,7 +12116,7 @@ async function ensureImage(imageName = DEFAULT_IMAGE_NAME, dockerfile) {

// src/cli/docker/docker-wrapper.ts
var import_node_fs10 = require("fs");
-var
+var import_node_path11 = require("path");
var import_node_os4 = require("os");
var DockerUnavailableError = class extends Error {
constructor() {
@@ -11156,9 +12162,9 @@ function autoDetectDevSource() {
if (!scriptPath || !scriptPath.endsWith("src/cli.ts")) {
return void 0;
}
-const srcDir = (0,
-const projectDir = (0,
-const packageJsonPath = (0,
+const srcDir = (0, import_node_path11.dirname)(scriptPath);
+const projectDir = (0, import_node_path11.dirname)(srcDir);
+const packageJsonPath = (0, import_node_path11.join)(projectDir, "package.json");
if (!(0, import_node_fs10.existsSync)(packageJsonPath)) {
return void 0;
}
@@ -11307,7 +12313,7 @@ function createHumanInputHandler(env, progress, keyboard) {
keyboard.cleanupEsc();
keyboard.cleanupEsc = null;
}
-const rl = (0,
+const rl = (0, import_promises4.createInterface)({ input: env.stdin, output: env.stdout });
try {
const questionLine = question.trim() ? `
${renderMarkdownWithSeparators(question.trim())}` : "";
@@ -11665,8 +12671,8 @@ Denied: ${result.reason ?? "by user"}`
builder.withTextOnlyHandler("acknowledge");
builder.withTextWithGadgetsHandler({
gadgetName: "TellUser",
-parameterMapping: (
-resultMapping: (
+parameterMapping: (text3) => ({ message: text3, done: false, type: "info" }),
+resultMapping: (text3) => `\u2139\uFE0F ${text3}`
});
builder.withTrailingMessage(
(ctx) => [
@@ -11675,7 +12681,19 @@ Denied: ${result.reason ?? "by user"}`
"Maximize efficiency by batching independent operations in a single response."
].join(" ")
);
-
+let agent;
+if (options.image || options.audio) {
+const parts = [text(prompt)];
+if (options.image) {
+parts.push(await readImageFile(options.image));
+}
+if (options.audio) {
+parts.push(await readAudioFile(options.audio));
+}
+agent = builder.askWithContent(parts);
+} else {
+agent = builder.ask(prompt);
+}
let textBuffer = "";
const flushTextBuffer = () => {
if (textBuffer) {
@@ -11750,6 +12768,7 @@ function registerAgentCommand(program, env, config) {
}

// src/cli/complete-command.ts
+init_input_content();
init_messages();
init_model_shortcuts();
init_constants2();
@@ -11761,7 +12780,18 @@ async function executeComplete(promptArg, options, env) {
if (options.system) {
builder.addSystem(options.system);
}
-
+if (options.image || options.audio) {
+const parts = [text(prompt)];
+if (options.image) {
+parts.push(await readImageFile(options.image));
+}
+if (options.audio) {
+parts.push(await readAudioFile(options.audio));
+}
+builder.addUserMultimodal(parts);
+} else {
+builder.addUser(prompt);
+}
const messages = builder.build();
const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
let llmSessionDir;
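Note: the agent and complete commands now share the same multimodal pattern — when `--image` or `--audio` is set, the prompt is wrapped with `text(prompt)` and the decoded files are appended via `readImageFile`/`readAudioFile` from the file-utils block above; otherwise the plain-text path is unchanged. The two branches differ only in the sink: the agent path builds an `Agent` through `askWithContent`, while the complete path feeds `addUserMultimodal` into an ordinary message builder.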
@@ -11836,7 +12866,7 @@ init_schema_to_json();
init_schema_validator();

// src/cli/gadget-prompts.ts
-var
+var import_promises5 = require("readline/promises");
var import_chalk6 = __toESM(require("chalk"), 1);
init_schema_to_json();
async function promptForParameters(schema, ctx) {
@@ -11847,7 +12877,7 @@ async function promptForParameters(schema, ctx) {
if (!jsonSchema.properties || Object.keys(jsonSchema.properties).length === 0) {
return {};
}
-const rl = (0,
+const rl = (0, import_promises5.createInterface)({ input: ctx.stdin, output: ctx.stdout });
const params = {};
try {
for (const [key, prop] of Object.entries(jsonSchema.properties)) {
@@ -12709,7 +13739,7 @@ var import_node_fs12 = require("fs");
var DEFAULT_SPEECH_MODEL = "tts-1";
var DEFAULT_VOICE = "nova";
async function executeSpeech(textArg, options, env) {
-const
+const text3 = await resolvePrompt(textArg, env);
const client = env.createClient();
const model = options.model;
const voice = options.voice ?? DEFAULT_VOICE;
@@ -12721,7 +13751,7 @@ async function executeSpeech(textArg, options, env) {
}
const result = await client.speech.generate({
model,
-input:
+input: text3,
voice,
responseFormat: options.format,
speed
@@ -12754,7 +13784,43 @@ function registerSpeechCommand(program, env, config) {
OPTION_DESCRIPTIONS.model,
config?.model ?? DEFAULT_SPEECH_MODEL
).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
-(
+(text3, options) => executeAction(() => executeSpeech(text3, options, env), env)
+);
+}
+
+// src/cli/vision-command.ts
+init_model_shortcuts();
+async function executeVision(imagePath, options, env) {
+const client = env.createClient();
+const model = resolveModel(options.model);
+const imageBuffer = await readFileBuffer(imagePath);
+const prompt = options.prompt ?? "Describe this image in detail.";
+const stderrTTY = env.stderr.isTTY === true;
+if (!options.quiet && stderrTTY) {
+env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
+`);
+}
+const result = await client.vision.analyze({
+model,
+image: imageBuffer,
+prompt,
+maxTokens: options.maxTokens
+});
+env.stdout.write(result);
+env.stdout.write("\n");
+}
+function registerVisionCommand(program, env) {
+program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
+OPTION_FLAGS.model,
+OPTION_DESCRIPTIONS.model,
+"gpt-4o"
+// Default to a vision-capable model
+).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
+OPTION_FLAGS.maxTokens,
+OPTION_DESCRIPTIONS.maxTokens,
+createNumericParser({ label: "Max tokens", integer: true, min: 1 })
+).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
+(imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
);
}

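Note: the new `vision` subcommand is a thin wrapper over `client.vision.analyze`: the image goes through `readFileBuffer` (so the 50 MiB cap applies), the prompt defaults to "Describe this image in detail.", and the model defaults to `gpt-4o`. An invocation like `llmist vision photo.jpg -p "Extract any visible text"` should therefore work out of the box (`-p/--prompt` per the registration above; the file name is a placeholder). Unlike the image and speech commands, it is registered without a config section, so it takes no TOML defaults yet.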
@@ -12803,7 +13869,7 @@ function createLoggerFactory(config) {
}
function createPromptFunction(stdin, stdout) {
return (question) => {
-return new Promise((
+return new Promise((resolve3) => {
const rl = import_node_readline.default.createInterface({
input: stdin,
output: stdout
@@ -12818,7 +13884,7 @@ function createPromptFunction(stdin, stdout) {
`);
rl.question(import_chalk9.default.green.bold("You: "), (answer) => {
rl.close();
-
+resolve3(answer);
});
});
};
@@ -12911,6 +13977,7 @@ function createProgram(env, config) {
registerAgentCommand(program, env, config?.agent);
registerImageCommand(program, env, config?.image);
registerSpeechCommand(program, env, config?.speech);
+registerVisionCommand(program, env);
registerModelsCommand(program, env);
registerGadgetCommand(program, env);
if (config) {