llmist 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/{chunk-QFRVTS5F.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-6ZDUWO6N.js → chunk-YHS2DYXP.js} +1781 -528
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +1218 -151
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +172 -26
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1393 -124
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +93 -20
- package/dist/index.d.ts +93 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-BQcC2VCP.d.cts → mock-stream-ga4KIiwX.d.cts} +714 -12
- package/dist/{mock-stream-BQcC2VCP.d.ts → mock-stream-ga4KIiwX.d.ts} +714 -12
- package/dist/testing/index.cjs +1713 -508
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-6ZDUWO6N.js.map +0 -1
- package/dist/chunk-QFRVTS5F.js.map +0 -1
package/dist/index.cjs
CHANGED
@@ -45,6 +45,158 @@ var init_constants = __esm({
 }
 });
 
+// src/core/input-content.ts
+function isTextPart(part) {
+  return part.type === "text";
+}
+function isImagePart(part) {
+  return part.type === "image";
+}
+function isAudioPart(part) {
+  return part.type === "audio";
+}
+function text(content) {
+  return { type: "text", text: content };
+}
+function imageFromBase64(data, mediaType) {
+  return {
+    type: "image",
+    source: { type: "base64", mediaType, data }
+  };
+}
+function imageFromUrl(url) {
+  return {
+    type: "image",
+    source: { type: "url", url }
+  };
+}
+function detectImageMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "image/webp") {
+          if (bytes.length >= 12) {
+            const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+            if (!webpMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function detectAudioMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "audio/wav") {
+          if (bytes.length >= 12) {
+            const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+            if (!waveMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function toBase64(data) {
+  if (typeof data === "string") {
+    return data;
+  }
+  return Buffer.from(data).toString("base64");
+}
+function imageFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectImageMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "image",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function audioFromBase64(data, mediaType) {
+  return {
+    type: "audio",
+    source: { type: "base64", mediaType, data }
+  };
+}
+function audioFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectAudioMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "audio",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function isDataUrl(input) {
+  return input.startsWith("data:");
+}
+function parseDataUrl(url) {
+  const match = url.match(/^data:([^;]+);base64,(.+)$/);
+  if (!match) return null;
+  return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+  "src/core/input-content.ts"() {
+    "use strict";
+    IMAGE_MAGIC_BYTES = [
+      { bytes: [255, 216, 255], mimeType: "image/jpeg" },
+      { bytes: [137, 80, 78, 71], mimeType: "image/png" },
+      { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+      // WebP starts with RIFF....WEBP
+      { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+    ];
+    AUDIO_MAGIC_BYTES = [
+      // MP3 frame sync
+      { bytes: [255, 251], mimeType: "audio/mp3" },
+      { bytes: [255, 250], mimeType: "audio/mp3" },
+      // ID3 tag (MP3)
+      { bytes: [73, 68, 51], mimeType: "audio/mp3" },
+      // OGG
+      { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+      // WAV (RIFF)
+      { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+      // WebM
+      { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
+    ];
+  }
+});
+
 // src/core/model-shortcuts.ts
 function isKnownModelPattern(model) {
   const normalized = model.toLowerCase();
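
The `src/core/input-content.ts` module added above is the foundation of this release's multimodal support: typed content parts plus magic-byte MIME sniffing. A minimal usage sketch, assuming these helpers are re-exported from the package root (the diff shows only the bundled output, not the public export list):

```typescript
import { readFile } from "node:fs/promises";
import { text, imageFromBuffer, imageFromUrl } from "llmist"; // assumed exports

const photo = await readFile("photo.png"); // PNG magic bytes: 137 80 78 71

const parts = [
  text("Compare the uploaded photo with the remote reference:"),
  // The MIME type is sniffed from IMAGE_MAGIC_BYTES, so no mediaType argument is
  // needed for PNG/JPEG/GIF/WebP; unrecognized formats throw until one is passed.
  imageFromBuffer(photo),
  imageFromUrl("https://example.com/ref.jpg"), // URL source; provider support varies
];
```

Note the WebP/WAV special-casing in the detectors: both formats share the RIFF prefix `[82, 73, 70, 70]`, so the sniffers additionally check bytes 8–11 for "WEBP" or "WAVE" before committing to a type.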
@@ -402,7 +554,9 @@ var init_prompt_config = __esm({
       rules: () => [
         "Output ONLY plain text with the exact markers - never use function/tool calling",
         "You can invoke multiple gadgets in a single response",
-        "
+        "Gadgets without dependencies execute immediately (in parallel if multiple)",
+        "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+        "If any dependency fails, dependent gadgets are automatically skipped"
       ],
       customExamples: null
     };
@@ -410,11 +564,24 @@ var init_prompt_config = __esm({
 });
 
 // src/core/messages.ts
+function normalizeContent(content) {
+  if (typeof content === "string") {
+    return [{ type: "text", text: content }];
+  }
+  return content;
+}
+function extractText(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+}
 var LLMMessageBuilder;
 var init_messages = __esm({
   "src/core/messages.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_prompt_config();
     LLMMessageBuilder = class {
       messages = [];
@@ -516,6 +683,10 @@ CRITICAL: ${criticalUsage}
       parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
       parts.push(`
+   With ID: ${this.startPrefix}gadget_name:my_id`);
+      parts.push(`
+   With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+      parts.push(`
 2. ${formatDescription}`);
       parts.push(`
 3. End marker: ${this.endPrefix}`);
@@ -565,6 +736,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+      const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+      parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
       parts.push(`
 
 BLOCK FORMAT SYNTAX:
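
Rendered with concrete marker strings, the new block teaches the model to emit output like the following (the actual `startPrefix`/`argPrefix`/`endPrefix` values are configuration this diff never shows, so the `<<…>>` markers below are placeholders):

```
<<START>>fetch_data:fetch_1
<<ARG>>url
https://api.example.com/users
<<END>>
<<START>>fetch_data:fetch_2
<<ARG>>url
https://api.example.com/orders
<<END>>
<<START>>merge_data:merge_1:fetch_1,fetch_2
<<ARG>>format
json
<<END>>
```

Here `fetch_1` and `fetch_2` carry no dependency suffix, so they run immediately and in parallel; `merge_1` declares both as dependencies and runs only after both succeed.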
@@ -615,6 +805,25 @@ Produces: { "items": ["first", "second"] }`);
|
|
|
615
805
|
}
|
|
616
806
|
return parts.join("");
|
|
617
807
|
}
|
|
808
|
+
/**
|
|
809
|
+
* Add a user message.
|
|
810
|
+
* Content can be a string (text only) or an array of content parts (multimodal).
|
|
811
|
+
*
|
|
812
|
+
* @param content - Message content
|
|
813
|
+
* @param metadata - Optional metadata
|
|
814
|
+
*
|
|
815
|
+
* @example
|
|
816
|
+
* ```typescript
|
|
817
|
+
* // Text only
|
|
818
|
+
* builder.addUser("Hello!");
|
|
819
|
+
*
|
|
820
|
+
* // Multimodal
|
|
821
|
+
* builder.addUser([
|
|
822
|
+
* text("What's in this image?"),
|
|
823
|
+
* imageFromBuffer(imageData),
|
|
824
|
+
* ]);
|
|
825
|
+
* ```
|
|
826
|
+
*/
|
|
618
827
|
addUser(content, metadata) {
|
|
619
828
|
this.messages.push({ role: "user", content, metadata });
|
|
620
829
|
return this;
|
|
@@ -623,6 +832,104 @@ Produces: { "items": ["first", "second"] }`);
|
|
|
623
832
|
this.messages.push({ role: "assistant", content, metadata });
|
|
624
833
|
return this;
|
|
625
834
|
}
|
|
835
|
+
/**
|
|
836
|
+
* Add a user message with an image attachment.
|
|
837
|
+
*
|
|
838
|
+
* @param textContent - Text prompt
|
|
839
|
+
* @param imageData - Image data (Buffer, Uint8Array, or base64 string)
|
|
840
|
+
* @param mimeType - Optional MIME type (auto-detected if not provided)
|
|
841
|
+
*
|
|
842
|
+
* @example
|
|
843
|
+
* ```typescript
|
|
844
|
+
* builder.addUserWithImage(
|
|
845
|
+
* "What's in this image?",
|
|
846
|
+
* await fs.readFile("photo.jpg"),
|
|
847
|
+
* "image/jpeg" // Optional - auto-detected
|
|
848
|
+
* );
|
|
849
|
+
* ```
|
|
850
|
+
*/
|
|
851
|
+
addUserWithImage(textContent, imageData, mimeType) {
|
|
852
|
+
const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
|
|
853
|
+
const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
|
|
854
|
+
if (!detectedMime) {
|
|
855
|
+
throw new Error(
|
|
856
|
+
"Could not detect image MIME type. Please provide the mimeType parameter explicitly."
|
|
857
|
+
);
|
|
858
|
+
}
|
|
859
|
+
const content = [
|
|
860
|
+
text(textContent),
|
|
861
|
+
{
|
|
862
|
+
type: "image",
|
|
863
|
+
source: {
|
|
864
|
+
type: "base64",
|
|
865
|
+
mediaType: detectedMime,
|
|
866
|
+
data: toBase64(imageBuffer)
|
|
867
|
+
}
|
|
868
|
+
}
|
|
869
|
+
];
|
|
870
|
+
this.messages.push({ role: "user", content });
|
|
871
|
+
return this;
|
|
872
|
+
}
|
|
873
|
+
/**
|
|
874
|
+
* Add a user message with an image URL (OpenAI only).
|
|
875
|
+
*
|
|
876
|
+
* @param textContent - Text prompt
|
|
877
|
+
* @param imageUrl - URL to the image
|
|
878
|
+
*
|
|
879
|
+
* @example
|
|
880
|
+
* ```typescript
|
|
881
|
+
* builder.addUserWithImageUrl(
|
|
882
|
+
* "What's in this image?",
|
|
883
|
+
* "https://example.com/image.jpg"
|
|
884
|
+
* );
|
|
885
|
+
* ```
|
|
886
|
+
*/
|
|
887
|
+
addUserWithImageUrl(textContent, imageUrl) {
|
|
888
|
+
const content = [text(textContent), imageFromUrl(imageUrl)];
|
|
889
|
+
this.messages.push({ role: "user", content });
|
|
890
|
+
return this;
|
|
891
|
+
}
|
|
892
|
+
/**
|
|
893
|
+
* Add a user message with an audio attachment (Gemini only).
|
|
894
|
+
*
|
|
895
|
+
* @param textContent - Text prompt
|
|
896
|
+
* @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
|
|
897
|
+
* @param mimeType - Optional MIME type (auto-detected if not provided)
|
|
898
|
+
*
|
|
899
|
+
* @example
|
|
900
|
+
* ```typescript
|
|
901
|
+
* builder.addUserWithAudio(
|
|
902
|
+
* "Transcribe this audio",
|
|
903
|
+
* await fs.readFile("recording.mp3"),
|
|
904
|
+
* "audio/mp3" // Optional - auto-detected
|
|
905
|
+
* );
|
|
906
|
+
* ```
|
|
907
|
+
*/
|
|
908
|
+
addUserWithAudio(textContent, audioData, mimeType) {
|
|
909
|
+
const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
|
|
910
|
+
const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
|
|
911
|
+
this.messages.push({ role: "user", content });
|
|
912
|
+
return this;
|
|
913
|
+
}
|
|
914
|
+
/**
|
|
915
|
+
* Add a user message with multiple content parts.
|
|
916
|
+
* Provides full flexibility for complex multimodal messages.
|
|
917
|
+
*
|
|
918
|
+
* @param parts - Array of content parts
|
|
919
|
+
*
|
|
920
|
+
* @example
|
|
921
|
+
* ```typescript
|
|
922
|
+
* builder.addUserMultimodal([
|
|
923
|
+
* text("Compare these images:"),
|
|
924
|
+
* imageFromBuffer(image1),
|
|
925
|
+
* imageFromBuffer(image2),
|
|
926
|
+
* ]);
|
|
927
|
+
* ```
|
|
928
|
+
*/
|
|
929
|
+
addUserMultimodal(parts) {
|
|
930
|
+
this.messages.push({ role: "user", content: parts });
|
|
931
|
+
return this;
|
|
932
|
+
}
|
|
626
933
|
addGadgetCall(gadget, parameters, result) {
|
|
627
934
|
const paramStr = this.formatBlockParameters(parameters, "");
|
|
628
935
|
this.messages.push({
|
|
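
Taken together, the new builder methods reduce the common multimodal patterns to one call each. A sketch, assuming `LLMMessageBuilder` is importable from the package root (the bundle defines it in `src/core/messages.ts`):

```typescript
import { readFile } from "node:fs/promises";
import { LLMMessageBuilder } from "llmist"; // assumed export

const messages = new LLMMessageBuilder()
  .addUserWithImage("What's in this image?", await readFile("photo.jpg"))     // MIME auto-detected
  .addUserWithImageUrl("And this one?", "https://example.com/image.jpg")      // OpenAI only
  .addUserWithAudio("Transcribe this clip.", await readFile("recording.mp3")) // Gemini only
  .build();
```

Each method pushes a `{ role: "user", content: [...] }` message and returns `this`, so calls chain.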
@@ -1941,7 +2248,7 @@ var init_conversation_manager = __esm({
       if (msg.role === "user") {
         this.historyBuilder.addUser(msg.content);
       } else if (msg.role === "assistant") {
-        this.historyBuilder.addAssistant(msg.content);
+        this.historyBuilder.addAssistant(extractText(msg.content));
       }
     }
   }
@@ -1962,8 +2269,10 @@ async function runWithHandlers(agentGenerator, handlers) {
       if (handlers.onGadgetCall) {
         await handlers.onGadgetCall({
           gadgetName: event.call.gadgetName,
+          invocationId: event.call.invocationId,
           parameters: event.call.parameters,
-          parametersRaw: event.call.parametersRaw
+          parametersRaw: event.call.parametersRaw,
+          dependencies: event.call.dependencies
         });
       }
       break;
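
Handlers now receive the invocation ID and declared dependencies alongside the parameters. A sketch of a gadget-call handler using the new fields (only the fields visible in this hunk are assumed):

```typescript
// Sketch: handler object passed to runWithHandlers(agentGenerator, handlers).
const handlers = {
  async onGadgetCall(call: {
    gadgetName: string;
    invocationId: string;
    parameters: unknown;
    parametersRaw: string;
    dependencies: string[];
  }) {
    console.log(`[${call.invocationId}] ${call.gadgetName}`);
    if (call.dependencies.length > 0) {
      console.log(`  waits on: ${call.dependencies.join(", ")}`);
    }
  },
};
```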
@@ -2840,15 +3149,37 @@ var init_parser = __esm({
         return segment.trim().length > 0 ? segment : void 0;
       }
       /**
-       * Parse gadget name
-       *
+       * Parse gadget name with optional invocation ID and dependencies.
+       *
+       * Supported formats:
+       * - `GadgetName` - Auto-generate ID, no dependencies
+       * - `GadgetName:my_id` - Explicit ID, no dependencies
+       * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+       *
+       * Dependencies must be comma-separated invocation IDs.
       */
       parseGadgetName(gadgetName) {
-
-
-        return {
+        const parts = gadgetName.split(":");
+        if (parts.length === 1) {
+          return {
+            actualName: parts[0],
+            invocationId: `gadget_${++globalInvocationCounter}`,
+            dependencies: []
+          };
+        } else if (parts.length === 2) {
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: []
+          };
+        } else {
+          const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: deps
+          };
         }
-        return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
       }
       /**
        * Extract the error message from a parse error.
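
A condensed restatement of that header grammar, showing the three accepted forms (the counter stands in for `globalInvocationCounter`; parts beyond the third colon segment are ignored, as in the bundled code):

```typescript
let counter = 0; // stands in for the parser's globalInvocationCounter

function parseGadgetName(header: string) {
  const [name, id, deps] = header.split(":");
  return {
    actualName: name,
    invocationId: id?.trim() ?? `gadget_${++counter}`,
    dependencies: deps?.split(",").map((d) => d.trim()).filter((d) => d.length > 0) ?? [],
  };
}

parseGadgetName("fetch_data");
// => { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }
parseGadgetName("fetch_data:fetch_1");
// => { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }
parseGadgetName("merge_data:merge_1:fetch_1,fetch_2");
// => { actualName: "merge_data", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"] }
```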
@@ -2884,39 +3215,20 @@ var init_parser = __esm({
         const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
         if (metadataEndIndex === -1) break;
         const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-        const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+        const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
         const contentStartIndex = metadataEndIndex + 1;
         let partEndIndex;
         let endMarkerLength = 0;
-
-
-
-
-        endMarkerLength =
+        const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+        const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+        if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+          partEndIndex = nextStartPos;
+          endMarkerLength = 0;
+        } else if (endPos !== -1) {
+          partEndIndex = endPos;
+          endMarkerLength = this.endPrefix.length;
         } else {
-
-          let validEndPos = -1;
-          let searchPos = contentStartIndex;
-          while (true) {
-            const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-            if (endPos === -1) break;
-            const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-            if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-              validEndPos = endPos;
-              break;
-            } else {
-              searchPos = endPos + this.endPrefix.length;
-            }
-          }
-          if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-            partEndIndex = nextStartPos;
-            endMarkerLength = 0;
-          } else if (validEndPos !== -1) {
-            partEndIndex = validEndPos;
-            endMarkerLength = this.endPrefix.length;
-          } else {
-            break;
-          }
+          break;
         }
         const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
         const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2927,7 +3239,8 @@ var init_parser = __esm({
             invocationId,
             parametersRaw,
            parameters,
-            parseError
+            parseError,
+            dependencies
          }
        };
        startIndex = partEndIndex + endMarkerLength;
@@ -2950,7 +3263,7 @@ var init_parser = __esm({
         const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
         if (metadataEndIndex !== -1) {
           const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-          const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+          const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
           const contentStartIndex = metadataEndIndex + 1;
           const parametersRaw = this.buffer.substring(contentStartIndex).trim();
           const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2961,7 +3274,8 @@ var init_parser = __esm({
             invocationId,
             parametersRaw,
             parameters,
-            parseError
+            parseError,
+            dependencies
           }
         };
         return;
@@ -3331,6 +3645,13 @@ var init_stream_processor = __esm({
       accumulatedText = "";
       shouldStopExecution = false;
       observerFailureCount = 0;
+      // Dependency tracking for gadget execution DAG
+      /** Gadgets waiting for their dependencies to complete */
+      pendingGadgets = /* @__PURE__ */ new Map();
+      /** Completed gadget results, keyed by invocation ID */
+      completedResults = /* @__PURE__ */ new Map();
+      /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+      failedInvocations = /* @__PURE__ */ new Set();
      constructor(options) {
        this.iteration = options.iteration;
        this.registry = options.registry;
@@ -3431,6 +3752,16 @@ var init_stream_processor = __esm({
           }
         }
       }
+      const finalPendingEvents = await this.processPendingGadgets();
+      outputs.push(...finalPendingEvents);
+      if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+        didExecuteGadgets = true;
+      }
+      for (const evt of finalPendingEvents) {
+        if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+          shouldBreakLoop = true;
+        }
+      }
     }
     let finalMessage = this.accumulatedText;
     if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3482,7 +3813,11 @@ var init_stream_processor = __esm({
       return [{ type: "text", content }];
     }
     /**
-     * Process a gadget call through the full lifecycle.
+     * Process a gadget call through the full lifecycle, handling dependencies.
+     *
+     * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+     * Gadgets with unsatisfied dependencies are queued for later execution.
+     * After each execution, pending gadgets are checked to see if they can now run.
     */
     async processGadgetCall(call) {
       if (this.shouldStopExecution) {
@@ -3493,6 +3828,53 @@ var init_stream_processor = __esm({
       }
       const events = [];
       events.push({ type: "gadget_call", call });
+      if (call.dependencies.length > 0) {
+        if (call.dependencies.includes(call.invocationId)) {
+          this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId
+          });
+          this.failedInvocations.add(call.invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: call.invocationId,
+            failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+          };
+          events.push(skipEvent);
+          return events;
+        }
+        const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+        if (failedDep) {
+          const skipEvents = await this.handleFailedDependency(call, failedDep);
+          events.push(...skipEvents);
+          return events;
+        }
+        const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+        if (unsatisfied.length > 0) {
+          this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            waitingOn: unsatisfied
+          });
+          this.pendingGadgets.set(call.invocationId, call);
+          return events;
+        }
+      }
+      const executeEvents = await this.executeGadgetWithHooks(call);
+      events.push(...executeEvents);
+      const triggeredEvents = await this.processPendingGadgets();
+      events.push(...triggeredEvents);
+      return events;
+    }
+    /**
+     * Execute a gadget through the full hook lifecycle.
+     * This is the core execution logic, extracted from processGadgetCall.
+     */
+    async executeGadgetWithHooks(call) {
+      const events = [];
       if (call.parseError) {
         this.logger.warn("Gadget has parse error", {
           gadgetName: call.gadgetName,
@@ -3623,6 +4005,10 @@ var init_stream_processor = __esm({
         });
       }
       await this.runObserversInParallel(completeObservers);
+      this.completedResults.set(result.invocationId, result);
+      if (result.error) {
+        this.failedInvocations.add(result.invocationId);
+      }
       events.push({ type: "gadget_result", result });
       if (result.error) {
         const errorType = this.determineErrorType(call, result);
@@ -3638,6 +4024,162 @@ var init_stream_processor = __esm({
       }
       return events;
     }
+    /**
+     * Handle a gadget that cannot execute because a dependency failed.
+     * Calls the onDependencySkipped controller to allow customization.
+     */
+    async handleFailedDependency(call, failedDep) {
+      const events = [];
+      const depResult = this.completedResults.get(failedDep);
+      const depError = depResult?.error ?? "Dependency failed";
+      let action = { action: "skip" };
+      if (this.hooks.controllers?.onDependencySkipped) {
+        const context = {
+          iteration: this.iteration,
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          failedDependency: failedDep,
+          failedDependencyError: depError,
+          logger: this.logger
+        };
+        action = await this.hooks.controllers.onDependencySkipped(context);
+      }
+      if (action.action === "skip") {
+        this.failedInvocations.add(call.invocationId);
+        const skipEvent = {
+          type: "gadget_skipped",
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          failedDependency: failedDep,
+          failedDependencyError: depError
+        };
+        events.push(skipEvent);
+        if (this.hooks.observers?.onGadgetSkipped) {
+          const observeContext = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+        }
+        this.logger.info("Gadget skipped due to failed dependency", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+      } else if (action.action === "execute_anyway") {
+        this.logger.info("Executing gadget despite failed dependency (controller override)", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+        const executeEvents = await this.executeGadgetWithHooks(call);
+        events.push(...executeEvents);
+      } else if (action.action === "use_fallback") {
+        const fallbackResult = {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          parameters: call.parameters ?? {},
+          result: action.fallbackResult,
+          executionTimeMs: 0
+        };
+        this.completedResults.set(call.invocationId, fallbackResult);
+        events.push({ type: "gadget_result", result: fallbackResult });
+        this.logger.info("Using fallback result for gadget with failed dependency", {
+          gadgetName: call.gadgetName,
+          invocationId: call.invocationId,
+          failedDependency: failedDep
+        });
+      }
+      return events;
+    }
+    /**
+     * Process pending gadgets whose dependencies are now satisfied.
+     * Executes ready gadgets in parallel and continues until no more can be triggered.
+     */
+    async processPendingGadgets() {
+      const events = [];
+      let progress = true;
+      while (progress && this.pendingGadgets.size > 0) {
+        progress = false;
+        const readyToExecute = [];
+        const readyToSkip = [];
+        for (const [invocationId, call] of this.pendingGadgets) {
+          const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+          if (failedDep) {
+            readyToSkip.push({ call, failedDep });
+            continue;
+          }
+          const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+          if (allSatisfied) {
+            readyToExecute.push(call);
+          }
+        }
+        for (const { call, failedDep } of readyToSkip) {
+          this.pendingGadgets.delete(call.invocationId);
+          const skipEvents = await this.handleFailedDependency(call, failedDep);
+          events.push(...skipEvents);
+          progress = true;
+        }
+        if (readyToExecute.length > 0) {
+          this.logger.debug("Executing ready gadgets in parallel", {
+            count: readyToExecute.length,
+            invocationIds: readyToExecute.map((c) => c.invocationId)
+          });
+          for (const call of readyToExecute) {
+            this.pendingGadgets.delete(call.invocationId);
+          }
+          const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+          const results = await Promise.all(executePromises);
+          for (const executeEvents of results) {
+            events.push(...executeEvents);
+          }
+          progress = true;
+        }
+      }
+      if (this.pendingGadgets.size > 0) {
+        const pendingIds = new Set(this.pendingGadgets.keys());
+        for (const [invocationId, call] of this.pendingGadgets) {
+          const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+          const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+          const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+          let errorMessage;
+          let logLevel = "warn";
+          if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+            errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+            logLevel = "error";
+          } else if (circularDeps.length > 0) {
+            errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+          } else {
+            errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+          }
+          this.logger[logLevel]("Gadget has unresolvable dependencies", {
+            gadgetName: call.gadgetName,
+            invocationId,
+            circularDependencies: circularDeps,
+            missingDependencies: trulyMissingDeps
+          });
+          this.failedInvocations.add(invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: missingDeps[0],
+            failedDependencyError: errorMessage
+          };
+          events.push(skipEvent);
+        }
+        this.pendingGadgets.clear();
+      }
+      return events;
+    }
     /**
      * Safely execute an observer, catching and logging any errors.
      * Observers are non-critical, so errors are logged but don't crash the system.
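
The scheduler above resolves each blocked gadget's fate through `onDependencySkipped`, which may return `skip` (the default), `execute_anyway`, or `use_fallback`. A hedged sketch of hooks using only the context fields shown in this hunk (the surrounding hooks type is otherwise assumed):

```typescript
const hooks = {
  controllers: {
    async onDependencySkipped(ctx: {
      gadgetName: string;
      invocationId: string;
      failedDependency: string;
      failedDependencyError: string;
    }) {
      // Let a non-critical merge proceed with an empty placeholder instead of skipping.
      if (ctx.gadgetName === "merge_data") {
        return { action: "use_fallback", fallbackResult: "{}" } as const;
      }
      return { action: "skip" } as const;
    },
  },
  observers: {
    async onGadgetSkipped(ctx: { invocationId: string; failedDependency: string }) {
      console.log(`skipped ${ctx.invocationId}: dependency ${ctx.failedDependency} failed`);
    },
  },
};
```

Note that a `use_fallback` result is recorded in `completedResults` but not in `failedInvocations`, so gadgets that depend on the fallback still run.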
@@ -4075,9 +4617,9 @@ var init_agent = __esm({
       if (msg.role === "user") {
         this.conversation.addUserMessage(msg.content);
       } else if (msg.role === "assistant") {
-        this.conversation.addAssistantMessage(msg.content);
+        this.conversation.addAssistantMessage(extractText(msg.content));
       } else if (msg.role === "system") {
-        this.conversation.addUserMessage(`[System] ${msg.content}`);
+        this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
       }
     }
   }
@@ -4656,6 +5198,7 @@ var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
     import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+    init_messages();
     init_anthropic_models();
     init_base_provider();
     init_constants2();
@@ -4694,7 +5237,7 @@ var init_anthropic = __esm({
       const systemMessages = messages.filter((message) => message.role === "system");
       const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
         type: "text",
-        text: m.content,
+        text: extractText(m.content),
         // Add cache_control to the LAST system message block
         ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
       })) : void 0;
@@ -4707,14 +5250,10 @@ var init_anthropic = __esm({
       );
       const conversation = nonSystemMessages.map((message, index) => ({
         role: message.role,
-        content:
-
-
-
-          // Add cache_control to the LAST user message
-          ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-        }
-        ]
+        content: this.convertToAnthropicContent(
+          message.content,
+          message.role === "user" && index === lastUserIndex
+        )
       }));
       const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
       const payload = {
@@ -4730,6 +5269,52 @@ var init_anthropic = __esm({
       };
       return payload;
     }
+    /**
+     * Convert llmist content to Anthropic's content block format.
+     * Handles text, images (base64 only), and applies cache_control.
+     */
+    convertToAnthropicContent(content, addCacheControl) {
+      const parts = normalizeContent(content);
+      return parts.map((part, index) => {
+        const isLastPart = index === parts.length - 1;
+        const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+        if (part.type === "text") {
+          return {
+            type: "text",
+            text: part.text,
+            ...cacheControl
+          };
+        }
+        if (part.type === "image") {
+          return this.convertImagePart(part, cacheControl);
+        }
+        if (part.type === "audio") {
+          throw new Error(
+            "Anthropic does not support audio input. Use Google Gemini for audio processing."
+          );
+        }
+        throw new Error(`Unsupported content type: ${part.type}`);
+      });
+    }
+    /**
+     * Convert an image content part to Anthropic's image block format.
+     */
+    convertImagePart(part, cacheControl) {
+      if (part.source.type === "url") {
+        throw new Error(
+          "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+        );
+      }
+      return {
+        type: "image",
+        source: {
+          type: "base64",
+          media_type: part.source.mediaType,
+          data: part.source.data
+        },
+        ...cacheControl
+      };
+    }
     async executeStreamRequest(payload, signal) {
       const client = this.client;
       const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
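
For reference, the Anthropic conversion above maps llmist parts onto content blocks like this (illustrative values; note the camelCase → snake_case rename of the media-type field):

```typescript
// llmist part:
//   { type: "image", source: { type: "base64", mediaType: "image/png", data: "iVBORw..." } }
// Anthropic block:
//   { type: "image", source: { type: "base64", media_type: "image/png", data: "iVBORw..." } }
//
// Text parts pass through as { type: "text", text }, with cache_control spread onto the
// final part of the last user message. URL-sourced images and audio parts throw here.
```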
@@ -4812,17 +5397,12 @@ var init_anthropic = __esm({
     async countTokens(messages, descriptor, _spec) {
       const client = this.client;
       const systemMessages = messages.filter((message) => message.role === "system");
-      const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+      const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
       const conversation = messages.filter(
         (message) => message.role !== "system"
       ).map((message) => ({
         role: message.role,
-        content:
-          {
-            type: "text",
-            text: message.content
-          }
-        ]
+        content: this.convertToAnthropicContent(message.content, false)
       }));
       try {
         const response = await client.messages.countTokens({
@@ -4836,8 +5416,19 @@ var init_anthropic = __esm({
           `Token counting failed for ${descriptor.name}, using fallback estimation:`,
           error
         );
-
-
+        let totalChars = 0;
+        let imageCount = 0;
+        for (const msg of messages) {
+          const parts = normalizeContent(msg.content);
+          for (const part of parts) {
+            if (part.type === "text") {
+              totalChars += part.text.length;
+            } else if (part.type === "image") {
+              imageCount++;
+            }
+          }
+        }
+        return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
       }
     }
   };
@@ -5366,6 +5957,7 @@ var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     "use strict";
     import_genai = require("@google/genai");
+    init_messages();
     init_base_provider();
     init_constants2();
     init_gemini_image_models();
@@ -5535,7 +6127,7 @@ var init_gemini = __esm({
       };
       return {
         model: descriptor.name,
-        contents
+        contents,
         config
       };
     }
@@ -5570,18 +6162,25 @@ var init_gemini = __esm({
       if (message.role === "system") {
         expandedMessages.push({
           role: "user",
-          content: message.content
+          content: extractText(message.content)
         });
         expandedMessages.push({
           role: "assistant",
           content: "Understood."
         });
       } else {
-        expandedMessages.push(
+        expandedMessages.push({
+          role: message.role,
+          content: message.content
+        });
       }
     }
     return this.mergeConsecutiveMessages(expandedMessages);
   }
+  /**
+   * Merge consecutive messages with the same role (required by Gemini).
+   * Handles multimodal content by converting to Gemini's part format.
+   */
   mergeConsecutiveMessages(messages) {
     if (messages.length === 0) {
       return [];
@@ -5590,15 +6189,16 @@ var init_gemini = __esm({
     let currentGroup = null;
     for (const message of messages) {
       const geminiRole = GEMINI_ROLE_MAP[message.role];
+      const geminiParts = this.convertToGeminiParts(message.content);
       if (currentGroup && currentGroup.role === geminiRole) {
-        currentGroup.parts.push(
+        currentGroup.parts.push(...geminiParts);
       } else {
         if (currentGroup) {
           result.push(currentGroup);
         }
         currentGroup = {
           role: geminiRole,
-          parts:
+          parts: geminiParts
         };
       }
     }
@@ -5607,11 +6207,39 @@ var init_gemini = __esm({
     }
     return result;
   }
-
-
-
-
-
+  /**
+   * Convert llmist content to Gemini's part format.
+   * Handles text, images, and audio (Gemini supports all three).
+   */
+  convertToGeminiParts(content) {
+    const parts = normalizeContent(content);
+    return parts.map((part) => {
+      if (part.type === "text") {
+        return { text: part.text };
+      }
+      if (part.type === "image") {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+          );
+        }
+        return {
+          inlineData: {
+            mimeType: part.source.mediaType,
+            data: part.source.data
+          }
+        };
+      }
+      if (part.type === "audio") {
+        return {
+          inlineData: {
+            mimeType: part.source.mediaType,
+            data: part.source.data
+          }
+        };
+      }
+      throw new Error(`Unsupported content type: ${part.type}`);
+    });
   }
   buildGenerationConfig(options) {
     const config = {};
|
|
|
5632
6260
|
async *wrapStream(iterable) {
|
|
5633
6261
|
const stream2 = iterable;
|
|
5634
6262
|
for await (const chunk of stream2) {
|
|
5635
|
-
const
|
|
5636
|
-
if (
|
|
5637
|
-
yield { text, rawEvent: chunk };
|
|
6263
|
+
const text3 = this.extractText(chunk);
|
|
6264
|
+
if (text3) {
|
|
6265
|
+
yield { text: text3, rawEvent: chunk };
|
|
5638
6266
|
}
|
|
5639
6267
|
const finishReason = this.extractFinishReason(chunk);
|
|
5640
6268
|
const usage = this.extractUsage(chunk);
|
|
@@ -5695,7 +6323,7 @@ var init_gemini = __esm({
|
|
|
5695
6323
|
try {
|
|
5696
6324
|
const response = await client.models.countTokens({
|
|
5697
6325
|
model: descriptor.name,
|
|
5698
|
-
contents
|
|
6326
|
+
contents
|
|
5699
6327
|
// Note: systemInstruction not used - it's not supported by countTokens()
|
|
5700
6328
|
// and would cause a 2100% token counting error
|
|
5701
6329
|
});
|
|
@@ -5705,8 +6333,19 @@ var init_gemini = __esm({
|
|
|
5705
6333
|
`Token counting failed for ${descriptor.name}, using fallback estimation:`,
|
|
5706
6334
|
error
|
|
5707
6335
|
);
|
|
5708
|
-
|
|
5709
|
-
|
|
6336
|
+
let totalChars = 0;
|
|
6337
|
+
let mediaCount = 0;
|
|
6338
|
+
for (const msg of messages) {
|
|
6339
|
+
const parts = normalizeContent(msg.content);
|
|
6340
|
+
for (const part of parts) {
|
|
6341
|
+
if (part.type === "text") {
|
|
6342
|
+
totalChars += part.text.length;
|
|
6343
|
+
} else if (part.type === "image" || part.type === "audio") {
|
|
6344
|
+
mediaCount++;
|
|
6345
|
+
}
|
|
6346
|
+
}
|
|
6347
|
+
}
|
|
6348
|
+
return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
|
|
5710
6349
|
}
|
|
5711
6350
|
}
|
|
5712
6351
|
};
|
|
@@ -6349,6 +6988,7 @@ var init_openai = __esm({
     "use strict";
     import_openai = __toESM(require("openai"), 1);
     import_tiktoken = require("tiktoken");
+    init_messages();
     init_base_provider();
     init_constants2();
     init_openai_image_models();
@@ -6456,11 +7096,7 @@ var init_openai = __esm({
       const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
       return {
         model: descriptor.name,
-        messages: messages.map((message) => (
-          role: ROLE_MAP[message.role],
-          content: message.content,
-          name: message.name
-        })),
+        messages: messages.map((message) => this.convertToOpenAIMessage(message)),
         // Only set max_completion_tokens if explicitly provided
         // Otherwise let the API use "as much as fits" in the context window
         ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -6472,6 +7108,77 @@ var init_openai = __esm({
         ...shouldIncludeTemperature ? { temperature } : {}
       };
     }
+    /**
+     * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+     * Handles role-specific content type requirements:
+     * - system/assistant: string content only
+     * - user: string or multimodal array content
+     */
+    convertToOpenAIMessage(message) {
+      const role = ROLE_MAP[message.role];
+      if (role === "user") {
+        const content = this.convertToOpenAIContent(message.content);
+        return {
+          role: "user",
+          content,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+      if (role === "system") {
+        return {
+          role: "system",
+          content: textContent,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      return {
+        role: "assistant",
+        content: textContent,
+        ...message.name ? { name: message.name } : {}
+      };
+    }
+    /**
+     * Convert llmist content to OpenAI's content format.
+     * Optimizes by returning string for text-only content, array for multimodal.
+     */
+    convertToOpenAIContent(content) {
+      if (typeof content === "string") {
+        return content;
+      }
+      return content.map((part) => {
+        if (part.type === "text") {
+          return { type: "text", text: part.text };
+        }
+        if (part.type === "image") {
+          return this.convertImagePart(part);
+        }
+        if (part.type === "audio") {
+          throw new Error(
+            "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+          );
+        }
+        throw new Error(`Unsupported content type: ${part.type}`);
+      });
+    }
+    /**
+     * Convert an image content part to OpenAI's image_url format.
+     * Supports both URLs and base64 data URLs.
+     */
+    convertImagePart(part) {
+      if (part.source.type === "url") {
+        return {
+          type: "image_url",
+          image_url: { url: part.source.url }
+        };
+      }
+      return {
+        type: "image_url",
+        image_url: {
+          url: `data:${part.source.mediaType};base64,${part.source.data}`
+        }
+      };
+    }
     async executeStreamRequest(payload, signal) {
       const client = this.client;
       const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
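
The OpenAI conversion keeps multimodal arrays only for user messages and flattens system/assistant content to plain text. Illustrative mapping (values invented):

```typescript
// { type: "text", text: "hi" }                -> { type: "text", text: "hi" }
// { type: "image", source: { type: "url", url } }
//   -> { type: "image_url", image_url: { url } }
// { type: "image", source: { type: "base64", mediaType, data } }
//   -> { type: "image_url", image_url: { url: `data:${mediaType};base64,${data}` } }
// Audio parts throw - the error message steers callers to Whisper or Gemini instead.
```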
@@ -6480,9 +7187,9 @@ var init_openai = __esm({
   async *wrapStream(iterable) {
     const stream2 = iterable;
     for await (const chunk of stream2) {
-      const 
-      if (
-      yield { text, rawEvent: chunk };
+      const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+      if (text3) {
+        yield { text: text3, rawEvent: chunk };
       }
       const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
       const usage = chunk.usage ? {
@@ -6530,17 +7237,26 @@ var init_openai = __esm({
     }
     try {
       let tokenCount = 0;
+      let imageCount = 0;
       for (const message of messages) {
         tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
         const roleText = ROLE_MAP[message.role];
         tokenCount += encoding.encode(roleText).length;
-
+        const textContent = extractText(message.content);
+        tokenCount += encoding.encode(textContent).length;
+        const parts = normalizeContent(message.content);
+        for (const part of parts) {
+          if (part.type === "image") {
+            imageCount++;
+          }
+        }
         if (message.name) {
           tokenCount += encoding.encode(message.name).length;
           tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
         }
       }
       tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+      tokenCount += imageCount * 765;
       return tokenCount;
     } finally {
       encoding.free();
@@ -6550,8 +7266,19 @@ var init_openai = __esm({
         `Token counting failed for ${descriptor.name}, using fallback estimation:`,
         error
       );
-
-
+      let totalChars = 0;
+      let imageCount = 0;
+      for (const msg of messages) {
+        const parts = normalizeContent(msg.content);
+        for (const part of parts) {
+          if (part.type === "text") {
+            totalChars += part.text.length;
+          } else if (part.type === "image") {
+            imageCount++;
+          }
+        }
+      }
+      return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
     }
   }
 };
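
All three providers now share the same fallback shape when the native token counter fails: total text characters divided by `FALLBACK_CHARS_PER_TOKEN`, plus a flat per-media surcharge (1000 tokens per image for Anthropic, 258 per media part for Gemini, 765 per image for OpenAI). A worked example, assuming the common 4-chars-per-token heuristic (the constant's actual value is defined elsewhere in the bundle and not shown in this diff):

```typescript
const FALLBACK_CHARS_PER_TOKEN = 4; // assumed value

// One conversation totalling 1200 chars of text plus two images, estimated for OpenAI:
const estimate = Math.ceil(1200 / FALLBACK_CHARS_PER_TOKEN) + 2 * 765;
// => 300 + 1530 = 1830 tokens
```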
@@ -6974,6 +7701,138 @@ var init_text = __esm({
|
|
|
6974
7701
|
}
|
|
6975
7702
|
});
|
|
6976
7703
|
|
|
7704
|
+
// src/core/namespaces/vision.ts
|
|
7705
|
+
var VisionNamespace;
|
|
7706
|
+
var init_vision = __esm({
|
|
7707
|
+
"src/core/namespaces/vision.ts"() {
|
|
7708
|
+
"use strict";
|
|
7709
|
+
init_input_content();
|
|
7710
|
+
init_messages();
|
|
7711
|
+
VisionNamespace = class {
|
|
7712
|
+
constructor(client) {
|
|
7713
|
+
this.client = client;
|
|
7714
|
+
}
|
|
7715
|
+
/**
|
|
7716
|
+
* Build a message builder with the image content attached.
|
|
7717
|
+
* Handles URLs, data URLs, base64 strings, and binary buffers.
|
|
7718
|
+
*/
|
|
7719
|
+
buildImageMessage(options) {
|
|
7720
|
+
+        const builder = new LLMMessageBuilder();
+        if (options.systemPrompt) {
+          builder.addSystem(options.systemPrompt);
+        }
+        if (typeof options.image === "string") {
+          if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+            builder.addUserWithImageUrl(options.prompt, options.image);
+          } else if (isDataUrl(options.image)) {
+            const parsed = parseDataUrl(options.image);
+            if (!parsed) {
+              throw new Error("Invalid data URL format");
+            }
+            builder.addUserWithImage(
+              options.prompt,
+              parsed.data,
+              parsed.mimeType
+            );
+          } else {
+            const buffer = Buffer.from(options.image, "base64");
+            builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+          }
+        } else {
+          builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+        }
+        return builder;
+      }
+      /**
+       * Stream the response and collect text and usage information.
+       */
+      async streamAndCollect(options, builder) {
+        let response = "";
+        let finalUsage;
+        for await (const chunk of this.client.stream({
+          model: options.model,
+          messages: builder.build(),
+          maxTokens: options.maxTokens,
+          temperature: options.temperature
+        })) {
+          response += chunk.text;
+          if (chunk.usage) {
+            finalUsage = {
+              inputTokens: chunk.usage.inputTokens,
+              outputTokens: chunk.usage.outputTokens,
+              totalTokens: chunk.usage.totalTokens
+            };
+          }
+        }
+        return { text: response.trim(), usage: finalUsage };
+      }
+      /**
+       * Analyze an image with a vision-capable model.
+       * Returns the analysis as a string.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis text
+       * @throws Error if the image format is unsupported or model doesn't support vision
+       *
+       * @example
+       * ```typescript
+       * // From file
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: await fs.readFile("photo.jpg"),
+       *   prompt: "What's in this image?",
+       * });
+       *
+       * // From URL (OpenAI only)
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: "https://example.com/image.jpg",
+       *   prompt: "Describe this image",
+       * });
+       * ```
+       */
+      async analyze(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3 } = await this.streamAndCollect(options, builder);
+        return text3;
+      }
+      /**
+       * Analyze an image and return detailed result with usage info.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis result with usage info
+       */
+      async analyzeWithUsage(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3, usage } = await this.streamAndCollect(options, builder);
+        return {
+          text: text3,
+          model: options.model,
+          usage
+        };
+      }
+      /**
+       * Check if a model supports vision/image input.
+       *
+       * @param modelId - Model ID to check
+       * @returns True if the model supports vision
+       */
+      supportsModel(modelId) {
+        const spec = this.client.modelRegistry.getModelSpec(modelId);
+        return spec?.features?.vision === true;
+      }
+      /**
+       * List all models that support vision.
+       *
+       * @returns Array of model IDs that support vision
+       */
+      listModels() {
+        return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+      }
+    };
+  }
+});
+
// src/core/options.ts
var ModelIdentifierParser;
var init_options = __esm({
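The VisionNamespace assembled above accepts Buffers, Uint8Arrays, base64 strings, data URLs, and plain URLs. A minimal usage sketch, assembled from the docstrings in this hunk (the model id and file path are illustrative):

```typescript
import { readFile } from "node:fs/promises";
import { LLMist } from "llmist";

const llmist = new LLMist();

// Buffer input: the MIME type is inferred from the image bytes.
const caption = await llmist.vision.analyze({
  model: "gpt-4o",
  image: await readFile("photo.jpg"),
  prompt: "What's in this image?",
});

// analyzeWithUsage additionally reports token usage for the call.
const { text: analysis, usage } = await llmist.vision.analyzeWithUsage({
  model: "gpt-4o",
  image: "https://example.com/image.jpg", // URL input (OpenAI only)
  prompt: "Describe this image",
});
console.log(analysis, usage?.totalTokens);
```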
@@ -7018,6 +7877,7 @@ var init_client = __esm({
    init_image();
    init_speech();
    init_text();
+    init_vision();
    init_options();
    init_quick_methods();
    LLMist = class _LLMist {
@@ -7029,6 +7889,7 @@ var init_client = __esm({
      text;
      image;
      speech;
+      vision;
      constructor(...args) {
        let adapters = [];
        let defaultProvider;
@@ -7079,6 +7940,7 @@ var init_client = __esm({
        this.text = new TextNamespace(this);
        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+        this.vision = new VisionNamespace(this);
      }
      stream(options) {
        const descriptor = this.parser.parse(options.model);
@@ -7263,6 +8125,7 @@ var init_builder = __esm({
  "src/agent/builder.ts"() {
    "use strict";
    init_constants();
+    init_input_content();
    init_model_shortcuts();
    init_registry();
    init_agent();
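These client hunks only wire the namespace in: init_vision() runs at module init, the class gains a vision field, and the constructor instantiates it, so every client exposes vision alongside text, image, and speech. A capability-check sketch using the supportsModel/listModels methods from the previous hunk:

```typescript
import { LLMist } from "llmist";

const client = new LLMist();

// Gate image input on the model's declared vision capability.
if (!client.vision.supportsModel("gpt-4o")) {
  throw new Error(`Pick a vision model: ${client.vision.listModels().join(", ")}`);
}
```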
@@ -7910,13 +8773,17 @@ ${endPrefix}`
   * }
   * ```
   */
-  ask(userPrompt) {
+  /**
+   * Build AgentOptions with the given user prompt.
+   * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+   */
+  buildAgentOptions(userPrompt) {
    if (!this.client) {
      const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
      this.client = new LLMistClass();
    }
    const registry = GadgetRegistry.from(this.gadgets);
-    const options = {
+    return {
      client: this.client,
      model: this.model ?? "openai:gpt-5-nano",
      systemPrompt: this.systemPrompt,
@@ -7942,6 +8809,83 @@ ${endPrefix}`
      compactionConfig: this.compactionConfig,
      signal: this.signal
    };
+  }
+  ask(userPrompt) {
+    const options = this.buildAgentOptions(userPrompt);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and create the agent with a multimodal user prompt (text + image).
+   * Returns the Agent instance ready to run.
+   *
+   * @param textPrompt - Text prompt describing what to do with the image
+   * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+   * @param mimeType - Optional MIME type (auto-detected if not provided)
+   * @returns Configured Agent instance
+   *
+   * @example
+   * ```typescript
+   * const agent = LLMist.createAgent()
+   *   .withModel("gpt-4o")
+   *   .withSystem("You analyze images")
+   *   .askWithImage(
+   *     "What's in this image?",
+   *     await fs.readFile("photo.jpg")
+   *   );
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithImage(textPrompt, imageData, mimeType) {
+    const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+    const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+    if (!detectedMime) {
+      throw new Error(
+        "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+      );
+    }
+    const userContent = [
+      text(textPrompt),
+      {
+        type: "image",
+        source: {
+          type: "base64",
+          mediaType: detectedMime,
+          data: toBase64(imageBuffer)
+        }
+      }
+    ];
+    const options = this.buildAgentOptions(userContent);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and return an Agent configured with multimodal content.
+   * More flexible than askWithImage - accepts any combination of content parts.
+   *
+   * @param content - Array of content parts (text, images, audio)
+   * @returns A configured Agent ready for execution
+   *
+   * @example
+   * ```typescript
+   * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+   *
+   * const agent = LLMist.createAgent()
+   *   .withModel("gemini:gemini-2.5-flash")
+   *   .askWithContent([
+   *     text("Describe this image and transcribe the audio:"),
+   *     imageFromBuffer(imageData),
+   *     audioFromBuffer(audioData),
+   *   ]);
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithContent(content) {
+    const options = this.buildAgentOptions(content);
    return new Agent(AGENT_INTERNAL_KEY, options);
  }
  /**
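Note the failure mode in askWithImage: if no mimeType is passed and detectImageMimeType cannot sniff the format from the bytes, it throws rather than guessing. A defensive sketch (the upload helper is hypothetical):

```typescript
import { LLMist } from "llmist";

const rawUpload = getUploadBase64(); // hypothetical helper returning base64 image data

let agent;
try {
  agent = LLMist.createAgent()
    .withModel("gpt-4o")
    .askWithImage("What's in this image?", rawUpload);
} catch {
  // Detection failed; retry with an explicit MIME type.
  agent = LLMist.createAgent()
    .withModel("gpt-4o")
    .askWithImage("What's in this image?", rawUpload, "image/png");
}
```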
@@ -8087,6 +9031,8 @@ __export(index_exports, {
  StreamParser: () => StreamParser,
  StreamProcessor: () => StreamProcessor,
  SummarizationStrategy: () => SummarizationStrategy,
+  audioFromBase64: () => audioFromBase64,
+  audioFromBuffer: () => audioFromBuffer,
  collectEvents: () => collectEvents,
  collectText: () => collectText,
  complete: () => complete,
@@ -8102,20 +9048,34 @@ __export(index_exports, {
  createOpenAIProviderFromEnv: () => createOpenAIProviderFromEnv,
  createTextMockStream: () => createTextMockStream,
  defaultLogger: () => defaultLogger,
+  detectAudioMimeType: () => detectAudioMimeType,
+  detectImageMimeType: () => detectImageMimeType,
  discoverProviderAdapters: () => discoverProviderAdapters,
+  extractText: () => extractText,
  getMockManager: () => getMockManager,
  getModelId: () => getModelId,
  getProvider: () => getProvider,
  hasProviderPrefix: () => hasProviderPrefix,
+  imageFromBase64: () => imageFromBase64,
+  imageFromBuffer: () => imageFromBuffer,
+  imageFromUrl: () => imageFromUrl,
+  isAudioPart: () => isAudioPart,
+  isDataUrl: () => isDataUrl,
+  isImagePart: () => isImagePart,
+  isTextPart: () => isTextPart,
  iterationProgressHint: () => iterationProgressHint,
  mockLLM: () => mockLLM,
+  normalizeContent: () => normalizeContent,
  parallelGadgetHint: () => parallelGadgetHint,
+  parseDataUrl: () => parseDataUrl,
  resolveHintTemplate: () => resolveHintTemplate,
  resolveModel: () => resolveModel,
  resolvePromptTemplate: () => resolvePromptTemplate,
  resolveRulesTemplate: () => resolveRulesTemplate,
  runWithHandlers: () => runWithHandlers,
  stream: () => stream,
+  text: () => text,
+  toBase64: () => toBase64,
  validateAndApplyDefaults: () => validateAndApplyDefaults,
  validateGadgetParams: () => validateGadgetParams,
  z: () => import_zod2.z
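This export map is what surfaces the multimodal content helpers at the package root. Assuming the CJS and ESM entry points expose the same surface (both are bundles of src/index.ts), imports look like this sketch:

```typescript
import { text, imageFromUrl, imageFromBase64, parseDataUrl } from "llmist";

// An illustrative data URL; parseDataUrl returns { mimeType, data } or null.
const dataUrl = "data:image/png;base64,iVBORw0KGgo=";
const parsed = parseDataUrl(dataUrl);
if (!parsed) throw new Error("Invalid data URL format");

const parts = [
  text("Compare these two images:"),
  imageFromUrl("https://example.com/a.jpg"),
  imageFromBase64(parsed.data, parsed.mimeType),
];
```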
@@ -9016,6 +9976,7 @@ function createHints(config) {

// src/index.ts
init_client();
+init_input_content();
init_messages();
init_model_registry();
init_model_shortcuts();
@@ -9263,9 +10224,9 @@ function sleep(ms) {
function generateInvocationId() {
  return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
}
-function splitIntoChunks(text, minChunkSize = 5, maxChunkSize = 30) {
+function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
  const chunks = [];
-  let remaining = text;
+  let remaining = text3;
  while (remaining.length > 0) {
    const chunkSize = Math.min(
      Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
@@ -9324,17 +10285,17 @@ ${String(value)}
  return result;
}
function formatGadgetCalls(gadgetCalls) {
-  let text = "";
+  let text3 = "";
  const calls = [];
  for (const call of gadgetCalls) {
    const invocationId = call.invocationId ?? generateInvocationId();
    calls.push({ name: call.gadgetName, invocationId });
    const blockParams = serializeToBlockFormat(call.parameters);
-    text += `
+    text3 += `
${GADGET_START_PREFIX}${call.gadgetName}
${blockParams}${GADGET_END_PREFIX}`;
  }
-  return { text, calls };
+  return { text: text3, calls };
}
async function* createMockStream(response) {
  if (response.delayMs) {
@@ -9374,9 +10335,9 @@ async function* createMockStream(response) {
    };
  }
}
-function createTextMockStream(text, options) {
+function createTextMockStream(text3, options) {
  return createMockStream({
-    text,
+    text: text3,
    delayMs: options?.delayMs,
    streamDelayMs: options?.streamDelayMs,
    usage: options?.usage,
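The text to text3 renames in these mock-stream hunks are bundler fallout from the new top-level text() helper, not behavior changes; createTextMockStream keeps its public shape. For example:

```typescript
import { createTextMockStream } from "llmist";

// Streams the string in random 5-30 character chunks (splitIntoChunks defaults),
// optionally delayed per chunk, and passes the given usage through.
const stream = createTextMockStream("Hello from the mock!", {
  streamDelayMs: 10,
  usage: { inputTokens: 3, outputTokens: 5, totalTokens: 8 },
});
for await (const chunk of stream) process.stdout.write(chunk.text);
```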
@@ -9393,10 +10354,10 @@ var MockProviderAdapter = class {
  constructor(options) {
    this.mockManager = getMockManager(options);
  }
-  supports(descriptor) {
+  supports(_descriptor) {
    return true;
  }
-  stream(options, descriptor, spec) {
+  stream(options, descriptor, _spec) {
    const context = {
      model: options.model,
      provider: descriptor.provider,
@@ -9407,20 +10368,154 @@ var MockProviderAdapter = class {
    return this.createMockStreamFromContext(context);
  }
  async *createMockStreamFromContext(context) {
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse) {
+      yield {
+        text: "",
+        finishReason: "stop",
+        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
+      };
+      return;
+    }
+    yield* createMockStream(mockResponse);
+  }
+  // ==========================================================================
+  // Image Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports image generation for a given model.
+   * Returns true if there's a registered mock with images for this model.
+   */
+  supportsImageGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock images based on registered mocks.
+   *
+   * @param options - Image generation options
+   * @returns Mock image generation result
+   */
+  async generateImage(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.prompt }]
+      },
+      messages: [{ role: "user", content: options.prompt }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.images || mockResponse.images.length === 0) {
+      throw new Error(
+        `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
+      );
+    }
+    return this.createImageResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into ImageGenerationResult format.
+   *
+   * @param options - Original image generation options
+   * @param mockResponse - Mock response containing image data
+   * @returns ImageGenerationResult with mock data and zero cost
+   */
+  createImageResult(options, mockResponse) {
+    const images = mockResponse.images ?? [];
+    return {
+      images: images.map((img) => ({
+        b64Json: img.data,
+        revisedPrompt: img.revisedPrompt
+      })),
+      model: options.model,
+      usage: {
+        imagesGenerated: images.length,
+        size: options.size ?? "1024x1024",
+        quality: options.quality ?? "standard"
+      },
+      cost: 0
+      // Mock cost is always 0
+    };
+  }
+  // ==========================================================================
+  // Speech Generation Support
+  // ==========================================================================
+  /**
+   * Check if this adapter supports speech generation for a given model.
+   * Returns true if there's a registered mock with audio for this model.
+   */
+  supportsSpeechGeneration(_modelId) {
+    return true;
+  }
+  /**
+   * Generate mock speech based on registered mocks.
+   *
+   * @param options - Speech generation options
+   * @returns Mock speech generation result
+   */
+  async generateSpeech(options) {
+    const context = {
+      model: options.model,
+      provider: "mock",
+      modelName: options.model,
+      options: {
+        model: options.model,
+        messages: [{ role: "user", content: options.input }]
+      },
+      messages: [{ role: "user", content: options.input }]
+    };
+    const mockResponse = await this.mockManager.findMatch(context);
+    if (!mockResponse?.audio) {
+      throw new Error(
+        `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
+      );
    }
+    return this.createSpeechResult(options, mockResponse);
+  }
+  /**
+   * Transform mock response into SpeechGenerationResult format.
+   * Converts base64 audio data to ArrayBuffer.
+   *
+   * @param options - Original speech generation options
+   * @param mockResponse - Mock response containing audio data
+   * @returns SpeechGenerationResult with mock data and zero cost
+   */
+  createSpeechResult(options, mockResponse) {
+    const audio = mockResponse.audio;
+    const binaryString = atob(audio.data);
+    const bytes = new Uint8Array(binaryString.length);
+    for (let i = 0; i < binaryString.length; i++) {
+      bytes[i] = binaryString.charCodeAt(i);
+    }
+    const format = this.mimeTypeToAudioFormat(audio.mimeType);
+    return {
+      audio: bytes.buffer,
+      model: options.model,
+      usage: {
+        characterCount: options.input.length
+      },
+      cost: 0,
+      // Mock cost is always 0
+      format
+    };
+  }
+  /**
+   * Map MIME type to audio format for SpeechGenerationResult.
+   * Defaults to "mp3" for unknown MIME types.
+   *
+   * @param mimeType - Audio MIME type string
+   * @returns Audio format identifier
+   */
+  mimeTypeToAudioFormat(mimeType) {
+    const mapping = {
+      "audio/mp3": "mp3",
+      "audio/mpeg": "mp3",
+      "audio/wav": "wav",
+      "audio/webm": "opus",
+      "audio/ogg": "opus"
+    };
+    return mapping[mimeType] ?? "mp3";
  }
};
function createMockAdapter(options) {
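The thrown messages above double as documentation: generateImage and generateSpeech resolve only against registered mocks and always report zero cost. A registration sketch following those messages (the fixture paths are illustrative):

```typescript
import { readFile } from "node:fs/promises";
import { mockLLM } from "llmist";

const pngBuffer = await readFile("fixtures/sunset.png");
const mp3Buffer = await readFile("fixtures/voice.mp3");

mockLLM()
  .forModel("dall-e-3")
  .returnsImage(pngBuffer) // MIME type sniffed from the buffer bytes
  .register();

mockLLM()
  .forModel("tts-1")
  .returnsAudio(mp3Buffer)
  .register();
```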
@@ -9428,6 +10523,20 @@ function createMockAdapter(options) {
}

// src/testing/mock-builder.ts
+init_input_content();
+init_messages();
+function hasImageContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isImagePart(part));
+}
+function hasAudioContent(content) {
+  if (typeof content === "string") return false;
+  return content.some((part) => isAudioPart(part));
+}
+function countImages(content) {
+  if (typeof content === "string") return 0;
+  return content.filter((part) => isImagePart(part)).length;
+}
var MockBuilder = class {
  matchers = [];
  response = {};
@@ -9490,9 +10599,9 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenMessageContains('hello')
   */
-  whenMessageContains(text) {
+  whenMessageContains(text3) {
    this.matchers.push(
-      (ctx) => ctx.messages.some((msg) => msg.content.toLowerCase().includes(text.toLowerCase()))
+      (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
    );
    return this;
  }
@@ -9502,10 +10611,11 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenLastMessageContains('goodbye')
   */
-  whenLastMessageContains(text) {
+  whenLastMessageContains(text3) {
    this.matchers.push((ctx) => {
      const lastMsg = ctx.messages[ctx.messages.length - 1];
-
+      if (!lastMsg) return false;
+      return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
    });
    return this;
  }
@@ -9516,7 +10626,7 @@ var MockBuilder = class {
   * mockLLM().whenMessageMatches(/calculate \d+/)
   */
  whenMessageMatches(regex) {
-    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(msg.content)));
+    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
    return this;
  }
  /**
@@ -9525,10 +10635,10 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
   */
-  whenRoleContains(role, text) {
+  whenRoleContains(role, text3) {
    this.matchers.push(
      (ctx) => ctx.messages.some(
-        (msg) => msg.role === role && msg.content.toLowerCase().includes(text.toLowerCase())
+        (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
      )
    );
    return this;
@@ -9556,6 +10666,43 @@ var MockBuilder = class {
    this.matchers.push(matcher);
    return this;
  }
+  // ==========================================================================
+  // Multimodal Matchers
+  // ==========================================================================
+  /**
+   * Match when any message contains an image.
+   *
+   * @example
+   * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
+   */
+  whenMessageHasImage() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match when any message contains audio.
+   *
+   * @example
+   * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
+   */
+  whenMessageHasAudio() {
+    this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
+    return this;
+  }
+  /**
+   * Match based on the number of images in the last message.
+   *
+   * @example
+   * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
+   */
+  whenImageCount(predicate) {
+    this.matchers.push((ctx) => {
+      const lastMsg = ctx.messages[ctx.messages.length - 1];
+      if (!lastMsg) return false;
+      return predicate(countImages(lastMsg.content));
+    });
+    return this;
+  }
  /**
   * Set the text response to return.
   * Can be a static string or a function that returns a string dynamically.
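Each when* call pushes another predicate onto the matcher list, so the multimodal matchers compose with the text matchers, which now run through extractText and therefore also work on multimodal content. A sketch, assuming a mock matches only when all of its predicates pass:

```typescript
import { mockLLM } from "llmist";

mockLLM()
  .whenMessageHasImage()
  .whenImageCount((n) => n >= 2)
  .returns("Comparing multiple images...")
  .register();

mockLLM()
  .whenMessageHasAudio()
  .whenMessageContains("transcribe") // text parts extracted via extractText
  .returns("I hear music playing.")
  .register();
```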
@@ -9565,17 +10712,17 @@ var MockBuilder = class {
   * mockLLM().returns(() => `Response at ${Date.now()}`)
   * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
   */
-  returns(text) {
-    if (typeof text === "function") {
+  returns(text3) {
+    if (typeof text3 === "function") {
      this.response = async (ctx) => {
-        const resolvedText = await Promise.resolve().then(() => text(ctx));
+        const resolvedText = await Promise.resolve().then(() => text3(ctx));
        return { text: resolvedText };
      };
    } else {
      if (typeof this.response === "function") {
        throw new Error("Cannot use returns() after withResponse() with a function");
      }
-      this.response.text = text;
+      this.response.text = text3;
    }
    return this;
  }
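Because returns() routes function responses through Promise.resolve(), the factory may be synchronous or async; either way the resolved string becomes the mock's text. Sketch:

```typescript
import { extractText, mockLLM } from "llmist";

mockLLM()
  .whenLastMessageContains("weather")
  .returns(async (ctx) => {
    // ctx.messages is the request transcript; async factories are awaited.
    const last = ctx.messages[ctx.messages.length - 1];
    return `You asked: ${extractText(last.content)}`;
  })
  .register();
```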
@@ -9612,6 +10759,112 @@ var MockBuilder = class {
    this.response.gadgetCalls.push({ gadgetName, parameters });
    return this;
  }
+  // ==========================================================================
+  // Multimodal Response Helpers
+  // ==========================================================================
+  /**
+   * Return a single image in the response.
+   * Useful for mocking image generation endpoints.
+   *
+   * @param data - Image data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImage(pngBuffer)
+   *   .register();
+   */
+  returnsImage(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsImage() after withResponse() with a function");
+    }
+    let imageData;
+    let imageMime;
+    if (typeof data === "string") {
+      imageData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      imageMime = mimeType;
+    } else {
+      imageData = toBase64(data);
+      const detected = mimeType ?? detectImageMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      imageMime = detected;
+    }
+    if (!this.response.images) {
+      this.response.images = [];
+    }
+    this.response.images.push({ data: imageData, mimeType: imageMime });
+    return this;
+  }
+  /**
+   * Return multiple images in the response.
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('dall-e-3')
+   *   .returnsImages([
+   *     { data: pngBuffer1 },
+   *     { data: pngBuffer2 },
+   *   ])
+   *   .register();
+   */
+  returnsImages(images) {
+    for (const img of images) {
+      this.returnsImage(img.data, img.mimeType);
+      if (img.revisedPrompt && this.response && typeof this.response !== "function") {
+        const lastImage = this.response.images?.[this.response.images.length - 1];
+        if (lastImage) {
+          lastImage.revisedPrompt = img.revisedPrompt;
+        }
+      }
+    }
+    return this;
+  }
+  /**
+   * Return audio data in the response.
+   * Useful for mocking speech synthesis endpoints.
+   *
+   * @param data - Audio data (base64 string or Buffer)
+   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
+   *
+   * @example
+   * mockLLM()
+   *   .forModel('tts-1')
+   *   .returnsAudio(mp3Buffer)
+   *   .register();
+   */
+  returnsAudio(data, mimeType) {
+    if (typeof this.response === "function") {
+      throw new Error("Cannot use returnsAudio() after withResponse() with a function");
+    }
+    let audioData;
+    let audioMime;
+    if (typeof data === "string") {
+      audioData = data;
+      if (!mimeType) {
+        throw new Error("MIME type is required when providing base64 string data");
+      }
+      audioMime = mimeType;
+    } else {
+      audioData = toBase64(data);
+      const detected = mimeType ?? detectAudioMimeType(data);
+      if (!detected) {
+        throw new Error(
+          "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
+        );
+      }
+      audioMime = detected;
+    }
+    this.response.audio = { data: audioData, mimeType: audioMime };
+    return this;
+  }
  /**
   * Set the complete mock response object.
   * This allows full control over all response properties.
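Two details worth noting in these helpers: base64 string inputs always require an explicit MIME type (only Buffers are sniffed), and returnsImages attaches each revisedPrompt to the image it just pushed. Sketch (fixture paths and the base64 string are illustrative):

```typescript
import { readFile } from "node:fs/promises";
import { mockLLM } from "llmist";

const pngBuffer1 = await readFile("fixtures/a.png");
const pngBuffer2 = await readFile("fixtures/b.png");

mockLLM()
  .forModel("dall-e-3")
  .returnsImages([
    { data: pngBuffer1, revisedPrompt: "a serene sunset, oil painting" },
    { data: pngBuffer2 },
  ])
  .register();

// String data without a MIME type throws, so pass one explicitly:
const base64Mp3 = (await readFile("fixtures/voice.mp3")).toString("base64");
mockLLM().forModel("tts-1").returnsAudio(base64Mp3, "audio/mp3").register();
```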
@@ -9825,6 +11078,8 @@ var import_node_stream = require("stream");
  StreamParser,
  StreamProcessor,
  SummarizationStrategy,
+  audioFromBase64,
+  audioFromBuffer,
  collectEvents,
  collectText,
  complete,
@@ -9840,20 +11095,34 @@ var import_node_stream = require("stream");
  createOpenAIProviderFromEnv,
  createTextMockStream,
  defaultLogger,
+  detectAudioMimeType,
+  detectImageMimeType,
  discoverProviderAdapters,
+  extractText,
  getMockManager,
  getModelId,
  getProvider,
  hasProviderPrefix,
+  imageFromBase64,
+  imageFromBuffer,
+  imageFromUrl,
+  isAudioPart,
+  isDataUrl,
+  isImagePart,
+  isTextPart,
  iterationProgressHint,
  mockLLM,
+  normalizeContent,
  parallelGadgetHint,
+  parseDataUrl,
  resolveHintTemplate,
  resolveModel,
  resolvePromptTemplate,
  resolveRulesTemplate,
  runWithHandlers,
  stream,
+  text,
+  toBase64,
  validateAndApplyDefaults,
  validateGadgetParams,
  z