llmist 2.3.0 → 2.5.0
- package/README.md +7 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-GANXNBIZ.js → chunk-YHS2DYXP.js} +2839 -579
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +2717 -198
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +638 -47
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +2496 -220
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +109 -20
- package/dist/index.d.ts +109 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-ga4KIiwX.d.cts} +1121 -12
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-ga4KIiwX.d.ts} +1121 -12
- package/dist/testing/index.cjs +2771 -559
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/chunk-ZDNV7DDO.js.map +0 -1
package/dist/index.cjs
CHANGED
````diff
@@ -45,6 +45,158 @@ var init_constants = __esm({
   }
 });
 
+// src/core/input-content.ts
+function isTextPart(part) {
+  return part.type === "text";
+}
+function isImagePart(part) {
+  return part.type === "image";
+}
+function isAudioPart(part) {
+  return part.type === "audio";
+}
+function text(content) {
+  return { type: "text", text: content };
+}
+function imageFromBase64(data, mediaType) {
+  return {
+    type: "image",
+    source: { type: "base64", mediaType, data }
+  };
+}
+function imageFromUrl(url) {
+  return {
+    type: "image",
+    source: { type: "url", url }
+  };
+}
+function detectImageMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "image/webp") {
+          if (bytes.length >= 12) {
+            const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+            if (!webpMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function detectAudioMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "audio/wav") {
+          if (bytes.length >= 12) {
+            const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+            if (!waveMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function toBase64(data) {
+  if (typeof data === "string") {
+    return data;
+  }
+  return Buffer.from(data).toString("base64");
+}
+function imageFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectImageMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "image",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function audioFromBase64(data, mediaType) {
+  return {
+    type: "audio",
+    source: { type: "base64", mediaType, data }
+  };
+}
+function audioFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectAudioMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "audio",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function isDataUrl(input) {
+  return input.startsWith("data:");
+}
+function parseDataUrl(url) {
+  const match = url.match(/^data:([^;]+);base64,(.+)$/);
+  if (!match) return null;
+  return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+  "src/core/input-content.ts"() {
+    "use strict";
+    IMAGE_MAGIC_BYTES = [
+      { bytes: [255, 216, 255], mimeType: "image/jpeg" },
+      { bytes: [137, 80, 78, 71], mimeType: "image/png" },
+      { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+      // WebP starts with RIFF....WEBP
+      { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+    ];
+    AUDIO_MAGIC_BYTES = [
+      // MP3 frame sync
+      { bytes: [255, 251], mimeType: "audio/mp3" },
+      { bytes: [255, 250], mimeType: "audio/mp3" },
+      // ID3 tag (MP3)
+      { bytes: [73, 68, 51], mimeType: "audio/mp3" },
+      // OGG
+      { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+      // WAV (RIFF)
+      { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+      // WebM
+      { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
+    ];
+  }
+});
+
 // src/core/model-shortcuts.ts
 function isKnownModelPattern(model) {
   const normalized = model.toLowerCase();
````
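The new `src/core/input-content.ts` module sniffs MIME types from magic bytes instead of trusting file extensions. A minimal usage sketch follows; the import path is an assumption (the diff shows the helpers' implementation, not how they are re-exported from the package root):

```ts
import { readFile } from "node:fs/promises";
// Hypothetical import - adjust to wherever llmist actually exports these helpers.
import { text, imageFromBuffer } from "llmist";

const photo = await readFile("photo.png");

// imageFromBuffer() sniffs the PNG magic bytes (137, 80, 78, 71),
// so no explicit mediaType is needed for the built-in formats.
const parts = [
  text("Describe this picture."),
  imageFromBuffer(photo), // -> { type: "image", source: { type: "base64", mediaType: "image/png", data: "..." } }
];

// For formats without registered magic bytes, pass the MIME type explicitly;
// otherwise imageFromBuffer() throws.
const bmp = imageFromBuffer(await readFile("legacy.bmp"), "image/bmp");
```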
````diff
@@ -402,7 +554,9 @@ var init_prompt_config = __esm({
       rules: () => [
         "Output ONLY plain text with the exact markers - never use function/tool calling",
         "You can invoke multiple gadgets in a single response",
-        "
+        "Gadgets without dependencies execute immediately (in parallel if multiple)",
+        "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+        "If any dependency fails, dependent gadgets are automatically skipped"
       ],
       customExamples: null
     };
````
````diff
@@ -410,11 +564,24 @@ var init_prompt_config = __esm({
 });
 
 // src/core/messages.ts
+function normalizeContent(content) {
+  if (typeof content === "string") {
+    return [{ type: "text", text: content }];
+  }
+  return content;
+}
+function extractText(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+}
 var LLMMessageBuilder;
 var init_messages = __esm({
   "src/core/messages.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_prompt_config();
     LLMMessageBuilder = class {
       messages = [];
````
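`normalizeContent` and `extractText` are the two conversion directions between plain-string messages and content-part arrays. A sketch of their behavior, restating the implementation above:

```ts
normalizeContent("hi");
// -> [{ type: "text", text: "hi" }]

extractText([
  { type: "text", text: "What is " },
  { type: "image", source: { type: "url", url: "https://example.com/x.png" } },
  { type: "text", text: "this?" },
]);
// -> "What is this?"  (non-text parts are dropped, text parts joined in order)
```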
````diff
@@ -516,6 +683,10 @@ CRITICAL: ${criticalUsage}
         parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
         parts.push(`
+With ID: ${this.startPrefix}gadget_name:my_id`);
+        parts.push(`
+With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+        parts.push(`
 2. ${formatDescription}`);
         parts.push(`
 3. End marker: ${this.endPrefix}`);
````
````diff
@@ -565,6 +736,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+        const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+        parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
         parts.push(`
 
 BLOCK FORMAT SYNTAX:
````
````diff
@@ -615,6 +805,25 @@ Produces: { "items": ["first", "second"] }`);
         }
         return parts.join("");
       }
+      /**
+       * Add a user message.
+       * Content can be a string (text only) or an array of content parts (multimodal).
+       *
+       * @param content - Message content
+       * @param metadata - Optional metadata
+       *
+       * @example
+       * ```typescript
+       * // Text only
+       * builder.addUser("Hello!");
+       *
+       * // Multimodal
+       * builder.addUser([
+       *   text("What's in this image?"),
+       *   imageFromBuffer(imageData),
+       * ]);
+       * ```
+       */
       addUser(content, metadata) {
         this.messages.push({ role: "user", content, metadata });
         return this;
````
````diff
@@ -623,6 +832,104 @@ Produces: { "items": ["first", "second"] }`);
         this.messages.push({ role: "assistant", content, metadata });
         return this;
       }
+      /**
+       * Add a user message with an image attachment.
+       *
+       * @param textContent - Text prompt
+       * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImage(
+       *   "What's in this image?",
+       *   await fs.readFile("photo.jpg"),
+       *   "image/jpeg" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithImage(textContent, imageData, mimeType) {
+        const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+        const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+        if (!detectedMime) {
+          throw new Error(
+            "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+          );
+        }
+        const content = [
+          text(textContent),
+          {
+            type: "image",
+            source: {
+              type: "base64",
+              mediaType: detectedMime,
+              data: toBase64(imageBuffer)
+            }
+          }
+        ];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an image URL (OpenAI only).
+       *
+       * @param textContent - Text prompt
+       * @param imageUrl - URL to the image
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImageUrl(
+       *   "What's in this image?",
+       *   "https://example.com/image.jpg"
+       * );
+       * ```
+       */
+      addUserWithImageUrl(textContent, imageUrl) {
+        const content = [text(textContent), imageFromUrl(imageUrl)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an audio attachment (Gemini only).
+       *
+       * @param textContent - Text prompt
+       * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithAudio(
+       *   "Transcribe this audio",
+       *   await fs.readFile("recording.mp3"),
+       *   "audio/mp3" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithAudio(textContent, audioData, mimeType) {
+        const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+        const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with multiple content parts.
+       * Provides full flexibility for complex multimodal messages.
+       *
+       * @param parts - Array of content parts
+       *
+       * @example
+       * ```typescript
+       * builder.addUserMultimodal([
+       *   text("Compare these images:"),
+       *   imageFromBuffer(image1),
+       *   imageFromBuffer(image2),
+       * ]);
+       * ```
+       */
+      addUserMultimodal(parts) {
+        this.messages.push({ role: "user", content: parts });
+        return this;
+      }
       addGadgetCall(gadget, parameters, result) {
         const paramStr = this.formatBlockParameters(parameters, "");
         this.messages.push({
````
````diff
@@ -1941,7 +2248,7 @@ var init_conversation_manager = __esm({
         if (msg.role === "user") {
           this.historyBuilder.addUser(msg.content);
         } else if (msg.role === "assistant") {
-          this.historyBuilder.addAssistant(msg.content);
+          this.historyBuilder.addAssistant(extractText(msg.content));
         }
       }
     }
````
````diff
@@ -1962,8 +2269,10 @@ async function runWithHandlers(agentGenerator, handlers) {
         if (handlers.onGadgetCall) {
           await handlers.onGadgetCall({
             gadgetName: event.call.gadgetName,
+            invocationId: event.call.invocationId,
             parameters: event.call.parameters,
-            parametersRaw: event.call.parametersRaw
+            parametersRaw: event.call.parametersRaw,
+            dependencies: event.call.dependencies
           });
         }
         break;
````
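With the two new fields, an `onGadgetCall` handler can observe the execution DAG as it is parsed. A sketch, assuming the handler shape shown in the hunk above:

```ts
await runWithHandlers(agentGenerator, {
  onGadgetCall({ gadgetName, invocationId, parameters, dependencies }) {
    // dependencies is [] for independent gadgets, or e.g. ["fetch_1", "fetch_2"]
    // for a call written as merge_data:merge_1:fetch_1,fetch_2
    console.log(`${invocationId} (${gadgetName}) waits on: ${dependencies.join(", ") || "nothing"}`);
  },
});
```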
````diff
@@ -2555,7 +2864,27 @@ var init_cost_reporting_client = __esm({
     constructor(client, reportCost) {
       this.client = client;
       this.reportCost = reportCost;
+      this.image = {
+        generate: async (options) => {
+          const result = await this.client.image.generate(options);
+          if (result.cost !== void 0 && result.cost > 0) {
+            this.reportCost(result.cost);
+          }
+          return result;
+        }
+      };
+      this.speech = {
+        generate: async (options) => {
+          const result = await this.client.speech.generate(options);
+          if (result.cost !== void 0 && result.cost > 0) {
+            this.reportCost(result.cost);
+          }
+          return result;
+        }
+      };
     }
+    image;
+    speech;
     /**
      * Access to model registry for cost estimation.
      */
````
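The wrapper simply forwards `image.generate` and `speech.generate` to the underlying client and reports any positive `cost` found on the result. A usage sketch; the class name here is inferred from the `init_cost_reporting_client` module name and may differ in the real API:

```ts
let total = 0;
// `client` is an underlying provider client exposing image.generate / speech.generate.
const metered = new CostReportingClient(client, (cost: number) => {
  total += cost; // called once per generation that reported a positive cost
});

await metered.image.generate({ model: "imagen-4.0-generate-001", prompt: "a lighthouse" });
await metered.speech.generate({ model: "gemini-2.5-flash-preview-tts", input: "Hello" });
console.log(`spent so far: $${total.toFixed(4)}`);
```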
````diff
@@ -2820,15 +3149,37 @@ var init_parser = __esm({
       return segment.trim().length > 0 ? segment : void 0;
     }
     /**
-     * Parse gadget name
-     *
+     * Parse gadget name with optional invocation ID and dependencies.
+     *
+     * Supported formats:
+     * - `GadgetName` - Auto-generate ID, no dependencies
+     * - `GadgetName:my_id` - Explicit ID, no dependencies
+     * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+     *
+     * Dependencies must be comma-separated invocation IDs.
      */
     parseGadgetName(gadgetName) {
-
-
-      return {
+      const parts = gadgetName.split(":");
+      if (parts.length === 1) {
+        return {
+          actualName: parts[0],
+          invocationId: `gadget_${++globalInvocationCounter}`,
+          dependencies: []
+        };
+      } else if (parts.length === 2) {
+        return {
+          actualName: parts[0],
+          invocationId: parts[1].trim(),
+          dependencies: []
+        };
+      } else {
+        const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+        return {
+          actualName: parts[0],
+          invocationId: parts[1].trim(),
+          dependencies: deps
+        };
       }
-      return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
     }
     /**
      * Extract the error message from a parse error.
````
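`parseGadgetName` splits on `:`, so the three supported spellings map to results like these (a restatement of the function above, assuming a fresh invocation counter):

```ts
parseGadgetName("fetch_data");
// -> { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }  (counter-generated ID)

parseGadgetName("fetch_data:fetch_1");
// -> { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parseGadgetName("merge_data:merge_1:fetch_1, fetch_2");
// -> { actualName: "merge_data", invocationId: "merge_1", dependencies: ["fetch_1", "fetch_2"] }
//    (dependency names are trimmed and empty entries dropped)
```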
````diff
@@ -2864,39 +3215,20 @@ var init_parser = __esm({
       const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
       if (metadataEndIndex === -1) break;
       const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-      const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+      const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
       const contentStartIndex = metadataEndIndex + 1;
       let partEndIndex;
       let endMarkerLength = 0;
-
-
-
-
-      endMarkerLength =
+      const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+      const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+      if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+        partEndIndex = nextStartPos;
+        endMarkerLength = 0;
+      } else if (endPos !== -1) {
+        partEndIndex = endPos;
+        endMarkerLength = this.endPrefix.length;
       } else {
-
-        let validEndPos = -1;
-        let searchPos = contentStartIndex;
-        while (true) {
-          const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-          if (endPos === -1) break;
-          const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-          if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-            validEndPos = endPos;
-            break;
-          } else {
-            searchPos = endPos + this.endPrefix.length;
-          }
-        }
-        if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-          partEndIndex = nextStartPos;
-          endMarkerLength = 0;
-        } else if (validEndPos !== -1) {
-          partEndIndex = validEndPos;
-          endMarkerLength = this.endPrefix.length;
-        } else {
-          break;
-        }
+        break;
       }
       const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
       const { parameters, parseError } = this.parseParameters(parametersRaw);
````
````diff
@@ -2907,7 +3239,8 @@ var init_parser = __esm({
             invocationId,
             parametersRaw,
             parameters,
-            parseError
+            parseError,
+            dependencies
           }
         };
         startIndex = partEndIndex + endMarkerLength;
````
````diff
@@ -2930,7 +3263,7 @@ var init_parser = __esm({
       const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
       if (metadataEndIndex !== -1) {
         const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-        const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+        const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
         const contentStartIndex = metadataEndIndex + 1;
         const parametersRaw = this.buffer.substring(contentStartIndex).trim();
         const { parameters, parseError } = this.parseParameters(parametersRaw);
````
````diff
@@ -2941,7 +3274,8 @@ var init_parser = __esm({
           invocationId,
           parametersRaw,
           parameters,
-          parseError
+          parseError,
+          dependencies
         }
       };
       return;
````
````diff
@@ -3311,6 +3645,13 @@ var init_stream_processor = __esm({
       accumulatedText = "";
       shouldStopExecution = false;
       observerFailureCount = 0;
+      // Dependency tracking for gadget execution DAG
+      /** Gadgets waiting for their dependencies to complete */
+      pendingGadgets = /* @__PURE__ */ new Map();
+      /** Completed gadget results, keyed by invocation ID */
+      completedResults = /* @__PURE__ */ new Map();
+      /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+      failedInvocations = /* @__PURE__ */ new Set();
       constructor(options) {
         this.iteration = options.iteration;
         this.registry = options.registry;
````
````diff
@@ -3411,6 +3752,16 @@ var init_stream_processor = __esm({
             }
           }
         }
+        const finalPendingEvents = await this.processPendingGadgets();
+        outputs.push(...finalPendingEvents);
+        if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+          didExecuteGadgets = true;
+        }
+        for (const evt of finalPendingEvents) {
+          if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+            shouldBreakLoop = true;
+          }
+        }
       }
       let finalMessage = this.accumulatedText;
       if (this.hooks.interceptors?.interceptAssistantMessage) {
````
````diff
@@ -3462,7 +3813,11 @@ var init_stream_processor = __esm({
         return [{ type: "text", content }];
       }
       /**
-       * Process a gadget call through the full lifecycle.
+       * Process a gadget call through the full lifecycle, handling dependencies.
+       *
+       * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+       * Gadgets with unsatisfied dependencies are queued for later execution.
+       * After each execution, pending gadgets are checked to see if they can now run.
        */
       async processGadgetCall(call) {
         if (this.shouldStopExecution) {
````
````diff
@@ -3473,6 +3828,53 @@ var init_stream_processor = __esm({
         }
         const events = [];
         events.push({ type: "gadget_call", call });
+        if (call.dependencies.length > 0) {
+          if (call.dependencies.includes(call.invocationId)) {
+            this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId
+            });
+            this.failedInvocations.add(call.invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: call.invocationId,
+              failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+            };
+            events.push(skipEvent);
+            return events;
+          }
+          const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+          if (failedDep) {
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            return events;
+          }
+          const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+          if (unsatisfied.length > 0) {
+            this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              waitingOn: unsatisfied
+            });
+            this.pendingGadgets.set(call.invocationId, call);
+            return events;
+          }
+        }
+        const executeEvents = await this.executeGadgetWithHooks(call);
+        events.push(...executeEvents);
+        const triggeredEvents = await this.processPendingGadgets();
+        events.push(...triggeredEvents);
+        return events;
+      }
+      /**
+       * Execute a gadget through the full hook lifecycle.
+       * This is the core execution logic, extracted from processGadgetCall.
+       */
+      async executeGadgetWithHooks(call) {
+        const events = [];
         if (call.parseError) {
           this.logger.warn("Gadget has parse error", {
             gadgetName: call.gadgetName,
````
````diff
@@ -3603,6 +4005,10 @@ var init_stream_processor = __esm({
           });
         }
         await this.runObserversInParallel(completeObservers);
+        this.completedResults.set(result.invocationId, result);
+        if (result.error) {
+          this.failedInvocations.add(result.invocationId);
+        }
        events.push({ type: "gadget_result", result });
         if (result.error) {
           const errorType = this.determineErrorType(call, result);
````
````diff
@@ -3618,6 +4024,162 @@ var init_stream_processor = __esm({
         }
         return events;
       }
+      /**
+       * Handle a gadget that cannot execute because a dependency failed.
+       * Calls the onDependencySkipped controller to allow customization.
+       */
+      async handleFailedDependency(call, failedDep) {
+        const events = [];
+        const depResult = this.completedResults.get(failedDep);
+        const depError = depResult?.error ?? "Dependency failed";
+        let action = { action: "skip" };
+        if (this.hooks.controllers?.onDependencySkipped) {
+          const context = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          action = await this.hooks.controllers.onDependencySkipped(context);
+        }
+        if (action.action === "skip") {
+          this.failedInvocations.add(call.invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError
+          };
+          events.push(skipEvent);
+          if (this.hooks.observers?.onGadgetSkipped) {
+            const observeContext = {
+              iteration: this.iteration,
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: failedDep,
+              failedDependencyError: depError,
+              logger: this.logger
+            };
+            await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+          }
+          this.logger.info("Gadget skipped due to failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        } else if (action.action === "execute_anyway") {
+          this.logger.info("Executing gadget despite failed dependency (controller override)", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+          const executeEvents = await this.executeGadgetWithHooks(call);
+          events.push(...executeEvents);
+        } else if (action.action === "use_fallback") {
+          const fallbackResult = {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            result: action.fallbackResult,
+            executionTimeMs: 0
+          };
+          this.completedResults.set(call.invocationId, fallbackResult);
+          events.push({ type: "gadget_result", result: fallbackResult });
+          this.logger.info("Using fallback result for gadget with failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        }
+        return events;
+      }
+      /**
+       * Process pending gadgets whose dependencies are now satisfied.
+       * Executes ready gadgets in parallel and continues until no more can be triggered.
+       */
+      async processPendingGadgets() {
+        const events = [];
+        let progress = true;
+        while (progress && this.pendingGadgets.size > 0) {
+          progress = false;
+          const readyToExecute = [];
+          const readyToSkip = [];
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+            if (failedDep) {
+              readyToSkip.push({ call, failedDep });
+              continue;
+            }
+            const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+            if (allSatisfied) {
+              readyToExecute.push(call);
+            }
+          }
+          for (const { call, failedDep } of readyToSkip) {
+            this.pendingGadgets.delete(call.invocationId);
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            progress = true;
+          }
+          if (readyToExecute.length > 0) {
+            this.logger.debug("Executing ready gadgets in parallel", {
+              count: readyToExecute.length,
+              invocationIds: readyToExecute.map((c) => c.invocationId)
+            });
+            for (const call of readyToExecute) {
+              this.pendingGadgets.delete(call.invocationId);
+            }
+            const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+            const results = await Promise.all(executePromises);
+            for (const executeEvents of results) {
+              events.push(...executeEvents);
+            }
+            progress = true;
+          }
+        }
+        if (this.pendingGadgets.size > 0) {
+          const pendingIds = new Set(this.pendingGadgets.keys());
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+            const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+            const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+            let errorMessage;
+            let logLevel = "warn";
+            if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+              errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+              logLevel = "error";
+            } else if (circularDeps.length > 0) {
+              errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+            } else {
+              errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+            }
+            this.logger[logLevel]("Gadget has unresolvable dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId,
+              circularDependencies: circularDeps,
+              missingDependencies: trulyMissingDeps
+            });
+            this.failedInvocations.add(invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: missingDeps[0],
+              failedDependencyError: errorMessage
+            };
+            events.push(skipEvent);
+          }
+          this.pendingGadgets.clear();
+        }
+        return events;
+      }
       /**
        * Safely execute an observer, catching and logging any errors.
        * Observers are non-critical, so errors are logged but don't crash the system.
````
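The `onDependencySkipped` controller decides what happens to a gadget whose dependency failed: `skip` (the built-in default), `execute_anyway`, or `use_fallback` with a `fallbackResult` that is recorded as if the gadget had run. A sketch of such a controller, assuming hooks are wired wherever the stream processor's `hooks.controllers` normally come from:

```ts
const hooks = {
  controllers: {
    // ctx carries iteration, gadgetName, invocationId, parameters,
    // failedDependency, failedDependencyError, and logger (per the hunk above).
    async onDependencySkipped(ctx) {
      if (ctx.gadgetName === "merge_data") {
        // Substitute an empty merge instead of skipping the whole branch.
        return { action: "use_fallback", fallbackResult: "[]" };
      }
      return { action: "skip" }; // mirrors the built-in default
    },
  },
};
```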
````diff
@@ -4055,9 +4617,9 @@ var init_agent = __esm({
       if (msg.role === "user") {
         this.conversation.addUserMessage(msg.content);
       } else if (msg.role === "assistant") {
-        this.conversation.addAssistantMessage(msg.content);
+        this.conversation.addAssistantMessage(extractText(msg.content));
       } else if (msg.role === "system") {
-        this.conversation.addUserMessage(`[System] ${msg.content}`);
+        this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
       }
     }
   }
````
````diff
@@ -4636,6 +5198,7 @@ var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
     import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+    init_messages();
     init_anthropic_models();
     init_base_provider();
     init_constants2();
````
````diff
@@ -4648,11 +5211,33 @@ var init_anthropic = __esm({
       getModelSpecs() {
         return ANTHROPIC_MODELS;
       }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
           type: "text",
-          text: m.content,
+          text: extractText(m.content),
           // Add cache_control to the LAST system message block
           ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
         })) : void 0;
````
````diff
@@ -4665,14 +5250,10 @@ var init_anthropic = __esm({
         );
         const conversation = nonSystemMessages.map((message, index) => ({
           role: message.role,
-          content:
-
-
-
-          // Add cache_control to the LAST user message
-          ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-          }
-          ]
+          content: this.convertToAnthropicContent(
+            message.content,
+            message.role === "user" && index === lastUserIndex
+          )
         }));
         const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
         const payload = {
````
````diff
@@ -4688,16 +5269,62 @@ var init_anthropic = __esm({
         };
         return payload;
       }
-
-
-
-
-
-
-
-
-
-
+      /**
+       * Convert llmist content to Anthropic's content block format.
+       * Handles text, images (base64 only), and applies cache_control.
+       */
+      convertToAnthropicContent(content, addCacheControl) {
+        const parts = normalizeContent(content);
+        return parts.map((part, index) => {
+          const isLastPart = index === parts.length - 1;
+          const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+          if (part.type === "text") {
+            return {
+              type: "text",
+              text: part.text,
+              ...cacheControl
+            };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part, cacheControl);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "Anthropic does not support audio input. Use Google Gemini for audio processing."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
+      /**
+       * Convert an image content part to Anthropic's image block format.
+       */
+      convertImagePart(part, cacheControl) {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+          );
+        }
+        return {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: part.source.mediaType,
+            data: part.source.data
+          },
+          ...cacheControl
+        };
+      }
+      async executeStreamRequest(payload, signal) {
+        const client = this.client;
+        const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
+        return stream2;
+      }
+      async *wrapStream(iterable) {
+        const stream2 = iterable;
+        let inputTokens = 0;
+        let cachedInputTokens = 0;
+        let cacheCreationInputTokens = 0;
         for await (const event of stream2) {
           if (event.type === "message_start") {
             const usage = event.message.usage;
````
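`convertToAnthropicContent` maps llmist parts onto Anthropic content blocks, renaming `mediaType` to `media_type` and attaching `cache_control` to the last part only. The mapping for a two-part user message, restating the code above:

```ts
// Input (llmist parts):
const input = [
  { type: "text", text: "Describe:" },
  { type: "image", source: { type: "base64", mediaType: "image/png", data: "<b64>" } },
];

// Output with addCacheControl = true (Anthropic blocks):
const output = [
  { type: "text", text: "Describe:" },
  {
    type: "image",
    source: { type: "base64", media_type: "image/png", data: "<b64>" }, // mediaType -> media_type
    cache_control: { type: "ephemeral" }, // attached to the LAST part only
  },
];
```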
````diff
@@ -4770,17 +5397,12 @@ var init_anthropic = __esm({
       async countTokens(messages, descriptor, _spec) {
         const client = this.client;
         const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+        const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
         const conversation = messages.filter(
           (message) => message.role !== "system"
         ).map((message) => ({
           role: message.role,
-          content:
-          {
-            type: "text",
-            text: message.content
-          }
-          ]
+          content: this.convertToAnthropicContent(message.content, false)
         }));
         try {
           const response = await client.messages.countTokens({
````
````diff
@@ -4794,14 +5416,201 @@ var init_anthropic = __esm({
           `Token counting failed for ${descriptor.name}, using fallback estimation:`,
           error
         );
-
-
+        let totalChars = 0;
+        let imageCount = 0;
+        for (const msg of messages) {
+          const parts = normalizeContent(msg.content);
+          for (const part of parts) {
+            if (part.type === "text") {
+              totalChars += part.text.length;
+            } else if (part.type === "image") {
+              imageCount++;
+            }
+          }
+        }
+        return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
       }
     }
   };
   }
 });
 
+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
````
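`calculateGeminiImageCost` uses flat per-image pricing for the Imagen models and size-keyed pricing for the Gemini-native models, so costs compose by simple multiplication:

```ts
calculateGeminiImageCost("imagen-4.0-generate-001", "16:9", 4);
// -> 0.04 * 4 = 0.16 (per-image pricing ignores the aspect ratio)

calculateGeminiImageCost("gemini-3-pro-image-preview", "4K", 1);
// -> 0.24 (bySize lookup)

calculateGeminiImageCost("gemini-3-pro-image-preview", "8K", 1);
// -> undefined ("8K" is not a priced size, so the function returns void 0)

calculateGeminiImageCost("unknown-model", "1:1", 2);
// -> undefined (no spec registered for that model ID)
```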
````diff
@@ -4975,7 +5784,171 @@ var init_gemini_models = __esm({
   }
 });
 
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
````
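The bare integers in `wrapPcmInWav` are the four ASCII chunk tags of a canonical 44-byte WAV header, written big-endian. A sketch that decodes them back:

```ts
// Each constant is four ASCII bytes packed big-endian:
const tag = (n: number) =>
  String.fromCharCode((n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff);

console.log(tag(1380533830)); // "RIFF"
console.log(tag(1463899717)); // "WAVE"
console.log(tag(1718449184)); // "fmt " (note the trailing space)
console.log(tag(1684108385)); // "data"
// Everything else in the header (sample rate, byte rate, block align,
// bits per sample) is written little-endian, hence the `true` flags.
```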
````diff
@@ -4984,9 +5957,12 @@ var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     "use strict";
     import_genai = require("@google/genai");
+    init_messages();
     init_base_provider();
     init_constants2();
+    init_gemini_image_models();
     init_gemini_models();
+    init_gemini_speech_models();
     init_utils();
     GEMINI_ROLE_MAP = {
       system: "user",
````
@@ -5001,6 +5977,139 @@ var init_gemini = __esm({
       getModelSpecs() {
         return GEMINI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return geminiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isGeminiImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getGeminiImageModelSpec(options.model);
+        const isImagenModel = options.model.startsWith("imagen");
+        const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+        const n = options.n ?? 1;
+        if (isImagenModel) {
+          const response2 = await client.models.generateImages({
+            model: options.model,
+            prompt: options.prompt,
+            config: {
+              numberOfImages: n,
+              aspectRatio,
+              outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+            }
+          });
+          const images2 = response2.generatedImages ?? [];
+          const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+          return {
+            // Gemini's imageBytes is already base64 encoded, so use it directly
+            images: images2.map((img) => ({
+              b64Json: img.image?.imageBytes ?? void 0
+            })),
+            model: options.model,
+            usage: {
+              imagesGenerated: images2.length,
+              size: aspectRatio,
+              quality: "standard"
+            },
+            cost: cost2
+          };
+        }
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+          config: {
+            responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+          }
+        });
+        const images = [];
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData) {
+              images.push({
+                b64Json: part.inlineData.data
+              });
+            }
+          }
+        }
+        const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+        return {
+          images,
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return geminiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isGeminiSpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getGeminiSpeechModelSpec(options.model);
+        const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [
+            {
+              role: "user",
+              parts: [{ text: options.input }]
+            }
+          ],
+          config: {
+            responseModalities: [import_genai.Modality.AUDIO],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: voice
+                }
+              }
+            }
+          }
+        });
+        let pcmData;
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData?.data) {
+              const base64 = part.inlineData.data;
+              const binary = atob(base64);
+              pcmData = new Uint8Array(binary.length);
+              for (let i = 0; i < binary.length; i++) {
+                pcmData[i] = binary.charCodeAt(i);
+              }
+              break;
+            }
+          }
+        }
+        if (!pcmData) {
+          throw new Error("No audio data in Gemini TTS response");
+        }
+        const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+        const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+        return {
+          audio: audioData,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format: spec?.defaultFormat ?? "wav"
+        };
+      }
       buildRequestPayload(options, descriptor, _spec, messages) {
         const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
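Taken together, the new Gemini speech path requests AUDIO output, base64-decodes the inline PCM, and wraps it in a 24 kHz / 16-bit / mono WAV container via `wrapPcmInWav`. A minimal usage sketch, assuming a configured Gemini provider instance (the model id below is hypothetical, for illustration only):

```typescript
import { writeFile } from "node:fs/promises";

// Sketch only: `provider` is assumed to be a configured Gemini provider
// exposing the generateSpeech() method added in this release.
const result = await provider.generateSpeech({
  model: "gemini-2.5-flash-preview-tts", // hypothetical id, for illustration
  input: "Hello from llmist!",
  voice: "Zephyr", // default voice per the spec above
});

// result.audio is an ArrayBuffer containing a complete WAV file
// (PCM wrapped by wrapPcmInWav at 24000 Hz, 16-bit, mono).
await writeFile("hello.wav", Buffer.from(result.audio));
console.log(result.format, result.cost); // "wav", estimated dollar cost
```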
@@ -5018,7 +6127,7 @@ var init_gemini = __esm({
         };
         return {
           model: descriptor.name,
-          contents
+          contents,
           config
         };
       }
@@ -5053,18 +6162,25 @@ var init_gemini = __esm({
           if (message.role === "system") {
             expandedMessages.push({
               role: "user",
-              content: message.content
+              content: extractText(message.content)
             });
             expandedMessages.push({
               role: "assistant",
               content: "Understood."
             });
           } else {
-            expandedMessages.push(message);
+            expandedMessages.push({
+              role: message.role,
+              content: message.content
+            });
           }
         }
         return this.mergeConsecutiveMessages(expandedMessages);
       }
+      /**
+       * Merge consecutive messages with the same role (required by Gemini).
+       * Handles multimodal content by converting to Gemini's part format.
+       */
       mergeConsecutiveMessages(messages) {
         if (messages.length === 0) {
           return [];
@@ -5073,15 +6189,16 @@ var init_gemini = __esm({
         let currentGroup = null;
         for (const message of messages) {
           const geminiRole = GEMINI_ROLE_MAP[message.role];
+          const geminiParts = this.convertToGeminiParts(message.content);
           if (currentGroup && currentGroup.role === geminiRole) {
-            currentGroup.parts.push({ text: message.content });
+            currentGroup.parts.push(...geminiParts);
           } else {
             if (currentGroup) {
               result.push(currentGroup);
             }
             currentGroup = {
               role: geminiRole,
-              parts: [{ text: message.content }]
+              parts: geminiParts
             };
           }
         }
@@ -5090,11 +6207,39 @@ var init_gemini = __esm({
         }
         return result;
       }
-
-
-
-
-
+      /**
+       * Convert llmist content to Gemini's part format.
+       * Handles text, images, and audio (Gemini supports all three).
+       */
+      convertToGeminiParts(content) {
+        const parts = normalizeContent(content);
+        return parts.map((part) => {
+          if (part.type === "text") {
+            return { text: part.text };
+          }
+          if (part.type === "image") {
+            if (part.source.type === "url") {
+              throw new Error(
+                "Gemini does not support image URLs directly. Please provide base64-encoded image data."
+              );
+            }
+            return {
+              inlineData: {
+                mimeType: part.source.mediaType,
+                data: part.source.data
+              }
+            };
+          }
+          if (part.type === "audio") {
+            return {
+              inlineData: {
+                mimeType: part.source.mediaType,
+                data: part.source.data
+              }
+            };
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
       buildGenerationConfig(options) {
         const config = {};
@@ -5115,9 +6260,9 @@ var init_gemini = __esm({
       async *wrapStream(iterable) {
         const stream2 = iterable;
         for await (const chunk of stream2) {
-          const text = this.extractText(chunk);
-          if (text) {
-            yield { text, rawEvent: chunk };
+          const text3 = this.extractText(chunk);
+          if (text3) {
+            yield { text: text3, rawEvent: chunk };
           }
           const finishReason = this.extractFinishReason(chunk);
           const usage = this.extractUsage(chunk);
@@ -5178,7 +6323,7 @@ var init_gemini = __esm({
         try {
           const response = await client.models.countTokens({
             model: descriptor.name,
-            contents
+            contents
             // Note: systemInstruction not used - it's not supported by countTokens()
             // and would cause a 2100% token counting error
           });
@@ -5188,14 +6333,140 @@ var init_gemini = __esm({
             `Token counting failed for ${descriptor.name}, using fallback estimation:`,
             error
           );
-
-
+          let totalChars = 0;
+          let mediaCount = 0;
+          for (const msg of messages) {
+            const parts = normalizeContent(msg.content);
+            for (const part of parts) {
+              if (part.type === "text") {
+                totalChars += part.text.length;
+              } else if (part.type === "image" || part.type === "audio") {
+                mediaCount++;
+              }
+            }
+          }
+          return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
         }
       }
     };
   }
 });
 
+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
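A worked example of the pricing lookup in `calculateOpenAIImageCost`: quality-keyed entries (GPT Image, DALL-E 3) are indexed by size and then quality, flat entries (DALL-E 2) are a single per-size price, and `n` scales the result linearly. A sketch mirroring the lookup logic above:

```typescript
// Worked examples against the pricing table above.
calculateOpenAIImageCost("dall-e-3", "1024x1024", "hd", 1);       // 0.08
calculateOpenAIImageCost("dall-e-3", "1024x1792", "standard", 2); // 0.08 * 2 = 0.16
calculateOpenAIImageCost("dall-e-2", "512x512");                  // 0.018 (flat per-size price)
calculateOpenAIImageCost("dall-e-3", "512x512");                  // undefined (size not listed)
```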
@@ -5560,15 +6831,153 @@ var init_openai_models = __esm({
   }
 });
 
-// src/providers/openai.ts
-function sanitizeExtra(extra, allowTemperature) {
-  if (!extra) {
-    return void 0;
+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
   }
-  if (allowTemperature || !Object.hasOwn(extra, "temperature")) {
-    return extra;
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
   }
-  return Object.fromEntries(Object.entries(extra).filter(([key]) => key !== "temperature"));
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
+// src/providers/openai.ts
+function sanitizeExtra(extra, allowTemperature) {
+  if (!extra) {
+    return void 0;
+  }
+  if (allowTemperature || !Object.hasOwn(extra, "temperature")) {
+    return extra;
+  }
+  return Object.fromEntries(Object.entries(extra).filter(([key]) => key !== "temperature"));
 }
 function createOpenAIProviderFromEnv() {
   return createProviderFromEnv("OPENAI_API_KEY", import_openai.default, OpenAIChatProvider);
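The branches in `calculateOpenAISpeechCost` compose a simple fallback chain: per-character pricing wins when present, an explicit minute count is used next, and otherwise minutes are approximated as characterCount / 750 (roughly 150 words per minute at 5 characters per word). Worked examples against the specs above:

```typescript
// Worked examples for calculateOpenAISpeechCost (sketch of the branches above).
calculateOpenAISpeechCost("tts-1", 1000);              // 1000 * 0.000015 = 0.015
calculateOpenAISpeechCost("tts-1-hd", 1000);           // 1000 * 0.00003  = 0.03
calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500);    // (1500 / 750) * 0.015 = 0.03
calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500, 3); // explicit minutes: 3 * 0.015 = 0.045
```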
@@ -5579,9 +6988,12 @@ var init_openai = __esm({
     "use strict";
     import_openai = __toESM(require("openai"), 1);
     import_tiktoken = require("tiktoken");
+    init_messages();
     init_base_provider();
     init_constants2();
+    init_openai_image_models();
     init_openai_models();
+    init_openai_speech_models();
     init_utils();
     ROLE_MAP = {
       system: "system",
@@ -5596,6 +7008,87 @@ var init_openai = __esm({
       getModelSpecs() {
         return OPENAI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return openaiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isOpenAIImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getOpenAIImageModelSpec(options.model);
+        const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+        const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+        const n = options.n ?? 1;
+        const isDallE2 = options.model === "dall-e-2";
+        const isGptImage = options.model.startsWith("gpt-image");
+        const requestParams = {
+          model: options.model,
+          prompt: options.prompt,
+          size,
+          n
+        };
+        if (!isDallE2 && !isGptImage) {
+          requestParams.quality = quality;
+        }
+        if (isGptImage) {
+        } else if (!isDallE2) {
+          requestParams.response_format = options.responseFormat ?? "url";
+        }
+        const response = await client.images.generate(requestParams);
+        const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+        const images = response.data ?? [];
+        return {
+          images: images.map((img) => ({
+            url: img.url,
+            b64Json: img.b64_json,
+            revisedPrompt: img.revised_prompt
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size,
+            quality
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return openaiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isOpenAISpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getOpenAISpeechModelSpec(options.model);
+        const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+        const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+        const response = await client.audio.speech.create({
+          model: options.model,
+          input: options.input,
+          voice,
+          response_format: format,
+          speed: options.speed ?? 1
+        });
+        const audioBuffer = await response.arrayBuffer();
+        const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+        return {
+          audio: audioBuffer,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format
+        };
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const { maxTokens, temperature, topP, stopSequences, extra } = options;
         const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
@@ -5603,11 +7096,7 @@ var init_openai = __esm({
         const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
         return {
           model: descriptor.name,
-          messages: messages.map((message) => ({
-            role: ROLE_MAP[message.role],
-            content: message.content,
-            name: message.name
-          })),
+          messages: messages.map((message) => this.convertToOpenAIMessage(message)),
           // Only set max_completion_tokens if explicitly provided
           // Otherwise let the API use "as much as fits" in the context window
           ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -5619,6 +7108,77 @@ var init_openai = __esm({
           ...shouldIncludeTemperature ? { temperature } : {}
         };
       }
+      /**
+       * Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+       * Handles role-specific content type requirements:
+       * - system/assistant: string content only
+       * - user: string or multimodal array content
+       */
+      convertToOpenAIMessage(message) {
+        const role = ROLE_MAP[message.role];
+        if (role === "user") {
+          const content = this.convertToOpenAIContent(message.content);
+          return {
+            role: "user",
+            content,
+            ...message.name ? { name: message.name } : {}
+          };
+        }
+        const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+        if (role === "system") {
+          return {
+            role: "system",
+            content: textContent,
+            ...message.name ? { name: message.name } : {}
+          };
+        }
+        return {
+          role: "assistant",
+          content: textContent,
+          ...message.name ? { name: message.name } : {}
+        };
+      }
+      /**
+       * Convert llmist content to OpenAI's content format.
+       * Optimizes by returning string for text-only content, array for multimodal.
+       */
+      convertToOpenAIContent(content) {
+        if (typeof content === "string") {
+          return content;
+        }
+        return content.map((part) => {
+          if (part.type === "text") {
+            return { type: "text", text: part.text };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
+      }
+      /**
+       * Convert an image content part to OpenAI's image_url format.
+       * Supports both URLs and base64 data URLs.
+       */
+      convertImagePart(part) {
+        if (part.source.type === "url") {
+          return {
+            type: "image_url",
+            image_url: { url: part.source.url }
+          };
+        }
+        return {
+          type: "image_url",
+          image_url: {
+            url: `data:${part.source.mediaType};base64,${part.source.data}`
+          }
+        };
+      }
       async executeStreamRequest(payload, signal) {
         const client = this.client;
         const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
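The net effect of `convertToOpenAIMessage`/`convertImagePart` is that a multimodal user message becomes the standard Chat Completions shape: text parts stay as `{ type: "text" }` entries and base64 images become `image_url` data URLs. A sketch of the before/after shapes (input shape assumed from the content helpers exported in this release; the base64 payload is abbreviated for illustration):

```typescript
// Input: llmist-style multimodal user content.
const message = {
  role: "user" as const,
  content: [
    { type: "text", text: "What is in this picture?" },
    {
      type: "image",
      source: { type: "base64", mediaType: "image/png", data: "iVBORw0KGgo..." },
    },
  ],
};

// Output after conversion: OpenAI chat.completions message param.
const converted = {
  role: "user",
  content: [
    { type: "text", text: "What is in this picture?" },
    {
      type: "image_url",
      image_url: { url: "data:image/png;base64,iVBORw0KGgo..." },
    },
  ],
};
```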
@@ -5627,9 +7187,9 @@ var init_openai = __esm({
       async *wrapStream(iterable) {
         const stream2 = iterable;
         for await (const chunk of stream2) {
-          const text = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
-          if (text) {
-            yield { text, rawEvent: chunk };
+          const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+          if (text3) {
+            yield { text: text3, rawEvent: chunk };
           }
           const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
           const usage = chunk.usage ? {
@@ -5677,17 +7237,26 @@ var init_openai = __esm({
         }
         try {
           let tokenCount = 0;
+          let imageCount = 0;
           for (const message of messages) {
             tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
             const roleText = ROLE_MAP[message.role];
             tokenCount += encoding.encode(roleText).length;
-            tokenCount += encoding.encode(message.content).length;
+            const textContent = extractText(message.content);
+            tokenCount += encoding.encode(textContent).length;
+            const parts = normalizeContent(message.content);
+            for (const part of parts) {
+              if (part.type === "image") {
+                imageCount++;
+              }
+            }
             if (message.name) {
               tokenCount += encoding.encode(message.name).length;
               tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
             }
           }
           tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+          tokenCount += imageCount * 765;
           return tokenCount;
         } finally {
           encoding.free();
@@ -5697,8 +7266,19 @@ var init_openai = __esm({
           `Token counting failed for ${descriptor.name}, using fallback estimation:`,
           error
         );
-
-
+        let totalChars = 0;
+        let imageCount = 0;
+        for (const msg of messages) {
+          const parts = normalizeContent(msg.content);
+          for (const part of parts) {
+            if (part.type === "text") {
+              totalChars += part.text.length;
+            } else if (part.type === "image") {
+              imageCount++;
+            }
+          }
+        }
+        return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
       }
     }
   };
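Both providers now share the same fallback shape when native token counting fails: sum the text characters, divide by `FALLBACK_CHARS_PER_TOKEN`, and add a flat per-image charge (765 tokens for OpenAI here; the Gemini fallback above uses 258 per media part). A sketch of the arithmetic, assuming the conventional 4 characters per token (the constant's actual value is defined elsewhere in the bundle):

```typescript
// Sketch of the fallback estimate, assuming FALLBACK_CHARS_PER_TOKEN = 4.
const totalChars = 1000; // text characters across all messages
const imageCount = 2;
const estimate = Math.ceil(totalChars / 4) + imageCount * 765;
// 250 + 1530 = 1780 estimated prompt tokens
```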
@@ -5886,51 +7466,368 @@ var init_model_registry = __esm({
        * @param requestedTokens - Total tokens requested (input + output)
        * @returns true if valid, false if model not found or exceeds limits
        */
-      validateModelConfig(modelId, requestedTokens) {
-        const limits = this.getModelLimits(modelId);
-        if (!limits) return false;
-        return requestedTokens <= limits.contextWindow;
+      validateModelConfig(modelId, requestedTokens) {
+        const limits = this.getModelLimits(modelId);
+        if (!limits) return false;
+        return requestedTokens <= limits.contextWindow;
+      }
+      /**
+       * Check if a model supports a specific feature
+       * @param modelId - Full model identifier
+       * @param feature - Feature to check ('streaming', 'functionCalling', 'vision', etc.)
+       * @returns true if model supports feature, false otherwise
+       */
+      supportsFeature(modelId, feature) {
+        const spec = this.getModelSpec(modelId);
+        if (!spec) return false;
+        return spec.features[feature] === true;
+      }
+      /**
+       * Get all models that support a specific feature
+       * @param feature - Feature to filter by
+       * @param providerId - Optional provider ID to filter by
+       * @returns Array of ModelSpec objects that support the feature
+       */
+      getModelsByFeature(feature, providerId) {
+        const models = this.listModels(providerId);
+        return models.filter((model) => model.features[feature] === true);
+      }
+      /**
+       * Get the most cost-effective model for a given provider and token budget
+       * @param inputTokens - Expected input tokens
+       * @param outputTokens - Expected output tokens
+       * @param providerId - Optional provider ID to filter by
+       * @returns ModelSpec with lowest total cost, or undefined if no models found
+       */
+      getCheapestModel(inputTokens, outputTokens, providerId) {
+        const models = this.listModels(providerId);
+        if (models.length === 0) return void 0;
+        let cheapest;
+        for (const model of models) {
+          const estimate = this.estimateCost(model.modelId, inputTokens, outputTokens);
+          if (!estimate) continue;
+          if (!cheapest || estimate.totalCost < cheapest.cost) {
+            cheapest = { model, cost: estimate.totalCost };
+          }
+        }
+        return cheapest?.model;
+      }
+    };
+  }
+});
+
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
+    "use strict";
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
+        }
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
+        }
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/quick-methods.ts
+async function complete(client, prompt, options = {}) {
+  const model = resolveModel(options.model ?? "gpt-5-nano");
+  const builder = new LLMMessageBuilder();
+  if (options.systemPrompt) {
+    builder.addSystem(options.systemPrompt);
+  }
+  builder.addUser(prompt);
+  let fullResponse = "";
+  for await (const chunk of client.stream({
+    model,
+    messages: builder.build(),
+    temperature: options.temperature,
+    maxTokens: options.maxTokens
+  })) {
+    fullResponse += chunk.text;
+  }
+  return fullResponse.trim();
+}
+async function* stream(client, prompt, options = {}) {
+  const model = resolveModel(options.model ?? "gpt-5-nano");
+  const builder = new LLMMessageBuilder();
+  if (options.systemPrompt) {
+    builder.addSystem(options.systemPrompt);
+  }
+  builder.addUser(prompt);
+  for await (const chunk of client.stream({
+    model,
+    messages: builder.build(),
+    temperature: options.temperature,
+    maxTokens: options.maxTokens
+  })) {
+    yield chunk.text;
+  }
+}
+var init_quick_methods = __esm({
+  "src/core/quick-methods.ts"() {
+    "use strict";
+    init_messages();
+    init_model_shortcuts();
+  }
+});
+
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/namespaces/vision.ts
+var VisionNamespace;
+var init_vision = __esm({
+  "src/core/namespaces/vision.ts"() {
+    "use strict";
+    init_input_content();
+    init_messages();
+    VisionNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Build a message builder with the image content attached.
+       * Handles URLs, data URLs, base64 strings, and binary buffers.
+       */
+      buildImageMessage(options) {
+        const builder = new LLMMessageBuilder();
+        if (options.systemPrompt) {
+          builder.addSystem(options.systemPrompt);
+        }
+        if (typeof options.image === "string") {
+          if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+            builder.addUserWithImageUrl(options.prompt, options.image);
+          } else if (isDataUrl(options.image)) {
+            const parsed = parseDataUrl(options.image);
+            if (!parsed) {
+              throw new Error("Invalid data URL format");
+            }
+            builder.addUserWithImage(
+              options.prompt,
+              parsed.data,
+              parsed.mimeType
+            );
+          } else {
+            const buffer = Buffer.from(options.image, "base64");
+            builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+          }
+        } else {
+          builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+        }
+        return builder;
+      }
+      /**
+       * Stream the response and collect text and usage information.
+       */
+      async streamAndCollect(options, builder) {
+        let response = "";
+        let finalUsage;
+        for await (const chunk of this.client.stream({
+          model: options.model,
+          messages: builder.build(),
+          maxTokens: options.maxTokens,
+          temperature: options.temperature
+        })) {
+          response += chunk.text;
+          if (chunk.usage) {
+            finalUsage = {
+              inputTokens: chunk.usage.inputTokens,
+              outputTokens: chunk.usage.outputTokens,
+              totalTokens: chunk.usage.totalTokens
+            };
+          }
+        }
+        return { text: response.trim(), usage: finalUsage };
+      }
+      /**
+       * Analyze an image with a vision-capable model.
+       * Returns the analysis as a string.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis text
+       * @throws Error if the image format is unsupported or model doesn't support vision
+       *
+       * @example
+       * ```typescript
+       * // From file
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: await fs.readFile("photo.jpg"),
+       *   prompt: "What's in this image?",
+       * });
+       *
+       * // From URL (OpenAI only)
+       * const result = await llmist.vision.analyze({
+       *   model: "gpt-4o",
+       *   image: "https://example.com/image.jpg",
+       *   prompt: "Describe this image",
+       * });
+       * ```
+       */
+      async analyze(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3 } = await this.streamAndCollect(options, builder);
+        return text3;
       }
       /**
-       *
-       *
-       * @param
-       * @returns
+       * Analyze an image and return detailed result with usage info.
+       *
+       * @param options - Vision analysis options
+       * @returns Promise resolving to the analysis result with usage info
        */
-
-      const
-
-      return
+      async analyzeWithUsage(options) {
+        const builder = this.buildImageMessage(options);
+        const { text: text3, usage } = await this.streamAndCollect(options, builder);
+        return {
+          text: text3,
+          model: options.model,
+          usage
+        };
       }
       /**
-       *
-       *
-       * @param
-       * @returns
+       * Check if a model supports vision/image input.
+       *
+       * @param modelId - Model ID to check
+       * @returns True if the model supports vision
        */
-
-      const
-      return
+      supportsModel(modelId) {
+        const spec = this.client.modelRegistry.getModelSpec(modelId);
+        return spec?.features?.vision === true;
       }
       /**
-       *
-       *
-       * @
-       * @param providerId - Optional provider ID to filter by
-       * @returns ModelSpec with lowest total cost, or undefined if no models found
+       * List all models that support vision.
+       *
+       * @returns Array of model IDs that support vision
        */
-
-
-        if (models.length === 0) return void 0;
-        let cheapest;
-        for (const model of models) {
-          const estimate = this.estimateCost(model.modelId, inputTokens, outputTokens);
-          if (!estimate) continue;
-          if (!cheapest || estimate.totalCost < cheapest.cost) {
-            cheapest = { model, cost: estimate.totalCost };
-          }
-        }
-        return cheapest?.model;
+      listModels() {
+        return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
       }
     };
   }
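The new registry helpers compose naturally: `supportsFeature` checks a single model, `getModelsByFeature` filters the catalog, and `getCheapestModel` ranks specs by estimated total cost for a given token budget. A hedged usage sketch (the model-id strings are illustrative; the exact id format depends on how specs were registered):

```typescript
// Sketch: `registry` is a ModelRegistry instance as constructed by the client.
registry.supportsFeature("gpt-4o", "vision");             // true if the spec's features say so
registry.getModelsByFeature("functionCalling", "openai"); // ModelSpec[] for one provider

// Cheapest model for an expected 2000-in / 500-out token workload:
const best = registry.getCheapestModel(2000, 500);
console.log(best?.modelId); // undefined if no model has pricing data
```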
@@ -5965,49 +7862,6 @@ var init_options = __esm({
   }
 });
 
-// src/core/quick-methods.ts
-async function complete(client, prompt, options = {}) {
-  const model = resolveModel(options.model ?? "gpt-5-nano");
-  const builder = new LLMMessageBuilder();
-  if (options.systemPrompt) {
-    builder.addSystem(options.systemPrompt);
-  }
-  builder.addUser(prompt);
-  let fullResponse = "";
-  for await (const chunk of client.stream({
-    model,
-    messages: builder.build(),
-    temperature: options.temperature,
-    maxTokens: options.maxTokens
-  })) {
-    fullResponse += chunk.text;
-  }
-  return fullResponse.trim();
-}
-async function* stream(client, prompt, options = {}) {
-  const model = resolveModel(options.model ?? "gpt-5-nano");
-  const builder = new LLMMessageBuilder();
-  if (options.systemPrompt) {
-    builder.addSystem(options.systemPrompt);
-  }
-  builder.addUser(prompt);
-  for await (const chunk of client.stream({
-    model,
-    messages: builder.build(),
-    temperature: options.temperature,
-    maxTokens: options.maxTokens
-  })) {
-    yield chunk.text;
-  }
-}
-var init_quick_methods = __esm({
-  "src/core/quick-methods.ts"() {
-    "use strict";
-    init_messages();
-    init_model_shortcuts();
-  }
-});
-
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
@@ -6020,12 +7874,22 @@ var init_client = __esm({
     init_builder();
     init_discovery();
     init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
+    init_vision();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
+      vision;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6064,6 +7928,7 @@ var init_client = __esm({
           const priorityB = b.priority ?? 0;
           return priorityB - priorityA;
         });
+        this.defaultProvider = resolvedDefaultProvider;
         this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
         this.modelRegistry = new ModelRegistry();
         for (const adapter of this.adapters) {
@@ -6072,6 +7937,10 @@ var init_client = __esm({
         if (customModels.length > 0) {
           this.modelRegistry.registerModels(customModels);
         }
+        this.text = new TextNamespace(this);
+        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+        this.vision = new VisionNamespace(this);
       }
       stream(options) {
         const descriptor = this.parser.parse(options.model);
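With the constructor wiring above, every `LLMist` instance now exposes the four namespaces directly. A usage sketch (method and option names as defined in the namespace classes earlier in this diff; model ids are illustrative):

```typescript
const llmist = new LLMist();

// Text: thin wrappers over the quick methods.
const answer = await llmist.text.complete("Summarize RFC 2119 in one line.");

// Image: routed to the first adapter whose supportsImageGeneration() matches.
const img = await llmist.image.generate({ model: "dall-e-3", prompt: "a lighthouse at dusk" });

// Speech: returns audio plus cost/usage metadata.
const speech = await llmist.speech.generate({ model: "tts-1", input: "Hello!" });

// Vision: model support is checked against the registry's vision feature flag.
console.log(llmist.vision.supportsModel("gpt-4o"));
```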
@@ -6256,6 +8125,7 @@ var init_builder = __esm({
   "src/agent/builder.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_model_shortcuts();
     init_registry();
     init_agent();
@@ -6903,13 +8773,17 @@ ${endPrefix}`
    * }
    * ```
    */
-
+  /**
+   * Build AgentOptions with the given user prompt.
+   * Centralizes options construction for ask(), askWithImage(), and askWithContent().
+   */
+  buildAgentOptions(userPrompt) {
     if (!this.client) {
       const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
       this.client = new LLMistClass();
     }
     const registry = GadgetRegistry.from(this.gadgets);
-
+    return {
       client: this.client,
       model: this.model ?? "openai:gpt-5-nano",
       systemPrompt: this.systemPrompt,
@@ -6935,6 +8809,83 @@ ${endPrefix}`
       compactionConfig: this.compactionConfig,
       signal: this.signal
     };
+  }
+  ask(userPrompt) {
+    const options = this.buildAgentOptions(userPrompt);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and create the agent with a multimodal user prompt (text + image).
+   * Returns the Agent instance ready to run.
+   *
+   * @param textPrompt - Text prompt describing what to do with the image
+   * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+   * @param mimeType - Optional MIME type (auto-detected if not provided)
+   * @returns Configured Agent instance
+   *
+   * @example
+   * ```typescript
+   * const agent = LLMist.createAgent()
+   *   .withModel("gpt-4o")
+   *   .withSystem("You analyze images")
+   *   .askWithImage(
+   *     "What's in this image?",
+   *     await fs.readFile("photo.jpg")
+   *   );
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithImage(textPrompt, imageData, mimeType) {
+    const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+    const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+    if (!detectedMime) {
+      throw new Error(
+        "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+      );
+    }
+    const userContent = [
+      text(textPrompt),
+      {
+        type: "image",
+        source: {
+          type: "base64",
+          mediaType: detectedMime,
+          data: toBase64(imageBuffer)
+        }
+      }
+    ];
+    const options = this.buildAgentOptions(userContent);
+    return new Agent(AGENT_INTERNAL_KEY, options);
+  }
+  /**
+   * Build and return an Agent configured with multimodal content.
+   * More flexible than askWithImage - accepts any combination of content parts.
+   *
+   * @param content - Array of content parts (text, images, audio)
+   * @returns A configured Agent ready for execution
+   *
+   * @example
+   * ```typescript
+   * import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+   *
+   * const agent = LLMist.createAgent()
+   *   .withModel("gemini:gemini-2.5-flash")
+   *   .askWithContent([
+   *     text("Describe this image and transcribe the audio:"),
+   *     imageFromBuffer(imageData),
+   *     audioFromBuffer(audioData),
+   *   ]);
+   *
+   * for await (const event of agent.run()) {
+   *   // handle events
+   * }
+   * ```
+   */
+  askWithContent(content) {
+    const options = this.buildAgentOptions(content);
     return new Agent(AGENT_INTERNAL_KEY, options);
   }
   /**
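`askWithImage` leans on `detectImageMimeType`, which sniffs the magic bytes at the start of the buffer. A quick sketch of what that buys you:

```typescript
// PNG files start with the 8-byte signature 89 50 4E 47 0D 0A 1A 0A;
// detectImageMimeType matches such known prefixes and returns the MIME type.
const png = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a /* ... */]);
detectImageMimeType(png); // "image/png"

// Unrecognized data yields null, which is why askWithImage throws and asks
// for an explicit mimeType parameter in that case.
detectImageMimeType(Buffer.from("not an image")); // null
```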
@@ -7080,6 +9031,8 @@ __export(index_exports, {
   StreamParser: () => StreamParser,
   StreamProcessor: () => StreamProcessor,
   SummarizationStrategy: () => SummarizationStrategy,
+  audioFromBase64: () => audioFromBase64,
+  audioFromBuffer: () => audioFromBuffer,
   collectEvents: () => collectEvents,
   collectText: () => collectText,
   complete: () => complete,
@@ -7095,20 +9048,34 @@ __export(index_exports, {
   createOpenAIProviderFromEnv: () => createOpenAIProviderFromEnv,
   createTextMockStream: () => createTextMockStream,
   defaultLogger: () => defaultLogger,
+  detectAudioMimeType: () => detectAudioMimeType,
+  detectImageMimeType: () => detectImageMimeType,
   discoverProviderAdapters: () => discoverProviderAdapters,
+  extractText: () => extractText,
   getMockManager: () => getMockManager,
   getModelId: () => getModelId,
   getProvider: () => getProvider,
   hasProviderPrefix: () => hasProviderPrefix,
+  imageFromBase64: () => imageFromBase64,
+  imageFromBuffer: () => imageFromBuffer,
+  imageFromUrl: () => imageFromUrl,
+  isAudioPart: () => isAudioPart,
+  isDataUrl: () => isDataUrl,
+  isImagePart: () => isImagePart,
+  isTextPart: () => isTextPart,
   iterationProgressHint: () => iterationProgressHint,
   mockLLM: () => mockLLM,
+  normalizeContent: () => normalizeContent,
   parallelGadgetHint: () => parallelGadgetHint,
+  parseDataUrl: () => parseDataUrl,
   resolveHintTemplate: () => resolveHintTemplate,
   resolveModel: () => resolveModel,
   resolvePromptTemplate: () => resolvePromptTemplate,
   resolveRulesTemplate: () => resolveRulesTemplate,
   runWithHandlers: () => runWithHandlers,
   stream: () => stream,
+  text: () => text,
+  toBase64: () => toBase64,
   validateAndApplyDefaults: () => validateAndApplyDefaults,
   validateGadgetParams: () => validateGadgetParams,
   z: () => import_zod2.z
@@ -8009,6 +9976,7 @@ function createHints(config) {
 
 // src/index.ts
 init_client();
+init_input_content();
 init_messages();
 init_model_registry();
 init_model_shortcuts();
@@ -8256,9 +10224,9 @@ function sleep(ms) {
function generateInvocationId() {
  return `inv-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
}
function splitIntoChunks(text3, minChunkSize = 5, maxChunkSize = 30) {
  const chunks = [];
  let remaining = text3;
  while (remaining.length > 0) {
    const chunkSize = Math.min(
      Math.floor(Math.random() * (maxChunkSize - minChunkSize + 1)) + minChunkSize,
@@ -8317,17 +10285,17 @@ ${String(value)}
  return result;
}
function formatGadgetCalls(gadgetCalls) {
  let text3 = "";
  const calls = [];
  for (const call of gadgetCalls) {
    const invocationId = call.invocationId ?? generateInvocationId();
    calls.push({ name: call.gadgetName, invocationId });
    const blockParams = serializeToBlockFormat(call.parameters);
    text3 += `
${GADGET_START_PREFIX}${call.gadgetName}
${blockParams}${GADGET_END_PREFIX}`;
  }
  return { text: text3, calls };
}
async function* createMockStream(response) {
  if (response.delayMs) {
@@ -8367,9 +10335,9 @@ async function* createMockStream(response) {
    };
  }
}
function createTextMockStream(text3, options) {
  return createMockStream({
    text: text3,
    delayMs: options?.delayMs,
    streamDelayMs: options?.streamDelayMs,
    usage: options?.usage,
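For reference, a rough usage sketch of `createTextMockStream` as changed above; it assumes each yielded chunk exposes a `text` field, as the fallback yield later in this diff suggests:

```typescript
import { createTextMockStream } from "llmist";

// Streams the string in random 5-30 character chunks (per splitIntoChunks),
// pausing 10 ms between chunks; usage metadata is passed through to createMockStream.
const chunks = createTextMockStream("Hello from the mock!", {
  streamDelayMs: 10,
  usage: { inputTokens: 4, outputTokens: 5, totalTokens: 9 },
});

for await (const chunk of chunks) {
  if (chunk.text) process.stdout.write(chunk.text);
}
```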
@@ -8386,10 +10354,10 @@ var MockProviderAdapter = class {
  constructor(options) {
    this.mockManager = getMockManager(options);
  }
  supports(_descriptor) {
    return true;
  }
  stream(options, descriptor, _spec) {
    const context = {
      model: options.model,
      provider: descriptor.provider,
@@ -8400,20 +10368,154 @@ var MockProviderAdapter = class {
    return this.createMockStreamFromContext(context);
  }
  async *createMockStreamFromContext(context) {
    const mockResponse = await this.mockManager.findMatch(context);
    if (!mockResponse) {
      yield {
        text: "",
        finishReason: "stop",
        usage: { inputTokens: 0, outputTokens: 0, totalTokens: 0 }
      };
      return;
    }
    yield* createMockStream(mockResponse);
  }
  // ==========================================================================
  // Image Generation Support
  // ==========================================================================
  /**
   * Check if this adapter supports image generation for a given model.
   * Returns true if there's a registered mock with images for this model.
   */
  supportsImageGeneration(_modelId) {
    return true;
  }
  /**
   * Generate mock images based on registered mocks.
   *
   * @param options - Image generation options
   * @returns Mock image generation result
   */
  async generateImage(options) {
    const context = {
      model: options.model,
      provider: "mock",
      modelName: options.model,
      options: {
        model: options.model,
        messages: [{ role: "user", content: options.prompt }]
      },
      messages: [{ role: "user", content: options.prompt }]
    };
    const mockResponse = await this.mockManager.findMatch(context);
    if (!mockResponse?.images || mockResponse.images.length === 0) {
      throw new Error(
        `No mock registered for image generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsImage(...).register() to add one.`
      );
    }
    return this.createImageResult(options, mockResponse);
  }
  /**
   * Transform mock response into ImageGenerationResult format.
   *
   * @param options - Original image generation options
   * @param mockResponse - Mock response containing image data
   * @returns ImageGenerationResult with mock data and zero cost
   */
  createImageResult(options, mockResponse) {
    const images = mockResponse.images ?? [];
    return {
      images: images.map((img) => ({
        b64Json: img.data,
        revisedPrompt: img.revisedPrompt
      })),
      model: options.model,
      usage: {
        imagesGenerated: images.length,
        size: options.size ?? "1024x1024",
        quality: options.quality ?? "standard"
      },
      cost: 0
      // Mock cost is always 0
    };
  }
  // ==========================================================================
  // Speech Generation Support
  // ==========================================================================
  /**
   * Check if this adapter supports speech generation for a given model.
   * Returns true if there's a registered mock with audio for this model.
   */
  supportsSpeechGeneration(_modelId) {
    return true;
  }
  /**
   * Generate mock speech based on registered mocks.
   *
   * @param options - Speech generation options
   * @returns Mock speech generation result
   */
  async generateSpeech(options) {
    const context = {
      model: options.model,
      provider: "mock",
      modelName: options.model,
      options: {
        model: options.model,
        messages: [{ role: "user", content: options.input }]
      },
      messages: [{ role: "user", content: options.input }]
    };
    const mockResponse = await this.mockManager.findMatch(context);
    if (!mockResponse?.audio) {
      throw new Error(
        `No mock registered for speech generation with model "${options.model}". Use mockLLM().forModel("${options.model}").returnsAudio(...).register() to add one.`
      );
    }
    return this.createSpeechResult(options, mockResponse);
  }
  /**
   * Transform mock response into SpeechGenerationResult format.
   * Converts base64 audio data to ArrayBuffer.
   *
   * @param options - Original speech generation options
   * @param mockResponse - Mock response containing audio data
   * @returns SpeechGenerationResult with mock data and zero cost
   */
  createSpeechResult(options, mockResponse) {
    const audio = mockResponse.audio;
    const binaryString = atob(audio.data);
    const bytes = new Uint8Array(binaryString.length);
    for (let i = 0; i < binaryString.length; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    const format = this.mimeTypeToAudioFormat(audio.mimeType);
    return {
      audio: bytes.buffer,
      model: options.model,
      usage: {
        characterCount: options.input.length
      },
      cost: 0,
      // Mock cost is always 0
      format
    };
  }
  /**
   * Map MIME type to audio format for SpeechGenerationResult.
   * Defaults to "mp3" for unknown MIME types.
   *
   * @param mimeType - Audio MIME type string
   * @returns Audio format identifier
   */
  mimeTypeToAudioFormat(mimeType) {
    const mapping = {
      "audio/mp3": "mp3",
      "audio/mpeg": "mp3",
      "audio/wav": "wav",
      "audio/webm": "opus",
      "audio/ogg": "opus"
    };
    return mapping[mimeType] ?? "mp3";
  }
};
function createMockAdapter(options) {
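The error messages above spell out the registration API for media mocks. A minimal round trip, assuming `createMockAdapter()` can be called without options and that `generateSpeech` needs only the `model` and `input` fields this hunk actually reads:

```typescript
import { readFileSync } from "node:fs";
import { mockLLM, createMockAdapter } from "llmist";

// Register a speech mock (the mp3 fixture path is a placeholder).
mockLLM().forModel("tts-1").returnsAudio(readFileSync("fixture.mp3")).register();

const adapter = createMockAdapter();
const result = await adapter.generateSpeech({ model: "tts-1", input: "Hello!" });
console.log(result.format);               // "mp3", via mimeTypeToAudioFormat
console.log(result.usage.characterCount); // 6, the length of "Hello!"
console.log(result.cost);                 // 0, mock cost is always 0
```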
@@ -8421,6 +10523,20 @@ function createMockAdapter(options) {
}

// src/testing/mock-builder.ts
init_input_content();
init_messages();
function hasImageContent(content) {
  if (typeof content === "string") return false;
  return content.some((part) => isImagePart(part));
}
function hasAudioContent(content) {
  if (typeof content === "string") return false;
  return content.some((part) => isAudioPart(part));
}
function countImages(content) {
  if (typeof content === "string") return 0;
  return content.filter((part) => isImagePart(part)).length;
}
var MockBuilder = class {
  matchers = [];
  response = {};
@@ -8483,9 +10599,9 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenMessageContains('hello')
   */
  whenMessageContains(text3) {
    this.matchers.push(
      (ctx) => ctx.messages.some((msg) => extractText(msg.content).toLowerCase().includes(text3.toLowerCase()))
    );
    return this;
  }
@@ -8495,10 +10611,11 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenLastMessageContains('goodbye')
   */
  whenLastMessageContains(text3) {
    this.matchers.push((ctx) => {
      const lastMsg = ctx.messages[ctx.messages.length - 1];
      if (!lastMsg) return false;
      return extractText(lastMsg.content).toLowerCase().includes(text3.toLowerCase());
    });
    return this;
  }
@@ -8509,7 +10626,7 @@ var MockBuilder = class {
   * mockLLM().whenMessageMatches(/calculate \d+/)
   */
  whenMessageMatches(regex) {
    this.matchers.push((ctx) => ctx.messages.some((msg) => regex.test(extractText(msg.content))));
    return this;
  }
  /**
@@ -8518,10 +10635,10 @@ var MockBuilder = class {
   * @example
   * mockLLM().whenRoleContains('system', 'You are a helpful assistant')
   */
  whenRoleContains(role, text3) {
    this.matchers.push(
      (ctx) => ctx.messages.some(
        (msg) => msg.role === role && extractText(msg.content).toLowerCase().includes(text3.toLowerCase())
      )
    );
    return this;
@@ -8549,6 +10666,43 @@ var MockBuilder = class {
    this.matchers.push(matcher);
    return this;
  }
  // ==========================================================================
  // Multimodal Matchers
  // ==========================================================================
  /**
   * Match when any message contains an image.
   *
   * @example
   * mockLLM().whenMessageHasImage().returns("I see an image of a sunset.")
   */
  whenMessageHasImage() {
    this.matchers.push((ctx) => ctx.messages.some((msg) => hasImageContent(msg.content)));
    return this;
  }
  /**
   * Match when any message contains audio.
   *
   * @example
   * mockLLM().whenMessageHasAudio().returns("I hear music playing.")
   */
  whenMessageHasAudio() {
    this.matchers.push((ctx) => ctx.messages.some((msg) => hasAudioContent(msg.content)));
    return this;
  }
  /**
   * Match based on the number of images in the last message.
   *
   * @example
   * mockLLM().whenImageCount((n) => n >= 2).returns("Comparing multiple images...")
   */
  whenImageCount(predicate) {
    this.matchers.push((ctx) => {
      const lastMsg = ctx.messages[ctx.messages.length - 1];
      if (!lastMsg) return false;
      return predicate(countImages(lastMsg.content));
    });
    return this;
  }
  /**
   * Set the text response to return.
   * Can be a static string or a function that returns a string dynamically.
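A short sketch of the new matchers in use; it assumes, as with the existing matchers, that every registered predicate must match for the mock to fire:

```typescript
import { mockLLM } from "llmist";

// Fires only for prompts that carry at least two images.
mockLLM()
  .whenMessageHasImage()
  .whenImageCount((n) => n >= 2)
  .returns("Comparing multiple images...")
  .register();

// Fires for any prompt that carries audio.
mockLLM()
  .whenMessageHasAudio()
  .returns("I hear music playing.")
  .register();
```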
@@ -8558,17 +10712,17 @@ var MockBuilder = class {
   * mockLLM().returns(() => `Response at ${Date.now()}`)
   * mockLLM().returns((ctx) => `You said: ${ctx.messages[0]?.content}`)
   */
  returns(text3) {
    if (typeof text3 === "function") {
      this.response = async (ctx) => {
        const resolvedText = await Promise.resolve().then(() => text3(ctx));
        return { text: resolvedText };
      };
    } else {
      if (typeof this.response === "function") {
        throw new Error("Cannot use returns() after withResponse() with a function");
      }
      this.response.text = text3;
    }
    return this;
  }
@@ -8605,6 +10759,112 @@ var MockBuilder = class {
    this.response.gadgetCalls.push({ gadgetName, parameters });
    return this;
  }
  // ==========================================================================
  // Multimodal Response Helpers
  // ==========================================================================
  /**
   * Return a single image in the response.
   * Useful for mocking image generation endpoints.
   *
   * @param data - Image data (base64 string or Buffer)
   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
   *
   * @example
   * mockLLM()
   *   .forModel('dall-e-3')
   *   .returnsImage(pngBuffer)
   *   .register();
   */
  returnsImage(data, mimeType) {
    if (typeof this.response === "function") {
      throw new Error("Cannot use returnsImage() after withResponse() with a function");
    }
    let imageData;
    let imageMime;
    if (typeof data === "string") {
      imageData = data;
      if (!mimeType) {
        throw new Error("MIME type is required when providing base64 string data");
      }
      imageMime = mimeType;
    } else {
      imageData = toBase64(data);
      const detected = mimeType ?? detectImageMimeType(data);
      if (!detected) {
        throw new Error(
          "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
        );
      }
      imageMime = detected;
    }
    if (!this.response.images) {
      this.response.images = [];
    }
    this.response.images.push({ data: imageData, mimeType: imageMime });
    return this;
  }
  /**
   * Return multiple images in the response.
   *
   * @example
   * mockLLM()
   *   .forModel('dall-e-3')
   *   .returnsImages([
   *     { data: pngBuffer1 },
   *     { data: pngBuffer2 },
   *   ])
   *   .register();
   */
  returnsImages(images) {
    for (const img of images) {
      this.returnsImage(img.data, img.mimeType);
      if (img.revisedPrompt && this.response && typeof this.response !== "function") {
        const lastImage = this.response.images?.[this.response.images.length - 1];
        if (lastImage) {
          lastImage.revisedPrompt = img.revisedPrompt;
        }
      }
    }
    return this;
  }
  /**
   * Return audio data in the response.
   * Useful for mocking speech synthesis endpoints.
   *
   * @param data - Audio data (base64 string or Buffer)
   * @param mimeType - MIME type (auto-detected if Buffer provided without type)
   *
   * @example
   * mockLLM()
   *   .forModel('tts-1')
   *   .returnsAudio(mp3Buffer)
   *   .register();
   */
  returnsAudio(data, mimeType) {
    if (typeof this.response === "function") {
      throw new Error("Cannot use returnsAudio() after withResponse() with a function");
    }
    let audioData;
    let audioMime;
    if (typeof data === "string") {
      audioData = data;
      if (!mimeType) {
        throw new Error("MIME type is required when providing base64 string data");
      }
      audioMime = mimeType;
    } else {
      audioData = toBase64(data);
      const detected = mimeType ?? detectAudioMimeType(data);
      if (!detected) {
        throw new Error(
          "Could not detect audio MIME type. Please provide the mimeType parameter explicitly."
        );
      }
      audioMime = detected;
    }
    this.response.audio = { data: audioData, mimeType: audioMime };
    return this;
  }
  /**
   * Set the complete mock response object.
   * This allows full control over all response properties.
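One behavior worth calling out from the branches above: Buffers are MIME-sniffed via magic bytes, while base64 strings cannot be sniffed, so for strings the type argument is mandatory. A sketch (the fixture path is a placeholder and the base64 payload is truncated):

```typescript
import { readFileSync } from "node:fs";
import { mockLLM } from "llmist";

// Buffer input: MIME type is auto-detected from magic bytes.
mockLLM()
  .forModel("dall-e-3")
  .returnsImage(readFileSync("fixture.png"))
  .register();

// Base64 string input: returnsImage() throws unless the MIME type is explicit.
mockLLM()
  .forModel("dall-e-3")
  .returnsImage("iVBORw0KGgoAAAANSUhEUg...", "image/png")
  .register();
```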
@@ -8818,6 +11078,8 @@ var import_node_stream = require("stream");
  StreamParser,
  StreamProcessor,
  SummarizationStrategy,
  audioFromBase64,
  audioFromBuffer,
  collectEvents,
  collectText,
  complete,
@@ -8833,20 +11095,34 @@ var import_node_stream = require("stream");
  createOpenAIProviderFromEnv,
  createTextMockStream,
  defaultLogger,
  detectAudioMimeType,
  detectImageMimeType,
  discoverProviderAdapters,
  extractText,
  getMockManager,
  getModelId,
  getProvider,
  hasProviderPrefix,
  imageFromBase64,
  imageFromBuffer,
  imageFromUrl,
  isAudioPart,
  isDataUrl,
  isImagePart,
  isTextPart,
  iterationProgressHint,
  mockLLM,
  normalizeContent,
  parallelGadgetHint,
  parseDataUrl,
  resolveHintTemplate,
  resolveModel,
  resolvePromptTemplate,
  resolveRulesTemplate,
  runWithHandlers,
  stream,
  text,
  toBase64,
  validateAndApplyDefaults,
  validateGadgetParams,
  z