llmist 2.3.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-IHSZUAYN.js} +4 -2
- package/dist/chunk-IHSZUAYN.js.map +1 -0
- package/dist/{chunk-GANXNBIZ.js → chunk-YHS2DYXP.js} +2839 -579
- package/dist/chunk-YHS2DYXP.js.map +1 -0
- package/dist/cli.cjs +2717 -198
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +638 -47
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +2496 -220
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +109 -20
- package/dist/index.d.ts +109 -20
- package/dist/index.js +34 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-ga4KIiwX.d.cts} +1121 -12
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-ga4KIiwX.d.ts} +1121 -12
- package/dist/testing/index.cjs +2771 -559
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/chunk-ZDNV7DDO.js.map +0 -1
package/dist/cli.cjs
CHANGED
````diff
@@ -46,6 +46,137 @@ var init_constants = __esm({
   }
 });
 
+// src/core/input-content.ts
+function text(content) {
+  return { type: "text", text: content };
+}
+function imageFromUrl(url) {
+  return {
+    type: "image",
+    source: { type: "url", url }
+  };
+}
+function detectImageMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of IMAGE_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "image/webp") {
+          if (bytes.length >= 12) {
+            const webpMarker = bytes[8] === 87 && bytes[9] === 69 && bytes[10] === 66 && bytes[11] === 80;
+            if (!webpMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function detectAudioMimeType(data) {
+  const bytes = data instanceof Buffer ? data : Buffer.from(data);
+  for (const { bytes: magic, mimeType } of AUDIO_MAGIC_BYTES) {
+    if (bytes.length >= magic.length) {
+      let matches = true;
+      for (let i = 0; i < magic.length; i++) {
+        if (bytes[i] !== magic[i]) {
+          matches = false;
+          break;
+        }
+      }
+      if (matches) {
+        if (mimeType === "audio/wav") {
+          if (bytes.length >= 12) {
+            const waveMarker = bytes[8] === 87 && bytes[9] === 65 && bytes[10] === 86 && bytes[11] === 69;
+            if (!waveMarker) continue;
+          }
+        }
+        return mimeType;
+      }
+    }
+  }
+  return null;
+}
+function toBase64(data) {
+  if (typeof data === "string") {
+    return data;
+  }
+  return Buffer.from(data).toString("base64");
+}
+function imageFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectImageMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect image MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "image",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function audioFromBuffer(buffer, mediaType) {
+  const detectedType = mediaType ?? detectAudioMimeType(buffer);
+  if (!detectedType) {
+    throw new Error(
+      "Could not detect audio MIME type. Please provide the mediaType parameter explicitly."
+    );
+  }
+  return {
+    type: "audio",
+    source: {
+      type: "base64",
+      mediaType: detectedType,
+      data: toBase64(buffer)
+    }
+  };
+}
+function isDataUrl(input) {
+  return input.startsWith("data:");
+}
+function parseDataUrl(url) {
+  const match = url.match(/^data:([^;]+);base64,(.+)$/);
+  if (!match) return null;
+  return { mimeType: match[1], data: match[2] };
+}
+var IMAGE_MAGIC_BYTES, AUDIO_MAGIC_BYTES;
+var init_input_content = __esm({
+  "src/core/input-content.ts"() {
+    "use strict";
+    IMAGE_MAGIC_BYTES = [
+      { bytes: [255, 216, 255], mimeType: "image/jpeg" },
+      { bytes: [137, 80, 78, 71], mimeType: "image/png" },
+      { bytes: [71, 73, 70, 56], mimeType: "image/gif" },
+      // WebP starts with RIFF....WEBP
+      { bytes: [82, 73, 70, 70], mimeType: "image/webp" }
+    ];
+    AUDIO_MAGIC_BYTES = [
+      // MP3 frame sync
+      { bytes: [255, 251], mimeType: "audio/mp3" },
+      { bytes: [255, 250], mimeType: "audio/mp3" },
+      // ID3 tag (MP3)
+      { bytes: [73, 68, 51], mimeType: "audio/mp3" },
+      // OGG
+      { bytes: [79, 103, 103, 83], mimeType: "audio/ogg" },
+      // WAV (RIFF)
+      { bytes: [82, 73, 70, 70], mimeType: "audio/wav" },
+      // WebM
+      { bytes: [26, 69, 223, 163], mimeType: "audio/webm" }
+    ];
+  }
+});
+
 // src/core/model-shortcuts.ts
 function isKnownModelPattern(model) {
   const normalized = model.toLowerCase();
````
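Note on the MIME sniffing added above: each entry in `IMAGE_MAGIC_BYTES`/`AUDIO_MAGIC_BYTES` is a file-signature prefix, e.g. `[137, 80, 78, 71]` is the PNG magic `\x89PNG`. Because a `RIFF` prefix alone is ambiguous, both detectors re-check bytes 8-11 to distinguish a WebP container (`WEBP` = 87/69/66/80) from a WAV one (`WAVE` = 87/65/86/69). A minimal standalone sketch of the same comparison, using only Node's `Buffer`:

```typescript
import { Buffer } from "node:buffer";

// PNG signature bytes -- matches the [137, 80, 78, 71] entry above.
const png = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a]);

// Hypothetical re-implementation of the matching loop for one magic entry.
function startsWith(bytes: Buffer, magic: number[]): boolean {
  return bytes.length >= magic.length && magic.every((b, i) => bytes[i] === b);
}

console.log(startsWith(png, [137, 80, 78, 71])); // true -> "image/png"
// A RIFF prefix alone is ambiguous: bytes 8-11 must spell "WEBP" (87 69 66 80)
// for image/webp, or "WAVE" (87 65 86 69) for audio/wav.
```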
````diff
@@ -375,7 +506,9 @@ var init_prompt_config = __esm({
       rules: () => [
         "Output ONLY plain text with the exact markers - never use function/tool calling",
         "You can invoke multiple gadgets in a single response",
-        "
+        "Gadgets without dependencies execute immediately (in parallel if multiple)",
+        "Use :invocation_id:dep1,dep2 syntax when a gadget needs results from prior gadgets",
+        "If any dependency fails, dependent gadgets are automatically skipped"
       ],
       customExamples: null
     };
@@ -383,11 +516,24 @@ var init_prompt_config = __esm({
 });
 
 // src/core/messages.ts
+function normalizeContent(content) {
+  if (typeof content === "string") {
+    return [{ type: "text", text: content }];
+  }
+  return content;
+}
+function extractText(content) {
+  if (typeof content === "string") {
+    return content;
+  }
+  return content.filter((part) => part.type === "text").map((part) => part.text).join("");
+}
 var LLMMessageBuilder;
 var init_messages = __esm({
   "src/core/messages.ts"() {
     "use strict";
     init_constants();
+    init_input_content();
     init_prompt_config();
     LLMMessageBuilder = class {
       messages = [];
@@ -489,6 +635,10 @@ CRITICAL: ${criticalUsage}
         parts.push(`
 1. Start marker: ${this.startPrefix}gadget_name`);
         parts.push(`
+With ID: ${this.startPrefix}gadget_name:my_id`);
+        parts.push(`
+With dependencies: ${this.startPrefix}gadget_name:my_id:dep1,dep2`);
+        parts.push(`
 2. ${formatDescription}`);
         parts.push(`
 3. End marker: ${this.endPrefix}`);
@@ -538,6 +688,25 @@ ${this.endPrefix}`;
 EXAMPLE (Multiple Gadgets):
 
 ${multipleExample}`);
+        const dependencyExample = `${this.startPrefix}fetch_data:fetch_1
+${this.argPrefix}url
+https://api.example.com/users
+${this.endPrefix}
+${this.startPrefix}fetch_data:fetch_2
+${this.argPrefix}url
+https://api.example.com/orders
+${this.endPrefix}
+${this.startPrefix}merge_data:merge_1:fetch_1,fetch_2
+${this.argPrefix}format
+json
+${this.endPrefix}`;
+        parts.push(`
+
+EXAMPLE (With Dependencies):
+merge_1 waits for fetch_1 AND fetch_2 to complete.
+If either fails, merge_1 is automatically skipped.
+
+${dependencyExample}`);
         parts.push(`
 
 BLOCK FORMAT SYNTAX:
@@ -588,6 +757,25 @@ Produces: { "items": ["first", "second"] }`);
         }
         return parts.join("");
       }
+      /**
+       * Add a user message.
+       * Content can be a string (text only) or an array of content parts (multimodal).
+       *
+       * @param content - Message content
+       * @param metadata - Optional metadata
+       *
+       * @example
+       * ```typescript
+       * // Text only
+       * builder.addUser("Hello!");
+       *
+       * // Multimodal
+       * builder.addUser([
+       *   text("What's in this image?"),
+       *   imageFromBuffer(imageData),
+       * ]);
+       * ```
+       */
       addUser(content, metadata) {
         this.messages.push({ role: "user", content, metadata });
         return this;
@@ -596,6 +784,104 @@ Produces: { "items": ["first", "second"] }`);
         this.messages.push({ role: "assistant", content, metadata });
         return this;
       }
+      /**
+       * Add a user message with an image attachment.
+       *
+       * @param textContent - Text prompt
+       * @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImage(
+       *   "What's in this image?",
+       *   await fs.readFile("photo.jpg"),
+       *   "image/jpeg" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithImage(textContent, imageData, mimeType) {
+        const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+        const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+        if (!detectedMime) {
+          throw new Error(
+            "Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+          );
+        }
+        const content = [
+          text(textContent),
+          {
+            type: "image",
+            source: {
+              type: "base64",
+              mediaType: detectedMime,
+              data: toBase64(imageBuffer)
+            }
+          }
+        ];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an image URL (OpenAI only).
+       *
+       * @param textContent - Text prompt
+       * @param imageUrl - URL to the image
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithImageUrl(
+       *   "What's in this image?",
+       *   "https://example.com/image.jpg"
+       * );
+       * ```
+       */
+      addUserWithImageUrl(textContent, imageUrl) {
+        const content = [text(textContent), imageFromUrl(imageUrl)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with an audio attachment (Gemini only).
+       *
+       * @param textContent - Text prompt
+       * @param audioData - Audio data (Buffer, Uint8Array, or base64 string)
+       * @param mimeType - Optional MIME type (auto-detected if not provided)
+       *
+       * @example
+       * ```typescript
+       * builder.addUserWithAudio(
+       *   "Transcribe this audio",
+       *   await fs.readFile("recording.mp3"),
+       *   "audio/mp3" // Optional - auto-detected
+       * );
+       * ```
+       */
+      addUserWithAudio(textContent, audioData, mimeType) {
+        const audioBuffer = typeof audioData === "string" ? Buffer.from(audioData, "base64") : audioData;
+        const content = [text(textContent), audioFromBuffer(audioBuffer, mimeType)];
+        this.messages.push({ role: "user", content });
+        return this;
+      }
+      /**
+       * Add a user message with multiple content parts.
+       * Provides full flexibility for complex multimodal messages.
+       *
+       * @param parts - Array of content parts
+       *
+       * @example
+       * ```typescript
+       * builder.addUserMultimodal([
+       *   text("Compare these images:"),
+       *   imageFromBuffer(image1),
+       *   imageFromBuffer(image2),
+       * ]);
+       * ```
+       */
+      addUserMultimodal(parts) {
+        this.messages.push({ role: "user", content: parts });
+        return this;
+      }
       addGadgetCall(gadget, parameters, result) {
         const paramStr = this.formatBlockParameters(parameters, "");
         this.messages.push({
````
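The new builder methods are chainable, so multimodal turns compose directly. A short usage sketch, assuming `LLMMessageBuilder` is importable from the package (the export path is an assumption, not verified against the published API):

```typescript
// Hypothetical usage of the builder methods added above.
import { promises as fs } from "node:fs";

const builder = new LLMMessageBuilder();
builder
  .addUser("Hello!") // plain string content still works
  .addUserWithImage("Describe this photo", await fs.readFile("photo.jpg"))
  .addUserWithAudio("Transcribe this clip", await fs.readFile("clip.mp3"));
// The image/audio helpers throw if the MIME type cannot be sniffed from
// magic bytes and no explicit type was passed.
```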
````diff
@@ -1914,7 +2200,7 @@ var init_conversation_manager = __esm({
         if (msg.role === "user") {
           this.historyBuilder.addUser(msg.content);
         } else if (msg.role === "assistant") {
-          this.historyBuilder.addAssistant(msg.content);
+          this.historyBuilder.addAssistant(extractText(msg.content));
         }
       }
     }
@@ -1935,8 +2221,10 @@ async function runWithHandlers(agentGenerator, handlers) {
       if (handlers.onGadgetCall) {
         await handlers.onGadgetCall({
           gadgetName: event.call.gadgetName,
+          invocationId: event.call.invocationId,
           parameters: event.call.parameters,
-          parametersRaw: event.call.parametersRaw
+          parametersRaw: event.call.parametersRaw,
+          dependencies: event.call.dependencies
         });
       }
       break;
@@ -2498,7 +2786,27 @@ var init_cost_reporting_client = __esm({
       constructor(client, reportCost) {
         this.client = client;
        this.reportCost = reportCost;
+        this.image = {
+          generate: async (options) => {
+            const result = await this.client.image.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
+        this.speech = {
+          generate: async (options) => {
+            const result = await this.client.speech.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
       }
+      image;
+      speech;
       /**
        * Access to model registry for cost estimation.
        */
````
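The `image`/`speech` namespaces above are thin delegation wrappers: each call is forwarded to the inner client, and any defined, positive `result.cost` is pushed to the `reportCost` callback. A self-contained sketch of the same pattern (the stub `client` below is illustrative, not the package's API):

```typescript
// Illustrative sketch of the cost-reporting delegation above.
type GenResult = { cost?: number };
const client = {
  image: { generate: async (_o: unknown): Promise<GenResult> => ({ cost: 0.04 }) },
};

let totalCost = 0;
const reportCost = (c: number) => { totalCost += c; };

const wrapped = {
  image: {
    generate: async (options: unknown) => {
      const result = await client.image.generate(options);
      // Only positive, defined costs are reported; free/unknown calls are skipped.
      if (result.cost !== undefined && result.cost > 0) reportCost(result.cost);
      return result;
    },
  },
};

await wrapped.image.generate({ prompt: "a lighthouse" });
console.log(totalCost); // 0.04
```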
````diff
@@ -2763,15 +3071,37 @@ var init_parser = __esm({
         return segment.trim().length > 0 ? segment : void 0;
       }
       /**
-       * Parse gadget name
-       *
+       * Parse gadget name with optional invocation ID and dependencies.
+       *
+       * Supported formats:
+       * - `GadgetName` - Auto-generate ID, no dependencies
+       * - `GadgetName:my_id` - Explicit ID, no dependencies
+       * - `GadgetName:my_id:dep1,dep2` - Explicit ID with dependencies
+       *
+       * Dependencies must be comma-separated invocation IDs.
        */
       parseGadgetName(gadgetName) {
-
-
-        return {
+        const parts = gadgetName.split(":");
+        if (parts.length === 1) {
+          return {
+            actualName: parts[0],
+            invocationId: `gadget_${++globalInvocationCounter}`,
+            dependencies: []
+          };
+        } else if (parts.length === 2) {
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: []
+          };
+        } else {
+          const deps = parts[2].split(",").map((d) => d.trim()).filter((d) => d.length > 0);
+          return {
+            actualName: parts[0],
+            invocationId: parts[1].trim(),
+            dependencies: deps
+          };
         }
-        return { actualName: gadgetName, invocationId: `gadget_${++globalInvocationCounter}` };
       }
       /**
        * Extract the error message from a parse error.
````
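Concretely, the three header forms accepted by `parseGadgetName` resolve as follows (the auto-generated counter value is illustrative):

```typescript
parseGadgetName("fetch_data");
// -> { actualName: "fetch_data", invocationId: "gadget_1", dependencies: [] }

parseGadgetName("fetch_data:fetch_1");
// -> { actualName: "fetch_data", invocationId: "fetch_1", dependencies: [] }

parseGadgetName("merge_data:merge_1:fetch_1,fetch_2");
// -> { actualName: "merge_data", invocationId: "merge_1",
//      dependencies: ["fetch_1", "fetch_2"] }
```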
````diff
@@ -2807,39 +3137,20 @@ var init_parser = __esm({
           const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
           if (metadataEndIndex === -1) break;
           const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-          const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+          const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
           const contentStartIndex = metadataEndIndex + 1;
           let partEndIndex;
           let endMarkerLength = 0;
-
-
-
-
-          endMarkerLength =
+          const nextStartPos = this.buffer.indexOf(this.startPrefix, contentStartIndex);
+          const endPos = this.buffer.indexOf(this.endPrefix, contentStartIndex);
+          if (nextStartPos !== -1 && (endPos === -1 || nextStartPos < endPos)) {
+            partEndIndex = nextStartPos;
+            endMarkerLength = 0;
+          } else if (endPos !== -1) {
+            partEndIndex = endPos;
+            endMarkerLength = this.endPrefix.length;
           } else {
-
-            let validEndPos = -1;
-            let searchPos = contentStartIndex;
-            while (true) {
-              const endPos = this.buffer.indexOf(this.endPrefix, searchPos);
-              if (endPos === -1) break;
-              const afterEnd = this.buffer.substring(endPos + this.endPrefix.length);
-              if (afterEnd.startsWith("\n") || afterEnd.startsWith("\r") || afterEnd.startsWith(this.startPrefix) || afterEnd.length === 0) {
-                validEndPos = endPos;
-                break;
-              } else {
-                searchPos = endPos + this.endPrefix.length;
-              }
-            }
-            if (nextStartPos !== -1 && (validEndPos === -1 || nextStartPos < validEndPos)) {
-              partEndIndex = nextStartPos;
-              endMarkerLength = 0;
-            } else if (validEndPos !== -1) {
-              partEndIndex = validEndPos;
-              endMarkerLength = this.endPrefix.length;
-            } else {
-              break;
-            }
+            break;
           }
           const parametersRaw = this.buffer.substring(contentStartIndex, partEndIndex).trim();
           const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2850,7 +3161,8 @@ var init_parser = __esm({
               invocationId,
               parametersRaw,
               parameters,
-              parseError
+              parseError,
+              dependencies
             }
           };
           startIndex = partEndIndex + endMarkerLength;
@@ -2873,7 +3185,7 @@ var init_parser = __esm({
         const metadataEndIndex = this.buffer.indexOf("\n", metadataStartIndex);
         if (metadataEndIndex !== -1) {
           const gadgetName = this.buffer.substring(metadataStartIndex, metadataEndIndex).trim();
-          const { actualName: actualGadgetName, invocationId } = this.parseGadgetName(gadgetName);
+          const { actualName: actualGadgetName, invocationId, dependencies } = this.parseGadgetName(gadgetName);
           const contentStartIndex = metadataEndIndex + 1;
           const parametersRaw = this.buffer.substring(contentStartIndex).trim();
           const { parameters, parseError } = this.parseParameters(parametersRaw);
@@ -2884,7 +3196,8 @@ var init_parser = __esm({
             invocationId,
             parametersRaw,
             parameters,
-            parseError
+            parseError,
+            dependencies
           }
         };
         return;
@@ -3254,6 +3567,13 @@ var init_stream_processor = __esm({
       accumulatedText = "";
       shouldStopExecution = false;
      observerFailureCount = 0;
+      // Dependency tracking for gadget execution DAG
+      /** Gadgets waiting for their dependencies to complete */
+      pendingGadgets = /* @__PURE__ */ new Map();
+      /** Completed gadget results, keyed by invocation ID */
+      completedResults = /* @__PURE__ */ new Map();
+      /** Invocation IDs of gadgets that have failed (error or skipped due to dependency) */
+      failedInvocations = /* @__PURE__ */ new Set();
       constructor(options) {
         this.iteration = options.iteration;
         this.registry = options.registry;
@@ -3354,6 +3674,16 @@ var init_stream_processor = __esm({
             }
           }
         }
+        const finalPendingEvents = await this.processPendingGadgets();
+        outputs.push(...finalPendingEvents);
+        if (finalPendingEvents.some((e) => e.type === "gadget_result")) {
+          didExecuteGadgets = true;
+        }
+        for (const evt of finalPendingEvents) {
+          if (evt.type === "gadget_result" && evt.result.breaksLoop) {
+            shouldBreakLoop = true;
+          }
+        }
       }
       let finalMessage = this.accumulatedText;
       if (this.hooks.interceptors?.interceptAssistantMessage) {
@@ -3405,7 +3735,11 @@ var init_stream_processor = __esm({
         return [{ type: "text", content }];
       }
       /**
-       * Process a gadget call through the full lifecycle.
+       * Process a gadget call through the full lifecycle, handling dependencies.
+       *
+       * Gadgets without dependencies (or with all dependencies satisfied) execute immediately.
+       * Gadgets with unsatisfied dependencies are queued for later execution.
+       * After each execution, pending gadgets are checked to see if they can now run.
       */
      async processGadgetCall(call) {
        if (this.shouldStopExecution) {
@@ -3416,6 +3750,53 @@ var init_stream_processor = __esm({
         }
         const events = [];
         events.push({ type: "gadget_call", call });
+        if (call.dependencies.length > 0) {
+          if (call.dependencies.includes(call.invocationId)) {
+            this.logger.warn("Gadget has self-referential dependency (depends on itself)", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId
+            });
+            this.failedInvocations.add(call.invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: call.invocationId,
+              failedDependencyError: `Gadget "${call.invocationId}" cannot depend on itself (self-referential dependency)`
+            };
+            events.push(skipEvent);
+            return events;
+          }
+          const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+          if (failedDep) {
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            return events;
+          }
+          const unsatisfied = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+          if (unsatisfied.length > 0) {
+            this.logger.debug("Queueing gadget for later - waiting on dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              waitingOn: unsatisfied
+            });
+            this.pendingGadgets.set(call.invocationId, call);
+            return events;
+          }
+        }
+        const executeEvents = await this.executeGadgetWithHooks(call);
+        events.push(...executeEvents);
+        const triggeredEvents = await this.processPendingGadgets();
+        events.push(...triggeredEvents);
+        return events;
+      }
+      /**
+       * Execute a gadget through the full hook lifecycle.
+       * This is the core execution logic, extracted from processGadgetCall.
+       */
+      async executeGadgetWithHooks(call) {
+        const events = [];
         if (call.parseError) {
           this.logger.warn("Gadget has parse error", {
             gadgetName: call.gadgetName,
@@ -3546,6 +3927,10 @@ var init_stream_processor = __esm({
           });
         }
         await this.runObserversInParallel(completeObservers);
+        this.completedResults.set(result.invocationId, result);
+        if (result.error) {
+          this.failedInvocations.add(result.invocationId);
+        }
         events.push({ type: "gadget_result", result });
         if (result.error) {
           const errorType = this.determineErrorType(call, result);
@@ -3561,6 +3946,162 @@ var init_stream_processor = __esm({
         }
         return events;
       }
+      /**
+       * Handle a gadget that cannot execute because a dependency failed.
+       * Calls the onDependencySkipped controller to allow customization.
+       */
+      async handleFailedDependency(call, failedDep) {
+        const events = [];
+        const depResult = this.completedResults.get(failedDep);
+        const depError = depResult?.error ?? "Dependency failed";
+        let action = { action: "skip" };
+        if (this.hooks.controllers?.onDependencySkipped) {
+          const context = {
+            iteration: this.iteration,
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError,
+            logger: this.logger
+          };
+          action = await this.hooks.controllers.onDependencySkipped(context);
+        }
+        if (action.action === "skip") {
+          this.failedInvocations.add(call.invocationId);
+          const skipEvent = {
+            type: "gadget_skipped",
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            failedDependency: failedDep,
+            failedDependencyError: depError
+          };
+          events.push(skipEvent);
+          if (this.hooks.observers?.onGadgetSkipped) {
+            const observeContext = {
+              iteration: this.iteration,
+              gadgetName: call.gadgetName,
+              invocationId: call.invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: failedDep,
+              failedDependencyError: depError,
+              logger: this.logger
+            };
+            await this.safeObserve(() => this.hooks.observers.onGadgetSkipped(observeContext));
+          }
+          this.logger.info("Gadget skipped due to failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        } else if (action.action === "execute_anyway") {
+          this.logger.info("Executing gadget despite failed dependency (controller override)", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+          const executeEvents = await this.executeGadgetWithHooks(call);
+          events.push(...executeEvents);
+        } else if (action.action === "use_fallback") {
+          const fallbackResult = {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            parameters: call.parameters ?? {},
+            result: action.fallbackResult,
+            executionTimeMs: 0
+          };
+          this.completedResults.set(call.invocationId, fallbackResult);
+          events.push({ type: "gadget_result", result: fallbackResult });
+          this.logger.info("Using fallback result for gadget with failed dependency", {
+            gadgetName: call.gadgetName,
+            invocationId: call.invocationId,
+            failedDependency: failedDep
+          });
+        }
+        return events;
+      }
+      /**
+       * Process pending gadgets whose dependencies are now satisfied.
+       * Executes ready gadgets in parallel and continues until no more can be triggered.
+       */
+      async processPendingGadgets() {
+        const events = [];
+        let progress = true;
+        while (progress && this.pendingGadgets.size > 0) {
+          progress = false;
+          const readyToExecute = [];
+          const readyToSkip = [];
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const failedDep = call.dependencies.find((dep) => this.failedInvocations.has(dep));
+            if (failedDep) {
+              readyToSkip.push({ call, failedDep });
+              continue;
+            }
+            const allSatisfied = call.dependencies.every((dep) => this.completedResults.has(dep));
+            if (allSatisfied) {
+              readyToExecute.push(call);
+            }
+          }
+          for (const { call, failedDep } of readyToSkip) {
+            this.pendingGadgets.delete(call.invocationId);
+            const skipEvents = await this.handleFailedDependency(call, failedDep);
+            events.push(...skipEvents);
+            progress = true;
+          }
+          if (readyToExecute.length > 0) {
+            this.logger.debug("Executing ready gadgets in parallel", {
+              count: readyToExecute.length,
+              invocationIds: readyToExecute.map((c) => c.invocationId)
+            });
+            for (const call of readyToExecute) {
+              this.pendingGadgets.delete(call.invocationId);
+            }
+            const executePromises = readyToExecute.map((call) => this.executeGadgetWithHooks(call));
+            const results = await Promise.all(executePromises);
+            for (const executeEvents of results) {
+              events.push(...executeEvents);
+            }
+            progress = true;
+          }
+        }
+        if (this.pendingGadgets.size > 0) {
+          const pendingIds = new Set(this.pendingGadgets.keys());
+          for (const [invocationId, call] of this.pendingGadgets) {
+            const missingDeps = call.dependencies.filter((dep) => !this.completedResults.has(dep));
+            const circularDeps = missingDeps.filter((dep) => pendingIds.has(dep));
+            const trulyMissingDeps = missingDeps.filter((dep) => !pendingIds.has(dep));
+            let errorMessage;
+            let logLevel = "warn";
+            if (circularDeps.length > 0 && trulyMissingDeps.length > 0) {
+              errorMessage = `Dependencies unresolvable: circular=[${circularDeps.join(", ")}], missing=[${trulyMissingDeps.join(", ")}]`;
+              logLevel = "error";
+            } else if (circularDeps.length > 0) {
+              errorMessage = `Circular dependency detected: "${invocationId}" depends on "${circularDeps[0]}" which also depends on "${invocationId}" (directly or indirectly)`;
+            } else {
+              errorMessage = `Dependency "${missingDeps[0]}" was never executed - check that the invocation ID exists and is spelled correctly`;
+            }
+            this.logger[logLevel]("Gadget has unresolvable dependencies", {
+              gadgetName: call.gadgetName,
+              invocationId,
+              circularDependencies: circularDeps,
+              missingDependencies: trulyMissingDeps
+            });
+            this.failedInvocations.add(invocationId);
+            const skipEvent = {
+              type: "gadget_skipped",
+              gadgetName: call.gadgetName,
+              invocationId,
+              parameters: call.parameters ?? {},
+              failedDependency: missingDeps[0],
+              failedDependencyError: errorMessage
+            };
+            events.push(skipEvent);
+          }
+          this.pendingGadgets.clear();
+        }
+        return events;
+      }
       /**
        * Safely execute an observer, catching and logging any errors.
        * Observers are non-critical, so errors are logged but don't crash the system.
````
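Together, `processGadgetCall`, `handleFailedDependency`, and `processPendingGadgets` form a small dependency scheduler: calls wait in `pendingGadgets` until every dependency has a completed result, ready calls run in parallel, failures propagate as skips, and anything still pending at the end is reported as circular or missing. A stripped-down sketch of that loop (standalone types, not the package's internals):

```typescript
// Standalone sketch of the ready/skip scheduling loop above.
type Call = { id: string; deps: string[]; run: () => Promise<string> };

async function schedule(calls: Call[]): Promise<Map<string, string>> {
  const done = new Map<string, string>();
  const failed = new Set<string>();
  let pending = [...calls];
  let progress = true;
  while (progress && pending.length > 0) {
    progress = false;
    const skipped = pending.filter((c) => c.deps.some((d) => failed.has(d)));
    const ready = pending.filter(
      (c) => !skipped.includes(c) && c.deps.every((d) => done.has(d))
    );
    pending = pending.filter((c) => !ready.includes(c) && !skipped.includes(c));
    for (const c of skipped) {
      failed.add(c.id); // a failed dependency propagates as a skip
      progress = true;
    }
    // Ready calls run in parallel, mirroring the Promise.all above.
    await Promise.all(
      ready.map(async (c) => {
        try {
          done.set(c.id, await c.run());
        } catch {
          failed.add(c.id); // an execution error fails dependents too
        }
        progress = true;
      })
    );
  }
  return done; // anything still pending had circular or missing dependencies
}
```

With the prompt example earlier, `fetch_1` and `fetch_2` run in parallel and `merge_1` only runs once both results exist; if either fetch fails, `merge_1` is skipped.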
````diff
@@ -3998,9 +4539,9 @@ var init_agent = __esm({
         if (msg.role === "user") {
           this.conversation.addUserMessage(msg.content);
         } else if (msg.role === "assistant") {
-          this.conversation.addAssistantMessage(msg.content);
+          this.conversation.addAssistantMessage(extractText(msg.content));
         } else if (msg.role === "system") {
-          this.conversation.addUserMessage(`[System] ${msg.content}`);
+          this.conversation.addUserMessage(`[System] ${extractText(msg.content)}`);
         }
       }
     }
@@ -4579,6 +5120,7 @@ var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
     import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
+    init_messages();
     init_anthropic_models();
     init_base_provider();
     init_constants2();
@@ -4591,11 +5133,33 @@ var init_anthropic = __esm({
       getModelSpecs() {
         return ANTHROPIC_MODELS;
       }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
           type: "text",
-          text: m.content,
+          text: extractText(m.content),
           // Add cache_control to the LAST system message block
           ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
         })) : void 0;
@@ -4608,14 +5172,10 @@ var init_anthropic = __esm({
         );
         const conversation = nonSystemMessages.map((message, index) => ({
           role: message.role,
-          content:
-
-
-
-            // Add cache_control to the LAST user message
-            ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
-          }
-          ]
+          content: this.convertToAnthropicContent(
+            message.content,
+            message.role === "user" && index === lastUserIndex
+          )
         }));
         const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
         const payload = {
@@ -4631,15 +5191,61 @@ var init_anthropic = __esm({
         };
         return payload;
       }
-
-
-
-
+      /**
+       * Convert llmist content to Anthropic's content block format.
+       * Handles text, images (base64 only), and applies cache_control.
+       */
+      convertToAnthropicContent(content, addCacheControl) {
+        const parts = normalizeContent(content);
+        return parts.map((part, index) => {
+          const isLastPart = index === parts.length - 1;
+          const cacheControl = addCacheControl && isLastPart ? { cache_control: { type: "ephemeral" } } : {};
+          if (part.type === "text") {
+            return {
+              type: "text",
+              text: part.text,
+              ...cacheControl
+            };
+          }
+          if (part.type === "image") {
+            return this.convertImagePart(part, cacheControl);
+          }
+          if (part.type === "audio") {
+            throw new Error(
+              "Anthropic does not support audio input. Use Google Gemini for audio processing."
+            );
+          }
+          throw new Error(`Unsupported content type: ${part.type}`);
+        });
       }
-
-
-
-
+      /**
+       * Convert an image content part to Anthropic's image block format.
+       */
+      convertImagePart(part, cacheControl) {
+        if (part.source.type === "url") {
+          throw new Error(
+            "Anthropic does not support image URLs. Please provide base64-encoded image data instead."
+          );
+        }
+        return {
+          type: "image",
+          source: {
+            type: "base64",
+            media_type: part.source.mediaType,
+            data: part.source.data
+          },
+          ...cacheControl
+        };
+      }
+      async executeStreamRequest(payload, signal) {
+        const client = this.client;
+        const stream2 = await client.messages.create(payload, signal ? { signal } : void 0);
+        return stream2;
+      }
+      async *wrapStream(iterable) {
+        const stream2 = iterable;
+        let inputTokens = 0;
+        let cachedInputTokens = 0;
         let cacheCreationInputTokens = 0;
         for await (const event of stream2) {
           if (event.type === "message_start") {
````
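For a multimodal user message, `convertToAnthropicContent` produces the block array the Anthropic Messages API expects; note the camelCase `mediaType` on the llmist side becomes snake_case `media_type`, and `cache_control` lands only on the last block. An input/output sketch:

```typescript
// Input: llmist content parts (shape as constructed by imageFromBuffer above).
const content = [
  { type: "text", text: "What's in this image?" },
  { type: "image", source: { type: "base64", mediaType: "image/png", data: "iVBOR..." } },
];

// convertToAnthropicContent(content, true) would yield:
// [
//   { type: "text", text: "What's in this image?" },
//   { type: "image",
//     source: { type: "base64", media_type: "image/png", data: "iVBOR..." },
//     cache_control: { type: "ephemeral" } }
// ]
// URL-sourced images and audio parts throw instead of converting.
```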
````diff
@@ -4713,17 +5319,12 @@ var init_anthropic = __esm({
       async countTokens(messages, descriptor, _spec) {
         const client = this.client;
         const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
+        const system = systemMessages.length > 0 ? systemMessages.map((m) => extractText(m.content)).join("\n\n") : void 0;
         const conversation = messages.filter(
           (message) => message.role !== "system"
         ).map((message) => ({
           role: message.role,
-          content:
-            {
-              type: "text",
-              text: message.content
-            }
-          ]
+          content: this.convertToAnthropicContent(message.content, false)
         }));
         try {
           const response = await client.messages.countTokens({
@@ -4737,14 +5338,201 @@ var init_anthropic = __esm({
             `Token counting failed for ${descriptor.name}, using fallback estimation:`,
             error
           );
-
-
+          let totalChars = 0;
+          let imageCount = 0;
+          for (const msg of messages) {
+            const parts = normalizeContent(msg.content);
+            for (const part of parts) {
+              if (part.type === "text") {
+                totalChars += part.text.length;
+              } else if (part.type === "image") {
+                imageCount++;
+              }
+            }
+          }
+          return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 1e3;
         }
       }
     };
   }
 });
 
+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
````
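Two small arithmetic notes on the block above: the token fallback estimates `ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + 1000 per image`, and `calculateGeminiImageCost` is a straight per-image (or per-size) multiply. Worked examples, assuming `FALLBACK_CHARS_PER_TOKEN` is 4 (the constant is defined elsewhere in the bundle and not shown in this diff):

```typescript
// Per-image pricing: cost = perImage * n.
// calculateGeminiImageCost("imagen-4.0-generate-001", "16:9", 3)
const imagenCost = 0.04 * 3; // 0.12 USD

// Size-keyed pricing for the token-based Gemini 3 Pro image model:
// calculateGeminiImageCost("gemini-3-pro-image-preview", "4K", 1)
const gemini3Cost = 0.24 * 1; // 0.24 USD

// Token fallback, assuming FALLBACK_CHARS_PER_TOKEN = 4 (not shown in this
// diff): 2000 chars of text plus one image estimates to:
const estimate = Math.ceil(2000 / 4) + 1 * 1000; // 1500 tokens
```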
````diff
@@ -4918,7 +5706,171 @@ var init_gemini_models = __esm({
   }
 });
 
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
````
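The big-endian `setUint32` constants in `wrapPcmInWav` are the four ASCII chunk tags of a canonical 44-byte WAV header. A quick way to confirm them:

```typescript
// Decode the magic constants used above back into their ASCII chunk tags.
const tag = (n: number) =>
  String.fromCharCode((n >>> 24) & 0xff, (n >>> 16) & 0xff, (n >>> 8) & 0xff, n & 0xff);

console.log(tag(1380533830)); // "RIFF" -- container id (offset 0)
console.log(tag(1463899717)); // "WAVE" -- format id (offset 8)
console.log(tag(1718449184)); // "fmt " -- format sub-chunk tag (offset 12)
console.log(tag(1684108385)); // "data" -- PCM payload tag (offset 36)
// Gemini TTS returns raw 24 kHz, 16-bit mono PCM, hence the
// wrapPcmInWav(pcmData, 24e3, 16, 1) call in generateSpeech below.
```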
@@ -4927,9 +5879,12 @@ var init_gemini = __esm({
 "src/providers/gemini.ts"() {
 "use strict";
 import_genai = require("@google/genai");
+init_messages();
 init_base_provider();
 init_constants2();
+init_gemini_image_models();
 init_gemini_models();
+init_gemini_speech_models();
 init_utils();
 GEMINI_ROLE_MAP = {
 system: "user",
@@ -4944,6 +5899,139 @@ var init_gemini = __esm({
 getModelSpecs() {
 return GEMINI_MODELS;
 }
+// =========================================================================
+// Image Generation
+// =========================================================================
+getImageModelSpecs() {
+return geminiImageModels;
+}
+supportsImageGeneration(modelId) {
+return isGeminiImageModel(modelId);
+}
+async generateImage(options) {
+const client = this.client;
+const spec = getGeminiImageModelSpec(options.model);
+const isImagenModel = options.model.startsWith("imagen");
+const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+const n = options.n ?? 1;
+if (isImagenModel) {
+const response2 = await client.models.generateImages({
+model: options.model,
+prompt: options.prompt,
+config: {
+numberOfImages: n,
+aspectRatio,
+outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+}
+});
+const images2 = response2.generatedImages ?? [];
+const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+return {
+// Gemini's imageBytes is already base64 encoded, so use it directly
+images: images2.map((img) => ({
+b64Json: img.image?.imageBytes ?? void 0
+})),
+model: options.model,
+usage: {
+imagesGenerated: images2.length,
+size: aspectRatio,
+quality: "standard"
+},
+cost: cost2
+};
+}
+const response = await client.models.generateContent({
+model: options.model,
+contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+config: {
+responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+}
+});
+const images = [];
+const candidate = response.candidates?.[0];
+if (candidate?.content?.parts) {
+for (const part of candidate.content.parts) {
+if ("inlineData" in part && part.inlineData) {
+images.push({
+b64Json: part.inlineData.data
+});
+}
+}
+}
+const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+return {
+images,
+model: options.model,
+usage: {
+imagesGenerated: images.length,
+size: aspectRatio,
+quality: "standard"
+},
+cost
+};
+}
+// =========================================================================
+// Speech Generation
+// =========================================================================
+getSpeechModelSpecs() {
+return geminiSpeechModels;
+}
+supportsSpeechGeneration(modelId) {
+return isGeminiSpeechModel(modelId);
+}
+async generateSpeech(options) {
+const client = this.client;
+const spec = getGeminiSpeechModelSpec(options.model);
+const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+const response = await client.models.generateContent({
+model: options.model,
+contents: [
+{
+role: "user",
+parts: [{ text: options.input }]
+}
+],
+config: {
+responseModalities: [import_genai.Modality.AUDIO],
+speechConfig: {
+voiceConfig: {
+prebuiltVoiceConfig: {
+voiceName: voice
+}
+}
+}
+}
+});
+let pcmData;
+const candidate = response.candidates?.[0];
+if (candidate?.content?.parts) {
+for (const part of candidate.content.parts) {
+if ("inlineData" in part && part.inlineData?.data) {
+const base64 = part.inlineData.data;
+const binary = atob(base64);
+pcmData = new Uint8Array(binary.length);
+for (let i = 0; i < binary.length; i++) {
+pcmData[i] = binary.charCodeAt(i);
+}
+break;
+}
+}
+}
+if (!pcmData) {
+throw new Error("No audio data in Gemini TTS response");
+}
+const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+return {
+audio: audioData,
+model: options.model,
+usage: {
+characterCount: options.input.length
+},
+cost,
+format: spec?.defaultFormat ?? "wav"
+};
+}
 buildRequestPayload(options, descriptor, _spec, messages) {
 const contents = this.convertMessagesToContents(messages);
 const generationConfig = this.buildGenerationConfig(options);
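generateSpeech above decodes the base64 inlineData payload with an atob() loop, then hands the raw PCM to wrapPcmInWav(pcmData, 24e3, 16, 1), i.e. 24 kHz, 16-bit, mono. On a Node runtime the same bytes can come from Buffer; a sketch for illustration only, with a placeholder payload:

```typescript
// Hypothetical stand-in for part.inlineData.data from the Gemini response.
const inlineDataBase64 = "AAAAAA==";
// Equivalent of the atob()/charCodeAt loop in the diff
// (Buffer is a Uint8Array subclass).
const pcmData: Uint8Array = Buffer.from(inlineDataBase64, "base64");
```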
@@ -4961,7 +6049,7 @@ var init_gemini = __esm({
 };
 return {
 model: descriptor.name,
-contents
+contents,
 config
 };
 }
@@ -4996,18 +6084,25 @@ var init_gemini = __esm({
 if (message.role === "system") {
 expandedMessages.push({
 role: "user",
-content: message.content
+content: extractText(message.content)
 });
 expandedMessages.push({
 role: "assistant",
 content: "Understood."
 });
 } else {
-expandedMessages.push(
+expandedMessages.push({
+role: message.role,
+content: message.content
+});
 }
 }
 return this.mergeConsecutiveMessages(expandedMessages);
 }
+/**
+* Merge consecutive messages with the same role (required by Gemini).
+* Handles multimodal content by converting to Gemini's part format.
+*/
 mergeConsecutiveMessages(messages) {
 if (messages.length === 0) {
 return [];
@@ -5016,15 +6111,16 @@ var init_gemini = __esm({
 let currentGroup = null;
 for (const message of messages) {
 const geminiRole = GEMINI_ROLE_MAP[message.role];
+const geminiParts = this.convertToGeminiParts(message.content);
 if (currentGroup && currentGroup.role === geminiRole) {
-currentGroup.parts.push(
+currentGroup.parts.push(...geminiParts);
 } else {
 if (currentGroup) {
 result.push(currentGroup);
 }
 currentGroup = {
 role: geminiRole,
-parts:
+parts: geminiParts
 };
 }
 }
@@ -5033,11 +6129,39 @@ var init_gemini = __esm({
 }
 return result;
 }
-
-
-
-
-
+/**
+* Convert llmist content to Gemini's part format.
+* Handles text, images, and audio (Gemini supports all three).
+*/
+convertToGeminiParts(content) {
+const parts = normalizeContent(content);
+return parts.map((part) => {
+if (part.type === "text") {
+return { text: part.text };
+}
+if (part.type === "image") {
+if (part.source.type === "url") {
+throw new Error(
+"Gemini does not support image URLs directly. Please provide base64-encoded image data."
+);
+}
+return {
+inlineData: {
+mimeType: part.source.mediaType,
+data: part.source.data
+}
+};
+}
+if (part.type === "audio") {
+return {
+inlineData: {
+mimeType: part.source.mediaType,
+data: part.source.data
+}
+};
+}
+throw new Error(`Unsupported content type: ${part.type}`);
+});
 }
 buildGenerationConfig(options) {
 const config = {};
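convertToGeminiParts maps each llmist part onto Gemini's part union: text stays { text }, while base64 images and audio both become inlineData objects, and URL image sources are rejected. The resulting shapes, for illustration:

```typescript
// text part    -> { text: "Describe this" }
// base64 image -> { inlineData: { mimeType: "image/png", data: "<base64>" } }
// audio part   -> { inlineData: { mimeType: "audio/wav", data: "<base64>" } }
```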
@@ -5058,9 +6182,9 @@ var init_gemini = __esm({
 async *wrapStream(iterable) {
 const stream2 = iterable;
 for await (const chunk of stream2) {
-const
-if (
-yield { text, rawEvent: chunk };
+const text3 = this.extractText(chunk);
+if (text3) {
+yield { text: text3, rawEvent: chunk };
 }
 const finishReason = this.extractFinishReason(chunk);
 const usage = this.extractUsage(chunk);
@@ -5121,7 +6245,7 @@ var init_gemini = __esm({
 try {
 const response = await client.models.countTokens({
 model: descriptor.name,
-contents
+contents
 // Note: systemInstruction not used - it's not supported by countTokens()
 // and would cause a 2100% token counting error
 });
@@ -5131,14 +6255,140 @@ var init_gemini = __esm({
 `Token counting failed for ${descriptor.name}, using fallback estimation:`,
 error
 );
-
-
+let totalChars = 0;
+let mediaCount = 0;
+for (const msg of messages) {
+const parts = normalizeContent(msg.content);
+for (const part of parts) {
+if (part.type === "text") {
+totalChars += part.text.length;
+} else if (part.type === "image" || part.type === "audio") {
+mediaCount++;
+}
+}
+}
+return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + mediaCount * 258;
 }
 }
 };
 }
 });
 
+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+const spec = getOpenAIImageModelSpec(modelId);
+if (!spec) return void 0;
+const sizePrice = spec.pricing.bySize?.[size];
+if (sizePrice === void 0) return void 0;
+let pricePerImage;
+if (typeof sizePrice === "number") {
+pricePerImage = sizePrice;
+} else {
+pricePerImage = sizePrice[quality];
+if (pricePerImage === void 0) return void 0;
+}
+return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+"src/providers/openai-image-models.ts"() {
+"use strict";
+GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+DALLE3_QUALITIES = ["standard", "hd"];
+DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+openaiImageModels = [
+// GPT Image 1 Family (flagship)
+{
+provider: "openai",
+modelId: "gpt-image-1",
+displayName: "GPT Image 1",
+pricing: {
+bySize: {
+"1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+"1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+"1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+}
+},
+supportedSizes: [...GPT_IMAGE_SIZES],
+supportedQualities: [...GPT_IMAGE_QUALITIES],
+maxImages: 1,
+defaultSize: "1024x1024",
+defaultQuality: "medium",
+features: {
+textRendering: true,
+transparency: true
+}
+},
+{
+provider: "openai",
+modelId: "gpt-image-1-mini",
+displayName: "GPT Image 1 Mini",
+pricing: {
+bySize: {
+"1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+"1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+"1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+}
+},
+supportedSizes: [...GPT_IMAGE_SIZES],
+supportedQualities: [...GPT_IMAGE_QUALITIES],
+maxImages: 1,
+defaultSize: "1024x1024",
+defaultQuality: "medium",
+features: {
+textRendering: true,
+transparency: true
+}
+},
+// DALL-E Family
+{
+provider: "openai",
+modelId: "dall-e-3",
+displayName: "DALL-E 3",
+pricing: {
+bySize: {
+"1024x1024": { standard: 0.04, hd: 0.08 },
+"1024x1792": { standard: 0.08, hd: 0.12 },
+"1792x1024": { standard: 0.08, hd: 0.12 }
+}
+},
+supportedSizes: [...DALLE3_SIZES],
+supportedQualities: [...DALLE3_QUALITIES],
+maxImages: 1,
+// DALL-E 3 only supports n=1
+defaultSize: "1024x1024",
+defaultQuality: "standard",
+features: {
+textRendering: true
+}
+},
+{
+provider: "openai",
+modelId: "dall-e-2",
+displayName: "DALL-E 2 (Legacy)",
+pricing: {
+bySize: {
+"256x256": 0.016,
+"512x512": 0.018,
+"1024x1024": 0.02
+}
+},
+supportedSizes: [...DALLE2_SIZES],
+maxImages: 10,
+defaultSize: "1024x1024"
+}
+];
+}
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
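calculateOpenAIImageCost resolves the bySize entry, optionally indexes it by quality, and multiplies by n. Worked examples using the prices in the table above:

```typescript
// dall-e-3 at 1024x1792 in "hd" quality costs $0.12 per image:
// calculateOpenAIImageCost("dall-e-3", "1024x1792", "hd", 2) === 0.24
// dall-e-2 uses flat per-size prices with no quality tier:
// calculateOpenAIImageCost("dall-e-2", "512x512") === 0.018
```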
@@ -5503,6 +6753,144 @@ var init_openai_models = __esm({
 }
 });
 
+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+const spec = getOpenAISpeechModelSpec(modelId);
+if (!spec) return void 0;
+if (spec.pricing.perCharacter !== void 0) {
+return characterCount * spec.pricing.perCharacter;
+}
+if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+return estimatedMinutes * spec.pricing.perMinute;
+}
+if (spec.pricing.perMinute !== void 0) {
+const approxMinutes = characterCount / 750;
+return approxMinutes * spec.pricing.perMinute;
+}
+return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+"src/providers/openai-speech-models.ts"() {
+"use strict";
+OPENAI_TTS_VOICES = [
+"alloy",
+"echo",
+"fable",
+"onyx",
+"nova",
+"shimmer"
+];
+OPENAI_TTS_EXTENDED_VOICES = [
+...OPENAI_TTS_VOICES,
+"ash",
+"ballad",
+"coral",
+"sage",
+"verse"
+];
+OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+openaiSpeechModels = [
+// Standard TTS models (character-based pricing)
+{
+provider: "openai",
+modelId: "tts-1",
+displayName: "TTS-1",
+pricing: {
+// $15 per 1M characters = $0.000015 per character
+perCharacter: 15e-6
+},
+voices: [...OPENAI_TTS_VOICES],
+formats: OPENAI_TTS_FORMATS,
+maxInputLength: 4096,
+defaultVoice: "alloy",
+defaultFormat: "mp3",
+features: {
+voiceInstructions: false
+}
+},
+{
+provider: "openai",
+modelId: "tts-1-1106",
+displayName: "TTS-1 (Nov 2023)",
+pricing: {
+perCharacter: 15e-6
+},
+voices: [...OPENAI_TTS_VOICES],
+formats: OPENAI_TTS_FORMATS,
+maxInputLength: 4096,
+defaultVoice: "alloy",
+defaultFormat: "mp3",
+features: {
+voiceInstructions: false
+}
+},
+{
+provider: "openai",
+modelId: "tts-1-hd",
+displayName: "TTS-1 HD",
+pricing: {
+// $30 per 1M characters = $0.00003 per character
+perCharacter: 3e-5
+},
+voices: [...OPENAI_TTS_VOICES],
+formats: OPENAI_TTS_FORMATS,
+maxInputLength: 4096,
+defaultVoice: "alloy",
+defaultFormat: "mp3",
+features: {
+voiceInstructions: false
+}
+},
+{
+provider: "openai",
+modelId: "tts-1-hd-1106",
+displayName: "TTS-1 HD (Nov 2023)",
+pricing: {
+perCharacter: 3e-5
+},
+voices: [...OPENAI_TTS_VOICES],
+formats: OPENAI_TTS_FORMATS,
+maxInputLength: 4096,
+defaultVoice: "alloy",
+defaultFormat: "mp3",
+features: {
+voiceInstructions: false
+}
+},
+// Token-based TTS model with voice instructions support
+{
+provider: "openai",
+modelId: "gpt-4o-mini-tts",
+displayName: "GPT-4o Mini TTS",
+pricing: {
+// $0.60 per 1M input tokens = $0.0000006 per token
+perInputToken: 6e-7,
+// $12 per 1M audio output tokens = $0.000012 per token
+perAudioOutputToken: 12e-6,
+// ~$0.015 per minute of audio
+perMinute: 0.015
+},
+voices: [...OPENAI_TTS_EXTENDED_VOICES],
+formats: OPENAI_TTS_FORMATS,
+maxInputLength: 2e3,
+// tokens, not characters
+defaultVoice: "alloy",
+defaultFormat: "mp3",
+features: {
+voiceInstructions: true
+}
+}
+];
+}
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
 if (!extra) {
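calculateOpenAISpeechCost prefers per-character pricing and, when only perMinute pricing exists, estimates duration at 750 characters per minute. Worked examples from the table above:

```typescript
// tts-1 at $15 per 1M characters (15e-6 each): 1000 chars -> $0.015.
// calculateOpenAISpeechCost("tts-1", 1000) === 0.015
// gpt-4o-mini-tts has no perCharacter price, so 1500 chars fall back to
// (1500 / 750) minutes * $0.015 per minute === 0.03.
```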
@@ -5522,9 +6910,12 @@ var init_openai = __esm({
 "use strict";
 import_openai = __toESM(require("openai"), 1);
 import_tiktoken = require("tiktoken");
+init_messages();
 init_base_provider();
 init_constants2();
+init_openai_image_models();
 init_openai_models();
+init_openai_speech_models();
 init_utils();
 ROLE_MAP = {
 system: "system",
@@ -5539,6 +6930,87 @@ var init_openai = __esm({
 getModelSpecs() {
 return OPENAI_MODELS;
 }
+// =========================================================================
+// Image Generation
+// =========================================================================
+getImageModelSpecs() {
+return openaiImageModels;
+}
+supportsImageGeneration(modelId) {
+return isOpenAIImageModel(modelId);
+}
+async generateImage(options) {
+const client = this.client;
+const spec = getOpenAIImageModelSpec(options.model);
+const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+const n = options.n ?? 1;
+const isDallE2 = options.model === "dall-e-2";
+const isGptImage = options.model.startsWith("gpt-image");
+const requestParams = {
+model: options.model,
+prompt: options.prompt,
+size,
+n
+};
+if (!isDallE2 && !isGptImage) {
+requestParams.quality = quality;
+}
+if (isGptImage) {
+} else if (!isDallE2) {
+requestParams.response_format = options.responseFormat ?? "url";
+}
+const response = await client.images.generate(requestParams);
+const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+const images = response.data ?? [];
+return {
+images: images.map((img) => ({
+url: img.url,
+b64Json: img.b64_json,
+revisedPrompt: img.revised_prompt
+})),
+model: options.model,
+usage: {
+imagesGenerated: images.length,
+size,
+quality
+},
+cost
+};
+}
+// =========================================================================
+// Speech Generation
+// =========================================================================
+getSpeechModelSpecs() {
+return openaiSpeechModels;
+}
+supportsSpeechGeneration(modelId) {
+return isOpenAISpeechModel(modelId);
+}
+async generateSpeech(options) {
+const client = this.client;
+const spec = getOpenAISpeechModelSpec(options.model);
+const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+const response = await client.audio.speech.create({
+model: options.model,
+input: options.input,
+voice,
+response_format: format,
+speed: options.speed ?? 1
+});
+const audioBuffer = await response.arrayBuffer();
+const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+return {
+audio: audioBuffer,
+model: options.model,
+usage: {
+characterCount: options.input.length
+},
+cost,
+format
+};
+}
 buildRequestPayload(options, descriptor, spec, messages) {
 const { maxTokens, temperature, topP, stopSequences, extra } = options;
 const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
@@ -5546,11 +7018,7 @@ var init_openai = __esm({
 const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
 return {
 model: descriptor.name,
-messages: messages.map((message) => (
-role: ROLE_MAP[message.role],
-content: message.content,
-name: message.name
-})),
+messages: messages.map((message) => this.convertToOpenAIMessage(message)),
 // Only set max_completion_tokens if explicitly provided
 // Otherwise let the API use "as much as fits" in the context window
 ...maxTokens !== void 0 ? { max_completion_tokens: maxTokens } : {},
@@ -5562,6 +7030,77 @@ var init_openai = __esm({
 ...shouldIncludeTemperature ? { temperature } : {}
 };
 }
+/**
+* Convert an LLMMessage to OpenAI's ChatCompletionMessageParam.
+* Handles role-specific content type requirements:
+* - system/assistant: string content only
+* - user: string or multimodal array content
+*/
+convertToOpenAIMessage(message) {
+const role = ROLE_MAP[message.role];
+if (role === "user") {
+const content = this.convertToOpenAIContent(message.content);
+return {
+role: "user",
+content,
+...message.name ? { name: message.name } : {}
+};
+}
+const textContent = typeof message.content === "string" ? message.content : extractText(message.content);
+if (role === "system") {
+return {
+role: "system",
+content: textContent,
+...message.name ? { name: message.name } : {}
+};
+}
+return {
+role: "assistant",
+content: textContent,
+...message.name ? { name: message.name } : {}
+};
+}
+/**
+* Convert llmist content to OpenAI's content format.
+* Optimizes by returning string for text-only content, array for multimodal.
+*/
+convertToOpenAIContent(content) {
+if (typeof content === "string") {
+return content;
+}
+return content.map((part) => {
+if (part.type === "text") {
+return { type: "text", text: part.text };
+}
+if (part.type === "image") {
+return this.convertImagePart(part);
+}
+if (part.type === "audio") {
+throw new Error(
+"OpenAI chat completions do not support audio input. Use Whisper for transcription or Gemini for audio understanding."
+);
+}
+throw new Error(`Unsupported content type: ${part.type}`);
+});
+}
+/**
+* Convert an image content part to OpenAI's image_url format.
+* Supports both URLs and base64 data URLs.
+*/
+convertImagePart(part) {
+if (part.source.type === "url") {
+return {
+type: "image_url",
+image_url: { url: part.source.url }
+};
+}
+return {
+type: "image_url",
+image_url: {
+url: `data:${part.source.mediaType};base64,${part.source.data}`
+}
+};
+}
 async executeStreamRequest(payload, signal) {
 const client = this.client;
 const stream2 = await client.chat.completions.create(payload, signal ? { signal } : void 0);
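convertImagePart above produces OpenAI's image_url part in both cases; only the url field differs. The two output shapes, for illustration (the URL and base64 payload are placeholders):

```typescript
// URL source:
// { type: "image_url", image_url: { url: "https://example.com/cat.png" } }
// Base64 source, with mediaType and data joined into a data URL:
// { type: "image_url", image_url: { url: "data:image/png;base64,iVBORw0KGgo..." } }
```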
@@ -5570,9 +7109,9 @@ var init_openai = __esm({
 async *wrapStream(iterable) {
 const stream2 = iterable;
 for await (const chunk of stream2) {
-const
-if (
-yield { text, rawEvent: chunk };
+const text3 = chunk.choices.map((choice) => choice.delta?.content ?? "").join("");
+if (text3) {
+yield { text: text3, rawEvent: chunk };
 }
 const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
 const usage = chunk.usage ? {
@@ -5620,17 +7159,26 @@ var init_openai = __esm({
 }
 try {
 let tokenCount = 0;
+let imageCount = 0;
 for (const message of messages) {
 tokenCount += OPENAI_MESSAGE_OVERHEAD_TOKENS;
 const roleText = ROLE_MAP[message.role];
 tokenCount += encoding.encode(roleText).length;
-
+const textContent = extractText(message.content);
+tokenCount += encoding.encode(textContent).length;
+const parts = normalizeContent(message.content);
+for (const part of parts) {
+if (part.type === "image") {
+imageCount++;
+}
+}
 if (message.name) {
 tokenCount += encoding.encode(message.name).length;
 tokenCount += OPENAI_NAME_FIELD_OVERHEAD_TOKENS;
 }
 }
 tokenCount += OPENAI_REPLY_PRIMING_TOKENS;
+tokenCount += imageCount * 765;
 return tokenCount;
 } finally {
 encoding.free();
@@ -5640,8 +7188,19 @@ var init_openai = __esm({
 `Token counting failed for ${descriptor.name}, using fallback estimation:`,
 error
 );
-
-
+let totalChars = 0;
+let imageCount = 0;
+for (const msg of messages) {
+const parts = normalizeContent(msg.content);
+for (const part of parts) {
+if (part.type === "text") {
+totalChars += part.text.length;
+} else if (part.type === "image") {
+imageCount++;
+}
+}
+}
+return Math.ceil(totalChars / FALLBACK_CHARS_PER_TOKEN) + imageCount * 765;
 }
 }
 };
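Both fallback estimators charge a flat per-media token cost (258 for Gemini, 765 for OpenAI) on top of a character-to-token ratio. Assuming FALLBACK_CHARS_PER_TOKEN is 4 (the constant is defined elsewhere in this bundle, so its value is an assumption here):

```typescript
// 1000 chars of text plus 2 images on the OpenAI fallback path:
// Math.ceil(1000 / 4) + 2 * 765 === 250 + 1530 === 1780 estimated tokens.
```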
@@ -5879,30 +7438,109 @@ var init_model_registry = __esm({
 }
 });
 
-// src/core/
-var
-var
-"src/core/
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+"src/core/namespaces/image.ts"() {
 "use strict";
-
-constructor(defaultProvider
+ImageNamespace = class {
+constructor(adapters, defaultProvider) {
+this.adapters = adapters;
 this.defaultProvider = defaultProvider;
 }
-
-
-
-
+/**
+* Generate images from a text prompt.
+*
+* @param options - Image generation options
+* @returns Promise resolving to the generation result with images and cost
+* @throws Error if the provider doesn't support image generation
+*/
+async generate(options) {
+const modelId = options.model;
+const adapter = this.findImageAdapter(modelId);
+if (!adapter || !adapter.generateImage) {
+throw new Error(
+`No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+);
 }
-
-
-
+return adapter.generateImage(options);
+}
+/**
+* List all available image generation models.
+*/
+listModels() {
+const models = [];
+for (const adapter of this.adapters) {
+if (adapter.getImageModelSpecs) {
+models.push(...adapter.getImageModelSpecs());
+}
 }
-
-
-
-
+return models;
+}
+/**
+* Check if a model is supported for image generation.
+*/
+supportsModel(modelId) {
+return this.findImageAdapter(modelId) !== void 0;
+}
+findImageAdapter(modelId) {
+return this.adapters.find(
+(adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+);
+}
+};
+}
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+"src/core/namespaces/speech.ts"() {
+"use strict";
+SpeechNamespace = class {
+constructor(adapters, defaultProvider) {
+this.adapters = adapters;
+this.defaultProvider = defaultProvider;
+}
+/**
+* Generate speech audio from text.
+*
+* @param options - Speech generation options
+* @returns Promise resolving to the generation result with audio and cost
+* @throws Error if the provider doesn't support speech generation
+*/
+async generate(options) {
+const modelId = options.model;
+const adapter = this.findSpeechAdapter(modelId);
+if (!adapter || !adapter.generateSpeech) {
+throw new Error(
+`No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+);
 }
-return
+return adapter.generateSpeech(options);
+}
+/**
+* List all available speech generation models.
+*/
+listModels() {
+const models = [];
+for (const adapter of this.adapters) {
+if (adapter.getSpeechModelSpecs) {
+models.push(...adapter.getSpeechModelSpecs());
+}
+}
+return models;
+}
+/**
+* Check if a model is supported for speech generation.
+*/
+supportsModel(modelId) {
+return this.findSpeechAdapter(modelId) !== void 0;
+}
+findSpeechAdapter(modelId) {
+return this.adapters.find(
+(adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+);
 }
 };
 }
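ImageNamespace and SpeechNamespace dispatch to the first adapter whose supports*Generation check accepts the model id. A usage sketch, assuming the namespaces are reachable as client.image and client.speech (the wiring appears in the client constructor further down):

```typescript
import { LLMist } from "llmist";

async function demo() {
  const client = new LLMist();
  const result = await client.image.generate({
    model: "dall-e-3",
    prompt: "a lighthouse at dusk",
    size: "1024x1024",
  });
  // result.images[0].url or .b64Json, plus result.cost from the pricing tables.
}
```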
@@ -5951,6 +7589,201 @@ var init_quick_methods = __esm({
 }
 });
 
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+"src/core/namespaces/text.ts"() {
+"use strict";
+init_quick_methods();
+TextNamespace = class {
+constructor(client) {
+this.client = client;
+}
+/**
+* Generate a complete text response.
+*
+* @param prompt - User prompt
+* @param options - Optional configuration
+* @returns Complete text response
+*/
+async complete(prompt, options) {
+return complete(this.client, prompt, options);
+}
+/**
+* Stream text chunks.
+*
+* @param prompt - User prompt
+* @param options - Optional configuration
+* @returns Async generator yielding text chunks
+*/
+stream(prompt, options) {
+return stream(this.client, prompt, options);
+}
+};
+}
+});
+
+// src/core/namespaces/vision.ts
+var VisionNamespace;
+var init_vision = __esm({
+"src/core/namespaces/vision.ts"() {
+"use strict";
+init_input_content();
+init_messages();
+VisionNamespace = class {
+constructor(client) {
+this.client = client;
+}
+/**
+* Build a message builder with the image content attached.
+* Handles URLs, data URLs, base64 strings, and binary buffers.
+*/
+buildImageMessage(options) {
+const builder = new LLMMessageBuilder();
+if (options.systemPrompt) {
+builder.addSystem(options.systemPrompt);
+}
+if (typeof options.image === "string") {
+if (options.image.startsWith("http://") || options.image.startsWith("https://")) {
+builder.addUserWithImageUrl(options.prompt, options.image);
+} else if (isDataUrl(options.image)) {
+const parsed = parseDataUrl(options.image);
+if (!parsed) {
+throw new Error("Invalid data URL format");
+}
+builder.addUserWithImage(
+options.prompt,
+parsed.data,
+parsed.mimeType
+);
+} else {
+const buffer = Buffer.from(options.image, "base64");
+builder.addUserWithImage(options.prompt, buffer, options.mimeType);
+}
+} else {
+builder.addUserWithImage(options.prompt, options.image, options.mimeType);
+}
+return builder;
+}
+/**
+* Stream the response and collect text and usage information.
+*/
+async streamAndCollect(options, builder) {
+let response = "";
+let finalUsage;
+for await (const chunk of this.client.stream({
+model: options.model,
+messages: builder.build(),
+maxTokens: options.maxTokens,
+temperature: options.temperature
+})) {
+response += chunk.text;
+if (chunk.usage) {
+finalUsage = {
+inputTokens: chunk.usage.inputTokens,
+outputTokens: chunk.usage.outputTokens,
+totalTokens: chunk.usage.totalTokens
+};
+}
+}
+return { text: response.trim(), usage: finalUsage };
+}
+/**
+* Analyze an image with a vision-capable model.
+* Returns the analysis as a string.
+*
+* @param options - Vision analysis options
+* @returns Promise resolving to the analysis text
+* @throws Error if the image format is unsupported or model doesn't support vision
+*
+* @example
+* ```typescript
+* // From file
+* const result = await llmist.vision.analyze({
+* model: "gpt-4o",
+* image: await fs.readFile("photo.jpg"),
+* prompt: "What's in this image?",
+* });
+*
+* // From URL (OpenAI only)
+* const result = await llmist.vision.analyze({
+* model: "gpt-4o",
+* image: "https://example.com/image.jpg",
+* prompt: "Describe this image",
+* });
+* ```
+*/
+async analyze(options) {
+const builder = this.buildImageMessage(options);
+const { text: text3 } = await this.streamAndCollect(options, builder);
+return text3;
+}
+/**
+* Analyze an image and return detailed result with usage info.
+*
+* @param options - Vision analysis options
+* @returns Promise resolving to the analysis result with usage info
+*/
+async analyzeWithUsage(options) {
+const builder = this.buildImageMessage(options);
+const { text: text3, usage } = await this.streamAndCollect(options, builder);
+return {
+text: text3,
+model: options.model,
+usage
+};
+}
+/**
+* Check if a model supports vision/image input.
+*
+* @param modelId - Model ID to check
+* @returns True if the model supports vision
+*/
+supportsModel(modelId) {
+const spec = this.client.modelRegistry.getModelSpec(modelId);
+return spec?.features?.vision === true;
+}
+/**
+* List all models that support vision.
+*
+* @returns Array of model IDs that support vision
+*/
+listModels() {
+return this.client.modelRegistry.listModels().filter((spec) => spec.features?.vision === true).map((spec) => spec.modelId);
+}
+};
+}
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+"src/core/options.ts"() {
+"use strict";
+ModelIdentifierParser = class {
+constructor(defaultProvider = "openai") {
+this.defaultProvider = defaultProvider;
+}
+parse(identifier) {
+const trimmed = identifier.trim();
+if (!trimmed) {
+throw new Error("Model identifier cannot be empty");
+}
+const [maybeProvider, ...rest] = trimmed.split(":");
+if (rest.length === 0) {
+return { provider: this.defaultProvider, name: maybeProvider };
+}
+const provider = maybeProvider;
+const name = rest.join(":");
+if (!name) {
+throw new Error("Model name cannot be empty");
+}
+return { provider, name };
+}
+};
+}
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
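ModelIdentifierParser splits on the first colon and falls back to the default provider when none is present; colons after the first stay in the model name. Its behavior as defined above (the third provider id is hypothetical):

```typescript
// new ModelIdentifierParser("openai").parse("gpt-4o")
//   -> { provider: "openai", name: "gpt-4o" }
// new ModelIdentifierParser().parse("anthropic:claude-sonnet-4-5")
//   -> { provider: "anthropic", name: "claude-sonnet-4-5" }
// new ModelIdentifierParser().parse("ollama:llama3:8b")
//   -> { provider: "ollama", name: "llama3:8b" }
```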
@@ -5963,12 +7796,22 @@ var init_client = __esm({
 init_builder();
 init_discovery();
 init_model_registry();
+init_image();
+init_speech();
+init_text();
+init_vision();
 init_options();
 init_quick_methods();
 LLMist = class _LLMist {
 parser;
+defaultProvider;
 modelRegistry;
 adapters;
+// Namespaces for different generation types
+text;
+image;
+speech;
+vision;
 constructor(...args) {
 let adapters = [];
 let defaultProvider;
@@ -6007,6 +7850,7 @@ var init_client = __esm({
 const priorityB = b.priority ?? 0;
 return priorityB - priorityA;
 });
+this.defaultProvider = resolvedDefaultProvider;
 this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
 this.modelRegistry = new ModelRegistry();
 for (const adapter of this.adapters) {
@@ -6015,6 +7859,10 @@ var init_client = __esm({
 if (customModels.length > 0) {
 this.modelRegistry.registerModels(customModels);
 }
+this.text = new TextNamespace(this);
+this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
+this.vision = new VisionNamespace(this);
 }
 stream(options) {
 const descriptor = this.parser.parse(options.model);
@@ -6199,6 +8047,7 @@ var init_builder = __esm({
 "src/agent/builder.ts"() {
 "use strict";
 init_constants();
+init_input_content();
 init_model_shortcuts();
 init_registry();
 init_agent();
@@ -6846,13 +8695,17 @@ ${endPrefix}`
 * }
 * ```
 */
-
+/**
+* Build AgentOptions with the given user prompt.
+* Centralizes options construction for ask(), askWithImage(), and askWithContent().
+*/
+buildAgentOptions(userPrompt) {
 if (!this.client) {
 const { LLMist: LLMistClass } = (init_client(), __toCommonJS(client_exports));
 this.client = new LLMistClass();
 }
 const registry = GadgetRegistry.from(this.gadgets);
-
+return {
 client: this.client,
 model: this.model ?? "openai:gpt-5-nano",
 systemPrompt: this.systemPrompt,
@@ -6878,6 +8731,83 @@ ${endPrefix}`
 compactionConfig: this.compactionConfig,
 signal: this.signal
 };
+}
+ask(userPrompt) {
+const options = this.buildAgentOptions(userPrompt);
+return new Agent(AGENT_INTERNAL_KEY, options);
+}
+/**
+* Build and create the agent with a multimodal user prompt (text + image).
+* Returns the Agent instance ready to run.
+*
+* @param textPrompt - Text prompt describing what to do with the image
+* @param imageData - Image data (Buffer, Uint8Array, or base64 string)
+* @param mimeType - Optional MIME type (auto-detected if not provided)
+* @returns Configured Agent instance
+*
+* @example
+* ```typescript
+* const agent = LLMist.createAgent()
+* .withModel("gpt-4o")
+* .withSystem("You analyze images")
+* .askWithImage(
+* "What's in this image?",
+* await fs.readFile("photo.jpg")
+* );
+*
+* for await (const event of agent.run()) {
+* // handle events
+* }
+* ```
+*/
+askWithImage(textPrompt, imageData, mimeType) {
+const imageBuffer = typeof imageData === "string" ? Buffer.from(imageData, "base64") : imageData;
+const detectedMime = mimeType ?? detectImageMimeType(imageBuffer);
+if (!detectedMime) {
+throw new Error(
+"Could not detect image MIME type. Please provide the mimeType parameter explicitly."
+);
+}
+const userContent = [
+text(textPrompt),
+{
+type: "image",
+source: {
+type: "base64",
+mediaType: detectedMime,
+data: toBase64(imageBuffer)
+}
+}
+];
+const options = this.buildAgentOptions(userContent);
+return new Agent(AGENT_INTERNAL_KEY, options);
+}
+/**
+* Build and return an Agent configured with multimodal content.
+* More flexible than askWithImage - accepts any combination of content parts.
+*
+* @param content - Array of content parts (text, images, audio)
+* @returns A configured Agent ready for execution
+*
+* @example
+* ```typescript
+* import { text, imageFromBuffer, audioFromBuffer } from "llmist";
+*
+* const agent = LLMist.createAgent()
+* .withModel("gemini:gemini-2.5-flash")
+* .askWithContent([
+* text("Describe this image and transcribe the audio:"),
+* imageFromBuffer(imageData),
+* audioFromBuffer(audioData),
+* ]);
+*
+* for await (const event of agent.run()) {
+* // handle events
+* }
+* ```
+*/
+askWithContent(content) {
+const options = this.buildAgentOptions(content);
 return new Agent(AGENT_INTERNAL_KEY, options);
 }
 /**
@@ -6995,7 +8925,10 @@ var COMMANDS = {
 complete: "complete",
 agent: "agent",
 models: "models",
-gadget: "gadget"
+gadget: "gadget",
+image: "image",
+speech: "speech",
+vision: "vision"
 };
 var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
 var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -7016,7 +8949,20 @@ var OPTION_FLAGS = {
 docker: "--docker",
 dockerRo: "--docker-ro",
 noDocker: "--no-docker",
-dockerDev: "--docker-dev"
+dockerDev: "--docker-dev",
+// Multimodal input options
+inputImage: "--image <path>",
+inputAudio: "--audio <path>",
+// Image generation options
+imageSize: "--size <size>",
+imageQuality: "--quality <quality>",
+imageCount: "-n, --count <number>",
+imageOutput: "-o, --output <path>",
+// Speech generation options
+voice: "--voice <name>",
+speechFormat: "--format <format>",
+speechSpeed: "--speed <value>",
+speechOutput: "-o, --output <path>"
 };
 var OPTION_DESCRIPTIONS = {
 model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -7032,10 +8978,23 @@ var OPTION_DESCRIPTIONS = {
 noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
 noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
 quiet: "Suppress all output except content (text and TellUser messages).",
+// Multimodal input descriptions
+inputImage: "Image file to include with the prompt (vision models).",
+inputAudio: "Audio file to include with the prompt (Gemini only).",
 docker: "Run agent in a Docker sandbox container for security isolation.",
 dockerRo: "Run in Docker with current directory mounted read-only.",
 noDocker: "Disable Docker sandboxing (override config).",
-dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+// Image generation descriptions
+imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+imageCount: "Number of images to generate (model dependent, usually 1-4).",
+imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+// Speech generation descriptions
+voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+speechOutput: "Output path for audio file. Defaults to stdout if not specified."
 };
 var SUMMARY_PREFIX = "[llmist]";
 
@@ -7045,7 +9004,7 @@ var import_commander2 = require("commander");
 // package.json
 var package_default = {
 name: "llmist",
-version: "2.
+version: "2.5.0",
 description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
 type: "module",
 main: "dist/index.cjs",
@@ -7167,7 +9126,7 @@ var package_default = {
 };
 
 // src/cli/agent-command.ts
-var
+var import_promises4 = require("readline/promises");
 var import_chalk5 = __toESM(require("chalk"), 1);
 init_builder();
 
@@ -7185,6 +9144,7 @@ function isAbortError(error) {
 }
 
 // src/cli/agent-command.ts
+init_input_content();
 init_registry();
 init_constants2();
 
@@ -7509,15 +9469,84 @@ var finish = createGadget({
 });
 var builtinGadgets = [askUser, tellUser, finish];
 
+// src/cli/file-utils.ts
+var import_promises2 = require("fs/promises");
+var import_node_path3 = require("path");
+init_input_content();
+var DEFAULT_MAX_FILE_SIZE = 50 * 1024 * 1024;
+function formatFileSize(bytes) {
+if (bytes < 1024) return `${bytes} bytes`;
+if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+if (bytes < 1024 * 1024 * 1024) return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+return `${(bytes / (1024 * 1024 * 1024)).toFixed(1)} GB`;
+}
+async function checkFileSize(absolutePath, filePath, maxSize) {
+const stats = await (0, import_promises2.stat)(absolutePath);
+if (stats.size > maxSize) {
+throw new Error(
+`File "${filePath}" is too large (${formatFileSize(stats.size)}). Maximum allowed size is ${formatFileSize(maxSize)}. Consider compressing the file or using a smaller version.`
+);
+}
+}
+async function readImageFile(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+let buffer;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+buffer = await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read image file "${filePath}": ${message}`);
+}
+const mimeType = detectImageMimeType(buffer);
+if (!mimeType) {
+throw new Error(
+`File "${filePath}" is not a supported image format. Supported formats: JPEG, PNG, GIF, WebP`
+);
+}
+return imageFromBuffer(buffer, mimeType);
+}
+async function readAudioFile(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+let buffer;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+buffer = await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read audio file "${filePath}": ${message}`);
+}
+const mimeType = detectAudioMimeType(buffer);
+if (!mimeType) {
+throw new Error(
+`File "${filePath}" is not a supported audio format. Supported formats: MP3, WAV, OGG, WebM`
+);
+}
+return audioFromBuffer(buffer, mimeType);
+}
+async function readFileBuffer(filePath, options = {}) {
+const absolutePath = (0, import_node_path3.resolve)(filePath);
+const maxFileSize = options.maxFileSize ?? DEFAULT_MAX_FILE_SIZE;
+try {
+await checkFileSize(absolutePath, filePath, maxFileSize);
+return await (0, import_promises2.readFile)(absolutePath);
+} catch (error) {
+const message = error instanceof Error ? error.message : String(error);
+throw new Error(`Failed to read file "${filePath}": ${message}`);
+}
+}
+
 // src/cli/gadgets.ts
 var import_node_fs7 = __toESM(require("fs"), 1);
-var
+var import_node_path7 = __toESM(require("path"), 1);
 var import_node_url = require("url");
 init_gadget();
 
 // src/cli/builtins/filesystem/list-directory.ts
 var import_node_fs4 = __toESM(require("fs"), 1);
-var
+var import_node_path5 = __toESM(require("path"), 1);
 var import_zod4 = require("zod");
 
 // src/index.ts
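readImageFile and readAudioFile above cap reads at DEFAULT_MAX_FILE_SIZE (50 MB) and sniff the MIME type from magic bytes before wrapping the buffer. formatFileSize rounds to one decimal per unit:

```typescript
// formatFileSize(512)             -> "512 bytes"
// formatFileSize(2048)            -> "2.0 KB"
// formatFileSize(5 * 1024 * 1024) -> "5.0 MB"
```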
@@ -7541,6 +9570,7 @@ init_prompt_config();
|
|
|
7541
9570
|
|
|
7542
9571
|
// src/index.ts
|
|
7543
9572
|
init_client();
|
|
9573
|
+
init_input_content();
|
|
7544
9574
|
init_messages();
|
|
7545
9575
|
init_model_registry();
|
|
7546
9576
|
init_model_shortcuts();
|
|
@@ -7571,6 +9601,10 @@ init_logger();
|
|
|
7571
9601
|
// src/testing/mock-stream.ts
|
|
7572
9602
|
init_constants();
|
|
7573
9603
|
|
|
9604
|
+
// src/testing/mock-builder.ts
|
|
9605
|
+
init_input_content();
|
|
9606
|
+
init_messages();
|
|
9607
|
+
|
|
7574
9608
|
// src/testing/mock-client.ts
|
|
7575
9609
|
init_client();
|
|
7576
9610
|
|
|
@@ -7582,7 +9616,7 @@ var import_node_stream = require("stream");
 
 // src/cli/builtins/filesystem/utils.ts
 var import_node_fs3 = __toESM(require("fs"), 1);
-var
+var import_node_path4 = __toESM(require("path"), 1);
 var PathSandboxException = class extends Error {
   constructor(inputPath, reason) {
     super(`Path access denied: ${inputPath}. ${reason}`);
@@ -7591,7 +9625,7 @@ var PathSandboxException = class extends Error {
 };
 function validatePathIsWithinCwd(inputPath) {
   const cwd = process.cwd();
-  const resolvedPath =
+  const resolvedPath = import_node_path4.default.resolve(cwd, inputPath);
   let finalPath;
   try {
     finalPath = import_node_fs3.default.realpathSync(resolvedPath);
@@ -7603,7 +9637,7 @@ function validatePathIsWithinCwd(inputPath) {
       throw error;
     }
   }
-  const cwdWithSep = cwd +
+  const cwdWithSep = cwd + import_node_path4.default.sep;
   if (!finalPath.startsWith(cwdWithSep) && finalPath !== cwd) {
     throw new PathSandboxException(inputPath, "Path is outside the current working directory");
   }
@@ -7616,8 +9650,8 @@ function listFiles(dirPath, basePath = dirPath, maxDepth = 1, currentDepth = 1)
   try {
     const items = import_node_fs4.default.readdirSync(dirPath);
     for (const item of items) {
-      const fullPath =
-      const relativePath =
+      const fullPath = import_node_path5.default.join(dirPath, item);
+      const relativePath = import_node_path5.default.relative(basePath, fullPath);
       try {
         const stats = import_node_fs4.default.lstatSync(fullPath);
         let type;
@@ -7732,7 +9766,7 @@ ${formattedList}`;
 // src/cli/builtins/filesystem/read-file.ts
 var import_node_fs5 = __toESM(require("fs"), 1);
 var import_zod5 = require("zod");
-var
+var readFile2 = createGadget({
   name: "ReadFile",
   description: "Read the entire content of a file and return it as text. The file path must be within the current working directory or its subdirectories.",
   schema: import_zod5.z.object({
@@ -7761,7 +9795,7 @@ ${content}`;
 
 // src/cli/builtins/filesystem/write-file.ts
 var import_node_fs6 = __toESM(require("fs"), 1);
-var
+var import_node_path6 = __toESM(require("path"), 1);
 var import_zod6 = require("zod");
 var writeFile = createGadget({
   name: "WriteFile",
@@ -7796,7 +9830,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
   ],
   execute: ({ filePath, content }) => {
     const validatedPath = validatePathIsWithinCwd(filePath);
-    const parentDir =
+    const parentDir = import_node_path6.default.dirname(validatedPath);
     let createdDir = false;
     if (!import_node_fs6.default.existsSync(parentDir)) {
       validatePathIsWithinCwd(parentDir);
@@ -7805,7 +9839,7 @@ console.log(\`Server running on http://localhost:\${port}\`);`
     }
     import_node_fs6.default.writeFileSync(validatedPath, content, "utf-8");
     const bytesWritten = Buffer.byteLength(content, "utf-8");
-    const dirNote = createdDir ? ` (created directory: ${
+    const dirNote = createdDir ? ` (created directory: ${import_node_path6.default.dirname(filePath)})` : "";
     return `path=${filePath}
 
 Wrote ${bytesWritten} bytes${dirNote}`;
@@ -8003,7 +10037,7 @@ error: ${message}`;
 // src/cli/builtins/index.ts
 var builtinGadgetRegistry = {
   ListDirectory: listDirectory,
-  ReadFile:
+  ReadFile: readFile2,
   WriteFile: writeFile,
   EditFile: editFile,
   RunCommand: runCommand
@@ -8040,10 +10074,10 @@ function expandHomePath(input) {
   if (!home) {
     return input;
   }
-  return
+  return import_node_path7.default.join(home, input.slice(1));
 }
 function isFileLikeSpecifier(specifier) {
-  return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(
+  return PATH_PREFIXES.some((prefix) => specifier.startsWith(prefix)) || specifier.includes(import_node_path7.default.sep);
 }
 function tryResolveBuiltin(specifier) {
   if (specifier.startsWith(BUILTIN_PREFIX)) {
@@ -8066,7 +10100,7 @@ function resolveGadgetSpecifier(specifier, cwd) {
     return specifier;
   }
   const expanded = expandHomePath(specifier);
-  const resolvedPath =
+  const resolvedPath = import_node_path7.default.resolve(cwd, expanded);
   if (!import_node_fs7.default.existsSync(resolvedPath)) {
     throw new Error(`Gadget module not found at ${resolvedPath}`);
   }
@@ -8138,13 +10172,14 @@ async function loadGadgets(specifiers, cwd, importer = (specifier) => import(spe
 }
 
 // src/cli/llm-logging.ts
-var
+var import_promises3 = require("fs/promises");
 var import_node_os = require("os");
-var
-
+var import_node_path8 = require("path");
+init_messages();
+var DEFAULT_LLM_LOG_DIR = (0, import_node_path8.join)((0, import_node_os.homedir)(), ".llmist", "logs");
 function resolveLogDir(option, subdir) {
   if (option === true) {
-    return (0,
+    return (0, import_node_path8.join)(DEFAULT_LLM_LOG_DIR, subdir);
   }
   if (typeof option === "string") {
     return option;
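resolveLogDir maps the logging option onto a directory: true picks a subdirectory of the default ~/.llmist/logs, a string is used verbatim, and anything else presumably falls through to no directory at all. A behavior sketch based only on the lines above:

    resolveLogDir(true, "requests");            // ~/.llmist/logs/requests
    resolveLogDir("/tmp/llm-logs", "requests"); // "/tmp/llm-logs", used as-is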
@@ -8155,14 +10190,14 @@ function formatLlmRequest(messages) {
   const lines = [];
   for (const msg of messages) {
     lines.push(`=== ${msg.role.toUpperCase()} ===`);
-    lines.push(msg.content
+    lines.push(msg.content ? extractText(msg.content) : "");
     lines.push("");
   }
   return lines.join("\n");
 }
 async function writeLogFile(dir, filename, content) {
-  await (0,
-  await (0,
+  await (0, import_promises3.mkdir)(dir, { recursive: true });
+  await (0, import_promises3.writeFile)((0, import_node_path8.join)(dir, filename), content, "utf-8");
 }
 function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
   const pad = (n) => n.toString().padStart(2, "0");
@@ -8176,9 +10211,9 @@ function formatSessionTimestamp(date = /* @__PURE__ */ new Date()) {
 }
 async function createSessionDir(baseDir) {
   const timestamp = formatSessionTimestamp();
-  const sessionDir = (0,
+  const sessionDir = (0, import_node_path8.join)(baseDir, timestamp);
   try {
-    await (0,
+    await (0, import_promises3.mkdir)(sessionDir, { recursive: true });
     return sessionDir;
   } catch (error) {
     console.warn(`[llmist] Failed to create log session directory: ${sessionDir}`, error);
@@ -8229,9 +10264,9 @@ function ensureMarkedConfigured() {
     markedConfigured = true;
   }
 }
-function renderMarkdown(
+function renderMarkdown(text3) {
   ensureMarkedConfigured();
-  let rendered = import_marked.marked.parse(
+  let rendered = import_marked.marked.parse(text3);
   rendered = rendered.replace(/\*\*(.+?)\*\*/g, (_, content) => import_chalk3.default.bold(content)).replace(/(?<!\*)\*(\S[^*]*)\*(?!\*)/g, (_, content) => import_chalk3.default.italic(content));
   return rendered.trimEnd();
 }
@@ -8245,8 +10280,8 @@ function createRainbowSeparator() {
   }
   return result;
 }
-function renderMarkdownWithSeparators(
-  const rendered = renderMarkdown(
+function renderMarkdownWithSeparators(text3) {
+  const rendered = renderMarkdown(text3);
   const separator = createRainbowSeparator();
   return `
 ${separator}
@@ -8414,12 +10449,12 @@ var StreamPrinter = class {
    *
    * @param text - Text to write
    */
-  write(
-    if (!
+  write(text3) {
+    if (!text3) {
       return;
     }
-    this.target.write(
-    this.endedWithNewline =
+    this.target.write(text3);
+    this.endedWithNewline = text3.endsWith("\n");
   }
   /**
    * Ensures output ends with a newline by writing one if needed.
@@ -8898,7 +10933,7 @@ function addCompleteOptions(cmd, defaults) {
     OPTION_DESCRIPTIONS.maxTokens,
     createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
     defaults?.["max-tokens"]
-  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]);
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio);
 }
 function addAgentOptions(cmd, defaults) {
   const gadgetAccumulator = (value, previous = []) => [
@@ -8922,7 +10957,7 @@ function addAgentOptions(cmd, defaults) {
     OPTION_FLAGS.noBuiltinInteraction,
     OPTION_DESCRIPTIONS.noBuiltinInteraction,
     defaults?.["builtin-interaction"] !== false
-  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet).option(OPTION_FLAGS.logLlmRequests, OPTION_DESCRIPTIONS.logLlmRequests, defaults?.["log-llm-requests"]).option(OPTION_FLAGS.inputImage, OPTION_DESCRIPTIONS.inputImage).option(OPTION_FLAGS.inputAudio, OPTION_DESCRIPTIONS.inputAudio).option(OPTION_FLAGS.docker, OPTION_DESCRIPTIONS.docker).option(OPTION_FLAGS.dockerRo, OPTION_DESCRIPTIONS.dockerRo).option(OPTION_FLAGS.noDocker, OPTION_DESCRIPTIONS.noDocker).option(OPTION_FLAGS.dockerDev, OPTION_DESCRIPTIONS.dockerDev);
 }
 function configToCompleteOptions(config) {
   const result = {};
@@ -8989,7 +11024,7 @@ var DEV_SOURCE_MOUNT_TARGET = "/llmist-src";
 // src/cli/config.ts
 var import_node_fs8 = require("fs");
 var import_node_os2 = require("os");
-var
+var import_node_path9 = require("path");
 var import_js_toml = require("js-toml");
 
 // src/cli/templates.ts
@@ -9127,6 +11162,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "docker-cwd-permission"
   // Override CWD mount permission for this profile
 ]);
+var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "size",
+  "quality",
+  "count",
+  "output",
+  "quiet"
+]);
+var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "voice",
+  "format",
+  "speed",
+  "output",
+  "quiet"
+]);
 var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
   ...COMPLETE_CONFIG_KEYS,
   ...AGENT_CONFIG_KEYS,
@@ -9134,7 +11185,7 @@ var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "description"
 ]);
 function getConfigPath() {
-  return (0,
+  return (0, import_node_path9.join)((0, import_node_os2.homedir)(), ".llmist", "cli.toml");
 }
 var ConfigError = class extends Error {
   constructor(message, path5) {
@@ -9387,6 +11438,75 @@ function validateAgentConfig(raw, section) {
   }
   return result;
 }
+function validateImageConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!IMAGE_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("size" in rawObj) {
+    result.size = validateString(rawObj.size, "size", section);
+  }
+  if ("quality" in rawObj) {
+    result.quality = validateString(rawObj.quality, "quality", section);
+  }
+  if ("count" in rawObj) {
+    result.count = validateNumber(rawObj.count, "count", section, {
+      integer: true,
+      min: 1,
+      max: 10
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
+function validateSpeechConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!SPEECH_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("voice" in rawObj) {
+    result.voice = validateString(rawObj.voice, "voice", section);
+  }
+  if ("format" in rawObj) {
+    result.format = validateString(rawObj.format, "format", section);
+  }
+  if ("speed" in rawObj) {
+    result.speed = validateNumber(rawObj.speed, "speed", section, {
+      min: 0.25,
+      max: 4
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
 function validateStringOrBoolean(value, field, section) {
   if (typeof value === "string" || typeof value === "boolean") {
     return value;
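Together with the IMAGE_CONFIG_KEYS and SPEECH_CONFIG_KEYS sets added earlier, these validators define the accepted shape of two new tables in ~/.llmist/cli.toml (the path returned by getConfigPath). A sketch with illustrative values — only the key names, types, and numeric ranges come from the validators:

    [image]
    model = "dall-e-3"     # string
    size = "1024x1024"     # string
    quality = "standard"   # string
    count = 2              # integer, 1-10
    output = "out.png"     # string
    quiet = false          # boolean

    [speech]
    model = "tts-1"
    voice = "nova"
    format = "mp3"
    speed = 1.25           # number, 0.25-4
    output = "speech.mp3"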
@@ -9509,6 +11629,10 @@ function validateConfig(raw, configPath) {
       result.complete = validateCompleteConfig(value, key);
     } else if (key === "agent") {
       result.agent = validateAgentConfig(value, key);
+    } else if (key === "image") {
+      result.image = validateImageConfig(value, key);
+    } else if (key === "speech") {
+      result.speech = validateSpeechConfig(value, key);
     } else if (key === "prompts") {
       result.prompts = validatePromptsConfig(value, key);
     } else if (key === "docker") {
@@ -9553,7 +11677,7 @@ function loadConfig() {
   return resolveTemplatesInConfig(inherited, configPath);
 }
 function getCustomCommandNames(config) {
-  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
   return Object.keys(config).filter((key) => !reserved.has(key));
 }
 function resolveTemplatesInConfig(config, configPath) {
@@ -9908,8 +12032,8 @@ function computeDockerfileHash(dockerfile) {
 // src/cli/docker/image-manager.ts
 var import_node_fs9 = require("fs");
 var import_node_os3 = require("os");
-var
-var CACHE_DIR = (0,
+var import_node_path10 = require("path");
+var CACHE_DIR = (0, import_node_path10.join)((0, import_node_os3.homedir)(), ".llmist", "docker-cache");
 var HASH_FILE = "image-hash.json";
 function ensureCacheDir() {
   if (!(0, import_node_fs9.existsSync)(CACHE_DIR)) {
@@ -9917,7 +12041,7 @@ function ensureCacheDir() {
   }
 }
 function getCachedHash(imageName) {
-  const hashPath = (0,
+  const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
   if (!(0, import_node_fs9.existsSync)(hashPath)) {
     return void 0;
   }
@@ -9931,7 +12055,7 @@ function getCachedHash(imageName) {
 }
 function setCachedHash(imageName, hash) {
   ensureCacheDir();
-  const hashPath = (0,
+  const hashPath = (0, import_node_path10.join)(CACHE_DIR, HASH_FILE);
   let cache = {};
   if ((0, import_node_fs9.existsSync)(hashPath)) {
     try {
@@ -9957,7 +12081,7 @@ var DockerBuildError = class extends Error {
 };
 async function buildImage(imageName, dockerfile) {
   ensureCacheDir();
-  const dockerfilePath = (0,
+  const dockerfilePath = (0, import_node_path10.join)(CACHE_DIR, "Dockerfile");
   (0, import_node_fs9.writeFileSync)(dockerfilePath, dockerfile);
   const proc = Bun.spawn(
     ["docker", "build", "-t", imageName, "-f", dockerfilePath, CACHE_DIR],
@@ -9992,7 +12116,7 @@ async function ensureImage(imageName = DEFAULT_IMAGE_NAME, dockerfile) {
 
 // src/cli/docker/docker-wrapper.ts
 var import_node_fs10 = require("fs");
-var
+var import_node_path11 = require("path");
 var import_node_os4 = require("os");
 var DockerUnavailableError = class extends Error {
   constructor() {
@@ -10038,9 +12162,9 @@ function autoDetectDevSource() {
   if (!scriptPath || !scriptPath.endsWith("src/cli.ts")) {
     return void 0;
   }
-  const srcDir = (0,
-  const projectDir = (0,
-  const packageJsonPath = (0,
+  const srcDir = (0, import_node_path11.dirname)(scriptPath);
+  const projectDir = (0, import_node_path11.dirname)(srcDir);
+  const packageJsonPath = (0, import_node_path11.join)(projectDir, "package.json");
 if (!(0, import_node_fs10.existsSync)(packageJsonPath)) {
     return void 0;
   }
@@ -10189,7 +12313,7 @@ function createHumanInputHandler(env, progress, keyboard) {
     keyboard.cleanupEsc();
     keyboard.cleanupEsc = null;
   }
-  const rl = (0,
+  const rl = (0, import_promises4.createInterface)({ input: env.stdin, output: env.stdout });
   try {
     const questionLine = question.trim() ? `
 ${renderMarkdownWithSeparators(question.trim())}` : "";
@@ -10547,8 +12671,8 @@ Denied: ${result.reason ?? "by user"}`
   builder.withTextOnlyHandler("acknowledge");
   builder.withTextWithGadgetsHandler({
     gadgetName: "TellUser",
-    parameterMapping: (
-    resultMapping: (
+    parameterMapping: (text3) => ({ message: text3, done: false, type: "info" }),
+    resultMapping: (text3) => `\u2139\uFE0F ${text3}`
   });
   builder.withTrailingMessage(
     (ctx) => [
@@ -10557,7 +12681,19 @@ Denied: ${result.reason ?? "by user"}`
       "Maximize efficiency by batching independent operations in a single response."
     ].join(" ")
   );
-
+  let agent;
+  if (options.image || options.audio) {
+    const parts = [text(prompt)];
+    if (options.image) {
+      parts.push(await readImageFile(options.image));
+    }
+    if (options.audio) {
+      parts.push(await readAudioFile(options.audio));
+    }
+    agent = builder.askWithContent(parts);
+  } else {
+    agent = builder.ask(prompt);
+  }
   let textBuffer = "";
   const flushTextBuffer = () => {
     if (textBuffer) {
@@ -10632,6 +12768,7 @@ function registerAgentCommand(program, env, config) {
 }
 
 // src/cli/complete-command.ts
+init_input_content();
 init_messages();
 init_model_shortcuts();
 init_constants2();
@@ -10643,7 +12780,18 @@ async function executeComplete(promptArg, options, env) {
   if (options.system) {
     builder.addSystem(options.system);
   }
-
+  if (options.image || options.audio) {
+    const parts = [text(prompt)];
+    if (options.image) {
+      parts.push(await readImageFile(options.image));
+    }
+    if (options.audio) {
+      parts.push(await readAudioFile(options.audio));
+    }
+    builder.addUserMultimodal(parts);
+  } else {
+    builder.addUser(prompt);
+  }
   const messages = builder.build();
   const llmLogsBaseDir = resolveLogDir(options.logLlmRequests, "requests");
   let llmSessionDir;
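The complete command here and the agent command above now share the same attachment branch. Condensed as a standalone helper (the helper itself is hypothetical; the calls it wraps are exactly the ones in the two commands):

    // Hypothetical helper, not part of the package: returns the plain prompt
    // when no attachment flags were given, otherwise multimodal content parts.
    async function buildPromptInput(prompt, options) {
      if (!options.image && !options.audio) return prompt;
      const parts = [text(prompt)];
      if (options.image) parts.push(await readImageFile(options.image));
      if (options.audio) parts.push(await readAudioFile(options.audio));
      return parts;
    }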
@@ -10718,7 +12866,7 @@ init_schema_to_json();
 init_schema_validator();
 
 // src/cli/gadget-prompts.ts
-var
+var import_promises5 = require("readline/promises");
 var import_chalk6 = __toESM(require("chalk"), 1);
 init_schema_to_json();
 async function promptForParameters(schema, ctx) {
@@ -10729,7 +12877,7 @@ async function promptForParameters(schema, ctx) {
   if (!jsonSchema.properties || Object.keys(jsonSchema.properties).length === 0) {
     return {};
   }
-  const rl = (0,
+  const rl = (0, import_promises5.createInterface)({ input: ctx.stdin, output: ctx.stdout });
   const params = {};
   try {
     for (const [key, prop] of Object.entries(jsonSchema.properties)) {
@@ -11148,19 +13296,118 @@ function registerGadgetCommand(program, env) {
   );
 }
 
+// src/cli/image-command.ts
+var import_node_fs11 = require("fs");
+var DEFAULT_IMAGE_MODEL = "dall-e-3";
+async function executeImage(promptArg, options, env) {
+  const prompt = await resolvePrompt(promptArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const n = options.count ? Number.parseInt(options.count, 10) : 1;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+`);
+  }
+  const result = await client.image.generate({
+    model,
+    prompt,
+    size: options.size,
+    quality: options.quality,
+    n,
+    responseFormat: options.output ? "b64_json" : "url"
+  });
+  if (options.output) {
+    const imageData = result.images[0];
+    if (imageData.b64Json) {
+      const buffer = Buffer.from(imageData.b64Json, "base64");
+      (0, import_node_fs11.writeFileSync)(options.output, buffer);
+      if (!options.quiet) {
+        env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+`);
+      }
+    } else if (imageData.url) {
+      env.stdout.write(`${imageData.url}
+`);
+    }
+  } else {
+    for (const image of result.images) {
+      if (image.url) {
+        env.stdout.write(`${image.url}
+`);
+      } else if (image.b64Json) {
+        env.stdout.write(image.b64Json);
+      }
+    }
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.images.length} image(s)`,
+      `size: ${result.usage.size}`,
+      `quality: ${result.usage.quality}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerImageCommand(program, env, config) {
+  program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_IMAGE_MODEL
+  ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+  );
+}
+
 // src/cli/models-command.ts
 var import_chalk8 = __toESM(require("chalk"), 1);
 init_model_shortcuts();
 async function handleModelsCommand(options, env) {
   const client = env.createClient();
-  const
+  const showText = options.all || options.text || !options.image && !options.speech;
+  const showImage = options.all || options.image;
+  const showSpeech = options.all || options.speech;
+  const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+  const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+  const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
   if (options.format === "json") {
-    renderJSON(
+    renderJSON(textModels, imageModels, speechModels, env.stdout);
   } else {
-
+    renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
+  }
+}
+function renderAllTables(textModels, imageModels, speechModels, verbose, stream2) {
+  const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+  if (!hasAnyModels) {
+    stream2.write(import_chalk8.default.yellow("\nNo models found matching the specified criteria.\n\n"));
+    return;
+  }
+  stream2.write(import_chalk8.default.bold.cyan("\nAvailable Models\n"));
+  stream2.write(import_chalk8.default.cyan("=".repeat(80)) + "\n\n");
+  if (textModels.length > 0) {
+    renderTextTable(textModels, verbose, stream2);
+  }
+  if (imageModels.length > 0) {
+    renderImageTable(imageModels, verbose, stream2);
+  }
+  if (speechModels.length > 0) {
+    renderSpeechTable(speechModels, verbose, stream2);
+  }
+  if (textModels.length > 0) {
+    stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
+    stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
+    const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+    for (const [shortcut, fullName] of shortcuts) {
+      stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
+    }
+    stream2.write("\n");
   }
 }
-function
+function renderTextTable(models, verbose, stream2) {
   const grouped = /* @__PURE__ */ new Map();
   for (const model of models) {
     const provider = model.provider;
@@ -11169,13 +13416,13 @@ function renderTable(models, verbose, stream2) {
     }
     grouped.get(provider).push(model);
   }
-  stream2.write(import_chalk8.default.bold.
-  stream2.write(import_chalk8.default.
+  stream2.write(import_chalk8.default.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
   const providers = Array.from(grouped.keys()).sort();
   for (const provider of providers) {
     const providerModels = grouped.get(provider);
     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
-    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
 `));
     if (verbose) {
       renderVerboseTable(providerModels, stream2);
@@ -11184,13 +13431,6 @@ function renderTable(models, verbose, stream2) {
     }
     stream2.write("\n");
   }
-  stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
-  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
-  const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
-  for (const [shortcut, fullName] of shortcuts) {
-    stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
-  }
-  stream2.write("\n");
 }
 function renderCompactTable(models, stream2) {
   const idWidth = 25;
@@ -11267,9 +13507,171 @@ function renderVerboseTable(models, stream2) {
   }
   stream2.write("\n");
 }
-function
-
-
+function renderImageTable(models, verbose, stream2) {
+  stream2.write(import_chalk8.default.bold.green("\u{1F3A8} Image Generation Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream2.write(import_chalk8.default.bold.green(`
+${model.modelId}
+`));
+        stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Sizes:")} ${import_chalk8.default.yellow(model.supportedSizes.join(", "))}
+`);
+        if (model.supportedQualities) {
+          stream2.write(` ${import_chalk8.default.dim("Qualities:")} ${import_chalk8.default.yellow(model.supportedQualities.join(", "))}
+`);
+        }
+        stream2.write(` ${import_chalk8.default.dim("Max Images:")} ${import_chalk8.default.yellow(model.maxImages.toString())}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatImagePrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.textRendering) features.push("text-rendering");
+          if (model.features.transparency) features.push("transparency");
+          if (model.features.conversational) features.push("conversational");
+          if (features.length > 0) {
+            stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 32;
+      const nameWidth = 25;
+      const sizesWidth = 20;
+      const priceWidth = 15;
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      stream2.write(
+        import_chalk8.default.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
+        stream2.write(
+          import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(sizes.padEnd(sizesWidth)) + " " + import_chalk8.default.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+    }
+    stream2.write("\n");
+  }
+}
+function renderSpeechTable(models, verbose, stream2) {
+  stream2.write(import_chalk8.default.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream2.write(import_chalk8.default.bold.green(`
+${model.modelId}
+`));
+        stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Voices:")} ${import_chalk8.default.yellow(model.voices.length.toString())} voices
+`);
+        if (model.voices.length <= 6) {
+          stream2.write(` ${import_chalk8.default.dim(model.voices.join(", "))}
+`);
+        } else {
+          stream2.write(` ${import_chalk8.default.dim(model.voices.slice(0, 6).join(", ") + "...")}
+`);
+        }
+        stream2.write(` ${import_chalk8.default.dim("Formats:")} ${import_chalk8.default.yellow(model.formats.join(", "))}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Max Input:")} ${import_chalk8.default.yellow(model.maxInputLength.toString())} chars
+`);
+        stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatSpeechPrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.multiSpeaker) features.push("multi-speaker");
+          if (model.features.voiceInstructions) features.push("voice-instructions");
+          if (model.features.languages) features.push(`${model.features.languages} languages`);
+          if (features.length > 0) {
+            stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 30;
+      const nameWidth = 28;
+      const voicesWidth = 12;
+      const priceWidth = 18;
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      stream2.write(
+        import_chalk8.default.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        stream2.write(
+          import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + import_chalk8.default.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+    }
+    stream2.write("\n");
+  }
+}
+function formatImagePrice(model) {
+  if (model.pricing.perImage !== void 0) {
+    return `$${model.pricing.perImage.toFixed(2)}/img`;
+  }
+  if (model.pricing.bySize) {
+    const prices = Object.values(model.pricing.bySize);
+    const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    if (minPrice === maxPrice) {
+      return `$${minPrice.toFixed(2)}/img`;
+    }
+    return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
+  }
+  return "varies";
+}
+function formatSpeechPrice(model) {
+  if (model.pricing.perCharacter !== void 0) {
+    const perMillion = model.pricing.perCharacter * 1e6;
+    return `$${perMillion.toFixed(0)}/1M chars`;
+  }
+  if (model.pricing.perMinute !== void 0) {
+    return `~$${model.pricing.perMinute.toFixed(2)}/min`;
+  }
+  return "varies";
+}
+function renderJSON(textModels, imageModels, speechModels, stream2) {
+  const output = {};
+  if (textModels.length > 0) {
+    output.textModels = textModels.map((model) => ({
       provider: model.provider,
       modelId: model.modelId,
       displayName: model.displayName,
@@ -11285,9 +13687,33 @@ function renderJSON(models, stream2) {
       knowledgeCutoff: model.knowledgeCutoff,
       features: model.features,
       metadata: model.metadata
-    }))
-    shortcuts
-  }
+    }));
+    output.shortcuts = MODEL_ALIASES;
+  }
+  if (imageModels.length > 0) {
+    output.imageModels = imageModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      supportedSizes: model.supportedSizes,
+      supportedQualities: model.supportedQualities,
+      maxImages: model.maxImages,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
+  if (speechModels.length > 0) {
+    output.speechModels = speechModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      voices: model.voices,
+      formats: model.formats,
+      maxInputLength: model.maxInputLength,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
   stream2.write(JSON.stringify(output, null, 2) + "\n");
 }
 function formatTokens2(count) {
@@ -11300,7 +13726,7 @@ function formatTokens2(count) {
   }
 }
 function registerModelsCommand(program, env) {
-  program.command(COMMANDS.models).description("List
+  program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
     (options) => executeAction(
       () => handleModelsCommand(options, env),
       env
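With the new filters, one listing can span all three model registries. Example invocations — the flag names are the literals registered above, while the command name is assumed to be the literal behind COMMANDS.models:

    llmist models --all                 # text, image, and speech tables
    llmist models --image --verbose     # detailed image-model cards
    llmist models --all --format json   # adds imageModels/speechModels keys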
@@ -11308,6 +13734,96 @@ function registerModelsCommand(program, env) {
   );
 }
 
+// src/cli/speech-command.ts
+var import_node_fs12 = require("fs");
+var DEFAULT_SPEECH_MODEL = "tts-1";
+var DEFAULT_VOICE = "nova";
+async function executeSpeech(textArg, options, env) {
+  const text3 = await resolvePrompt(textArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const voice = options.voice ?? DEFAULT_VOICE;
+  const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
+`);
+  }
+  const result = await client.speech.generate({
+    model,
+    input: text3,
+    voice,
+    responseFormat: options.format,
+    speed
+  });
+  const audioBuffer = Buffer.from(result.audio);
+  if (options.output) {
+    (0, import_node_fs12.writeFileSync)(options.output, audioBuffer);
+    if (!options.quiet) {
+      env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
+`);
+    }
+  } else {
+    env.stdout.write(audioBuffer);
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.usage.characterCount} characters`,
+      `format: ${result.format}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerSpeechCommand(program, env, config) {
+  program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_SPEECH_MODEL
+  ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (text3, options) => executeAction(() => executeSpeech(text3, options, env), env)
+  );
+}
+
+// src/cli/vision-command.ts
+init_model_shortcuts();
+async function executeVision(imagePath, options, env) {
+  const client = env.createClient();
+  const model = resolveModel(options.model);
+  const imageBuffer = await readFileBuffer(imagePath);
+  const prompt = options.prompt ?? "Describe this image in detail.";
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Analyzing image with ${model}...
+`);
+  }
+  const result = await client.vision.analyze({
+    model,
+    image: imageBuffer,
+    prompt,
+    maxTokens: options.maxTokens
+  });
+  env.stdout.write(result);
+  env.stdout.write("\n");
+}
+function registerVisionCommand(program, env) {
+  program.command(COMMANDS.vision ?? "vision").description("Analyze an image using vision-capable models").argument("<image>", "Path to image file to analyze").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    "gpt-4o"
+    // Default to a vision-capable model
+  ).option("-p, --prompt <prompt>", "Analysis prompt describing what to extract or describe").option(
+    OPTION_FLAGS.maxTokens,
+    OPTION_DESCRIPTIONS.maxTokens,
+    createNumericParser({ label: "Max tokens", integer: true, min: 1 })
+  ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet).action(
+    (imagePath, options) => executeAction(() => executeVision(imagePath, options, env), env)
+  );
+}
+
 // src/cli/environment.ts
 var import_node_readline = __toESM(require("readline"), 1);
 var import_chalk9 = __toESM(require("chalk"), 1);
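The image, speech, and vision commands are thin wrappers over the client's media namespaces. A programmatic sketch of the same calls — the request and response fields are taken from the command bodies above, while the client factory name and exact option types are assumptions:

    const client = createClient(); // hypothetical factory; use your usual client setup

    const img = await client.image.generate({
      model: "dall-e-3", prompt: "a foggy harbor at dawn",
      size: "1024x1024", n: 1, responseFormat: "b64_json"
    });
    // img.images[0].b64Json or .url; img.usage.size, img.usage.quality, img.cost

    const tts = await client.speech.generate({
      model: "tts-1", input: "Hello!", voice: "nova", speed: 1
    });
    // tts.audio (bytes), tts.format, tts.usage.characterCount, tts.cost

    const answer = await client.vision.analyze({
      model: "gpt-4o", image: imageBuffer, prompt: "Describe this image in detail."
    });
    // resolves to the model's text answer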
@@ -11353,7 +13869,7 @@ function createLoggerFactory(config) {
 }
 function createPromptFunction(stdin, stdout) {
   return (question) => {
-    return new Promise((
+    return new Promise((resolve3) => {
       const rl = import_node_readline.default.createInterface({
         input: stdin,
         output: stdout
@@ -11368,7 +13884,7 @@ function createPromptFunction(stdin, stdout) {
 `);
       rl.question(import_chalk9.default.green.bold("You: "), (answer) => {
         rl.close();
-
+        resolve3(answer);
       });
     });
   };
@@ -11459,6 +13975,9 @@ function createProgram(env, config) {
   });
   registerCompleteCommand(program, env, config?.complete);
   registerAgentCommand(program, env, config?.agent);
+  registerImageCommand(program, env, config?.image);
+  registerSpeechCommand(program, env, config?.speech);
+  registerVisionCommand(program, env);
   registerModelsCommand(program, env);
   registerGadgetCommand(program, env);
   if (config) {