llmist 2.3.0 → 2.4.0
This diff shows the publicly released contents of the two package versions as published to a supported registry. It is provided for informational purposes only.
- package/dist/{chunk-GANXNBIZ.js → chunk-6ZDUWO6N.js} +1029 -22
- package/dist/chunk-6ZDUWO6N.js.map +1 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-QFRVTS5F.js} +2 -2
- package/dist/cli.cjs +1497 -45
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +473 -28
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1025 -18
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -2
- package/dist/index.d.ts +18 -2
- package/dist/index.js +2 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-BQcC2VCP.d.cts} +408 -1
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-BQcC2VCP.d.ts} +408 -1
- package/dist/testing/index.cjs +1025 -18
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/{chunk-ZDNV7DDO.js.map → chunk-QFRVTS5F.js.map} +0 -0
package/dist/cli.cjs  CHANGED
@@ -2498,7 +2498,27 @@ var init_cost_reporting_client = __esm({
       constructor(client, reportCost) {
         this.client = client;
         this.reportCost = reportCost;
+        this.image = {
+          generate: async (options) => {
+            const result = await this.client.image.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
+        this.speech = {
+          generate: async (options) => {
+            const result = await this.client.speech.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
       }
+      image;
+      speech;
       /**
        * Access to model registry for cost estimation.
        */
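The hunk above threads image and speech costs through the same `reportCost` callback the client already uses for text. A minimal standalone sketch of that forwarding pattern (the option/result types here are simplified stand-ins, not llmist's real shapes):

```ts
// Simplified stand-in types; llmist's real option/result shapes are richer.
interface GenerationResult { cost?: number }
interface ImageApi { generate(options: object): Promise<GenerationResult> }

class CostReportingImage {
  constructor(
    private readonly inner: ImageApi,
    private readonly reportCost: (cost: number) => void,
  ) {}

  // Forward the call unchanged, then surface any positive cost.
  async generate(options: object): Promise<GenerationResult> {
    const result = await this.inner.generate(options);
    if (result.cost !== undefined && result.cost > 0) {
      this.reportCost(result.cost);
    }
    return result;
  }
}
```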
@@ -4591,6 +4611,28 @@ var init_anthropic = __esm({
       getModelSpecs() {
         return ANTHROPIC_MODELS;
       }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
@@ -4745,6 +4787,182 @@ var init_anthropic = __esm({
   }
 });
 
+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
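With the table above, Gemini image cost is a flat multiplication: per-image models multiply `perImage` by `n`, and size-priced models first look the size up in `bySize`; anything unknown yields `undefined`. A quick worked check of the table's numbers:

```ts
// Four Imagen 4 images at $0.04 each:
const imagen4Batch = 0.04 * 4;   // $0.16
// One 4K Gemini 3 Pro image, priced by size:
const gemini3Pro4K = 0.24 * 1;   // $0.24
console.log(imagen4Batch, gemini3Pro4K);
```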
@@ -4918,7 +5136,171 @@ var init_gemini_models = __esm({
   }
 });
 
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
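`wrapPcmInWav` prefixes the raw PCM that Gemini TTS returns with a canonical 44-byte WAV header (the `generateSpeech` hunk later in this diff calls it as `wrapPcmInWav(pcmData, 24e3, 16, 1)`, i.e. 24 kHz, 16-bit, mono). The four big-endian magic constants are just ASCII chunk tags, which this snippet decodes to verify:

```ts
// Decode the big-endian magic constants used in wrapPcmInWav.
for (const n of [1380533830, 1463899717, 1718449184, 1684108385]) {
  const tag = [24, 16, 8, 0]
    .map((shift) => String.fromCharCode((n >>> shift) & 0xff))
    .join("");
  console.log(tag); // "RIFF", "WAVE", "fmt ", "data"
}
```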
@@ -4929,7 +5311,9 @@ var init_gemini = __esm({
     import_genai = require("@google/genai");
     init_base_provider();
     init_constants2();
+    init_gemini_image_models();
     init_gemini_models();
+    init_gemini_speech_models();
     init_utils();
     GEMINI_ROLE_MAP = {
       system: "user",
@@ -4944,6 +5328,139 @@ var init_gemini = __esm({
       getModelSpecs() {
         return GEMINI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return geminiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isGeminiImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getGeminiImageModelSpec(options.model);
+        const isImagenModel = options.model.startsWith("imagen");
+        const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+        const n = options.n ?? 1;
+        if (isImagenModel) {
+          const response2 = await client.models.generateImages({
+            model: options.model,
+            prompt: options.prompt,
+            config: {
+              numberOfImages: n,
+              aspectRatio,
+              outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+            }
+          });
+          const images2 = response2.generatedImages ?? [];
+          const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+          return {
+            // Gemini's imageBytes is already base64 encoded, so use it directly
+            images: images2.map((img) => ({
+              b64Json: img.image?.imageBytes ?? void 0
+            })),
+            model: options.model,
+            usage: {
+              imagesGenerated: images2.length,
+              size: aspectRatio,
+              quality: "standard"
+            },
+            cost: cost2
+          };
+        }
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+          config: {
+            responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+          }
+        });
+        const images = [];
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData) {
+              images.push({
+                b64Json: part.inlineData.data
+              });
+            }
+          }
+        }
+        const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+        return {
+          images,
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return geminiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isGeminiSpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getGeminiSpeechModelSpec(options.model);
+        const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [
+            {
+              role: "user",
+              parts: [{ text: options.input }]
+            }
+          ],
+          config: {
+            responseModalities: [import_genai.Modality.AUDIO],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: voice
+                }
+              }
+            }
+          }
+        });
+        let pcmData;
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData?.data) {
+              const base64 = part.inlineData.data;
+              const binary = atob(base64);
+              pcmData = new Uint8Array(binary.length);
+              for (let i = 0; i < binary.length; i++) {
+                pcmData[i] = binary.charCodeAt(i);
+              }
+              break;
+            }
+          }
+        }
+        if (!pcmData) {
+          throw new Error("No audio data in Gemini TTS response");
+        }
+        const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+        const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+        return {
+          audio: audioData,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format: spec?.defaultFormat ?? "wav"
+        };
+      }
       buildRequestPayload(options, descriptor, _spec, messages) {
         const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
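`generateSpeech` above inlines a base64-to-bytes loop because `inlineData.data` arrives base64-encoded. The same decode as a standalone helper (it relies on the global `atob`, available in browsers and Node 16+, which is what the bundle itself assumes):

```ts
// Standalone version of the base64 decode inlined in generateSpeech above.
function base64ToBytes(b64: string): Uint8Array {
  const binary = atob(b64);            // base64 -> binary string
  const bytes = new Uint8Array(binary.length);
  for (let i = 0; i < binary.length; i++) {
    bytes[i] = binary.charCodeAt(i);   // one char code per byte
  }
  return bytes;
}
```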
@@ -5139,6 +5656,121 @@ var init_gemini = __esm({
   }
 });
 
+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
@@ -5503,6 +6135,144 @@ var init_openai_models = __esm({
   }
 });
 
+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
   if (!extra) {
@@ -5524,7 +6294,9 @@ var init_openai = __esm({
     import_tiktoken = require("tiktoken");
     init_base_provider();
     init_constants2();
+    init_openai_image_models();
     init_openai_models();
+    init_openai_speech_models();
     init_utils();
     ROLE_MAP = {
       system: "system",
@@ -5539,6 +6311,87 @@ var init_openai = __esm({
       getModelSpecs() {
         return OPENAI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return openaiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isOpenAIImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getOpenAIImageModelSpec(options.model);
+        const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+        const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+        const n = options.n ?? 1;
+        const isDallE2 = options.model === "dall-e-2";
+        const isGptImage = options.model.startsWith("gpt-image");
+        const requestParams = {
+          model: options.model,
+          prompt: options.prompt,
+          size,
+          n
+        };
+        if (!isDallE2 && !isGptImage) {
+          requestParams.quality = quality;
+        }
+        if (isGptImage) {
+        } else if (!isDallE2) {
+          requestParams.response_format = options.responseFormat ?? "url";
+        }
+        const response = await client.images.generate(requestParams);
+        const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+        const images = response.data ?? [];
+        return {
+          images: images.map((img) => ({
+            url: img.url,
+            b64Json: img.b64_json,
+            revisedPrompt: img.revised_prompt
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size,
+            quality
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return openaiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isOpenAISpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getOpenAISpeechModelSpec(options.model);
+        const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+        const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+        const response = await client.audio.speech.create({
+          model: options.model,
+          input: options.input,
+          voice,
+          response_format: format,
+          speed: options.speed ?? 1
+        });
+        const audioBuffer = await response.arrayBuffer();
+        const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+        return {
+          audio: audioBuffer,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format
+        };
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const { maxTokens, temperature, topP, stopSequences, extra } = options;
         const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
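`calculateOpenAISpeechCost` above prefers per-character pricing and only falls back to the ~750-characters-per-minute heuristic for per-minute models. Worked examples using the table's own numbers:

```ts
// tts-1: $15 / 1M characters, so 1,000 characters cost:
const tts1Cost = 1_000 * 15e-6;          // $0.015
// gpt-4o-mini-tts has no perCharacter price; 1,500 characters are
// approximated as 1500 / 750 = 2 minutes at $0.015 per minute:
const miniTtsCost = (1_500 / 750) * 0.015; // $0.03
console.log(tts1Cost, miniTtsCost);
```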
@@ -5879,30 +6732,109 @@ var init_model_registry = __esm({
   }
 });
 
-// src/core/options.ts
-var ModelIdentifierParser;
-var init_options = __esm({
-  "src/core/options.ts"() {
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
     "use strict";
-    ModelIdentifierParser = class {
-      constructor(defaultProvider = "openai") {
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
+        }
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-      parse(identifier) {
-        const trimmed = identifier.trim();
-        if (!trimmed) {
-          throw new Error("Model identifier cannot be empty");
-        }
-        const [maybeProvider, ...rest] = trimmed.split(":");
-        if (rest.length === 0) {
-          return { provider: this.defaultProvider, name: maybeProvider };
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const provider = maybeProvider;
-        const name = rest.join(":");
-        if (!name) {
-          throw new Error("Model name cannot be empty");
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
         }
-        return { provider, name };
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
@@ -5951,6 +6883,69 @@ var init_quick_methods = __esm({
   }
 });
 
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
@@ -5963,12 +6958,20 @@ var init_client = __esm({
     init_builder();
     init_discovery();
    init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6007,6 +7010,7 @@ var init_client = __esm({
           const priorityB = b.priority ?? 0;
           return priorityB - priorityA;
         });
+        this.defaultProvider = resolvedDefaultProvider;
        this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
         this.modelRegistry = new ModelRegistry();
         for (const adapter of this.adapters) {
@@ -6015,6 +7019,9 @@ var init_client = __esm({
         if (customModels.length > 0) {
           this.modelRegistry.registerModels(customModels);
         }
+        this.text = new TextNamespace(this);
+        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
       }
       stream(options) {
         const descriptor = this.parser.parse(options.model);
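With the constructor wiring above, every `LLMist` instance exposes `text`, `image`, and `speech` namespaces backed by the adapter list. A usage sketch (model ids and option names follow the specs earlier in this diff; how `client` acquires its adapters depends on your configuration, so treat the construction line as illustrative):

```ts
import { LLMist } from "llmist";

async function demo() {
  const client = new LLMist(); // adapters resolved per your configuration

  // Text delegates to the existing complete/stream quick methods.
  const answer = await client.text.complete("Say hi in one word.");

  // Image dispatch finds the first adapter claiming the model id,
  // or throws listing the available image models.
  const img = await client.image.generate({
    model: "dall-e-3",
    prompt: "a lighthouse at dusk",
    size: "1024x1024",
  });

  // Speech works the same way.
  const tts = await client.speech.generate({
    model: "tts-1",
    input: "Hello from llmist",
    voice: "nova",
  });

  console.log(answer, img.cost, tts.cost);
}
```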
@@ -6995,7 +8002,9 @@ var COMMANDS = {
   complete: "complete",
   agent: "agent",
   models: "models",
-  gadget: "gadget"
+  gadget: "gadget",
+  image: "image",
+  speech: "speech"
 };
 var LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
 var DEFAULT_MODEL = "openai:gpt-5-nano";
@@ -7016,7 +8025,17 @@ var OPTION_FLAGS = {
   docker: "--docker",
   dockerRo: "--docker-ro",
   noDocker: "--no-docker",
-  dockerDev: "--docker-dev"
+  dockerDev: "--docker-dev",
+  // Image generation options
+  imageSize: "--size <size>",
+  imageQuality: "--quality <quality>",
+  imageCount: "-n, --count <number>",
+  imageOutput: "-o, --output <path>",
+  // Speech generation options
+  voice: "--voice <name>",
+  speechFormat: "--format <format>",
+  speechSpeed: "--speed <value>",
+  speechOutput: "-o, --output <path>"
 };
 var OPTION_DESCRIPTIONS = {
   model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -7035,7 +8054,17 @@ var OPTION_DESCRIPTIONS = {
   docker: "Run agent in a Docker sandbox container for security isolation.",
   dockerRo: "Run in Docker with current directory mounted read-only.",
   noDocker: "Disable Docker sandboxing (override config).",
-  dockerDev: "Run in Docker dev mode (mount local source instead of npm install)."
+  dockerDev: "Run in Docker dev mode (mount local source instead of npm install).",
+  // Image generation descriptions
+  imageSize: "Image size/aspect ratio, e.g. '1024x1024', '1:1', '16:9'.",
+  imageQuality: "Image quality: 'standard', 'hd', 'low', 'medium', 'high'.",
+  imageCount: "Number of images to generate (model dependent, usually 1-4).",
+  imageOutput: "Output path for the generated image. Defaults to stdout if not specified.",
+  // Speech generation descriptions
+  voice: "Voice name for speech generation, e.g. 'nova', 'alloy', 'Zephyr'.",
+  speechFormat: "Audio format: 'mp3', 'opus', 'aac', 'flac', 'wav', 'pcm'.",
+  speechSpeed: "Speech speed multiplier (0.25 to 4.0, default 1.0).",
+  speechOutput: "Output path for audio file. Defaults to stdout if not specified."
 };
 var SUMMARY_PREFIX = "[llmist]";
 
@@ -7045,7 +8074,7 @@ var import_commander2 = require("commander");
 // package.json
 var package_default = {
   name: "llmist",
-  version: "2.3.0",
+  version: "2.4.0",
   description: "TypeScript LLM client with streaming tool execution. Tools fire mid-stream. Built-in function calling works with any model\u2014no structured outputs or native tool support required.",
   type: "module",
   main: "dist/index.cjs",
@@ -9127,6 +10156,22 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
   "docker-cwd-permission"
   // Override CWD mount permission for this profile
 ]);
+var IMAGE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "size",
+  "quality",
+  "count",
+  "output",
+  "quiet"
+]);
+var SPEECH_CONFIG_KEYS = /* @__PURE__ */ new Set([
+  "model",
+  "voice",
+  "format",
+  "speed",
+  "output",
+  "quiet"
+]);
 var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
   ...COMPLETE_CONFIG_KEYS,
   ...AGENT_CONFIG_KEYS,
@@ -9387,6 +10432,75 @@ function validateAgentConfig(raw, section) {
   }
   return result;
 }
+function validateImageConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!IMAGE_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("size" in rawObj) {
+    result.size = validateString(rawObj.size, "size", section);
+  }
+  if ("quality" in rawObj) {
+    result.quality = validateString(rawObj.quality, "quality", section);
+  }
+  if ("count" in rawObj) {
+    result.count = validateNumber(rawObj.count, "count", section, {
+      integer: true,
+      min: 1,
+      max: 10
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
+function validateSpeechConfig(raw, section) {
+  if (typeof raw !== "object" || raw === null) {
+    throw new ConfigError(`[${section}] must be a table`);
+  }
+  const rawObj = raw;
+  for (const key of Object.keys(rawObj)) {
+    if (!SPEECH_CONFIG_KEYS.has(key)) {
+      throw new ConfigError(`[${section}].${key} is not a valid option`);
+    }
+  }
+  const result = {};
+  if ("model" in rawObj) {
+    result.model = validateString(rawObj.model, "model", section);
+  }
+  if ("voice" in rawObj) {
+    result.voice = validateString(rawObj.voice, "voice", section);
+  }
+  if ("format" in rawObj) {
+    result.format = validateString(rawObj.format, "format", section);
+  }
+  if ("speed" in rawObj) {
+    result.speed = validateNumber(rawObj.speed, "speed", section, {
+      min: 0.25,
+      max: 4
+    });
+  }
+  if ("output" in rawObj) {
+    result.output = validateString(rawObj.output, "output", section);
+  }
+  if ("quiet" in rawObj) {
+    result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+  }
+  return result;
+}
 function validateStringOrBoolean(value, field, section) {
   if (typeof value === "string" || typeof value === "boolean") {
     return value;
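The two validators above accept `[image]` and `[speech]` tables restricted to the key sets defined earlier, with `count` an integer in 1-10 and `speed` in 0.25-4. A config sketch that would pass validation (TOML is inferred from the `must be a table` wording; the config file's location is outside this excerpt):

```toml
[image]
model = "dall-e-3"
size = "1024x1024"
quality = "hd"
count = 2          # integer, 1-10
output = "out.png"

[speech]
model = "tts-1"
voice = "nova"
format = "mp3"
speed = 1.25       # 0.25-4.0
output = "out.mp3"
quiet = false
```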
@@ -9509,6 +10623,10 @@ function validateConfig(raw, configPath) {
       result.complete = validateCompleteConfig(value, key);
     } else if (key === "agent") {
       result.agent = validateAgentConfig(value, key);
+    } else if (key === "image") {
+      result.image = validateImageConfig(value, key);
+    } else if (key === "speech") {
+      result.speech = validateSpeechConfig(value, key);
     } else if (key === "prompts") {
       result.prompts = validatePromptsConfig(value, key);
     } else if (key === "docker") {
@@ -9553,7 +10671,7 @@ function loadConfig() {
   return resolveTemplatesInConfig(inherited, configPath);
 }
 function getCustomCommandNames(config) {
-  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts", "docker"]);
+  const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "image", "speech", "prompts", "docker"]);
   return Object.keys(config).filter((key) => !reserved.has(key));
 }
 function resolveTemplatesInConfig(config, configPath) {
@@ -11148,19 +12266,118 @@ function registerGadgetCommand(program, env) {
|
|
|
11148
12266
|
);
|
|
11149
12267
|
}
|
|
11150
12268
|
|
|
12269
|
+
// src/cli/image-command.ts
|
|
12270
|
+
var import_node_fs11 = require("fs");
|
|
12271
|
+
var DEFAULT_IMAGE_MODEL = "dall-e-3";
|
|
12272
|
+
async function executeImage(promptArg, options, env) {
|
|
12273
|
+
const prompt = await resolvePrompt(promptArg, env);
|
|
12274
|
+
const client = env.createClient();
|
|
12275
|
+
const model = options.model;
|
|
12276
|
+
const n = options.count ? Number.parseInt(options.count, 10) : 1;
|
|
12277
|
+
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating image with ${model}...
+`);
+  }
+  const result = await client.image.generate({
+    model,
+    prompt,
+    size: options.size,
+    quality: options.quality,
+    n,
+    responseFormat: options.output ? "b64_json" : "url"
+  });
+  if (options.output) {
+    const imageData = result.images[0];
+    if (imageData.b64Json) {
+      const buffer = Buffer.from(imageData.b64Json, "base64");
+      (0, import_node_fs11.writeFileSync)(options.output, buffer);
+      if (!options.quiet) {
+        env.stderr.write(`${SUMMARY_PREFIX} Image saved to ${options.output}
+`);
+      }
+    } else if (imageData.url) {
+      env.stdout.write(`${imageData.url}
+`);
+    }
+  } else {
+    for (const image of result.images) {
+      if (image.url) {
+        env.stdout.write(`${image.url}
+`);
+      } else if (image.b64Json) {
+        env.stdout.write(image.b64Json);
+      }
+    }
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.images.length} image(s)`,
+      `size: ${result.usage.size}`,
+      `quality: ${result.usage.quality}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerImageCommand(program, env, config) {
+  program.command(COMMANDS.image).description("Generate images from a text prompt.").argument("[prompt]", "Image generation prompt. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_IMAGE_MODEL
+  ).option(OPTION_FLAGS.imageSize, OPTION_DESCRIPTIONS.imageSize, config?.size).option(OPTION_FLAGS.imageQuality, OPTION_DESCRIPTIONS.imageQuality, config?.quality).option(OPTION_FLAGS.imageCount, OPTION_DESCRIPTIONS.imageCount, config?.count?.toString()).option(OPTION_FLAGS.imageOutput, OPTION_DESCRIPTIONS.imageOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (prompt, options) => executeAction(() => executeImage(prompt, options, env), env)
+  );
+}
+
 // src/cli/models-command.ts
 var import_chalk8 = __toESM(require("chalk"), 1);
 init_model_shortcuts();
 async function handleModelsCommand(options, env) {
   const client = env.createClient();
-  const
+  const showText = options.all || options.text || !options.image && !options.speech;
+  const showImage = options.all || options.image;
+  const showSpeech = options.all || options.speech;
+  const textModels = showText ? client.modelRegistry.listModels(options.provider) : [];
+  const imageModels = showImage ? client.image.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
+  const speechModels = showSpeech ? client.speech.listModels().filter((m) => !options.provider || m.provider === options.provider) : [];
   if (options.format === "json") {
-    renderJSON(
+    renderJSON(textModels, imageModels, speechModels, env.stdout);
   } else {
-
+    renderAllTables(textModels, imageModels, speechModels, options.verbose || false, env.stdout);
+  }
+}
+function renderAllTables(textModels, imageModels, speechModels, verbose, stream2) {
+  const hasAnyModels = textModels.length > 0 || imageModels.length > 0 || speechModels.length > 0;
+  if (!hasAnyModels) {
+    stream2.write(import_chalk8.default.yellow("\nNo models found matching the specified criteria.\n\n"));
+    return;
+  }
+  stream2.write(import_chalk8.default.bold.cyan("\nAvailable Models\n"));
+  stream2.write(import_chalk8.default.cyan("=".repeat(80)) + "\n\n");
+  if (textModels.length > 0) {
+    renderTextTable(textModels, verbose, stream2);
+  }
+  if (imageModels.length > 0) {
+    renderImageTable(imageModels, verbose, stream2);
+  }
+  if (speechModels.length > 0) {
+    renderSpeechTable(speechModels, verbose, stream2);
+  }
+  if (textModels.length > 0) {
+    stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
+    stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
+    const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
+    for (const [shortcut, fullName] of shortcuts) {
+      stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
+    }
+    stream2.write("\n");
   }
 }
-function
+function renderTextTable(models, verbose, stream2) {
   const grouped = /* @__PURE__ */ new Map();
   for (const model of models) {
     const provider = model.provider;
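The hunk above wires a new `image` subcommand into the CLI and routes it through `client.image.generate`, writing base64 payloads to a file or printing hosted URLs. A rough sketch of the same surface from library code — assuming only the option and result fields visible in this diff (`size`, `quality`, `n`, `responseFormat`, `images[].b64Json`/`url`, `cost`), with structural stand-in types rather than the package's published typings, and a hypothetical model id:

import { writeFileSync } from "node:fs";

// Structural stand-ins inferred from this diff; not the published interfaces.
interface GeneratedImage { url?: string; b64Json?: string }
interface ImageResult {
  images: GeneratedImage[];
  cost?: number;
  usage: { size: string; quality: string };
}
interface ImageCapableClient {
  image: {
    generate(options: {
      model: string; prompt: string; size?: string; quality?: string;
      n?: number; responseFormat?: "url" | "b64_json";
    }): Promise<ImageResult>;
  };
}

async function saveFirstImage(client: ImageCapableClient, prompt: string): Promise<void> {
  const result = await client.image.generate({
    model: "gpt-image-1", // hypothetical model id
    prompt,
    responseFormat: "b64_json" // request base64 so we can write a file, as the CLI does
  });
  const image = result.images[0];
  if (image.b64Json) {
    writeFileSync("out.png", Buffer.from(image.b64Json, "base64"));
  } else if (image.url) {
    console.log(image.url);
  }
}

Note that the CLI picks the response format from whether `--output` was given, which is the same branch this sketch collapses into a fixed `"b64_json"`.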
@@ -11169,13 +12386,13 @@ function renderTable(models, verbose, stream2) {
     }
     grouped.get(provider).push(model);
   }
-  stream2.write(import_chalk8.default.bold.
-  stream2.write(import_chalk8.default.
+  stream2.write(import_chalk8.default.bold.blue("\u{1F4DD} Text/LLM Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
   const providers = Array.from(grouped.keys()).sort();
   for (const provider of providers) {
     const providerModels = grouped.get(provider);
     const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
-    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
 `));
     if (verbose) {
       renderVerboseTable(providerModels, stream2);
@@ -11184,13 +12401,6 @@ function renderTable(models, verbose, stream2) {
     }
     stream2.write("\n");
   }
-  stream2.write(import_chalk8.default.bold.magenta("Model Shortcuts\n"));
-  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n");
-  const shortcuts = Object.entries(MODEL_ALIASES).sort((a, b) => a[0].localeCompare(b[0]));
-  for (const [shortcut, fullName] of shortcuts) {
-    stream2.write(import_chalk8.default.cyan(` ${shortcut.padEnd(15)}`) + import_chalk8.default.dim(" \u2192 ") + import_chalk8.default.white(fullName) + "\n");
-  }
-  stream2.write("\n");
 }
 function renderCompactTable(models, stream2) {
   const idWidth = 25;
@@ -11267,9 +12477,171 @@ function renderVerboseTable(models, stream2) {
   }
   stream2.write("\n");
 }
-function
-
-
+function renderImageTable(models, verbose, stream2) {
+  stream2.write(import_chalk8.default.bold.green("\u{1F3A8} Image Generation Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream2.write(import_chalk8.default.bold.green(`
+${model.modelId}
+`));
+        stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Sizes:")} ${import_chalk8.default.yellow(model.supportedSizes.join(", "))}
+`);
+        if (model.supportedQualities) {
+          stream2.write(` ${import_chalk8.default.dim("Qualities:")} ${import_chalk8.default.yellow(model.supportedQualities.join(", "))}
+`);
+        }
+        stream2.write(` ${import_chalk8.default.dim("Max Images:")} ${import_chalk8.default.yellow(model.maxImages.toString())}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatImagePrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.textRendering) features.push("text-rendering");
+          if (model.features.transparency) features.push("transparency");
+          if (model.features.conversational) features.push("conversational");
+          if (features.length > 0) {
+            stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 32;
+      const nameWidth = 25;
+      const sizesWidth = 20;
+      const priceWidth = 15;
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      stream2.write(
+        import_chalk8.default.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Sizes".padEnd(sizesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        const sizes = model.supportedSizes.length > 2 ? model.supportedSizes.slice(0, 2).join(", ") + "..." : model.supportedSizes.join(", ");
+        stream2.write(
+          import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(sizes.padEnd(sizesWidth)) + " " + import_chalk8.default.cyan(formatImagePrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + sizesWidth + priceWidth + 6)) + "\n");
+    }
+    stream2.write("\n");
+  }
+}
+function renderSpeechTable(models, verbose, stream2) {
+  stream2.write(import_chalk8.default.bold.magenta("\u{1F3A4} Speech (TTS) Models\n"));
+  stream2.write(import_chalk8.default.dim("\u2500".repeat(80)) + "\n\n");
+  const grouped = /* @__PURE__ */ new Map();
+  for (const model of models) {
+    if (!grouped.has(model.provider)) {
+      grouped.set(model.provider, []);
+    }
+    grouped.get(model.provider).push(model);
+  }
+  for (const [provider, providerModels] of Array.from(grouped.entries()).sort()) {
+    const providerName = provider.charAt(0).toUpperCase() + provider.slice(1);
+    stream2.write(import_chalk8.default.bold.yellow(`${providerName}
+`));
+    if (verbose) {
+      for (const model of providerModels) {
+        stream2.write(import_chalk8.default.bold.green(`
+${model.modelId}
+`));
+        stream2.write(import_chalk8.default.dim(" " + "\u2500".repeat(60)) + "\n");
+        stream2.write(` ${import_chalk8.default.dim("Name:")} ${import_chalk8.default.white(model.displayName)}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Voices:")} ${import_chalk8.default.yellow(model.voices.length.toString())} voices
+`);
+        if (model.voices.length <= 6) {
+          stream2.write(` ${import_chalk8.default.dim(model.voices.join(", "))}
+`);
+        } else {
+          stream2.write(` ${import_chalk8.default.dim(model.voices.slice(0, 6).join(", ") + "...")}
+`);
+        }
+        stream2.write(` ${import_chalk8.default.dim("Formats:")} ${import_chalk8.default.yellow(model.formats.join(", "))}
+`);
+        stream2.write(` ${import_chalk8.default.dim("Max Input:")} ${import_chalk8.default.yellow(model.maxInputLength.toString())} chars
+`);
+        stream2.write(` ${import_chalk8.default.dim("Pricing:")} ${import_chalk8.default.cyan(formatSpeechPrice(model))}
+`);
+        if (model.features) {
+          const features = [];
+          if (model.features.multiSpeaker) features.push("multi-speaker");
+          if (model.features.voiceInstructions) features.push("voice-instructions");
+          if (model.features.languages) features.push(`${model.features.languages} languages`);
+          if (features.length > 0) {
+            stream2.write(` ${import_chalk8.default.dim("Features:")} ${import_chalk8.default.blue(features.join(", "))}
+`);
+          }
+        }
+      }
+    } else {
+      const idWidth = 30;
+      const nameWidth = 28;
+      const voicesWidth = 12;
+      const priceWidth = 18;
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      stream2.write(
+        import_chalk8.default.bold(
+          "Model ID".padEnd(idWidth) + " " + "Display Name".padEnd(nameWidth) + " " + "Voices".padEnd(voicesWidth) + " " + "Price".padEnd(priceWidth)
+        ) + "\n"
+      );
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+      for (const model of providerModels) {
+        stream2.write(
+          import_chalk8.default.green(model.modelId.padEnd(idWidth)) + " " + import_chalk8.default.white(model.displayName.substring(0, nameWidth - 1).padEnd(nameWidth)) + " " + import_chalk8.default.yellow(`${model.voices.length} voices`.padEnd(voicesWidth)) + " " + import_chalk8.default.cyan(formatSpeechPrice(model).padEnd(priceWidth)) + "\n"
+        );
+      }
+      stream2.write(import_chalk8.default.dim("\u2500".repeat(idWidth + nameWidth + voicesWidth + priceWidth + 6)) + "\n");
+    }
+    stream2.write("\n");
+  }
+}
+function formatImagePrice(model) {
+  if (model.pricing.perImage !== void 0) {
+    return `$${model.pricing.perImage.toFixed(2)}/img`;
+  }
+  if (model.pricing.bySize) {
+    const prices = Object.values(model.pricing.bySize);
+    const minPrice = Math.min(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    const maxPrice = Math.max(...prices.flatMap((p) => typeof p === "number" ? [p] : Object.values(p)));
+    if (minPrice === maxPrice) {
+      return `$${minPrice.toFixed(2)}/img`;
+    }
+    return `$${minPrice.toFixed(2)}-${maxPrice.toFixed(2)}`;
+  }
+  return "varies";
+}
+function formatSpeechPrice(model) {
+  if (model.pricing.perCharacter !== void 0) {
+    const perMillion = model.pricing.perCharacter * 1e6;
+    return `$${perMillion.toFixed(0)}/1M chars`;
+  }
+  if (model.pricing.perMinute !== void 0) {
+    return `~$${model.pricing.perMinute.toFixed(2)}/min`;
+  }
+  return "varies";
+}
+function renderJSON(textModels, imageModels, speechModels, stream2) {
+  const output = {};
+  if (textModels.length > 0) {
+    output.textModels = textModels.map((model) => ({
       provider: model.provider,
       modelId: model.modelId,
       displayName: model.displayName,
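The two `format*Price` helpers added in this hunk normalize heterogeneous pricing metadata for display: per-image rates print directly, per-size (optionally per-quality) maps collapse to a min–max range, and per-character speech rates are scaled to a per-million figure, so a rate of $0.000015/char renders as `$15/1M chars` (0.000015 × 1e6 = 15). A typed restatement of the image formatter, with sample numbers that are illustrative rather than the package's actual rates:

// Restates formatImagePrice above with explicit types; sample values are illustrative.
interface ImageModelPricing {
  perImage?: number;
  bySize?: Record<string, number | Record<string, number>>;
}

function imagePriceLabel(pricing: ImageModelPricing): string {
  if (pricing.perImage !== undefined) return `$${pricing.perImage.toFixed(2)}/img`;
  if (pricing.bySize) {
    // Flatten per-size values, which may themselves be keyed by quality tier.
    const flat = Object.values(pricing.bySize).flatMap(
      (p) => typeof p === "number" ? [p] : Object.values(p)
    );
    const min = Math.min(...flat);
    const max = Math.max(...flat);
    return min === max ? `$${min.toFixed(2)}/img` : `$${min.toFixed(2)}-${max.toFixed(2)}`;
  }
  return "varies";
}

console.log(imagePriceLabel({ bySize: { "1024x1024": { low: 0.01, high: 0.17 } } }));
// -> "$0.01-0.17"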
@@ -11285,9 +12657,33 @@ function renderJSON(models, stream2) {
       knowledgeCutoff: model.knowledgeCutoff,
       features: model.features,
       metadata: model.metadata
-  }))
-  shortcuts
-  }
+    }));
+    output.shortcuts = MODEL_ALIASES;
+  }
+  if (imageModels.length > 0) {
+    output.imageModels = imageModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      supportedSizes: model.supportedSizes,
+      supportedQualities: model.supportedQualities,
+      maxImages: model.maxImages,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
+  if (speechModels.length > 0) {
+    output.speechModels = speechModels.map((model) => ({
+      provider: model.provider,
+      modelId: model.modelId,
+      displayName: model.displayName,
+      voices: model.voices,
+      formats: model.formats,
+      maxInputLength: model.maxInputLength,
+      pricing: model.pricing,
+      features: model.features
+    }));
+  }
   stream2.write(JSON.stringify(output, null, 2) + "\n");
 }
 function formatTokens2(count) {
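This hunk extends `renderJSON` so the JSON output can carry all three model families; each key appears only when the corresponding list is non-empty. The implied output shape, written out as a type inferred from the mappings in this diff rather than taken from the package's declaration files:

// Inferred from renderJSON above; the text-model entries also carry the
// context-window and pricing fields elided here.
interface ModelsJsonOutput {
  textModels?: Array<{
    provider: string;
    modelId: string;
    displayName: string;
    knowledgeCutoff?: string;
    features?: unknown;
    metadata?: unknown;
  }>;
  shortcuts?: Record<string, string>; // MODEL_ALIASES: alias -> full model id
  imageModels?: Array<{
    provider: string;
    modelId: string;
    displayName: string;
    supportedSizes: string[];
    supportedQualities?: string[];
    maxImages: number;
    pricing: unknown;
    features?: unknown;
  }>;
  speechModels?: Array<{
    provider: string;
    modelId: string;
    displayName: string;
    voices: string[];
    formats: string[];
    maxInputLength: number;
    pricing: unknown;
    features?: unknown;
  }>;
}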
@@ -11300,7 +12696,7 @@ function formatTokens2(count) {
   }
 }
 function registerModelsCommand(program, env) {
-  program.command(COMMANDS.models).description("List
+  program.command(COMMANDS.models).description("List available models with pricing and capabilities.").option("--provider <name>", "Filter by provider (openai, anthropic, gemini)").option("--format <format>", "Output format: table or json", "table").option("--verbose", "Show detailed model information", false).option("--text", "Show text/LLM models (default if no type specified)").option("--image", "Show image generation models").option("--speech", "Show speech/TTS models").option("--all", "Show all model types (text, image, speech)").action(
     (options) => executeAction(
       () => handleModelsCommand(options, env),
       env
@@ -11308,6 +12704,60 @@ function registerModelsCommand(program, env) {
   );
 }
 
+// src/cli/speech-command.ts
+var import_node_fs12 = require("fs");
+var DEFAULT_SPEECH_MODEL = "tts-1";
+var DEFAULT_VOICE = "nova";
+async function executeSpeech(textArg, options, env) {
+  const text = await resolvePrompt(textArg, env);
+  const client = env.createClient();
+  const model = options.model;
+  const voice = options.voice ?? DEFAULT_VOICE;
+  const speed = options.speed ? Number.parseFloat(options.speed) : void 0;
+  const stderrTTY = env.stderr.isTTY === true;
+  if (!options.quiet && stderrTTY) {
+    env.stderr.write(`${SUMMARY_PREFIX} Generating speech with ${model} (voice: ${voice})...
+`);
+  }
+  const result = await client.speech.generate({
+    model,
+    input: text,
+    voice,
+    responseFormat: options.format,
+    speed
+  });
+  const audioBuffer = Buffer.from(result.audio);
+  if (options.output) {
+    (0, import_node_fs12.writeFileSync)(options.output, audioBuffer);
+    if (!options.quiet) {
+      env.stderr.write(`${SUMMARY_PREFIX} Audio saved to ${options.output}
+`);
+    }
+  } else {
+    env.stdout.write(audioBuffer);
+  }
+  if (!options.quiet && stderrTTY) {
+    const parts = [
+      `${result.usage.characterCount} characters`,
+      `format: ${result.format}`
+    ];
+    if (result.cost !== void 0) {
+      parts.push(`cost: ${formatCost(result.cost)}`);
+    }
+    env.stderr.write(`${SUMMARY_PREFIX} ${parts.join(" | ")}
+`);
+  }
+}
+function registerSpeechCommand(program, env, config) {
+  program.command(COMMANDS.speech).description("Generate speech audio from text.").argument("[text]", "Text to convert to speech. If omitted, stdin is used when available.").option(
+    OPTION_FLAGS.model,
+    OPTION_DESCRIPTIONS.model,
+    config?.model ?? DEFAULT_SPEECH_MODEL
+  ).option(OPTION_FLAGS.voice, OPTION_DESCRIPTIONS.voice, config?.voice ?? DEFAULT_VOICE).option(OPTION_FLAGS.speechFormat, OPTION_DESCRIPTIONS.speechFormat, config?.format).option(OPTION_FLAGS.speechSpeed, OPTION_DESCRIPTIONS.speechSpeed, config?.speed?.toString()).option(OPTION_FLAGS.speechOutput, OPTION_DESCRIPTIONS.speechOutput, config?.output).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, config?.quiet ?? false).action(
+    (text, options) => executeAction(() => executeSpeech(text, options, env), env)
+  );
+}
+
 // src/cli/environment.ts
 var import_node_readline = __toESM(require("readline"), 1);
 var import_chalk9 = __toESM(require("chalk"), 1);
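The new `speech` subcommand above defaults to `tts-1` with the `nova` voice, streams the audio bytes to stdout unless `--output` is given, and reports character count, format, and cost on stderr. A library-side sketch of the same call, assuming only the option and result fields visible in this hunk (the client parameter type is a structural stand-in, not the published interface):

import { writeFileSync } from "node:fs";

// Structural stand-ins inferred from this diff.
interface SpeechResult {
  audio: Uint8Array; // Buffer.from(result.audio) above implies a byte source
  format: string;
  cost?: number;
  usage: { characterCount: number };
}
interface SpeechCapableClient {
  speech: {
    generate(options: {
      model: string; input: string; voice: string;
      responseFormat?: string; speed?: number;
    }): Promise<SpeechResult>;
  };
}

async function narrate(client: SpeechCapableClient, text: string): Promise<void> {
  const result = await client.speech.generate({
    model: "tts-1", // the command's default model
    voice: "nova",  // the command's default voice
    input: text,
    speed: 1
  });
  writeFileSync(`narration.${result.format}`, Buffer.from(result.audio));
  console.error(`${result.usage.characterCount} characters | format: ${result.format}`);
}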
@@ -11459,6 +12909,8 @@ function createProgram(env, config) {
   });
   registerCompleteCommand(program, env, config?.complete);
   registerAgentCommand(program, env, config?.agent);
+  registerImageCommand(program, env, config?.image);
+  registerSpeechCommand(program, env, config?.speech);
   registerModelsCommand(program, env);
   registerGadgetCommand(program, env);
   if (config) {
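Finally, `createProgram` registers the two new commands next to the existing ones, forwarding per-command config sections just as it does for `complete` and `agent`. A plausible config object — field names taken from the defaults each registration reads (`config?.model`, `config?.size`, `config?.voice`, and so on), values purely illustrative:

// Illustrative only: the exact config schema is not shown in this diff
// beyond the fields each register* function reads.
const cliConfig = {
  image: { model: "gpt-image-1", size: "1024x1024", quality: "high", count: 1, quiet: false },
  speech: { model: "tts-1", voice: "nova", format: "mp3", speed: 1, output: "out.mp3" }
};

Both sections are optional; when absent, the commands fall back to the defaults shown in their registrations.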