llmist 2.3.0 → 2.4.0
This diff shows the changes between publicly released package versions as they appear in their public registry; it is provided for informational purposes only.
- package/dist/{chunk-GANXNBIZ.js → chunk-6ZDUWO6N.js} +1029 -22
- package/dist/chunk-6ZDUWO6N.js.map +1 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-QFRVTS5F.js} +2 -2
- package/dist/cli.cjs +1497 -45
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +473 -28
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1025 -18
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -2
- package/dist/index.d.ts +18 -2
- package/dist/index.js +2 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-BQcC2VCP.d.cts} +408 -1
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-BQcC2VCP.d.ts} +408 -1
- package/dist/testing/index.cjs +1025 -18
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/{chunk-ZDNV7DDO.js.map → chunk-QFRVTS5F.js.map} +0 -0
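The headline change in 2.4.0 is first-class image and speech generation: new `text`, `image`, and `speech` namespaces on the `LLMist` client, `generateImage`/`generateSpeech` implementations for the OpenAI and Gemini providers (Anthropic explicitly throws), per-model pricing catalogs, and cost reporting for the new modalities. A minimal usage sketch, assuming the option shapes visible in the compiled output below; client construction itself is not shown in this diff:

    // Hypothetical setup; the diff shows the namespaces, not how the client is built.
    import { LLMist } from "llmist";

    const client = new LLMist();

    // Routed to the first adapter whose supportsImageGeneration("dall-e-3") returns true.
    const result = await client.image.generate({
      model: "dall-e-3",
      prompt: "A lighthouse at dusk",
      size: "1024x1024",
      quality: "hd"
    });
    console.log(result.images[0]?.url, result.cost); // cost via calculateOpenAIImageCost: $0.08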
package/dist/index.cjs CHANGED

@@ -2555,7 +2555,27 @@ var init_cost_reporting_client = __esm({
       constructor(client, reportCost) {
         this.client = client;
         this.reportCost = reportCost;
+        this.image = {
+          generate: async (options) => {
+            const result = await this.client.image.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
+        this.speech = {
+          generate: async (options) => {
+            const result = await this.client.speech.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
       }
+      image;
+      speech;
       /**
        * Access to model registry for cost estimation.
        */
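The hunk above extends the cost-reporting wrapper (from `init_cost_reporting_client`) so image and speech generation feed the same `reportCost` callback as text usage. A hypothetical sketch of the wiring; the wrapper's class name and construction are assumptions, only `constructor(client, reportCost)` is visible here:

    // Hypothetical: accumulate spend across all modalities through one callback.
    let totalCost = 0;
    const reportCost = (cost: number) => {
      totalCost += cost;
    };

    // The wrapper delegates and reports any cost that is defined and > 0.
    const tracked = new CostReportingClient(client, reportCost); // name assumed from init_cost_reporting_client
    await tracked.image.generate({ model: "gpt-image-1", prompt: "a red square" });
    console.log(totalCost); // includes the image call's cost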
@@ -4648,6 +4668,28 @@ var init_anthropic = __esm({
       getModelSpecs() {
         return ANTHROPIC_MODELS;
       }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
@@ -4802,6 +4844,182 @@ var init_anthropic = __esm({
   }
 });

+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
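With these specs, `calculateGeminiImageCost` is simple arithmetic: flat per-image models multiply, size-priced models look up the tier first. A quick check against the table above:

    // Flat per-image pricing: n images at spec.pricing.perImage each.
    calculateGeminiImageCost("imagen-4.0-generate-001", "1:1", 4); // 0.04 * 4 = 0.16

    // Size-tiered pricing: look up the tier, then multiply.
    calculateGeminiImageCost("gemini-3-pro-image-preview", "4K", 1); // 0.24

    // Unknown model or unpriced size: undefined (void 0).
    calculateGeminiImageCost("imagen-99", "1:1"); // undefined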
@@ -4975,7 +5193,171 @@ var init_gemini_models = __esm({
   }
 });

+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
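The four big-endian `setUint32` constants in `wrapPcmInWav` are the ASCII chunk tags of the canonical 44-byte WAV header: 1380533830 is "RIFF" (0x52494646), 1463899717 is "WAVE", 1718449184 is "fmt ", and 1684108385 is "data". A sketch spelling out the same layout with named constants:

    // Canonical 44-byte PCM WAV header, as written by wrapPcmInWav above.
    const RIFF = 0x52494646; // "RIFF" -> 1380533830
    const WAVE = 0x57415645; // "WAVE" -> 1463899717
    const FMT_ = 0x666d7420; // "fmt " -> 1718449184
    const DATA = 0x64617461; // "data" -> 1684108385

    // Offsets: 0 "RIFF", 4 file size - 8, 8 "WAVE", 12 "fmt ", 16 fmt chunk size (16),
    // 20 audio format (1 = PCM), 22 channels, 24 sample rate, 28 byte rate,
    // 32 block align, 34 bits per sample, 36 "data", 40 data size, 44 onward: samples.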
@@ -4986,7 +5368,9 @@ var init_gemini = __esm({
     import_genai = require("@google/genai");
     init_base_provider();
     init_constants2();
+    init_gemini_image_models();
     init_gemini_models();
+    init_gemini_speech_models();
     init_utils();
     GEMINI_ROLE_MAP = {
       system: "user",
@@ -5001,6 +5385,139 @@ var init_gemini = __esm({
       getModelSpecs() {
         return GEMINI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return geminiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isGeminiImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getGeminiImageModelSpec(options.model);
+        const isImagenModel = options.model.startsWith("imagen");
+        const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+        const n = options.n ?? 1;
+        if (isImagenModel) {
+          const response2 = await client.models.generateImages({
+            model: options.model,
+            prompt: options.prompt,
+            config: {
+              numberOfImages: n,
+              aspectRatio,
+              outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+            }
+          });
+          const images2 = response2.generatedImages ?? [];
+          const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+          return {
+            // Gemini's imageBytes is already base64 encoded, so use it directly
+            images: images2.map((img) => ({
+              b64Json: img.image?.imageBytes ?? void 0
+            })),
+            model: options.model,
+            usage: {
+              imagesGenerated: images2.length,
+              size: aspectRatio,
+              quality: "standard"
+            },
+            cost: cost2
+          };
+        }
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+          config: {
+            responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+          }
+        });
+        const images = [];
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData) {
+              images.push({
+                b64Json: part.inlineData.data
+              });
+            }
+          }
+        }
+        const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+        return {
+          images,
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return geminiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isGeminiSpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getGeminiSpeechModelSpec(options.model);
+        const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [
+            {
+              role: "user",
+              parts: [{ text: options.input }]
+            }
+          ],
+          config: {
+            responseModalities: [import_genai.Modality.AUDIO],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: voice
+                }
+              }
+            }
+          }
+        });
+        let pcmData;
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData?.data) {
+              const base64 = part.inlineData.data;
+              const binary = atob(base64);
+              pcmData = new Uint8Array(binary.length);
+              for (let i = 0; i < binary.length; i++) {
+                pcmData[i] = binary.charCodeAt(i);
+              }
+              break;
+            }
+          }
+        }
+        if (!pcmData) {
+          throw new Error("No audio data in Gemini TTS response");
+        }
+        const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+        const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+        return {
+          audio: audioData,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format: spec?.defaultFormat ?? "wav"
+        };
+      }
       buildRequestPayload(options, descriptor, _spec, messages) {
         const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
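Gemini TTS returns base64-encoded raw PCM, which the method above decodes and wraps as 24 kHz, 16-bit, mono WAV (the `wrapPcmInWav(pcmData, 24e3, 16, 1)` call). Since the returned `audio` is an `ArrayBuffer`, persisting it in Node is one call (a usage sketch; client setup is assumed):

    import { writeFile } from "node:fs/promises";

    const result = await client.speech.generate({
      model: "gemini-2.5-flash-preview-tts",
      input: "Testing one two three"
    });
    await writeFile("out.wav", Buffer.from(result.audio)); // playable 24 kHz mono WAV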
@@ -5196,6 +5713,121 @@ var init_gemini = __esm({
   }
 });

+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
@@ -5560,6 +6192,144 @@ var init_openai_models = __esm({
   }
 });

+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
   if (!extra) {
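The pricing fields drive `calculateOpenAISpeechCost` directly: character-priced models multiply, and per-minute models fall back to an approximation of 750 characters per minute of audio. For example:

    // tts-1 at $15/1M characters: a 1,000-character input costs $0.015.
    calculateOpenAISpeechCost("tts-1", 1000); // 1000 * 15e-6 = 0.015

    // gpt-4o-mini-tts has no perCharacter price; 1,500 chars ≈ 2 minutes at 750 chars/min.
    calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500); // (1500 / 750) * 0.015 = 0.03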
@@ -5581,7 +6351,9 @@ var init_openai = __esm({
     import_tiktoken = require("tiktoken");
     init_base_provider();
     init_constants2();
+    init_openai_image_models();
     init_openai_models();
+    init_openai_speech_models();
     init_utils();
     ROLE_MAP = {
       system: "system",
@@ -5596,6 +6368,87 @@ var init_openai = __esm({
       getModelSpecs() {
         return OPENAI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return openaiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isOpenAIImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getOpenAIImageModelSpec(options.model);
+        const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+        const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+        const n = options.n ?? 1;
+        const isDallE2 = options.model === "dall-e-2";
+        const isGptImage = options.model.startsWith("gpt-image");
+        const requestParams = {
+          model: options.model,
+          prompt: options.prompt,
+          size,
+          n
+        };
+        if (!isDallE2 && !isGptImage) {
+          requestParams.quality = quality;
+        }
+        if (isGptImage) {
+        } else if (!isDallE2) {
+          requestParams.response_format = options.responseFormat ?? "url";
+        }
+        const response = await client.images.generate(requestParams);
+        const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+        const images = response.data ?? [];
+        return {
+          images: images.map((img) => ({
+            url: img.url,
+            b64Json: img.b64_json,
+            revisedPrompt: img.revised_prompt
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size,
+            quality
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return openaiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isOpenAISpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getOpenAISpeechModelSpec(options.model);
+        const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+        const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+        const response = await client.audio.speech.create({
+          model: options.model,
+          input: options.input,
+          voice,
+          response_format: format,
+          speed: options.speed ?? 1
+        });
+        const audioBuffer = await response.arrayBuffer();
+        const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+        return {
+          audio: audioBuffer,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format
+        };
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const { maxTokens, temperature, topP, stopSequences, extra } = options;
         const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
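Note the request-shaping branches in the OpenAI `generateImage` above: `quality` is only forwarded for DALL-E 3 (`!isDallE2 && !isGptImage`), and `response_format` is omitted for both DALL-E 2 and the `gpt-image-*` models (the empty `if (isGptImage) {}` branch), which return base64 data without it. A quick cost cross-check against the pricing table:

    // DALL-E 3, 1024x1024, hd: bySize lookup gives { standard: 0.04, hd: 0.08 }.
    calculateOpenAIImageCost("dall-e-3", "1024x1024", "hd", 1); // 0.08

    // DALL-E 2 prices are plain numbers, so quality is ignored.
    calculateOpenAIImageCost("dall-e-2", "512x512", "standard", 3); // 0.018 * 3 = 0.054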
@@ -5936,30 +6789,109 @@ var init_model_registry = __esm({
   }
 });

-// src/core/options.ts
-var ModelIdentifierParser;
-var init_options = __esm({
-  "src/core/options.ts"() {
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
     "use strict";
-    ModelIdentifierParser = class {
-      constructor(defaultProvider = "openai") {
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-      parse(identifier) {
-        const trimmed = identifier.trim();
-        if (!trimmed) {
-          throw new Error("Model identifier cannot be empty");
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        const [maybeProvider, ...rest] = trimmed.split(":");
-        if (rest.length === 0) {
-          return { provider: this.defaultProvider, name: maybeProvider };
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
         }
-        const provider = maybeProvider;
-        const name = rest.join(":");
-        if (!name) {
-          throw new Error("Model name cannot be empty");
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        return { provider, name };
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
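`ImageNamespace` and `SpeechNamespace` route purely by capability: the first adapter whose `supportsImageGeneration`/`supportsSpeechGeneration` returns true for the model wins. A hypothetical minimal adapter shape that would participate in this discovery (field names taken from the calls above; the real adapter interface is larger):

    const myImageAdapter = {
      supportsImageGeneration: (modelId: string) => modelId === "my-diffusion-v1",
      getImageModelSpecs: () => [{ provider: "custom", modelId: "my-diffusion-v1" }],
      async generateImage(options: { model: string; prompt: string }) {
        // ...call your backend; return the shape the namespace passes through.
        return { images: [], model: options.model, usage: { imagesGenerated: 0 }, cost: 0 };
      }
    };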
@@ -6008,6 +6940,69 @@ var init_quick_methods = __esm({
   }
 });

+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
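`ModelIdentifierParser` splits on the first `:` and keeps everything after it as the model name (`rest.join(":")`), so model IDs that themselves contain colons survive. For example:

    const parser = new ModelIdentifierParser("openai");
    parser.parse("gpt-4o");                  // { provider: "openai", name: "gpt-4o" } (default provider)
    parser.parse("gemini:gemini-2.5-flash"); // { provider: "gemini", name: "gemini-2.5-flash" }
    parser.parse("ollama:llama3:70b");       // { provider: "ollama", name: "llama3:70b" }
    parser.parse("  ");                      // throws: "Model identifier cannot be empty"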
@@ -6020,12 +7015,20 @@ var init_client = __esm({
     init_builder();
     init_discovery();
     init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6064,6 +7067,7 @@ var init_client = __esm({
          const priorityB = b.priority ?? 0;
          return priorityB - priorityA;
        });
+       this.defaultProvider = resolvedDefaultProvider;
        this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
        this.modelRegistry = new ModelRegistry();
        for (const adapter of this.adapters) {
@@ -6072,6 +7076,9 @@ var init_client = __esm({
        if (customModels.length > 0) {
          this.modelRegistry.registerModels(customModels);
        }
+       this.text = new TextNamespace(this);
+       this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+       this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
      }
      stream(options) {
        const descriptor = this.parser.parse(options.model);