llmist 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-GANXNBIZ.js → chunk-6ZDUWO6N.js} +1029 -22
- package/dist/chunk-6ZDUWO6N.js.map +1 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-QFRVTS5F.js} +2 -2
- package/dist/cli.cjs +1497 -45
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +473 -28
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1025 -18
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -2
- package/dist/index.d.ts +18 -2
- package/dist/index.js +2 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-BQcC2VCP.d.cts} +408 -1
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-BQcC2VCP.d.ts} +408 -1
- package/dist/testing/index.cjs +1025 -18
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- package/dist/{chunk-ZDNV7DDO.js.map → chunk-QFRVTS5F.js.map} +0 -0
package/dist/testing/index.cjs
CHANGED
@@ -2497,7 +2497,27 @@ var init_cost_reporting_client = __esm({
       constructor(client, reportCost) {
         this.client = client;
         this.reportCost = reportCost;
+        this.image = {
+          generate: async (options) => {
+            const result = await this.client.image.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
+        this.speech = {
+          generate: async (options) => {
+            const result = await this.client.speech.generate(options);
+            if (result.cost !== void 0 && result.cost > 0) {
+              this.reportCost(result.cost);
+            }
+            return result;
+          }
+        };
       }
+      image;
+      speech;
       /**
        * Access to model registry for cost estimation.
        */
@@ -5385,6 +5405,28 @@ var init_anthropic = __esm({
       getModelSpecs() {
         return ANTHROPIC_MODELS;
       }
+      // =========================================================================
+      // Image Generation (Not Supported)
+      // =========================================================================
+      supportsImageGeneration(_modelId) {
+        return false;
+      }
+      async generateImage() {
+        throw new Error(
+          "Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
+        );
+      }
+      // =========================================================================
+      // Speech Generation (Not Supported)
+      // =========================================================================
+      supportsSpeechGeneration(_modelId) {
+        return false;
+      }
+      async generateSpeech() {
+        throw new Error(
+          "Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
+        );
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
         const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
@@ -5539,6 +5581,182 @@
       }
     });
 
+// src/providers/gemini-image-models.ts
+function getGeminiImageModelSpec(modelId) {
+  return geminiImageModels.find((m) => m.modelId === modelId);
+}
+function isGeminiImageModel(modelId) {
+  return geminiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiImageCost(modelId, size = "1:1", n = 1) {
+  const spec = getGeminiImageModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perImage !== void 0) {
+    return spec.pricing.perImage * n;
+  }
+  if (spec.pricing.bySize) {
+    const sizePrice = spec.pricing.bySize[size];
+    if (typeof sizePrice === "number") {
+      return sizePrice * n;
+    }
+  }
+  return void 0;
+}
+var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
+var init_gemini_image_models = __esm({
+  "src/providers/gemini-image-models.ts"() {
+    "use strict";
+    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
+    geminiImageModels = [
+      // Imagen 4 Family (standalone image generation)
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-fast-generate-001",
+        displayName: "Imagen 4 Fast",
+        pricing: {
+          perImage: 0.02
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-001",
+        displayName: "Imagen 4",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-001",
+        displayName: "Imagen 4 Ultra",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Preview versions
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-generate-preview-06-06",
+        displayName: "Imagen 4 (Preview)",
+        pricing: {
+          perImage: 0.04
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "imagen-4.0-ultra-generate-preview-06-06",
+        displayName: "Imagen 4 Ultra (Preview)",
+        pricing: {
+          perImage: 0.06
+        },
+        supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
+        maxImages: 4,
+        defaultSize: "1:1",
+        features: {
+          textRendering: true
+        }
+      },
+      // Gemini Native Image Generation (multimodal models)
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image",
+        displayName: "Gemini 2.5 Flash Image",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-image-preview",
+        displayName: "Gemini 2.5 Flash Image (Preview)",
+        pricing: {
+          perImage: 0.039
+        },
+        supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
+        maxImages: 1,
+        defaultSize: "1:1",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-3-pro-image-preview",
+        displayName: "Gemini 3 Pro Image (Preview)",
+        pricing: {
+          // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K
+          // Using 2K as default
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      },
+      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
+      {
+        provider: "gemini",
+        modelId: "nano-banana-pro-preview",
+        displayName: "Nano Banana Pro (Gemini 3 Pro Image)",
+        pricing: {
+          bySize: {
+            "1K": 0.134,
+            "2K": 0.134,
+            "4K": 0.24
+          }
+        },
+        supportedSizes: ["1K", "2K", "4K"],
+        maxImages: 1,
+        defaultSize: "2K",
+        features: {
+          conversational: true,
+          textRendering: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini-models.ts
 var GEMINI_MODELS;
 var init_gemini_models = __esm({
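`calculateGeminiImageCost` tries a flat `perImage` price first and falls back to the `bySize` table, so the Imagen models and the token-priced Gemini 3 Pro image models resolve through different branches. Tracing the function against the specs registered above (my arithmetic, not output from the package):

    calculateGeminiImageCost("imagen-4.0-generate-001", "16:9", 3); // perImage branch: 0.04 * 3 = 0.12
    calculateGeminiImageCost("gemini-3-pro-image-preview", "4K");   // bySize branch: 0.24
    calculateGeminiImageCost("no-such-model");                      // undefined (no spec found)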
@@ -5712,7 +5930,171 @@ var init_gemini_models = __esm({
       }
     });
 
+// src/providers/gemini-speech-models.ts
+function getGeminiSpeechModelSpec(modelId) {
+  return geminiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isGeminiSpeechModel(modelId) {
+  return geminiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getGeminiSpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perMinute !== void 0) {
+    if (estimatedMinutes !== void 0) {
+      return estimatedMinutes * spec.pricing.perMinute;
+    }
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
+var init_gemini_speech_models = __esm({
+  "src/providers/gemini-speech-models.ts"() {
+    "use strict";
+    GEMINI_TTS_VOICES = [
+      "Zephyr",
+      // Bright
+      "Puck",
+      // Upbeat
+      "Charon",
+      // Informative
+      "Kore",
+      // Firm
+      "Fenrir",
+      // Excitable
+      "Leda",
+      // Youthful
+      "Orus",
+      // Firm
+      "Aoede",
+      // Breezy
+      "Callirrhoe",
+      // Easy-going
+      "Autonoe",
+      // Bright
+      "Enceladus",
+      // Breathy
+      "Iapetus",
+      // Clear
+      "Umbriel",
+      // Easy-going
+      "Algieba",
+      // Smooth
+      "Despina",
+      // Smooth
+      "Erinome",
+      // Clear
+      "Algenib",
+      // Gravelly
+      "Rasalgethi",
+      // Informative
+      "Laomedeia",
+      // Upbeat
+      "Achernar",
+      // Soft
+      "Alnilam",
+      // Firm
+      "Schedar",
+      // Even
+      "Gacrux",
+      // Mature
+      "Pulcherrima",
+      // Forward
+      "Achird",
+      // Friendly
+      "Zubenelgenubi",
+      // Casual
+      "Vindemiatrix",
+      // Gentle
+      "Sadachbia",
+      // Lively
+      "Sadaltager",
+      // Knowledgeable
+      "Sulafat"
+      // Warm
+    ];
+    GEMINI_TTS_FORMATS = ["pcm", "wav"];
+    geminiSpeechModels = [
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-flash-preview-tts",
+        displayName: "Gemini 2.5 Flash TTS (Preview)",
+        pricing: {
+          // $0.50 per 1M input tokens = $0.0000005 per token
+          perInputToken: 5e-7,
+          // $10.00 per 1M audio output tokens = $0.00001 per token
+          perAudioOutputToken: 1e-5,
+          // Rough estimate: ~$0.01 per minute of audio
+          perMinute: 0.01
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes (text + prompt combined)
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      },
+      {
+        provider: "gemini",
+        modelId: "gemini-2.5-pro-preview-tts",
+        displayName: "Gemini 2.5 Pro TTS (Preview)",
+        pricing: {
+          // $1.00 per 1M input tokens = $0.000001 per token
+          perInputToken: 1e-6,
+          // $20.00 per 1M audio output tokens = $0.00002 per token
+          perAudioOutputToken: 2e-5,
+          // Rough estimate: ~$0.02 per minute of audio
+          perMinute: 0.02
+        },
+        voices: [...GEMINI_TTS_VOICES],
+        formats: GEMINI_TTS_FORMATS,
+        maxInputLength: 8e3,
+        // bytes
+        defaultVoice: "Zephyr",
+        defaultFormat: "wav",
+        features: {
+          multiSpeaker: true,
+          languages: 24,
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/gemini.ts
+function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
+  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
+  const blockAlign = numChannels * bitsPerSample / 8;
+  const dataSize = pcmData.length;
+  const headerSize = 44;
+  const fileSize = headerSize + dataSize - 8;
+  const buffer = new ArrayBuffer(headerSize + dataSize);
+  const view = new DataView(buffer);
+  const uint8 = new Uint8Array(buffer);
+  view.setUint32(0, 1380533830, false);
+  view.setUint32(4, fileSize, true);
+  view.setUint32(8, 1463899717, false);
+  view.setUint32(12, 1718449184, false);
+  view.setUint32(16, 16, true);
+  view.setUint16(20, 1, true);
+  view.setUint16(22, numChannels, true);
+  view.setUint32(24, sampleRate, true);
+  view.setUint32(28, byteRate, true);
+  view.setUint16(32, blockAlign, true);
+  view.setUint16(34, bitsPerSample, true);
+  view.setUint32(36, 1684108385, false);
+  view.setUint32(40, dataSize, true);
+  uint8.set(pcmData, headerSize);
+  return buffer;
+}
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
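The bare integers written big-endian in `wrapPcmInWav` are the ASCII FourCC tags of a standard 44-byte RIFF/WAV header. A self-contained sanity check (the `fourCC` helper is mine, not part of llmist):

    // Packs four ASCII bytes big-endian, matching the view.setUint32(..., false) calls above.
    const fourCC = (s: string): number =>
      ((s.charCodeAt(0) << 24) | (s.charCodeAt(1) << 16) | (s.charCodeAt(2) << 8) | s.charCodeAt(3)) >>> 0;
    fourCC("RIFF"); // 1380533830
    fourCC("WAVE"); // 1463899717
    fourCC("fmt "); // 1718449184
    fourCC("data"); // 1684108385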
@@ -5723,7 +6105,9 @@ var init_gemini = __esm({
     import_genai = require("@google/genai");
     init_base_provider();
     init_constants2();
+    init_gemini_image_models();
     init_gemini_models();
+    init_gemini_speech_models();
     init_utils();
     GEMINI_ROLE_MAP = {
       system: "user",
@@ -5738,6 +6122,139 @@ var init_gemini = __esm({
       getModelSpecs() {
        return GEMINI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return geminiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isGeminiImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getGeminiImageModelSpec(options.model);
+        const isImagenModel = options.model.startsWith("imagen");
+        const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
+        const n = options.n ?? 1;
+        if (isImagenModel) {
+          const response2 = await client.models.generateImages({
+            model: options.model,
+            prompt: options.prompt,
+            config: {
+              numberOfImages: n,
+              aspectRatio,
+              outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
+            }
+          });
+          const images2 = response2.generatedImages ?? [];
+          const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
+          return {
+            // Gemini's imageBytes is already base64 encoded, so use it directly
+            images: images2.map((img) => ({
+              b64Json: img.image?.imageBytes ?? void 0
+            })),
+            model: options.model,
+            usage: {
+              imagesGenerated: images2.length,
+              size: aspectRatio,
+              quality: "standard"
+            },
+            cost: cost2
+          };
+        }
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [{ role: "user", parts: [{ text: options.prompt }] }],
+          config: {
+            responseModalities: [import_genai.Modality.IMAGE, import_genai.Modality.TEXT]
+          }
+        });
+        const images = [];
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData) {
+              images.push({
+                b64Json: part.inlineData.data
+              });
+            }
+          }
+        }
+        const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
+        return {
+          images,
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size: aspectRatio,
+            quality: "standard"
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return geminiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isGeminiSpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getGeminiSpeechModelSpec(options.model);
+        const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
+        const response = await client.models.generateContent({
+          model: options.model,
+          contents: [
+            {
+              role: "user",
+              parts: [{ text: options.input }]
+            }
+          ],
+          config: {
+            responseModalities: [import_genai.Modality.AUDIO],
+            speechConfig: {
+              voiceConfig: {
+                prebuiltVoiceConfig: {
+                  voiceName: voice
+                }
+              }
+            }
+          }
+        });
+        let pcmData;
+        const candidate = response.candidates?.[0];
+        if (candidate?.content?.parts) {
+          for (const part of candidate.content.parts) {
+            if ("inlineData" in part && part.inlineData?.data) {
+              const base64 = part.inlineData.data;
+              const binary = atob(base64);
+              pcmData = new Uint8Array(binary.length);
+              for (let i = 0; i < binary.length; i++) {
+                pcmData[i] = binary.charCodeAt(i);
+              }
+              break;
+            }
+          }
+        }
+        if (!pcmData) {
+          throw new Error("No audio data in Gemini TTS response");
+        }
+        const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
+        const cost = calculateGeminiSpeechCost(options.model, options.input.length);
+        return {
+          audio: audioData,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format: spec?.defaultFormat ?? "wav"
+        };
+      }
       buildRequestPayload(options, descriptor, _spec, messages) {
         const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
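Note that `generateSpeech` always base64-decodes the inline PCM and wraps it as 24 kHz, 16-bit mono WAV via `wrapPcmInWav`, whatever format the spec advertises. A hedged end-to-end sketch, under the assumption that `createGeminiProviderFromEnv` returns a usable provider when `GEMINI_API_KEY` is set:

    import { writeFile } from "node:fs/promises";

    const provider = createGeminiProviderFromEnv(); // assumed to read GEMINI_API_KEY
    const speech = await provider.generateSpeech({
      model: "gemini-2.5-flash-preview-tts",
      input: "Hello from llmist 2.4.0!",
      voice: "Puck"
    });
    await writeFile("hello.wav", Buffer.from(speech.audio)); // speech.audio is an ArrayBuffer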
@@ -5933,6 +6450,121 @@ var init_gemini = __esm({
       }
     });
 
+// src/providers/openai-image-models.ts
+function getOpenAIImageModelSpec(modelId) {
+  return openaiImageModels.find((m) => m.modelId === modelId);
+}
+function isOpenAIImageModel(modelId) {
+  return openaiImageModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAIImageCost(modelId, size, quality = "standard", n = 1) {
+  const spec = getOpenAIImageModelSpec(modelId);
+  if (!spec) return void 0;
+  const sizePrice = spec.pricing.bySize?.[size];
+  if (sizePrice === void 0) return void 0;
+  let pricePerImage;
+  if (typeof sizePrice === "number") {
+    pricePerImage = sizePrice;
+  } else {
+    pricePerImage = sizePrice[quality];
+    if (pricePerImage === void 0) return void 0;
+  }
+  return pricePerImage * n;
+}
+var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
+var init_openai_image_models = __esm({
+  "src/providers/openai-image-models.ts"() {
+    "use strict";
+    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
+    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
+    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
+    DALLE3_QUALITIES = ["standard", "hd"];
+    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
+    openaiImageModels = [
+      // GPT Image 1 Family (flagship)
+      {
+        provider: "openai",
+        modelId: "gpt-image-1",
+        displayName: "GPT Image 1",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
+            "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
+            "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-image-1-mini",
+        displayName: "GPT Image 1 Mini",
+        pricing: {
+          bySize: {
+            "1024x1024": { low: 5e-3, medium: 0.02, high: 0.052 },
+            "1024x1536": { low: 75e-4, medium: 0.03, high: 0.078 },
+            "1536x1024": { low: 75e-4, medium: 0.03, high: 0.078 }
+          }
+        },
+        supportedSizes: [...GPT_IMAGE_SIZES],
+        supportedQualities: [...GPT_IMAGE_QUALITIES],
+        maxImages: 1,
+        defaultSize: "1024x1024",
+        defaultQuality: "medium",
+        features: {
+          textRendering: true,
+          transparency: true
+        }
+      },
+      // DALL-E Family
+      {
+        provider: "openai",
+        modelId: "dall-e-3",
+        displayName: "DALL-E 3",
+        pricing: {
+          bySize: {
+            "1024x1024": { standard: 0.04, hd: 0.08 },
+            "1024x1792": { standard: 0.08, hd: 0.12 },
+            "1792x1024": { standard: 0.08, hd: 0.12 }
+          }
+        },
+        supportedSizes: [...DALLE3_SIZES],
+        supportedQualities: [...DALLE3_QUALITIES],
+        maxImages: 1,
+        // DALL-E 3 only supports n=1
+        defaultSize: "1024x1024",
+        defaultQuality: "standard",
+        features: {
+          textRendering: true
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "dall-e-2",
+        displayName: "DALL-E 2 (Legacy)",
+        pricing: {
+          bySize: {
+            "256x256": 0.016,
+            "512x512": 0.018,
+            "1024x1024": 0.02
+          }
+        },
+        supportedSizes: [...DALLE2_SIZES],
+        maxImages: 10,
+        defaultSize: "1024x1024"
+      }
+    ];
+  }
+});
+
 // src/providers/openai-models.ts
 var OPENAI_MODELS;
 var init_openai_models = __esm({
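`calculateOpenAIImageCost` resolves a `bySize` entry that is either a flat number (DALL-E 2) or a per-quality map (DALL-E 3 and the GPT Image family). Worked examples against the tables above (my arithmetic):

    calculateOpenAIImageCost("dall-e-3", "1024x1792", "hd");        // 0.12
    calculateOpenAIImageCost("gpt-image-1", "1024x1024", "low", 2); // 0.011 * 2 = 0.022
    calculateOpenAIImageCost("dall-e-2", "512x512");                // 0.018 (flat price; quality ignored)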
@@ -6297,6 +6929,144 @@ var init_openai_models = __esm({
       }
     });
 
+// src/providers/openai-speech-models.ts
+function getOpenAISpeechModelSpec(modelId) {
+  return openaiSpeechModels.find((m) => m.modelId === modelId);
+}
+function isOpenAISpeechModel(modelId) {
+  return openaiSpeechModels.some((m) => m.modelId === modelId);
+}
+function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
+  const spec = getOpenAISpeechModelSpec(modelId);
+  if (!spec) return void 0;
+  if (spec.pricing.perCharacter !== void 0) {
+    return characterCount * spec.pricing.perCharacter;
+  }
+  if (spec.pricing.perMinute !== void 0 && estimatedMinutes !== void 0) {
+    return estimatedMinutes * spec.pricing.perMinute;
+  }
+  if (spec.pricing.perMinute !== void 0) {
+    const approxMinutes = characterCount / 750;
+    return approxMinutes * spec.pricing.perMinute;
+  }
+  return void 0;
+}
+var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
+var init_openai_speech_models = __esm({
+  "src/providers/openai-speech-models.ts"() {
+    "use strict";
+    OPENAI_TTS_VOICES = [
+      "alloy",
+      "echo",
+      "fable",
+      "onyx",
+      "nova",
+      "shimmer"
+    ];
+    OPENAI_TTS_EXTENDED_VOICES = [
+      ...OPENAI_TTS_VOICES,
+      "ash",
+      "ballad",
+      "coral",
+      "sage",
+      "verse"
+    ];
+    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
+    openaiSpeechModels = [
+      // Standard TTS models (character-based pricing)
+      {
+        provider: "openai",
+        modelId: "tts-1",
+        displayName: "TTS-1",
+        pricing: {
+          // $15 per 1M characters = $0.000015 per character
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-1106",
+        displayName: "TTS-1 (Nov 2023)",
+        pricing: {
+          perCharacter: 15e-6
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd",
+        displayName: "TTS-1 HD",
+        pricing: {
+          // $30 per 1M characters = $0.00003 per character
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "tts-1-hd-1106",
+        displayName: "TTS-1 HD (Nov 2023)",
+        pricing: {
+          perCharacter: 3e-5
+        },
+        voices: [...OPENAI_TTS_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 4096,
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: false
+        }
+      },
+      // Token-based TTS model with voice instructions support
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini-tts",
+        displayName: "GPT-4o Mini TTS",
+        pricing: {
+          // $0.60 per 1M input tokens = $0.0000006 per token
+          perInputToken: 6e-7,
+          // $12 per 1M audio output tokens = $0.000012 per token
+          perAudioOutputToken: 12e-6,
+          // ~$0.015 per minute of audio
+          perMinute: 0.015
+        },
+        voices: [...OPENAI_TTS_EXTENDED_VOICES],
+        formats: OPENAI_TTS_FORMATS,
+        maxInputLength: 2e3,
+        // tokens, not characters
+        defaultVoice: "alloy",
+        defaultFormat: "mp3",
+        features: {
+          voiceInstructions: true
+        }
+      }
+    ];
+  }
+});
+
 // src/providers/openai.ts
 function sanitizeExtra(extra, allowTemperature) {
   if (!extra) {
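`calculateOpenAISpeechCost` prefers per-character pricing, then explicit minutes, then a ~750-characters-per-minute heuristic. Traced examples (my arithmetic):

    calculateOpenAISpeechCost("tts-1-hd", 10000);        // 10000 * 0.00003 = 0.30 (perCharacter)
    calculateOpenAISpeechCost("gpt-4o-mini-tts", 1500);  // (1500 / 750) * 0.015 = 0.03 (heuristic minutes)
    calculateOpenAISpeechCost("gpt-4o-mini-tts", 0, 5);  // 5 * 0.015 = 0.075 (explicit minutes win)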
@@ -6318,7 +7088,9 @@ var init_openai = __esm({
     import_tiktoken = require("tiktoken");
     init_base_provider();
     init_constants2();
+    init_openai_image_models();
     init_openai_models();
+    init_openai_speech_models();
     init_utils();
     ROLE_MAP = {
       system: "system",
@@ -6333,6 +7105,87 @@ var init_openai = __esm({
       getModelSpecs() {
        return OPENAI_MODELS;
       }
+      // =========================================================================
+      // Image Generation
+      // =========================================================================
+      getImageModelSpecs() {
+        return openaiImageModels;
+      }
+      supportsImageGeneration(modelId) {
+        return isOpenAIImageModel(modelId);
+      }
+      async generateImage(options) {
+        const client = this.client;
+        const spec = getOpenAIImageModelSpec(options.model);
+        const size = options.size ?? spec?.defaultSize ?? "1024x1024";
+        const quality = options.quality ?? spec?.defaultQuality ?? "standard";
+        const n = options.n ?? 1;
+        const isDallE2 = options.model === "dall-e-2";
+        const isGptImage = options.model.startsWith("gpt-image");
+        const requestParams = {
+          model: options.model,
+          prompt: options.prompt,
+          size,
+          n
+        };
+        if (!isDallE2 && !isGptImage) {
+          requestParams.quality = quality;
+        }
+        if (isGptImage) {
+        } else if (!isDallE2) {
+          requestParams.response_format = options.responseFormat ?? "url";
+        }
+        const response = await client.images.generate(requestParams);
+        const cost = calculateOpenAIImageCost(options.model, size, quality, n);
+        const images = response.data ?? [];
+        return {
+          images: images.map((img) => ({
+            url: img.url,
+            b64Json: img.b64_json,
+            revisedPrompt: img.revised_prompt
+          })),
+          model: options.model,
+          usage: {
+            imagesGenerated: images.length,
+            size,
+            quality
+          },
+          cost
+        };
+      }
+      // =========================================================================
+      // Speech Generation
+      // =========================================================================
+      getSpeechModelSpecs() {
+        return openaiSpeechModels;
+      }
+      supportsSpeechGeneration(modelId) {
+        return isOpenAISpeechModel(modelId);
+      }
+      async generateSpeech(options) {
+        const client = this.client;
+        const spec = getOpenAISpeechModelSpec(options.model);
+        const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
+        const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
+        const response = await client.audio.speech.create({
+          model: options.model,
+          input: options.input,
+          voice,
+          response_format: format,
+          speed: options.speed ?? 1
+        });
+        const audioBuffer = await response.arrayBuffer();
+        const cost = calculateOpenAISpeechCost(options.model, options.input.length);
+        return {
+          audio: audioBuffer,
+          model: options.model,
+          usage: {
+            characterCount: options.input.length
+          },
+          cost,
+          format
+        };
+      }
       buildRequestPayload(options, descriptor, spec, messages) {
         const { maxTokens, temperature, topP, stopSequences, extra } = options;
         const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
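The branching in `generateImage` above shapes the underlying OpenAI SDK request differently per model family. Tracing the code (not the OpenAI API docs; `provider` stands for the adapter instance configured elsewhere):

    await provider.generateImage({ model: "dall-e-3", prompt: "a fox", quality: "hd" });
    // -> client.images.generate({ model, prompt, size, n, quality: "hd", response_format: "url" })
    await provider.generateImage({ model: "gpt-image-1", prompt: "a fox" });
    // -> client.images.generate({ model, prompt, size, n }); the empty isGptImage branch
    //    skips both quality and response_format for the GPT Image family, and dall-e-2
    //    likewise gets neither extra parameter.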
@@ -6673,30 +7526,109 @@ var init_model_registry = __esm({
       }
     });
 
-// src/core/
-var
-var
-"src/core/
+// src/core/namespaces/image.ts
+var ImageNamespace;
+var init_image = __esm({
+  "src/core/namespaces/image.ts"() {
     "use strict";
-
-    constructor(defaultProvider
+    ImageNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
         this.defaultProvider = defaultProvider;
       }
-
-
-
-
+      /**
+       * Generate images from a text prompt.
+       *
+       * @param options - Image generation options
+       * @returns Promise resolving to the generation result with images and cost
+       * @throws Error if the provider doesn't support image generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findImageAdapter(modelId);
+        if (!adapter || !adapter.generateImage) {
+          throw new Error(
+            `No provider supports image generation for model "${modelId}". Available image models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-
-
-
+        return adapter.generateImage(options);
+      }
+      /**
+       * List all available image generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getImageModelSpecs) {
+            models.push(...adapter.getImageModelSpecs());
+          }
         }
-
-
-
-
+        return models;
+      }
+      /**
+       * Check if a model is supported for image generation.
+       */
+      supportsModel(modelId) {
+        return this.findImageAdapter(modelId) !== void 0;
+      }
+      findImageAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsImageGeneration?.(modelId) ?? false
+        );
+      }
+    };
+  }
+});
+
+// src/core/namespaces/speech.ts
+var SpeechNamespace;
+var init_speech = __esm({
+  "src/core/namespaces/speech.ts"() {
+    "use strict";
+    SpeechNamespace = class {
+      constructor(adapters, defaultProvider) {
+        this.adapters = adapters;
+        this.defaultProvider = defaultProvider;
+      }
+      /**
+       * Generate speech audio from text.
+       *
+       * @param options - Speech generation options
+       * @returns Promise resolving to the generation result with audio and cost
+       * @throws Error if the provider doesn't support speech generation
+       */
+      async generate(options) {
+        const modelId = options.model;
+        const adapter = this.findSpeechAdapter(modelId);
+        if (!adapter || !adapter.generateSpeech) {
+          throw new Error(
+            `No provider supports speech generation for model "${modelId}". Available speech models: ${this.listModels().map((m) => m.modelId).join(", ")}`
+          );
         }
-        return
+        return adapter.generateSpeech(options);
+      }
+      /**
+       * List all available speech generation models.
+       */
+      listModels() {
+        const models = [];
+        for (const adapter of this.adapters) {
+          if (adapter.getSpeechModelSpecs) {
+            models.push(...adapter.getSpeechModelSpecs());
+          }
+        }
+        return models;
+      }
+      /**
+       * Check if a model is supported for speech generation.
+       */
+      supportsModel(modelId) {
+        return this.findSpeechAdapter(modelId) !== void 0;
+      }
+      findSpeechAdapter(modelId) {
+        return this.adapters.find(
+          (adapter) => adapter.supportsSpeechGeneration?.(modelId) ?? false
+        );
       }
     };
   }
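The namespaces discover adapters by capability rather than by provider name: the first adapter whose `supportsImageGeneration`/`supportsSpeechGeneration` returns true wins. A short sketch, assuming `llmist` is an instance of the `LLMist` class wired up in the hunks below:

    llmist.image.supportsModel("dall-e-3");           // true if some adapter claims it
    llmist.image.listModels().map((m) => m.modelId);  // union of all adapters' image specs
    await llmist.image.generate({ model: "nope", prompt: "x" });
    // -> Error: No provider supports image generation for model "nope".
    //    Available image models: gpt-image-1, gpt-image-1-mini, dall-e-3, ...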
@@ -6745,6 +7677,69 @@ var init_quick_methods = __esm({
       }
     });
 
+// src/core/namespaces/text.ts
+var TextNamespace;
+var init_text = __esm({
+  "src/core/namespaces/text.ts"() {
+    "use strict";
+    init_quick_methods();
+    TextNamespace = class {
+      constructor(client) {
+        this.client = client;
+      }
+      /**
+       * Generate a complete text response.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Complete text response
+       */
+      async complete(prompt, options) {
+        return complete(this.client, prompt, options);
+      }
+      /**
+       * Stream text chunks.
+       *
+       * @param prompt - User prompt
+       * @param options - Optional configuration
+       * @returns Async generator yielding text chunks
+       */
+      stream(prompt, options) {
+        return stream(this.client, prompt, options);
+      }
+    };
+  }
+});
+
+// src/core/options.ts
+var ModelIdentifierParser;
+var init_options = __esm({
+  "src/core/options.ts"() {
+    "use strict";
+    ModelIdentifierParser = class {
+      constructor(defaultProvider = "openai") {
+        this.defaultProvider = defaultProvider;
+      }
+      parse(identifier) {
+        const trimmed = identifier.trim();
+        if (!trimmed) {
+          throw new Error("Model identifier cannot be empty");
+        }
+        const [maybeProvider, ...rest] = trimmed.split(":");
+        if (rest.length === 0) {
+          return { provider: this.defaultProvider, name: maybeProvider };
+        }
+        const provider = maybeProvider;
+        const name = rest.join(":");
+        if (!name) {
+          throw new Error("Model name cannot be empty");
+        }
+        return { provider, name };
+      }
+    };
+  }
+});
+
 // src/core/client.ts
 var client_exports = {};
 __export(client_exports, {
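`ModelIdentifierParser` splits on the first `:` and re-joins the rest, so provider-prefixed ids may themselves contain colons. Traced behavior from the code above:

    const parser = new ModelIdentifierParser();         // defaultProvider = "openai"
    parser.parse("gpt-4o");                             // { provider: "openai", name: "gpt-4o" }
    parser.parse("gemini:gemini-2.5-pro-preview-tts");  // { provider: "gemini", name: "gemini-2.5-pro-preview-tts" }
    parser.parse("custom:org:model-v2");                // { provider: "custom", name: "org:model-v2" }
    parser.parse("   ");                                // throws "Model identifier cannot be empty"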
@@ -6757,12 +7752,20 @@ var init_client = __esm({
     init_builder();
     init_discovery();
     init_model_registry();
+    init_image();
+    init_speech();
+    init_text();
     init_options();
     init_quick_methods();
     LLMist = class _LLMist {
       parser;
+      defaultProvider;
       modelRegistry;
       adapters;
+      // Namespaces for different generation types
+      text;
+      image;
+      speech;
       constructor(...args) {
         let adapters = [];
         let defaultProvider;
@@ -6801,6 +7804,7 @@
           const priorityB = b.priority ?? 0;
           return priorityB - priorityA;
         });
+        this.defaultProvider = resolvedDefaultProvider;
         this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
        this.modelRegistry = new ModelRegistry();
         for (const adapter of this.adapters) {
@@ -6809,6 +7813,9 @@
         if (customModels.length > 0) {
           this.modelRegistry.registerModels(customModels);
         }
+        this.text = new TextNamespace(this);
+        this.image = new ImageNamespace(this.adapters, this.defaultProvider);
+        this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
       }
       stream(options) {
         const descriptor = this.parser.parse(options.model);