llmist 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-GANXNBIZ.js → chunk-6ZDUWO6N.js} +1029 -22
- package/dist/chunk-6ZDUWO6N.js.map +1 -0
- package/dist/{chunk-ZDNV7DDO.js → chunk-QFRVTS5F.js} +2 -2
- package/dist/cli.cjs +1497 -45
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +473 -28
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +1025 -18
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +18 -2
- package/dist/index.d.ts +18 -2
- package/dist/index.js +2 -2
- package/dist/{mock-stream-wRfUqXx4.d.cts → mock-stream-BQcC2VCP.d.cts} +408 -1
- package/dist/{mock-stream-wRfUqXx4.d.ts → mock-stream-BQcC2VCP.d.ts} +408 -1
- package/dist/testing/index.cjs +1025 -18
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +1 -1
- package/package.json +1 -1
- package/dist/chunk-GANXNBIZ.js.map +0 -1
- /package/dist/{chunk-ZDNV7DDO.js.map → chunk-QFRVTS5F.js.map} +0 -0
|
@@ -2542,7 +2542,27 @@ var init_cost_reporting_client = __esm({
|
|
|
2542
2542
|
constructor(client, reportCost) {
|
|
2543
2543
|
this.client = client;
|
|
2544
2544
|
this.reportCost = reportCost;
|
|
2545
|
+
this.image = {
|
|
2546
|
+
generate: async (options) => {
|
|
2547
|
+
const result = await this.client.image.generate(options);
|
|
2548
|
+
if (result.cost !== void 0 && result.cost > 0) {
|
|
2549
|
+
this.reportCost(result.cost);
|
|
2550
|
+
}
|
|
2551
|
+
return result;
|
|
2552
|
+
}
|
|
2553
|
+
};
|
|
2554
|
+
this.speech = {
|
|
2555
|
+
generate: async (options) => {
|
|
2556
|
+
const result = await this.client.speech.generate(options);
|
|
2557
|
+
if (result.cost !== void 0 && result.cost > 0) {
|
|
2558
|
+
this.reportCost(result.cost);
|
|
2559
|
+
}
|
|
2560
|
+
return result;
|
|
2561
|
+
}
|
|
2562
|
+
};
|
|
2545
2563
|
}
|
|
2564
|
+
image;
|
|
2565
|
+
speech;
|
|
2546
2566
|
/**
|
|
2547
2567
|
* Access to model registry for cost estimation.
|
|
2548
2568
|
*/
|
|
@@ -5430,6 +5450,28 @@ var init_anthropic = __esm({
|
|
|
5430
5450
|
getModelSpecs() {
|
|
5431
5451
|
return ANTHROPIC_MODELS;
|
|
5432
5452
|
}
|
|
5453
|
+
// =========================================================================
|
|
5454
|
+
// Image Generation (Not Supported)
|
|
5455
|
+
// =========================================================================
|
|
5456
|
+
supportsImageGeneration(_modelId) {
|
|
5457
|
+
return false;
|
|
5458
|
+
}
|
|
5459
|
+
async generateImage() {
|
|
5460
|
+
throw new Error(
|
|
5461
|
+
"Anthropic does not support image generation. Use OpenAI (DALL-E, GPT Image) or Google Gemini (Imagen) instead."
|
|
5462
|
+
);
|
|
5463
|
+
}
|
|
5464
|
+
// =========================================================================
|
|
5465
|
+
// Speech Generation (Not Supported)
|
|
5466
|
+
// =========================================================================
|
|
5467
|
+
supportsSpeechGeneration(_modelId) {
|
|
5468
|
+
return false;
|
|
5469
|
+
}
|
|
5470
|
+
async generateSpeech() {
|
|
5471
|
+
throw new Error(
|
|
5472
|
+
"Anthropic does not support speech generation. Use OpenAI (TTS) or Google Gemini (TTS) instead."
|
|
5473
|
+
);
|
|
5474
|
+
}
|
|
5433
5475
|
buildRequestPayload(options, descriptor, spec, messages) {
|
|
5434
5476
|
const systemMessages = messages.filter((message) => message.role === "system");
|
|
5435
5477
|
const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
|
|
@@ -5584,6 +5626,182 @@ var init_anthropic = __esm({
|
|
|
5584
5626
|
}
|
|
5585
5627
|
});
|
|
5586
5628
|
|
|
5629
|
+
// src/providers/gemini-image-models.ts
|
|
5630
|
+
/**
 * Looks up a Gemini image model spec by exact model id.
 *
 * @param modelId - Model identifier to match against each entry's `modelId`.
 * @returns The first matching entry of `geminiImageModels`, or `undefined`.
 */
function getGeminiImageModelSpec(modelId) {
  const matchesId = (candidate) => candidate.modelId === modelId;
  return geminiImageModels.find(matchesId);
}
|
|
5633
|
+
/**
 * Tells whether `modelId` is listed in `geminiImageModels`.
 *
 * @param modelId - Model identifier to look for (exact match).
 * @returns `true` when an entry with that `modelId` exists, else `false`.
 */
function isGeminiImageModel(modelId) {
  const position = geminiImageModels.findIndex((candidate) => candidate.modelId === modelId);
  return position !== -1;
}
|
|
5636
|
+
/**
 * Estimates the cost of generating `n` images with a Gemini image model.
 *
 * Flat `pricing.perImage` wins when present; otherwise the price is looked up
 * in `pricing.bySize`. When `size` is omitted the model's own `defaultSize`
 * is used (then "1:1"), mirroring how `generateImage` resolves the size.
 * Previously the hard-coded "1:1" default never matched the keys of
 * size-priced models, so the cost silently came back `undefined`.
 *
 * @param modelId - Model identifier, resolved via `getGeminiImageModelSpec`.
 * @param [size] - Size / aspect-ratio key; defaults to the spec's `defaultSize`.
 * @param [n=1] - Number of images.
 * @returns Price multiplied by `n`, or `undefined` when the model is unknown
 *   or no numeric price exists for the resolved size.
 */
function calculateGeminiImageCost(modelId, size, n = 1) {
  const spec = getGeminiImageModelSpec(modelId);
  if (!spec) return void 0;
  const { perImage, bySize } = spec.pricing;
  if (perImage !== void 0) {
    return perImage * n;
  }
  if (!bySize) return void 0;
  const effectiveSize = size ?? spec.defaultSize ?? "1:1";
  const sizePrice = bySize[effectiveSize];
  return typeof sizePrice === "number" ? sizePrice * n : void 0;
}
|
|
5650
|
+
var IMAGEN4_ASPECT_RATIOS, GEMINI_IMAGE_ASPECT_RATIOS, geminiImageModels;
|
|
5651
|
+
// Lazy initializer for the Gemini image model catalogue. Assigns the two
// aspect-ratio lists and the `geminiImageModels` array declared alongside it.
var init_gemini_image_models = __esm({
  "src/providers/gemini-image-models.ts"() {
    "use strict";
    IMAGEN4_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
    GEMINI_IMAGE_ASPECT_RATIOS = ["1:1", "3:4", "4:3", "9:16", "16:9"];
    // Imagen 4 entries differ only in id, display name and flat per-image price.
    const imagen4Model = (modelId, displayName, perImage) => ({
      provider: "gemini",
      modelId,
      displayName,
      pricing: { perImage },
      supportedSizes: [...IMAGEN4_ASPECT_RATIOS],
      maxImages: 4,
      defaultSize: "1:1",
      features: { textRendering: true }
    });
    // Gemini 2.5 Flash Image entries: flat per-image price, one image per call.
    const flashImageModel = (modelId, displayName) => ({
      provider: "gemini",
      modelId,
      displayName,
      pricing: { perImage: 0.039 },
      supportedSizes: [...GEMINI_IMAGE_ASPECT_RATIOS],
      maxImages: 1,
      defaultSize: "1:1",
      features: { conversational: true, textRendering: true }
    });
    // Gemini 3 Pro Image entries are priced per output resolution.
    // Token-based: ~$0.134 per 1K/2K image, $0.24 per 4K. Using 2K as default.
    const proImageModel = (modelId, displayName) => ({
      provider: "gemini",
      modelId,
      displayName,
      pricing: {
        bySize: { "1K": 0.134, "2K": 0.134, "4K": 0.24 }
      },
      supportedSizes: ["1K", "2K", "4K"],
      maxImages: 1,
      defaultSize: "2K",
      features: { conversational: true, textRendering: true }
    });
    geminiImageModels = [
      // Imagen 4 Family (standalone image generation)
      imagen4Model("imagen-4.0-fast-generate-001", "Imagen 4 Fast", 0.02),
      imagen4Model("imagen-4.0-generate-001", "Imagen 4", 0.04),
      imagen4Model("imagen-4.0-ultra-generate-001", "Imagen 4 Ultra", 0.06),
      // Preview versions
      imagen4Model("imagen-4.0-generate-preview-06-06", "Imagen 4 (Preview)", 0.04),
      imagen4Model("imagen-4.0-ultra-generate-preview-06-06", "Imagen 4 Ultra (Preview)", 0.06),
      // Gemini Native Image Generation (multimodal models)
      flashImageModel("gemini-2.5-flash-image", "Gemini 2.5 Flash Image"),
      flashImageModel("gemini-2.5-flash-image-preview", "Gemini 2.5 Flash Image (Preview)"),
      proImageModel("gemini-3-pro-image-preview", "Gemini 3 Pro Image (Preview)"),
      // Alias: nano-banana-pro-preview is gemini-3-pro-image-preview
      proImageModel("nano-banana-pro-preview", "Nano Banana Pro (Gemini 3 Pro Image)")
    ];
  }
});
|
|
5804
|
+
|
|
5587
5805
|
// src/providers/gemini-models.ts
|
|
5588
5806
|
var GEMINI_MODELS;
|
|
5589
5807
|
var init_gemini_models = __esm({
|
|
@@ -5757,8 +5975,172 @@ var init_gemini_models = __esm({
|
|
|
5757
5975
|
}
|
|
5758
5976
|
});
|
|
5759
5977
|
|
|
5978
|
+
// src/providers/gemini-speech-models.ts
|
|
5979
|
+
/**
 * Looks up a Gemini speech (TTS) model spec by exact model id.
 *
 * @param modelId - Model identifier to match against each entry's `modelId`.
 * @returns The first matching entry of `geminiSpeechModels`, or `undefined`.
 */
function getGeminiSpeechModelSpec(modelId) {
  const matchesId = (candidate) => candidate.modelId === modelId;
  return geminiSpeechModels.find(matchesId);
}
|
|
5982
|
+
/**
 * Tells whether `modelId` is listed in `geminiSpeechModels`.
 *
 * @param modelId - Model identifier to look for (exact match).
 * @returns `true` when an entry with that `modelId` exists, else `false`.
 */
function isGeminiSpeechModel(modelId) {
  const position = geminiSpeechModels.findIndex((candidate) => candidate.modelId === modelId);
  return position !== -1;
}
|
|
5985
|
+
/**
 * Estimates the cost of a Gemini TTS request from its per-minute price.
 *
 * @param modelId - Model identifier, resolved via `getGeminiSpeechModelSpec`.
 * @param characterCount - Input length; only used when `estimatedMinutes`
 *   is `undefined`, in which case duration is approximated as
 *   `characterCount / 750` minutes.
 * @param [estimatedMinutes] - Audio duration in minutes, if known.
 * @returns `minutes * pricing.perMinute`, or `undefined` when the model is
 *   unknown or has no `perMinute` price.
 */
function calculateGeminiSpeechCost(modelId, characterCount, estimatedMinutes) {
  const spec = getGeminiSpeechModelSpec(modelId);
  if (!spec) return void 0;
  if (spec.pricing.perMinute === void 0) return void 0;
  // Heuristic from the original code: roughly 750 characters per minute of audio.
  const minutes = estimatedMinutes !== void 0 ? estimatedMinutes : characterCount / 750;
  return minutes * spec.pricing.perMinute;
}
|
|
5997
|
+
var GEMINI_TTS_VOICES, GEMINI_TTS_FORMATS, geminiSpeechModels;
|
|
5998
|
+
// Lazy initializer for the Gemini TTS catalogue. Assigns the voice list, the
// format list and the `geminiSpeechModels` array declared alongside it.
var init_gemini_speech_models = __esm({
  "src/providers/gemini-speech-models.ts"() {
    "use strict";
    // Prebuilt voice names; the trailing note is each voice's style label.
    GEMINI_TTS_VOICES = [
      "Zephyr", // Bright
      "Puck", // Upbeat
      "Charon", // Informative
      "Kore", // Firm
      "Fenrir", // Excitable
      "Leda", // Youthful
      "Orus", // Firm
      "Aoede", // Breezy
      "Callirrhoe", // Easy-going
      "Autonoe", // Bright
      "Enceladus", // Breathy
      "Iapetus", // Clear
      "Umbriel", // Easy-going
      "Algieba", // Smooth
      "Despina", // Smooth
      "Erinome", // Clear
      "Algenib", // Gravelly
      "Rasalgethi", // Informative
      "Laomedeia", // Upbeat
      "Achernar", // Soft
      "Alnilam", // Firm
      "Schedar", // Even
      "Gacrux", // Mature
      "Pulcherrima", // Forward
      "Achird", // Friendly
      "Zubenelgenubi", // Casual
      "Vindemiatrix", // Gentle
      "Sadachbia", // Lively
      "Sadaltager", // Knowledgeable
      "Sulafat" // Warm
    ];
    GEMINI_TTS_FORMATS = ["pcm", "wav"];
    // Both TTS models share everything except id, display name and pricing.
    // Each entry gets its own copy of the voice list; the format list is the
    // shared GEMINI_TTS_FORMATS array itself.
    const ttsModel = (modelId, displayName, pricing) => ({
      provider: "gemini",
      modelId,
      displayName,
      pricing,
      voices: [...GEMINI_TTS_VOICES],
      formats: GEMINI_TTS_FORMATS,
      // bytes (text + prompt combined)
      maxInputLength: 8e3,
      defaultVoice: "Zephyr",
      defaultFormat: "wav",
      features: {
        multiSpeaker: true,
        languages: 24,
        voiceInstructions: true
      }
    });
    geminiSpeechModels = [
      ttsModel("gemini-2.5-flash-preview-tts", "Gemini 2.5 Flash TTS (Preview)", {
        // $0.50 per 1M input tokens = $0.0000005 per token
        perInputToken: 5e-7,
        // $10.00 per 1M audio output tokens = $0.00001 per token
        perAudioOutputToken: 1e-5,
        // Rough estimate: ~$0.01 per minute of audio
        perMinute: 0.01
      }),
      ttsModel("gemini-2.5-pro-preview-tts", "Gemini 2.5 Pro TTS (Preview)", {
        // $1.00 per 1M input tokens = $0.000001 per token
        perInputToken: 1e-6,
        // $20.00 per 1M audio output tokens = $0.00002 per token
        perAudioOutputToken: 2e-5,
        // Rough estimate: ~$0.02 per minute of audio
        perMinute: 0.02
      })
    ];
  }
});
|
|
6116
|
+
|
|
5760
6117
|
// src/providers/gemini.ts
|
|
5761
|
-
import { FunctionCallingConfigMode, GoogleGenAI } from "@google/genai";
|
|
6118
|
+
import { FunctionCallingConfigMode, GoogleGenAI, Modality } from "@google/genai";
|
|
6119
|
+
/**
 * Prepends a 44-byte canonical RIFF/WAVE header (PCM, format tag 1) to raw
 * PCM sample data.
 *
 * The input is normalised to a byte view first, so the data chunk is sized
 * and copied by bytes. Previously `pcmData.length` and `Uint8Array#set` were
 * used directly, which is only correct for byte arrays: a wider typed array
 * (e.g. `Int16Array`) was truncated to one byte per sample and an
 * `ArrayBuffer` had no usable length.
 *
 * @param pcmData - Raw PCM bytes: a `Uint8Array`/`Buffer`, any other
 *   `ArrayBuffer` view, an `ArrayBuffer`, or an array of byte values.
 * @param sampleRate - Samples per second, written to the header.
 * @param bitsPerSample - Bits per sample, written to the header.
 * @param numChannels - Channel count, written to the header.
 * @returns A new `ArrayBuffer` holding the header followed by the PCM bytes.
 */
function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
  const HEADER_SIZE = 44;
  const pcmBytes = ArrayBuffer.isView(pcmData)
    ? new Uint8Array(pcmData.buffer, pcmData.byteOffset, pcmData.byteLength)
    : new Uint8Array(pcmData);
  const dataSize = pcmBytes.byteLength;
  const bytesPerSample = bitsPerSample / 8;
  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;

  const buffer = new ArrayBuffer(HEADER_SIZE + dataSize);
  const view = new DataView(buffer);
  const bytes = new Uint8Array(buffer);
  // Chunk ids are four ASCII characters stored in file order.
  const writeTag = (offset, tag) => {
    for (let i = 0; i < tag.length; i++) {
      bytes[offset + i] = tag.charCodeAt(i);
    }
  };

  // RIFF container header; all numeric fields are little-endian.
  writeTag(0, "RIFF");
  view.setUint32(4, HEADER_SIZE + dataSize - 8, true); // size of everything after this field
  writeTag(8, "WAVE");
  // "fmt " sub-chunk describing the sample layout.
  writeTag(12, "fmt ");
  view.setUint32(16, 16, true); // fmt chunk body length
  view.setUint16(20, 1, true); // audio format 1 = PCM
  view.setUint16(22, numChannels, true);
  view.setUint32(24, sampleRate, true);
  view.setUint32(28, byteRate, true);
  view.setUint16(32, blockAlign, true);
  view.setUint16(34, bitsPerSample, true);
  // "data" sub-chunk followed by the raw samples.
  writeTag(36, "data");
  view.setUint32(40, dataSize, true);
  bytes.set(pcmBytes, HEADER_SIZE);
  return buffer;
}
|
|
5762
6144
|
/**
 * Builds a Gemini provider by delegating to `createProviderFromEnv` with the
 * `GEMINI_API_KEY` variable name, the `GoogleGenAI` client class and the
 * `GeminiGenerativeProvider` class.
 *
 * @returns Whatever `createProviderFromEnv` returns for those arguments.
 */
function createGeminiProviderFromEnv() {
  const provider = createProviderFromEnv("GEMINI_API_KEY", GoogleGenAI, GeminiGenerativeProvider);
  return provider;
}
|
|
@@ -5768,7 +6150,9 @@ var init_gemini = __esm({
|
|
|
5768
6150
|
"use strict";
|
|
5769
6151
|
init_base_provider();
|
|
5770
6152
|
init_constants2();
|
|
6153
|
+
init_gemini_image_models();
|
|
5771
6154
|
init_gemini_models();
|
|
6155
|
+
init_gemini_speech_models();
|
|
5772
6156
|
init_utils();
|
|
5773
6157
|
GEMINI_ROLE_MAP = {
|
|
5774
6158
|
system: "user",
|
|
@@ -5783,6 +6167,139 @@ var init_gemini = __esm({
|
|
|
5783
6167
|
getModelSpecs() {
|
|
5784
6168
|
return GEMINI_MODELS;
|
|
5785
6169
|
}
|
|
6170
|
+
// =========================================================================
|
|
6171
|
+
// Image Generation
|
|
6172
|
+
// =========================================================================
|
|
6173
|
+
getImageModelSpecs() {
|
|
6174
|
+
return geminiImageModels;
|
|
6175
|
+
}
|
|
6176
|
+
supportsImageGeneration(modelId) {
|
|
6177
|
+
return isGeminiImageModel(modelId);
|
|
6178
|
+
}
|
|
6179
|
+
async generateImage(options) {
|
|
6180
|
+
const client = this.client;
|
|
6181
|
+
const spec = getGeminiImageModelSpec(options.model);
|
|
6182
|
+
const isImagenModel = options.model.startsWith("imagen");
|
|
6183
|
+
const aspectRatio = options.size ?? spec?.defaultSize ?? "1:1";
|
|
6184
|
+
const n = options.n ?? 1;
|
|
6185
|
+
if (isImagenModel) {
|
|
6186
|
+
const response2 = await client.models.generateImages({
|
|
6187
|
+
model: options.model,
|
|
6188
|
+
prompt: options.prompt,
|
|
6189
|
+
config: {
|
|
6190
|
+
numberOfImages: n,
|
|
6191
|
+
aspectRatio,
|
|
6192
|
+
outputMimeType: options.responseFormat === "b64_json" ? "image/png" : "image/jpeg"
|
|
6193
|
+
}
|
|
6194
|
+
});
|
|
6195
|
+
const images2 = response2.generatedImages ?? [];
|
|
6196
|
+
const cost2 = calculateGeminiImageCost(options.model, aspectRatio, images2.length);
|
|
6197
|
+
return {
|
|
6198
|
+
// Gemini's imageBytes is already base64 encoded, so use it directly
|
|
6199
|
+
images: images2.map((img) => ({
|
|
6200
|
+
b64Json: img.image?.imageBytes ?? void 0
|
|
6201
|
+
})),
|
|
6202
|
+
model: options.model,
|
|
6203
|
+
usage: {
|
|
6204
|
+
imagesGenerated: images2.length,
|
|
6205
|
+
size: aspectRatio,
|
|
6206
|
+
quality: "standard"
|
|
6207
|
+
},
|
|
6208
|
+
cost: cost2
|
|
6209
|
+
};
|
|
6210
|
+
}
|
|
6211
|
+
const response = await client.models.generateContent({
|
|
6212
|
+
model: options.model,
|
|
6213
|
+
contents: [{ role: "user", parts: [{ text: options.prompt }] }],
|
|
6214
|
+
config: {
|
|
6215
|
+
responseModalities: [Modality.IMAGE, Modality.TEXT]
|
|
6216
|
+
}
|
|
6217
|
+
});
|
|
6218
|
+
const images = [];
|
|
6219
|
+
const candidate = response.candidates?.[0];
|
|
6220
|
+
if (candidate?.content?.parts) {
|
|
6221
|
+
for (const part of candidate.content.parts) {
|
|
6222
|
+
if ("inlineData" in part && part.inlineData) {
|
|
6223
|
+
images.push({
|
|
6224
|
+
b64Json: part.inlineData.data
|
|
6225
|
+
});
|
|
6226
|
+
}
|
|
6227
|
+
}
|
|
6228
|
+
}
|
|
6229
|
+
const cost = calculateGeminiImageCost(options.model, aspectRatio, images.length);
|
|
6230
|
+
return {
|
|
6231
|
+
images,
|
|
6232
|
+
model: options.model,
|
|
6233
|
+
usage: {
|
|
6234
|
+
imagesGenerated: images.length,
|
|
6235
|
+
size: aspectRatio,
|
|
6236
|
+
quality: "standard"
|
|
6237
|
+
},
|
|
6238
|
+
cost
|
|
6239
|
+
};
|
|
6240
|
+
}
|
|
6241
|
+
// =========================================================================
|
|
6242
|
+
// Speech Generation
|
|
6243
|
+
// =========================================================================
|
|
6244
|
+
getSpeechModelSpecs() {
|
|
6245
|
+
return geminiSpeechModels;
|
|
6246
|
+
}
|
|
6247
|
+
supportsSpeechGeneration(modelId) {
|
|
6248
|
+
return isGeminiSpeechModel(modelId);
|
|
6249
|
+
}
|
|
6250
|
+
async generateSpeech(options) {
|
|
6251
|
+
const client = this.client;
|
|
6252
|
+
const spec = getGeminiSpeechModelSpec(options.model);
|
|
6253
|
+
const voice = options.voice ?? spec?.defaultVoice ?? "Zephyr";
|
|
6254
|
+
const response = await client.models.generateContent({
|
|
6255
|
+
model: options.model,
|
|
6256
|
+
contents: [
|
|
6257
|
+
{
|
|
6258
|
+
role: "user",
|
|
6259
|
+
parts: [{ text: options.input }]
|
|
6260
|
+
}
|
|
6261
|
+
],
|
|
6262
|
+
config: {
|
|
6263
|
+
responseModalities: [Modality.AUDIO],
|
|
6264
|
+
speechConfig: {
|
|
6265
|
+
voiceConfig: {
|
|
6266
|
+
prebuiltVoiceConfig: {
|
|
6267
|
+
voiceName: voice
|
|
6268
|
+
}
|
|
6269
|
+
}
|
|
6270
|
+
}
|
|
6271
|
+
}
|
|
6272
|
+
});
|
|
6273
|
+
let pcmData;
|
|
6274
|
+
const candidate = response.candidates?.[0];
|
|
6275
|
+
if (candidate?.content?.parts) {
|
|
6276
|
+
for (const part of candidate.content.parts) {
|
|
6277
|
+
if ("inlineData" in part && part.inlineData?.data) {
|
|
6278
|
+
const base64 = part.inlineData.data;
|
|
6279
|
+
const binary = atob(base64);
|
|
6280
|
+
pcmData = new Uint8Array(binary.length);
|
|
6281
|
+
for (let i = 0; i < binary.length; i++) {
|
|
6282
|
+
pcmData[i] = binary.charCodeAt(i);
|
|
6283
|
+
}
|
|
6284
|
+
break;
|
|
6285
|
+
}
|
|
6286
|
+
}
|
|
6287
|
+
}
|
|
6288
|
+
if (!pcmData) {
|
|
6289
|
+
throw new Error("No audio data in Gemini TTS response");
|
|
6290
|
+
}
|
|
6291
|
+
const audioData = wrapPcmInWav(pcmData, 24e3, 16, 1);
|
|
6292
|
+
const cost = calculateGeminiSpeechCost(options.model, options.input.length);
|
|
6293
|
+
return {
|
|
6294
|
+
audio: audioData,
|
|
6295
|
+
model: options.model,
|
|
6296
|
+
usage: {
|
|
6297
|
+
characterCount: options.input.length
|
|
6298
|
+
},
|
|
6299
|
+
cost,
|
|
6300
|
+
format: spec?.defaultFormat ?? "wav"
|
|
6301
|
+
};
|
|
6302
|
+
}
|
|
5786
6303
|
buildRequestPayload(options, descriptor, _spec, messages) {
|
|
5787
6304
|
const contents = this.convertMessagesToContents(messages);
|
|
5788
6305
|
const generationConfig = this.buildGenerationConfig(options);
|
|
@@ -5978,6 +6495,121 @@ var init_gemini = __esm({
|
|
|
5978
6495
|
}
|
|
5979
6496
|
});
|
|
5980
6497
|
|
|
6498
|
+
// src/providers/openai-image-models.ts
|
|
6499
|
+
/**
 * Looks up an OpenAI image model spec by exact model id.
 *
 * @param modelId - Model identifier to match against each entry's `modelId`.
 * @returns The first matching entry of `openaiImageModels`, or `undefined`.
 */
function getOpenAIImageModelSpec(modelId) {
  const matchesId = (candidate) => candidate.modelId === modelId;
  return openaiImageModels.find(matchesId);
}
|
|
6502
|
+
/**
 * Tells whether `modelId` is listed in `openaiImageModels`.
 *
 * @param modelId - Model identifier to look for (exact match).
 * @returns `true` when an entry with that `modelId` exists, else `false`.
 */
function isOpenAIImageModel(modelId) {
  const position = openaiImageModels.findIndex((candidate) => candidate.modelId === modelId);
  return position !== -1;
}
|
|
6505
|
+
/**
 * Estimates the cost of generating `n` images with an OpenAI image model.
 *
 * `pricing.bySize[size]` is either a flat number or a per-quality table.
 * Omitted `size` / `quality` fall back to the model's own `defaultSize` /
 * `defaultQuality` (then "standard"), mirroring how `generateImage` resolves
 * them. Previously the hard-coded "standard" default matched no quality key
 * of the GPT Image models (low/medium/high) and an omitted size was never
 * resolved, so the cost silently came back `undefined`.
 *
 * @param modelId - Model identifier, resolved via `getOpenAIImageModelSpec`.
 * @param [size] - Size key such as "1024x1024"; defaults to the spec's
 *   `defaultSize`.
 * @param [quality] - Quality key; defaults to the spec's `defaultQuality`,
 *   then "standard". Ignored when the size has a flat price.
 * @param [n=1] - Number of images.
 * @returns Price multiplied by `n`, or `undefined` when the model, size or
 *   quality has no price.
 */
function calculateOpenAIImageCost(modelId, size, quality, n = 1) {
  const spec = getOpenAIImageModelSpec(modelId);
  if (!spec) return void 0;
  const effectiveSize = size ?? spec.defaultSize;
  const sizePrice = spec.pricing.bySize?.[effectiveSize];
  if (sizePrice === void 0) return void 0;
  if (typeof sizePrice === "number") {
    return sizePrice * n;
  }
  const effectiveQuality = quality ?? spec.defaultQuality ?? "standard";
  const pricePerImage = sizePrice[effectiveQuality];
  return pricePerImage === void 0 ? void 0 : pricePerImage * n;
}
|
|
6519
|
+
var GPT_IMAGE_SIZES, GPT_IMAGE_QUALITIES, DALLE3_SIZES, DALLE3_QUALITIES, DALLE2_SIZES, openaiImageModels;
|
|
6520
|
+
// Lazy initializer for the OpenAI image model catalogue. Assigns the size and
// quality lists and the `openaiImageModels` array declared alongside it.
var init_openai_image_models = __esm({
  "src/providers/openai-image-models.ts"() {
    "use strict";
    GPT_IMAGE_SIZES = ["1024x1024", "1024x1536", "1536x1024"];
    GPT_IMAGE_QUALITIES = ["low", "medium", "high"];
    DALLE3_SIZES = ["1024x1024", "1024x1792", "1792x1024"];
    DALLE3_QUALITIES = ["standard", "hd"];
    DALLE2_SIZES = ["256x256", "512x512", "1024x1024"];
    // GPT Image entries share everything except id, name and the price table.
    const gptImageModel = (modelId, displayName, bySize) => ({
      provider: "openai",
      modelId,
      displayName,
      pricing: { bySize },
      supportedSizes: [...GPT_IMAGE_SIZES],
      supportedQualities: [...GPT_IMAGE_QUALITIES],
      maxImages: 1,
      defaultSize: "1024x1024",
      defaultQuality: "medium",
      features: {
        textRendering: true,
        transparency: true
      }
    });
    openaiImageModels = [
      // GPT Image 1 Family (flagship)
      gptImageModel("gpt-image-1", "GPT Image 1", {
        "1024x1024": { low: 0.011, medium: 0.04, high: 0.17 },
        "1024x1536": { low: 0.016, medium: 0.06, high: 0.25 },
        "1536x1024": { low: 0.016, medium: 0.06, high: 0.25 }
      }),
      gptImageModel("gpt-image-1-mini", "GPT Image 1 Mini", {
        "1024x1024": { low: 0.005, medium: 0.02, high: 0.052 },
        "1024x1536": { low: 0.0075, medium: 0.03, high: 0.078 },
        "1536x1024": { low: 0.0075, medium: 0.03, high: 0.078 }
      }),
      // DALL-E Family
      {
        provider: "openai",
        modelId: "dall-e-3",
        displayName: "DALL-E 3",
        pricing: {
          bySize: {
            "1024x1024": { standard: 0.04, hd: 0.08 },
            "1024x1792": { standard: 0.08, hd: 0.12 },
            "1792x1024": { standard: 0.08, hd: 0.12 }
          }
        },
        supportedSizes: [...DALLE3_SIZES],
        supportedQualities: [...DALLE3_QUALITIES],
        // DALL-E 3 only supports n=1
        maxImages: 1,
        defaultSize: "1024x1024",
        defaultQuality: "standard",
        features: { textRendering: true }
      },
      {
        provider: "openai",
        modelId: "dall-e-2",
        displayName: "DALL-E 2 (Legacy)",
        pricing: {
          // Flat per-size prices; this model has no quality tiers.
          bySize: {
            "256x256": 0.016,
            "512x512": 0.018,
            "1024x1024": 0.02
          }
        },
        supportedSizes: [...DALLE2_SIZES],
        maxImages: 10,
        defaultSize: "1024x1024"
      }
    ];
  }
});
|
|
6612
|
+
|
|
5981
6613
|
// src/providers/openai-models.ts
|
|
5982
6614
|
var OPENAI_MODELS;
|
|
5983
6615
|
var init_openai_models = __esm({
|
|
@@ -6342,6 +6974,144 @@ var init_openai_models = __esm({
|
|
|
6342
6974
|
}
|
|
6343
6975
|
});
|
|
6344
6976
|
|
|
6977
|
+
// src/providers/openai-speech-models.ts
|
|
6978
|
+
/**
 * Looks up an OpenAI speech (TTS) model spec by exact model id.
 *
 * @param modelId - Model identifier to match against each entry's `modelId`.
 * @returns The first matching entry of `openaiSpeechModels`, or `undefined`.
 */
function getOpenAISpeechModelSpec(modelId) {
  const matchesId = (candidate) => candidate.modelId === modelId;
  return openaiSpeechModels.find(matchesId);
}
|
|
6981
|
+
/**
 * Tells whether `modelId` is listed in `openaiSpeechModels`.
 *
 * @param modelId - Model identifier to look for (exact match).
 * @returns `true` when an entry with that `modelId` exists, else `false`.
 */
function isOpenAISpeechModel(modelId) {
  const position = openaiSpeechModels.findIndex((candidate) => candidate.modelId === modelId);
  return position !== -1;
}
|
|
6984
|
+
/**
 * Estimates the cost of an OpenAI TTS request.
 *
 * Per-character pricing takes precedence. Otherwise per-minute pricing is
 * applied to `estimatedMinutes`, or to `characterCount / 750` when no
 * duration estimate is supplied.
 *
 * @param modelId - Model identifier, resolved via `getOpenAISpeechModelSpec`.
 * @param characterCount - Number of input characters.
 * @param [estimatedMinutes] - Audio duration in minutes, if known.
 * @returns The estimated cost, or `undefined` when the model is unknown or
 *   has neither `perCharacter` nor `perMinute` pricing.
 */
function calculateOpenAISpeechCost(modelId, characterCount, estimatedMinutes) {
  const spec = getOpenAISpeechModelSpec(modelId);
  if (!spec) return void 0;
  const { perCharacter, perMinute } = spec.pricing;
  if (perCharacter !== void 0) {
    return characterCount * perCharacter;
  }
  if (perMinute === void 0) return void 0;
  // Heuristic from the original code: roughly 750 characters per minute of audio.
  const minutes = estimatedMinutes !== void 0 ? estimatedMinutes : characterCount / 750;
  return minutes * perMinute;
}
|
|
6999
|
+
var OPENAI_TTS_VOICES, OPENAI_TTS_EXTENDED_VOICES, OPENAI_TTS_FORMATS, openaiSpeechModels;
|
|
7000
|
+
// Lazy initializer for the OpenAI TTS catalogue. Assigns the voice lists, the
// format list and the `openaiSpeechModels` array declared alongside it.
var init_openai_speech_models = __esm({
  "src/providers/openai-speech-models.ts"() {
    "use strict";
    OPENAI_TTS_VOICES = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"];
    OPENAI_TTS_EXTENDED_VOICES = [...OPENAI_TTS_VOICES, "ash", "ballad", "coral", "sage", "verse"];
    OPENAI_TTS_FORMATS = ["mp3", "opus", "aac", "flac", "wav", "pcm"];
    // The tts-1 family differs only in id, display name and per-character
    // price. Each entry gets its own copy of the voice list; the format list
    // is the shared OPENAI_TTS_FORMATS array itself.
    const characterPricedModel = (modelId, displayName, perCharacter) => ({
      provider: "openai",
      modelId,
      displayName,
      pricing: { perCharacter },
      voices: [...OPENAI_TTS_VOICES],
      formats: OPENAI_TTS_FORMATS,
      maxInputLength: 4096,
      defaultVoice: "alloy",
      defaultFormat: "mp3",
      features: { voiceInstructions: false }
    });
    openaiSpeechModels = [
      // Standard TTS models (character-based pricing)
      // $15 per 1M characters = $0.000015 per character
      characterPricedModel("tts-1", "TTS-1", 15e-6),
      characterPricedModel("tts-1-1106", "TTS-1 (Nov 2023)", 15e-6),
      // $30 per 1M characters = $0.00003 per character
      characterPricedModel("tts-1-hd", "TTS-1 HD", 3e-5),
      characterPricedModel("tts-1-hd-1106", "TTS-1 HD (Nov 2023)", 3e-5),
      // Token-based TTS model with voice instructions support
      {
        provider: "openai",
        modelId: "gpt-4o-mini-tts",
        displayName: "GPT-4o Mini TTS",
        pricing: {
          // $0.60 per 1M input tokens = $0.0000006 per token
          perInputToken: 6e-7,
          // $12 per 1M audio output tokens = $0.000012 per token
          perAudioOutputToken: 12e-6,
          // ~$0.015 per minute of audio
          perMinute: 0.015
        },
        voices: [...OPENAI_TTS_EXTENDED_VOICES],
        formats: OPENAI_TTS_FORMATS,
        // tokens, not characters
        maxInputLength: 2e3,
        defaultVoice: "alloy",
        defaultFormat: "mp3",
        features: { voiceInstructions: true }
      }
    ];
  }
});
|
|
7114
|
+
|
|
6345
7115
|
// src/providers/openai.ts
|
|
6346
7116
|
import OpenAI from "openai";
|
|
6347
7117
|
import { encoding_for_model } from "tiktoken";
|
|
@@ -6363,7 +7133,9 @@ var init_openai = __esm({
|
|
|
6363
7133
|
"use strict";
|
|
6364
7134
|
init_base_provider();
|
|
6365
7135
|
init_constants2();
|
|
7136
|
+
init_openai_image_models();
|
|
6366
7137
|
init_openai_models();
|
|
7138
|
+
init_openai_speech_models();
|
|
6367
7139
|
init_utils();
|
|
6368
7140
|
ROLE_MAP = {
|
|
6369
7141
|
system: "system",
|
|
@@ -6378,6 +7150,87 @@ var init_openai = __esm({
|
|
|
6378
7150
|
getModelSpecs() {
|
|
6379
7151
|
return OPENAI_MODELS;
|
|
6380
7152
|
}
|
|
7153
|
+
// =========================================================================
|
|
7154
|
+
// Image Generation
|
|
7155
|
+
// =========================================================================
|
|
7156
|
+
getImageModelSpecs() {
|
|
7157
|
+
return openaiImageModels;
|
|
7158
|
+
}
|
|
7159
|
+
supportsImageGeneration(modelId) {
|
|
7160
|
+
return isOpenAIImageModel(modelId);
|
|
7161
|
+
}
|
|
7162
|
+
async generateImage(options) {
|
|
7163
|
+
const client = this.client;
|
|
7164
|
+
const spec = getOpenAIImageModelSpec(options.model);
|
|
7165
|
+
const size = options.size ?? spec?.defaultSize ?? "1024x1024";
|
|
7166
|
+
const quality = options.quality ?? spec?.defaultQuality ?? "standard";
|
|
7167
|
+
const n = options.n ?? 1;
|
|
7168
|
+
const isDallE2 = options.model === "dall-e-2";
|
|
7169
|
+
const isGptImage = options.model.startsWith("gpt-image");
|
|
7170
|
+
const requestParams = {
|
|
7171
|
+
model: options.model,
|
|
7172
|
+
prompt: options.prompt,
|
|
7173
|
+
size,
|
|
7174
|
+
n
|
|
7175
|
+
};
|
|
7176
|
+
if (!isDallE2 && !isGptImage) {
|
|
7177
|
+
requestParams.quality = quality;
|
|
7178
|
+
}
|
|
7179
|
+
if (isGptImage) {
|
|
7180
|
+
} else if (!isDallE2) {
|
|
7181
|
+
requestParams.response_format = options.responseFormat ?? "url";
|
|
7182
|
+
}
|
|
7183
|
+
const response = await client.images.generate(requestParams);
|
|
7184
|
+
const cost = calculateOpenAIImageCost(options.model, size, quality, n);
|
|
7185
|
+
const images = response.data ?? [];
|
|
7186
|
+
return {
|
|
7187
|
+
images: images.map((img) => ({
|
|
7188
|
+
url: img.url,
|
|
7189
|
+
b64Json: img.b64_json,
|
|
7190
|
+
revisedPrompt: img.revised_prompt
|
|
7191
|
+
})),
|
|
7192
|
+
model: options.model,
|
|
7193
|
+
usage: {
|
|
7194
|
+
imagesGenerated: images.length,
|
|
7195
|
+
size,
|
|
7196
|
+
quality
|
|
7197
|
+
},
|
|
7198
|
+
cost
|
|
7199
|
+
};
|
|
7200
|
+
}
|
|
7201
|
+
// =========================================================================
|
|
7202
|
+
// Speech Generation
|
|
7203
|
+
// =========================================================================
|
|
7204
|
+
getSpeechModelSpecs() {
|
|
7205
|
+
return openaiSpeechModels;
|
|
7206
|
+
}
|
|
7207
|
+
supportsSpeechGeneration(modelId) {
|
|
7208
|
+
return isOpenAISpeechModel(modelId);
|
|
7209
|
+
}
|
|
7210
|
+
async generateSpeech(options) {
|
|
7211
|
+
const client = this.client;
|
|
7212
|
+
const spec = getOpenAISpeechModelSpec(options.model);
|
|
7213
|
+
const format = options.responseFormat ?? spec?.defaultFormat ?? "mp3";
|
|
7214
|
+
const voice = options.voice ?? spec?.defaultVoice ?? "alloy";
|
|
7215
|
+
const response = await client.audio.speech.create({
|
|
7216
|
+
model: options.model,
|
|
7217
|
+
input: options.input,
|
|
7218
|
+
voice,
|
|
7219
|
+
response_format: format,
|
|
7220
|
+
speed: options.speed ?? 1
|
|
7221
|
+
});
|
|
7222
|
+
const audioBuffer = await response.arrayBuffer();
|
|
7223
|
+
const cost = calculateOpenAISpeechCost(options.model, options.input.length);
|
|
7224
|
+
return {
|
|
7225
|
+
audio: audioBuffer,
|
|
7226
|
+
model: options.model,
|
|
7227
|
+
usage: {
|
|
7228
|
+
characterCount: options.input.length
|
|
7229
|
+
},
|
|
7230
|
+
cost,
|
|
7231
|
+
format
|
|
7232
|
+
};
|
|
7233
|
+
}
|
|
6381
7234
|
buildRequestPayload(options, descriptor, spec, messages) {
|
|
6382
7235
|
const { maxTokens, temperature, topP, stopSequences, extra } = options;
|
|
6383
7236
|
const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
|
|
@@ -6718,30 +7571,109 @@ var init_model_registry = __esm({
|
|
|
6718
7571
|
}
|
|
6719
7572
|
});
|
|
6720
7573
|
|
|
6721
|
-
// src/core/
|
|
6722
|
-
var
|
|
6723
|
-
var
|
|
6724
|
-
"src/core/
|
|
7574
|
+
// src/core/namespaces/image.ts
|
|
7575
|
+
var ImageNamespace;
|
|
7576
|
+
var init_image = __esm({
|
|
7577
|
+
"src/core/namespaces/image.ts"() {
|
|
6725
7578
|
"use strict";
|
|
6726
|
-
|
|
6727
|
-
constructor(defaultProvider
|
|
7579
|
+
ImageNamespace = class {
  /**
   * @param adapters - Provider adapters consulted, in array order, for image support
   * @param defaultProvider - Kept on the instance; no method of this class reads it
   */
  constructor(adapters, defaultProvider) {
    this.adapters = adapters;
    this.defaultProvider = defaultProvider;
  }
  /**
   * Produce images for `options.model` using the first adapter that claims it.
   *
   * @param options - Image generation options, forwarded untouched to the adapter
   * @returns Promise for whatever the chosen adapter's `generateImage` returns
   * @throws Error when no adapter claims the model, or the claiming adapter
   *   has no `generateImage`
   */
  async generate(options) {
    const { model: modelId } = options;
    const provider = this.findImageAdapter(modelId);
    if (provider?.generateImage) {
      return provider.generateImage(options);
    }
    const available = this.listModels().map((spec) => spec.modelId).join(", ");
    throw new Error(
      `No provider supports image generation for model "${modelId}". Available image models: ${available}`
    );
  }
  /**
   * Collect the image model specs of every adapter that exposes
   * `getImageModelSpecs`, preserving adapter order.
   */
  listModels() {
    return this.adapters.flatMap(
      (adapter) => adapter.getImageModelSpecs ? [...adapter.getImageModelSpecs()] : []
    );
  }
  /**
   * Tell whether any adapter claims image generation for `modelId`.
   */
  supportsModel(modelId) {
    const match = this.findImageAdapter(modelId);
    return match !== undefined;
  }
  /**
   * Return the first adapter whose `supportsImageGeneration(modelId)` is
   * truthy; adapters without that method never match.
   */
  findImageAdapter(modelId) {
    const claimsModel = (candidate) => {
      const verdict = candidate.supportsImageGeneration?.(modelId);
      return verdict ?? false;
    };
    return this.adapters.find(claimsModel);
  }
};
|
|
7625
|
+
}
|
|
7626
|
+
});
|
|
7627
|
+
|
|
7628
|
+
// src/core/namespaces/speech.ts
|
|
7629
|
+
var SpeechNamespace;
|
|
7630
|
+
var init_speech = __esm({
|
|
7631
|
+
"src/core/namespaces/speech.ts"() {
|
|
7632
|
+
"use strict";
|
|
7633
|
+
SpeechNamespace = class {
  /**
   * @param adapters - Provider adapters consulted, in array order, for speech support
   * @param defaultProvider - Kept on the instance; no method of this class reads it
   */
  constructor(adapters, defaultProvider) {
    this.adapters = adapters;
    this.defaultProvider = defaultProvider;
  }
  /**
   * Produce speech audio for `options.model` using the first adapter that
   * claims it.
   *
   * @param options - Speech generation options, forwarded untouched to the adapter
   * @returns Promise for whatever the chosen adapter's `generateSpeech` returns
   * @throws Error when no adapter claims the model, or the claiming adapter
   *   has no `generateSpeech`
   */
  async generate(options) {
    const { model: modelId } = options;
    const provider = this.findSpeechAdapter(modelId);
    if (provider?.generateSpeech) {
      return provider.generateSpeech(options);
    }
    const available = this.listModels().map((spec) => spec.modelId).join(", ");
    throw new Error(
      `No provider supports speech generation for model "${modelId}". Available speech models: ${available}`
    );
  }
  /**
   * Collect the speech model specs of every adapter that exposes
   * `getSpeechModelSpecs`, preserving adapter order.
   */
  listModels() {
    return this.adapters.flatMap(
      (adapter) => adapter.getSpeechModelSpecs ? [...adapter.getSpeechModelSpecs()] : []
    );
  }
  /**
   * Tell whether any adapter claims speech generation for `modelId`.
   */
  supportsModel(modelId) {
    const match = this.findSpeechAdapter(modelId);
    return match !== undefined;
  }
  /**
   * Return the first adapter whose `supportsSpeechGeneration(modelId)` is
   * truthy; adapters without that method never match.
   */
  findSpeechAdapter(modelId) {
    const claimsModel = (candidate) => {
      const verdict = candidate.supportsSpeechGeneration?.(modelId);
      return verdict ?? false;
    };
    return this.adapters.find(claimsModel);
  }
};
|
|
6747
7679
|
}
|
|
@@ -6790,6 +7722,69 @@ var init_quick_methods = __esm({
|
|
|
6790
7722
|
}
|
|
6791
7723
|
});
|
|
6792
7724
|
|
|
7725
|
+
// src/core/namespaces/text.ts
|
|
7726
|
+
var TextNamespace;
|
|
7727
|
+
var init_text = __esm({
|
|
7728
|
+
"src/core/namespaces/text.ts"() {
|
|
7729
|
+
"use strict";
|
|
7730
|
+
init_quick_methods();
|
|
7731
|
+
TextNamespace = class {
  /**
   * @param client - Handed as the first argument to the `complete` and
   *   `stream` quick-method helpers
   */
  constructor(client) {
    this.client = client;
  }
  /**
   * Produce a full text response by delegating to the `complete` helper.
   *
   * @param prompt - User prompt
   * @param options - Optional configuration
   * @returns Complete text response
   */
  async complete(prompt, options) {
    const { client } = this;
    return complete(client, prompt, options);
  }
  /**
   * Stream text chunks by delegating to the `stream` helper.
   *
   * @param prompt - User prompt
   * @param options - Optional configuration
   * @returns Async generator yielding text chunks
   */
  stream(prompt, options) {
    const { client } = this;
    return stream(client, prompt, options);
  }
};
|
|
7756
|
+
}
|
|
7757
|
+
});
|
|
7758
|
+
|
|
7759
|
+
// src/core/options.ts
|
|
7760
|
+
var ModelIdentifierParser;
|
|
7761
|
+
var init_options = __esm({
|
|
7762
|
+
"src/core/options.ts"() {
|
|
7763
|
+
"use strict";
|
|
7764
|
+
ModelIdentifierParser = class {
  /**
   * @param defaultProvider - Provider used when an identifier has no
   *   "provider:" prefix (defaults to "openai")
   */
  constructor(defaultProvider = "openai") {
    this.defaultProvider = defaultProvider;
  }
  /**
   * Split a model identifier of the form "name" or "provider:name".
   *
   * Only the first ":" separates provider from name, so the name itself may
   * contain further colons. Whitespace around the whole identifier and
   * around each part is ignored.
   *
   * @param identifier - Model identifier string
   * @returns `{ provider, name }`
   * @throws Error if the identifier, the model name, or an explicitly
   *   prefixed provider is empty
   */
  parse(identifier) {
    const trimmed = identifier.trim();
    if (!trimmed) {
      throw new Error("Model identifier cannot be empty");
    }
    const separatorIndex = trimmed.indexOf(":");
    if (separatorIndex === -1) {
      return { provider: this.defaultProvider, name: trimmed };
    }
    const provider = trimmed.slice(0, separatorIndex).trim();
    const name = trimmed.slice(separatorIndex + 1).trim();
    // Name is validated first so inputs that already threw keep the same message.
    if (!name) {
      throw new Error("Model name cannot be empty");
    }
    // ":model" used to yield an empty provider string; reject it here with a
    // clear message instead of passing it on.
    if (!provider) {
      throw new Error("Model provider cannot be empty");
    }
    return { provider, name };
  }
};
|
|
7785
|
+
}
|
|
7786
|
+
});
|
|
7787
|
+
|
|
6793
7788
|
// src/core/client.ts
|
|
6794
7789
|
var client_exports = {};
|
|
6795
7790
|
__export(client_exports, {
|
|
@@ -6802,12 +7797,20 @@ var init_client = __esm({
|
|
|
6802
7797
|
init_builder();
|
|
6803
7798
|
init_discovery();
|
|
6804
7799
|
init_model_registry();
|
|
7800
|
+
init_image();
|
|
7801
|
+
init_speech();
|
|
7802
|
+
init_text();
|
|
6805
7803
|
init_options();
|
|
6806
7804
|
init_quick_methods();
|
|
6807
7805
|
LLMist = class _LLMist {
|
|
6808
7806
|
parser;
|
|
7807
|
+
defaultProvider;
|
|
6809
7808
|
modelRegistry;
|
|
6810
7809
|
adapters;
|
|
7810
|
+
// Namespaces for different generation types
|
|
7811
|
+
text;
|
|
7812
|
+
image;
|
|
7813
|
+
speech;
|
|
6811
7814
|
constructor(...args) {
|
|
6812
7815
|
let adapters = [];
|
|
6813
7816
|
let defaultProvider;
|
|
@@ -6846,6 +7849,7 @@ var init_client = __esm({
|
|
|
6846
7849
|
const priorityB = b.priority ?? 0;
|
|
6847
7850
|
return priorityB - priorityA;
|
|
6848
7851
|
});
|
|
7852
|
+
this.defaultProvider = resolvedDefaultProvider;
|
|
6849
7853
|
this.parser = new ModelIdentifierParser(resolvedDefaultProvider);
|
|
6850
7854
|
this.modelRegistry = new ModelRegistry();
|
|
6851
7855
|
for (const adapter of this.adapters) {
|
|
@@ -6854,6 +7858,9 @@ var init_client = __esm({
|
|
|
6854
7858
|
if (customModels.length > 0) {
|
|
6855
7859
|
this.modelRegistry.registerModels(customModels);
|
|
6856
7860
|
}
|
|
7861
|
+
this.text = new TextNamespace(this);
|
|
7862
|
+
this.image = new ImageNamespace(this.adapters, this.defaultProvider);
|
|
7863
|
+
this.speech = new SpeechNamespace(this.adapters, this.defaultProvider);
|
|
6857
7864
|
}
|
|
6858
7865
|
stream(options) {
|
|
6859
7866
|
const descriptor = this.parser.parse(options.model);
|
|
@@ -8450,11 +9457,11 @@ export {
|
|
|
8450
9457
|
init_discovery,
|
|
8451
9458
|
ModelRegistry,
|
|
8452
9459
|
init_model_registry,
|
|
8453
|
-
ModelIdentifierParser,
|
|
8454
|
-
init_options,
|
|
8455
9460
|
complete,
|
|
8456
9461
|
stream,
|
|
8457
9462
|
init_quick_methods,
|
|
9463
|
+
ModelIdentifierParser,
|
|
9464
|
+
init_options,
|
|
8458
9465
|
LLMist,
|
|
8459
9466
|
init_client,
|
|
8460
9467
|
AgentBuilder,
|
|
@@ -8501,4 +9508,4 @@ export {
|
|
|
8501
9508
|
MockPromptRecorder,
|
|
8502
9509
|
waitFor
|
|
8503
9510
|
};
|
|
8504
|
-
//# sourceMappingURL=chunk-
|
|
9511
|
+
//# sourceMappingURL=chunk-6ZDUWO6N.js.map
|