llmist 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CTC2WJZA.js → chunk-4IMGADVY.js} +2 -2
- package/dist/{chunk-ZFHFBEQ5.js → chunk-62M4TDAK.js} +359 -66
- package/dist/chunk-62M4TDAK.js.map +1 -0
- package/dist/cli.cjs +726 -123
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +369 -59
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +358 -65
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +2 -2
- package/dist/{mock-stream-B2qwECvd.d.cts → mock-stream-CjmvWDc3.d.cts} +21 -20
- package/dist/{mock-stream-B2qwECvd.d.ts → mock-stream-CjmvWDc3.d.ts} +21 -20
- package/dist/testing/index.cjs +358 -65
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +2 -2
- package/package.json +2 -1
- package/dist/chunk-ZFHFBEQ5.js.map +0 -1
- package/dist/{chunk-CTC2WJZA.js.map → chunk-4IMGADVY.js.map} +0 -0
@@ -869,7 +869,7 @@ function findSafeDelimiter(content) {
   }
   let counter = 1;
   while (counter < 1e3) {
-    const delimiter = `
+    const delimiter = `__GADGET_PARAM_${counter}__`;
     const regex = new RegExp(`^${delimiter}\\s*$`);
     const isUsed = lines.some((line) => regex.test(line));
     if (!isUsed) {
@@ -975,7 +975,16 @@ var init_gadget = __esm({
     "use strict";
     init_schema_to_json();
     init_schema_validator();
-    HEREDOC_DELIMITERS = [
+    HEREDOC_DELIMITERS = [
+      "__GADGET_PARAM_EOF__",
+      "__GADGET_PARAM_END__",
+      "__GADGET_PARAM_DOC__",
+      "__GADGET_PARAM_CONTENT__",
+      "__GADGET_PARAM_TEXT__",
+      "__GADGET_PARAM_HEREDOC__",
+      "__GADGET_PARAM_DATA__",
+      "__GADGET_PARAM_BLOCK__"
+    ];
     BaseGadget = class {
       /**
        * The name of the gadget. Used for identification when LLM calls it.
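
Note on the two hunks above: HEREDOC_DELIMITERS now ships a preset list of __GADGET_PARAM_*__ markers, and findSafeDelimiter falls back to numbered __GADGET_PARAM_${counter}__ markers when the parameter content collides with every preset. A minimal TypeScript sketch of that fallback (the delimiter strings, regex, and counter loop come from the diff; the function name and error are illustrative):

    function pickNumberedDelimiter(content: string): string {
      const lines = content.split("\n");
      let counter = 1;
      while (counter < 1000) {
        const delimiter = `__GADGET_PARAM_${counter}__`;
        // A delimiter is unsafe if the content already contains it on a line of its own.
        const isUsed = lines.some((line) => new RegExp(`^${delimiter}\\s*$`).test(line));
        if (!isUsed) return delimiter;
        counter += 1;
      }
      throw new Error("no safe heredoc delimiter found");
    }

So a parameter value that already contains __GADGET_PARAM_1__ on its own line would be wrapped with __GADGET_PARAM_2__ instead.
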
@@ -2614,7 +2623,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -2638,7 +2648,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 1,
           output: 5,
-          cachedInput: 0.1
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -2662,7 +2673,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-03",
         features: {
@@ -2686,7 +2698,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2024-11",
         features: {
@@ -2710,7 +2723,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 15,
           output: 75,
-          cachedInput: 1.5
+          cachedInput: 1.5,
+          cacheWriteInput: 18.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -2734,7 +2748,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 15,
           output: 75,
-          cachedInput: 1.5
+          cachedInput: 1.5,
+          cacheWriteInput: 18.75
         },
         knowledgeCutoff: "2025-03",
         features: {
@@ -2757,7 +2772,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 0.8,
           output: 4,
-          cachedInput: 0.08
+          cachedInput: 0.08,
+          cacheWriteInput: 1
         },
         knowledgeCutoff: "2024-07",
         features: {
@@ -2780,7 +2796,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 0.25,
           output: 1.25,
-          cachedInput: 0.025
+          cachedInput: 0.025,
+          cacheWriteInput: 0.3125
         },
         knowledgeCutoff: "2023-08",
         features: {
@@ -2804,7 +2821,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 1,
           output: 5,
-          cachedInput: 0.1
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -2828,7 +2846,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -2852,7 +2871,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 5,
           output: 25,
-          cachedInput: 0.5
+          cachedInput: 0.5,
+          cacheWriteInput: 6.25
         },
         knowledgeCutoff: "2025-03",
         features: {
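
Note on the eleven pricing hunks above: every Anthropic model spec gains a cacheWriteInput rate (USD per million tokens) next to the existing input, output, and cachedInput rates; across the board the cache-write rate is 1.25x the base input rate and the cache-read rate is 0.1x. A hedged sketch of the resulting pricing shape (field names and the Sonnet-class numbers are from the diff; the interface name is assumed):

    // USD per million tokens; the interface name is illustrative, not llmist's public API.
    interface ModelPricing {
      input: number;            // uncached input tokens
      output: number;           // output tokens
      cachedInput?: number;     // cache-read tokens
      cacheWriteInput?: number; // cache-write (cache creation) tokens
    }

    const sonnetClassPricing: ModelPricing = {
      input: 3,
      output: 15,
      cachedInput: 0.3,      // 0.1x input
      cacheWriteInput: 3.75  // 1.25x input
    };
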
@@ -2967,15 +2987,27 @@ var init_anthropic = __esm({
       }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) =>
-
+        const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+          type: "text",
+          text: m.content,
+          // Add cache_control to the LAST system message block
+          ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+        })) : void 0;
+        const nonSystemMessages = messages.filter(
           (message) => message.role !== "system"
-        )
+        );
+        const lastUserIndex = nonSystemMessages.reduce(
+          (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+          -1
+        );
+        const conversation = nonSystemMessages.map((message, index) => ({
           role: message.role,
           content: [
             {
               type: "text",
-              text: message.content
+              text: message.content,
+              // Add cache_control to the LAST user message
+              ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
             }
           ]
         }));
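
The buildRequestPayload hunk above is the core of the new Anthropic prompt-caching support: the last system block and the last user message each receive an ephemeral cache_control marker so the shared prefix can be cached across calls. A rough sketch of what a two-turn conversation now serializes to (only the type/text blocks and the cache_control placement come from the diff; the surrounding object and strings are illustrative):

    // Illustrative payload fragment produced by the new mapping (not a full API request).
    const payloadSketch = {
      system: [
        { type: "text", text: "You are a helpful assistant." },
        { type: "text", text: "Always answer in English.", cache_control: { type: "ephemeral" } } // last system block
      ],
      messages: [
        { role: "user", content: [{ type: "text", text: "First question" }] },
        { role: "assistant", content: [{ type: "text", text: "First answer" }] },
        { role: "user", content: [{ type: "text", text: "Follow-up question", cache_control: { type: "ephemeral" } }] } // last user message
      ]
    };
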
@@ -3001,15 +3033,22 @@ var init_anthropic = __esm({
       async *wrapStream(iterable) {
         const stream2 = iterable;
         let inputTokens = 0;
+        let cachedInputTokens = 0;
+        let cacheCreationInputTokens = 0;
         for await (const event of stream2) {
           if (event.type === "message_start") {
-
+            const usage = event.message.usage;
+            cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+            cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+            inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
             yield {
               text: "",
               usage: {
                 inputTokens,
                 outputTokens: 0,
-                totalTokens: inputTokens
+                totalTokens: inputTokens,
+                cachedInputTokens,
+                cacheCreationInputTokens
               },
               rawEvent: event
             };
@@ -3023,7 +3062,9 @@ var init_anthropic = __esm({
           const usage = event.usage ? {
             inputTokens,
             outputTokens: event.usage.output_tokens,
-            totalTokens: inputTokens + event.usage.output_tokens
+            totalTokens: inputTokens + event.usage.output_tokens,
+            cachedInputTokens,
+            cacheCreationInputTokens
           } : void 0;
           if (event.delta.stop_reason || usage) {
             yield {
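
Together, the two streaming hunks above thread the cache counters through the normalized usage object: inputTokens now covers uncached, cache-read, and cache-creation tokens, while the latter two are also reported separately. For a message_start event reporting input_tokens: 120, cache_read_input_tokens: 900, and cache_creation_input_tokens: 0, the emitted usage would look roughly like this (field names from the diff, numbers invented):

    const usageSketch = {
      inputTokens: 1020,            // 120 uncached + 900 cache reads + 0 cache writes
      outputTokens: 0,
      totalTokens: 1020,
      cachedInputTokens: 900,
      cacheCreationInputTokens: 0
    };
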
@@ -3104,6 +3145,7 @@ var init_gemini_models = __esm({
   "src/providers/gemini-models.ts"() {
     "use strict";
     GEMINI_MODELS = [
+      // Gemini 3 Pro (Preview)
       {
         provider: "gemini",
         modelId: "gemini-3-pro-preview",
@@ -3112,8 +3154,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 2,
+          // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
           output: 12,
+          // $12.00 for prompts <= 200k, $18.00 for > 200k
           cachedInput: 0.2
+          // $0.20 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3126,9 +3171,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 3",
           releaseDate: "2025-11-18",
-          notes: "
+          notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
         }
       },
+      // Gemini 2.5 Pro
       {
         provider: "gemini",
         modelId: "gemini-2.5-pro",
@@ -3137,8 +3183,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 1.25,
+          // $1.25 for prompts <= 200k, $2.50 for > 200k
           output: 10,
+          // $10.00 for prompts <= 200k, $15.00 for > 200k
           cachedInput: 0.125
+          // $0.125 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3151,9 +3200,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
         }
       },
+      // Gemini 2.5 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash",
@@ -3162,8 +3212,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.3,
+          // $0.30 for text/image/video, $1.00 for audio
           output: 2.5,
           cachedInput: 0.03
+          // $0.03 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3176,9 +3228,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "First hybrid reasoning model with 1M context and thinking budgets."
         }
       },
+      // Gemini 2.5 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash-lite",
@@ -3187,8 +3240,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.30 for audio
           output: 0.4,
           cachedInput: 0.01
+          // $0.01 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3200,9 +3255,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "Smallest and most cost effective model, built for at scale usage."
         }
       },
+      // Gemini 2.0 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash",
@@ -3211,8 +3267,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.70 for audio
           output: 0.4,
-          cachedInput: 0.
+          cachedInput: 0.025
+          // $0.025 for text/image/video
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3223,9 +3281,10 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Balanced multimodal model with 1M context, built for the era of Agents."
         }
       },
+      // Gemini 2.0 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash-lite",
@@ -3234,8 +3293,8 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.075,
-          output: 0.3
-
+          output: 0.3
+          // No context caching available for 2.0-flash-lite
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3246,7 +3305,7 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
         }
       }
     ];
@@ -3416,7 +3475,9 @@ var init_gemini = __esm({
       return {
         inputTokens: usageMetadata.promptTokenCount ?? 0,
         outputTokens: usageMetadata.candidatesTokenCount ?? 0,
-        totalTokens: usageMetadata.totalTokenCount ?? 0
+        totalTokens: usageMetadata.totalTokenCount ?? 0,
+        // Gemini returns cached token count in cachedContentTokenCount
+        cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
      };
     }
     /**
@@ -3472,10 +3533,11 @@ var init_openai_models = __esm({
   "src/providers/openai-models.ts"() {
     "use strict";
     OPENAI_MODELS = [
+      // GPT-5 Family
       {
         provider: "openai",
         modelId: "gpt-5.1",
-        displayName: "GPT-5.1
+        displayName: "GPT-5.1",
         contextWindow: 128e3,
         maxOutputTokens: 32768,
         pricing: {
|
|
|
3495
3557
|
metadata: {
|
|
3496
3558
|
family: "GPT-5",
|
|
3497
3559
|
releaseDate: "2025-11-12",
|
|
3498
|
-
notes: "
|
|
3499
|
-
supportsTemperature: false
|
|
3500
|
-
}
|
|
3501
|
-
},
|
|
3502
|
-
{
|
|
3503
|
-
provider: "openai",
|
|
3504
|
-
modelId: "gpt-5.1-thinking",
|
|
3505
|
-
displayName: "GPT-5.1 Thinking",
|
|
3506
|
-
contextWindow: 196e3,
|
|
3507
|
-
maxOutputTokens: 32768,
|
|
3508
|
-
pricing: {
|
|
3509
|
-
input: 1.25,
|
|
3510
|
-
output: 10,
|
|
3511
|
-
cachedInput: 0.125
|
|
3512
|
-
},
|
|
3513
|
-
knowledgeCutoff: "2024-09-30",
|
|
3514
|
-
features: {
|
|
3515
|
-
streaming: true,
|
|
3516
|
-
functionCalling: true,
|
|
3517
|
-
vision: true,
|
|
3518
|
-
reasoning: true,
|
|
3519
|
-
structuredOutputs: true,
|
|
3520
|
-
fineTuning: true
|
|
3521
|
-
},
|
|
3522
|
-
metadata: {
|
|
3523
|
-
family: "GPT-5",
|
|
3524
|
-
releaseDate: "2025-11-12",
|
|
3525
|
-
notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
|
|
3560
|
+
notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
|
|
3526
3561
|
supportsTemperature: false
|
|
3527
3562
|
}
|
|
3528
3563
|
},
|
|
@@ -3602,6 +3637,255 @@ var init_openai_models = __esm({
           notes: "Fastest, most cost-efficient version for well-defined tasks",
           supportsTemperature: false
         }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-5-pro",
+        displayName: "GPT-5 Pro",
+        contextWindow: 272e3,
+        maxOutputTokens: 128e3,
+        pricing: {
+          input: 15,
+          output: 120
+          // No cached input pricing for gpt-5-pro
+        },
+        knowledgeCutoff: "2024-09-30",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "GPT-5",
+          notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+          supportsTemperature: false
+        }
+      },
+      // GPT-4.1 Family
+      {
+        provider: "openai",
+        modelId: "gpt-4.1",
+        displayName: "GPT-4.1",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 2,
+          output: 8,
+          cachedInput: 0.5
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Improved GPT-4 with better instruction following"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4.1-mini",
+        displayName: "GPT-4.1 Mini",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 0.4,
+          output: 1.6,
+          cachedInput: 0.1
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Cost-efficient GPT-4.1 variant"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4.1-nano",
+        displayName: "GPT-4.1 Nano",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 0.1,
+          output: 0.4,
+          cachedInput: 0.025
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Fastest GPT-4.1 variant for simple tasks"
+        }
+      },
+      // GPT-4o Family
+      {
+        provider: "openai",
+        modelId: "gpt-4o",
+        displayName: "GPT-4o",
+        contextWindow: 128e3,
+        maxOutputTokens: 16384,
+        pricing: {
+          input: 2.5,
+          output: 10,
+          cachedInput: 1.25
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4o",
+          notes: "Multimodal model optimized for speed"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini",
+        displayName: "GPT-4o Mini",
+        contextWindow: 128e3,
+        maxOutputTokens: 16384,
+        pricing: {
+          input: 0.15,
+          output: 0.6,
+          cachedInput: 0.075
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4o",
+          notes: "Fast and affordable multimodal model"
+        }
+      },
+      // o-series (Reasoning models)
+      {
+        provider: "openai",
+        modelId: "o1",
+        displayName: "o1",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 15,
+          output: 60,
+          cachedInput: 7.5
+        },
+        knowledgeCutoff: "2024-12-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Advanced reasoning model with chain-of-thought",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o3",
+        displayName: "o3",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 2,
+          output: 8,
+          cachedInput: 0.5
+        },
+        knowledgeCutoff: "2025-01-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Next-gen reasoning model, more efficient than o1",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o4-mini",
+        displayName: "o4 Mini",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 1.1,
+          output: 4.4,
+          cachedInput: 0.275
+        },
+        knowledgeCutoff: "2025-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Cost-efficient reasoning model",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o3-mini",
+        displayName: "o3 Mini",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 1.1,
+          output: 4.4,
+          cachedInput: 0.55
+        },
+        knowledgeCutoff: "2025-01-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Compact reasoning model for cost-sensitive applications",
+          supportsTemperature: false
+        }
       }
     ];
   }
@@ -3682,7 +3966,8 @@ var init_openai = __esm({
         const usage = chunk.usage ? {
           inputTokens: chunk.usage.prompt_tokens,
           outputTokens: chunk.usage.completion_tokens,
-          totalTokens: chunk.usage.total_tokens
+          totalTokens: chunk.usage.total_tokens,
+          cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
         } : void 0;
         if (finishReason || usage) {
           yield { text: "", finishReason, usage, rawEvent: chunk };
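
As with the Anthropic and Gemini hunks earlier in this chunk, the OpenAI stream wrapper now maps the provider-specific cached-token field onto the shared cachedInputTokens counter. The three raw field paths differ; a small illustrative helper makes the mapping explicit (the field paths are from the hunks, the helper itself is not part of llmist):

    function cachedTokensFrom(provider: "anthropic" | "gemini" | "openai", raw: any): number {
      if (provider === "anthropic") return raw.cache_read_input_tokens ?? 0;   // message_start usage
      if (provider === "gemini") return raw.cachedContentTokenCount ?? 0;       // usageMetadata
      return raw.prompt_tokens_details?.cached_tokens ?? 0;                     // chunk.usage
    }
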
@@ -3899,20 +4184,28 @@ var init_model_registry = __esm({
     /**
      * Estimate API cost for a given model and token usage
      * @param modelId - Full model identifier
-     * @param inputTokens - Number of input tokens
+     * @param inputTokens - Number of input tokens (total, including cached and cache creation)
      * @param outputTokens - Number of output tokens
-     * @param
+     * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+     * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
      * @returns CostEstimate if model found, undefined otherwise
      */
-    estimateCost(modelId, inputTokens, outputTokens,
+    estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
       const spec = this.getModelSpec(modelId);
       if (!spec) return void 0;
-      const
-      const
+      const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+      const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+      const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+      const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+      const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+      const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+      const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
       const outputCost = outputTokens / 1e6 * spec.pricing.output;
       const totalCost = inputCost + outputCost;
       return {
         inputCost,
+        cachedInputCost,
+        cacheCreationCost,
         outputCost,
         totalCost,
         currency: "USD"
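
The reworked estimateCost above splits input cost into uncached, cache-read, and cache-write components, falling back to the base input rate when a model has no cache pricing. A worked example with the Sonnet-class rates from the Anthropic hunks ($3 input, $15 output, $0.30 cache read, $3.75 cache write per million tokens), using 1,020 input tokens of which 900 were cache reads, plus 200 output tokens (a sketch mirroring the diff's formula, not the library's public API):

    const pricing = { input: 3, output: 15, cachedInput: 0.3, cacheWriteInput: 3.75 }; // USD per 1e6 tokens
    const inputTokens = 1020, cachedInputTokens = 900, cacheCreationInputTokens = 0, outputTokens = 200;

    const uncachedInputCost = (inputTokens - cachedInputTokens - cacheCreationInputTokens) / 1e6 * pricing.input; // 120 * 3 / 1e6 = 0.00036
    const cachedInputCost = cachedInputTokens / 1e6 * pricing.cachedInput;                                        // 900 * 0.3 / 1e6 = 0.00027
    const cacheCreationCost = cacheCreationInputTokens / 1e6 * pricing.cacheWriteInput;                           // 0
    const outputCost = outputTokens / 1e6 * pricing.output;                                                       // 200 * 15 / 1e6 = 0.003
    const totalCost = uncachedInputCost + cachedInputCost + cacheCreationCost + outputCost;                       // ≈ 0.00363 USD
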
@@ -5474,4 +5767,4 @@ export {
   AgentBuilder,
   init_builder
 };
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-62M4TDAK.js.map