llmist 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CTC2WJZA.js → chunk-4IMGADVY.js} +2 -2
- package/dist/{chunk-ZFHFBEQ5.js → chunk-62M4TDAK.js} +359 -66
- package/dist/chunk-62M4TDAK.js.map +1 -0
- package/dist/cli.cjs +726 -123
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +369 -59
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +358 -65
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +2 -2
- package/dist/{mock-stream-B2qwECvd.d.cts → mock-stream-CjmvWDc3.d.cts} +21 -20
- package/dist/{mock-stream-B2qwECvd.d.ts → mock-stream-CjmvWDc3.d.ts} +21 -20
- package/dist/testing/index.cjs +358 -65
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +2 -2
- package/package.json +2 -1
- package/dist/chunk-ZFHFBEQ5.js.map +0 -1
- package/dist/{chunk-CTC2WJZA.js.map → chunk-4IMGADVY.js.map} +0 -0
package/dist/index.cjs
CHANGED
@@ -881,7 +881,7 @@ function findSafeDelimiter(content) {
   }
   let counter = 1;
   while (counter < 1e3) {
-    const delimiter = `
+    const delimiter = `__GADGET_PARAM_${counter}__`;
     const regex = new RegExp(`^${delimiter}\\s*$`);
     const isUsed = lines.some((line) => regex.test(line));
     if (!isUsed) {
@@ -988,7 +988,16 @@ var init_gadget = __esm({
     yaml = __toESM(require("js-yaml"), 1);
     init_schema_to_json();
     init_schema_validator();
-    HEREDOC_DELIMITERS = [
+    HEREDOC_DELIMITERS = [
+      "__GADGET_PARAM_EOF__",
+      "__GADGET_PARAM_END__",
+      "__GADGET_PARAM_DOC__",
+      "__GADGET_PARAM_CONTENT__",
+      "__GADGET_PARAM_TEXT__",
+      "__GADGET_PARAM_HEREDOC__",
+      "__GADGET_PARAM_DATA__",
+      "__GADGET_PARAM_BLOCK__"
+    ];
     BaseGadget = class {
       /**
        * The name of the gadget. Used for identification when LLM calls it.
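The two hunks above change how gadget heredoc delimiters are chosen: a set of distinctive `__GADGET_PARAM_*__` names replaces the old delimiter list, and the numbered fallback now uses the same naming scheme. Below is a minimal standalone sketch of the selection logic reconstructed from the lines visible here; the full `findSafeDelimiter` body is not shown in the diff, so trying the named list before the numbered fallback, and the error path, are assumptions.

```ts
// Sketch: pick a delimiter that no line of `content` consists of on its
// own (modulo trailing whitespace), mirroring the loop in the hunk above.
const HEREDOC_DELIMITERS = [
  "__GADGET_PARAM_EOF__",
  "__GADGET_PARAM_END__",
  "__GADGET_PARAM_DOC__",
  "__GADGET_PARAM_CONTENT__",
  "__GADGET_PARAM_TEXT__",
  "__GADGET_PARAM_HEREDOC__",
  "__GADGET_PARAM_DATA__",
  "__GADGET_PARAM_BLOCK__",
];

function findSafeDelimiter(content: string): string {
  const lines = content.split("\n");
  const isUsed = (delimiter: string): boolean => {
    const regex = new RegExp(`^${delimiter}\\s*$`);
    return lines.some((line) => regex.test(line));
  };
  // Assumed ordering: named delimiters first, then numbered variants.
  for (const delimiter of HEREDOC_DELIMITERS) {
    if (!isUsed(delimiter)) return delimiter;
  }
  let counter = 1;
  while (counter < 1e3) {
    const delimiter = `__GADGET_PARAM_${counter}__`;
    if (!isUsed(delimiter)) return delimiter;
    counter++;
  }
  throw new Error("no safe heredoc delimiter found"); // error path assumed
}
```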
@@ -3096,7 +3105,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 3,
         output: 15,
-        cachedInput: 0.3
+        cachedInput: 0.3,
+        cacheWriteInput: 3.75
       },
       knowledgeCutoff: "2025-01",
       features: {
@@ -3120,7 +3130,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 1,
         output: 5,
-        cachedInput: 0.1
+        cachedInput: 0.1,
+        cacheWriteInput: 1.25
       },
       knowledgeCutoff: "2025-02",
       features: {
@@ -3144,7 +3155,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 3,
         output: 15,
-        cachedInput: 0.3
+        cachedInput: 0.3,
+        cacheWriteInput: 3.75
       },
       knowledgeCutoff: "2025-03",
       features: {
@@ -3168,7 +3180,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 3,
         output: 15,
-        cachedInput: 0.3
+        cachedInput: 0.3,
+        cacheWriteInput: 3.75
       },
       knowledgeCutoff: "2024-11",
       features: {
@@ -3192,7 +3205,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 15,
         output: 75,
-        cachedInput: 1.5
+        cachedInput: 1.5,
+        cacheWriteInput: 18.75
       },
       knowledgeCutoff: "2025-01",
       features: {
@@ -3216,7 +3230,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 15,
         output: 75,
-        cachedInput: 1.5
+        cachedInput: 1.5,
+        cacheWriteInput: 18.75
       },
       knowledgeCutoff: "2025-03",
       features: {
@@ -3239,7 +3254,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 0.8,
         output: 4,
-        cachedInput: 0.08
+        cachedInput: 0.08,
+        cacheWriteInput: 1
       },
       knowledgeCutoff: "2024-07",
       features: {
@@ -3262,7 +3278,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 0.25,
         output: 1.25,
-        cachedInput: 0.025
+        cachedInput: 0.025,
+        cacheWriteInput: 0.3125
       },
       knowledgeCutoff: "2023-08",
       features: {
@@ -3286,7 +3303,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 1,
         output: 5,
-        cachedInput: 0.1
+        cachedInput: 0.1,
+        cacheWriteInput: 1.25
       },
       knowledgeCutoff: "2025-02",
       features: {
@@ -3310,7 +3328,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 3,
         output: 15,
-        cachedInput: 0.3
+        cachedInput: 0.3,
+        cacheWriteInput: 3.75
       },
       knowledgeCutoff: "2025-01",
       features: {
@@ -3334,7 +3353,8 @@ var init_anthropic_models = __esm({
       pricing: {
         input: 5,
         output: 25,
-        cachedInput: 0.5
+        cachedInput: 0.5,
+        cacheWriteInput: 6.25
       },
       knowledgeCutoff: "2025-03",
       features: {
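All eleven pricing hunks above follow the same rule: the new `cacheWriteInput` rate is the model's base `input` rate plus 25% (3 → 3.75, 15 → 18.75, 0.8 → 1, 0.25 → 0.3125, and so on), matching Anthropic's surcharge for prompt-cache writes. A quick check over the pairs taken from this diff:

```ts
// Each [input, cacheWriteInput] pair appears verbatim in the hunks above.
const pairs: Array<[number, number]> = [
  [3, 3.75],
  [1, 1.25],
  [15, 18.75],
  [0.8, 1],
  [0.25, 0.3125],
  [5, 6.25],
];
for (const [input, cacheWrite] of pairs) {
  console.assert(Math.abs(input * 1.25 - cacheWrite) < 1e-9); // +25% across the board
}
```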
@@ -3449,15 +3469,27 @@ var init_anthropic = __esm({
     }
     buildRequestPayload(options, descriptor, spec, messages) {
       const systemMessages = messages.filter((message) => message.role === "system");
-      const system = systemMessages.length > 0 ? systemMessages.map((m) =>
-
+      const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+        type: "text",
+        text: m.content,
+        // Add cache_control to the LAST system message block
+        ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+      })) : void 0;
+      const nonSystemMessages = messages.filter(
         (message) => message.role !== "system"
-      )
+      );
+      const lastUserIndex = nonSystemMessages.reduce(
+        (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+        -1
+      );
+      const conversation = nonSystemMessages.map((message, index) => ({
         role: message.role,
         content: [
           {
             type: "text",
-            text: message.content
+            text: message.content,
+            // Add cache_control to the LAST user message
+            ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
           }
         ]
       }));
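The effect of the rewritten `buildRequestPayload` is easiest to see on a concrete payload. A hypothetical result (message text invented for illustration): only the last system block and the last user message carry `cache_control`, which is what lets Anthropic cache the stable prefix of the conversation across calls.

```ts
// Hypothetical output shape of buildRequestPayload after this change.
const payloadFragment = {
  system: [
    {
      type: "text",
      text: "You are a helpful assistant.",
      cache_control: { type: "ephemeral" }, // last (here, only) system block
    },
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "First question..." }] },
    { role: "assistant", content: [{ type: "text", text: "First answer..." }] },
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Follow-up question...",
          cache_control: { type: "ephemeral" }, // last user message only
        },
      ],
    },
  ],
};
```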
@@ -3483,15 +3515,22 @@ var init_anthropic = __esm({
     async *wrapStream(iterable) {
       const stream2 = iterable;
       let inputTokens = 0;
+      let cachedInputTokens = 0;
+      let cacheCreationInputTokens = 0;
      for await (const event of stream2) {
        if (event.type === "message_start") {
-
+          const usage = event.message.usage;
+          cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+          cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+          inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
           yield {
             text: "",
             usage: {
               inputTokens,
               outputTokens: 0,
-              totalTokens: inputTokens
+              totalTokens: inputTokens,
+              cachedInputTokens,
+              cacheCreationInputTokens
             },
             rawEvent: event
           };
@@ -3505,7 +3544,9 @@ var init_anthropic = __esm({
         const usage = event.usage ? {
           inputTokens,
           outputTokens: event.usage.output_tokens,
-          totalTokens: inputTokens + event.usage.output_tokens
+          totalTokens: inputTokens + event.usage.output_tokens,
+          cachedInputTokens,
+          cacheCreationInputTokens
         } : void 0;
         if (event.delta.stop_reason || usage) {
           yield {
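These two `wrapStream` hunks make the token arithmetic explicit: Anthropic reports `input_tokens` net of cache activity, so the normalized `inputTokens` is the sum of all three counters, with the cached and cache-creation counts carried through as subsets of it. A worked example with assumed numbers:

```ts
// Assumed message_start usage from the Anthropic stream.
const usage = {
  input_tokens: 200,                 // uncached prompt tokens only
  cache_read_input_tokens: 4000,     // served from cache
  cache_creation_input_tokens: 1000, // written to cache this call
};
const cachedInputTokens = usage.cache_read_input_tokens ?? 0;
const cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
// Same formula as the hunk above: 200 + 4000 + 1000 = 5200 total prompt tokens.
const inputTokens =
  usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
console.log(inputTokens); // 5200
```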
@@ -3586,6 +3627,7 @@ var init_gemini_models = __esm({
   "src/providers/gemini-models.ts"() {
     "use strict";
     GEMINI_MODELS = [
+      // Gemini 3 Pro (Preview)
       {
         provider: "gemini",
         modelId: "gemini-3-pro-preview",
@@ -3594,8 +3636,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 2,
+          // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
           output: 12,
+          // $12.00 for prompts <= 200k, $18.00 for > 200k
           cachedInput: 0.2
+          // $0.20 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3608,9 +3653,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 3",
           releaseDate: "2025-11-18",
-          notes: "
+          notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
         }
       },
+      // Gemini 2.5 Pro
       {
         provider: "gemini",
         modelId: "gemini-2.5-pro",
@@ -3619,8 +3665,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 1.25,
+          // $1.25 for prompts <= 200k, $2.50 for > 200k
           output: 10,
+          // $10.00 for prompts <= 200k, $15.00 for > 200k
           cachedInput: 0.125
+          // $0.125 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3633,9 +3682,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
         }
       },
+      // Gemini 2.5 Flash
      {
         provider: "gemini",
         modelId: "gemini-2.5-flash",
@@ -3644,8 +3694,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.3,
+          // $0.30 for text/image/video, $1.00 for audio
           output: 2.5,
           cachedInput: 0.03
+          // $0.03 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3658,9 +3710,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "First hybrid reasoning model with 1M context and thinking budgets."
         }
       },
+      // Gemini 2.5 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash-lite",
@@ -3669,8 +3722,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.30 for audio
           output: 0.4,
           cachedInput: 0.01
+          // $0.01 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3682,9 +3737,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "Smallest and most cost effective model, built for at scale usage."
         }
       },
+      // Gemini 2.0 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash",
@@ -3693,8 +3749,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.70 for audio
           output: 0.4,
-          cachedInput: 0.
+          cachedInput: 0.025
+          // $0.025 for text/image/video
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3705,9 +3763,10 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Balanced multimodal model with 1M context, built for the era of Agents."
         }
       },
+      // Gemini 2.0 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash-lite",
@@ -3716,8 +3775,8 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.075,
-          output: 0.3
-
+          output: 0.3
+          // No context caching available for 2.0-flash-lite
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3728,7 +3787,7 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
         }
       }
     ];
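Note that, per the inline comments added above, only the lower pricing tier is stored for the tiered Gemini Pro models, so cost estimates for prompts past the 200k-token break will undercount. A rough illustration for a hypothetical 300k-token gemini-3-pro-preview prompt:

```ts
const tokens = 300_000;
const estimated = (tokens / 1e6) * 2; // $0.60 at the stored <=200k rate
const billed = (tokens / 1e6) * 4;    // $1.20 at the documented >200k rate
console.log(estimated, billed);       // 0.6 1.2 -- estimate is half of actual
```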
@@ -3898,7 +3957,9 @@ var init_gemini = __esm({
       return {
         inputTokens: usageMetadata.promptTokenCount ?? 0,
         outputTokens: usageMetadata.candidatesTokenCount ?? 0,
-        totalTokens: usageMetadata.totalTokenCount ?? 0
+        totalTokens: usageMetadata.totalTokenCount ?? 0,
+        // Gemini returns cached token count in cachedContentTokenCount
+        cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
       };
     }
     /**
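For contrast with the Anthropic path: Gemini's `promptTokenCount` appears to already include the cached portion, so `cachedInputTokens` is reported as a subset of `inputTokens` rather than summed on top. Sketch with assumed numbers:

```ts
// Assumed usageMetadata from a Gemini response; cachedContentTokenCount
// is taken to be a subset of promptTokenCount, so no summing is needed.
const usageMetadata = {
  promptTokenCount: 5000,
  candidatesTokenCount: 300,
  totalTokenCount: 5300,
  cachedContentTokenCount: 4000,
};
const normalized = {
  inputTokens: usageMetadata.promptTokenCount ?? 0,              // 5000
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,         // 300
  totalTokens: usageMetadata.totalTokenCount ?? 0,               // 5300
  cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0, // 4000
};
console.log(normalized);
```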
@@ -3954,10 +4015,11 @@ var init_openai_models = __esm({
   "src/providers/openai-models.ts"() {
     "use strict";
     OPENAI_MODELS = [
+      // GPT-5 Family
       {
         provider: "openai",
         modelId: "gpt-5.1",
-        displayName: "GPT-5.1
+        displayName: "GPT-5.1",
         contextWindow: 128e3,
         maxOutputTokens: 32768,
         pricing: {
@@ -3977,34 +4039,7 @@ var init_openai_models = __esm({
       metadata: {
         family: "GPT-5",
         releaseDate: "2025-11-12",
-        notes: "
-        supportsTemperature: false
-      }
-    },
-    {
-      provider: "openai",
-      modelId: "gpt-5.1-thinking",
-      displayName: "GPT-5.1 Thinking",
-      contextWindow: 196e3,
-      maxOutputTokens: 32768,
-      pricing: {
-        input: 1.25,
-        output: 10,
-        cachedInput: 0.125
-      },
-      knowledgeCutoff: "2024-09-30",
-      features: {
-        streaming: true,
-        functionCalling: true,
-        vision: true,
-        reasoning: true,
-        structuredOutputs: true,
-        fineTuning: true
-      },
-      metadata: {
-        family: "GPT-5",
-        releaseDate: "2025-11-12",
-        notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+        notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
         supportsTemperature: false
       }
     },
@@ -4084,6 +4119,255 @@ var init_openai_models = __esm({
         notes: "Fastest, most cost-efficient version for well-defined tasks",
         supportsTemperature: false
       }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-5-pro",
+      displayName: "GPT-5 Pro",
+      contextWindow: 272e3,
+      maxOutputTokens: 128e3,
+      pricing: {
+        input: 15,
+        output: 120
+        // No cached input pricing for gpt-5-pro
+      },
+      knowledgeCutoff: "2024-09-30",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "GPT-5",
+        notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+        supportsTemperature: false
+      }
+    },
+    // GPT-4.1 Family
+    {
+      provider: "openai",
+      modelId: "gpt-4.1",
+      displayName: "GPT-4.1",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 2,
+        output: 8,
+        cachedInput: 0.5
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Improved GPT-4 with better instruction following"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4.1-mini",
+      displayName: "GPT-4.1 Mini",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 0.4,
+        output: 1.6,
+        cachedInput: 0.1
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Cost-efficient GPT-4.1 variant"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4.1-nano",
+      displayName: "GPT-4.1 Nano",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 0.1,
+        output: 0.4,
+        cachedInput: 0.025
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Fastest GPT-4.1 variant for simple tasks"
+      }
+    },
+    // GPT-4o Family
+    {
+      provider: "openai",
+      modelId: "gpt-4o",
+      displayName: "GPT-4o",
+      contextWindow: 128e3,
+      maxOutputTokens: 16384,
+      pricing: {
+        input: 2.5,
+        output: 10,
+        cachedInput: 1.25
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4o",
+        notes: "Multimodal model optimized for speed"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4o-mini",
+      displayName: "GPT-4o Mini",
+      contextWindow: 128e3,
+      maxOutputTokens: 16384,
+      pricing: {
+        input: 0.15,
+        output: 0.6,
+        cachedInput: 0.075
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4o",
+        notes: "Fast and affordable multimodal model"
+      }
+    },
+    // o-series (Reasoning models)
+    {
+      provider: "openai",
+      modelId: "o1",
+      displayName: "o1",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 15,
+        output: 60,
+        cachedInput: 7.5
+      },
+      knowledgeCutoff: "2024-12-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Advanced reasoning model with chain-of-thought",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o3",
+      displayName: "o3",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 2,
+        output: 8,
+        cachedInput: 0.5
+      },
+      knowledgeCutoff: "2025-01-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Next-gen reasoning model, more efficient than o1",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o4-mini",
+      displayName: "o4 Mini",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 1.1,
+        output: 4.4,
+        cachedInput: 0.275
+      },
+      knowledgeCutoff: "2025-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Cost-efficient reasoning model",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o3-mini",
+      displayName: "o3 Mini",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 1.1,
+        output: 4.4,
+        cachedInput: 0.55
+      },
+      knowledgeCutoff: "2025-01-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Compact reasoning model for cost-sensitive applications",
+        supportsTemperature: false
+      }
     }
   ];
 }
@@ -4164,7 +4448,8 @@ var init_openai = __esm({
       const usage = chunk.usage ? {
         inputTokens: chunk.usage.prompt_tokens,
         outputTokens: chunk.usage.completion_tokens,
-        totalTokens: chunk.usage.total_tokens
+        totalTokens: chunk.usage.total_tokens,
+        cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
      } : void 0;
       if (finishReason || usage) {
         yield { text: "", finishReason, usage, rawEvent: chunk };
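OpenAI takes the same subset approach as Gemini: `prompt_tokens` includes cache hits, which arrive under `prompt_tokens_details.cached_tokens`. Sketch with assumed numbers:

```ts
// Assumed usage payload on the final stream chunk (when requested via
// stream_options: { include_usage: true }).
const chunkUsage = {
  prompt_tokens: 5000,
  completion_tokens: 250,
  total_tokens: 5250,
  prompt_tokens_details: { cached_tokens: 4096 },
};
const cachedInputTokens =
  chunkUsage.prompt_tokens_details?.cached_tokens ?? 0; // 4096, subset of 5000
```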
@@ -4381,20 +4666,28 @@ var init_model_registry = __esm({
     /**
      * Estimate API cost for a given model and token usage
      * @param modelId - Full model identifier
-     * @param inputTokens - Number of input tokens
+     * @param inputTokens - Number of input tokens (total, including cached and cache creation)
      * @param outputTokens - Number of output tokens
-     * @param
+     * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+     * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
      * @returns CostEstimate if model found, undefined otherwise
      */
-    estimateCost(modelId, inputTokens, outputTokens,
+    estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
       const spec = this.getModelSpec(modelId);
       if (!spec) return void 0;
-      const
-      const
+      const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+      const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+      const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+      const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+      const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+      const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+      const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
       const outputCost = outputTokens / 1e6 * spec.pricing.output;
       const totalCost = inputCost + outputCost;
       return {
         inputCost,
+        cachedInputCost,
+        cacheCreationCost,
         outputCost,
         totalCost,
         currency: "USD"
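Putting the new `estimateCost` pieces together with numbers from this diff (the Sonnet-class Anthropic entry: input 3, output 15, cachedInput 0.3, cacheWriteInput 3.75, all USD per million tokens) and the token counts from the streaming example above:

```ts
const pricing = { input: 3, output: 15, cachedInput: 0.3, cacheWriteInput: 3.75 };
const inputTokens = 5200;              // total, includes both subsets below
const cachedInputTokens = 4000;
const cacheCreationInputTokens = 1000;
const outputTokens = 300;

const uncachedInputCost =
  (inputTokens - cachedInputTokens - cacheCreationInputTokens) / 1e6 * pricing.input; // 200 tokens -> $0.0006
const cachedInputCost = cachedInputTokens / 1e6 * pricing.cachedInput;                // $0.0012
const cacheCreationCost = cacheCreationInputTokens / 1e6 * pricing.cacheWriteInput;   // $0.00375
const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;            // $0.00555
const outputCost = outputTokens / 1e6 * pricing.output;                               // $0.0045
console.log(inputCost + outputCost); // 0.01005, vs 0.0201 with no caching (5200/1e6*3 + 0.0045)
```

Once the prefix is being reused, cache reads at a tenth of the input rate dominate the savings, which is what the new `cachedInputCost` and `cacheCreationCost` fields in the returned estimate make visible.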