llmist 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CTC2WJZA.js → chunk-4IMGADVY.js} +2 -2
- package/dist/{chunk-ZFHFBEQ5.js → chunk-62M4TDAK.js} +359 -66
- package/dist/chunk-62M4TDAK.js.map +1 -0
- package/dist/cli.cjs +726 -123
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +369 -59
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +358 -65
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +6 -9
- package/dist/index.d.ts +6 -9
- package/dist/index.js +2 -2
- package/dist/{mock-stream-B2qwECvd.d.cts → mock-stream-CjmvWDc3.d.cts} +21 -20
- package/dist/{mock-stream-B2qwECvd.d.ts → mock-stream-CjmvWDc3.d.ts} +21 -20
- package/dist/testing/index.cjs +358 -65
- package/dist/testing/index.cjs.map +1 -1
- package/dist/testing/index.d.cts +2 -2
- package/dist/testing/index.d.ts +2 -2
- package/dist/testing/index.js +2 -2
- package/package.json +2 -1
- package/dist/chunk-ZFHFBEQ5.js.map +0 -1
- package/dist/{chunk-CTC2WJZA.js.map → chunk-4IMGADVY.js.map} +0 -0
package/dist/testing/index.cjs
CHANGED
@@ -864,7 +864,7 @@ function findSafeDelimiter(content) {
   }
   let counter = 1;
   while (counter < 1e3) {
-    const delimiter = `
+    const delimiter = `__GADGET_PARAM_${counter}__`;
     const regex = new RegExp(`^${delimiter}\\s*$`);
     const isUsed = lines.some((line) => regex.test(line));
     if (!isUsed) {
@@ -971,7 +971,16 @@ var init_gadget = __esm({
     yaml = __toESM(require("js-yaml"), 1);
     init_schema_to_json();
     init_schema_validator();
-    HEREDOC_DELIMITERS = [
+    HEREDOC_DELIMITERS = [
+      "__GADGET_PARAM_EOF__",
+      "__GADGET_PARAM_END__",
+      "__GADGET_PARAM_DOC__",
+      "__GADGET_PARAM_CONTENT__",
+      "__GADGET_PARAM_TEXT__",
+      "__GADGET_PARAM_HEREDOC__",
+      "__GADGET_PARAM_DATA__",
+      "__GADGET_PARAM_BLOCK__"
+    ];
     BaseGadget = class {
       /**
        * The name of the gadget. Used for identification when LLM calls it.
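Taken together, the two hunks above namespace llmist's heredoc delimiters: the default candidates become `__GADGET_PARAM_*__` strings and the numbered fallback becomes `__GADGET_PARAM_${counter}__`, so a bare `EOF` or `END` inside parameter content can no longer collide with the framing. A minimal sketch of the selection logic, where only the delimiter strings and the `^${delimiter}\s*$` line check are taken from the diff (the function shape around them is an assumption):

```typescript
// Sketch of the delimiter search. HEREDOC_DELIMITERS (abridged) and the
// `__GADGET_PARAM_${counter}__` fallback are from the diff; the function
// shape around them is assumed for illustration.
const HEREDOC_DELIMITERS = [
  "__GADGET_PARAM_EOF__",
  "__GADGET_PARAM_END__",
  "__GADGET_PARAM_DOC__",
  // ...remaining namespaced candidates as listed in the hunk
];

function findSafeDelimiter(content: string): string {
  const lines = content.split("\n");
  // A delimiter is unusable if it already appears alone on a line.
  const isUsed = (delimiter: string): boolean => {
    const regex = new RegExp(`^${delimiter}\\s*$`);
    return lines.some((line) => regex.test(line));
  };
  for (const candidate of HEREDOC_DELIMITERS) {
    if (!isUsed(candidate)) return candidate;
  }
  let counter = 1;
  while (counter < 1e3) {
    const delimiter = `__GADGET_PARAM_${counter}__`;
    if (!isUsed(delimiter)) return delimiter;
    counter++;
  }
  throw new Error("no collision-free heredoc delimiter found");
}
```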
@@ -3697,7 +3706,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3721,7 +3731,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 1,
           output: 5,
-          cachedInput: 0.1
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -3745,7 +3756,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-03",
         features: {
@@ -3769,7 +3781,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2024-11",
         features: {
@@ -3793,7 +3806,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 15,
           output: 75,
-          cachedInput: 1.5
+          cachedInput: 1.5,
+          cacheWriteInput: 18.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3817,7 +3831,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 15,
           output: 75,
-          cachedInput: 1.5
+          cachedInput: 1.5,
+          cacheWriteInput: 18.75
         },
         knowledgeCutoff: "2025-03",
         features: {
@@ -3840,7 +3855,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 0.8,
           output: 4,
-          cachedInput: 0.08
+          cachedInput: 0.08,
+          cacheWriteInput: 1
         },
         knowledgeCutoff: "2024-07",
         features: {
@@ -3863,7 +3879,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 0.25,
           output: 1.25,
-          cachedInput: 0.025
+          cachedInput: 0.025,
+          cacheWriteInput: 0.3125
         },
         knowledgeCutoff: "2023-08",
         features: {
@@ -3887,7 +3904,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 1,
           output: 5,
-          cachedInput: 0.1
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -3911,7 +3929,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 3,
           output: 15,
-          cachedInput: 0.3
+          cachedInput: 0.3,
+          cacheWriteInput: 3.75
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3935,7 +3954,8 @@ var init_anthropic_models = __esm({
         pricing: {
           input: 5,
           output: 25,
-          cachedInput: 0.5
+          cachedInput: 0.5,
+          cacheWriteInput: 6.25
         },
         knowledgeCutoff: "2025-03",
         features: {
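All ten Anthropic pricing hunks add the same thing: a `cacheWriteInput` rate at exactly 1.25x the base input rate (3 to 3.75, 15 to 18.75, 0.8 to 1, 0.25 to 0.3125), which matches the cache-write premium Anthropic documents for 5-minute ephemeral caches. A quick illustrative check of that ratio:

```typescript
// Illustrative check: each new Anthropic cacheWriteInput rate is 1.25x
// the base input rate (values copied from the hunks above, $/M tokens).
const pairs = [
  { input: 3, cacheWriteInput: 3.75 },
  { input: 1, cacheWriteInput: 1.25 },
  { input: 15, cacheWriteInput: 18.75 },
  { input: 0.8, cacheWriteInput: 1 },
  { input: 0.25, cacheWriteInput: 0.3125 },
  { input: 5, cacheWriteInput: 6.25 },
];
for (const p of pairs) {
  console.assert(p.cacheWriteInput === p.input * 1.25);
}
```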
@@ -4050,15 +4070,27 @@ var init_anthropic = __esm({
       }
       buildRequestPayload(options, descriptor, spec, messages) {
         const systemMessages = messages.filter((message) => message.role === "system");
-        const system = systemMessages.length > 0 ? systemMessages.map((m) =>
-
+        const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+          type: "text",
+          text: m.content,
+          // Add cache_control to the LAST system message block
+          ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+        })) : void 0;
+        const nonSystemMessages = messages.filter(
           (message) => message.role !== "system"
-        )
+        );
+        const lastUserIndex = nonSystemMessages.reduce(
+          (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+          -1
+        );
+        const conversation = nonSystemMessages.map((message, index) => ({
           role: message.role,
           content: [
             {
               type: "text",
-              text: message.content
+              text: message.content,
+              // Add cache_control to the LAST user message
+              ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
             }
           ]
         }));
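The rebuilt `buildRequestPayload` turns the system prompt into an array of text blocks and tags the last system block and the last user message with `cache_control: { type: "ephemeral" }`, which is how the Anthropic Messages API marks cache breakpoints. An illustrative shape of the resulting payload for a short conversation (field values invented; option and tool plumbing omitted):

```typescript
// Illustrative payload shape after this change; the real method also
// folds in options, descriptor, and spec handling not shown here.
const payload = {
  system: [
    { type: "text", text: "You are a helpful assistant." },
    {
      type: "text",
      text: "Long tool and schema preamble...",
      cache_control: { type: "ephemeral" }, // last system block only
    },
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "Hi" }] },
    { role: "assistant", content: [{ type: "text", text: "Hello!" }] },
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Summarize the report.",
          cache_control: { type: "ephemeral" }, // last user message only
        },
      ],
    },
  ],
};
```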
@@ -4084,15 +4116,22 @@ var init_anthropic = __esm({
       async *wrapStream(iterable) {
         const stream2 = iterable;
         let inputTokens = 0;
+        let cachedInputTokens = 0;
+        let cacheCreationInputTokens = 0;
         for await (const event of stream2) {
           if (event.type === "message_start") {
-
+            const usage = event.message.usage;
+            cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+            cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+            inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
             yield {
               text: "",
               usage: {
                 inputTokens,
                 outputTokens: 0,
-                totalTokens: inputTokens
+                totalTokens: inputTokens,
+                cachedInputTokens,
+                cacheCreationInputTokens
               },
               rawEvent: event
             };
@@ -4106,7 +4145,9 @@ var init_anthropic = __esm({
           const usage = event.usage ? {
             inputTokens,
             outputTokens: event.usage.output_tokens,
-            totalTokens: inputTokens + event.usage.output_tokens
+            totalTokens: inputTokens + event.usage.output_tokens,
+            cachedInputTokens,
+            cacheCreationInputTokens
           } : void 0;
           if (event.delta.stop_reason || usage) {
             yield {
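The `message_start` handling encodes an Anthropic quirk: the API's `input_tokens` excludes cache reads and writes, which arrive in separate fields, so the wrapper sums all three counts to get the true prompt size. A sketch of just that normalization, assuming the usage shape visible in the diff:

```typescript
// Anthropic reports uncached prompt tokens in input_tokens; cache reads
// and writes arrive separately. llmist normalizes inputTokens to the
// grand total so cost math can subtract the subsets back out later.
interface AnthropicUsage {
  input_tokens: number;
  cache_read_input_tokens?: number;
  cache_creation_input_tokens?: number;
}

function normalizeAnthropicUsage(u: AnthropicUsage) {
  const cachedInputTokens = u.cache_read_input_tokens ?? 0;
  const cacheCreationInputTokens = u.cache_creation_input_tokens ?? 0;
  return {
    inputTokens: u.input_tokens + cachedInputTokens + cacheCreationInputTokens,
    cachedInputTokens,
    cacheCreationInputTokens,
  };
}
```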
@@ -4187,6 +4228,7 @@ var init_gemini_models = __esm({
   "src/providers/gemini-models.ts"() {
     "use strict";
     GEMINI_MODELS = [
+      // Gemini 3 Pro (Preview)
       {
         provider: "gemini",
         modelId: "gemini-3-pro-preview",
@@ -4195,8 +4237,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 2,
+          // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
           output: 12,
+          // $12.00 for prompts <= 200k, $18.00 for > 200k
           cachedInput: 0.2
+          // $0.20 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -4209,9 +4254,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 3",
           releaseDate: "2025-11-18",
-          notes: "
+          notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
         }
       },
+      // Gemini 2.5 Pro
       {
         provider: "gemini",
         modelId: "gemini-2.5-pro",
@@ -4220,8 +4266,11 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 1.25,
+          // $1.25 for prompts <= 200k, $2.50 for > 200k
           output: 10,
+          // $10.00 for prompts <= 200k, $15.00 for > 200k
           cachedInput: 0.125
+          // $0.125 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -4234,9 +4283,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
         }
       },
+      // Gemini 2.5 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash",
@@ -4245,8 +4295,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.3,
+          // $0.30 for text/image/video, $1.00 for audio
           output: 2.5,
           cachedInput: 0.03
+          // $0.03 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -4259,9 +4311,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "First hybrid reasoning model with 1M context and thinking budgets."
         }
       },
+      // Gemini 2.5 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash-lite",
@@ -4270,8 +4323,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 65536,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.30 for audio
           output: 0.4,
           cachedInput: 0.01
+          // $0.01 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -4283,9 +4338,10 @@ var init_gemini_models = __esm({
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "
+          notes: "Smallest and most cost effective model, built for at scale usage."
         }
       },
+      // Gemini 2.0 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash",
@@ -4294,8 +4350,10 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.70 for audio
           output: 0.4,
-          cachedInput: 0.
+          cachedInput: 0.025
+          // $0.025 for text/image/video
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -4306,9 +4364,10 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Balanced multimodal model with 1M context, built for the era of Agents."
         }
       },
+      // Gemini 2.0 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash-lite",
@@ -4317,8 +4376,8 @@ var init_gemini_models = __esm({
         maxOutputTokens: 8192,
         pricing: {
           input: 0.075,
-          output: 0.3
-
+          output: 0.3
+          // No context caching available for 2.0-flash-lite
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -4329,7 +4388,7 @@ var init_gemini_models = __esm({
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
         }
       }
     ];
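The Gemini hunks mostly fill in previously truncated `notes` strings, fix the `gemini-2.0-flash` cached rate (cut off at `0.` in 0.7.0, now `0.025`), and document in comments that the stored rates are the lower (<= 200k-token prompt) pricing tier. One consequence worth knowing, shown with illustrative arithmetic: estimates undershoot for very long prompts.

```typescript
// Illustrative: a 300k-token prompt on gemini-3-pro-preview.
const promptTokens = 300_000;
const catalogEstimate = (promptTokens / 1e6) * 2; // stored rate, $2/M -> $0.60
const billedAtHighTier = (promptTokens / 1e6) * 4; // >200k tier, $4/M -> $1.20
// The registry's flat lower-tier rate understates the bill by 2x here.
```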
@@ -4499,7 +4558,9 @@ var init_gemini = __esm({
       return {
         inputTokens: usageMetadata.promptTokenCount ?? 0,
         outputTokens: usageMetadata.candidatesTokenCount ?? 0,
-        totalTokens: usageMetadata.totalTokenCount ?? 0
+        totalTokens: usageMetadata.totalTokenCount ?? 0,
+        // Gemini returns cached token count in cachedContentTokenCount
+        cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
       };
     }
     /**
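Gemini's accounting differs from Anthropic's: `promptTokenCount` already includes cached tokens, and `cachedContentTokenCount` reports the cached subset, so the mapping passes counts straight through rather than summing. A restated sketch of that mapping, with the metadata shape assumed from the diff:

```typescript
// Sketch of the Gemini usage mapping (field names from the diff).
// cachedContentTokenCount is a subset of promptTokenCount, so nothing
// is summed (contrast with the Anthropic normalization above).
interface GeminiUsageMetadata {
  promptTokenCount?: number;
  candidatesTokenCount?: number;
  totalTokenCount?: number;
  cachedContentTokenCount?: number;
}

function mapGeminiUsage(m: GeminiUsageMetadata) {
  return {
    inputTokens: m.promptTokenCount ?? 0,
    outputTokens: m.candidatesTokenCount ?? 0,
    totalTokens: m.totalTokenCount ?? 0,
    cachedInputTokens: m.cachedContentTokenCount ?? 0, // subset of inputTokens
  };
}
```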
@@ -4555,10 +4616,11 @@ var init_openai_models = __esm({
   "src/providers/openai-models.ts"() {
     "use strict";
     OPENAI_MODELS = [
+      // GPT-5 Family
      {
        provider: "openai",
        modelId: "gpt-5.1",
-        displayName: "GPT-5.1
+        displayName: "GPT-5.1",
        contextWindow: 128e3,
        maxOutputTokens: 32768,
        pricing: {
@@ -4578,34 +4640,7 @@ var init_openai_models = __esm({
      metadata: {
        family: "GPT-5",
        releaseDate: "2025-11-12",
-        notes: "
-        supportsTemperature: false
-      }
-    },
-    {
-      provider: "openai",
-      modelId: "gpt-5.1-thinking",
-      displayName: "GPT-5.1 Thinking",
-      contextWindow: 196e3,
-      maxOutputTokens: 32768,
-      pricing: {
-        input: 1.25,
-        output: 10,
-        cachedInput: 0.125
-      },
-      knowledgeCutoff: "2024-09-30",
-      features: {
-        streaming: true,
-        functionCalling: true,
-        vision: true,
-        reasoning: true,
-        structuredOutputs: true,
-        fineTuning: true
-      },
-      metadata: {
-        family: "GPT-5",
-        releaseDate: "2025-11-12",
-        notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+        notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
        supportsTemperature: false
      }
    },
@@ -4685,6 +4720,255 @@ var init_openai_models = __esm({
        notes: "Fastest, most cost-efficient version for well-defined tasks",
        supportsTemperature: false
      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-5-pro",
+      displayName: "GPT-5 Pro",
+      contextWindow: 272e3,
+      maxOutputTokens: 128e3,
+      pricing: {
+        input: 15,
+        output: 120
+        // No cached input pricing for gpt-5-pro
+      },
+      knowledgeCutoff: "2024-09-30",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "GPT-5",
+        notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+        supportsTemperature: false
+      }
+    },
+    // GPT-4.1 Family
+    {
+      provider: "openai",
+      modelId: "gpt-4.1",
+      displayName: "GPT-4.1",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 2,
+        output: 8,
+        cachedInput: 0.5
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Improved GPT-4 with better instruction following"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4.1-mini",
+      displayName: "GPT-4.1 Mini",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 0.4,
+        output: 1.6,
+        cachedInput: 0.1
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Cost-efficient GPT-4.1 variant"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4.1-nano",
+      displayName: "GPT-4.1 Nano",
+      contextWindow: 128e3,
+      maxOutputTokens: 32768,
+      pricing: {
+        input: 0.1,
+        output: 0.4,
+        cachedInput: 0.025
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4.1",
+        notes: "Fastest GPT-4.1 variant for simple tasks"
+      }
+    },
+    // GPT-4o Family
+    {
+      provider: "openai",
+      modelId: "gpt-4o",
+      displayName: "GPT-4o",
+      contextWindow: 128e3,
+      maxOutputTokens: 16384,
+      pricing: {
+        input: 2.5,
+        output: 10,
+        cachedInput: 1.25
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4o",
+        notes: "Multimodal model optimized for speed"
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "gpt-4o-mini",
+      displayName: "GPT-4o Mini",
+      contextWindow: 128e3,
+      maxOutputTokens: 16384,
+      pricing: {
+        input: 0.15,
+        output: 0.6,
+        cachedInput: 0.075
+      },
+      knowledgeCutoff: "2024-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "GPT-4o",
+        notes: "Fast and affordable multimodal model"
+      }
+    },
+    // o-series (Reasoning models)
+    {
+      provider: "openai",
+      modelId: "o1",
+      displayName: "o1",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 15,
+        output: 60,
+        cachedInput: 7.5
+      },
+      knowledgeCutoff: "2024-12-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Advanced reasoning model with chain-of-thought",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o3",
+      displayName: "o3",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 2,
+        output: 8,
+        cachedInput: 0.5
+      },
+      knowledgeCutoff: "2025-01-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Next-gen reasoning model, more efficient than o1",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o4-mini",
+      displayName: "o4 Mini",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 1.1,
+        output: 4.4,
+        cachedInput: 0.275
+      },
+      knowledgeCutoff: "2025-04-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true,
+        fineTuning: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Cost-efficient reasoning model",
+        supportsTemperature: false
+      }
+    },
+    {
+      provider: "openai",
+      modelId: "o3-mini",
+      displayName: "o3 Mini",
+      contextWindow: 2e5,
+      maxOutputTokens: 1e5,
+      pricing: {
+        input: 1.1,
+        output: 4.4,
+        cachedInput: 0.55
+      },
+      knowledgeCutoff: "2025-01-01",
+      features: {
+        streaming: true,
+        functionCalling: true,
+        vision: true,
+        reasoning: true,
+        structuredOutputs: true
+      },
+      metadata: {
+        family: "o-series",
+        notes: "Compact reasoning model for cost-sensitive applications",
+        supportsTemperature: false
+      }
     }
   ];
 }
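This release roughly doubles the OpenAI catalog (GPT-5 Pro plus the GPT-4.1, GPT-4o, and o-series families) and drops the separate `gpt-5.1-thinking` entry in favor of a single `gpt-5.1`. Note that `gpt-5-pro` ships without a `cachedInput` rate; combined with the `estimateCost` change below, cached tokens for that model are billed at the full input rate. A hedged lookup sketch (the diff only shows `this.getModelSpec(modelId)` used internally, so the registry surface below is an assumption):

```typescript
// Assumed surface: registry access here is illustrative, not llmist's
// documented API; only getModelSpec(modelId) appears in the diff.
interface Pricing { input: number; output: number; cachedInput?: number; cacheWriteInput?: number }
interface ModelSpec { modelId: string; pricing: Pricing }
declare const registry: { getModelSpec(modelId: string): ModelSpec | undefined };

const spec = registry.getModelSpec("gpt-5-pro");
if (spec) {
  // gpt-5-pro has no cachedInput, so cached tokens fall back to $15/M.
  const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input; // 15
  console.log(cachedRate);
}
```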
@@ -4765,7 +5049,8 @@ var init_openai = __esm({
          const usage = chunk.usage ? {
            inputTokens: chunk.usage.prompt_tokens,
            outputTokens: chunk.usage.completion_tokens,
-            totalTokens: chunk.usage.total_tokens
+            totalTokens: chunk.usage.total_tokens,
+            cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
          } : void 0;
          if (finishReason || usage) {
            yield { text: "", finishReason, usage, rawEvent: chunk };
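As with Gemini, OpenAI's `prompt_tokens` already includes the cached subset reported in `prompt_tokens_details.cached_tokens`, so the count is passed through without adjusting totals. A sketch of the mapping on an invented usage chunk:

```typescript
// OpenAI usage chunk (values invented): prompt_tokens already contains
// the cached subset, so the mapping is a straight pass-through.
const chunkUsage = {
  prompt_tokens: 12_000,
  completion_tokens: 400,
  total_tokens: 12_400,
  prompt_tokens_details: { cached_tokens: 10_240 },
};

const usage = {
  inputTokens: chunkUsage.prompt_tokens,
  outputTokens: chunkUsage.completion_tokens,
  totalTokens: chunkUsage.total_tokens,
  cachedInputTokens: chunkUsage.prompt_tokens_details?.cached_tokens ?? 0,
};
```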
@@ -4982,20 +5267,28 @@ var init_model_registry = __esm({
      /**
       * Estimate API cost for a given model and token usage
       * @param modelId - Full model identifier
-       * @param inputTokens - Number of input tokens
+       * @param inputTokens - Number of input tokens (total, including cached and cache creation)
       * @param outputTokens - Number of output tokens
-       * @param
+       * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+       * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
       * @returns CostEstimate if model found, undefined otherwise
       */
-      estimateCost(modelId, inputTokens, outputTokens,
+      estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
        const spec = this.getModelSpec(modelId);
        if (!spec) return void 0;
-        const
-        const
+        const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+        const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+        const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+        const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+        const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+        const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+        const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
        const outputCost = outputTokens / 1e6 * spec.pricing.output;
        const totalCost = inputCost + outputCost;
        return {
          inputCost,
+          cachedInputCost,
+          cacheCreationCost,
          outputCost,
          totalCost,
          currency: "USD"