llmist 0.7.0 → 0.8.0

@@ -869,7 +869,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
@@ -975,7 +975,16 @@ var init_gadget = __esm({
  "use strict";
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
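The two hunks above change the heredoc delimiters from bare words to namespaced markers: EOF or END alone can plausibly open a line in LLM-generated parameter content, while __GADGET_PARAM_EOF__ is effectively reserved, so the fallback to numbered variants fires far less often. A minimal sketch of the search, assuming content is the heredoc body (helper names here are illustrative, not the package's API):

function findSafeDelimiter(content: string): string {
  const lines = content.split("\n");
  // A delimiter is "used" if it appears alone on any line of the body.
  const isUsed = (delimiter: string) =>
    lines.some((line) => new RegExp(`^${delimiter}\\s*$`).test(line));
  for (const candidate of ["__GADGET_PARAM_EOF__", "__GADGET_PARAM_END__"]) {
    if (!isUsed(candidate)) return candidate;
  }
  // Fall back to numbered variants, mirroring the loop in the first hunk.
  for (let counter = 1; counter < 1e3; counter++) {
    const delimiter = `__GADGET_PARAM_${counter}__`;
    if (!isUsed(delimiter)) return delimiter;
  }
  throw new Error("no safe delimiter found");
}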
@@ -2614,7 +2623,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2638,7 +2648,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -2662,7 +2673,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -2686,7 +2698,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -2710,7 +2723,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2734,7 +2748,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -2757,7 +2772,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -2780,7 +2796,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -2804,7 +2821,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -2828,7 +2846,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2852,7 +2871,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
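Every cacheWriteInput added above is exactly 1.25x the model's base input rate (3 -> 3.75, 15 -> 18.75, 0.8 -> 1, 0.25 -> 0.3125), which matches Anthropic's 25% surcharge for writing a prompt prefix into the default five-minute cache. In other words, the new field could have been derived rather than hard-coded:

// Anthropic cache-write pricing: base input rate plus a 25% premium.
const cacheWriteInput = (input: number): number => input * 1.25;
cacheWriteInput(3); // 3.75, as in the Sonnet-class entries above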
@@ -2967,15 +2987,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
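The rewritten buildRequestPayload turns the system prompt into an array of text blocks and sets cache_control: { type: "ephemeral" } on the last system block and on the last user message, creating two cache breakpoints so the stable prefix (system prompt plus prior turns) can be reused across requests. For a one-system, two-turn conversation the payload should come out roughly like this (values illustrative):

{
  system: [
    { type: "text", text: "You are a helpful agent...",
      cache_control: { type: "ephemeral" } }
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "Earlier question" }] },
    { role: "assistant", content: [{ type: "text", text: "Earlier reply" }] },
    { role: "user", content: [
      { type: "text", text: "Latest question",
        cache_control: { type: "ephemeral" } }
    ] }
  ]
}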
@@ -3001,15 +3033,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3023,7 +3062,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
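Both hunks rely on the same accounting: Anthropic reports input_tokens exclusive of cache reads and writes, so the wrapper sums all three to get the full prompt size, then carries the cached and creation counts through each yielded usage object. A concrete message_start example (numbers illustrative):

// usage: { input_tokens: 200, cache_read_input_tokens: 8000,
//          cache_creation_input_tokens: 1800 }
// cachedInputTokens        = 8000
// cacheCreationInputTokens = 1800
// inputTokens              = 200 + 8000 + 1800 = 10000 (full prompt)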
@@ -3104,6 +3145,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -3112,8 +3154,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3126,9 +3171,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -3137,8 +3183,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3151,9 +3200,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -3162,8 +3212,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3176,9 +3228,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -3187,8 +3240,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3200,9 +3255,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -3211,8 +3267,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3223,9 +3281,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -3234,8 +3293,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3246,7 +3305,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -3416,7 +3475,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
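Gemini's reporting differs from Anthropic's: promptTokenCount already includes the cached portion, so cachedContentTokenCount is surfaced as a subset of inputTokens rather than added to it. An illustrative usageMetadata:

// usageMetadata: { promptTokenCount: 5000, cachedContentTokenCount: 4000,
//                  candidatesTokenCount: 300, totalTokenCount: 5300 }
// => inputTokens = 5000 (total), of which cachedInputTokens = 4000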
@@ -3472,10 +3533,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -3495,34 +3557,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -3602,6 +3637,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -3682,7 +3966,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
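OpenAI's shape matches Gemini's: prompt_tokens_details.cached_tokens is a subset of prompt_tokens, so the wrapper only passes it through — no summing, and no cache-write tier, since OpenAI does not bill separately for cache writes. An illustrative chunk:

// chunk.usage: { prompt_tokens: 5000, completion_tokens: 300, total_tokens: 5300,
//                prompt_tokens_details: { cached_tokens: 4096 } }
// => inputTokens = 5000, cachedInputTokens = 4096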
@@ -3899,20 +4184,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
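The reworked estimateCost prices three input slices independently: uncached tokens at the base rate, cache reads at cachedInput, cache writes at cacheWriteInput, falling back to the base rate when a tier is missing. A worked example at Sonnet-class Anthropic rates (input 3, cachedInput 0.3, cacheWriteInput 3.75, output 15, all USD per million tokens); the registry variable and model id are illustrative:

// 100k total input: 80k cache reads + 10k cache writes + 10k uncached; 2k output.
const estimate = registry.estimateCost(modelId, 100_000, 2_000, 80_000, 10_000);
// uncachedInputCost = 10_000 / 1e6 * 3    = 0.0300
// cachedInputCost   = 80_000 / 1e6 * 0.30 = 0.0240
// cacheCreationCost = 10_000 / 1e6 * 3.75 = 0.0375
// inputCost         = 0.0915
// outputCost        =  2_000 / 1e6 * 15   = 0.0300
// totalCost         = 0.1215 USD

Under the old signature the same call would have billed all 100k tokens at a single rate; splitting the slices keeps estimates honest once most of the prompt is served from cache.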
@@ -5474,4 +5767,4 @@ export {
  AgentBuilder,
  init_builder
  };
- //# sourceMappingURL=chunk-ZFHFBEQ5.js.map
+ //# sourceMappingURL=chunk-62M4TDAK.js.map