llmist 0.7.0 → 0.8.0

This diff shows the contents of publicly available package versions as published to their respective registries. It is provided for informational purposes only and reflects the changes between those published versions.
package/dist/cli.cjs CHANGED
@@ -865,7 +865,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
@@ -972,7 +972,16 @@ var init_gadget = __esm({
  yaml = __toESM(require("js-yaml"), 1);
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
@@ -3050,7 +3059,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3074,7 +3084,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3098,7 +3109,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3122,7 +3134,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -3146,7 +3159,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3170,7 +3184,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3193,7 +3208,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -3216,7 +3232,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -3240,7 +3257,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -3264,7 +3282,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3288,7 +3307,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -3403,15 +3423,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
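With this hunk, llmist marks the last system block and the last user message with Anthropic's ephemeral cache_control. A minimal sketch of the resulting request shape (the prompt text is illustrative, not captured from the package):

// Sketch only: what buildRequestPayload now produces for one system message and three turns
{
  system: [
    { type: "text", text: "You are a helpful assistant.", cache_control: { type: "ephemeral" } }
  ],
  messages: [
    { role: "user", content: [{ type: "text", text: "Hello" }] },
    { role: "assistant", content: [{ type: "text", text: "Hi! How can I help?" }] },
    { role: "user", content: [{ type: "text", text: "Summarize the README.", cache_control: { type: "ephemeral" } }] }
  ]
}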
@@ -3437,15 +3469,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3459,7 +3498,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
@@ -3540,6 +3581,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -3548,8 +3590,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3562,9 +3607,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -3573,8 +3619,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3587,9 +3636,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -3598,8 +3648,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3612,9 +3664,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -3623,8 +3676,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3636,9 +3691,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -3647,8 +3703,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3659,9 +3717,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -3670,8 +3729,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3682,7 +3741,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -3852,7 +3911,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
@@ -3908,10 +3969,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -3931,34 +3993,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -4038,6 +4073,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -4118,7 +4402,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -4335,20 +4620,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
@@ -5395,7 +5688,8 @@ var OPTION_FLAGS = {
  logFile: "--log-file <path>",
  logReset: "--log-reset",
  noBuiltins: "--no-builtins",
- noBuiltinInteraction: "--no-builtin-interaction"
+ noBuiltinInteraction: "--no-builtin-interaction",
+ quiet: "-q, --quiet"
  };
  var OPTION_DESCRIPTIONS = {
  model: "Model identifier, e.g. openai:gpt-5-nano or anthropic:claude-sonnet-4-5.",
@@ -5409,7 +5703,8 @@ var OPTION_DESCRIPTIONS = {
  logFile: "Path to log file. When set, logs are written to file instead of stderr.",
  logReset: "Reset (truncate) the log file at session start instead of appending.",
  noBuiltins: "Disable built-in gadgets (AskUser, TellUser).",
- noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser."
+ noBuiltinInteraction: "Disable interactive gadgets (AskUser) while keeping TellUser.",
+ quiet: "Suppress all output except content (text and TellUser messages)."
  };
  var SUMMARY_PREFIX = "[llmist]";
 
@@ -5419,7 +5714,7 @@ var import_commander3 = require("commander");
  // package.json
  var package_default = {
  name: "llmist",
- version: "0.6.2",
+ version: "0.7.0",
  description: "Universal TypeScript LLM client with streaming-first agent framework. Works with any model - no structured outputs or native tool calling required. Implements its own flexible grammar for function calling.",
  type: "module",
  main: "dist/index.cjs",
@@ -5503,6 +5798,7 @@ var package_default = {
  "@google/genai": "^1.27.0",
  chalk: "^5.6.2",
  commander: "^12.1.0",
+ eta: "^4.4.1",
  "js-toml": "^1.0.2",
  "js-yaml": "^4.1.0",
  marked: "^15.0.12",
@@ -5622,12 +5918,19 @@ var import_node_path2 = __toESM(require("path"), 1);
  var import_node_url = require("url");
  init_gadget();
  var PATH_PREFIXES = [".", "/", "~"];
+ function isGadgetLike(value) {
+ if (typeof value !== "object" || value === null) {
+ return false;
+ }
+ const obj = value;
+ return typeof obj.execute === "function" && typeof obj.description === "string" && ("parameterSchema" in obj || "schema" in obj);
+ }
  function isGadgetConstructor(value) {
  if (typeof value !== "function") {
  return false;
  }
  const prototype = value.prototype;
- return Boolean(prototype) && prototype instanceof BaseGadget;
+ return Boolean(prototype) && (prototype instanceof BaseGadget || isGadgetLike(prototype));
  }
  function expandHomePath(input) {
  if (!input.startsWith("~")) {
@@ -5664,7 +5967,7 @@ function extractGadgetsFromModule(moduleExports) {
  return;
  }
  visited.add(value);
- if (value instanceof BaseGadget) {
+ if (value instanceof BaseGadget || isGadgetLike(value)) {
  results.push(value);
  return;
  }
@@ -5789,8 +6092,14 @@ function renderSummary(metadata) {
  parts.push(import_chalk.default.magenta(metadata.model));
  }
  if (metadata.usage) {
- const { inputTokens, outputTokens } = metadata.usage;
+ const { inputTokens, outputTokens, cachedInputTokens, cacheCreationInputTokens } = metadata.usage;
  parts.push(import_chalk.default.dim("\u2191") + import_chalk.default.yellow(` ${formatTokens(inputTokens)}`));
+ if (cachedInputTokens && cachedInputTokens > 0) {
+ parts.push(import_chalk.default.dim("\u27F3") + import_chalk.default.blue(` ${formatTokens(cachedInputTokens)}`));
+ }
+ if (cacheCreationInputTokens && cacheCreationInputTokens > 0) {
+ parts.push(import_chalk.default.dim("\u270E") + import_chalk.default.magenta(` ${formatTokens(cacheCreationInputTokens)}`));
+ }
  parts.push(import_chalk.default.dim("\u2193") + import_chalk.default.green(` ${formatTokens(outputTokens)}`));
  }
  if (metadata.elapsedSeconds !== void 0 && metadata.elapsedSeconds > 0) {
@@ -5959,6 +6268,9 @@ var StreamProgress = class {
  callOutputTokensEstimated = true;
  callOutputChars = 0;
  isStreaming = false;
+ // Cache token tracking for live cost estimation during streaming
+ callCachedInputTokens = 0;
+ callCacheCreationInputTokens = 0;
  // Cumulative stats (cumulative mode)
  totalStartTime = Date.now();
  totalTokens = 0;
@@ -5984,11 +6296,13 @@ var StreamProgress = class {
  this.callOutputTokensEstimated = true;
  this.callOutputChars = 0;
  this.isStreaming = false;
+ this.callCachedInputTokens = 0;
+ this.callCacheCreationInputTokens = 0;
  this.start();
  }
  /**
  * Ends the current LLM call. Updates cumulative stats and switches to cumulative mode.
- * @param usage - Final token usage from the call
+ * @param usage - Final token usage from the call (including cached tokens if available)
  */
  endCall(usage) {
  this.iterations++;
@@ -6000,7 +6314,9 @@ var StreamProgress = class {
  const cost = this.modelRegistry.estimateCost(
  modelName,
  usage.inputTokens,
- usage.outputTokens
+ usage.outputTokens,
+ usage.cachedInputTokens ?? 0,
+ usage.cacheCreationInputTokens ?? 0
  );
  if (cost) {
  this.totalCost += cost.totalCost;
@@ -6040,6 +6356,16 @@ var StreamProgress = class {
  this.callOutputTokens = tokens;
  this.callOutputTokensEstimated = estimated;
  }
+ /**
+ * Sets cached token counts for the current call (from stream metadata).
+ * Used for live cost estimation during streaming.
+ * @param cachedInputTokens - Number of tokens read from cache (cheaper)
+ * @param cacheCreationInputTokens - Number of tokens written to cache (more expensive)
+ */
+ setCachedTokens(cachedInputTokens, cacheCreationInputTokens) {
+ this.callCachedInputTokens = cachedInputTokens;
+ this.callCacheCreationInputTokens = cacheCreationInputTokens;
+ }
  /**
  * Get total elapsed time in seconds since the first call started.
  * @returns Elapsed time in seconds with 1 decimal place
@@ -6104,11 +6430,32 @@ var StreamProgress = class {
  parts.push(import_chalk2.default.dim("\u2193") + import_chalk2.default.green(` ${prefix}${formatTokens(outTokens)}`));
  }
  parts.push(import_chalk2.default.dim(`${elapsed}s`));
- if (this.totalCost > 0) {
- parts.push(import_chalk2.default.cyan(`$${formatCost(this.totalCost)}`));
+ const callCost = this.calculateCurrentCallCost(outTokens);
+ if (callCost > 0) {
+ parts.push(import_chalk2.default.cyan(`$${formatCost(callCost)}`));
  }
  this.target.write(`\r${parts.join(import_chalk2.default.dim(" | "))} ${import_chalk2.default.cyan(spinner)}`);
  }
+ /**
+ * Calculates live cost estimate for the current streaming call.
+ * Uses current input/output tokens and cached token counts.
+ */
+ calculateCurrentCallCost(outputTokens) {
+ if (!this.modelRegistry || !this.model) return 0;
+ try {
+ const modelName = this.model.includes(":") ? this.model.split(":")[1] : this.model;
+ const cost = this.modelRegistry.estimateCost(
+ modelName,
+ this.callInputTokens,
+ outputTokens,
+ this.callCachedInputTokens,
+ this.callCacheCreationInputTokens
+ );
+ return cost?.totalCost ?? 0;
+ } catch {
+ return 0;
+ }
+ }
  renderCumulativeMode(spinner) {
  const elapsed = ((Date.now() - this.totalStartTime) / 1e3).toFixed(1);
  const parts = [];
@@ -6257,7 +6604,7 @@ function addCompleteOptions(cmd, defaults) {
  OPTION_DESCRIPTIONS.maxTokens,
  createNumericParser({ label: "Max tokens", integer: true, min: 1 }),
  defaults?.["max-tokens"]
- );
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
  }
  function addAgentOptions(cmd, defaults) {
  const gadgetAccumulator = (value, previous = []) => [
@@ -6286,7 +6633,7 @@ function addAgentOptions(cmd, defaults) {
  OPTION_FLAGS.noBuiltinInteraction,
  OPTION_DESCRIPTIONS.noBuiltinInteraction,
  defaults?.["builtin-interaction"] !== false
- );
+ ).option(OPTION_FLAGS.quiet, OPTION_DESCRIPTIONS.quiet, defaults?.quiet);
  }
  function configToCompleteOptions(config) {
  const result = {};
@@ -6294,6 +6641,7 @@ function configToCompleteOptions(config) {
  if (config.system !== void 0) result.system = config.system;
  if (config.temperature !== void 0) result.temperature = config.temperature;
  if (config["max-tokens"] !== void 0) result.maxTokens = config["max-tokens"];
+ if (config.quiet !== void 0) result.quiet = config.quiet;
  return result;
  }
  function configToAgentOptions(config) {
@@ -6311,6 +6659,7 @@ function configToAgentOptions(config) {
  result.gadgetStartPrefix = config["gadget-start-prefix"];
  if (config["gadget-end-prefix"] !== void 0)
  result.gadgetEndPrefix = config["gadget-end-prefix"];
+ if (config.quiet !== void 0) result.quiet = config.quiet;
  return result;
  }
 
@@ -6356,9 +6705,10 @@ async function executeAgent(promptArg, options, env) {
  const prompt = await resolvePrompt(promptArg, env);
  const client = env.createClient();
  const registry = new GadgetRegistry();
+ const stdinIsInteractive = isInteractive(env.stdin);
  if (options.builtins !== false) {
  for (const gadget of builtinGadgets) {
- if (options.builtinInteraction === false && gadget.name === "AskUser") {
+ if (gadget.name === "AskUser" && (options.builtinInteraction === false || !stdinIsInteractive)) {
  continue;
  }
  registry.registerByClass(gadget);
@@ -6416,6 +6766,10 @@ async function executeAgent(promptArg, options, env) {
  if (context.usage.outputTokens) {
  progress.setOutputTokens(context.usage.outputTokens, false);
  }
+ progress.setCachedTokens(
+ context.usage.cachedInputTokens ?? 0,
+ context.usage.cacheCreationInputTokens ?? 0
+ );
  }
  },
  // onLLMCallComplete: Finalize metrics after each LLM call
@@ -6434,11 +6788,13 @@ async function executeAgent(promptArg, options, env) {
  let callCost;
  if (context.usage && client.modelRegistry) {
  try {
- const modelName = options.model.includes(":") ? options.model.split(":")[1] : options.model;
+ const modelName = context.options.model.includes(":") ? context.options.model.split(":")[1] : context.options.model;
  const costResult = client.modelRegistry.estimateCost(
  modelName,
  context.usage.inputTokens,
- context.usage.outputTokens
+ context.usage.outputTokens,
+ context.usage.cachedInputTokens ?? 0,
+ context.usage.cacheCreationInputTokens ?? 0
  );
  if (costResult) callCost = costResult.totalCost;
  } catch {
@@ -6446,7 +6802,7 @@ async function executeAgent(promptArg, options, env) {
  }
  const callElapsed = progress.getCallElapsedSeconds();
  progress.endCall(context.usage);
- if (stderrTTY) {
+ if (!options.quiet) {
  const summary = renderSummary({
  iterations: context.iteration + 1,
  model: options.model,
@@ -6553,7 +6909,14 @@ Command rejected by user with message: "${response}"`
  printer.write(event.content);
  } else if (event.type === "gadget_result") {
  progress.pause();
- if (stderrTTY) {
+ if (options.quiet) {
+ if (event.result.gadgetName === "TellUser" && event.result.parameters?.message) {
+ const message = String(event.result.parameters.message);
+ const rendered = renderMarkdown(message);
+ env.stdout.write(`${rendered}
+ `);
+ }
+ } else {
  const tokenCount = await countGadgetOutputTokens(event.result.result);
  env.stderr.write(`${formatGadgetSummary({ ...event.result, tokenCount })}
  `);
@@ -6562,7 +6925,7 @@ Command rejected by user with message: "${response}"`
  }
  progress.complete();
  printer.ensureNewline();
- if (stderrTTY && iterations > 1) {
+ if (!options.quiet && iterations > 1) {
  env.stderr.write(`${import_chalk3.default.dim("\u2500".repeat(40))}
  `);
  const summary = renderOverallSummary({
@@ -6635,7 +6998,7 @@ async function executeComplete(promptArg, options, env) {
  progress.endCall(usage);
  progress.complete();
  printer.ensureNewline();
- if (stderrTTY) {
+ if (stderrTTY && !options.quiet) {
  const summary = renderSummary({ finishReason, usage, cost: progress.getTotalCost() });
  if (summary) {
  env.stderr.write(`${summary}
@@ -6656,9 +7019,102 @@ var import_node_fs3 = require("fs");
  var import_node_os = require("os");
  var import_node_path3 = require("path");
  var import_js_toml2 = require("js-toml");
+
+ // src/cli/templates.ts
+ var import_eta = require("eta");
+ var TemplateError = class extends Error {
+ constructor(message, promptName, configPath) {
+ super(promptName ? `[prompts.${promptName}]: ${message}` : message);
+ this.promptName = promptName;
+ this.configPath = configPath;
+ this.name = "TemplateError";
+ }
+ };
+ function createTemplateEngine(prompts, configPath) {
+ const eta = new import_eta.Eta({
+ views: "/",
+ // Required but we use named templates
+ autoEscape: false,
+ // Don't escape - these are prompts, not HTML
+ autoTrim: false
+ // Preserve whitespace in prompts
+ });
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ eta.loadTemplate(`@${name}`, template);
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ name,
+ configPath
+ );
+ }
+ }
+ return eta;
+ }
+ function resolveTemplate(eta, template, context = {}, configPath) {
+ try {
+ const fullContext = {
+ ...context,
+ env: process.env
+ };
+ return eta.renderString(template, fullContext);
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ void 0,
+ configPath
+ );
+ }
+ }
+ function validatePrompts(prompts, configPath) {
+ const eta = createTemplateEngine(prompts, configPath);
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ eta.renderString(template, { env: {} });
+ } catch (error) {
+ throw new TemplateError(
+ error instanceof Error ? error.message : String(error),
+ name,
+ configPath
+ );
+ }
+ }
+ }
+ function validateEnvVars(template, promptName, configPath) {
+ const envVarPattern = /<%=\s*it\.env\.(\w+)\s*%>/g;
+ const matches = template.matchAll(envVarPattern);
+ for (const match of matches) {
+ const varName = match[1];
+ if (process.env[varName] === void 0) {
+ throw new TemplateError(
+ `Environment variable '${varName}' is not set`,
+ promptName,
+ configPath
+ );
+ }
+ }
+ }
+ function hasTemplateSyntax(str) {
+ return str.includes("<%");
+ }
+
+ // src/cli/config.ts
  var GLOBAL_CONFIG_KEYS = /* @__PURE__ */ new Set(["log-level", "log-file", "log-reset"]);
  var VALID_LOG_LEVELS = ["silly", "trace", "debug", "info", "warn", "error", "fatal"];
- var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set(["model", "system", "temperature", "max-tokens"]);
+ var COMPLETE_CONFIG_KEYS = /* @__PURE__ */ new Set([
+ "model",
+ "system",
+ "temperature",
+ "max-tokens",
+ "quiet",
+ "inherits",
+ "log-level",
+ "log-file",
+ "log-reset",
+ "type"
+ // Allowed for inheritance compatibility, ignored for built-in commands
+ ]);
  var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  "model",
  "system",
@@ -6669,16 +7125,20 @@ var AGENT_CONFIG_KEYS = /* @__PURE__ */ new Set([
  "builtins",
  "builtin-interaction",
  "gadget-start-prefix",
- "gadget-end-prefix"
+ "gadget-end-prefix",
+ "quiet",
+ "inherits",
+ "log-level",
+ "log-file",
+ "log-reset",
+ "type"
+ // Allowed for inheritance compatibility, ignored for built-in commands
  ]);
  var CUSTOM_CONFIG_KEYS = /* @__PURE__ */ new Set([
  ...COMPLETE_CONFIG_KEYS,
  ...AGENT_CONFIG_KEYS,
  "type",
- "description",
- "log-level",
- "log-file",
- "log-reset"
+ "description"
  ]);
  var VALID_PARAMETER_FORMATS = ["json", "yaml", "toml", "auto"];
  function getConfigPath() {
@@ -6729,6 +7189,39 @@ function validateStringArray(value, key, section) {
  }
  return value;
  }
+ function validateInherits(value, section) {
+ if (typeof value === "string") {
+ return value;
+ }
+ if (Array.isArray(value)) {
+ for (let i = 0; i < value.length; i++) {
+ if (typeof value[i] !== "string") {
+ throw new ConfigError(`[${section}].inherits[${i}] must be a string`);
+ }
+ }
+ return value;
+ }
+ throw new ConfigError(`[${section}].inherits must be a string or array of strings`);
+ }
+ function validateLoggingConfig(raw, section) {
+ const result = {};
+ if ("log-level" in raw) {
+ const level = validateString(raw["log-level"], "log-level", section);
+ if (!VALID_LOG_LEVELS.includes(level)) {
+ throw new ConfigError(
+ `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
+ );
+ }
+ result["log-level"] = level;
+ }
+ if ("log-file" in raw) {
+ result["log-file"] = validateString(raw["log-file"], "log-file", section);
+ }
+ if ("log-reset" in raw) {
+ result["log-reset"] = validateBoolean(raw["log-reset"], "log-reset", section);
+ }
+ return result;
+ }
  function validateBaseConfig(raw, section) {
  const result = {};
  if ("model" in raw) {
@@ -6743,6 +7236,9 @@ function validateBaseConfig(raw, section) {
  max: 2
  });
  }
+ if ("inherits" in raw) {
+ result.inherits = validateInherits(raw.inherits, section);
+ }
  return result;
  }
  function validateGlobalConfig(raw, section) {
@@ -6755,23 +7251,7 @@ function validateGlobalConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = {};
- if ("log-level" in rawObj) {
- const level = validateString(rawObj["log-level"], "log-level", section);
- if (!VALID_LOG_LEVELS.includes(level)) {
- throw new ConfigError(
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
- );
- }
- result["log-level"] = level;
- }
- if ("log-file" in rawObj) {
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
- }
- if ("log-reset" in rawObj) {
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
- }
- return result;
+ return validateLoggingConfig(rawObj, section);
  }
  function validateCompleteConfig(raw, section) {
  if (typeof raw !== "object" || raw === null) {
@@ -6783,13 +7263,19 @@ function validateCompleteConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = { ...validateBaseConfig(rawObj, section) };
+ const result = {
+ ...validateBaseConfig(rawObj, section),
+ ...validateLoggingConfig(rawObj, section)
+ };
  if ("max-tokens" in rawObj) {
  result["max-tokens"] = validateNumber(rawObj["max-tokens"], "max-tokens", section, {
  integer: true,
  min: 1
  });
  }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
  return result;
  }
  function validateAgentConfig(raw, section) {
@@ -6802,7 +7288,10 @@ function validateAgentConfig(raw, section) {
  throw new ConfigError(`[${section}].${key} is not a valid option`);
  }
  }
- const result = { ...validateBaseConfig(rawObj, section) };
+ const result = {
+ ...validateBaseConfig(rawObj, section),
+ ...validateLoggingConfig(rawObj, section)
+ };
  if ("max-iterations" in rawObj) {
  result["max-iterations"] = validateNumber(rawObj["max-iterations"], "max-iterations", section, {
  integer: true,
@@ -6845,6 +7334,9 @@ function validateAgentConfig(raw, section) {
  section
  );
  }
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
+ }
  return result;
  }
  function validateCustomConfig(raw, section) {
@@ -6920,20 +7412,22 @@ function validateCustomConfig(raw, section) {
  min: 1
  });
  }
- if ("log-level" in rawObj) {
- const level = validateString(rawObj["log-level"], "log-level", section);
- if (!VALID_LOG_LEVELS.includes(level)) {
- throw new ConfigError(
- `[${section}].log-level must be one of: ${VALID_LOG_LEVELS.join(", ")}`
- );
- }
- result["log-level"] = level;
+ if ("quiet" in rawObj) {
+ result.quiet = validateBoolean(rawObj.quiet, "quiet", section);
  }
- if ("log-file" in rawObj) {
- result["log-file"] = validateString(rawObj["log-file"], "log-file", section);
+ Object.assign(result, validateLoggingConfig(rawObj, section));
+ return result;
+ }
+ function validatePromptsConfig(raw, section) {
+ if (typeof raw !== "object" || raw === null) {
+ throw new ConfigError(`[${section}] must be a table`);
  }
- if ("log-reset" in rawObj) {
- result["log-reset"] = validateBoolean(rawObj["log-reset"], "log-reset", section);
+ const result = {};
+ for (const [key, value] of Object.entries(raw)) {
+ if (typeof value !== "string") {
+ throw new ConfigError(`[${section}].${key} must be a string`);
+ }
+ result[key] = value;
  }
  return result;
  }
@@ -6951,6 +7445,8 @@ function validateConfig(raw, configPath) {
  result.complete = validateCompleteConfig(value, key);
  } else if (key === "agent") {
  result.agent = validateAgentConfig(value, key);
+ } else if (key === "prompts") {
+ result.prompts = validatePromptsConfig(value, key);
  } else {
  result[key] = validateCustomConfig(value, key);
  }
@@ -6986,12 +7482,119 @@ function loadConfig() {
  configPath
  );
  }
- return validateConfig(raw, configPath);
+ const validated = validateConfig(raw, configPath);
+ const inherited = resolveInheritance(validated, configPath);
+ return resolveTemplatesInConfig(inherited, configPath);
  }
  function getCustomCommandNames(config) {
- const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent"]);
+ const reserved = /* @__PURE__ */ new Set(["global", "complete", "agent", "prompts"]);
  return Object.keys(config).filter((key) => !reserved.has(key));
  }
+ function resolveTemplatesInConfig(config, configPath) {
+ const prompts = config.prompts ?? {};
+ const hasPrompts = Object.keys(prompts).length > 0;
+ let hasTemplates = false;
+ for (const [sectionName, section] of Object.entries(config)) {
+ if (sectionName === "global" || sectionName === "prompts") continue;
+ if (!section || typeof section !== "object") continue;
+ const sectionObj = section;
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
+ hasTemplates = true;
+ break;
+ }
+ }
+ for (const template of Object.values(prompts)) {
+ if (hasTemplateSyntax(template)) {
+ hasTemplates = true;
+ break;
+ }
+ }
+ if (!hasPrompts && !hasTemplates) {
+ return config;
+ }
+ try {
+ validatePrompts(prompts, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(error.message, configPath);
+ }
+ throw error;
+ }
+ for (const [name, template] of Object.entries(prompts)) {
+ try {
+ validateEnvVars(template, name, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(error.message, configPath);
+ }
+ throw error;
+ }
+ }
+ const eta = createTemplateEngine(prompts, configPath);
+ const result = { ...config };
+ for (const [sectionName, section] of Object.entries(config)) {
+ if (sectionName === "global" || sectionName === "prompts") continue;
+ if (!section || typeof section !== "object") continue;
+ const sectionObj = section;
+ if (typeof sectionObj.system === "string" && hasTemplateSyntax(sectionObj.system)) {
+ try {
+ validateEnvVars(sectionObj.system, void 0, configPath);
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
+ }
+ throw error;
+ }
+ try {
+ const resolved = resolveTemplate(eta, sectionObj.system, {}, configPath);
+ result[sectionName] = {
+ ...sectionObj,
+ system: resolved
+ };
+ } catch (error) {
+ if (error instanceof TemplateError) {
+ throw new ConfigError(`[${sectionName}].system: ${error.message}`, configPath);
+ }
+ throw error;
+ }
+ }
+ }
+ return result;
+ }
+ function resolveInheritance(config, configPath) {
+ const resolved = {};
+ const resolving = /* @__PURE__ */ new Set();
+ function resolveSection(name) {
+ if (name in resolved) {
+ return resolved[name];
+ }
+ if (resolving.has(name)) {
+ throw new ConfigError(`Circular inheritance detected: ${name}`, configPath);
+ }
+ const section = config[name];
+ if (section === void 0 || typeof section !== "object") {
+ throw new ConfigError(`Cannot inherit from unknown section: ${name}`, configPath);
+ }
+ resolving.add(name);
+ const sectionObj = section;
+ const inheritsRaw = sectionObj.inherits;
+ const inheritsList = inheritsRaw ? Array.isArray(inheritsRaw) ? inheritsRaw : [inheritsRaw] : [];
+ let merged = {};
+ for (const parent of inheritsList) {
+ const parentResolved = resolveSection(parent);
+ merged = { ...merged, ...parentResolved };
+ }
+ const { inherits: _inherits, ...ownValues } = sectionObj;
+ merged = { ...merged, ...ownValues };
+ resolving.delete(name);
+ resolved[name] = merged;
+ return merged;
+ }
+ for (const name of Object.keys(config)) {
+ resolveSection(name);
+ }
+ return resolved;
+ }
 
  // src/cli/models-command.ts
  var import_chalk4 = __toESM(require("chalk"), 1);
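A minimal sketch of how resolveInheritance merges config sections (section names and values are hypothetical):

const merged = resolveInheritance({
  base: { model: "anthropic:claude-sonnet-4-5", quiet: true },
  review: { inherits: "base", system: "Review the diff." }
});
// merged.review -> { model: "anthropic:claude-sonnet-4-5", quiet: true, system: "Review the diff." }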