llmist 0.6.2 → 0.8.0

@@ -869,7 +869,7 @@ function findSafeDelimiter(content) {
  }
  let counter = 1;
  while (counter < 1e3) {
- const delimiter = `HEREDOC_${counter}`;
+ const delimiter = `__GADGET_PARAM_${counter}__`;
  const regex = new RegExp(`^${delimiter}\\s*$`);
  const isUsed = lines.some((line) => regex.test(line));
  if (!isUsed) {
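
Note: the generated fallback delimiters now live in the same `__GADGET_PARAM_*__` namespace as the built-in list in the hunk at -964 below, so LLM-authored content that happens to contain a bare `EOF` or `HEREDOC_1` line no longer breaks parameter framing. A standalone sketch of the scan above (hypothetical helper name, mirroring the bundled logic):

```typescript
// Rejects a candidate if any line of the content is exactly that delimiter.
function findSafeDelimiterSketch(content: string): string {
  const lines = content.split("\n");
  for (let counter = 1; counter < 1000; counter++) {
    const delimiter = `__GADGET_PARAM_${counter}__`;
    const regex = new RegExp(`^${delimiter}\\s*$`);
    if (!lines.some((line) => regex.test(line))) return delimiter;
  }
  throw new Error("no collision-free delimiter found");
}
console.log(findSafeDelimiterSketch("__GADGET_PARAM_1__\nbody")); // __GADGET_PARAM_2__
```
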
@@ -927,6 +927,10 @@ function formatParamsAsYaml(params) {
  }
  return lines.join("\n");
  }
+ function formatTomlInlineTable(obj) {
+ const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
+ return `{ ${entries.join(", ")} }`;
+ }
  function formatTomlValue(value) {
  if (typeof value === "string") {
  if (value.includes("\n")) {
@@ -944,10 +948,17 @@ ${delimiter}`;
  return '""';
  }
  if (Array.isArray(value)) {
- return JSON.stringify(value);
+ if (value.length === 0) return "[]";
+ const items = value.map((item) => {
+ if (typeof item === "object" && item !== null && !Array.isArray(item)) {
+ return formatTomlInlineTable(item);
+ }
+ return formatTomlValue(item);
+ });
+ return `[${items.join(", ")}]`;
  }
  if (typeof value === "object") {
- return JSON.stringify(value);
+ return formatTomlInlineTable(value);
  }
  return JSON.stringify(value);
  }
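
Note: 0.6.2 serialized object and array values with `JSON.stringify`, which yields `{"k":"v"}`; that is not a valid TOML value, so gadget parameters containing nested objects could not round-trip through the TOML format. A self-contained sketch of the 0.8.0 rendering (string escaping simplified to `JSON.stringify`):

```typescript
// Mirrors formatTomlInlineTable/formatTomlValue above for plain data.
function tomlValue(v: unknown): string {
  if (Array.isArray(v)) return `[${v.map(tomlValue).join(", ")}]`;
  if (typeof v === "object" && v !== null) {
    const entries = Object.entries(v).map(([k, x]) => `${k} = ${tomlValue(x)}`);
    return `{ ${entries.join(", ")} }`;
  }
  return JSON.stringify(v); // strings, numbers, booleans
}
console.log(tomlValue({ name: "ada", id: 7 })); // { name = "ada", id = 7 }
console.log(tomlValue([{ a: 1 }, "x"]));        // [{ a = 1 }, "x"]
```
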
@@ -964,7 +975,16 @@ var init_gadget = __esm({
  "use strict";
  init_schema_to_json();
  init_schema_validator();
- HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+ HEREDOC_DELIMITERS = [
+ "__GADGET_PARAM_EOF__",
+ "__GADGET_PARAM_END__",
+ "__GADGET_PARAM_DOC__",
+ "__GADGET_PARAM_CONTENT__",
+ "__GADGET_PARAM_TEXT__",
+ "__GADGET_PARAM_HEREDOC__",
+ "__GADGET_PARAM_DATA__",
+ "__GADGET_PARAM_BLOCK__"
+ ];
  BaseGadget = class {
  /**
  * The name of the gadget. Used for identification when LLM calls it.
@@ -1828,6 +1848,14 @@ function preprocessTomlHeredoc(tomlStr) {
  }
  return result.join("\n");
  }
+ function stripMarkdownFences(content) {
+ let cleaned = content.trim();
+ const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
+ const closingFence = /\n?```\s*$/;
+ cleaned = cleaned.replace(openingFence, "");
+ cleaned = cleaned.replace(closingFence, "");
+ return cleaned.trim();
+ }
  var globalInvocationCounter, StreamParser;
  var init_parser = __esm({
  "src/gadgets/parser.ts"() {
@@ -1881,35 +1909,36 @@ var init_parser = __esm({
  * Parse parameter string according to configured format
  */
  parseParameters(raw) {
+ const cleaned = stripMarkdownFences(raw);
  if (this.parameterFormat === "json") {
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "JSON") };
  }
  }
  if (this.parameterFormat === "yaml") {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "YAML") };
  }
  }
  if (this.parameterFormat === "toml") {
  try {
- return { parameters: parseToml(preprocessTomlHeredoc(raw)) };
+ return { parameters: parseToml(preprocessTomlHeredoc(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "TOML") };
  }
  }
  try {
- return { parameters: JSON.parse(raw) };
+ return { parameters: JSON.parse(cleaned) };
  } catch {
  try {
- return { parameters: parseToml(preprocessTomlHeredoc(raw)) };
+ return { parameters: parseToml(preprocessTomlHeredoc(cleaned)) };
  } catch {
  try {
- return { parameters: yaml2.load(preprocessYaml(raw)) };
+ return { parameters: yaml2.load(preprocessYaml(cleaned)) };
  } catch (error) {
  return { parseError: this.truncateParseError(error, "auto") };
  }
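
Note: every branch now parses the fence-stripped text (via the new `stripMarkdownFences` above), and the final `auto` fallback tries JSON, then TOML, then YAML, in that order. A minimal sketch of what the new preprocessing fixes (only the JSON step is shown; `parseToml`, `preprocessYaml`, and friends are bundle internals):

```typescript
// A fenced response that 0.6.2 rejected at JSON.parse with
// "Unexpected token" now parses once the fences are stripped.
const llmOutput = '```json\n{"path": "src/index.ts", "recursive": true}\n```';
const cleaned = llmOutput
  .trim()
  .replace(/^```(?:toml|yaml|json)?\s*\n/i, "") // same regexes as stripMarkdownFences
  .replace(/\n?```\s*$/, "")
  .trim();
console.log(JSON.parse(cleaned)); // { path: 'src/index.ts', recursive: true }
```
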
@@ -2594,7 +2623,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2618,7 +2648,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -2642,7 +2673,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -2666,7 +2698,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2024-11",
  features: {
@@ -2690,7 +2723,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2714,7 +2748,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 15,
  output: 75,
- cachedInput: 1.5
+ cachedInput: 1.5,
+ cacheWriteInput: 18.75
  },
  knowledgeCutoff: "2025-03",
  features: {
@@ -2737,7 +2772,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.8,
  output: 4,
- cachedInput: 0.08
+ cachedInput: 0.08,
+ cacheWriteInput: 1
  },
  knowledgeCutoff: "2024-07",
  features: {
@@ -2760,7 +2796,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 0.25,
  output: 1.25,
- cachedInput: 0.025
+ cachedInput: 0.025,
+ cacheWriteInput: 0.3125
  },
  knowledgeCutoff: "2023-08",
  features: {
@@ -2784,7 +2821,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 1,
  output: 5,
- cachedInput: 0.1
+ cachedInput: 0.1,
+ cacheWriteInput: 1.25
  },
  knowledgeCutoff: "2025-02",
  features: {
@@ -2808,7 +2846,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 3,
  output: 15,
- cachedInput: 0.3
+ cachedInput: 0.3,
+ cacheWriteInput: 3.75
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -2832,7 +2871,8 @@ var init_anthropic_models = __esm({
  pricing: {
  input: 5,
  output: 25,
- cachedInput: 0.5
+ cachedInput: 0.5,
+ cacheWriteInput: 6.25
  },
  knowledgeCutoff: "2025-03",
  features: {
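
Note: every Anthropic entry gains a `cacheWriteInput` rate alongside the existing `cachedInput`. Each added value is exactly 1.25x the model's base input rate, which matches Anthropic's published surcharge for default (5-minute TTL) prompt cache writes, while cache reads stay at 0.1x. A quick consistency check over the pairs above:

```typescript
// Every (input, cacheWriteInput) pair added in the hunks above:
const pairs: Array<[number, number]> = [
  [3, 3.75], [1, 1.25], [15, 18.75], [0.8, 1], [0.25, 0.3125], [5, 6.25],
];
console.log(pairs.every(([input, write]) => write === input * 1.25)); // true
```
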
@@ -2947,15 +2987,27 @@ var init_anthropic = __esm({
  }
  buildRequestPayload(options, descriptor, spec, messages) {
  const systemMessages = messages.filter((message) => message.role === "system");
- const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
- const conversation = messages.filter(
+ const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+ type: "text",
+ text: m.content,
+ // Add cache_control to the LAST system message block
+ ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+ })) : void 0;
+ const nonSystemMessages = messages.filter(
  (message) => message.role !== "system"
- ).map((message) => ({
+ );
+ const lastUserIndex = nonSystemMessages.reduce(
+ (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+ -1
+ );
+ const conversation = nonSystemMessages.map((message, index) => ({
  role: message.role,
  content: [
  {
  type: "text",
- text: message.content
+ text: message.content,
+ // Add cache_control to the LAST user message
+ ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
  }
  ]
  }));
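
Note: the payload builder now emits structured system blocks and marks two cache breakpoints, the last system block and the last user message, so everything up to each marker becomes a cacheable prefix on Anthropic's side. A hand-written sketch (not captured output) of the shape for a single system + user exchange:

```typescript
// Illustrative request payload produced by buildRequestPayload above:
const payload = {
  system: [
    {
      type: "text",
      text: "You are a helpful agent.",
      cache_control: { type: "ephemeral" }, // last system block
    },
  ],
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          text: "Summarize the repository.",
          cache_control: { type: "ephemeral" }, // last user message
        },
      ],
    },
  ],
};
```
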
@@ -2981,15 +3033,22 @@ var init_anthropic = __esm({
  async *wrapStream(iterable) {
  const stream2 = iterable;
  let inputTokens = 0;
+ let cachedInputTokens = 0;
+ let cacheCreationInputTokens = 0;
  for await (const event of stream2) {
  if (event.type === "message_start") {
- inputTokens = event.message.usage.input_tokens;
+ const usage = event.message.usage;
+ cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+ cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+ inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
  yield {
  text: "",
  usage: {
  inputTokens,
  outputTokens: 0,
- totalTokens: inputTokens
+ totalTokens: inputTokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  },
  rawEvent: event
  };
@@ -3003,7 +3062,9 @@ var init_anthropic = __esm({
  const usage = event.usage ? {
  inputTokens,
  outputTokens: event.usage.output_tokens,
- totalTokens: inputTokens + event.usage.output_tokens
+ totalTokens: inputTokens + event.usage.output_tokens,
+ cachedInputTokens,
+ cacheCreationInputTokens
  } : void 0;
  if (event.delta.stop_reason || usage) {
  yield {
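
Note: Anthropic's `input_tokens` field excludes cache reads and cache writes, so the adapter now sums all three to report the full prompt size, carrying the splits separately for pricing. Illustrative arithmetic with made-up counts:

```typescript
// Example message_start usage payload (counts are invented):
const usage = {
  input_tokens: 12, // only the uncached tail
  cache_read_input_tokens: 9000,
  cache_creation_input_tokens: 1500,
};
const cachedInputTokens = usage.cache_read_input_tokens ?? 0;
const cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
const inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
console.log(inputTokens); // 10512, the prompt's true size
```
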
@@ -3084,6 +3145,7 @@ var init_gemini_models = __esm({
  "src/providers/gemini-models.ts"() {
  "use strict";
  GEMINI_MODELS = [
+ // Gemini 3 Pro (Preview)
  {
  provider: "gemini",
  modelId: "gemini-3-pro-preview",
@@ -3092,8 +3154,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 2,
+ // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
  output: 12,
+ // $12.00 for prompts <= 200k, $18.00 for > 200k
  cachedInput: 0.2
+ // $0.20 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3106,9 +3171,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 3",
  releaseDate: "2025-11-18",
- notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+ notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
  }
  },
+ // Gemini 2.5 Pro
  {
  provider: "gemini",
  modelId: "gemini-2.5-pro",
@@ -3117,8 +3183,11 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 1.25,
+ // $1.25 for prompts <= 200k, $2.50 for > 200k
  output: 10,
+ // $10.00 for prompts <= 200k, $15.00 for > 200k
  cachedInput: 0.125
+ // $0.125 for prompts <= 200k
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3131,9 +3200,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+ notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
  }
  },
+ // Gemini 2.5 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash",
@@ -3142,8 +3212,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.3,
+ // $0.30 for text/image/video, $1.00 for audio
  output: 2.5,
  cachedInput: 0.03
+ // $0.03 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3156,9 +3228,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Best price-performance ratio with thinking enabled by default"
+ notes: "First hybrid reasoning model with 1M context and thinking budgets."
  }
  },
+ // Gemini 2.5 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.5-flash-lite",
@@ -3167,8 +3240,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 65536,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.30 for audio
  output: 0.4,
  cachedInput: 0.01
+ // $0.01 for text/image/video
  },
  knowledgeCutoff: "2025-01",
  features: {
@@ -3180,9 +3255,10 @@ var init_gemini_models = __esm({
  metadata: {
  family: "Gemini 2.5",
  releaseDate: "2025-06",
- notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+ notes: "Smallest and most cost effective model, built for at scale usage."
  }
  },
+ // Gemini 2.0 Flash
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash",
@@ -3191,8 +3267,10 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.1,
+ // $0.10 for text/image/video, $0.70 for audio
  output: 0.4,
- cachedInput: 0.01
+ cachedInput: 0.025
+ // $0.025 for text/image/video
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3203,9 +3281,10 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Previous generation with 1M context and multimodal capabilities"
+ notes: "Balanced multimodal model with 1M context, built for the era of Agents."
  }
  },
+ // Gemini 2.0 Flash-Lite
  {
  provider: "gemini",
  modelId: "gemini-2.0-flash-lite",
@@ -3214,8 +3293,8 @@ var init_gemini_models = __esm({
  maxOutputTokens: 8192,
  pricing: {
  input: 0.075,
- output: 0.3,
- cachedInput: 75e-4
+ output: 0.3
+ // No context caching available for 2.0-flash-lite
  },
  knowledgeCutoff: "2024-08",
  features: {
@@ -3226,7 +3305,7 @@ var init_gemini_models = __esm({
  },
  metadata: {
  family: "Gemini 2.0",
- notes: "Lightweight previous generation model for cost-sensitive applications"
+ notes: "Smallest and most cost effective 2.0 model for at scale usage."
  }
  }
  ];
@@ -3396,7 +3475,9 @@ var init_gemini = __esm({
  return {
  inputTokens: usageMetadata.promptTokenCount ?? 0,
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
- totalTokens: usageMetadata.totalTokenCount ?? 0
+ totalTokens: usageMetadata.totalTokenCount ?? 0,
+ // Gemini returns cached token count in cachedContentTokenCount
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
  };
  }
  /**
@@ -3452,10 +3533,11 @@ var init_openai_models = __esm({
  "src/providers/openai-models.ts"() {
  "use strict";
  OPENAI_MODELS = [
+ // GPT-5 Family
  {
  provider: "openai",
  modelId: "gpt-5.1",
- displayName: "GPT-5.1 Instant",
+ displayName: "GPT-5.1",
  contextWindow: 128e3,
  maxOutputTokens: 32768,
  pricing: {
@@ -3475,34 +3557,7 @@ var init_openai_models = __esm({
  metadata: {
  family: "GPT-5",
  releaseDate: "2025-11-12",
- notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
- supportsTemperature: false
- }
- },
- {
- provider: "openai",
- modelId: "gpt-5.1-thinking",
- displayName: "GPT-5.1 Thinking",
- contextWindow: 196e3,
- maxOutputTokens: 32768,
- pricing: {
- input: 1.25,
- output: 10,
- cachedInput: 0.125
- },
- knowledgeCutoff: "2024-09-30",
- features: {
- streaming: true,
- functionCalling: true,
- vision: true,
- reasoning: true,
- structuredOutputs: true,
- fineTuning: true
- },
- metadata: {
- family: "GPT-5",
- releaseDate: "2025-11-12",
- notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+ notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
  supportsTemperature: false
  }
  },
@@ -3582,6 +3637,255 @@ var init_openai_models = __esm({
  notes: "Fastest, most cost-efficient version for well-defined tasks",
  supportsTemperature: false
  }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-5-pro",
+ displayName: "GPT-5 Pro",
+ contextWindow: 272e3,
+ maxOutputTokens: 128e3,
+ pricing: {
+ input: 15,
+ output: 120
+ // No cached input pricing for gpt-5-pro
+ },
+ knowledgeCutoff: "2024-09-30",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "GPT-5",
+ notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+ supportsTemperature: false
+ }
+ },
+ // GPT-4.1 Family
+ {
+ provider: "openai",
+ modelId: "gpt-4.1",
+ displayName: "GPT-4.1",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Improved GPT-4 with better instruction following"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-mini",
+ displayName: "GPT-4.1 Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.4,
+ output: 1.6,
+ cachedInput: 0.1
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Cost-efficient GPT-4.1 variant"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4.1-nano",
+ displayName: "GPT-4.1 Nano",
+ contextWindow: 128e3,
+ maxOutputTokens: 32768,
+ pricing: {
+ input: 0.1,
+ output: 0.4,
+ cachedInput: 0.025
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4.1",
+ notes: "Fastest GPT-4.1 variant for simple tasks"
+ }
+ },
+ // GPT-4o Family
+ {
+ provider: "openai",
+ modelId: "gpt-4o",
+ displayName: "GPT-4o",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 2.5,
+ output: 10,
+ cachedInput: 1.25
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Multimodal model optimized for speed"
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "gpt-4o-mini",
+ displayName: "GPT-4o Mini",
+ contextWindow: 128e3,
+ maxOutputTokens: 16384,
+ pricing: {
+ input: 0.15,
+ output: 0.6,
+ cachedInput: 0.075
+ },
+ knowledgeCutoff: "2024-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "GPT-4o",
+ notes: "Fast and affordable multimodal model"
+ }
+ },
+ // o-series (Reasoning models)
+ {
+ provider: "openai",
+ modelId: "o1",
+ displayName: "o1",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 15,
+ output: 60,
+ cachedInput: 7.5
+ },
+ knowledgeCutoff: "2024-12-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Advanced reasoning model with chain-of-thought",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3",
+ displayName: "o3",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 2,
+ output: 8,
+ cachedInput: 0.5
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Next-gen reasoning model, more efficient than o1",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o4-mini",
+ displayName: "o4 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.275
+ },
+ knowledgeCutoff: "2025-04-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true,
+ fineTuning: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Cost-efficient reasoning model",
+ supportsTemperature: false
+ }
+ },
+ {
+ provider: "openai",
+ modelId: "o3-mini",
+ displayName: "o3 Mini",
+ contextWindow: 2e5,
+ maxOutputTokens: 1e5,
+ pricing: {
+ input: 1.1,
+ output: 4.4,
+ cachedInput: 0.55
+ },
+ knowledgeCutoff: "2025-01-01",
+ features: {
+ streaming: true,
+ functionCalling: true,
+ vision: true,
+ reasoning: true,
+ structuredOutputs: true
+ },
+ metadata: {
+ family: "o-series",
+ notes: "Compact reasoning model for cost-sensitive applications",
+ supportsTemperature: false
+ }
  }
  ];
  }
@@ -3662,7 +3966,8 @@ var init_openai = __esm({
  const usage = chunk.usage ? {
  inputTokens: chunk.usage.prompt_tokens,
  outputTokens: chunk.usage.completion_tokens,
- totalTokens: chunk.usage.total_tokens
+ totalTokens: chunk.usage.total_tokens,
+ cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
  } : void 0;
  if (finishReason || usage) {
  yield { text: "", finishReason, usage, rawEvent: chunk };
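
Note: for OpenAI the cached count arrives on the final usage chunk (when usage reporting is enabled on the stream) under `prompt_tokens_details.cached_tokens`; unlike Anthropic there is no separate cache-write figure. Illustrative mapping with invented counts:

```typescript
// Shape of a final stream chunk's usage and the mapped result above:
const chunkUsage = {
  prompt_tokens: 1200,
  completion_tokens: 300,
  total_tokens: 1500,
  prompt_tokens_details: { cached_tokens: 1024 },
};
const mapped = {
  inputTokens: chunkUsage.prompt_tokens,
  outputTokens: chunkUsage.completion_tokens,
  totalTokens: chunkUsage.total_tokens,
  cachedInputTokens: chunkUsage.prompt_tokens_details?.cached_tokens ?? 0,
};
console.log(mapped.cachedInputTokens); // 1024
```
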
@@ -3879,20 +4184,28 @@ var init_model_registry = __esm({
  /**
  * Estimate API cost for a given model and token usage
  * @param modelId - Full model identifier
- * @param inputTokens - Number of input tokens
+ * @param inputTokens - Number of input tokens (total, including cached and cache creation)
  * @param outputTokens - Number of output tokens
- * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+ * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+ * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
  * @returns CostEstimate if model found, undefined otherwise
  */
- estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
  const spec = this.getModelSpec(modelId);
  if (!spec) return void 0;
- const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
- const inputCost = inputTokens / 1e6 * inputRate;
+ const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+ const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+ const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+ const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+ const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+ const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+ const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
  const outputCost = outputTokens / 1e6 * spec.pricing.output;
  const totalCost = inputCost + outputCost;
  return {
  inputCost,
+ cachedInputCost,
+ cacheCreationCost,
  outputCost,
  totalCost,
  currency: "USD"
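
Note: the boolean `useCachedInput` flag is replaced by pricing three disjoint token slices: uncached input, cache reads, and cache writes (the last falling back to the base input rate for providers without a `cacheWriteInput`). A worked example with the Sonnet-style rates from the hunks above ($3 / $0.30 / $3.75 per million input, $15 per million output) and hypothetical counts:

```typescript
// 100k prompt tokens: 90k read from cache, 5k written to cache, 5k uncached.
const [input, cachedRate, cacheWriteRate, output] = [3, 0.3, 3.75, 15];
const inputTokens = 100_000, cachedTok = 90_000, writeTok = 5_000, outTok = 2_000;
const inputCost =
  ((inputTokens - cachedTok - writeTok) / 1e6) * input + // 5k uncached -> $0.015
  (cachedTok / 1e6) * cachedRate +                       // 90k reads   -> $0.027
  (writeTok / 1e6) * cacheWriteRate;                     // 5k writes   -> $0.01875
console.log(inputCost + (outTok / 1e6) * output); // ~0.09075, vs $0.33 with no caching
```
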
@@ -4068,6 +4381,7 @@ var init_agent = __esm({
  gadgetEndPrefix;
  onHumanInputRequired;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -4098,6 +4412,7 @@ var init_agent = __esm({
  this.gadgetEndPrefix = options.gadgetEndPrefix;
  this.onHumanInputRequired = options.onHumanInputRequired;
  this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
+ this.textWithGadgetsHandler = options.textWithGadgetsHandler;
  this.stopOnGadgetError = options.stopOnGadgetError ?? true;
  this.shouldContinueAfterError = options.shouldContinueAfterError;
  this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
@@ -4285,6 +4600,17 @@ var init_agent = __esm({
  }
  }
  if (result.didExecuteGadgets) {
+ if (this.textWithGadgetsHandler) {
+ const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
+ if (textContent.trim()) {
+ const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
+ this.conversation.addGadgetCall(
+ gadgetName,
+ parameterMapping(textContent),
+ resultMapping ? resultMapping(textContent) : textContent
+ );
+ }
+ }
  for (const output of result.outputs) {
  if (output.type === "gadget_result") {
  const gadgetResult = output.result;
@@ -4296,7 +4622,13 @@ var init_agent = __esm({
  }
  }
  } else {
- this.conversation.addAssistantMessage(finalMessage);
+ if (finalMessage.trim()) {
+ this.conversation.addGadgetCall(
+ "TellUser",
+ { message: finalMessage, done: false, type: "info" },
+ `\u2139\uFE0F ${finalMessage}`
+ );
+ }
  const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
  if (shouldBreak) {
  break;
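
Note: both branches now record free-form assistant text as a gadget call rather than a bare assistant message: via the configurable `textWithGadgetsHandler` when gadgets also ran, and hard-coded to `TellUser` in the text-only branch. This keeps the history in a uniform call/result shape for the next turn. The logical content of the recorded call (the record type behind `addGadgetCall` is internal):

```typescript
// What the text-only branch records for a plain-text turn (illustrative):
const finalMessage = "I'll start by listing the files.";
const recorded = {
  gadgetName: "TellUser",
  parameters: { message: finalMessage, done: false, type: "info" },
  result: `\u2139\uFE0F ${finalMessage}`, // "ℹ️ <text>", matching the code above
};
console.log(recorded.result); // ℹ️ I'll start by listing the files.
```
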
@@ -4481,6 +4813,7 @@ var AgentBuilder;
  var init_builder = __esm({
  "src/agent/builder.ts"() {
  "use strict";
+ init_constants();
  init_model_shortcuts();
  init_registry();
  init_agent();
@@ -4502,6 +4835,7 @@ var init_builder = __esm({
  gadgetStartPrefix;
  gadgetEndPrefix;
  textOnlyHandler;
+ textWithGadgetsHandler;
  stopOnGadgetError;
  shouldContinueAfterError;
  defaultGadgetTimeoutMs;
@@ -4764,6 +5098,30 @@ var init_builder = __esm({
  this.textOnlyHandler = handler;
  return this;
  }
+ /**
+ * Set the handler for text content that appears alongside gadget calls.
+ *
+ * When set, text accompanying gadget responses will be wrapped as a
+ * synthetic gadget call before the actual gadget results in the
+ * conversation history.
+ *
+ * @param handler - Configuration for wrapping text
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * // Wrap text as TellUser gadget
+ * .withTextWithGadgetsHandler({
+ * gadgetName: "TellUser",
+ * parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
+ * resultMapping: (text) => `ℹ️ ${text}`,
+ * })
+ * ```
+ */
+ withTextWithGadgetsHandler(handler) {
+ this.textWithGadgetsHandler = handler;
+ return this;
+ }
  /**
  * Set whether to stop gadget execution on first error.
  *
@@ -4878,6 +5236,69 @@ var init_builder = __esm({
  this.gadgetOutputLimitPercent = percent;
  return this;
  }
+ /**
+ * Add a synthetic gadget call to the conversation history.
+ *
+ * This is useful for in-context learning - showing the LLM what "past self"
+ * did correctly so it mimics the pattern. The call is formatted with proper
+ * markers and parameter format.
+ *
+ * @param gadgetName - Name of the gadget
+ * @param parameters - Parameters passed to the gadget
+ * @param result - Result returned by the gadget
+ * @returns This builder for chaining
+ *
+ * @example
+ * ```typescript
+ * .withSyntheticGadgetCall(
+ * 'TellUser',
+ * {
+ * message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
+ * done: false,
+ * type: 'info'
+ * },
+ * 'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
+ * )
+ * ```
+ */
+ withSyntheticGadgetCall(gadgetName, parameters, result) {
+ const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
+ const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
+ const format = this.parameterFormat ?? "yaml";
+ const paramStr = this.formatSyntheticParameters(parameters, format);
+ this.initialMessages.push({
+ role: "assistant",
+ content: `${startPrefix}${gadgetName}
+ ${paramStr}
+ ${endPrefix}`
+ });
+ this.initialMessages.push({
+ role: "user",
+ content: `Result: ${result}`
+ });
+ return this;
+ }
+ /**
+ * Format parameters for synthetic gadget calls.
+ * Uses heredoc for multiline string values.
+ */
+ formatSyntheticParameters(parameters, format) {
+ if (format === "json" || format === "auto") {
+ return JSON.stringify(parameters);
+ }
+ return Object.entries(parameters).map(([key, value]) => {
+ if (typeof value === "string" && value.includes("\n")) {
+ const separator = format === "yaml" ? ":" : " =";
+ return `${key}${separator} <<<EOF
+ ${value}
+ EOF`;
+ }
+ if (format === "yaml") {
+ return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
+ }
+ return `${key} = ${JSON.stringify(value)}`;
+ }).join("\n");
+ }
  /**
  * Build and create the agent with the given user prompt.
  * Returns the Agent instance ready to run.
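
Note: with the default `yaml` format, multiline string parameters render via the `<<<EOF` heredoc branch of `formatSyntheticParameters`. Roughly what one call pushes onto the initial messages, with placeholder markers since the real `GADGET_START_PREFIX` / `GADGET_END_PREFIX` constants are defined outside this chunk:

```typescript
// withSyntheticGadgetCall("TellUser", { message: "Hi\nThere", done: false }, "ok")
// yields an assistant turn shaped like this (marker strings are hypothetical):
const startPrefix = "<<gadget:"; // hypothetical stand-in
const endPrefix = "<<end>>";     // hypothetical stand-in
const assistantTurn = {
  role: "assistant",
  content: `${startPrefix}TellUser
message: <<<EOF
Hi
There
EOF
done: false
${endPrefix}`,
};
const userTurn = { role: "user", content: "Result: ok" };
```
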
@@ -4920,6 +5341,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -5021,6 +5443,7 @@ var init_builder = __esm({
  gadgetStartPrefix: this.gadgetStartPrefix,
  gadgetEndPrefix: this.gadgetEndPrefix,
  textOnlyHandler: this.textOnlyHandler,
+ textWithGadgetsHandler: this.textWithGadgetsHandler,
  stopOnGadgetError: this.stopOnGadgetError,
  shouldContinueAfterError: this.shouldContinueAfterError,
  defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -5276,6 +5699,9 @@ var init_client = __esm({
  });

 export {
+ GADGET_START_PREFIX,
+ GADGET_END_PREFIX,
+ init_constants,
  MODEL_ALIASES,
  resolveModel,
  hasProviderPrefix,
@@ -5284,9 +5710,6 @@ export {
  init_model_shortcuts,
  GadgetRegistry,
  init_registry,
- GADGET_START_PREFIX,
- GADGET_END_PREFIX,
- init_constants,
  DEFAULT_PROMPTS,
  resolvePromptTemplate,
  resolveRulesTemplate,
@@ -5344,4 +5767,4 @@ export {
  AgentBuilder,
  init_builder
  };
- //# sourceMappingURL=chunk-DVK6ZQOV.js.map
+ //# sourceMappingURL=chunk-62M4TDAK.js.map