llmist 0.6.2 → 0.8.0

package/dist/index.cjs CHANGED
@@ -30,6 +30,20 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
 ));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 
+// src/core/constants.ts
+var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
+var init_constants = __esm({
+  "src/core/constants.ts"() {
+    "use strict";
+    GADGET_START_PREFIX = "!!!GADGET_START:";
+    GADGET_END_PREFIX = "!!!GADGET_END";
+    DEFAULT_GADGET_OUTPUT_LIMIT = true;
+    DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
+    CHARS_PER_TOKEN = 4;
+    FALLBACK_CONTEXT_WINDOW = 128e3;
+  }
+});
+
 // src/core/model-shortcuts.ts
 function isKnownModelPattern(model) {
   const normalized = model.toLowerCase();
@@ -344,20 +358,6 @@ var init_registry = __esm({
   }
 });
 
-// src/core/constants.ts
-var GADGET_START_PREFIX, GADGET_END_PREFIX, DEFAULT_GADGET_OUTPUT_LIMIT, DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT, CHARS_PER_TOKEN, FALLBACK_CONTEXT_WINDOW;
-var init_constants = __esm({
-  "src/core/constants.ts"() {
-    "use strict";
-    GADGET_START_PREFIX = "!!!GADGET_START:";
-    GADGET_END_PREFIX = "!!!GADGET_END";
-    DEFAULT_GADGET_OUTPUT_LIMIT = true;
-    DEFAULT_GADGET_OUTPUT_LIMIT_PERCENT = 15;
-    CHARS_PER_TOKEN = 4;
-    FALLBACK_CONTEXT_WINDOW = 128e3;
-  }
-});
-
 // src/core/prompt-config.ts
 function resolvePromptTemplate(template, defaultValue, context) {
   const resolved = template ?? defaultValue;
@@ -881,7 +881,7 @@ function findSafeDelimiter(content) {
   }
   let counter = 1;
   while (counter < 1e3) {
-    const delimiter = `HEREDOC_${counter}`;
+    const delimiter = `__GADGET_PARAM_${counter}__`;
     const regex = new RegExp(`^${delimiter}\\s*$`);
     const isUsed = lines.some((line) => regex.test(line));
     if (!isUsed) {
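The numbered fallback delimiters change from `HEREDOC_N` to `__GADGET_PARAM_N__`, making accidental collisions with user content (where a bare `EOF` or `HEREDOC_1` line is plausible) far less likely. A standalone sketch of the selection behavior, assumed equivalent to the bundled `findSafeDelimiter` fallback above:

```typescript
// Pick the first numbered delimiter that never appears alone on a line.
function pickNumberedDelimiter(content: string): string {
  const lines = content.split("\n");
  for (let counter = 1; counter < 1000; counter++) {
    const delimiter = `__GADGET_PARAM_${counter}__`;
    const isUsed = lines.some((line) => new RegExp(`^${delimiter}\\s*$`).test(line));
    if (!isUsed) return delimiter;
  }
  throw new Error("No safe delimiter found");
}

pickNumberedDelimiter("plain text");            // "__GADGET_PARAM_1__"
pickNumberedDelimiter("__GADGET_PARAM_1__\nx"); // "__GADGET_PARAM_2__"
```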
@@ -939,6 +939,10 @@ function formatParamsAsYaml(params) {
   }
   return lines.join("\n");
 }
+function formatTomlInlineTable(obj) {
+  const entries = Object.entries(obj).map(([k, v]) => `${k} = ${formatTomlValue(v)}`);
+  return `{ ${entries.join(", ")} }`;
+}
 function formatTomlValue(value) {
   if (typeof value === "string") {
     if (value.includes("\n")) {
@@ -956,10 +960,17 @@ ${delimiter}`;
     return '""';
   }
   if (Array.isArray(value)) {
-    return JSON.stringify(value);
+    if (value.length === 0) return "[]";
+    const items = value.map((item) => {
+      if (typeof item === "object" && item !== null && !Array.isArray(item)) {
+        return formatTomlInlineTable(item);
+      }
+      return formatTomlValue(item);
+    });
+    return `[${items.join(", ")}]`;
   }
   if (typeof value === "object") {
-    return JSON.stringify(value);
+    return formatTomlInlineTable(value);
   }
   return JSON.stringify(value);
 }
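Previously, objects and arrays of objects fell through to `JSON.stringify`, which is not valid TOML; 0.8.0 renders them as inline tables. A minimal standalone mirror of the new behavior (assumed equivalent to `formatTomlValue`/`formatTomlInlineTable` above, simple scalar cases only):

```typescript
// Hypothetical mirror for illustration, not the package export.
function tomlValue(v: unknown): string {
  if (Array.isArray(v)) {
    return `[${v.map(tomlValue).join(", ")}]`;
  }
  if (typeof v === "object" && v !== null) {
    const entries = Object.entries(v as Record<string, unknown>)
      .map(([k, x]) => `${k} = ${tomlValue(x)}`);
    return `{ ${entries.join(", ")} }`;
  }
  return JSON.stringify(v); // strings, numbers, booleans
}

console.log(tomlValue({ path: "a.txt", mode: "w" }));
// { path = "a.txt", mode = "w" }   (0.6.2 emitted JSON here, invalid as a TOML value)
console.log(tomlValue([{ id: 1 }, { id: 2 }]));
// [{ id = 1 }, { id = 2 }]
```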
@@ -977,7 +988,16 @@ var init_gadget = __esm({
     yaml = __toESM(require("js-yaml"), 1);
     init_schema_to_json();
     init_schema_validator();
-    HEREDOC_DELIMITERS = ["EOF", "END", "DOC", "CONTENT", "TEXT", "HEREDOC", "DATA", "BLOCK"];
+    HEREDOC_DELIMITERS = [
+      "__GADGET_PARAM_EOF__",
+      "__GADGET_PARAM_END__",
+      "__GADGET_PARAM_DOC__",
+      "__GADGET_PARAM_CONTENT__",
+      "__GADGET_PARAM_TEXT__",
+      "__GADGET_PARAM_HEREDOC__",
+      "__GADGET_PARAM_DATA__",
+      "__GADGET_PARAM_BLOCK__"
+    ];
     BaseGadget = class {
       /**
       * The name of the gadget. Used for identification when LLM calls it.
@@ -2005,6 +2025,14 @@ function preprocessTomlHeredoc(tomlStr) {
   }
   return result.join("\n");
 }
+function stripMarkdownFences(content) {
+  let cleaned = content.trim();
+  const openingFence = /^```(?:toml|yaml|json)?\s*\n/i;
+  const closingFence = /\n?```\s*$/;
+  cleaned = cleaned.replace(openingFence, "");
+  cleaned = cleaned.replace(closingFence, "");
+  return cleaned.trim();
+}
 var yaml2, import_js_toml, globalInvocationCounter, StreamParser;
 var init_parser = __esm({
   "src/gadgets/parser.ts"() {
@@ -2060,35 +2088,36 @@ var init_parser = __esm({
      * Parse parameter string according to configured format
      */
     parseParameters(raw) {
+      const cleaned = stripMarkdownFences(raw);
       if (this.parameterFormat === "json") {
         try {
-          return { parameters: JSON.parse(raw) };
+          return { parameters: JSON.parse(cleaned) };
         } catch (error) {
           return { parseError: this.truncateParseError(error, "JSON") };
         }
       }
       if (this.parameterFormat === "yaml") {
         try {
-          return { parameters: yaml2.load(preprocessYaml(raw)) };
+          return { parameters: yaml2.load(preprocessYaml(cleaned)) };
         } catch (error) {
           return { parseError: this.truncateParseError(error, "YAML") };
         }
       }
       if (this.parameterFormat === "toml") {
         try {
-          return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+          return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
         } catch (error) {
           return { parseError: this.truncateParseError(error, "TOML") };
         }
       }
       try {
-        return { parameters: JSON.parse(raw) };
+        return { parameters: JSON.parse(cleaned) };
       } catch {
         try {
-          return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(raw)) };
+          return { parameters: (0, import_js_toml.load)(preprocessTomlHeredoc(cleaned)) };
        } catch {
          try {
-            return { parameters: yaml2.load(preprocessYaml(raw)) };
+            return { parameters: yaml2.load(preprocessYaml(cleaned)) };
           } catch (error) {
             return { parseError: this.truncateParseError(error, "auto") };
           }
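Net effect: a gadget parameter block that the LLM wraps in a markdown code fence now parses cleanly in every format branch. A small self-contained illustration, using a hypothetical mirror of `stripMarkdownFences` with the same regexes:

```typescript
// Assumed-equivalent standalone mirror, for illustration only.
function stripFences(content: string): string {
  return content
    .trim()
    .replace(/^```(?:toml|yaml|json)?\s*\n/i, "") // opening fence, optional language tag
    .replace(/\n?```\s*$/, "")                    // closing fence
    .trim();
}

const raw = '```json\n{ "path": "notes.txt", "mode": "append" }\n```';
console.log(JSON.parse(stripFences(raw))); // { path: "notes.txt", mode: "append" }
// Unfenced input passes through untouched, so 0.6.2 behavior is preserved for it.
```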
@@ -2634,6 +2663,7 @@ var init_agent = __esm({
    gadgetEndPrefix;
    onHumanInputRequired;
    textOnlyHandler;
+   textWithGadgetsHandler;
    stopOnGadgetError;
    shouldContinueAfterError;
    defaultGadgetTimeoutMs;
@@ -2664,6 +2694,7 @@
      this.gadgetEndPrefix = options.gadgetEndPrefix;
      this.onHumanInputRequired = options.onHumanInputRequired;
      this.textOnlyHandler = options.textOnlyHandler ?? "terminate";
+     this.textWithGadgetsHandler = options.textWithGadgetsHandler;
      this.stopOnGadgetError = options.stopOnGadgetError ?? true;
      this.shouldContinueAfterError = options.shouldContinueAfterError;
      this.defaultGadgetTimeoutMs = options.defaultGadgetTimeoutMs;
@@ -2851,6 +2882,17 @@
          }
        }
        if (result.didExecuteGadgets) {
+         if (this.textWithGadgetsHandler) {
+           const textContent = result.outputs.filter((output) => output.type === "text").map((output) => output.content).join("");
+           if (textContent.trim()) {
+             const { gadgetName, parameterMapping, resultMapping } = this.textWithGadgetsHandler;
+             this.conversation.addGadgetCall(
+               gadgetName,
+               parameterMapping(textContent),
+               resultMapping ? resultMapping(textContent) : textContent
+             );
+           }
+         }
          for (const output of result.outputs) {
            if (output.type === "gadget_result") {
              const gadgetResult = output.result;
@@ -2862,7 +2904,13 @@
            }
          }
        } else {
-         this.conversation.addAssistantMessage(finalMessage);
+         if (finalMessage.trim()) {
+           this.conversation.addGadgetCall(
+             "TellUser",
+             { message: finalMessage, done: false, type: "info" },
+             `\u2139\uFE0F ${finalMessage}`
+           );
+         }
          const shouldBreak = await this.handleTextOnlyResponse(finalMessage);
          if (shouldBreak) {
            break;
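Note the behavior change here: a text-only assistant turn is no longer stored via `addAssistantMessage`; it is re-recorded as a synthetic `TellUser` gadget call, so the model always sees a consistent gadget-call transcript. An illustrative sketch of the resulting history entry (assumed shape; `conversation` stands in for the agent's conversation handle):

```typescript
// A text-only reply such as "All done!" is now persisted as:
conversation.addGadgetCall(
  "TellUser",
  { message: "All done!", done: false, type: "info" },
  "ℹ️ All done!" // the `\u2139\uFE0F` prefix from the code above
);
```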
@@ -3057,7 +3105,8 @@ var init_anthropic_models = __esm({
        pricing: {
          input: 3,
          output: 15,
-         cachedInput: 0.3
+         cachedInput: 0.3,
+         cacheWriteInput: 3.75
        },
        knowledgeCutoff: "2025-01",
        features: {
@@ -3081,7 +3130,8 @@
        pricing: {
          input: 1,
          output: 5,
-         cachedInput: 0.1
+         cachedInput: 0.1,
+         cacheWriteInput: 1.25
        },
        knowledgeCutoff: "2025-02",
        features: {
@@ -3105,7 +3155,8 @@
        pricing: {
          input: 3,
          output: 15,
-         cachedInput: 0.3
+         cachedInput: 0.3,
+         cacheWriteInput: 3.75
        },
        knowledgeCutoff: "2025-03",
        features: {
@@ -3129,7 +3180,8 @@
        pricing: {
          input: 3,
          output: 15,
-         cachedInput: 0.3
+         cachedInput: 0.3,
+         cacheWriteInput: 3.75
        },
        knowledgeCutoff: "2024-11",
        features: {
@@ -3153,7 +3205,8 @@
        pricing: {
          input: 15,
          output: 75,
-         cachedInput: 1.5
+         cachedInput: 1.5,
+         cacheWriteInput: 18.75
        },
        knowledgeCutoff: "2025-01",
        features: {
@@ -3177,7 +3230,8 @@
        pricing: {
          input: 15,
          output: 75,
-         cachedInput: 1.5
+         cachedInput: 1.5,
+         cacheWriteInput: 18.75
        },
        knowledgeCutoff: "2025-03",
        features: {
@@ -3200,7 +3254,8 @@
        pricing: {
          input: 0.8,
          output: 4,
-         cachedInput: 0.08
+         cachedInput: 0.08,
+         cacheWriteInput: 1
        },
        knowledgeCutoff: "2024-07",
        features: {
@@ -3223,7 +3278,8 @@
        pricing: {
          input: 0.25,
          output: 1.25,
-         cachedInput: 0.025
+         cachedInput: 0.025,
+         cacheWriteInput: 0.3125
        },
        knowledgeCutoff: "2023-08",
        features: {
@@ -3247,7 +3303,8 @@
        pricing: {
          input: 1,
          output: 5,
-         cachedInput: 0.1
+         cachedInput: 0.1,
+         cacheWriteInput: 1.25
        },
        knowledgeCutoff: "2025-02",
        features: {
@@ -3271,7 +3328,8 @@
        pricing: {
          input: 3,
          output: 15,
-         cachedInput: 0.3
+         cachedInput: 0.3,
+         cacheWriteInput: 3.75
        },
        knowledgeCutoff: "2025-01",
        features: {
@@ -3295,7 +3353,8 @@
        pricing: {
          input: 5,
          output: 25,
-         cachedInput: 0.5
+         cachedInput: 0.5,
+         cacheWriteInput: 6.25
        },
        knowledgeCutoff: "2025-03",
        features: {
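All eleven Anthropic entries follow the same rule: the new `cacheWriteInput` is 1.25x the base input rate (matching Anthropic's published premium for 5-minute cache writes), while `cachedInput` stays at 0.1x for cache reads. A quick sanity check over a few of the rates above:

```typescript
// Rates in USD per million tokens, taken from this diff.
const rates: Array<[input: number, cacheWrite: number]> = [
  [3, 3.75],
  [15, 18.75],
  [0.8, 1],
  [0.25, 0.3125],
  [5, 6.25],
];
for (const [input, cacheWrite] of rates) {
  console.assert(Math.abs(cacheWrite - input * 1.25) < 1e-9, `${input} -> ${cacheWrite}`);
}
```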
@@ -3410,15 +3469,27 @@ var init_anthropic = __esm({
    }
    buildRequestPayload(options, descriptor, spec, messages) {
      const systemMessages = messages.filter((message) => message.role === "system");
-     const system = systemMessages.length > 0 ? systemMessages.map((m) => m.content).join("\n\n") : void 0;
-     const conversation = messages.filter(
+     const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
+       type: "text",
+       text: m.content,
+       // Add cache_control to the LAST system message block
+       ...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
+     })) : void 0;
+     const nonSystemMessages = messages.filter(
        (message) => message.role !== "system"
-     ).map((message) => ({
+     );
+     const lastUserIndex = nonSystemMessages.reduce(
+       (lastIdx, msg, idx) => msg.role === "user" ? idx : lastIdx,
+       -1
+     );
+     const conversation = nonSystemMessages.map((message, index) => ({
        role: message.role,
        content: [
          {
            type: "text",
-           text: message.content
+           text: message.content,
+           // Add cache_control to the LAST user message
+           ...message.role === "user" && index === lastUserIndex ? { cache_control: { type: "ephemeral" } } : {}
          }
        ]
      }));
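For reference, the request now carries two ephemeral cache breakpoints: one on the last system block and one on the last user message. An illustrative payload shape, assumed from the mapping above rather than captured wire output:

```typescript
// Hypothetical Messages API payload produced by buildRequestPayload:
const illustrativePayload = {
  system: [
    { type: "text", text: "You are an agent.", cache_control: { type: "ephemeral" } },
  ],
  messages: [
    { role: "assistant", content: [{ type: "text", text: "Working on it." }] },
    {
      role: "user",
      content: [
        { type: "text", text: "Continue.", cache_control: { type: "ephemeral" } },
      ],
    },
  ],
};
```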
@@ -3444,15 +3515,22 @@
    async *wrapStream(iterable) {
      const stream2 = iterable;
      let inputTokens = 0;
+     let cachedInputTokens = 0;
+     let cacheCreationInputTokens = 0;
      for await (const event of stream2) {
        if (event.type === "message_start") {
-         inputTokens = event.message.usage.input_tokens;
+         const usage = event.message.usage;
+         cachedInputTokens = usage.cache_read_input_tokens ?? 0;
+         cacheCreationInputTokens = usage.cache_creation_input_tokens ?? 0;
+         inputTokens = usage.input_tokens + cachedInputTokens + cacheCreationInputTokens;
          yield {
            text: "",
            usage: {
              inputTokens,
              outputTokens: 0,
-             totalTokens: inputTokens
+             totalTokens: inputTokens,
+             cachedInputTokens,
+             cacheCreationInputTokens
            },
            rawEvent: event
          };
@@ -3466,7 +3544,9 @@
          const usage = event.usage ? {
            inputTokens,
            outputTokens: event.usage.output_tokens,
-           totalTokens: inputTokens + event.usage.output_tokens
+           totalTokens: inputTokens + event.usage.output_tokens,
+           cachedInputTokens,
+           cacheCreationInputTokens
          } : void 0;
          if (event.delta.stop_reason || usage) {
            yield {
@@ -3547,6 +3627,7 @@ var init_gemini_models = __esm({
   "src/providers/gemini-models.ts"() {
     "use strict";
     GEMINI_MODELS = [
+      // Gemini 3 Pro (Preview)
       {
         provider: "gemini",
         modelId: "gemini-3-pro-preview",
@@ -3555,8 +3636,11 @@
         maxOutputTokens: 65536,
         pricing: {
           input: 2,
+          // $2.00 for prompts <= 200k, $4.00 for > 200k (using lower tier)
           output: 12,
+          // $12.00 for prompts <= 200k, $18.00 for > 200k
           cachedInput: 0.2
+          // $0.20 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3569,9 +3653,10 @@
         metadata: {
           family: "Gemini 3",
           releaseDate: "2025-11-18",
-          notes: "Most advanced model. 1501 Elo LMArena, 91.9% GPQA Diamond, 76.2% SWE-bench. Deep Think mode available."
+          notes: "Best model for multimodal understanding, agentic and vibe-coding. Deep Think mode available."
         }
       },
+      // Gemini 2.5 Pro
       {
         provider: "gemini",
         modelId: "gemini-2.5-pro",
@@ -3580,8 +3665,11 @@
         maxOutputTokens: 65536,
         pricing: {
           input: 1.25,
+          // $1.25 for prompts <= 200k, $2.50 for > 200k
           output: 10,
+          // $10.00 for prompts <= 200k, $15.00 for > 200k
           cachedInput: 0.125
+          // $0.125 for prompts <= 200k
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3594,9 +3682,10 @@
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "Balanced multimodal model with 1M context. Best for complex agents and reasoning."
+          notes: "State-of-the-art multipurpose model. Excels at coding and complex reasoning."
         }
       },
+      // Gemini 2.5 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash",
@@ -3605,8 +3694,10 @@
         maxOutputTokens: 65536,
         pricing: {
           input: 0.3,
+          // $0.30 for text/image/video, $1.00 for audio
           output: 2.5,
           cachedInput: 0.03
+          // $0.03 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3619,9 +3710,10 @@
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "Best price-performance ratio with thinking enabled by default"
+          notes: "First hybrid reasoning model with 1M context and thinking budgets."
         }
       },
+      // Gemini 2.5 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.5-flash-lite",
@@ -3630,8 +3722,10 @@
         maxOutputTokens: 65536,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.30 for audio
           output: 0.4,
           cachedInput: 0.01
+          // $0.01 for text/image/video
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -3643,9 +3737,10 @@
         metadata: {
           family: "Gemini 2.5",
           releaseDate: "2025-06",
-          notes: "Fastest and most cost-efficient model for high-volume, low-latency tasks"
+          notes: "Smallest and most cost effective model, built for at scale usage."
         }
       },
+      // Gemini 2.0 Flash
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash",
@@ -3654,8 +3749,10 @@
         maxOutputTokens: 8192,
         pricing: {
           input: 0.1,
+          // $0.10 for text/image/video, $0.70 for audio
           output: 0.4,
-          cachedInput: 0.01
+          cachedInput: 0.025
+          // $0.025 for text/image/video
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3666,9 +3763,10 @@
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "Previous generation with 1M context and multimodal capabilities"
+          notes: "Balanced multimodal model with 1M context, built for the era of Agents."
         }
       },
+      // Gemini 2.0 Flash-Lite
       {
         provider: "gemini",
         modelId: "gemini-2.0-flash-lite",
@@ -3677,8 +3775,8 @@
         maxOutputTokens: 8192,
         pricing: {
           input: 0.075,
-          output: 0.3,
-          cachedInput: 75e-4
+          output: 0.3
+          // No context caching available for 2.0-flash-lite
         },
         knowledgeCutoff: "2024-08",
         features: {
@@ -3689,7 +3787,7 @@
         },
         metadata: {
           family: "Gemini 2.0",
-          notes: "Lightweight previous generation model for cost-sensitive applications"
+          notes: "Smallest and most cost effective 2.0 model for at scale usage."
         }
       }
     ];
@@ -3859,7 +3957,9 @@ var init_gemini = __esm({
      return {
        inputTokens: usageMetadata.promptTokenCount ?? 0,
        outputTokens: usageMetadata.candidatesTokenCount ?? 0,
-       totalTokens: usageMetadata.totalTokenCount ?? 0
+       totalTokens: usageMetadata.totalTokenCount ?? 0,
+       // Gemini returns cached token count in cachedContentTokenCount
+       cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
      };
    }
    /**
@@ -3915,10 +4015,11 @@
   "src/providers/openai-models.ts"() {
     "use strict";
     OPENAI_MODELS = [
+      // GPT-5 Family
       {
         provider: "openai",
         modelId: "gpt-5.1",
-        displayName: "GPT-5.1 Instant",
+        displayName: "GPT-5.1",
         contextWindow: 128e3,
         maxOutputTokens: 32768,
         pricing: {
@@ -3938,34 +4039,7 @@
         metadata: {
           family: "GPT-5",
           releaseDate: "2025-11-12",
-          notes: "Warmer, more intelligent, better instruction following. 2-3x faster than GPT-5.",
-          supportsTemperature: false
-        }
-      },
-      {
-        provider: "openai",
-        modelId: "gpt-5.1-thinking",
-        displayName: "GPT-5.1 Thinking",
-        contextWindow: 196e3,
-        maxOutputTokens: 32768,
-        pricing: {
-          input: 1.25,
-          output: 10,
-          cachedInput: 0.125
-        },
-        knowledgeCutoff: "2024-09-30",
-        features: {
-          streaming: true,
-          functionCalling: true,
-          vision: true,
-          reasoning: true,
-          structuredOutputs: true,
-          fineTuning: true
-        },
-        metadata: {
-          family: "GPT-5",
-          releaseDate: "2025-11-12",
-          notes: "Advanced reasoning with thinking levels: Light, Standard, Extended, Heavy. Best for complex tasks.",
+          notes: "Latest GPT-5 with improved instruction following. 2-3x faster than GPT-5.",
           supportsTemperature: false
         }
       },
@@ -4045,6 +4119,255 @@
           notes: "Fastest, most cost-efficient version for well-defined tasks",
           supportsTemperature: false
         }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-5-pro",
+        displayName: "GPT-5 Pro",
+        contextWindow: 272e3,
+        maxOutputTokens: 128e3,
+        pricing: {
+          input: 15,
+          output: 120
+          // No cached input pricing for gpt-5-pro
+        },
+        knowledgeCutoff: "2024-09-30",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "GPT-5",
+          notes: "Premium tier with enhanced capabilities. Does not support prompt caching.",
+          supportsTemperature: false
+        }
+      },
+      // GPT-4.1 Family
+      {
+        provider: "openai",
+        modelId: "gpt-4.1",
+        displayName: "GPT-4.1",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 2,
+          output: 8,
+          cachedInput: 0.5
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Improved GPT-4 with better instruction following"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4.1-mini",
+        displayName: "GPT-4.1 Mini",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 0.4,
+          output: 1.6,
+          cachedInput: 0.1
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Cost-efficient GPT-4.1 variant"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4.1-nano",
+        displayName: "GPT-4.1 Nano",
+        contextWindow: 128e3,
+        maxOutputTokens: 32768,
+        pricing: {
+          input: 0.1,
+          output: 0.4,
+          cachedInput: 0.025
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4.1",
+          notes: "Fastest GPT-4.1 variant for simple tasks"
+        }
+      },
+      // GPT-4o Family
+      {
+        provider: "openai",
+        modelId: "gpt-4o",
+        displayName: "GPT-4o",
+        contextWindow: 128e3,
+        maxOutputTokens: 16384,
+        pricing: {
+          input: 2.5,
+          output: 10,
+          cachedInput: 1.25
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4o",
+          notes: "Multimodal model optimized for speed"
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "gpt-4o-mini",
+        displayName: "GPT-4o Mini",
+        contextWindow: 128e3,
+        maxOutputTokens: 16384,
+        pricing: {
+          input: 0.15,
+          output: 0.6,
+          cachedInput: 0.075
+        },
+        knowledgeCutoff: "2024-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "GPT-4o",
+          notes: "Fast and affordable multimodal model"
+        }
+      },
+      // o-series (Reasoning models)
+      {
+        provider: "openai",
+        modelId: "o1",
+        displayName: "o1",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 15,
+          output: 60,
+          cachedInput: 7.5
+        },
+        knowledgeCutoff: "2024-12-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Advanced reasoning model with chain-of-thought",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o3",
+        displayName: "o3",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 2,
+          output: 8,
+          cachedInput: 0.5
+        },
+        knowledgeCutoff: "2025-01-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Next-gen reasoning model, more efficient than o1",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o4-mini",
+        displayName: "o4 Mini",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 1.1,
+          output: 4.4,
+          cachedInput: 0.275
+        },
+        knowledgeCutoff: "2025-04-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true,
+          fineTuning: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Cost-efficient reasoning model",
+          supportsTemperature: false
+        }
+      },
+      {
+        provider: "openai",
+        modelId: "o3-mini",
+        displayName: "o3 Mini",
+        contextWindow: 2e5,
+        maxOutputTokens: 1e5,
+        pricing: {
+          input: 1.1,
+          output: 4.4,
+          cachedInput: 0.55
+        },
+        knowledgeCutoff: "2025-01-01",
+        features: {
+          streaming: true,
+          functionCalling: true,
+          vision: true,
+          reasoning: true,
+          structuredOutputs: true
+        },
+        metadata: {
+          family: "o-series",
+          notes: "Compact reasoning model for cost-sensitive applications",
+          supportsTemperature: false
+        }
       }
     ];
   }
@@ -4125,7 +4448,8 @@
          const usage = chunk.usage ? {
            inputTokens: chunk.usage.prompt_tokens,
            outputTokens: chunk.usage.completion_tokens,
-           totalTokens: chunk.usage.total_tokens
+           totalTokens: chunk.usage.total_tokens,
+           cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
          } : void 0;
          if (finishReason || usage) {
            yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -4342,20 +4666,28 @@
    /**
     * Estimate API cost for a given model and token usage
     * @param modelId - Full model identifier
-    * @param inputTokens - Number of input tokens
+    * @param inputTokens - Number of input tokens (total, including cached and cache creation)
     * @param outputTokens - Number of output tokens
-    * @param useCachedInput - Whether to use cached input pricing (if supported by provider)
+    * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
+    * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
     * @returns CostEstimate if model found, undefined otherwise
     */
-   estimateCost(modelId, inputTokens, outputTokens, useCachedInput = false) {
+   estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
      const spec = this.getModelSpec(modelId);
      if (!spec) return void 0;
-     const inputRate = useCachedInput && spec.pricing.cachedInput !== void 0 ? spec.pricing.cachedInput : spec.pricing.input;
-     const inputCost = inputTokens / 1e6 * inputRate;
+     const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
+     const cacheWriteRate = spec.pricing.cacheWriteInput ?? spec.pricing.input;
+     const uncachedInputTokens = inputTokens - cachedInputTokens - cacheCreationInputTokens;
+     const uncachedInputCost = uncachedInputTokens / 1e6 * spec.pricing.input;
+     const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
+     const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
+     const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
      const outputCost = outputTokens / 1e6 * spec.pricing.output;
      const totalCost = inputCost + outputCost;
      return {
        inputCost,
+       cachedInputCost,
+       cacheCreationCost,
        outputCost,
        totalCost,
        currency: "USD"
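A worked example of the new accounting, mirroring the arithmetic in `estimateCost` with Sonnet-class rates from this diff ($3 input, $15 output, $0.30 cache read, $3.75 cache write per million tokens):

```typescript
// Sketch of the cost math only; rates are per million tokens.
const pricing = { input: 3, output: 15, cachedInput: 0.3, cacheWriteInput: 3.75 };
const inputTokens = 100_000;            // total, including the two cache buckets
const cachedInputTokens = 80_000;       // cache reads
const cacheCreationInputTokens = 10_000; // cache writes
const outputTokens = 2_000;

const uncached = inputTokens - cachedInputTokens - cacheCreationInputTokens; // 10_000
const inputCost =
  (uncached / 1e6) * pricing.input +                          // $0.0300
  (cachedInputTokens / 1e6) * pricing.cachedInput +           // $0.0240
  (cacheCreationInputTokens / 1e6) * pricing.cacheWriteInput; // $0.0375
const outputCost = (outputTokens / 1e6) * pricing.output;     // $0.0300
console.log(inputCost + outputCost); // 0.1215, vs 0.33 with no caching at all
```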
@@ -4736,6 +5068,7 @@ var AgentBuilder;
var init_builder = __esm({
  "src/agent/builder.ts"() {
    "use strict";
+   init_constants();
    init_model_shortcuts();
    init_registry();
    init_agent();
@@ -4757,6 +5090,7 @@ var init_builder = __esm({
    gadgetStartPrefix;
    gadgetEndPrefix;
    textOnlyHandler;
+   textWithGadgetsHandler;
    stopOnGadgetError;
    shouldContinueAfterError;
    defaultGadgetTimeoutMs;
@@ -5019,6 +5353,30 @@
      this.textOnlyHandler = handler;
      return this;
    }
+   /**
+    * Set the handler for text content that appears alongside gadget calls.
+    *
+    * When set, text accompanying gadget responses will be wrapped as a
+    * synthetic gadget call before the actual gadget results in the
+    * conversation history.
+    *
+    * @param handler - Configuration for wrapping text
+    * @returns This builder for chaining
+    *
+    * @example
+    * ```typescript
+    * // Wrap text as TellUser gadget
+    * .withTextWithGadgetsHandler({
+    *   gadgetName: "TellUser",
+    *   parameterMapping: (text) => ({ message: text, done: false, type: "info" }),
+    *   resultMapping: (text) => `ℹ️ ${text}`,
+    * })
+    * ```
+    */
+   withTextWithGadgetsHandler(handler) {
+     this.textWithGadgetsHandler = handler;
+     return this;
+   }
    /**
     * Set whether to stop gadget execution on first error.
     *
@@ -5133,6 +5491,69 @@ var init_builder = __esm({
      this.gadgetOutputLimitPercent = percent;
      return this;
    }
+   /**
+    * Add a synthetic gadget call to the conversation history.
+    *
+    * This is useful for in-context learning - showing the LLM what "past self"
+    * did correctly so it mimics the pattern. The call is formatted with proper
+    * markers and parameter format.
+    *
+    * @param gadgetName - Name of the gadget
+    * @param parameters - Parameters passed to the gadget
+    * @param result - Result returned by the gadget
+    * @returns This builder for chaining
+    *
+    * @example
+    * ```typescript
+    * .withSyntheticGadgetCall(
+    *   'TellUser',
+    *   {
+    *     message: '👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands',
+    *     done: false,
+    *     type: 'info'
+    *   },
+    *   'ℹ️ 👋 Hello!\n\nHere\'s what I can do:\n- Analyze code\n- Run commands'
+    * )
+    * ```
+    */
+   withSyntheticGadgetCall(gadgetName, parameters, result) {
+     const startPrefix = this.gadgetStartPrefix ?? GADGET_START_PREFIX;
+     const endPrefix = this.gadgetEndPrefix ?? GADGET_END_PREFIX;
+     const format = this.parameterFormat ?? "yaml";
+     const paramStr = this.formatSyntheticParameters(parameters, format);
+     this.initialMessages.push({
+       role: "assistant",
+       content: `${startPrefix}${gadgetName}
+${paramStr}
+${endPrefix}`
+     });
+     this.initialMessages.push({
+       role: "user",
+       content: `Result: ${result}`
+     });
+     return this;
+   }
+   /**
+    * Format parameters for synthetic gadget calls.
+    * Uses heredoc for multiline string values.
+    */
+   formatSyntheticParameters(parameters, format) {
+     if (format === "json" || format === "auto") {
+       return JSON.stringify(parameters);
+     }
+     return Object.entries(parameters).map(([key, value]) => {
+       if (typeof value === "string" && value.includes("\n")) {
+         const separator = format === "yaml" ? ":" : " =";
+         return `${key}${separator} <<<EOF
+${value}
+EOF`;
+       }
+       if (format === "yaml") {
+         return typeof value === "string" ? `${key}: ${value}` : `${key}: ${JSON.stringify(value)}`;
+       }
+       return `${key} = ${JSON.stringify(value)}`;
+     }).join("\n");
+   }
    /**
     * Build and create the agent with the given user prompt.
     * Returns the Agent instance ready to run.
@@ -5175,6 +5596,7 @@ var init_builder = __esm({
      gadgetStartPrefix: this.gadgetStartPrefix,
      gadgetEndPrefix: this.gadgetEndPrefix,
      textOnlyHandler: this.textOnlyHandler,
+     textWithGadgetsHandler: this.textWithGadgetsHandler,
      stopOnGadgetError: this.stopOnGadgetError,
      shouldContinueAfterError: this.shouldContinueAfterError,
      defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,
@@ -5276,6 +5698,7 @@ var init_builder = __esm({
      gadgetStartPrefix: this.gadgetStartPrefix,
      gadgetEndPrefix: this.gadgetEndPrefix,
      textOnlyHandler: this.textOnlyHandler,
+     textWithGadgetsHandler: this.textWithGadgetsHandler,
      stopOnGadgetError: this.stopOnGadgetError,
      shouldContinueAfterError: this.shouldContinueAfterError,
      defaultGadgetTimeoutMs: this.defaultGadgetTimeoutMs,