llmist 15.12.0 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -229,7 +229,8 @@ var init_execution_tree = __esm({
229
229
  response: llmNode.response,
230
230
  usage: llmNode.usage,
231
231
  finishReason: llmNode.finishReason,
232
- cost: llmNode.cost
232
+ cost: llmNode.cost,
233
+ thinkingContent: params.thinkingContent
233
234
  });
234
235
  }
235
236
  /**
@@ -4529,7 +4530,10 @@ var init_hook_presets = __esm({
4529
4530
  const costEstimate = modelRegistry.estimateCost(
4530
4531
  modelName,
4531
4532
  ctx.usage.inputTokens,
4532
- ctx.usage.outputTokens
4533
+ ctx.usage.outputTokens,
4534
+ ctx.usage.cachedInputTokens ?? 0,
4535
+ ctx.usage.cacheCreationInputTokens ?? 0,
4536
+ ctx.usage.reasoningTokens ?? 0
4533
4537
  );
4534
4538
  if (costEstimate) {
4535
4539
  totalCost += costEstimate.totalCost;
@@ -5026,10 +5030,10 @@ var init_anthropic_models = __esm({
5026
5030
  contextWindow: 2e5,
5027
5031
  maxOutputTokens: 64e3,
5028
5032
  pricing: {
5029
- input: 0.8,
5030
- output: 4,
5031
- cachedInput: 0.08,
5032
- cacheWriteInput: 1
5033
+ input: 1,
5034
+ output: 5,
5035
+ cachedInput: 0.1,
5036
+ cacheWriteInput: 1.25
5033
5037
  },
5034
5038
  knowledgeCutoff: "2025-02",
5035
5039
  features: {
@@ -5225,10 +5229,10 @@ var init_anthropic_models = __esm({
5225
5229
  contextWindow: 2e5,
5226
5230
  maxOutputTokens: 64e3,
5227
5231
  pricing: {
5228
- input: 0.8,
5229
- output: 4,
5230
- cachedInput: 0.08,
5231
- cacheWriteInput: 1
5232
+ input: 1,
5233
+ output: 5,
5234
+ cachedInput: 0.1,
5235
+ cacheWriteInput: 1.25
5232
5236
  },
5233
5237
  knowledgeCutoff: "2025-02",
5234
5238
  features: {
@@ -5371,10 +5375,15 @@ var init_utils = __esm({
5371
5375
  });
5372
5376
 
5373
5377
  // src/providers/anthropic.ts
5378
+ function resolveAnthropicThinking(reasoning) {
5379
+ if (!reasoning?.enabled) return void 0;
5380
+ const budget = reasoning.budgetTokens ? Math.max(1024, reasoning.budgetTokens) : ANTHROPIC_EFFORT_BUDGET[reasoning.effort ?? "medium"];
5381
+ return { type: "enabled", budget_tokens: budget };
5382
+ }
5374
5383
  function createAnthropicProviderFromEnv() {
5375
5384
  return createProviderFromEnv("ANTHROPIC_API_KEY", import_sdk.default, AnthropicMessagesProvider);
5376
5385
  }
5377
- var import_sdk, AnthropicMessagesProvider;
5386
+ var import_sdk, ANTHROPIC_EFFORT_BUDGET, AnthropicMessagesProvider;
5378
5387
  var init_anthropic = __esm({
5379
5388
  "src/providers/anthropic.ts"() {
5380
5389
  "use strict";
@@ -5384,6 +5393,14 @@ var init_anthropic = __esm({
5384
5393
  init_base_provider();
5385
5394
  init_constants2();
5386
5395
  init_utils();
5396
+ ANTHROPIC_EFFORT_BUDGET = {
5397
+ none: 1024,
5398
+ // Minimum allowed by Anthropic
5399
+ low: 2048,
5400
+ medium: 8192,
5401
+ high: 16384,
5402
+ maximum: 32768
5403
+ };
5387
5404
  AnthropicMessagesProvider = class extends BaseProviderAdapter {
5388
5405
  providerId = "anthropic";
5389
5406
  supports(descriptor) {
@@ -5437,15 +5454,18 @@ var init_anthropic = __esm({
5437
5454
  )
5438
5455
  }));
5439
5456
  const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
5457
+ const thinking = resolveAnthropicThinking(options.reasoning);
5458
+ const temperature = thinking ? void 0 : options.temperature;
5440
5459
  const payload = {
5441
5460
  model: descriptor.name,
5442
5461
  system,
5443
5462
  messages: conversation,
5444
5463
  max_tokens: options.maxTokens ?? defaultMaxTokens,
5445
- temperature: options.temperature,
5464
+ temperature,
5446
5465
  top_p: options.topP,
5447
5466
  stop_sequences: options.stopSequences,
5448
5467
  stream: true,
5468
+ ...thinking ? { thinking } : {},
5449
5469
  ...options.extra
5450
5470
  };
5451
5471
  return payload;
@@ -5525,8 +5545,39 @@ var init_anthropic = __esm({
5525
5545
  };
5526
5546
  continue;
5527
5547
  }
5528
- if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
5529
- yield { text: event.delta.text ?? "", rawEvent: event };
5548
+ if (event.type === "content_block_start") {
5549
+ const block = event.content_block;
5550
+ if (block.type === "thinking") {
5551
+ yield { text: "", thinking: { content: "", type: "thinking" }, rawEvent: event };
5552
+ continue;
5553
+ }
5554
+ if (block.type === "redacted_thinking") {
5555
+ yield { text: "", thinking: { content: "", type: "redacted" }, rawEvent: event };
5556
+ continue;
5557
+ }
5558
+ }
5559
+ if (event.type === "content_block_delta") {
5560
+ const delta = event.delta;
5561
+ if (delta.type === "thinking_delta" && delta.thinking) {
5562
+ yield {
5563
+ text: "",
5564
+ thinking: { content: delta.thinking, type: "thinking" },
5565
+ rawEvent: event
5566
+ };
5567
+ continue;
5568
+ }
5569
+ if (delta.type === "signature_delta" && delta.signature) {
5570
+ yield {
5571
+ text: "",
5572
+ thinking: { content: "", type: "thinking", signature: delta.signature },
5573
+ rawEvent: event
5574
+ };
5575
+ continue;
5576
+ }
5577
+ if (delta.type === "text_delta") {
5578
+ yield { text: delta.text ?? "", rawEvent: event };
5579
+ continue;
5580
+ }
5530
5581
  continue;
5531
5582
  }
5532
5583
  if (event.type === "message_delta") {
@@ -5835,10 +5886,10 @@ var init_gemini_models = __esm({
5835
5886
  contextWindow: 1048576,
5836
5887
  maxOutputTokens: 65536,
5837
5888
  pricing: {
5838
- input: 0.4,
5839
- // $0.40 for text/image/video
5889
+ input: 0.5,
5890
+ // $0.50 for text/image/video
5840
5891
  output: 3,
5841
- cachedInput: 0.04
5892
+ cachedInput: 0.05
5842
5893
  },
5843
5894
  knowledgeCutoff: "2025-01",
5844
5895
  features: {
@@ -6132,6 +6183,23 @@ var init_gemini_speech_models = __esm({
6132
6183
  });
6133
6184
 
6134
6185
  // src/providers/gemini.ts
6186
+ function resolveGeminiThinkingConfig(reasoning, modelName) {
6187
+ if (!reasoning?.enabled) return void 0;
6188
+ const isGemini3 = modelName.includes("gemini-3");
6189
+ if (isGemini3) {
6190
+ return {
6191
+ thinkingConfig: {
6192
+ thinkingLevel: GEMINI3_THINKING_LEVEL[reasoning.effort ?? "medium"]
6193
+ }
6194
+ };
6195
+ }
6196
+ const budget = reasoning.budgetTokens ?? GEMINI25_THINKING_BUDGET[reasoning.effort ?? "medium"];
6197
+ return {
6198
+ thinkingConfig: {
6199
+ thinkingBudget: budget
6200
+ }
6201
+ };
6202
+ }
6135
6203
  function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
6136
6204
  const byteRate = sampleRate * numChannels * bitsPerSample / 8;
6137
6205
  const blockAlign = numChannels * bitsPerSample / 8;
@@ -6160,7 +6228,7 @@ function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
6160
6228
  function createGeminiProviderFromEnv() {
6161
6229
  return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
6162
6230
  }
6163
- var import_genai, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
6231
+ var import_genai, GEMINI3_THINKING_LEVEL, GEMINI25_THINKING_BUDGET, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
6164
6232
  var init_gemini = __esm({
6165
6233
  "src/providers/gemini.ts"() {
6166
6234
  "use strict";
@@ -6172,6 +6240,20 @@ var init_gemini = __esm({
6172
6240
  init_gemini_models();
6173
6241
  init_gemini_speech_models();
6174
6242
  init_utils();
6243
+ GEMINI3_THINKING_LEVEL = {
6244
+ none: "minimal",
6245
+ low: "low",
6246
+ medium: "medium",
6247
+ high: "high",
6248
+ maximum: "high"
6249
+ };
6250
+ GEMINI25_THINKING_BUDGET = {
6251
+ none: 0,
6252
+ low: 2048,
6253
+ medium: 8192,
6254
+ high: 16384,
6255
+ maximum: 24576
6256
+ };
6175
6257
  GEMINI_ROLE_MAP = {
6176
6258
  system: "user",
6177
6259
  user: "user",
@@ -6321,6 +6403,7 @@ var init_gemini = __esm({
6321
6403
  buildApiRequest(options, descriptor, _spec, messages) {
6322
6404
  const contents = this.convertMessagesToContents(messages);
6323
6405
  const generationConfig = this.buildGenerationConfig(options);
6406
+ const thinkingConfig = resolveGeminiThinkingConfig(options.reasoning, descriptor.name);
6324
6407
  const config = {
6325
6408
  // Note: systemInstruction removed - it doesn't work with countTokens()
6326
6409
  // System messages are now included in contents as user+model exchanges
@@ -6331,6 +6414,7 @@ var init_gemini = __esm({
6331
6414
  mode: import_genai.FunctionCallingConfigMode.NONE
6332
6415
  }
6333
6416
  },
6417
+ ...thinkingConfig ?? {},
6334
6418
  ...options.extra
6335
6419
  };
6336
6420
  return {
@@ -6468,7 +6552,18 @@ var init_gemini = __esm({
6468
6552
  async *normalizeProviderStream(iterable) {
6469
6553
  const stream2 = iterable;
6470
6554
  for await (const chunk of stream2) {
6471
- const text3 = this.extractMessageText(chunk);
6555
+ const { text: text3, thinkingText, thinkingSignature } = this.extractTextAndThinking(chunk);
6556
+ if (thinkingText) {
6557
+ yield {
6558
+ text: "",
6559
+ thinking: {
6560
+ content: thinkingText,
6561
+ type: "thinking",
6562
+ signature: thinkingSignature
6563
+ },
6564
+ rawEvent: chunk
6565
+ };
6566
+ }
6472
6567
  if (text3) {
6473
6568
  yield { text: text3, rawEvent: chunk };
6474
6569
  }
@@ -6479,11 +6574,30 @@ var init_gemini = __esm({
6479
6574
  }
6480
6575
  }
6481
6576
  }
6482
- extractMessageText(chunk) {
6577
+ /**
6578
+ * Extract both regular text and thinking text from a chunk.
6579
+ * Gemini marks thinking parts with `thought: true`.
6580
+ */
6581
+ extractTextAndThinking(chunk) {
6483
6582
  if (!chunk?.candidates) {
6484
- return "";
6583
+ return { text: "", thinkingText: "" };
6584
+ }
6585
+ let text3 = "";
6586
+ let thinkingText = "";
6587
+ let thinkingSignature;
6588
+ for (const candidate of chunk.candidates) {
6589
+ for (const part of candidate.content?.parts ?? []) {
6590
+ if (part.thought) {
6591
+ thinkingText += part.text ?? "";
6592
+ if (part.thoughtSignature) {
6593
+ thinkingSignature = part.thoughtSignature;
6594
+ }
6595
+ } else {
6596
+ text3 += part.text ?? "";
6597
+ }
6598
+ }
6485
6599
  }
6486
- return chunk.candidates.flatMap((candidate) => candidate.content?.parts ?? []).map((part) => part.text ?? "").join("");
6600
+ return { text: text3, thinkingText, thinkingSignature };
6487
6601
  }
6488
6602
  extractFinishReason(chunk) {
6489
6603
  const candidate = chunk?.candidates?.find((item) => item.finishReason);
@@ -6499,7 +6613,9 @@ var init_gemini = __esm({
6499
6613
  outputTokens: usageMetadata.candidatesTokenCount ?? 0,
6500
6614
  totalTokens: usageMetadata.totalTokenCount ?? 0,
6501
6615
  // Gemini returns cached token count in cachedContentTokenCount
6502
- cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
6616
+ cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0,
6617
+ // Gemini returns thinking tokens in thoughtsTokenCount
6618
+ reasoningTokens: usageMetadata.thoughtsTokenCount
6503
6619
  };
6504
6620
  }
6505
6621
  /**
@@ -7520,11 +7636,13 @@ var init_openai_compatible_provider = __esm({
7520
7636
  yield { text: text3, rawEvent: chunk };
7521
7637
  }
7522
7638
  const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
7639
+ const usageDetails = chunk.usage;
7523
7640
  const usage = chunk.usage ? {
7524
7641
  inputTokens: chunk.usage.prompt_tokens,
7525
7642
  outputTokens: chunk.usage.completion_tokens,
7526
7643
  totalTokens: chunk.usage.total_tokens,
7527
- cachedInputTokens: 0
7644
+ cachedInputTokens: 0,
7645
+ reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
7528
7646
  } : void 0;
7529
7647
  if (finishReason || usage) {
7530
7648
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -7600,6 +7718,21 @@ var init_huggingface = __esm({
7600
7718
  getModelSpecs() {
7601
7719
  return HUGGINGFACE_MODELS;
7602
7720
  }
7721
+ /**
7722
+ * Override buildApiRequest to inject DeepSeek-specific thinking parameters.
7723
+ * DeepSeek models use `extra_body: { thinking: { type: "enabled" } }` for reasoning.
7724
+ */
7725
+ buildApiRequest(options, descriptor, spec, messages) {
7726
+ const request = super.buildApiRequest(options, descriptor, spec, messages);
7727
+ if (options.reasoning?.enabled && descriptor.name.toLowerCase().includes("deepseek")) {
7728
+ const requestObj = request;
7729
+ requestObj.extra_body = {
7730
+ ...requestObj.extra_body,
7731
+ thinking: { type: "enabled" }
7732
+ };
7733
+ }
7734
+ return request;
7735
+ }
7603
7736
  /**
7604
7737
  * Enhance error messages with HuggingFace-specific guidance.
7605
7738
  */
@@ -8485,7 +8618,7 @@ function sanitizeExtra(extra, allowTemperature) {
8485
8618
  function createOpenAIProviderFromEnv() {
8486
8619
  return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
8487
8620
  }
8488
- var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
8621
+ var import_openai3, import_tiktoken, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
8489
8622
  var init_openai = __esm({
8490
8623
  "src/providers/openai.ts"() {
8491
8624
  "use strict";
@@ -8503,6 +8636,13 @@ var init_openai = __esm({
8503
8636
  user: "user",
8504
8637
  assistant: "assistant"
8505
8638
  };
8639
+ OPENAI_EFFORT_MAP = {
8640
+ none: "none",
8641
+ low: "low",
8642
+ medium: "medium",
8643
+ high: "high",
8644
+ maximum: "xhigh"
8645
+ };
8506
8646
  OpenAIChatProvider = class extends BaseProviderAdapter {
8507
8647
  providerId = "openai";
8508
8648
  supports(descriptor) {
@@ -8593,10 +8733,15 @@ var init_openai = __esm({
8593
8733
  };
8594
8734
  }
8595
8735
  buildApiRequest(options, descriptor, spec, messages) {
8596
- const { maxTokens, temperature, topP, stopSequences, extra } = options;
8736
+ const { maxTokens, temperature, topP, stopSequences, extra, reasoning } = options;
8597
8737
  const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
8598
8738
  const shouldIncludeTemperature = typeof temperature === "number" && supportsTemperature;
8599
8739
  const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
8740
+ const reasoningParam = reasoning?.enabled !== void 0 ? {
8741
+ reasoning: {
8742
+ effort: OPENAI_EFFORT_MAP[reasoning.effort ?? "medium"]
8743
+ }
8744
+ } : {};
8600
8745
  return {
8601
8746
  model: descriptor.name,
8602
8747
  messages: messages.map((message) => this.convertToOpenAIMessage(message)),
@@ -8607,6 +8752,7 @@ var init_openai = __esm({
8607
8752
  stop: stopSequences,
8608
8753
  stream: true,
8609
8754
  stream_options: { include_usage: true },
8755
+ ...reasoningParam,
8610
8756
  ...sanitizedExtra ?? {},
8611
8757
  ...shouldIncludeTemperature ? { temperature } : {}
8612
8758
  };
@@ -8695,11 +8841,13 @@ var init_openai = __esm({
8695
8841
  yield { text: text3, rawEvent: chunk };
8696
8842
  }
8697
8843
  const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
8844
+ const usageDetails = chunk.usage;
8698
8845
  const usage = chunk.usage ? {
8699
8846
  inputTokens: chunk.usage.prompt_tokens,
8700
8847
  outputTokens: chunk.usage.completion_tokens,
8701
8848
  totalTokens: chunk.usage.total_tokens,
8702
- cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
8849
+ cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
8850
+ reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
8703
8851
  } : void 0;
8704
8852
  if (finishReason || usage) {
8705
8853
  yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -9234,7 +9382,7 @@ function createOpenRouterProviderFromEnv() {
9234
9382
  });
9235
9383
  return new OpenRouterProvider(client, config);
9236
9384
  }
9237
- var import_openai4, OpenRouterProvider;
9385
+ var import_openai4, OPENROUTER_EFFORT_MAP, OpenRouterProvider;
9238
9386
  var init_openrouter = __esm({
9239
9387
  "src/providers/openrouter.ts"() {
9240
9388
  "use strict";
@@ -9242,6 +9390,13 @@ var init_openrouter = __esm({
9242
9390
  init_openai_compatible_provider();
9243
9391
  init_openrouter_models();
9244
9392
  init_utils();
9393
+ OPENROUTER_EFFORT_MAP = {
9394
+ none: "none",
9395
+ low: "low",
9396
+ medium: "medium",
9397
+ high: "high",
9398
+ maximum: "xhigh"
9399
+ };
9245
9400
  OpenRouterProvider = class extends OpenAICompatibleProvider {
9246
9401
  providerId = "openrouter";
9247
9402
  providerAlias = "or";
@@ -9251,6 +9406,20 @@ var init_openrouter = __esm({
9251
9406
  getModelSpecs() {
9252
9407
  return OPENROUTER_MODELS;
9253
9408
  }
9409
+ /**
9410
+ * Override buildApiRequest to inject reasoning parameters.
9411
+ * OpenRouter normalizes reasoning into the standard OpenAI format.
9412
+ */
9413
+ buildApiRequest(options, descriptor, spec, messages) {
9414
+ const request = super.buildApiRequest(options, descriptor, spec, messages);
9415
+ if (options.reasoning?.enabled !== void 0) {
9416
+ const requestObj = request;
9417
+ requestObj.reasoning = {
9418
+ effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
9419
+ };
9420
+ }
9421
+ return request;
9422
+ }
9254
9423
  /**
9255
9424
  * Get custom headers for OpenRouter analytics.
9256
9425
  */
@@ -9488,9 +9657,10 @@ var init_model_registry = __esm({
9488
9657
  * @param outputTokens - Number of output tokens
9489
9658
  * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
9490
9659
  * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
9660
+ * @param reasoningTokens - Number of reasoning/thinking tokens (subset of outputTokens)
9491
9661
  * @returns CostEstimate if model found, undefined otherwise
9492
9662
  */
9493
- estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
9663
+ estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
9494
9664
  const spec = this.getModelSpec(modelId);
9495
9665
  if (!spec) return void 0;
9496
9666
  const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
@@ -9500,13 +9670,18 @@ var init_model_registry = __esm({
9500
9670
  const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
9501
9671
  const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
9502
9672
  const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
9503
- const outputCost = outputTokens / 1e6 * spec.pricing.output;
9673
+ const reasoningRate = spec.pricing.reasoningOutput ?? spec.pricing.output;
9674
+ const nonReasoningOutputTokens = outputTokens - reasoningTokens;
9675
+ const reasoningCost = reasoningTokens / 1e6 * reasoningRate;
9676
+ const nonReasoningOutputCost = nonReasoningOutputTokens / 1e6 * spec.pricing.output;
9677
+ const outputCost = nonReasoningOutputCost + reasoningCost;
9504
9678
  const totalCost = inputCost + outputCost;
9505
9679
  return {
9506
9680
  inputCost,
9507
9681
  cachedInputCost,
9508
9682
  cacheCreationCost,
9509
9683
  outputCost,
9684
+ reasoningCost,
9510
9685
  totalCost,
9511
9686
  currency: "USD"
9512
9687
  };
@@ -10221,6 +10396,7 @@ var init_builder = __esm({
10221
10396
  // Shared retry config from parent for consistent backoff behavior
10222
10397
  // When a gadget calls withParentContext(ctx), this config is shared
10223
10398
  sharedRetryConfig;
10399
+ reasoningConfig;
10224
10400
  constructor(client) {
10225
10401
  this.client = client;
10226
10402
  }
@@ -10806,6 +10982,60 @@ var init_builder = __esm({
10806
10982
  this.signal = signal;
10807
10983
  return this;
10808
10984
  }
10985
+ /**
10986
+ * Enable reasoning/thinking mode for reasoning-capable models.
10987
+ *
10988
+ * Can be called with:
10989
+ * - No args: enables reasoning at "medium" effort
10990
+ * - A string effort level: `withReasoning("high")`
10991
+ * - A full config object: `withReasoning({ enabled: true, budgetTokens: 10000 })`
10992
+ *
10993
+ * @param config - Optional effort level or full reasoning config
10994
+ * @returns This builder for chaining
10995
+ *
10996
+ * @example
10997
+ * ```typescript
10998
+ * // Simple — medium effort
10999
+ * LLMist.createAgent()
11000
+ * .withModel("o3")
11001
+ * .withReasoning()
11002
+ * .ask("Solve this logic puzzle...");
11003
+ *
11004
+ * // Explicit effort level
11005
+ * LLMist.createAgent()
11006
+ * .withModel("anthropic:claude-4-opus")
11007
+ * .withReasoning("high")
11008
+ * .ask("Analyze this complex problem");
11009
+ *
11010
+ * // Full config with explicit token budget
11011
+ * LLMist.createAgent()
11012
+ * .withModel("anthropic:claude-4-opus")
11013
+ * .withReasoning({ enabled: true, budgetTokens: 16000 })
11014
+ * .ask("Step through this proof");
11015
+ * ```
11016
+ */
11017
+ withReasoning(config) {
11018
+ if (typeof config === "string") {
11019
+ this.reasoningConfig = { enabled: true, effort: config };
11020
+ } else if (config === void 0) {
11021
+ this.reasoningConfig = { enabled: true, effort: "medium" };
11022
+ } else {
11023
+ this.reasoningConfig = config;
11024
+ }
11025
+ return this;
11026
+ }
11027
+ /**
11028
+ * Explicitly disable reasoning for this agent, even if the model supports it.
11029
+ *
11030
+ * By default, reasoning is auto-enabled at "medium" effort for models with
11031
+ * `features.reasoning: true`. Use this to opt out.
11032
+ *
11033
+ * @returns This builder for chaining
11034
+ */
11035
+ withoutReasoning() {
11036
+ this.reasoningConfig = { enabled: false };
11037
+ return this;
11038
+ }
10809
11039
  /**
10810
11040
  * Set subagent configuration overrides.
10811
11041
  *
@@ -11091,6 +11321,7 @@ ${endPrefix}`
11091
11321
  retryConfig: this.retryConfig,
11092
11322
  rateLimitConfig: this.rateLimitConfig,
11093
11323
  signal: this.signal,
11324
+ reasoning: this.reasoningConfig,
11094
11325
  subagentConfig: this.subagentConfig,
11095
11326
  // Tree context for shared tree model (subagents share parent's tree)
11096
11327
  parentTree: this.parentContext?.tree,
@@ -11278,6 +11509,7 @@ ${endPrefix}`
11278
11509
  retryConfig: this.retryConfig,
11279
11510
  rateLimitConfig: this.rateLimitConfig,
11280
11511
  signal: this.signal,
11512
+ reasoning: this.reasoningConfig,
11281
11513
  subagentConfig: this.subagentConfig,
11282
11514
  // Tree context for shared tree model (subagents share parent's tree)
11283
11515
  parentTree: this.parentContext?.tree,
@@ -11732,6 +11964,7 @@ var init_cost_reporting_client = __esm({
11732
11964
  let outputTokens = 0;
11733
11965
  let cachedInputTokens = 0;
11734
11966
  let cacheCreationInputTokens = 0;
11967
+ let reasoningTokens = 0;
11735
11968
  const messages = [
11736
11969
  ...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
11737
11970
  { role: "user", content: prompt }
@@ -11748,6 +11981,7 @@ var init_cost_reporting_client = __esm({
11748
11981
  outputTokens = chunk.usage.outputTokens;
11749
11982
  cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
11750
11983
  cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
11984
+ reasoningTokens = chunk.usage.reasoningTokens ?? 0;
11751
11985
  }
11752
11986
  }
11753
11987
  this.reportCostFromUsage(
@@ -11755,7 +11989,8 @@ var init_cost_reporting_client = __esm({
11755
11989
  inputTokens,
11756
11990
  outputTokens,
11757
11991
  cachedInputTokens,
11758
- cacheCreationInputTokens
11992
+ cacheCreationInputTokens,
11993
+ reasoningTokens
11759
11994
  );
11760
11995
  return result;
11761
11996
  }
@@ -11774,6 +12009,7 @@ var init_cost_reporting_client = __esm({
11774
12009
  let outputTokens = 0;
11775
12010
  let cachedInputTokens = 0;
11776
12011
  let cacheCreationInputTokens = 0;
12012
+ let reasoningTokens = 0;
11777
12013
  const messages = [
11778
12014
  ...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
11779
12015
  { role: "user", content: prompt }
@@ -11793,6 +12029,7 @@ var init_cost_reporting_client = __esm({
11793
12029
  outputTokens = chunk.usage.outputTokens;
11794
12030
  cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
11795
12031
  cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
12032
+ reasoningTokens = chunk.usage.reasoningTokens ?? 0;
11796
12033
  }
11797
12034
  }
11798
12035
  } finally {
@@ -11801,7 +12038,8 @@ var init_cost_reporting_client = __esm({
11801
12038
  inputTokens,
11802
12039
  outputTokens,
11803
12040
  cachedInputTokens,
11804
- cacheCreationInputTokens
12041
+ cacheCreationInputTokens,
12042
+ reasoningTokens
11805
12043
  );
11806
12044
  }
11807
12045
  }
@@ -11828,6 +12066,7 @@ var init_cost_reporting_client = __esm({
11828
12066
  let outputTokens = 0;
11829
12067
  let cachedInputTokens = 0;
11830
12068
  let cacheCreationInputTokens = 0;
12069
+ let reasoningTokens = 0;
11831
12070
  try {
11832
12071
  for await (const chunk of innerStream) {
11833
12072
  if (chunk.usage) {
@@ -11835,6 +12074,7 @@ var init_cost_reporting_client = __esm({
11835
12074
  outputTokens = chunk.usage.outputTokens;
11836
12075
  cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
11837
12076
  cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
12077
+ reasoningTokens = chunk.usage.reasoningTokens ?? 0;
11838
12078
  }
11839
12079
  yield chunk;
11840
12080
  }
@@ -11845,7 +12085,8 @@ var init_cost_reporting_client = __esm({
11845
12085
  inputTokens,
11846
12086
  outputTokens,
11847
12087
  cachedInputTokens,
11848
- cacheCreationInputTokens
12088
+ cacheCreationInputTokens,
12089
+ reasoningTokens
11849
12090
  );
11850
12091
  }
11851
12092
  }
@@ -11855,14 +12096,15 @@ var init_cost_reporting_client = __esm({
11855
12096
  /**
11856
12097
  * Calculates and reports cost from token usage.
11857
12098
  */
11858
- reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
12099
+ reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
11859
12100
  if (inputTokens === 0 && outputTokens === 0) return;
11860
12101
  const estimate = this.client.modelRegistry.estimateCost(
11861
12102
  model,
11862
12103
  inputTokens,
11863
12104
  outputTokens,
11864
12105
  cachedInputTokens,
11865
- cacheCreationInputTokens
12106
+ cacheCreationInputTokens,
12107
+ reasoningTokens
11866
12108
  );
11867
12109
  if (estimate && estimate.totalCost > 0) {
11868
12110
  this.reportCost(estimate.totalCost);
@@ -12954,9 +13196,18 @@ var init_stream_processor = __esm({
12954
13196
  let usage;
12955
13197
  let didExecuteGadgets = false;
12956
13198
  let shouldBreakLoop = false;
13199
+ let thinkingContent = "";
12957
13200
  for await (const chunk of stream2) {
12958
13201
  if (chunk.finishReason) finishReason = chunk.finishReason;
12959
13202
  if (chunk.usage) usage = chunk.usage;
13203
+ if (chunk.thinking?.content) {
13204
+ thinkingContent += chunk.thinking.content;
13205
+ yield {
13206
+ type: "thinking",
13207
+ content: chunk.thinking.content,
13208
+ thinkingType: chunk.thinking.type
13209
+ };
13210
+ }
12960
13211
  let processedChunk = "";
12961
13212
  if (chunk.text) {
12962
13213
  processedChunk = chunk.text;
@@ -13070,7 +13321,8 @@ var init_stream_processor = __esm({
13070
13321
  finishReason,
13071
13322
  usage,
13072
13323
  rawResponse: this.responseText,
13073
- finalMessage
13324
+ finalMessage,
13325
+ thinkingContent: thinkingContent || void 0
13074
13326
  };
13075
13327
  yield completionEvent;
13076
13328
  }
@@ -13872,6 +14124,7 @@ var init_agent = __esm({
13872
14124
  mediaStore;
13873
14125
  // Cancellation
13874
14126
  signal;
14127
+ reasoning;
13875
14128
  // Retry configuration
13876
14129
  retryConfig;
13877
14130
  // Rate limit tracker for proactive throttling
@@ -13963,6 +14216,7 @@ var init_agent = __esm({
13963
14216
  );
13964
14217
  }
13965
14218
  this.signal = options.signal;
14219
+ this.reasoning = options.reasoning;
13966
14220
  this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
13967
14221
  if (options.sharedRateLimitTracker) {
13968
14222
  this.rateLimitTracker = options.sharedRateLimitTracker;
@@ -14365,6 +14619,7 @@ var init_agent = __esm({
14365
14619
  usage: result.usage,
14366
14620
  rawResponse: result.rawResponse,
14367
14621
  finalMessage: result.finalMessage,
14622
+ thinkingContent: result.thinkingContent,
14368
14623
  logger: this.logger,
14369
14624
  subagentContext
14370
14625
  };
@@ -14665,17 +14920,34 @@ var init_agent = __esm({
14665
14920
  });
14666
14921
  return { type: "compaction", event: compactionEvent };
14667
14922
  }
14923
+ /**
14924
+ * Resolve reasoning configuration with auto-enable logic.
14925
+ *
14926
+ * Priority: explicit config > auto-enable for reasoning models > undefined
14927
+ * When a model has `features.reasoning: true` and no explicit config is set,
14928
+ * reasoning is automatically enabled at "medium" effort.
14929
+ */
14930
+ resolveReasoningConfig(spec) {
14931
+ if (this.reasoning !== void 0) return this.reasoning;
14932
+ if (spec?.features?.reasoning) {
14933
+ return { enabled: true, effort: "medium" };
14934
+ }
14935
+ return void 0;
14936
+ }
14668
14937
  /**
14669
14938
  * Prepare LLM call options, create tree node, and process beforeLLMCall controller.
14670
14939
  * @returns options, node ID, and optional skipWithSynthetic response if controller wants to skip
14671
14940
  */
14672
14941
  async prepareLLMCall(iteration) {
14942
+ const spec = this.client.modelRegistry?.getModelSpec?.(this.model);
14943
+ const reasoning = this.resolveReasoningConfig(spec);
14673
14944
  let llmOptions = {
14674
14945
  model: this.model,
14675
14946
  messages: this.conversation.getMessages(),
14676
14947
  temperature: this.temperature,
14677
14948
  maxTokens: this.defaultMaxTokens,
14678
- signal: this.signal
14949
+ signal: this.signal,
14950
+ reasoning
14679
14951
  };
14680
14952
  const llmNode = this.tree.addLLMCall({
14681
14953
  iteration,
@@ -14745,13 +15017,15 @@ var init_agent = __esm({
14745
15017
  inputTokens,
14746
15018
  outputTokens,
14747
15019
  result.usage?.cachedInputTokens ?? 0,
14748
- result.usage?.cacheCreationInputTokens ?? 0
15020
+ result.usage?.cacheCreationInputTokens ?? 0,
15021
+ result.usage?.reasoningTokens ?? 0
14749
15022
  )?.totalCost;
14750
15023
  this.tree.completeLLMCall(nodeId, {
14751
15024
  response: result.rawResponse,
14752
15025
  usage: result.usage,
14753
15026
  finishReason: result.finishReason,
14754
- cost: llmCost
15027
+ cost: llmCost,
15028
+ thinkingContent: result.thinkingContent
14755
15029
  });
14756
15030
  }
14757
15031
  /**