npm - llmist - Versions diffs - 15.12.0 → 15.13.0 - Mend

llmist 15.12.0 → 15.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.cjs CHANGED Viewed

@@ -229,7 +229,8 @@ var init_execution_tree = __esm({
           response: llmNode.response,
           usage: llmNode.usage,
           finishReason: llmNode.finishReason,
-          cost: llmNode.cost
+          cost: llmNode.cost,
+          thinkingContent: params.thinkingContent
         });
       }
       /**
@@ -4529,7 +4530,10 @@ var init_hook_presets = __esm({
                     const costEstimate = modelRegistry.estimateCost(
                       modelName,
                       ctx.usage.inputTokens,
-                      ctx.usage.outputTokens
+                      ctx.usage.outputTokens,
+                      ctx.usage.cachedInputTokens ?? 0,
+                      ctx.usage.cacheCreationInputTokens ?? 0,
+                      ctx.usage.reasoningTokens ?? 0
                     );
                     if (costEstimate) {
                       totalCost += costEstimate.totalCost;
@@ -5026,10 +5030,10 @@ var init_anthropic_models = __esm({
         contextWindow: 2e5,
         maxOutputTokens: 64e3,
         pricing: {
-          input: 0.8,
-          output: 4,
-          cachedInput: 0.08,
-          cacheWriteInput: 1
+          input: 1,
+          output: 5,
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -5225,10 +5229,10 @@ var init_anthropic_models = __esm({
         contextWindow: 2e5,
         maxOutputTokens: 64e3,
         pricing: {
-          input: 0.8,
-          output: 4,
-          cachedInput: 0.08,
-          cacheWriteInput: 1
+          input: 1,
+          output: 5,
+          cachedInput: 0.1,
+          cacheWriteInput: 1.25
         },
         knowledgeCutoff: "2025-02",
         features: {
@@ -5371,10 +5375,15 @@ var init_utils = __esm({
 });
 // src/providers/anthropic.ts
+function resolveAnthropicThinking(reasoning) {
+  if (!reasoning?.enabled) return void 0;
+  const budget = reasoning.budgetTokens ? Math.max(1024, reasoning.budgetTokens) : ANTHROPIC_EFFORT_BUDGET[reasoning.effort ?? "medium"];
+  return { type: "enabled", budget_tokens: budget };
+}
 function createAnthropicProviderFromEnv() {
   return createProviderFromEnv("ANTHROPIC_API_KEY", import_sdk.default, AnthropicMessagesProvider);
 }
-var import_sdk, AnthropicMessagesProvider;
+var import_sdk, ANTHROPIC_EFFORT_BUDGET, AnthropicMessagesProvider;
 var init_anthropic = __esm({
   "src/providers/anthropic.ts"() {
     "use strict";
@@ -5384,6 +5393,14 @@ var init_anthropic = __esm({
     init_base_provider();
     init_constants2();
     init_utils();
+    ANTHROPIC_EFFORT_BUDGET = {
+      none: 1024,
+      // Minimum allowed by Anthropic
+      low: 2048,
+      medium: 8192,
+      high: 16384,
+      maximum: 32768
+    };
     AnthropicMessagesProvider = class extends BaseProviderAdapter {
       providerId = "anthropic";
       supports(descriptor) {
@@ -5437,15 +5454,18 @@ var init_anthropic = __esm({
           )
         }));
         const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
+        const thinking = resolveAnthropicThinking(options.reasoning);
+        const temperature = thinking ? void 0 : options.temperature;
         const payload = {
           model: descriptor.name,
           system,
           messages: conversation,
           max_tokens: options.maxTokens ?? defaultMaxTokens,
-          temperature: options.temperature,
+          temperature,
           top_p: options.topP,
           stop_sequences: options.stopSequences,
           stream: true,
+          ...thinking ? { thinking } : {},
           ...options.extra
         };
         return payload;
@@ -5525,8 +5545,39 @@ var init_anthropic = __esm({
             };
             continue;
           }
-          if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
-            yield { text: event.delta.text ?? "", rawEvent: event };
+          if (event.type === "content_block_start") {
+            const block = event.content_block;
+            if (block.type === "thinking") {
+              yield { text: "", thinking: { content: "", type: "thinking" }, rawEvent: event };
+              continue;
+            }
+            if (block.type === "redacted_thinking") {
+              yield { text: "", thinking: { content: "", type: "redacted" }, rawEvent: event };
+              continue;
+            }
+          }
+          if (event.type === "content_block_delta") {
+            const delta = event.delta;
+            if (delta.type === "thinking_delta" && delta.thinking) {
+              yield {
+                text: "",
+                thinking: { content: delta.thinking, type: "thinking" },
+                rawEvent: event
+              };
+              continue;
+            }
+            if (delta.type === "signature_delta" && delta.signature) {
+              yield {
+                text: "",
+                thinking: { content: "", type: "thinking", signature: delta.signature },
+                rawEvent: event
+              };
+              continue;
+            }
+            if (delta.type === "text_delta") {
+              yield { text: delta.text ?? "", rawEvent: event };
+              continue;
+            }
             continue;
           }
           if (event.type === "message_delta") {
@@ -5835,10 +5886,10 @@ var init_gemini_models = __esm({
         contextWindow: 1048576,
         maxOutputTokens: 65536,
         pricing: {
-          input: 0.4,
-          // $0.40 for text/image/video
+          input: 0.5,
+          // $0.50 for text/image/video
           output: 3,
-          cachedInput: 0.04
+          cachedInput: 0.05
         },
         knowledgeCutoff: "2025-01",
         features: {
@@ -6132,6 +6183,23 @@ var init_gemini_speech_models = __esm({
 });
 // src/providers/gemini.ts
+function resolveGeminiThinkingConfig(reasoning, modelName) {
+  if (!reasoning?.enabled) return void 0;
+  const isGemini3 = modelName.includes("gemini-3");
+  if (isGemini3) {
+    return {
+      thinkingConfig: {
+        thinkingLevel: GEMINI3_THINKING_LEVEL[reasoning.effort ?? "medium"]
+      }
+    };
+  }
+  const budget = reasoning.budgetTokens ?? GEMINI25_THINKING_BUDGET[reasoning.effort ?? "medium"];
+  return {
+    thinkingConfig: {
+      thinkingBudget: budget
+    }
+  };
+}
 function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
   const byteRate = sampleRate * numChannels * bitsPerSample / 8;
   const blockAlign = numChannels * bitsPerSample / 8;
@@ -6160,7 +6228,7 @@ function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
 function createGeminiProviderFromEnv() {
   return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
 }
-var import_genai, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
+var import_genai, GEMINI3_THINKING_LEVEL, GEMINI25_THINKING_BUDGET, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
 var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     "use strict";
@@ -6172,6 +6240,20 @@ var init_gemini = __esm({
     init_gemini_models();
     init_gemini_speech_models();
     init_utils();
+    GEMINI3_THINKING_LEVEL = {
+      none: "minimal",
+      low: "low",
+      medium: "medium",
+      high: "high",
+      maximum: "high"
+    };
+    GEMINI25_THINKING_BUDGET = {
+      none: 0,
+      low: 2048,
+      medium: 8192,
+      high: 16384,
+      maximum: 24576
+    };
     GEMINI_ROLE_MAP = {
       system: "user",
       user: "user",
@@ -6321,6 +6403,7 @@ var init_gemini = __esm({
       buildApiRequest(options, descriptor, _spec, messages) {
         const contents = this.convertMessagesToContents(messages);
         const generationConfig = this.buildGenerationConfig(options);
+        const thinkingConfig = resolveGeminiThinkingConfig(options.reasoning, descriptor.name);
         const config = {
           // Note: systemInstruction removed - it doesn't work with countTokens()
           // System messages are now included in contents as user+model exchanges
@@ -6331,6 +6414,7 @@ var init_gemini = __esm({
               mode: import_genai.FunctionCallingConfigMode.NONE
             }
           },
+          ...thinkingConfig ?? {},
           ...options.extra
         };
         return {
@@ -6468,7 +6552,18 @@ var init_gemini = __esm({
       async *normalizeProviderStream(iterable) {
         const stream2 = iterable;
         for await (const chunk of stream2) {
-          const text3 = this.extractMessageText(chunk);
+          const { text: text3, thinkingText, thinkingSignature } = this.extractTextAndThinking(chunk);
+          if (thinkingText) {
+            yield {
+              text: "",
+              thinking: {
+                content: thinkingText,
+                type: "thinking",
+                signature: thinkingSignature
+              },
+              rawEvent: chunk
+            };
+          }
           if (text3) {
             yield { text: text3, rawEvent: chunk };
           }
@@ -6479,11 +6574,30 @@ var init_gemini = __esm({
           }
         }
       }
-      extractMessageText(chunk) {
+      /**
+       * Extract both regular text and thinking text from a chunk.
+       * Gemini marks thinking parts with `thought: true`.
+       */
+      extractTextAndThinking(chunk) {
         if (!chunk?.candidates) {
-          return "";
+          return { text: "", thinkingText: "" };
+        }
+        let text3 = "";
+        let thinkingText = "";
+        let thinkingSignature;
+        for (const candidate of chunk.candidates) {
+          for (const part of candidate.content?.parts ?? []) {
+            if (part.thought) {
+              thinkingText += part.text ?? "";
+              if (part.thoughtSignature) {
+                thinkingSignature = part.thoughtSignature;
+              }
+            } else {
+              text3 += part.text ?? "";
+            }
+          }
         }
-        return chunk.candidates.flatMap((candidate) => candidate.content?.parts ?? []).map((part) => part.text ?? "").join("");
+        return { text: text3, thinkingText, thinkingSignature };
       }
       extractFinishReason(chunk) {
         const candidate = chunk?.candidates?.find((item) => item.finishReason);
@@ -6499,7 +6613,9 @@ var init_gemini = __esm({
           outputTokens: usageMetadata.candidatesTokenCount ?? 0,
           totalTokens: usageMetadata.totalTokenCount ?? 0,
           // Gemini returns cached token count in cachedContentTokenCount
-          cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
+          cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0,
+          // Gemini returns thinking tokens in thoughtsTokenCount
+          reasoningTokens: usageMetadata.thoughtsTokenCount
         };
       }
       /**
@@ -7520,11 +7636,13 @@ var init_openai_compatible_provider = __esm({
             yield { text: text3, rawEvent: chunk };
           }
           const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
+          const usageDetails = chunk.usage;
           const usage = chunk.usage ? {
             inputTokens: chunk.usage.prompt_tokens,
             outputTokens: chunk.usage.completion_tokens,
             totalTokens: chunk.usage.total_tokens,
-            cachedInputTokens: 0
+            cachedInputTokens: 0,
+            reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
           } : void 0;
           if (finishReason || usage) {
             yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -7600,6 +7718,21 @@ var init_huggingface = __esm({
       getModelSpecs() {
         return HUGGINGFACE_MODELS;
       }
+      /**
+       * Override buildApiRequest to inject DeepSeek-specific thinking parameters.
+       * DeepSeek models use `extra_body: { thinking: { type: "enabled" } }` for reasoning.
+       */
+      buildApiRequest(options, descriptor, spec, messages) {
+        const request = super.buildApiRequest(options, descriptor, spec, messages);
+        if (options.reasoning?.enabled && descriptor.name.toLowerCase().includes("deepseek")) {
+          const requestObj = request;
+          requestObj.extra_body = {
+            ...requestObj.extra_body,
+            thinking: { type: "enabled" }
+          };
+        }
+        return request;
+      }
       /**
        * Enhance error messages with HuggingFace-specific guidance.
        */
@@ -8485,7 +8618,7 @@ function sanitizeExtra(extra, allowTemperature) {
 function createOpenAIProviderFromEnv() {
   return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
 }
-var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
+var import_openai3, import_tiktoken, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
 var init_openai = __esm({
   "src/providers/openai.ts"() {
     "use strict";
@@ -8503,6 +8636,13 @@ var init_openai = __esm({
       user: "user",
       assistant: "assistant"
     };
+    OPENAI_EFFORT_MAP = {
+      none: "none",
+      low: "low",
+      medium: "medium",
+      high: "high",
+      maximum: "xhigh"
+    };
     OpenAIChatProvider = class extends BaseProviderAdapter {
       providerId = "openai";
       supports(descriptor) {
@@ -8593,10 +8733,15 @@ var init_openai = __esm({
         };
       }
       buildApiRequest(options, descriptor, spec, messages) {
-        const { maxTokens, temperature, topP, stopSequences, extra } = options;
+        const { maxTokens, temperature, topP, stopSequences, extra, reasoning } = options;
         const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
         const shouldIncludeTemperature = typeof temperature === "number" && supportsTemperature;
         const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
+        const reasoningParam = reasoning?.enabled !== void 0 ? {
+          reasoning: {
+            effort: OPENAI_EFFORT_MAP[reasoning.effort ?? "medium"]
+          }
+        } : {};
         return {
           model: descriptor.name,
           messages: messages.map((message) => this.convertToOpenAIMessage(message)),
@@ -8607,6 +8752,7 @@ var init_openai = __esm({
           stop: stopSequences,
           stream: true,
           stream_options: { include_usage: true },
+          ...reasoningParam,
           ...sanitizedExtra ?? {},
           ...shouldIncludeTemperature ? { temperature } : {}
         };
@@ -8695,11 +8841,13 @@ var init_openai = __esm({
             yield { text: text3, rawEvent: chunk };
           }
           const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
+          const usageDetails = chunk.usage;
           const usage = chunk.usage ? {
             inputTokens: chunk.usage.prompt_tokens,
             outputTokens: chunk.usage.completion_tokens,
             totalTokens: chunk.usage.total_tokens,
-            cachedInputTokens: chunk.usage.prompt_tokens_details?.cached_tokens ?? 0
+            cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
+            reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
           } : void 0;
           if (finishReason || usage) {
             yield { text: "", finishReason, usage, rawEvent: chunk };
@@ -9234,7 +9382,7 @@ function createOpenRouterProviderFromEnv() {
   });
   return new OpenRouterProvider(client, config);
 }
-var import_openai4, OpenRouterProvider;
+var import_openai4, OPENROUTER_EFFORT_MAP, OpenRouterProvider;
 var init_openrouter = __esm({
   "src/providers/openrouter.ts"() {
     "use strict";
@@ -9242,6 +9390,13 @@ var init_openrouter = __esm({
     init_openai_compatible_provider();
     init_openrouter_models();
     init_utils();
+    OPENROUTER_EFFORT_MAP = {
+      none: "none",
+      low: "low",
+      medium: "medium",
+      high: "high",
+      maximum: "xhigh"
+    };
     OpenRouterProvider = class extends OpenAICompatibleProvider {
       providerId = "openrouter";
       providerAlias = "or";
@@ -9251,6 +9406,20 @@ var init_openrouter = __esm({
       getModelSpecs() {
         return OPENROUTER_MODELS;
       }
+      /**
+       * Override buildApiRequest to inject reasoning parameters.
+       * OpenRouter normalizes reasoning into the standard OpenAI format.
+       */
+      buildApiRequest(options, descriptor, spec, messages) {
+        const request = super.buildApiRequest(options, descriptor, spec, messages);
+        if (options.reasoning?.enabled !== void 0) {
+          const requestObj = request;
+          requestObj.reasoning = {
+            effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
+          };
+        }
+        return request;
+      }
       /**
        * Get custom headers for OpenRouter analytics.
        */
@@ -9488,9 +9657,10 @@ var init_model_registry = __esm({
        * @param outputTokens - Number of output tokens
        * @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
        * @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
+       * @param reasoningTokens - Number of reasoning/thinking tokens (subset of outputTokens)
        * @returns CostEstimate if model found, undefined otherwise
        */
-      estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
+      estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
         const spec = this.getModelSpec(modelId);
         if (!spec) return void 0;
         const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
@@ -9500,13 +9670,18 @@ var init_model_registry = __esm({
         const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
         const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
         const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
-        const outputCost = outputTokens / 1e6 * spec.pricing.output;
+        const reasoningRate = spec.pricing.reasoningOutput ?? spec.pricing.output;
+        const nonReasoningOutputTokens = outputTokens - reasoningTokens;
+        const reasoningCost = reasoningTokens / 1e6 * reasoningRate;
+        const nonReasoningOutputCost = nonReasoningOutputTokens / 1e6 * spec.pricing.output;
+        const outputCost = nonReasoningOutputCost + reasoningCost;
         const totalCost = inputCost + outputCost;
         return {
           inputCost,
           cachedInputCost,
           cacheCreationCost,
           outputCost,
+          reasoningCost,
           totalCost,
           currency: "USD"
         };
@@ -10221,6 +10396,7 @@ var init_builder = __esm({
       // Shared retry config from parent for consistent backoff behavior
       // When a gadget calls withParentContext(ctx), this config is shared
       sharedRetryConfig;
+      reasoningConfig;
       constructor(client) {
         this.client = client;
       }
@@ -10806,6 +10982,60 @@ var init_builder = __esm({
         this.signal = signal;
         return this;
       }
+      /**
+       * Enable reasoning/thinking mode for reasoning-capable models.
+       *
+       * Can be called with:
+       * - No args: enables reasoning at "medium" effort
+       * - A string effort level: `withReasoning("high")`
+       * - A full config object: `withReasoning({ enabled: true, budgetTokens: 10000 })`
+       *
+       * @param config - Optional effort level or full reasoning config
+       * @returns This builder for chaining
+       *
+       * @example
+       * ```typescript
+       * // Simple — medium effort
+       * LLMist.createAgent()
+       *   .withModel("o3")
+       *   .withReasoning()
+       *   .ask("Solve this logic puzzle...");
+       *
+       * // Explicit effort level
+       * LLMist.createAgent()
+       *   .withModel("anthropic:claude-4-opus")
+       *   .withReasoning("high")
+       *   .ask("Analyze this complex problem");
+       *
+       * // Full config with explicit token budget
+       * LLMist.createAgent()
+       *   .withModel("anthropic:claude-4-opus")
+       *   .withReasoning({ enabled: true, budgetTokens: 16000 })
+       *   .ask("Step through this proof");
+       * ```
+       */
+      withReasoning(config) {
+        if (typeof config === "string") {
+          this.reasoningConfig = { enabled: true, effort: config };
+        } else if (config === void 0) {
+          this.reasoningConfig = { enabled: true, effort: "medium" };
+        } else {
+          this.reasoningConfig = config;
+        }
+        return this;
+      }
+      /**
+       * Explicitly disable reasoning for this agent, even if the model supports it.
+       *
+       * By default, reasoning is auto-enabled at "medium" effort for models with
+       * `features.reasoning: true`. Use this to opt out.
+       *
+       * @returns This builder for chaining
+       */
+      withoutReasoning() {
+        this.reasoningConfig = { enabled: false };
+        return this;
+      }
       /**
        * Set subagent configuration overrides.
        *
@@ -11091,6 +11321,7 @@ ${endPrefix}`
           retryConfig: this.retryConfig,
           rateLimitConfig: this.rateLimitConfig,
           signal: this.signal,
+          reasoning: this.reasoningConfig,
           subagentConfig: this.subagentConfig,
           // Tree context for shared tree model (subagents share parent's tree)
           parentTree: this.parentContext?.tree,
@@ -11278,6 +11509,7 @@ ${endPrefix}`
           retryConfig: this.retryConfig,
           rateLimitConfig: this.rateLimitConfig,
           signal: this.signal,
+          reasoning: this.reasoningConfig,
           subagentConfig: this.subagentConfig,
           // Tree context for shared tree model (subagents share parent's tree)
           parentTree: this.parentContext?.tree,
@@ -11732,6 +11964,7 @@ var init_cost_reporting_client = __esm({
         let outputTokens = 0;
         let cachedInputTokens = 0;
         let cacheCreationInputTokens = 0;
+        let reasoningTokens = 0;
         const messages = [
           ...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
           { role: "user", content: prompt }
@@ -11748,6 +11981,7 @@ var init_cost_reporting_client = __esm({
             outputTokens = chunk.usage.outputTokens;
             cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
             cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
+            reasoningTokens = chunk.usage.reasoningTokens ?? 0;
           }
         }
         this.reportCostFromUsage(
@@ -11755,7 +11989,8 @@ var init_cost_reporting_client = __esm({
           inputTokens,
           outputTokens,
           cachedInputTokens,
-          cacheCreationInputTokens
+          cacheCreationInputTokens,
+          reasoningTokens
         );
         return result;
       }
@@ -11774,6 +12009,7 @@ var init_cost_reporting_client = __esm({
         let outputTokens = 0;
         let cachedInputTokens = 0;
         let cacheCreationInputTokens = 0;
+        let reasoningTokens = 0;
         const messages = [
           ...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
           { role: "user", content: prompt }
@@ -11793,6 +12029,7 @@ var init_cost_reporting_client = __esm({
               outputTokens = chunk.usage.outputTokens;
               cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
               cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
+              reasoningTokens = chunk.usage.reasoningTokens ?? 0;
             }
           }
         } finally {
@@ -11801,7 +12038,8 @@ var init_cost_reporting_client = __esm({
             inputTokens,
             outputTokens,
             cachedInputTokens,
-            cacheCreationInputTokens
+            cacheCreationInputTokens,
+            reasoningTokens
           );
         }
       }
@@ -11828,6 +12066,7 @@ var init_cost_reporting_client = __esm({
           let outputTokens = 0;
           let cachedInputTokens = 0;
           let cacheCreationInputTokens = 0;
+          let reasoningTokens = 0;
           try {
             for await (const chunk of innerStream) {
               if (chunk.usage) {
@@ -11835,6 +12074,7 @@ var init_cost_reporting_client = __esm({
                 outputTokens = chunk.usage.outputTokens;
                 cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
                 cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
+                reasoningTokens = chunk.usage.reasoningTokens ?? 0;
               }
               yield chunk;
             }
@@ -11845,7 +12085,8 @@ var init_cost_reporting_client = __esm({
                 inputTokens,
                 outputTokens,
                 cachedInputTokens,
-                cacheCreationInputTokens
+                cacheCreationInputTokens,
+                reasoningTokens
               );
             }
           }
@@ -11855,14 +12096,15 @@ var init_cost_reporting_client = __esm({
       /**
        * Calculates and reports cost from token usage.
        */
-      reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
+      reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
         if (inputTokens === 0 && outputTokens === 0) return;
         const estimate = this.client.modelRegistry.estimateCost(
           model,
           inputTokens,
           outputTokens,
           cachedInputTokens,
-          cacheCreationInputTokens
+          cacheCreationInputTokens,
+          reasoningTokens
         );
         if (estimate && estimate.totalCost > 0) {
           this.reportCost(estimate.totalCost);
@@ -12954,9 +13196,18 @@ var init_stream_processor = __esm({
         let usage;
         let didExecuteGadgets = false;
         let shouldBreakLoop = false;
+        let thinkingContent = "";
         for await (const chunk of stream2) {
           if (chunk.finishReason) finishReason = chunk.finishReason;
           if (chunk.usage) usage = chunk.usage;
+          if (chunk.thinking?.content) {
+            thinkingContent += chunk.thinking.content;
+            yield {
+              type: "thinking",
+              content: chunk.thinking.content,
+              thinkingType: chunk.thinking.type
+            };
+          }
           let processedChunk = "";
           if (chunk.text) {
             processedChunk = chunk.text;
@@ -13070,7 +13321,8 @@ var init_stream_processor = __esm({
           finishReason,
           usage,
           rawResponse: this.responseText,
-          finalMessage
+          finalMessage,
+          thinkingContent: thinkingContent || void 0
         };
         yield completionEvent;
       }
@@ -13872,6 +14124,7 @@ var init_agent = __esm({
       mediaStore;
       // Cancellation
       signal;
+      reasoning;
       // Retry configuration
       retryConfig;
       // Rate limit tracker for proactive throttling
@@ -13963,6 +14216,7 @@ var init_agent = __esm({
           );
         }
         this.signal = options.signal;
+        this.reasoning = options.reasoning;
         this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
         if (options.sharedRateLimitTracker) {
           this.rateLimitTracker = options.sharedRateLimitTracker;
@@ -14365,6 +14619,7 @@ var init_agent = __esm({
                     usage: result.usage,
                     rawResponse: result.rawResponse,
                     finalMessage: result.finalMessage,
+                    thinkingContent: result.thinkingContent,
                     logger: this.logger,
                     subagentContext
                   };
@@ -14665,17 +14920,34 @@ var init_agent = __esm({
         });
         return { type: "compaction", event: compactionEvent };
       }
+      /**
+       * Resolve reasoning configuration with auto-enable logic.
+       *
+       * Priority: explicit config > auto-enable for reasoning models > undefined
+       * When a model has `features.reasoning: true` and no explicit config is set,
+       * reasoning is automatically enabled at "medium" effort.
+       */
+      resolveReasoningConfig(spec) {
+        if (this.reasoning !== void 0) return this.reasoning;
+        if (spec?.features?.reasoning) {
+          return { enabled: true, effort: "medium" };
+        }
+        return void 0;
+      }
       /**
        * Prepare LLM call options, create tree node, and process beforeLLMCall controller.
        * @returns options, node ID, and optional skipWithSynthetic response if controller wants to skip
        */
       async prepareLLMCall(iteration) {
+        const spec = this.client.modelRegistry?.getModelSpec?.(this.model);
+        const reasoning = this.resolveReasoningConfig(spec);
         let llmOptions = {
           model: this.model,
           messages: this.conversation.getMessages(),
           temperature: this.temperature,
           maxTokens: this.defaultMaxTokens,
-          signal: this.signal
+          signal: this.signal,
+          reasoning
         };
         const llmNode = this.tree.addLLMCall({
           iteration,
@@ -14745,13 +15017,15 @@ var init_agent = __esm({
           inputTokens,
           outputTokens,
           result.usage?.cachedInputTokens ?? 0,
-          result.usage?.cacheCreationInputTokens ?? 0
+          result.usage?.cacheCreationInputTokens ?? 0,
+          result.usage?.reasoningTokens ?? 0
         )?.totalCost;
         this.tree.completeLLMCall(nodeId, {
           response: result.rawResponse,
           usage: result.usage,
           finishReason: result.finishReason,
-          cost: llmCost
+          cost: llmCost,
+          thinkingContent: result.thinkingContent
         });
       }
       /**