npm - @corbat-tech/coco - Versions diffs - 2.30.0 → 2.32.0 - Mend

@corbat-tech/coco 2.30.0 → 2.32.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli/index.js CHANGED Viewed

@@ -259,6 +259,7 @@ __export(schema_exports, {
   ShipConfigSchema: () => ShipConfigSchema,
   SkillsConfigSchema: () => SkillsConfigSchema,
   StackConfigSchema: () => StackConfigSchema,
+  ThinkingModeSchema: () => ThinkingModeSchema,
   ToolsConfigSchema: () => ToolsConfigSchema,
   createDefaultConfigObject: () => createDefaultConfigObject,
   validateConfig: () => validateConfig
@@ -303,9 +304,13 @@ function createDefaultConfigObject(projectName, language = "typescript") {
     }
   };
 }
-var ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
+var ThinkingModeSchema, ProviderConfigSchema, QualityConfigSchema, PersistenceConfigSchema, StackConfigSchema, ProjectConfigSchema, GitHubConfigSchema, IntegrationsConfigSchema, MCPServerConfigEntrySchema, MCPConfigSchema, ToolsConfigSchema, ShipConfigSchema, SkillsConfigSchema, CocoConfigSchema;
 var init_schema = __esm({
   "src/config/schema.ts"() {
+    ThinkingModeSchema = z.union([
+      z.enum(["off", "auto", "low", "medium", "high"]),
+      z.object({ budget: z.number().int().min(0).max(2e5) })
+    ]);
     ProviderConfigSchema = z.object({
       type: z.enum([
         "anthropic",
@@ -467,6 +472,7 @@ var init_schema = __esm({
         timeout: 12e4
       }),
       providerModels: z.record(z.string(), z.string()).optional(),
+      providerThinking: z.record(z.string(), ThinkingModeSchema).optional(),
       quality: QualityConfigSchema.default({
         minScore: 85,
         minCoverage: 80,
@@ -2687,10 +2693,12 @@ __export(env_exports, {
   getInternalProviderId: () => getInternalProviderId,
   getLastUsedModel: () => getLastUsedModel,
   getLastUsedProvider: () => getLastUsedProvider,
+  getLastUsedThinking: () => getLastUsedThinking,
   isOAuthProvider: () => isOAuthProvider,
   migrateOldPreferences: () => migrateOldPreferences,
   removeEnvProvider: () => removeEnvProvider,
-  saveProviderPreference: () => saveProviderPreference
+  saveProviderPreference: () => saveProviderPreference,
+  saveThinkingPreference: () => saveThinkingPreference
 });
 function loadGlobalCocoEnv() {
   try {
@@ -2910,6 +2918,51 @@ async function getLastUsedModel(provider) {
   }
   return void 0;
 }
+async function getLastUsedThinking(provider) { // Reads the thinking mode stored for `provider` in the config loaded from CONFIG_PATHS.config.
+  try {
+    const config = await loadConfig(CONFIG_PATHS.config);
+    const mode = config.providerThinking?.[provider]; // undefined when the map, or this provider's entry, is absent
+    return mode;
+  } catch { // any error thrown inside the try is reported to the caller as "no stored mode"
+    return void 0;
+  }
+}
+async function saveThinkingPreference(provider, mode) { // Records `mode` as the thinking preference for `provider` and writes the config back with saveConfig.
+  let config;
+  try {
+    config = await loadConfig(CONFIG_PATHS.config);
+  } catch { // NOTE(review): every load failure lands here, not only "file missing" - confirm saving these defaults cannot overwrite an existing but unreadable config
+    config = { // hard-coded fallback config used when nothing could be loaded
+      project: { name: "global", version: "0.1.0" },
+      provider: {
+        type: "anthropic",
+        model: "claude-sonnet-4-6",
+        maxTokens: 8192,
+        temperature: 0,
+        timeout: 12e4
+      },
+      quality: {
+        minScore: 85,
+        minCoverage: 80,
+        maxIterations: 10,
+        minIterations: 2,
+        convergenceThreshold: 2,
+        securityThreshold: 100
+      },
+      persistence: {
+        checkpointInterval: 3e5,
+        maxCheckpoints: 50,
+        retentionDays: 7,
+        compressOldCheckpoints: true
+      }
+    };
+  }
+  config.providerThinking = { // rebuild the map so entries for other providers are kept
+    ...config.providerThinking,
+    [provider]: mode // "off" is stored as-is; nothing is filtered here
+  };
+  await saveConfig(config, void 0, true); // NOTE(review): meaning of the 2nd and 3rd arguments is not visible here - verify against saveConfig
+}
 async function saveProviderPreference(provider, model, options) {
   let config;
   try {
@@ -3280,6 +3333,160 @@ var init_logger = __esm({
     globalLogger = null;
   }
 });
+// src/providers/thinking.ts
+function isAnthropicThinkingModel(model) { // True when the model id contains one of the Claude family substrings checked below.
+  const m = model.toLowerCase(); // comparison is case-insensitive
+  if (m === "kimi-for-coding") return false; // explicit early exit for this exact id
+  return m.includes("claude-3-7") || m.includes("claude-opus-4") || m.includes("claude-sonnet-4") || m.includes("claude-haiku-4-5") || m.includes("claude-4"); // substring match anywhere in the id
+}
+function isOpenAIReasoningModel(model) { // True when the model id starts with o1/o3/o4/gpt-5 or contains "codex" (case-insensitive).
+  const m = model.toLowerCase();
+  return m.startsWith("o1") || m.startsWith("o3") || m.startsWith("o4") || m.startsWith("gpt-5") || m.includes("codex"); // NOTE(review): the prefix checks fail for ids that carry a vendor prefix (e.g. "vendor/o3-mini") - confirm this is intended for the routed providers handled in getThinkingCapability
+}
+function isGeminiThinkingModel(model) { // True when the model id contains one of the Gemini substrings checked below (case-insensitive).
+  const m = model.toLowerCase();
+  return m.includes("gemini-2.5-pro") || m.includes("gemini-2.5-flash") || m.includes("gemini-3") && !m.includes("flash-lite") || m.includes("gemini-2.0-flash-thinking"); // && binds tighter than ||, so the "flash-lite" exclusion applies only to the "gemini-3" term
+}
+function isKimiThinkingModel(model) { // True when the model id contains "kimi-k2" or is exactly "kimi-latest" (case-insensitive).
+  const m = model.toLowerCase();
+  return m.includes("kimi-k2") || m === "kimi-latest"; // substring match for the k2 family, exact match for the alias
+}
+function getThinkingCapability(provider, model) { // Returns the capability descriptor for a provider/model pair; UNSUPPORTED when the model check for that provider fails.
+  switch (provider) {
+    case "anthropic":
+    case "kimi-code": // shares the Anthropic model check with "anthropic"
+      return isAnthropicThinkingModel(model) ? ANTHROPIC_CAPABILITY : UNSUPPORTED;
+    case "openai":
+    case "copilot":
+    case "groq":
+    case "openrouter":
+    case "mistral":
+    case "deepseek":
+    case "together":
+    case "huggingface":
+    case "qwen": // every provider in this group is tested with isOpenAIReasoningModel
+      return isOpenAIReasoningModel(model) ? OPENAI_CAPABILITY : UNSUPPORTED;
+    case "kimi":
+      return isKimiThinkingModel(model) ? KIMI_CAPABILITY : UNSUPPORTED;
+    case "gemini":
+    case "vertex": // shares the Gemini model check with "gemini"
+      return isGeminiThinkingModel(model) ? GEMINI_CAPABILITY : UNSUPPORTED;
+    case "lmstudio":
+    case "ollama":
+    case "codex": // listed explicitly, but the result is the same as the default branch
+      return UNSUPPORTED;
+    default: // unknown providers are treated as unsupported
+      return UNSUPPORTED;
+  }
+}
+function resolveDefaultThinking(provider, model) { // Returns the defaultMode of the capability selected by getThinkingCapability for this provider/model.
+  return getThinkingCapability(provider, model).defaultMode; // "off" when the pair resolves to UNSUPPORTED
+}
+function formatThinkingMode(mode) { // Renders a thinking mode as a short label.
+  if (typeof mode === "object") return `${mode.budget}t`; // budget objects become "<budget>t"; NOTE(review): null also has typeof "object" and would throw here - confirm callers never pass null
+  return mode; // any non-object value is returned unchanged
+}
+function mapToAnthropic(mode, model) { // Builds the { type: "enabled", budget_tokens } object for a thinking mode, or undefined when none should be sent.
+  if (!mode || mode === "off") return void 0; // missing mode and "off" both mean: send nothing
+  if (!isAnthropicThinkingModel(model)) return void 0; // model not recognised by the Anthropic check
+  const cap = ANTHROPIC_CAPABILITY;
+  const { min, max } = cap.budgetRange;
+  if (typeof mode === "object") {
+    return { type: "enabled", budget_tokens: Math.min(Math.max(mode.budget, min), max) }; // explicit budget is clamped into [min, max]
+  }
+  const budgetMap = { // named level -> token budget
+    auto: cap.budgetRange.default, // "auto" uses the capability's default budget
+    low: ANTHROPIC_BUDGET.low,
+    medium: ANTHROPIC_BUDGET.medium,
+    high: ANTHROPIC_BUDGET.high
+  };
+  const budget = budgetMap[mode];
+  if (budget === void 0) return void 0; // any string that is not a key of budgetMap yields undefined
+  return { type: "enabled", budget_tokens: budget };
+}
+function mapToOpenAIEffort(mode, model) { // Converts a thinking mode to "low" | "medium" | "high", or undefined when no effort should be sent.
+  if (!mode || mode === "off") return void 0; // missing mode and "off" both mean: send nothing
+  if (!isOpenAIReasoningModel(model)) return void 0; // model not recognised by the OpenAI reasoning check
+  if (typeof mode === "object") { // a token budget is bucketed into an effort level
+    const { budget } = mode;
+    if (budget <= 2048) return "low";
+    if (budget <= 8e3) return "medium";
+    return "high"; // anything above 8000
+  }
+  if (mode === "auto") return "medium"; // "auto" is sent as "medium"
+  if (mode === "low" || mode === "medium" || mode === "high") return mode; // named levels pass through unchanged
+  return void 0; // any other string
+}
+function mapToGeminiBudget(mode, model) { // Converts a thinking mode to a numeric thinking budget; undefined means no budget is produced.
+  if (!isGeminiThinkingModel(model)) return void 0; // checked first, so unrecognised models get undefined even for "off"
+  if (!mode) return void 0;
+  if (mode === "off") return 0; // NOTE(review): Gemini docs suggest some models (e.g. 2.5 Pro) cannot disable thinking and may reject a budget of 0 - confirm per-model minimums
+  if (mode === "auto") return -1; // NOTE(review): presumably the provider's dynamic-budget sentinel - confirm against the Gemini API docs
+  const { min, max } = GEMINI_CAPABILITY.budgetRange;
+  if (typeof mode === "object") {
+    return Math.min(Math.max(mode.budget, min), max); // explicit budget is clamped into [min, max]
+  }
+  const budgetMap = { // named level -> token budget
+    low: GEMINI_BUDGET.low,
+    medium: GEMINI_BUDGET.medium,
+    high: GEMINI_BUDGET.high
+  };
+  return budgetMap[mode]; // undefined for any string that is not a key of budgetMap
+}
+function mapToKimiExtraBody(mode, model) { // Builds the { thinking: { type } } body for models matched by isKimiThinkingModel; undefined for any other model.
+  if (!isKimiThinkingModel(model)) return void 0;
+  const effectiveMode = mode ?? "off"; // a missing mode is treated as "off"
+  const enabled = effectiveMode !== "off"; // every other value (named level or budget object) enables thinking
+  return { thinking: { type: enabled ? "enabled" : "disabled" } };
+}
+var ANTHROPIC_BUDGET, GEMINI_BUDGET, UNSUPPORTED, ANTHROPIC_CAPABILITY, OPENAI_CAPABILITY, GEMINI_CAPABILITY, KIMI_CAPABILITY; // declared here, assigned inside the initializer below
+var init_thinking = __esm({ // NOTE(review): __esm is defined outside this view; presumably a lazy module initializer - confirm
+  "src/providers/thinking.ts"() {
+    ANTHROPIC_BUDGET = { // token budgets for the named levels used by mapToAnthropic
+      low: 2048,
+      medium: 8e3,
+      high: 16e3
+    };
+    GEMINI_BUDGET = { // token budgets for the named levels used by mapToGeminiBudget
+      low: 2048,
+      medium: 8e3,
+      high: 16e3
+    };
+    UNSUPPORTED = { // descriptor returned by getThinkingCapability when thinking is unavailable
+      supported: false,
+      kinds: [],
+      levels: ["off"],
+      defaultMode: "off"
+    };
+    ANTHROPIC_CAPABILITY = {
+      supported: true,
+      kinds: ["budget"],
+      levels: ["off", "auto", "low", "medium", "high"],
+      budgetRange: { min: 1024, max: 64e3, default: ANTHROPIC_BUDGET.medium }, // min/max are the clamp bounds in mapToAnthropic; default backs the "auto" level
+      defaultMode: "off"
+    };
+    OPENAI_CAPABILITY = { // no budgetRange: this capability only lists the "effort" kind
+      supported: true,
+      kinds: ["effort"],
+      levels: ["off", "auto", "low", "medium", "high"],
+      defaultMode: "medium"
+    };
+    GEMINI_CAPABILITY = {
+      supported: true,
+      kinds: ["budget"],
+      levels: ["off", "auto", "low", "medium", "high"],
+      budgetRange: { min: 0, max: 32e3, default: GEMINI_BUDGET.medium }, // min/max are the clamp bounds in mapToGeminiBudget
+      defaultMode: "auto"
+    };
+    KIMI_CAPABILITY = { // only "off" and "auto" are offered; mapToKimiExtraBody emits just enabled/disabled
+      supported: true,
+      kinds: ["effort"],
+      levels: ["off", "auto"],
+      defaultMode: "off"
+    };
+  }
+});
 function createAnthropicProvider(config) {
   const provider = new AnthropicProvider();
   if (config) {
@@ -3308,6 +3515,7 @@ var init_anthropic = __esm({
     init_errors();
     init_retry();
     init_logger();
+    init_thinking();
     DEFAULT_MODEL = "claude-opus-4-6";
     CONTEXT_WINDOWS = {
       // Kimi Code model (Anthropic-compatible endpoint)
@@ -3368,13 +3576,19 @@ var init_anthropic = __esm({
         this.ensureInitialized();
         return withRetry(async () => {
           try {
+            const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+            const thinkingParam = mapToAnthropic(options?.thinking, model);
+            const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
             const response = await this.client.messages.create({
-              model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-              max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-              temperature: options?.temperature ?? this.config.temperature ?? 0,
+              model,
+              // Anthropic requires max_tokens > budget_tokens
+              max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+              // Anthropic requires temperature=1 when thinking is enabled
+              temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
               system: this.extractSystem(messages, options?.system),
               messages: this.convertMessages(messages),
-              stop_sequences: options?.stopSequences
+              stop_sequences: options?.stopSequences,
+              ...thinkingParam && { thinking: thinkingParam }
             });
             return {
               id: response.id,
@@ -3398,14 +3612,18 @@ var init_anthropic = __esm({
         this.ensureInitialized();
         return withRetry(async () => {
           try {
+            const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+            const thinkingParam = mapToAnthropic(options?.thinking, model);
+            const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
             const response = await this.client.messages.create({
-              model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-              max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-              temperature: options?.temperature ?? this.config.temperature ?? 0,
+              model,
+              max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+              temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
               system: this.extractSystem(messages, options?.system),
               messages: this.convertMessages(messages),
               tools: this.convertTools(options.tools),
-              tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
+              tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
+              ...thinkingParam && { thinking: thinkingParam }
             });
             const toolCalls = this.extractToolCalls(response.content);
             return {
@@ -3431,13 +3649,17 @@ var init_anthropic = __esm({
         this.ensureInitialized();
         let timeoutTriggered = false;
         try {
+          const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+          const thinkingParam = mapToAnthropic(options?.thinking, model);
+          const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
           const stream = await this.client.messages.stream(
             {
-              model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-              max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-              temperature: options?.temperature ?? this.config.temperature ?? 0,
+              model,
+              max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+              temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
               system: this.extractSystem(messages, options?.system),
-              messages: this.convertMessages(messages)
+              messages: this.convertMessages(messages),
+              ...thinkingParam && { thinking: thinkingParam }
             },
             { signal: options?.signal }
           );
@@ -3493,15 +3715,19 @@ var init_anthropic = __esm({
         this.ensureInitialized();
         let timeoutTriggered = false;
         try {
+          const model = options?.model ?? this.config.model ?? DEFAULT_MODEL;
+          const thinkingParam = mapToAnthropic(options?.thinking, model);
+          const baseMaxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
           const stream = await this.client.messages.stream(
             {
-              model: options?.model ?? this.config.model ?? DEFAULT_MODEL,
-              max_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
-              temperature: options?.temperature ?? this.config.temperature ?? 0,
+              model,
+              max_tokens: thinkingParam ? Math.max(baseMaxTokens, thinkingParam.budget_tokens + 1024) : baseMaxTokens,
+              temperature: thinkingParam ? 1 : options?.temperature ?? this.config.temperature ?? 0,
               system: this.extractSystem(messages, options?.system),
               messages: this.convertMessages(messages),
               tools: this.convertTools(options.tools),
-              tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0
+              tool_choice: options.toolChoice ? this.convertToolChoice(options.toolChoice) : void 0,
+              ...thinkingParam && { thinking: thinkingParam }
             },
             { signal: options?.signal }
           );
@@ -4022,6 +4248,7 @@ var init_openai = __esm({
     init_errors();
     init_retry();
     init_tool_call_normalizer();
+    init_thinking();
     DEFAULT_MODEL2 = "gpt-5.3-codex";
     CONTEXT_WINDOWS2 = {
       // OpenAI models
@@ -4174,26 +4401,15 @@ var init_openai = __esm({
         return !MODELS_WITHOUT_TEMPERATURE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
       }
       /**
-       * Check if a model needs thinking mode disabled for tool use
-       * Kimi models have thinking mode enabled by default which requires
-       * reasoning_content in multi-turn conversations with tools
-       */
-      needsThinkingDisabled(model) {
-        return MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()));
-      }
-      /**
-       * Get extra body parameters for API calls
-       * Used to disable thinking mode for Kimi models
-       * See: https://huggingface.co/moonshotai/Kimi-K2.5
-       *
-       * For Official Moonshot API: {'thinking': {'type': 'disabled'}}
-       * For vLLM/SGLang: {'chat_template_kwargs': {"thinking": False}}
+       * Get extra body parameters for API calls.
+       * Honors the user's ThinkingMode for Kimi models; defaults to disabled
+       * (preserving existing behavior) when no mode is specified.
        */
-      getExtraBody(model) {
-        if (this.needsThinkingDisabled(model)) {
-          return {
-            thinking: { type: "disabled" }
-          };
+      getExtraBody(model, thinking) {
+        const kimiBody = mapToKimiExtraBody(thinking, model);
+        if (kimiBody) return kimiBody;
+        if (MODELS_WITH_THINKING_MODE.some((m) => model.toLowerCase().includes(m.toLowerCase()))) {
+          return { thinking: { type: "disabled" } };
         }
         return void 0;
       }
@@ -4210,6 +4426,7 @@ var init_openai = __esm({
           try {
             const supportsTemp = this.supportsTemperature(model);
             const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
+            const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
             const response = await this.client.chat.completions.create({
               model,
               ...buildMaxTokensParam(model, maxTokens),
@@ -4217,7 +4434,8 @@ var init_openai = __esm({
               stop: options?.stopSequences,
               ...supportsTemp && {
                 temperature: options?.temperature ?? this.config.temperature ?? 0
-              }
+              },
+              ...reasoningEffort && { reasoning_effort: reasoningEffort }
             });
             const choice = response.choices[0];
             return {
@@ -4247,7 +4465,8 @@ var init_openai = __esm({
         return withRetry(async () => {
           try {
             const supportsTemp = this.supportsTemperature(model);
-            const extraBody = this.getExtraBody(model);
+            const extraBody = this.getExtraBody(model, options?.thinking);
+            const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
             const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
             const requestParams = {
               model,
@@ -4259,6 +4478,9 @@ var init_openai = __esm({
             if (supportsTemp) {
               requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
             }
+            if (reasoningEffort) {
+              requestParams.reasoning_effort = reasoningEffort;
+            }
             if (extraBody) {
               Object.assign(requestParams, extraBody);
             }
@@ -4296,12 +4518,14 @@ var init_openai = __esm({
         try {
           const supportsTemp = this.supportsTemperature(model);
           const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
+          const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
           const stream = await this.client.chat.completions.create({
             model,
             ...buildMaxTokensParam(model, maxTokens),
             messages: this.convertMessages(messages, options?.system),
             stream: true,
-            ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 }
+            ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+            ...reasoningEffort && { reasoning_effort: reasoningEffort }
           });
           let streamStopReason;
           for await (const chunk of stream) {
@@ -4332,7 +4556,8 @@ var init_openai = __esm({
         let timeoutTriggered = false;
         try {
           const supportsTemp = this.supportsTemperature(model);
-          const extraBody = this.getExtraBody(model);
+          const extraBody = this.getExtraBody(model, options?.thinking);
+          const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
           const maxTokens = options?.maxTokens ?? this.config.maxTokens ?? 8192;
           const requestParams = {
             model,
@@ -4345,6 +4570,9 @@ var init_openai = __esm({
           if (supportsTemp) {
             requestParams.temperature = options?.temperature ?? this.config.temperature ?? 0;
           }
+          if (reasoningEffort) {
+            requestParams.reasoning_effort = reasoningEffort;
+          }
           if (extraBody) {
             Object.assign(requestParams, extraBody);
           }
@@ -4812,6 +5040,7 @@ var init_openai = __esm({
             const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
             const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
             const supportsTemp = this.supportsTemperature(model);
+            const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
             const response = await this.client.responses.create({
               model,
               input,
@@ -4820,6 +5049,8 @@ var init_openai = __esm({
               ...supportsTemp && {
                 temperature: options?.temperature ?? this.config.temperature ?? 0
               },
+              // Responses API uses nested reasoning.effort (not top-level reasoning_effort)
+              ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
               store: false
             });
             return {
@@ -4848,6 +5079,7 @@ var init_openai = __esm({
             const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
             const tools = this.convertToolsForResponses(options.tools);
             const supportsTemp = this.supportsTemperature(model);
+            const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
             const response = await this.client.responses.create({
               model,
               input,
@@ -4857,6 +5089,7 @@ var init_openai = __esm({
               ...supportsTemp && {
                 temperature: options?.temperature ?? this.config.temperature ?? 0
               },
+              ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
               store: false
             });
             let content = "";
@@ -4902,12 +5135,14 @@ var init_openai = __esm({
           const model = options?.model ?? this.config.model ?? DEFAULT_MODEL2;
           const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
           const supportsTemp = this.supportsTemperature(model);
+          const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
           const stream = await this.client.responses.create({
             model,
             input,
             instructions: instructions ?? void 0,
             max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
             ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+            ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
             store: false,
             stream: true
           });
@@ -4965,12 +5200,14 @@ var init_openai = __esm({
           const { input, instructions } = this.convertToResponsesInput(messages, options?.system);
           const tools = options.tools.length > 0 ? this.convertToolsForResponses(options.tools) : void 0;
           const supportsTemp = this.supportsTemperature(model);
+          const reasoningEffort = mapToOpenAIEffort(options?.thinking, model);
           const requestParams = {
             model,
             input,
             instructions: instructions ?? void 0,
             max_output_tokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
             ...supportsTemp && { temperature: options?.temperature ?? this.config.temperature ?? 0 },
+            ...reasoningEffort && { reasoning: { effort: reasoningEffort } },
             store: false,
             stream: true
           };
@@ -6010,6 +6247,7 @@ var DEFAULT_MODEL5, SKIP_THOUGHT_SIGNATURE_VALIDATOR, CONTEXT_WINDOWS5, GeminiPr
 var init_gemini = __esm({
   "src/providers/gemini.ts"() {
     init_errors();
+    init_thinking();
     DEFAULT_MODEL5 = "gemini-3.1-pro-preview";
     SKIP_THOUGHT_SIGNATURE_VALIDATOR = "skip_thought_signature_validator";
     CONTEXT_WINDOWS5 = {
@@ -6168,12 +6406,17 @@ var init_gemini = __esm({
         return model ?? this.config.model ?? DEFAULT_MODEL5;
       }
       buildConfig(messages, options, tools, toolChoice) {
+        const model = this.getModel(options?.model);
+        const thinkingBudget = mapToGeminiBudget(options?.thinking, model);
         const config = {
           maxOutputTokens: options?.maxTokens ?? this.config.maxTokens ?? 8192,
           temperature: options?.temperature ?? this.config.temperature ?? 0,
           stopSequences: options?.stopSequences,
           systemInstruction: this.extractSystem(messages, options?.system)
         };
+        if (thinkingBudget !== void 0) {
+          config.thinkingConfig = { thinkingBudget };
+        }
         if (tools && tools.length > 0) {
           config.tools = [{ functionDeclarations: this.convertTools(tools) }];
           config.toolConfig = {
@@ -10541,11 +10784,15 @@ function generateToolCatalog(registry) {
 async function createDefaultReplConfig() {
   const providerType = await getLastUsedProvider();
   const model = await getLastUsedModel(providerType) || getDefaultModel(providerType);
+  const persistedThinking = await getLastUsedThinking(providerType);
+  const thinking = persistedThinking ?? resolveDefaultThinking(providerType, model);
+  const thinkingToStore = thinking === "off" ? void 0 : thinking;
   return {
     provider: {
       type: providerType,
       model,
-      maxTokens: 8192
+      maxTokens: 8192,
+      thinking: thinkingToStore
     },
     ui: {
       theme: "auto",
@@ -10980,6 +11227,7 @@ var MAX_SKILL_INSTRUCTIONS_CHARS, TRUST_SETTINGS_DIR, TRUST_SETTINGS_FILE, PROJE
 var init_session = __esm({
   "src/cli/repl/session.ts"() {
     init_env();
+    init_thinking();
     init_manager();
     init_compactor();
     init_memory();
@@ -33683,6 +33931,7 @@ var helpCommand = {
         commands: [
           { cmd: "/model, /m", desc: "View or change the current model" },
           { cmd: "/provider", desc: "View or change the LLM provider" },
+          { cmd: "/thinking, /think", desc: "View or change the reasoning/thinking mode" },
           { cmd: "/doctor, /dr", desc: "Run local diagnostics for config, auth, hooks, and tools" },
           { cmd: "/compact", desc: "Toggle compact mode (less verbose)" },
           { cmd: "/cost, /tokens", desc: "Show token usage and cost" },
@@ -34149,6 +34398,7 @@ function truncate2(str, maxLength, suffix = "...") {
 // src/cli/repl/commands/model.ts
 init_env();
+init_thinking();
 async function fetchLocalModels(providerType) {
   try {
     const baseUrl = getBaseUrl(providerType);
@@ -34330,6 +34580,32 @@ async function persistModelPreference(provider, model) {
     console.log(chalk.dim("  Model changed for this session only.\n"));
   }
 }
+function reconcileThinkingAfterModelChange(session, newModel) { // Adjusts session.config.provider.thinking so it fits the capability of `newModel` on the session's provider.
+  const provider = session.config.provider.type;
+  const cap = getThinkingCapability(provider, newModel);
+  if (!cap.supported) { // case 1: the new model has no thinking support
+    if (session.config.provider.thinking !== void 0) { // only clear and log when something was actually set
+      session.config.provider.thinking = void 0;
+      console.log(chalk.dim("  \u2139 Thinking not supported on this model \u2014 turned off."));
+    }
+    return;
+  }
+  const current = session.config.provider.thinking;
+  if (current !== void 0 && typeof current === "object" && !cap.kinds.includes("budget")) { // case 2: a budget object is stored but the new capability does not list the "budget" kind
+    const newDefault = resolveDefaultThinking(provider, newModel);
+    session.config.provider.thinking = newDefault === "off" ? void 0 : newDefault; // "off" is represented as undefined on the session
+    console.log( // tell the user which mode replaced the budget
+      chalk.dim(
+        `  \u2139 Thinking reset to ${session.config.provider.thinking ?? "off"} (model uses effort levels).`
+      )
+    );
+    return;
+  }
+  if (current === void 0) { // case 3: nothing stored yet, adopt the new model's default
+    const def = resolveDefaultThinking(provider, newModel);
+    session.config.provider.thinking = def === "off" ? void 0 : def;
+  } // otherwise the stored mode is left unchanged
+}
 var modelCommand = {
   name: "model",
   aliases: ["m"],
@@ -34378,6 +34654,7 @@ var modelCommand = {
         return false;
       }
       session.config.provider.model = selectedModel;
+      reconcileThinkingAfterModelChange(session, selectedModel);
       await persistModelPreference(currentProvider, selectedModel);
       const modelInfo2 = providerDef.models.find((m) => m.id === selectedModel);
       console.log(chalk.green(`\u2713 Switched to ${modelInfo2?.name ?? selectedModel}
@@ -34400,6 +34677,7 @@ var modelCommand = {
     if (!foundInProvider) {
       console.log(chalk.yellow(`Model "${newModel}" not in known list, setting anyway...`));
       session.config.provider.model = newModel;
+      reconcileThinkingAfterModelChange(session, newModel);
       await persistModelPreference(currentProvider, newModel);
       console.log(chalk.green(`\u2713 Model set to: ${newModel}
 `));
@@ -34415,6 +34693,7 @@ var modelCommand = {
       return false;
     }
     session.config.provider.model = newModel;
+    reconcileThinkingAfterModelChange(session, newModel);
     await persistModelPreference(currentProvider, newModel);
     const modelInfo = providerDef.models.find((m) => m.id === newModel);
     console.log(chalk.green(`\u2713 Switched to ${modelInfo?.name ?? newModel}
@@ -36407,6 +36686,7 @@ async function promptVertexSettings2(defaults) {
 // src/cli/repl/commands/status.ts
 init_state();
 init_trust_store();
+init_thinking();
 function getGitStatus2(projectPath) {
   try {
     execSync("git rev-parse --git-dir", { cwd: projectPath, stdio: "pipe" });
@@ -36532,6 +36812,11 @@ var statusCommand = {
     p26.log.step("Session");
     p26.log.message(`  \u{1F4C1} ${session.projectPath}`);
     p26.log.message(`  \u{1F916} ${session.config.provider.type} / ${session.config.provider.model}`);
+    const cap = getThinkingCapability(session.config.provider.type, session.config.provider.model);
+    if (cap.supported) {
+      const thinkingLabel = session.config.provider.thinking !== void 0 ? formatThinkingMode(session.config.provider.thinking) : "off";
+      p26.log.message(`  \u{1F9E0} thinking: ${thinkingLabel}  ${chalk.dim("(/thinking to change)")}`);
+    }
     p26.outro("Done");
     return false;
   }
@@ -50467,6 +50752,209 @@ var doctorCommand = {
   }
 };
+// src/cli/repl/commands/thinking.ts
+init_thinking();
+init_env();
+var EFFORT_LEVELS = ["off", "auto", "low", "medium", "high"];
+function isEffortLevel(s) {
+  return EFFORT_LEVELS.includes(s);
+}
+function parseThinkingArg(arg) {
+  if (isEffortLevel(arg)) return arg;
+  const n = parseInt(arg, 10);
+  if (!isNaN(n) && n >= 0) return { budget: n };
+  return null;
+}
+function modeDescription(mode, hasBudget) {
+  if (typeof mode === "object") return `${mode.budget} token budget`;
+  const descs = {
+    off: "No reasoning \u2014 fastest and cheapest",
+    auto: hasBudget ? "Dynamic budget \u2014 provider decides" : "Provider default effort",
+    low: hasBudget ? "~2 048 tokens \u2014 quick reasoning" : "Minimal effort",
+    medium: hasBudget ? "~8 000 tokens \u2014 balanced" : "Balanced effort",
+    high: hasBudget ? "~16 000 tokens \u2014 deep reasoning" : "Maximum effort"
+  };
+  return descs[mode] ?? mode;
+}
+async function selectThinkingInteractively(modes, currentMode, hasBudget) {
+  const currentLabel = formatThinkingMode(currentMode);
+  return new Promise((resolve4) => {
+    let selectedIndex = modes.findIndex((m) => formatThinkingMode(m) === currentLabel);
+    if (selectedIndex === -1) selectedIndex = 0;
+    let lastTotalLines = 0;
+    const clearPrevious = () => {
+      if (lastTotalLines === 0) return;
+      process.stdout.write("\x1B[2K\r");
+      for (let i = 0; i < lastTotalLines; i++) {
+        process.stdout.write("\x1B[1A\x1B[2K");
+      }
+      process.stdout.write("\r");
+    };
+    const renderMenu = () => {
+      clearPrevious();
+      let totalLines = 0;
+      for (let i = 0; i < modes.length; i++) {
+        const mode = modes[i];
+        const label = formatThinkingMode(mode);
+        const isCurrent = label === currentLabel;
+        const isSelected = i === selectedIndex;
+        const desc = modeDescription(mode, hasBudget);
+        let line = "";
+        if (isSelected) {
+          line = chalk.bgBlue.white(` \u25B6 ${label.padEnd(8)}`) + chalk.bgBlue.white(` ${desc} `);
+        } else if (isCurrent) {
+          line = chalk.green(` \u25CF ${label.padEnd(8)}`) + chalk.dim(` ${desc}`);
+        } else {
+          line = chalk.dim(` \u25CB ${label.padEnd(8)}`) + chalk.dim(` ${desc}`);
+        }
+        process.stdout.write(line + "\n");
+        totalLines++;
+      }
+      process.stdout.write("\n" + chalk.dim("\u2191/\u2193 navigate \u2022 Enter select \u2022 Esc cancel") + "\n");
+      totalLines += 2;
+      lastTotalLines = totalLines;
+    };
+    const cleanup = () => {
+      if (process.stdin.isTTY) process.stdin.setRawMode(false);
+      process.stdin.pause();
+      process.stdin.removeListener("data", onKeyPress);
+    };
+    const onKeyPress = (data) => {
+      const key = data.toString();
+      if (key === "\r" || key === "\n") {
+        clearPrevious();
+        cleanup();
+        resolve4(modes[selectedIndex] ?? null);
+        return;
+      }
+      if (key === "\x1B" || key === "q" || key === "") {
+        clearPrevious();
+        cleanup();
+        resolve4(null);
+        return;
+      }
+      if (key === "\x1B[A") {
+        selectedIndex = (selectedIndex - 1 + modes.length) % modes.length;
+        renderMenu();
+        return;
+      }
+      if (key === "\x1B[B") {
+        selectedIndex = (selectedIndex + 1) % modes.length;
+        renderMenu();
+        return;
+      }
+    };
+    if (process.stdin.isTTY) process.stdin.setRawMode(true);
+    process.stdin.resume();
+    process.stdin.on("data", onKeyPress);
+    renderMenu();
+  });
+}
+async function applyMode(parsed, session, provider, model) {
+  const capability = getThinkingCapability(provider, model);
+  if ((provider === "kimi" || provider === "kimi-code") && parsed !== "off" && parsed !== "auto") {
+    console.log(
+      chalk.yellow(
+        "\n\u26A0  Enabling thinking on Kimi may cause issues with tool calling.\n   If you experience errors, run /thinking off to restore default behavior.\n"
+      )
+    );
+  }
+  const previousMode = session.config.provider.thinking;
+  const newMode = parsed === "off" ? void 0 : parsed;
+  session.config.provider.thinking = newMode;
+  const modeToSave = newMode ?? resolveDefaultThinking(provider, model);
+  await saveThinkingPreference(provider, modeToSave);
+  const previousLabel = previousMode !== void 0 ? formatThinkingMode(previousMode) : "off";
+  const newLabel = newMode !== void 0 ? formatThinkingMode(newMode) : "off";
+  if (previousLabel === newLabel) {
+    console.log(chalk.dim(`
+  Already using thinking: ${newLabel}
+`));
+  } else {
+    const kindLabel = capability.kinds.includes("budget") ? "budget" : "effort";
+    console.log(chalk.green(`
+\u2713 Thinking (${kindLabel}): ${previousLabel} \u2192 ${newLabel}
+`));
+  }
+}
+var thinkingCommand = {
+  name: "thinking",
+  aliases: ["think", "reason"],
+  description: "View or change the reasoning/thinking mode for the current model",
+  usage: "/thinking [off|auto|low|medium|high|<budget-tokens>]",
+  async execute(args, session) {
+    const provider = session.config.provider.type;
+    const model = session.config.provider.model;
+    const capability = getThinkingCapability(provider, model);
+    if (!capability.supported) {
+      console.log(
+        chalk.yellow(`
+\u26A0  Thinking not supported for ${model} on ${provider}.
+`) + chalk.dim(
+          "   Compatible models: claude-3-7+, claude-4+, o3, o4-mini, gpt-5*, gemini-2.5+\n"
+        )
+      );
+      return false;
+    }
+    const hasBudget = capability.kinds.includes("budget");
+    const current = session.config.provider.thinking;
+    const currentMode = current ?? "off";
+    if (args.length === 0) {
+      console.log(chalk.cyan.bold("\n\u2550\u2550\u2550 Thinking Mode \u2550\u2550\u2550\n"));
+      console.log(
+        `  ${chalk.dim(provider + "/")}${chalk.cyan(model)}  ${chalk.dim("\xB7")}  kind: ${chalk.dim(capability.kinds.join(", "))}`
+      );
+      if (hasBudget && capability.budgetRange) {
+        const { min, max } = capability.budgetRange;
+        console.log(
+          chalk.dim(`  Budget range: ${min}\u2013${max} tokens  (/thinking 8000 for custom)
+`)
+        );
+      } else {
+        console.log();
+      }
+      const selected = await selectThinkingInteractively(capability.levels, currentMode, hasBudget);
+      if (selected === null) {
+        console.log(chalk.dim("  Cancelled\n"));
+        return false;
+      }
+      await applyMode(selected, session, provider, model);
+      return false;
+    }
+    const rawArg = args[0].toLowerCase();
+    const parsed = parseThinkingArg(rawArg);
+    if (parsed === null) {
+      console.log(chalk.red(`
+\u2717 Unknown thinking mode: "${args[0]}"`));
+      console.log(
+        chalk.dim("  Valid options: off, auto, low, medium, high, or a token budget number\n")
+      );
+      return false;
+    }
+    if (typeof parsed === "object" && !hasBudget) {
+      console.log(
+        chalk.red(`
+\u2717 ${provider}/${model} uses effort levels, not token budgets.`) + chalk.dim("\n  Use: off, auto, low, medium, or high\n")
+      );
+      return false;
+    }
+    if (typeof parsed === "object" && capability.budgetRange) {
+      const { min, max } = capability.budgetRange;
+      if (parsed.budget < min || parsed.budget > max) {
+        console.log(
+          chalk.red(`
+\u2717 Budget ${parsed.budget} is out of range.`) + chalk.dim(`
+  Valid range for ${model}: ${min}\u2013${max} tokens
+`)
+        );
+        return false;
+      }
+    }
+    await applyMode(parsed, session, provider, model);
+    return false;
+  }
+};
 // src/cli/repl/output/renderer.ts
 init_syntax();
 var lineBuffer = "";
@@ -51498,7 +51986,8 @@ var commands = [
   buildAppCommand,
   contextCommand,
   bestOfNCommand,
-  doctorCommand
+  doctorCommand,
+  thinkingCommand
 ];
 function isSlashCommand(input) {
   return input.startsWith("/");
@@ -53868,6 +54357,7 @@ ${tail}`;
         tools: [],
         maxTokens: session.config.provider.maxTokens,
         signal: options.signal
+        // Omit thinking for the final explanation turn to avoid unnecessary cost
       })) {
         if (options.signal?.aborted) break;
         if (chunk.type === "text" && chunk.text) {
@@ -53922,7 +54412,8 @@ ${tail}`;
           for await (const chunk of provider.streamWithTools(messages, {
             tools,
             maxTokens: session.config.provider.maxTokens,
-            signal: options.signal
+            signal: options.signal,
+            thinking: session.config.provider.thinking
           })) {
             if (options.signal?.aborted) {
               break;
@@ -55034,6 +55525,7 @@ init_allowed_paths();
 // src/cli/repl/status-bar.ts
 init_env();
 init_full_access_mode();
+init_thinking();
 function formatContextUsage(percent) {
   const label = `ctx ${percent.toFixed(0)}%`;
   if (percent >= 90) return chalk.red(label);
@@ -55053,7 +55545,9 @@ function formatStatusBar(projectPath, config, gitCtx, contextUsagePercent) {
   parts.push(chalk.dim("\u{1F4C1} ") + chalk.magenta(projectName));
   const providerName = config.provider.type;
   const modelName = getDisplayModel(config);
-  parts.push(chalk.dim(`${providerName}/`) + chalk.cyan(modelName));
+  const thinkingMode = config.provider.thinking;
+  const thinkingSuffix = thinkingMode !== void 0 ? chalk.dim(" [") + chalk.magenta(formatThinkingMode(thinkingMode)) + chalk.dim("]") : "";
+  parts.push(chalk.dim(`${providerName}/`) + chalk.cyan(modelName) + thinkingSuffix);
   if (isQualityLoop()) {
     parts.push(chalk.green("\u{1F504} quality loop"));
   }