npm - winter-super-cli - Versions diffs - 2026.6.5 → 2026.6.7 - Mend

winter-super-cli 2026.6.5 → 2026.6.7

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (24)

package/bin/winter.js +1 -0
package/package.json +3 -3
package/src/agent/runtime.js +13 -16
package/src/ai/model-capabilities.js +17 -1
package/src/ai/prompts/system-prompt.js +33 -52
package/src/ai/providers.js +179 -62
package/src/ai/small-model-amplifier.js +7 -19
package/src/cli/commands.js +162 -0
package/src/cli/context-loader.js +1 -1
package/src/cli/input-controller.js +55 -44
package/src/cli/prompt-builder.js +20 -11
package/src/cli/repl-commands.js +3 -0
package/src/cli/repl.js +318 -444
package/src/cli/slash-commands.js +1 -0
package/src/cli/snowflake-logo.js +64 -86
package/src/cli/terminal-ui.js +139 -85
package/src/cli/tool-runtime.js +8 -3
package/src/cli/tui.js +181 -0
package/src/codebase-index/codegraph-adapter.js +154 -0
package/src/codebase-index/indexer.js +1 -1
package/src/codebase-index/search.js +31 -2
package/src/context/router.js +4 -41
package/src/context/token-juice.js +37 -10
package/src/tools/executor.js +78 -3

package/bin/winter.js CHANGED Viewed

@@ -23,6 +23,7 @@ const COMMANDS = new Set([
   'autopilot', 'plan',
   'provider', 'providers', 'model', 'models', 'ecc', 'page-agent', 'pageagent',
   'resources', 'htmlfx', 'memory-vault', 'doctor', 'context', 'scorecard',
+  'tui',
 ]);
 function isInteractiveRequest(args) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "winter-super-cli",
-  "version": "2026.6.5",
+  "version": "2026.6.7",
   "description": "❄️ AI-Powered Development CLI with Interactive REPL",
   "type": "module",
   "main": "bin/winter.js",
@@ -57,7 +57,7 @@
     "diff": "^9.0.0"
   },
   "optionalDependencies": {
+    "@colbymchenry/codegraph": "^0.7.12",
     "puppeteer": "^24.43.1"
-  },
-  "devDependencies": {}
+  }
 }

package/src/agent/runtime.js CHANGED Viewed

@@ -1,6 +1,6 @@
 import { Spinner } from '../cli/spinner.js';
 import { colors } from '../cli/snowflake-logo.js';
-import { renderBox, terminalWidth, wrapText } from '../cli/terminal-ui.js';
+import { renderToolPanel } from '../cli/tui.js';
 import { getMutatingToolNames, recordToolCallAdapterStats } from '../cli/tool-runtime.js';
 import { buildSmallModelAmplification } from '../ai/small-model-amplifier.js';
@@ -38,6 +38,10 @@ export class AgentRuntime {
       depth,
     });
     const maxToolTurns = amplifier.maxToolTurns || 8;
+    // Keep self-critique as prompt discipline only. A second runtime model turn
+    // duplicates the final answer because the first answer is already rendered.
+    amplifier.enforceSelfCritique = false;
+    let forceTextToolFallback = false;
     try {
       for (let i = 0; i < maxToolTurns; i++) {
@@ -46,6 +50,7 @@ export class AgentRuntime {
           provider: executionProfile.provider,
           model: executionProfile.model,
           enableTools: true,
+          toolPromptOnly: forceTextToolFallback,
           requireToolEvidence: requireToolEvidence && !usedTools,
         }, startedAt, totalUsage);
@@ -73,6 +78,7 @@ export class AgentRuntime {
               role: 'user',
               content: repl.buildToolEvidenceCorrection(messages),
             });
+            forceTextToolFallback = true;
             finalContent = '';
             continue;
           }
@@ -113,7 +119,6 @@ export class AgentRuntime {
           }
         }
-        const BOX_WIDTH = terminalWidth(76, 116, 92);
         messages.push({
           role: 'assistant',
           content: assistantMsg.content || '',
@@ -186,20 +191,12 @@ export class AgentRuntime {
           const summary = repl.formatToolResultForConsole(canonicalToolName, result);
           if (summary) {
             toolSummaries.push(`${canonicalToolName}: ${summary}`);
-            const statusIcon = result.success === false
-              ? `${colors.red}${repl.useUnicodeUi ? '✖' : 'x'}${colors.reset}`
-              : `${colors.green}${repl.useUnicodeUi ? '✓' : 'ok'}${colors.reset}`;
-            const toolLine = `${icon} ${colors.cyan}${colors.bright}${toolName}${colors.reset}`;
-            const summaryLines = summary.split('\n').flatMap(line => wrapText(line, BOX_WIDTH - 8));
-            console.log(renderBox({
-              title: 'AGENT TOOLS EXECUTION',
-              width: BOX_WIDTH,
-              borderColor: colors.magenta,
-              titleColor: colors.bright,
-              body: [
-                toolLine,
-                ...summaryLines.map((line, index) => index === 0 ? `${statusIcon} ${colors.dim}${line}${colors.reset}` : `${colors.dim}${line}${colors.reset}`),
-              ],
+            console.log(renderToolPanel({
+              toolName: `${icon} ${toolName}`,
+              summary,
+              success: result.success !== false,
+              colors,
+              title: 'Agent Tools',
             }));
           }
         }

package/src/ai/model-capabilities.js CHANGED Viewed

@@ -32,6 +32,7 @@ export function classifyModelTier(modelName, provider = '') {
     /claude-3-5-sonnet/i, /claude-opus/i, /claude-4/i, /claude-sonnet-4/i,
     /gpt-4o/i, /gpt-4-turbo/i, /o1/i, /o3/i,
     /gemini-2\.5-pro/i, /gemini-2\.0-ultra/i,
+    /minimax-?m2\.5/i, /minimax.*m2\.5/i, /minimax/i,
     /deepseek-v3/i, /deepseek-r1/i,
     /llama-4/i, /llama-3-70b/i, /llama3-70b/i, /llama3\.1-70b/i, /llama3\.2-90b/i, /llama3\.3/i,
     /qwen2\.5-?72b/i, /qwen2\.5-?70b/i, /qwen-?2\.5-?72b/i,
@@ -123,7 +124,7 @@ export function classifyModelTier(modelName, provider = '') {
     if (pattern.test(name)) return MODEL_TIERS.SMALL;
   }
-  if (/tiny/i.test(name) || /mini/i.test(name) || /small/i.test(name) || /nano/i.test(name)) {
+  if (/\btiny\b/i.test(name) || /(?:^|[-_:/])mini(?:$|[-_:/])/i.test(name) || /\bsmall\b/i.test(name) || /\bnano\b/i.test(name)) {
     return MODEL_TIERS.TINY;
   }
@@ -170,6 +171,21 @@ export function getReasoningBump(tier) {
   }
 }
+/**
+ * Get a budget multiplier for prompt/context sizing.
+ * Bigger models can safely absorb more context and larger tool outputs.
+ */
+export function getModelBudgetMultiplier(tier) {
+  switch (tier) {
+    case MODEL_TIERS.TINY: return 0.5;
+    case MODEL_TIERS.SMALL: return 0.75;
+    case MODEL_TIERS.MEDIUM: return 1;
+    case MODEL_TIERS.LARGE: return 2;
+    case MODEL_TIERS.FLAGSHIP: return 4;
+    default: return 1;
+  }
+}
 /**
  * Build a short string describing model capability for system prompt injection.
  */

package/src/ai/prompts/system-prompt.js CHANGED Viewed

@@ -1,11 +1,11 @@
 /**
  * Dynamic System Prompt Builder
  * Builds context-aware system prompts based on task, role, and session state.
- * Small models get compact structural guidance so the task stays in focus.
+ * Winter always gives every model the strongest available agent instructions.
  */
-import { isSmallModel, getModelCapabilityLabel } from '../model-capabilities.js';
 import { formatRuntimeEnvironmentSummary, getRuntimeEnvironment } from '../../cli/runtime-env.js';
+import { getModelBudgetMultiplier } from '../model-capabilities.js';
 const BASE_PRINCIPLES = [
   'Execute, don\'t describe - Do the work, don\'t write plans about doing the work',
@@ -35,11 +35,22 @@ function buildEnvironmentSummary() {
   ].join('\n');
 }
+function getPromptBudgets(modelTier = '') {
+  const scale = getModelBudgetMultiplier(modelTier);
+  const compactSystemPrompt = scale <= 0.75;
+  return {
+    compactSystemPrompt,
+    projectContextBudget: Math.round(3200 * scale),
+    resourceContextBudget: Math.round(1200 * scale),
+  };
+}
 function formatToolList(tools = []) {
   return tools.length > 0 ? tools.slice(0, 10).join(', ') : '';
 }
-function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false } = {}) {
+function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false, resourceContextBudget = 1200 } = {}) {
   parts.push('## Runtime Environment', environment || buildEnvironmentSummary(), '');
   if (session?.memory?.length) {
@@ -66,7 +77,7 @@ function appendSharedContext(parts, { environment, session, design, resourceCont
   }
   if (includeResources && resourceContext) {
-    parts.push(resourceContext.trim().slice(0, 1200), '');
+    parts.push(resourceContext.trim().slice(0, resourceContextBudget), '');
   }
   if (context && typeof context === 'object') {
@@ -74,37 +85,11 @@ function appendSharedContext(parts, { environment, session, design, resourceCont
   }
 }
-function buildCompactSmallModelPrompt(options = {}) {
-  const { tools = [], modelTier } = options;
-  const parts = [
-    'You are Winter, an AI coding assistant running on a ' + getModelCapabilityLabel(modelTier) + '.',
-    '',
-    '## Operating Rules',
-    '1. Understand the user request first. If project state matters, inspect files before answering.',
-    '2. Operate as an agent: inspect -> hypothesize -> act -> verify -> final.',
-    '3. Keep context tight. Use only relevant tools and avoid long explanations.',
-    '4. For coding/debug: Read/Grep/Glob/logs -> Edit/Write -> Bash/test/browser smoke. Do not guess file paths.',
-    '5. For UI/design: inspect existing components/styles/resources before changing visuals.',
-    '6. Final answer in Vietnamese. Mention changed files and verification only.',
-    '',
-  ];
-  const toolList = formatToolList(tools);
-  if (toolList) parts.push('## Tools', toolList, '');
-  appendSharedContext(parts, { ...options, includeResources: false });
-  parts.push(
-    '## Response Shape',
-    '- If action is needed, use tools instead of describing the action.',
-    '- If an image is provided, analyze the image directly and tie findings to project files when relevant.',
-    '- Keep final output short and concrete.',
-  );
-  return parts.filter(Boolean).join('\n');
-}
 function buildStandardSystemPrompt(options = {}) {
-  const { role = 'coding', tools = [], resourceContext } = options;
+  const { role = 'coding', tools = [], resourceContext, modelTier = '' } = options;
+  const budgets = getPromptBudgets(modelTier);
+  const projectContextBudget = options.projectContextBudget ?? budgets.projectContextBudget;
+  const compactSystemPrompt = options.compactSystemPrompt ?? budgets.compactSystemPrompt;
   const parts = [
     'You are Winter, an expert AI coding assistant.',
     '',
@@ -113,6 +98,7 @@ function buildStandardSystemPrompt(options = {}) {
     '',
     '## Tool Usage',
     'Use tools when they materially improve correctness. Inspect before editing. Verify after changes.',
+    'Use maximum reasoning discipline for every model tier, including tiny, local, free, and routed models.',
     'Never invent file paths, APIs, command output, or test results.',
     'For debug work, locate the first hard failure, patch the root cause, and verify with the closest test/build/browser smoke.',
     'For design/UI work, inspect the existing interface and design resources first; avoid generic placeholder layouts.',
@@ -122,7 +108,11 @@ function buildStandardSystemPrompt(options = {}) {
   const toolList = formatToolList(tools);
   if (toolList) parts.push('## Tools', toolList, '');
-  appendSharedContext(parts, { ...options, includeResources: Boolean(resourceContext) && (role === 'design' || role === 'ui') });
+  appendSharedContext(parts, {
+    ...options,
+    includeResources: Boolean(resourceContext) && (role === 'design' || role === 'ui'),
+    resourceContextBudget: budgets.resourceContextBudget,
+  });
   parts.push('Always respond in Vietnamese.');
   return parts.filter(Boolean).join('\n');
@@ -138,10 +128,11 @@ export function buildSystemPrompt({
   resourceContext,
   modelTier,
 } = {}) {
+  const budgets = getPromptBudgets(modelTier);
   const options = { role, context, tools, session, environment, design, resourceContext, modelTier };
-  return isSmallModel(modelTier)
-    ? buildCompactSmallModelPrompt(options)
-    : buildStandardSystemPrompt(options);
+  options.projectContextBudget = options.projectContextBudget ?? budgets.projectContextBudget;
+  options.compactSystemPrompt = options.compactSystemPrompt ?? budgets.compactSystemPrompt;
+  return buildStandardSystemPrompt(options);
 }
 export function buildFastSystemPrompt({
@@ -149,18 +140,10 @@ export function buildFastSystemPrompt({
   tools = [],
   modelTier,
 } = {}) {
-  if (modelTier && isSmallModel(modelTier)) {
-    return [
-      'Winter (fast mode - small model). Be concise. Use tools when needed.',
-      tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
-      'Use a brief private plan, then answer in 1 sentence.',
-    ].filter(Boolean).join('\n');
-  }
   return [
-    'You are Winter (fast mode). Be concise. Use tools when needed.',
+    'You are Winter (fast mode with maximum correctness). Be concise, but inspect and use tools when needed.',
     tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
-    'Keep responses brief and focused on the immediate task.',
+    'Use a brief private plan, then execute or answer with concrete evidence.',
   ].filter(Boolean).join('\n');
 }
@@ -176,15 +159,13 @@ export function buildAgentSystemPrompt(role, { tools = [], modelTier } = {}) {
   };
   const base = roleConfigs[role] || roleConfigs.coding;
-  const smallNote = modelTier && isSmallModel(modelTier)
-    ? '\n\nYou are running on a small model. Keep context tight, use tools early, and keep final output short.'
-    : '';
+  const strengthNote = '\n\nWinter Strength Mode: use the full agent loop, inspect real code, reason carefully, verify results, and avoid unsupported claims regardless of base model size.';
   return [
     `You are Winter (${role} agent).`,
     base,
     tools.length > 0 ? `\nTools: ${tools.join(', ')}` : '',
-    smallNote,
+    strengthNote,
     '\nCRITICAL: Output only the requested format. No extra commentary.',
   ].filter(Boolean).join('\n');
 }

package/src/ai/providers.js CHANGED Viewed

@@ -25,11 +25,64 @@ const RESERVED_CONFIG_SECTIONS = new Set([
   'ui',
 ]);
+const DEFAULT_REQUEST_TIMEOUT_MS = 120000;
 function isAuthError(error) {
   const msg = String(error?.message || error || '');
   return /\b(401|403)\b/.test(msg) || /authentication_error|invalid_api_key|unauthorized|auth\s*failed/i.test(msg);
 }
+function isRateLimitError(error) {
+  const msg = String(error?.message || error || '');
+  return error?.status === 429 || /\b429\b|rate[_ -]?limit|tokens per minute|\bTPM\b/i.test(msg);
+}
+function getRequestTimeoutMs(options = {}) {
+  const raw = options.timeoutMs ?? process.env.WINTER_REQUEST_TIMEOUT_MS;
+  const value = Number(raw);
+  if (Number.isFinite(value) && value > 0) return value;
+  return DEFAULT_REQUEST_TIMEOUT_MS;
+}
+function createTimeoutSignal(timeoutMs, externalSignal = null) {
+  const controller = new AbortController();
+  let timedOut = false;
+  const onAbort = () => {
+    controller.abort(externalSignal?.reason || new DOMException('The operation was aborted.', 'AbortError'));
+  };
+  if (externalSignal?.aborted) {
+    onAbort();
+  } else if (externalSignal) {
+    externalSignal.addEventListener('abort', onAbort, { once: true });
+  }
+  const timer = setTimeout(() => {
+    timedOut = true;
+    controller.abort(new Error(`Winter request timed out after ${timeoutMs}ms`));
+  }, timeoutMs);
+  if (typeof timer.unref === 'function') timer.unref();
+  return {
+    signal: controller.signal,
+    timedOut: () => timedOut,
+    cleanup: () => {
+      clearTimeout(timer);
+      if (externalSignal) externalSignal.removeEventListener('abort', onAbort);
+    },
+  };
+}
+function normalizeFetchError(error, provider, timeoutMs, stream = false, timedOut = false) {
+  if (timedOut || /timed out/i.test(String(error?.message || ''))) {
+    const label = stream ? 'stream' : 'request';
+    return new Error(`${provider?.name || 'Provider'} ${label} timed out after ${Math.ceil(timeoutMs / 1000)}s`);
+  }
+  if (error?.name === 'AbortError' || /abort/i.test(String(error?.message || ''))) {
+    const abortError = new Error('AbortError');
+    abortError.name = 'AbortError';
+    return abortError;
+  }
+  return error;
+}
 export class AIProviderManager {
   constructor(config) {
     this.config = config;
@@ -293,6 +346,44 @@ export class AIProviderManager {
     this.tools = tools;
   }
+  normalizeToolDefinitionsForApi(tools = []) {
+    if (!Array.isArray(tools)) return [];
+    return tools
+      .map(tool => {
+        if (!tool || typeof tool !== 'object') return null;
+        if (tool.type === 'function' && tool.function && typeof tool.function === 'object') {
+          return tool;
+        }
+        if (tool.name && tool.parameters) {
+          return {
+            type: 'function',
+            function: {
+              name: tool.name,
+              description: tool.description || '',
+              parameters: tool.parameters,
+            },
+          };
+        }
+        if (tool.function?.name) {
+          return {
+            type: 'function',
+            function: {
+              name: tool.function.name,
+              description: tool.function.description || tool.description || '',
+              parameters: tool.function.parameters || tool.parameters || { type: 'object', properties: {} },
+            },
+          };
+        }
+        return null;
+      })
+      .filter(Boolean);
+  }
   async chat(message, options = {}) {
     await this.init();
     const messages = [
@@ -327,7 +418,7 @@ export class AIProviderManager {
         model: routingModel,
         reasoning: routingReasoning,
         reasoningLevel: options.reasoningLevel || executionProfile.reasoningLevel,
-      }), { maxAttempts: 3, baseDelayMs: 150 });
+      }), { maxAttempts: 3, baseDelayMs: 150, retryable: error => !isRateLimitError(error) && !/\b(400|404)\b/.test(String(error?.message || error || '')) });
     } catch (error) {
       if (isAuthError(error) && routedProvider !== defaultProvider && defaultProvider) {
         if (!this._fallbackWarned) {
@@ -339,7 +430,7 @@ export class AIProviderManager {
           model: options.model || defaultProvider.model,
           reasoning: routingReasoning,
           reasoningLevel: options.reasoningLevel || executionProfile.reasoningLevel,
-        }), { maxAttempts: 1, baseDelayMs: 0 });
+        }), { maxAttempts: 1, baseDelayMs: 0, retryable: error => !isRateLimitError(error) && !/\b(400|404)\b/.test(String(error?.message || error || '')) });
       }
       throw error;
     }
@@ -388,6 +479,7 @@ export class AIProviderManager {
     if (!provider) {
       throw new Error('No active provider is configured');
     }
+    const timeoutMs = getRequestTimeoutMs(options);
     const body = {
       model: options.model || provider.model,
@@ -405,8 +497,9 @@ export class AIProviderManager {
       }
     }
-    if (this.tools.length > 0 && options.enableTools) {
-      body.tools = this.tools;
+    if (this.tools.length > 0 && options.enableTools && !options.toolPromptOnly) {
+      const tools = this.normalizeToolDefinitionsForApi(this.tools);
+      if (tools.length > 0) body.tools = tools;
     }
     const headers = {
@@ -420,15 +513,26 @@ export class AIProviderManager {
       headers['Authorization'] = `Bearer ${provider.apiKey}`;
     }
-    const response = await fetch(`${provider.baseURL}/chat/completions`, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify(body),
-    });
+    const timeout = createTimeoutSignal(timeoutMs, options.signal || options.abortSignal);
+    let response;
+    try {
+      response = await fetch(`${provider.baseURL}/chat/completions`, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify(body),
+        signal: timeout.signal,
+      });
+    } catch (error) {
+      throw normalizeFetchError(error, provider, timeoutMs, false, timeout.timedOut());
+    } finally {
+      timeout.cleanup();
+    }
     if (!response.ok) {
       const error = await response.text();
-      throw new Error(`${provider.name} error (${response.status}): ${error}`);
+      const requestError = new Error(`${provider.name} error (${response.status}): ${error}`);
+      requestError.status = response.status;
+      throw requestError;
     }
     return await response.json();
@@ -438,6 +542,7 @@ export class AIProviderManager {
     if (!provider) {
       throw new Error('No active provider is configured');
     }
+    const timeoutMs = getRequestTimeoutMs(options);
     const body = {
       model: options.model || provider.model,
@@ -460,8 +565,9 @@ export class AIProviderManager {
       }
     }
-    if (this.tools.length > 0 && options.enableTools) {
-      body.tools = this.tools;
+    if (this.tools.length > 0 && options.enableTools && !options.toolPromptOnly) {
+      const tools = this.normalizeToolDefinitionsForApi(this.tools);
+      if (tools.length > 0) body.tools = tools;
     }
     const headers = {
@@ -475,67 +581,78 @@ export class AIProviderManager {
       headers['Authorization'] = `Bearer ${provider.apiKey}`;
     }
-    const response = await fetch(`${provider.baseURL}/chat/completions`, {
-      method: 'POST',
-      headers,
-      body: JSON.stringify(body),
-    });
-    if (!response.ok) {
-      const error = await response.text();
-      throw new Error(`${provider.name} stream error (${response.status}): ${error}`);
-    }
+    const timeout = createTimeoutSignal(timeoutMs, options.signal || options.abortSignal);
+    let response;
+    try {
+      response = await fetch(`${provider.baseURL}/chat/completions`, {
+        method: 'POST',
+        headers,
+        body: JSON.stringify(body),
+        signal: timeout.signal,
+      });
-    if (!response.body) {
-      throw new Error(`${provider.name} did not return a stream body`);
-    }
+      if (!response.ok) {
+        const error = await response.text();
+        const streamError = new Error(`${provider.name} stream error (${response.status}): ${error}`);
+        streamError.status = response.status;
+        throw streamError;
+      }
-    const decoder = new TextDecoder();
-    let buffer = '';
+      if (!response.body) {
+        throw new Error(`${provider.name} did not return a stream body`);
+      }
-    for await (const chunk of response.body) {
-      buffer += decoder.decode(chunk, { stream: true });
-      const lines = buffer.split(/\r?\n/);
-      buffer = lines.pop() || '';
+      const decoder = new TextDecoder();
+      let buffer = '';
-      for (const line of lines) {
-        const trimmed = line.trim();
-        if (!trimmed || !trimmed.startsWith('data:')) continue;
+      for await (const chunk of response.body) {
+        buffer += decoder.decode(chunk, { stream: true });
+        const lines = buffer.split(/\r?\n/);
+        buffer = lines.pop() || '';
-        const payload = trimmed.slice(5).trim();
-        if (!payload || payload === '[DONE]') continue;
+        for (const line of lines) {
+          const trimmed = line.trim();
+          if (!trimmed || !trimmed.startsWith('data:')) continue;
-        let data;
-        try {
-          data = JSON.parse(payload);
-        } catch {
-          continue;
-        }
+          const payload = trimmed.slice(5).trim();
+          if (!payload || payload === '[DONE]') continue;
-        const choice = data.choices?.[0] || {};
-        const content = choice.delta?.content ?? choice.message?.content ?? choice.text ?? '';
-        yield {
-          content,
-          usage: data.usage,
-          raw: data,
-        };
-      }
-    }
+          let data;
+          try {
+            data = JSON.parse(payload);
+          } catch {
+            continue;
+          }
-    const tail = buffer.trim();
-    if (tail.startsWith('data:')) {
-      const payload = tail.slice(5).trim();
-      if (payload && payload !== '[DONE]') {
-        try {
-          const data = JSON.parse(payload);
           const choice = data.choices?.[0] || {};
+          const content = choice.delta?.content ?? choice.message?.content ?? choice.text ?? '';
           yield {
-            content: choice.delta?.content ?? choice.message?.content ?? choice.text ?? '',
+            content,
             usage: data.usage,
             raw: data,
           };
-        } catch {}
+        }
       }
+      const tail = buffer.trim();
+      if (tail.startsWith('data:')) {
+        const payload = tail.slice(5).trim();
+        if (payload && payload !== '[DONE]') {
+          try {
+            const data = JSON.parse(payload);
+            const choice = data.choices?.[0] || {};
+            yield {
+              content: choice.delta?.content ?? choice.message?.content ?? choice.text ?? '',
+              usage: data.usage,
+              raw: data,
+            };
+          } catch {}
+        }
+      }
+    } catch (error) {
+      throw normalizeFetchError(error, provider, timeoutMs, true, timeout.timedOut());
+    } finally {
+      timeout.cleanup();
     }
   }
@@ -586,7 +703,7 @@ export class AIProviderManager {
       const body = {
         model: options.model || provider.model,
         messages: currentMessages,
-        tools: this.tools.length > 0 ? this.tools : undefined,
+        tools: this.tools.length > 0 ? this.normalizeToolDefinitionsForApi(this.tools) : undefined,
       };
       const headers = {
@@ -681,13 +798,13 @@ export class AIProviderManager {
     let reasoningPrompt = '';
     if (options.reasoningLevel || options.reasoningPrompt) {
       reasoningPrompt = options.reasoningPrompt || new ReasoningConfig({
-        level: options.reasoningLevel || REASONING_LEVELS.MEDIUM,
+        level: options.reasoningLevel || REASONING_LEVELS.MAX,
         provider: this.activeProvider,
         modelTier: this._modelTier,
       }).getPromptInstructions();
     } else if (taskInfo) {
       // Auto-inject based on task complexity for providers without API reasoning
-      const level = complexityToReasoningLevel(taskInfo.type);
+      const level = REASONING_LEVELS.MAX;
       const config = new ReasoningConfig({
         level,
         provider: this.activeProvider,