npm - @vybestack/llxprt-code-core - Versions diffs - 0.6.2 → 0.7.0-nightly.251206.43b97dbf4 - Mend

@vybestack/llxprt-code-core 0.6.2 → 0.7.0-nightly.251206.43b97dbf4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (92) hide show

package/dist/src/providers/openai/OpenAIProvider.js CHANGED Viewed

@@ -22,9 +22,11 @@ import crypto from 'node:crypto';
 import * as http from 'http';
 import * as https from 'https';
 import * as net from 'net';
+import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
 import { BaseProvider, } from '../BaseProvider.js';
 import { DebugLogger } from '../../debug/index.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
+import { convertToolsToOpenAI } from './schemaConverter.js';
 import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
 import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
 import { getCoreSystemPromptAsync } from '../../core/prompts.js';
@@ -36,6 +38,7 @@ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
 import { ToolCallPipeline } from './ToolCallPipeline.js';
 import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
 import { isLocalEndpoint } from '../utils/localEndpoint.js';
+import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
 const MAX_TOOL_RESPONSE_CHARS = 1024;
 const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
 const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -218,6 +221,278 @@ export class OpenAIProvider extends BaseProvider {
         }
         return new OpenAI(clientOptions);
     }
+    /**
+     * Coerce provider "content" (which may be a string or an array-of-parts)
+     * into a plain string. Defensive for OpenAI-compatible providers that emit
+     * structured content blocks.
+     *
+     * - A string is returned as-is (including the empty string).
+     * - For an array, falsy entries are skipped, string entries are kept, and
+     *   object entries contribute their "text" property when it is a string.
+     *   All other entries are ignored. The collected pieces are concatenated
+     *   with no separator.
+     * - Anything else (or an array that yields no pieces) results in undefined,
+     *   so callers must handle a missing value.
+     *
+     * @param {*} content - Message or delta content as received from the provider.
+     * @returns {string | undefined} The flattened text, or undefined when no text could be extracted.
+     */
+    coerceMessageContentToString(content) {
+        if (typeof content === 'string') {
+            return content;
+        }
+        if (Array.isArray(content)) {
+            const parts = [];
+            for (const part of content) {
+                if (!part)
+                    continue;
+                if (typeof part === 'string') {
+                    parts.push(part);
+                }
+                else if (typeof part === 'object' &&
+                    part !== null &&
+                    'text' in part &&
+                    typeof part.text === 'string') {
+                    parts.push(part.text);
+                }
+            }
+            return parts.length ? parts.join('') : undefined;
+        }
+        return undefined;
+    }
+    /**
+     * Strip provider-specific "thinking" / reasoning markup from visible text.
+     * This prevents DeepSeek / Kimi-style <think> blocks from leaking into
+     * user-visible output or tool arguments.
+     *
+     * Behavior:
+     * - null / undefined input yields ''; any other non-string goes through String().
+     * - Paired <think>, <thinking> and <analysis> blocks (case-insensitive) are
+     *   each replaced by a single space, as are leftover unmatched tags.
+     * - Whitespace is normalized only when a reasoning tag was detected: runs of
+     *   spaces/tabs anywhere in the text collapse to one space, three or more
+     *   consecutive newlines collapse to two, and leading whitespace is trimmed.
+     *   Trailing whitespace is kept.
+     * - Text without reasoning tags is returned unchanged.
+     * - A debug log entry is written when tags were present and the length changed.
+     *
+     * @param {*} text - Raw provider text (typically a string or a streaming chunk).
+     * @returns {string} The text with reasoning markup removed.
+     */
+    sanitizeProviderText(text) {
+        if (text === null || text === undefined) {
+            return '';
+        }
+        const logger = this.getLogger();
+        let str = typeof text === 'string' ? text : String(text);
+        const beforeLen = str.length;
+        const hadReasoningTags = /<(?:think|thinking|analysis)>|<\/(?:think|thinking|analysis)>/i.test(str);
+        // DeepSeek / generic <think>...</think> blocks.
+        // Replace with a single space to preserve word spacing when tags appear mid-sentence.
+        // This prevents "these<think>...</think>5" from becoming "these5" instead of "these 5".
+        // Multiple consecutive spaces will be collapsed below.
+        str = str.replace(/<think>[\s\S]*?<\/think>/gi, ' ');
+        // Alternative reasoning tags some providers use.
+        str = str.replace(/<thinking>[\s\S]*?<\/thinking>/gi, ' ');
+        str = str.replace(/<analysis>[\s\S]*?<\/analysis>/gi, ' ');
+        // Clean up stray unmatched tags - replace with space to preserve word separation.
+        str = str.replace(/<\/?(?:think|thinking|analysis)>/gi, ' ');
+        // Only clean up whitespace if we had reasoning tags to strip
+        // This preserves meaningful whitespace in regular text chunks during streaming
+        // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
+        if (hadReasoningTags) {
+            // Clean up multiple consecutive spaces/whitespace that may result from stripping
+            str = str.replace(/[ \t]+/g, ' ');
+            str = str.replace(/\n{3,}/g, '\n\n');
+            // Trim leading whitespace (applied whenever any reasoning tag was present,
+            // not only when the tag led the text). This avoids "<think>...</think>text"
+            // becoming " text" while keeping trailing whitespace for streaming chunk concatenation
+            str = str.trimStart();
+        }
+        const afterLen = str.length;
+        if (hadReasoningTags && afterLen !== beforeLen) {
+            logger.debug(() => `[OpenAIProvider] Stripped reasoning tags`, {
+                beforeLen,
+                afterLen,
+            });
+        }
+        return str;
+    }
+    /**
+     * Extract thinking content from <think>, <thinking>, or <analysis> tags
+     * and return it as a ThinkingBlock. Returns null if no thinking tags found.
+     *
+     * This must be called BEFORE sanitizeProviderText which strips these tags.
+     *
+     * Handles two formats:
+     * 1. Standard: <think>Full thinking paragraph here...</think>
+     * 2. Fragmented (Synthetic API): <think>word</think><think>word</think>...
+     *
+     * For fragmented format, parts are joined with single spaces. For standard
+     * format, parts are joined with a blank line ('\n\n').
+     *
+     * Only properly closed tag pairs are matched (case-insensitive); empty or
+     * whitespace-only bodies are ignored.
+     *
+     * NOTE(review): parts are gathered per tag type (all <think>, then all
+     * <thinking>, then all <analysis>), so text mixing several tag types is not
+     * combined in document order.
+     *
+     * @param {string} text - Raw provider text that may contain reasoning tags.
+     * @returns {{type: string, thought: string, sourceField: string, isHidden: boolean} | null}
+     *   A block with type "thinking" and sourceField "think_tags", or null when
+     *   text is falsy or contains no non-empty tagged content.
+     *
+     * @plan PLAN-20251202-THINKING.P16
+     * @requirement REQ-THINK-003
+     */
+    extractThinkTagsAsBlock(text) {
+        if (!text) {
+            return null;
+        }
+        // Collect all thinking content from various tag formats
+        // Note: We only trim leading/trailing whitespace from each part, not internal newlines
+        // This preserves formatting like numbered lists within thinking content
+        const thinkingParts = [];
+        // Match <think>...</think>
+        const thinkMatches = text.matchAll(/<think>([\s\S]*?)<\/think>/gi);
+        for (const match of thinkMatches) {
+            const content = match[1];
+            if (content?.trim()) {
+                // Preserve internal newlines but remove leading/trailing whitespace
+                thinkingParts.push(content.trim());
+            }
+        }
+        // Match <thinking>...</thinking>
+        const thinkingMatches = text.matchAll(/<thinking>([\s\S]*?)<\/thinking>/gi);
+        for (const match of thinkingMatches) {
+            const content = match[1];
+            if (content?.trim()) {
+                thinkingParts.push(content.trim());
+            }
+        }
+        // Match <analysis>...</analysis>
+        const analysisMatches = text.matchAll(/<analysis>([\s\S]*?)<\/analysis>/gi);
+        for (const match of analysisMatches) {
+            const content = match[1];
+            if (content?.trim()) {
+                thinkingParts.push(content.trim());
+            }
+        }
+        if (thinkingParts.length === 0) {
+            return null;
+        }
+        // Detect fragmented format: many short parts (likely token-by-token streaming)
+        // If there are more than 5 parts and the average part length is under 15 chars,
+        // it's likely fragmented and should be joined with spaces
+        const avgPartLength = thinkingParts.reduce((sum, p) => sum + p.length, 0) /
+            thinkingParts.length;
+        const isFragmented = thinkingParts.length > 5 && avgPartLength < 15;
+        // Join with space for fragmented, newlines for standard multi-paragraph thinking
+        const combinedThought = isFragmented
+            ? thinkingParts.join(' ')
+            : thinkingParts.join('\n\n');
+        this.getLogger().debug(() => `[OpenAIProvider] Extracted thinking from tags: ${combinedThought.length} chars`, { tagCount: thinkingParts.length, isFragmented, avgPartLength });
+        return {
+            type: 'thinking',
+            thought: combinedThought,
+            sourceField: 'think_tags',
+            isHidden: false,
+        };
+    }
+    /**
+     * Normalize tool name by stripping Kimi-K2 style prefixes.
+     *
+     * Handles malformed tool names where the model concatenates prefixes like
+     * "functions" or "call_functions" with the actual tool name:
+     * - "functionslist_directory" -> "list_directory"
+     * - "call_functionslist_directory6" -> "list_directory"
+     * - "call_functionsglob7" -> "glob"
+     *
+     * The name is trimmed first (a falsy name becomes the empty string) and the
+     * result is always lower-cased, whether or not a prefix was stripped. The
+     * prefix pattern matches case-insensitively, requires the remaining name to
+     * consist of letters/underscores ending in a letter, and drops any trailing
+     * digits.
+     *
+     * NOTE(review): any name of the form "functions" + letters/underscores
+     * matches, so a tool whose real name starts with "functions" would also be
+     * rewritten - confirm no such tool exists.
+     *
+     * @param {string} name - Tool name as emitted by the model.
+     * @returns {string} Lower-cased tool name with any Kimi-style prefix removed.
+     */
+    normalizeToolName(name) {
+        let normalized = (name || '').trim();
+        // Strip Kimi-K2 style prefixes where model concatenates "functions" or "call_functions"
+        // with the actual tool name (e.g., "functionslist_directory" -> "list_directory")
+        // Pattern: (call_)?functions<actual_tool_name><optional_number>
+        const kimiPrefixMatch = /^(?:call_)?functions([a-z_]+[a-z])(\d*)$/i.exec(normalized);
+        if (kimiPrefixMatch) {
+            const originalName = normalized;
+            normalized = kimiPrefixMatch[1];
+            this.getLogger().debug(() => `[OpenAIProvider] Stripped Kimi-style prefix from tool name: "${originalName}" -> "${normalized}"`);
+        }
+        return normalized.toLowerCase();
+    }
+    /**
+     * Sanitize raw tool argument payloads before JSON parsing:
+     * - Remove thinking blocks (<think>...</think>, etc.).
+     * - Strip Markdown code fences (```json ... ```).
+     * - Try to isolate the main JSON object if wrapped in prose.
+     *
+     * Steps, in order: non-string input is serialized with JSON.stringify
+     * (falling back to String() if that throws); the text is trimmed; when the
+     * text starts with a code fence, the opening fence (with optional language
+     * tag) and the first fence found at the end of a line are removed;
+     * sanitizeProviderText removes reasoning markup; finally the span from the
+     * first "{" to the last "}" is returned if present. The result is not
+     * validated as JSON.
+     *
+     * NOTE(review): JSON.stringify returns undefined (without throwing) for
+     * values such as functions or symbols, in which case the trim() call below
+     * would throw - confirm callers never pass such values.
+     *
+     * @param {*} raw - Tool arguments as received from the provider.
+     * @returns {string} Candidate JSON text; "{}" when raw is null/undefined or nothing remains.
+     */
+    sanitizeToolArgumentsString(raw) {
+        if (raw === null || raw === undefined) {
+            return '{}';
+        }
+        let text;
+        if (typeof raw === 'string') {
+            text = raw;
+        }
+        else {
+            try {
+                text = JSON.stringify(raw);
+            }
+            catch {
+                text = String(raw);
+            }
+        }
+        text = text.trim();
+        // Strip fenced code blocks like ```json { ... } ```.
+        if (text.startsWith('```')) {
+            text = text.replace(/^```[a-zA-Z0-9_-]*\s*/m, '');
+            text = text.replace(/```$/m, '');
+            text = text.trim();
+        }
+        // Remove provider reasoning / thinking markup.
+        text = this.sanitizeProviderText(text);
+        // If provider wrapped JSON in explanation text, try to isolate the object.
+        const firstBrace = text.indexOf('{');
+        const lastBrace = text.lastIndexOf('}');
+        if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
+            const candidate = text.slice(firstBrace, lastBrace + 1).trim();
+            if (candidate.startsWith('{') && candidate.endsWith('}')) {
+                return candidate;
+            }
+        }
+        return text.length ? text : '{}';
+    }
+    /**
+     * Parse Kimi-K2 `<|tool_calls_section_begin|> ... <|tool_calls_section_end|>`
+     * blocks out of a text string.
+     *
+     * - Returns cleanedText with the whole section removed.
+     * - Returns ToolCallBlock[] constructed from the section contents.
+     *
+     * This is used for HF/vLLM-style Kimi deployments where `tool_calls` is empty
+     * and all tool info is only encoded in the text template.
+     *
+     * Details:
+     * - If raw is falsy or has no begin marker, raw is returned unchanged as
+     *   cleanedText together with an empty toolCalls array.
+     * - Only sections with both a begin and an end marker are matched; an
+     *   unterminated section stays in the text and yields no tool calls.
+     * - The tool name is inferred from the call ID: "functions.NAME:N" or
+     *   "PREFIX.NAME:N"; otherwise the text before the first ":" is taken and its
+     *   last "."-separated segment is used. The name then goes through
+     *   normalizeToolName.
+     * - Arguments go through sanitizeToolArgumentsString and processToolParameters.
+     * - If parsing a section throws, the error is logged at debug level, calls
+     *   parsed before the failure are kept, and the section is still removed.
+     *
+     * @param {string} raw - Provider text that may embed Kimi tool-call sections.
+     * @returns {{cleanedText: string, toolCalls: Array<object>}} The text without
+     *   the matched sections (not trimmed) plus the parsed tool_call blocks.
+     */
+    extractKimiToolCallsFromText(raw) {
+        if (!raw || !raw.includes('<|tool_calls_section_begin|>')) {
+            return { cleanedText: raw, toolCalls: [] };
+        }
+        const logger = this.getLogger();
+        const toolCalls = [];
+        let text = raw;
+        const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
+        text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
+            try {
+                const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
+                let m;
+                while ((m = callRegex.exec(sectionBody)) !== null) {
+                    const rawId = m[1].trim();
+                    const rawArgs = m[2].trim();
+                    // Infer tool name from ID.
+                    let toolName = '';
+                    const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
+                        /^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
+                    if (match) {
+                        toolName = match[1];
+                    }
+                    else {
+                        const colonParts = rawId.split(':');
+                        const head = colonParts[0] || rawId;
+                        const dotParts = head.split('.');
+                        toolName = dotParts[dotParts.length - 1] || head;
+                    }
+                    // Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
+                    toolName = this.normalizeToolName(toolName);
+                    const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
+                    const processedParameters = processToolParameters(sanitizedArgs, toolName);
+                    toolCalls.push({
+                        type: 'tool_call',
+                        id: this.normalizeToHistoryToolId(rawId),
+                        name: toolName,
+                        parameters: processedParameters,
+                    });
+                }
+            }
+            catch (err) {
+                logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
+            }
+            // Strip the entire tool section from user-visible text
+            return '';
+        });
+        if (toolCalls.length > 0) {
+            logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
+                toolCallCount: toolCalls.length,
+                originalLength: raw.length,
+                cleanedLength: text.length,
+            });
+        }
+        // Don't trim - preserve leading/trailing newlines that are important for formatting
+        // (e.g., numbered lists from Kimi K2 that have newlines between items)
+        return { cleanedText: text, toolCalls };
+    }
     /**
      * @plan:PLAN-20251023-STATELESS-HARDENING.P09
      * @requirement:REQ-SP4-002
@@ -685,6 +960,113 @@ export class OpenAIProvider extends BaseProvider {
         // This ensures each tool message has a corresponding tool_calls in previous message
         return this.validateToolMessageSequence(messages);
     }
+    /**
+     * Build messages with optional reasoning_content based on settings.
+     *
+     * Reads two settings via options.settings.get:
+     * - "reasoning.stripFromContext" (default "none"), passed to
+     *   filterThinkingForContext before any conversion.
+     * - "reasoning.includeInContext" (default false), which controls whether
+     *   reasoning_content is attached to assistant messages.
+     *
+     * Conversion rules per content entry:
+     * - speaker "human": text blocks are joined with "\n" into one user message;
+     *   entries with no text are skipped.
+     * - speaker "ai" with tool_call blocks: one assistant message whose content is
+     *   the joined text (or null when empty) plus a tool_calls array.
+     * - speaker "ai" without tool calls: one assistant message when there is at
+     *   least one text or thinking block.
+     * - speaker "tool": one tool message per tool_response block.
+     * - any other speaker is ignored.
+     *
+     * Tool IDs are resolved through a Kimi ToolIdMapper when toolFormat is "kimi",
+     * otherwise through normalizeToOpenAIToolId.
+     *
+     * NOTE(review): an "ai" entry that has only thinking blocks while
+     * includeInContext is false still produces an assistant message with empty
+     * string content - confirm the target API accepts that.
+     *
+     * @param {Array<object>} contents - Conversation history entries (speaker + blocks).
+     * @param {object} options - Call options; settings.get and config are read here.
+     * @param {string} toolFormat - Detected tool format; only "kimi" changes ID handling.
+     * @returns {Array<object>} The result of validateToolMessageSequence over the built messages.
+     *
+     * @plan PLAN-20251202-THINKING.P14
+     * @requirement REQ-THINK-004, REQ-THINK-006
+     */
+    buildMessagesWithReasoning(contents, options, toolFormat) {
+        // Read settings with defaults
+        const stripPolicy = options.settings.get('reasoning.stripFromContext') ??
+            'none';
+        const includeInContext = options.settings.get('reasoning.includeInContext') ?? false;
+        // Apply strip policy first
+        const filteredContents = filterThinkingForContext(contents, stripPolicy);
+        const messages = [];
+        // Create a ToolIdMapper based on the tool format
+        // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
+        const toolIdMapper = toolFormat === 'kimi'
+            ? getToolIdStrategy('kimi').createMapper(filteredContents)
+            : null;
+        // Helper to resolve tool call IDs based on format
+        const resolveToolCallId = (tc) => {
+            if (toolIdMapper) {
+                return toolIdMapper.resolveToolCallId(tc);
+            }
+            return this.normalizeToOpenAIToolId(tc.id);
+        };
+        // Helper to resolve tool response IDs based on format
+        const resolveToolResponseId = (tr) => {
+            if (toolIdMapper) {
+                return toolIdMapper.resolveToolResponseId(tr);
+            }
+            return this.normalizeToOpenAIToolId(tr.callId);
+        };
+        for (const content of filteredContents) {
+            if (content.speaker === 'human') {
+                // Convert human messages to user messages
+                const textBlocks = content.blocks.filter((b) => b.type === 'text');
+                const text = textBlocks.map((b) => b.text).join('\n');
+                if (text) {
+                    messages.push({
+                        role: 'user',
+                        content: text,
+                    });
+                }
+            }
+            else if (content.speaker === 'ai') {
+                // Convert AI messages with optional reasoning_content
+                const textBlocks = content.blocks.filter((b) => b.type === 'text');
+                const text = textBlocks.map((b) => b.text).join('\n');
+                const thinkingBlocks = extractThinkingBlocks(content);
+                const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
+                if (toolCalls.length > 0) {
+                    // Assistant message with tool calls
+                    const baseMessage = {
+                        role: 'assistant',
+                        content: text || null,
+                        tool_calls: toolCalls.map((tc) => ({
+                            id: resolveToolCallId(tc),
+                            type: 'function',
+                            function: {
+                                name: tc.name,
+                                arguments: this.normalizeToolCallArguments(tc.parameters),
+                            },
+                        })),
+                    };
+                    if (includeInContext && thinkingBlocks.length > 0) {
+                        const messageWithReasoning = baseMessage;
+                        messageWithReasoning.reasoning_content =
+                            thinkingToReasoningField(thinkingBlocks);
+                        messages.push(messageWithReasoning);
+                    }
+                    else {
+                        messages.push(baseMessage);
+                    }
+                }
+                else if (textBlocks.length > 0 || thinkingBlocks.length > 0) {
+                    // Plain assistant message
+                    const baseMessage = {
+                        role: 'assistant',
+                        content: text,
+                    };
+                    if (includeInContext && thinkingBlocks.length > 0) {
+                        const messageWithReasoning = baseMessage;
+                        messageWithReasoning.reasoning_content =
+                            thinkingToReasoningField(thinkingBlocks);
+                        messages.push(messageWithReasoning);
+                    }
+                    else {
+                        messages.push(baseMessage);
+                    }
+                }
+            }
+            else if (content.speaker === 'tool') {
+                // Convert tool responses
+                const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
+                for (const tr of toolResponses) {
+                    messages.push({
+                        role: 'tool',
+                        content: this.buildToolResponseContent(tr, options.config),
+                        tool_call_id: resolveToolResponseId(tr),
+                    });
+                }
+            }
+        }
+        // Validate tool message sequence to prevent API errors
+        return this.validateToolMessageSequence(messages);
+    }
     /**
      * Validates tool message sequence to ensure each tool message has a corresponding tool_calls
      * This prevents "messages with role 'tool' must be a response to a preceeding message with 'tool_calls'" errors
@@ -698,6 +1080,18 @@ export class OpenAIProvider extends BaseProvider {
         const logger = this.getLogger();
         const validatedMessages = [...messages];
         let removedCount = 0;
+        // Debug: Log the full message sequence for tool call analysis
+        logger.debug(() => `[OpenAIProvider] validateToolMessageSequence: analyzing ${messages.length} messages`, {
+            messageRoles: messages.map((m) => m.role),
+            toolCallIds: messages
+                .filter((m) => m.role === 'assistant' &&
+                'tool_calls' in m &&
+                Array.isArray(m.tool_calls))
+                .flatMap((m) => m.tool_calls?.map((tc) => tc.id) ?? []),
+            toolResponseIds: messages
+                .filter((m) => m.role === 'tool')
+                .map((m) => m.tool_call_id),
+        });
         // Check if there are any tool_calls in conversation
         // If no tool_calls exist, this might be isolated tool response testing - skip validation
         const hasToolCallsInConversation = validatedMessages.some((msg) => msg.role === 'assistant' &&
@@ -826,13 +1220,8 @@ export class OpenAIProvider extends BaseProvider {
                 metadataKeys: Object.keys(metadata ?? {}),
             });
         }
-        // Convert IContent to OpenAI messages format
-        const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
-        const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
-        if (logger.enabled && toolReplayMode !== 'native') {
-            logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
-        }
-        // Detect the tool format to use (once at the start of the method)
+        // Detect the tool format to use BEFORE building messages
+        // This is needed so that Kimi K2 tool IDs can be generated in the correct format
         const detectedFormat = this.detectToolFormat();
         // Log the detected format for debugging
         logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
@@ -840,8 +1229,18 @@ export class OpenAIProvider extends BaseProvider {
             detectedFormat,
             provider: this.name,
         });
-        // Convert Gemini format tools to the detected format
-        let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
+        // Convert IContent to OpenAI messages format
+        // Use buildMessagesWithReasoning for reasoning-aware message building
+        // Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
+        const messages = toolReplayMode === 'native'
+            ? this.buildMessagesWithReasoning(contents, options, detectedFormat)
+            : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
+        if (logger.enabled && toolReplayMode !== 'native') {
+            logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
+        }
+        // Convert Gemini format tools to OpenAI format using the schema converter
+        // This ensures required fields are always present in tool schemas
+        let formattedTools = convertToolsToOpenAI(tools);
         // CRITICAL FIX: Ensure we never pass an empty tools array
         // The OpenAI API errors when tools=[] but a tool call is attempted
         if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1106,16 +1505,37 @@ export class OpenAIProvider extends BaseProvider {
             // Buffer for accumulating text chunks for providers that need it
             let textBuffer = '';
             // Use the same detected format from earlier for consistency
-            // Buffer text for Qwen format providers to avoid stanza formatting
-            const shouldBufferText = detectedFormat === 'qwen';
+            const isKimiModel = model.toLowerCase().includes('kimi-k2');
+            // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
+            const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
+            // Accumulate thinking content across the entire stream to emit as ONE block
+            // This handles fragmented <think>word</think> streaming from Synthetic API
+            // @plan PLAN-20251202-THINKING.P16
+            let accumulatedThinkingContent = '';
+            let hasEmittedThinking = false;
+            // Accumulate reasoning_content from streaming deltas (legacy path)
+            // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
+            // @plan PLAN-20251202-THINKING.P16
+            let accumulatedReasoningContent = '';
             // Track token usage from streaming chunks
             let streamingUsage = null;
+            // Track total chunks for debugging empty responses
+            let totalChunksReceived = 0;
             try {
                 // Handle streaming response
                 for await (const chunk of response) {
+                    totalChunksReceived++;
                     if (abortSignal?.aborted) {
                         break;
                     }
+                    // Debug: Log first few chunks and every 10th chunk to understand stream behavior
+                    if (totalChunksReceived <= 3 || totalChunksReceived % 10 === 0) {
+                        logger.debug(() => `[Streaming legacy] Chunk #${totalChunksReceived} received`, {
+                            hasChoices: !!chunk.choices?.length,
+                            firstChoiceDelta: chunk.choices?.[0]?.delta,
+                            finishReason: chunk.choices?.[0]?.finish_reason,
+                        });
+                    }
                     const chunkRecord = chunk;
                     let parsedData;
                     const rawData = chunkRecord?.data;
@@ -1152,6 +1572,14 @@ export class OpenAIProvider extends BaseProvider {
                     const choice = chunk.choices?.[0];
                     if (!choice)
                         continue;
+                    // Parse reasoning_content from streaming delta (Phase 16 integration)
+                    // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+                    // @plan PLAN-20251202-THINKING.P16
+                    const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+                    if (reasoningBlock) {
+                        // Accumulate reasoning content - will emit ONE block later
+                        accumulatedReasoningContent += reasoningBlock.thought;
+                    }
                     // Check for finish_reason to detect proper stream ending
                     if (choice.finish_reason) {
                         logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1165,23 +1593,23 @@ export class OpenAIProvider extends BaseProvider {
                         if (choice.finish_reason === 'length') {
                             logger.debug(() => `Response truncated due to length limit for model ${model}`);
                         }
-                        // Flush any buffered text when stream finishes
-                        if (textBuffer.length > 0) {
-                            yield {
-                                speaker: 'ai',
-                                blocks: [
-                                    {
-                                        type: 'text',
-                                        text: textBuffer,
-                                    },
-                                ],
-                            };
-                            textBuffer = '';
-                        }
+                        // Don't flush buffer here on finish - let the final buffer handling
+                        // after the loop process it with proper sanitization and think tag extraction
+                        // This was causing unsanitized <think> tags to leak into output (legacy path)
+                        // @plan PLAN-20251202-THINKING.P16
                     }
                     // Handle text content - buffer for Qwen format, emit immediately for others
-                    const deltaContent = choice.delta?.content;
-                    if (deltaContent) {
+                    // Note: Synthetic API sends content that may duplicate reasoning_content.
+                    // This is the model's behavior - we don't filter it here.
+                    // @plan PLAN-20251202-THINKING.P16
+                    const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
+                    if (rawDeltaContent) {
+                        const deltaContent = isKimiModel
+                            ? rawDeltaContent
+                            : this.sanitizeProviderText(rawDeltaContent);
+                        if (!deltaContent) {
+                            continue;
+                        }
                         _accumulatedText += deltaContent;
                         // Debug log for providers that need buffering
                         if (shouldBufferText) {
@@ -1194,22 +1622,103 @@ export class OpenAIProvider extends BaseProvider {
                             });
                             // Buffer text to avoid stanza formatting
                             textBuffer += deltaContent;
+                            const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
+                            const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
+                            const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
                             // Emit buffered text when we have a complete sentence or paragraph
-                            // Look for natural break points
-                            if (textBuffer.includes('\n') ||
-                                textBuffer.endsWith('. ') ||
-                                textBuffer.endsWith('! ') ||
-                                textBuffer.endsWith('? ') ||
-                                textBuffer.length > 100) {
-                                yield {
-                                    speaker: 'ai',
-                                    blocks: [
-                                        {
-                                            type: 'text',
-                                            text: textBuffer,
-                                        },
-                                    ],
-                                };
+                            // Look for natural break points, but avoid flushing mid Kimi section
+                            if (!hasOpenKimiSection &&
+                                (textBuffer.includes('\n') ||
+                                    textBuffer.endsWith('. ') ||
+                                    textBuffer.endsWith('! ') ||
+                                    textBuffer.endsWith('? ') ||
+                                    textBuffer.length > 100)) {
+                                const parsedToolCalls = [];
+                                let workingText = textBuffer;
+                                // Extract <think> tags and ACCUMULATE instead of emitting immediately (legacy path)
+                                // This handles fragmented <think>word</think> streaming from Synthetic API
+                                // @plan PLAN-20251202-THINKING.P16
+                                // @requirement REQ-THINK-003
+                                const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
+                                if (tagBasedThinking) {
+                                    // Accumulate thinking content - don't emit yet
+                                    // Use newline to preserve formatting between chunks (not space)
+                                    if (accumulatedThinkingContent.length > 0) {
+                                        accumulatedThinkingContent += '\n';
+                                    }
+                                    accumulatedThinkingContent += tagBasedThinking.thought;
+                                    logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
+                                }
+                                const kimiParsed = this.extractKimiToolCallsFromText(workingText);
+                                if (kimiParsed.toolCalls.length > 0) {
+                                    parsedToolCalls.push(...kimiParsed.toolCalls);
+                                    logger.debug(() => `[OpenAIProvider] Streaming buffer (legacy) parsed Kimi tool calls`, {
+                                        count: kimiParsed.toolCalls.length,
+                                        bufferLength: workingText.length,
+                                        cleanedLength: kimiParsed.cleanedText.length,
+                                    });
+                                }
+                                workingText = kimiParsed.cleanedText;
+                                const parsingText = this.sanitizeProviderText(workingText);
+                                let cleanedText = parsingText;
+                                try {
+                                    const parsedResult = this.textToolParser.parse(parsingText);
+                                    if (parsedResult.toolCalls.length > 0) {
+                                        parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
+                                            type: 'tool_call',
+                                            id: `text_tool_${Date.now()}_${Math.random()
+                                                .toString(36)
+                                                .substring(7)}`,
+                                            name: this.normalizeToolName(call.name),
+                                            parameters: call.arguments,
+                                        })));
+                                        cleanedText = parsedResult.cleanedContent;
+                                    }
+                                }
+                                catch (error) {
+                                    const logger = this.getLogger();
+                                    logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+                                }
+                                // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
+                                // This ensures thinking appears first in the response
+                                // @plan PLAN-20251202-THINKING.P16
+                                if (!hasEmittedThinking &&
+                                    accumulatedThinkingContent.length > 0 &&
+                                    (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: [
+                                            {
+                                                type: 'thinking',
+                                                thought: accumulatedThinkingContent,
+                                                sourceField: 'think_tags',
+                                                isHidden: false,
+                                            },
+                                        ],
+                                    };
+                                    hasEmittedThinking = true;
+                                    logger.debug(() => `[Streaming legacy] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
+                                }
+                                if (parsedToolCalls.length > 0) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: parsedToolCalls,
+                                    };
+                                }
+                                // Always use sanitized text to strip <think> tags (legacy streaming)
+                                // Bug fix: Previously Kimi used unsanitized workingText
+                                // @plan PLAN-20251202-THINKING.P16
+                                if (cleanedText.trim().length > 0) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: [
+                                            {
+                                                type: 'text',
+                                                text: cleanedText,
+                                            },
+                                        ],
+                                    };
+                                }
                                 textBuffer = '';
                             }
                         }
@@ -1308,19 +1817,45 @@ export class OpenAIProvider extends BaseProvider {
             }
             // Check buffered text for <tool_call> format before flushing as plain text
             if (textBuffer.length > 0) {
-                // Try to parse <tool_call> format from buffered text
-                let parsedToolCalls = [];
-                let cleanedText = textBuffer;
+                const parsedToolCalls = [];
+                let workingText = textBuffer;
+                // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
+                // This is the model's behavior - we don't strip it since the model is the source.
+                // The user can configure reasoning display settings if they don't want duplicates.
+                // @plan PLAN-20251202-THINKING.P16
+                // Extract any remaining <think> tags from final buffer (legacy path)
+                // @plan PLAN-20251202-THINKING.P16
+                const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
+                if (tagBasedThinking) {
+                    // Use newline to preserve formatting between chunks (not space)
+                    if (accumulatedThinkingContent.length > 0) {
+                        accumulatedThinkingContent += '\n';
+                    }
+                    accumulatedThinkingContent += tagBasedThinking.thought;
+                }
+                const kimiParsed = this.extractKimiToolCallsFromText(workingText);
+                if (kimiParsed.toolCalls.length > 0) {
+                    parsedToolCalls.push(...kimiParsed.toolCalls);
+                    this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (legacy) parsed Kimi tool calls`, {
+                        count: kimiParsed.toolCalls.length,
+                        bufferLength: workingText.length,
+                        cleanedLength: kimiParsed.cleanedText.length,
+                    });
+                }
+                workingText = kimiParsed.cleanedText;
+                const parsingText = this.sanitizeProviderText(workingText);
+                let cleanedText = parsingText;
                 try {
-                    const parsedResult = this.textToolParser.parse(textBuffer);
+                    const parsedResult = this.textToolParser.parse(parsingText);
                     if (parsedResult.toolCalls.length > 0) {
-                        // Convert parsed tool calls to ToolCallBlock format
-                        parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+                        parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
                             type: 'tool_call',
-                            id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
-                            name: call.name,
+                            id: `text_tool_${Date.now()}_${Math.random()
+                                .toString(36)
+                                .substring(7)}`,
+                            name: this.normalizeToolName(call.name),
                             parameters: call.arguments,
-                        }));
+                        })));
                         cleanedText = parsedResult.cleanedContent;
                     }
                 }
@@ -1328,14 +1863,33 @@ export class OpenAIProvider extends BaseProvider {
                     const logger = this.getLogger();
                     logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
                 }
-                // Emit tool calls from text parsing first
+                // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
+                // @plan PLAN-20251202-THINKING.P16
+                if (!hasEmittedThinking &&
+                    accumulatedThinkingContent.length > 0 &&
+                    (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: [
+                            {
+                                type: 'thinking',
+                                thought: accumulatedThinkingContent,
+                                sourceField: 'think_tags',
+                                isHidden: false,
+                            },
+                        ],
+                    };
+                    hasEmittedThinking = true;
+                }
                 if (parsedToolCalls.length > 0) {
                     yield {
                         speaker: 'ai',
                         blocks: parsedToolCalls,
                     };
                 }
-                // Then emit any remaining cleaned text
+                // Always use sanitized text to strip <think> tags (legacy final buffer)
+                // Bug fix: Previously Kimi used unsanitized workingText
+                // @plan PLAN-20251202-THINKING.P16
                 if (cleanedText.trim().length > 0) {
                     yield {
                         speaker: 'ai',
@@ -1349,18 +1903,54 @@ export class OpenAIProvider extends BaseProvider {
                 }
                 textBuffer = '';
             }
+            // Emit any remaining accumulated thinking that wasn't emitted yet (legacy path)
+            // (e.g., if entire response was just thinking with no content)
+            // @plan PLAN-20251202-THINKING.P16
+            if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
+                yield {
+                    speaker: 'ai',
+                    blocks: [
+                        {
+                            type: 'thinking',
+                            thought: accumulatedThinkingContent,
+                            sourceField: 'think_tags',
+                            isHidden: false,
+                        },
+                    ],
+                };
+                hasEmittedThinking = true;
+            }
+            // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
+            // This consolidates token-by-token reasoning from Synthetic API into a single block
+            // @plan PLAN-20251202-THINKING.P16
+            if (accumulatedReasoningContent.length > 0) {
+                yield {
+                    speaker: 'ai',
+                    blocks: [
+                        {
+                            type: 'thinking',
+                            thought: accumulatedReasoningContent,
+                            sourceField: 'reasoning_content',
+                            isHidden: false,
+                        },
+                    ],
+                };
+            }
             // Process and emit tool calls using legacy accumulated approach
             if (accumulatedToolCalls.length > 0) {
                 const blocks = [];
                 for (const tc of accumulatedToolCalls) {
                     if (!tc)
                         continue;
+                    const sanitizedArgs = this.sanitizeToolArgumentsString(tc.function.arguments);
+                    // Normalize tool name (handles Kimi-K2 style prefixes)
+                    const normalizedName = this.normalizeToolName(tc.function.name || '');
                     // Process tool parameters with double-escape handling
-                    const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '');
+                    const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
                     blocks.push({
                         type: 'tool_call',
                         id: this.normalizeToHistoryToolId(tc.id),
-                        name: tc.function.name || '',
+                        name: normalizedName,
                         parameters: processedParameters,
                     });
                 }
@@ -1400,6 +1990,40 @@ export class OpenAIProvider extends BaseProvider {
                     },
                 };
             }
+            // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
+            // Only warn if we truly got nothing - not even reasoning content
+            if (_accumulatedText.length === 0 &&
+                accumulatedToolCalls.length === 0 &&
+                textBuffer.length === 0 &&
+                accumulatedReasoningContent.length === 0 &&
+                accumulatedThinkingContent.length === 0) {
+                // Provide actionable guidance for users
+                const isKimi = model.toLowerCase().includes('kimi');
+                const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
+                const troubleshooting = isKimi
+                    ? isSynthetic
+                        ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
+                        : ' This provider may not support streaming with tool calls.'
+                    : ' Consider using streaming: "disabled" in your profile settings.';
+                logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${totalChunksReceived} chunks with no content).${troubleshooting}`, {
+                    model,
+                    baseURL: baseURL ?? this.getBaseURL(),
+                    isKimiModel: isKimi,
+                    isSyntheticAPI: isSynthetic,
+                    totalChunksReceived,
+                });
+            }
+            else {
+                // Log what we DID get for debugging
+                logger.debug(() => `[Streaming legacy] Stream completed with accumulated content`, {
+                    textLength: _accumulatedText.length,
+                    toolCallCount: accumulatedToolCalls.length,
+                    textBufferLength: textBuffer.length,
+                    reasoningLength: accumulatedReasoningContent.length,
+                    thinkingLength: accumulatedThinkingContent.length,
+                    totalChunksReceived,
+                });
+            }
         }
         else {
             // Handle non-streaming response
@@ -1425,22 +2049,57 @@ export class OpenAIProvider extends BaseProvider {
                 }
             }
             const blocks = [];
-            // Handle text content
-            if (choice.message?.content) {
-                blocks.push({
-                    type: 'text',
-                    text: choice.message.content,
-                });
+            // Parse reasoning_content from response (Phase 16 integration)
+            const reasoningBlock = this.parseNonStreamingReasoning(choice.message);
+            logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}`, {
+                hasReasoningContent: 'reasoning_content' in
+                    (choice.message ?? {}),
+                messageKeys: Object.keys(choice.message ?? {}),
+            });
+            if (reasoningBlock) {
+                blocks.push(reasoningBlock);
+            }
+            // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
+            const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
+            let kimiCleanContent;
+            let kimiToolBlocks = [];
+            if (rawMessageContent) {
+                // Extract <think> tags as ThinkingBlock BEFORE stripping them
+                // Only do this if we didn't already get reasoning from reasoning_content field
+                // @plan PLAN-20251202-THINKING.P16
+                // @requirement REQ-THINK-003
+                if (!reasoningBlock) {
+                    const tagBasedThinking = this.extractThinkTagsAsBlock(rawMessageContent);
+                    if (tagBasedThinking) {
+                        blocks.push(tagBasedThinking);
+                        logger.debug(() => `[Non-streaming] Extracted thinking from <think> tags: ${tagBasedThinking.thought.length} chars`);
+                    }
+                }
+                const kimiParsed = this.extractKimiToolCallsFromText(rawMessageContent);
+                kimiCleanContent = kimiParsed.cleanedText;
+                kimiToolBlocks = kimiParsed.toolCalls;
+                // Always sanitize text content to remove <think> tags
+                // Bug fix: Previously Kimi-K2 used unsanitized kimiCleanContent,
+                // which caused <think> tags to leak into visible output
+                // @plan PLAN-20251202-THINKING.P16
+                const cleanedText = this.sanitizeProviderText(kimiCleanContent);
+                if (cleanedText) {
+                    blocks.push({
+                        type: 'text',
+                        text: cleanedText,
+                    });
+                }
             }
             // Handle tool calls
             if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
                 // Use the same detected format from earlier for consistency
                 for (const toolCall of choice.message.tool_calls) {
                     if (toolCall.type === 'function') {
-                        // Use tool name directly without normalization for legacy compatibility
-                        const toolName = toolCall.function.name || '';
+                        // Normalize tool name (handles Kimi-K2 style prefixes)
+                        const toolName = this.normalizeToolName(toolCall.function.name || '');
+                        const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
                         // Process tool parameters with double-escape handling
-                        const processedParameters = processToolParameters(toolCall.function.arguments || '', toolName);
+                        const processedParameters = processToolParameters(sanitizedArgs, toolName);
                         blocks.push({
                             type: 'tool_call',
                             id: this.normalizeToHistoryToolId(toolCall.id),
@@ -1450,42 +2109,49 @@ export class OpenAIProvider extends BaseProvider {
                     }
                 }
             }
+            // Add any tool calls parsed from Kimi inline sections
+            if (kimiToolBlocks.length > 0) {
+                blocks.push(...kimiToolBlocks);
+                this.getLogger().debug(() => `[OpenAIProvider] Non-stream legacy added Kimi tool calls from text`, { count: kimiToolBlocks.length });
+            }
             // Additionally check for <tool_call> format in text content
-            if (choice.message?.content &&
-                typeof choice.message.content === 'string') {
-                try {
-                    const parsedResult = this.textToolParser.parse(choice.message.content);
-                    if (parsedResult.toolCalls.length > 0) {
-                        // Add tool calls found in text content
-                        for (const call of parsedResult.toolCalls) {
-                            blocks.push({
-                                type: 'tool_call',
-                                id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
-                                name: call.name,
-                                parameters: call.arguments,
-                            });
-                        }
-                        // Update the text content to remove the tool call parts
-                        if (choice.message.content !== parsedResult.cleanedContent) {
-                            // Find the text block and update it
-                            const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
-                            if (textBlockIndex >= 0) {
-                                blocks[textBlockIndex].text =
-                                    parsedResult.cleanedContent;
-                            }
-                            else if (parsedResult.cleanedContent.trim()) {
-                                // Add cleaned text if it doesn't exist
-                                blocks.unshift({
-                                    type: 'text',
-                                    text: parsedResult.cleanedContent,
+            if (kimiCleanContent) {
+                const cleanedSource = this.sanitizeProviderText(kimiCleanContent);
+                if (cleanedSource) {
+                    try {
+                        const parsedResult = this.textToolParser.parse(cleanedSource);
+                        if (parsedResult.toolCalls.length > 0) {
+                            // Add tool calls found in text content
+                            for (const call of parsedResult.toolCalls) {
+                                blocks.push({
+                                    type: 'tool_call',
+                                    id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+                                    name: this.normalizeToolName(call.name),
+                                    parameters: call.arguments,
                                 });
                             }
+                            // Update the text content to remove the tool call parts
+                            if (choice.message.content !== parsedResult.cleanedContent) {
+                                // Find the text block and update it
+                                const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+                                if (textBlockIndex >= 0) {
+                                    blocks[textBlockIndex].text =
+                                        parsedResult.cleanedContent;
+                                }
+                                else if (parsedResult.cleanedContent.trim()) {
+                                    // Add cleaned text if it doesn't exist
+                                    blocks.unshift({
+                                        type: 'text',
+                                        text: parsedResult.cleanedContent,
+                                    });
+                                }
+                            }
                         }
                     }
-                }
-                catch (error) {
-                    const logger = this.getLogger();
-                    logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+                    catch (error) {
+                        const logger = this.getLogger();
+                        logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+                    }
                 }
             }
             // Emit the complete response as a single IContent
@@ -1613,14 +2279,8 @@ export class OpenAIProvider extends BaseProvider {
         }
         // Determine tool replay mode for model compatibility (e.g., polaris-alpha)
         const toolReplayMode = this.determineToolReplayMode(model);
-        // Convert IContent to OpenAI messages format
-        const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
-        const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
-        // Log tool replay mode usage for debugging
-        if (logger.enabled && toolReplayMode !== 'native') {
-            logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
-        }
-        // Detect the tool format to use (once at the start of the method)
+        // Detect the tool format to use BEFORE building messages
+        // This is needed so that Kimi K2 tool IDs can be generated in the correct format
         const detectedFormat = this.detectToolFormat();
         // Log the detected format for debugging
         logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
@@ -1628,8 +2288,19 @@ export class OpenAIProvider extends BaseProvider {
             detectedFormat,
             provider: this.name,
         });
-        // Convert Gemini format tools to the detected format
-        let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
+        // Convert IContent to OpenAI messages format
+        // Use buildMessagesWithReasoning for reasoning-aware message building
+        // Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
+        const messages = toolReplayMode === 'native'
+            ? this.buildMessagesWithReasoning(contents, options, detectedFormat)
+            : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
+        // Log tool replay mode usage for debugging
+        if (logger.enabled && toolReplayMode !== 'native') {
+            logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
+        }
+        // Convert Gemini format tools to OpenAI format using the schema converter
+        // This ensures required fields are always present in tool schemas
+        let formattedTools = convertToolsToOpenAI(tools);
         // CRITICAL FIX: Ensure we never pass an empty tools array
         // The OpenAI API errors when tools=[] but a tool call is attempted
         if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1905,8 +2576,18 @@ export class OpenAIProvider extends BaseProvider {
             // Buffer for accumulating text chunks for providers that need it
             let textBuffer = '';
             // Use the same detected format from earlier for consistency
-            // Buffer text for Qwen format providers to avoid stanza formatting
-            const shouldBufferText = detectedFormat === 'qwen';
+            const isKimiModel = model.toLowerCase().includes('kimi-k2');
+            // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
+            const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
+            // Accumulate thinking content across the entire stream to emit as ONE block
+            // This handles fragmented <think>word</think> streaming from Synthetic API
+            // @plan PLAN-20251202-THINKING.P16
+            let accumulatedThinkingContent = '';
+            let hasEmittedThinking = false;
+            // Accumulate reasoning_content from streaming deltas (pipeline path)
+            // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
+            // @plan PLAN-20251202-THINKING.P16
+            let accumulatedReasoningContent = '';
             // Track token usage from streaming chunks
             let streamingUsage = null;
             const allChunks = []; // Collect all chunks first
@@ -1918,6 +2599,11 @@ export class OpenAIProvider extends BaseProvider {
                     }
                     allChunks.push(chunk);
                 }
+                // Debug: Log how many chunks were received
+                logger.debug(() => `[Streaming pipeline] Collected ${allChunks.length} chunks from stream`, {
+                    firstChunkDelta: allChunks[0]?.choices?.[0]?.delta,
+                    lastChunkFinishReason: allChunks[allChunks.length - 1]?.choices?.[0]?.finish_reason,
+                });
                 // Now process all collected chunks
                 for (const chunk of allChunks) {
                     // Check for cancellation during chunk processing
@@ -1960,6 +2646,15 @@ export class OpenAIProvider extends BaseProvider {
                     const choice = chunk.choices?.[0];
                     if (!choice)
                         continue;
+                    // Parse reasoning_content from streaming delta (Pipeline path)
+                    // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+                    // @plan PLAN-20251202-THINKING.P16
+                    // @requirement REQ-THINK-003.1
+                    const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+                    if (reasoningBlock) {
+                        // Accumulate reasoning content - will emit ONE block later
+                        accumulatedReasoningContent += reasoningBlock.thought;
+                    }
                     // Check for finish_reason to detect proper stream ending
                     if (choice.finish_reason) {
                         logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1973,23 +2668,23 @@ export class OpenAIProvider extends BaseProvider {
                         if (choice.finish_reason === 'length') {
                             logger.debug(() => `Response truncated due to length limit for model ${model}`);
                         }
-                        // Flush any buffered text when stream finishes
-                        if (textBuffer.length > 0) {
-                            yield {
-                                speaker: 'ai',
-                                blocks: [
-                                    {
-                                        type: 'text',
-                                        text: textBuffer,
-                                    },
-                                ],
-                            };
-                            textBuffer = '';
-                        }
+                        // Don't flush buffer here on finish - let the final buffer handling
+                        // after the loop process it with proper sanitization and think tag extraction
+                        // This was causing unsanitized <think> tags to leak into output (pipeline path)
+                        // @plan PLAN-20251202-THINKING.P16
                     }
                     // Handle text content - buffer for Qwen format, emit immediately for others
-                    const deltaContent = choice.delta?.content;
-                    if (deltaContent) {
+                    // Note: Synthetic API sends content that may duplicate reasoning_content.
+                    // This is the model's behavior - we don't filter it here.
+                    // @plan PLAN-20251202-THINKING.P16
+                    const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
+                    if (rawDeltaContent) {
+                        const deltaContent = isKimiModel
+                            ? rawDeltaContent
+                            : this.sanitizeProviderText(rawDeltaContent);
+                        if (!deltaContent) {
+                            continue;
+                        }
                         _accumulatedText += deltaContent;
                         // Debug log for providers that need buffering
                         if (shouldBufferText) {
@@ -2002,22 +2697,103 @@ export class OpenAIProvider extends BaseProvider {
                             });
                             // Buffer text to avoid stanza formatting
                             textBuffer += deltaContent;
+                            const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
+                            const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
+                            const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
                             // Emit buffered text when we have a complete sentence or paragraph
-                            // Look for natural break points
-                            if (textBuffer.includes('\n') ||
-                                textBuffer.endsWith('. ') ||
-                                textBuffer.endsWith('! ') ||
-                                textBuffer.endsWith('? ') ||
-                                textBuffer.length > 100) {
-                                yield {
-                                    speaker: 'ai',
-                                    blocks: [
-                                        {
-                                            type: 'text',
-                                            text: textBuffer,
-                                        },
-                                    ],
-                                };
+                            // Look for natural break points, avoiding flush mid Kimi section
+                            if (!hasOpenKimiSection &&
+                                (textBuffer.includes('\n') ||
+                                    textBuffer.endsWith('. ') ||
+                                    textBuffer.endsWith('! ') ||
+                                    textBuffer.endsWith('? ') ||
+                                    textBuffer.length > 100)) {
+                                const parsedToolCalls = [];
+                                let workingText = textBuffer;
+                                // Extract <think> tags and ACCUMULATE instead of emitting immediately
+                                // This handles fragmented <think>word</think> streaming from Synthetic API
+                                // @plan PLAN-20251202-THINKING.P16
+                                // @requirement REQ-THINK-003
+                                const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
+                                if (tagBasedThinking) {
+                                    // Accumulate thinking content - don't emit yet
+                                    // Use newline to preserve formatting between chunks (not space)
+                                    if (accumulatedThinkingContent.length > 0) {
+                                        accumulatedThinkingContent += '\n';
+                                    }
+                                    accumulatedThinkingContent += tagBasedThinking.thought;
+                                    logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
+                                }
+                                const kimiParsed = this.extractKimiToolCallsFromText(workingText);
+                                if (kimiParsed.toolCalls.length > 0) {
+                                    parsedToolCalls.push(...kimiParsed.toolCalls);
+                                    logger.debug(() => `[OpenAIProvider] Streaming buffer (pipeline) parsed Kimi tool calls`, {
+                                        count: kimiParsed.toolCalls.length,
+                                        bufferLength: workingText.length,
+                                        cleanedLength: kimiParsed.cleanedText.length,
+                                    });
+                                }
+                                workingText = kimiParsed.cleanedText;
+                                const parsingText = this.sanitizeProviderText(workingText);
+                                let cleanedText = parsingText;
+                                try {
+                                    const parsedResult = this.textToolParser.parse(parsingText);
+                                    if (parsedResult.toolCalls.length > 0) {
+                                        parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
+                                            type: 'tool_call',
+                                            id: `text_tool_${Date.now()}_${Math.random()
+                                                .toString(36)
+                                                .substring(7)}`,
+                                            name: this.normalizeToolName(call.name),
+                                            parameters: call.arguments,
+                                        })));
+                                        cleanedText = parsedResult.cleanedContent;
+                                    }
+                                }
+                                catch (error) {
+                                    const logger = this.getLogger();
+                                    logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+                                }
+                                // Emit accumulated thinking BEFORE tool calls or text content
+                                // This ensures thinking appears first in the response
+                                // @plan PLAN-20251202-THINKING.P16
+                                if (!hasEmittedThinking &&
+                                    accumulatedThinkingContent.length > 0 &&
+                                    (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: [
+                                            {
+                                                type: 'thinking',
+                                                thought: accumulatedThinkingContent,
+                                                sourceField: 'think_tags',
+                                                isHidden: false,
+                                            },
+                                        ],
+                                    };
+                                    hasEmittedThinking = true;
+                                    logger.debug(() => `[Streaming pipeline] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
+                                }
+                                if (parsedToolCalls.length > 0) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: parsedToolCalls,
+                                    };
+                                }
+                                // Always use sanitized text to strip <think> tags (pipeline streaming)
+                                // Bug fix: Previously Kimi used unsanitized workingText
+                                // @plan PLAN-20251202-THINKING.P16
+                                if (cleanedText.trim().length > 0) {
+                                    yield {
+                                        speaker: 'ai',
+                                        blocks: [
+                                            {
+                                                type: 'text',
+                                                text: cleanedText,
+                                            },
+                                        ],
+                                    };
+                                }
                                 textBuffer = '';
                             }
                         }
@@ -2097,19 +2873,45 @@ export class OpenAIProvider extends BaseProvider {
             }
             // Check buffered text for <tool_call> format before flushing as plain text
             if (textBuffer.length > 0) {
-                // Try to parse <tool_call> format from buffered text
-                let parsedToolCalls = [];
-                let cleanedText = textBuffer;
+                const parsedToolCalls = [];
+                let workingText = textBuffer;
+                // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
+                // This is the model's behavior - we don't strip it since the model is the source.
+                // The user can configure reasoning display settings if they don't want duplicates.
+                // @plan PLAN-20251202-THINKING.P16
+                // Extract any remaining <think> tags from final buffer
+                // @plan PLAN-20251202-THINKING.P16
+                const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
+                if (tagBasedThinking) {
+                    // Use newline to preserve formatting between chunks (not space)
+                    if (accumulatedThinkingContent.length > 0) {
+                        accumulatedThinkingContent += '\n';
+                    }
+                    accumulatedThinkingContent += tagBasedThinking.thought;
+                }
+                const kimiParsed = this.extractKimiToolCallsFromText(workingText);
+                if (kimiParsed.toolCalls.length > 0) {
+                    parsedToolCalls.push(...kimiParsed.toolCalls);
+                    this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (pipeline) parsed Kimi tool calls`, {
+                        count: kimiParsed.toolCalls.length,
+                        bufferLength: workingText.length,
+                        cleanedLength: kimiParsed.cleanedText.length,
+                    });
+                }
+                workingText = kimiParsed.cleanedText;
+                const parsingText = this.sanitizeProviderText(workingText);
+                let cleanedText = parsingText;
                 try {
-                    const parsedResult = this.textToolParser.parse(textBuffer);
+                    const parsedResult = this.textToolParser.parse(parsingText);
                     if (parsedResult.toolCalls.length > 0) {
-                        // Convert parsed tool calls to ToolCallBlock format
-                        parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+                        parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
                             type: 'tool_call',
-                            id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
-                            name: call.name,
+                            id: `text_tool_${Date.now()}_${Math.random()
+                                .toString(36)
+                                .substring(7)}`,
+                            name: this.normalizeToolName(call.name),
                             parameters: call.arguments,
-                        }));
+                        })));
                         cleanedText = parsedResult.cleanedContent;
                     }
                 }
@@ -2117,14 +2919,33 @@ export class OpenAIProvider extends BaseProvider {
                     const logger = this.getLogger();
                     logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
                 }
-                // Emit tool calls from text parsing first
+                // Emit accumulated thinking BEFORE tool calls or text content
+                // @plan PLAN-20251202-THINKING.P16
+                if (!hasEmittedThinking &&
+                    accumulatedThinkingContent.length > 0 &&
+                    (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: [
+                            {
+                                type: 'thinking',
+                                thought: accumulatedThinkingContent,
+                                sourceField: 'think_tags',
+                                isHidden: false,
+                            },
+                        ],
+                    };
+                    hasEmittedThinking = true;
+                }
                 if (parsedToolCalls.length > 0) {
                     yield {
                         speaker: 'ai',
                         blocks: parsedToolCalls,
                     };
                 }
-                // Then emit any remaining cleaned text
+                // Always use sanitized text to strip <think> tags (pipeline final buffer)
+                // Bug fix: Previously Kimi used unsanitized workingText
+                // @plan PLAN-20251202-THINKING.P16
                 if (cleanedText.trim().length > 0) {
                     yield {
                         speaker: 'ai',
@@ -2138,6 +2959,39 @@ export class OpenAIProvider extends BaseProvider {
                 }
                 textBuffer = '';
             }
+            // Emit any remaining accumulated thinking that wasn't emitted yet
+            // (e.g., if entire response was just thinking with no content)
+            // @plan PLAN-20251202-THINKING.P16
+            if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
+                yield {
+                    speaker: 'ai',
+                    blocks: [
+                        {
+                            type: 'thinking',
+                            thought: accumulatedThinkingContent,
+                            sourceField: 'think_tags',
+                            isHidden: false,
+                        },
+                    ],
+                };
+                hasEmittedThinking = true;
+            }
+            // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
+            // This consolidates token-by-token reasoning from Synthetic API into a single block
+            // @plan PLAN-20251202-THINKING.P16
+            if (accumulatedReasoningContent.length > 0) {
+                yield {
+                    speaker: 'ai',
+                    blocks: [
+                        {
+                            type: 'thinking',
+                            thought: accumulatedReasoningContent,
+                            sourceField: 'reasoning_content',
+                            isHidden: false,
+                        },
+                    ],
+                };
+            }
             // Process and emit tool calls using the pipeline
             const pipelineResult = await this.toolCallPipeline.process(abortSignal);
             if (pipelineResult.normalized.length > 0 ||
@@ -2145,8 +2999,9 @@ export class OpenAIProvider extends BaseProvider {
                 const blocks = [];
                 // Process successful tool calls
                 for (const normalizedCall of pipelineResult.normalized) {
+                    const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
                     // Process tool parameters with double-escape handling
-                    const processedParameters = processToolParameters(normalizedCall.originalArgs || JSON.stringify(normalizedCall.args), normalizedCall.name);
+                    const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
                     blocks.push({
                         type: 'tool_call',
                         id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
@@ -2195,6 +3050,41 @@ export class OpenAIProvider extends BaseProvider {
                     },
                 };
             }
+            // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
+            // Only warn if we truly got nothing - not even reasoning content
+            const pipelineStats = this.toolCallPipeline.getStats();
+            if (_accumulatedText.length === 0 &&
+                pipelineStats.collector.totalCalls === 0 &&
+                textBuffer.length === 0 &&
+                accumulatedReasoningContent.length === 0 &&
+                accumulatedThinkingContent.length === 0) {
+                // Provide actionable guidance for users
+                const isKimi = model.toLowerCase().includes('kimi');
+                const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
+                const troubleshooting = isKimi
+                    ? isSynthetic
+                        ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
+                        : ' This provider may not support streaming with tool calls.'
+                    : ' Consider using streaming: "disabled" in your profile settings.';
+                logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${allChunks.length} chunks with no content).${troubleshooting}`, {
+                    model,
+                    baseURL: baseURL ?? this.getBaseURL(),
+                    isKimiModel: isKimi,
+                    isSyntheticAPI: isSynthetic,
+                    totalChunksReceived: allChunks.length,
+                });
+            }
+            else {
+                // Log what we DID get for debugging
+                logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
+                    textLength: _accumulatedText.length,
+                    toolCallCount: pipelineStats.collector.totalCalls,
+                    textBufferLength: textBuffer.length,
+                    reasoningLength: accumulatedReasoningContent.length,
+                    thinkingLength: accumulatedThinkingContent.length,
+                    totalChunksReceived: allChunks.length,
+                });
+            }
         }
         else {
             // Handle non-streaming response
@@ -2220,12 +3110,22 @@ export class OpenAIProvider extends BaseProvider {
                 }
             }
             const blocks = [];
-            // Handle text content
-            if (choice.message?.content) {
-                blocks.push({
-                    type: 'text',
-                    text: choice.message.content,
-                });
+            // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
+            const pipelineRawMessageContent = this.coerceMessageContentToString(choice.message?.content);
+            let pipelineKimiCleanContent;
+            let pipelineKimiToolBlocks = [];
+            if (pipelineRawMessageContent) {
+                const kimiParsed = this.extractKimiToolCallsFromText(pipelineRawMessageContent);
+                pipelineKimiCleanContent = kimiParsed.cleanedText;
+                pipelineKimiToolBlocks = kimiParsed.toolCalls;
+                // Always use sanitized text - even Kimi-K2 should have consistent tag stripping
+                const cleanedText = this.sanitizeProviderText(pipelineKimiCleanContent);
+                if (cleanedText) {
+                    blocks.push({
+                        type: 'text',
+                        text: cleanedText,
+                    });
+                }
             }
             // Handle tool calls
             if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
@@ -2234,8 +3134,9 @@ export class OpenAIProvider extends BaseProvider {
                     if (toolCall.type === 'function') {
                         // Normalize tool name for consistency with streaming path
                         const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
+                        const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
                         // Process tool parameters with double-escape handling
-                        const processedParameters = processToolParameters(toolCall.function.arguments || '', normalizedName);
+                        const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
                         blocks.push({
                             type: 'tool_call',
                             id: this.normalizeToHistoryToolId(toolCall.id),
@@ -2245,42 +3146,48 @@ export class OpenAIProvider extends BaseProvider {
                     }
                 }
             }
+            if (pipelineKimiToolBlocks.length > 0) {
+                blocks.push(...pipelineKimiToolBlocks);
+                this.getLogger().debug(() => `[OpenAIProvider] Non-stream pipeline added Kimi tool calls from text`, { count: pipelineKimiToolBlocks.length });
+            }
             // Additionally check for <tool_call> format in text content
-            if (choice.message?.content &&
-                typeof choice.message.content === 'string') {
-                try {
-                    const parsedResult = this.textToolParser.parse(choice.message.content);
-                    if (parsedResult.toolCalls.length > 0) {
-                        // Add tool calls found in text content
-                        for (const call of parsedResult.toolCalls) {
-                            blocks.push({
-                                type: 'tool_call',
-                                id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
-                                name: call.name,
-                                parameters: call.arguments,
-                            });
-                        }
-                        // Update the text content to remove the tool call parts
-                        if (choice.message.content !== parsedResult.cleanedContent) {
-                            // Find the text block and update it
-                            const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
-                            if (textBlockIndex >= 0) {
-                                blocks[textBlockIndex].text =
-                                    parsedResult.cleanedContent;
-                            }
-                            else if (parsedResult.cleanedContent.trim()) {
-                                // Add cleaned text if it doesn't exist
-                                blocks.unshift({
-                                    type: 'text',
-                                    text: parsedResult.cleanedContent,
+            if (pipelineKimiCleanContent) {
+                const cleanedSource = this.sanitizeProviderText(pipelineKimiCleanContent);
+                if (cleanedSource) {
+                    try {
+                        const parsedResult = this.textToolParser.parse(cleanedSource);
+                        if (parsedResult.toolCalls.length > 0) {
+                            // Add tool calls found in text content
+                            for (const call of parsedResult.toolCalls) {
+                                blocks.push({
+                                    type: 'tool_call',
+                                    id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+                                    name: this.normalizeToolName(call.name),
+                                    parameters: call.arguments,
                                 });
                             }
+                            // Update the text content to remove the tool call parts
+                            if (choice.message.content !== parsedResult.cleanedContent) {
+                                // Find the text block and update it
+                                const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+                                if (textBlockIndex >= 0) {
+                                    blocks[textBlockIndex].text =
+                                        parsedResult.cleanedContent;
+                                }
+                                else if (parsedResult.cleanedContent.trim()) {
+                                    // Add cleaned text if it doesn't exist
+                                    blocks.unshift({
+                                        type: 'text',
+                                        text: parsedResult.cleanedContent,
+                                    });
+                                }
+                            }
                         }
                     }
-                }
-                catch (error) {
-                    const logger = this.getLogger();
-                    logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+                    catch (error) {
+                        const logger = this.getLogger();
+                        logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+                    }
                 }
             }
             // Emit the complete response as a single IContent
@@ -2338,19 +3245,25 @@ export class OpenAIProvider extends BaseProvider {
     }
     /**
      * Detects the tool call format based on the model being used
-     * @returns The detected tool format ('openai' or 'qwen')
+     * @returns The detected tool format ('openai', 'qwen', or 'kimi')
      */
     detectToolFormat() {
         // Auto-detect based on model name if set to 'auto' or not set
-        const modelName = (this.getModel() || this.getDefaultModel()).toLowerCase();
+        const modelName = this.getModel() || this.getDefaultModel();
         const logger = new DebugLogger('llxprt:provider:openai');
+        // Check for Kimi K2 models (requires special ID format: functions.{name}:{index})
+        if (isKimiModel(modelName)) {
+            logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
+            return 'kimi';
+        }
+        const lowerModelName = modelName.toLowerCase();
         // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
-        if (modelName.includes('glm-4')) {
+        if (lowerModelName.includes('glm-4')) {
             logger.debug(() => `Auto-detected 'qwen' format for GLM-4.x model: ${modelName}`);
             return 'qwen';
         }
         // Check for qwen models
-        if (modelName.includes('qwen')) {
+        if (lowerModelName.includes('qwen')) {
             logger.debug(() => `Auto-detected 'qwen' format for Qwen model: ${modelName}`);
             return 'qwen';
         }
@@ -2421,5 +3334,61 @@ export class OpenAIProvider extends BaseProvider {
         }
         return shouldRetry;
     }
+    /**
+     * Parse reasoning_content from streaming delta.
+     *
+     * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
+     * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
+     */
+    parseStreamingReasoningDelta(delta) {
+        if (!delta) {
+            return null;
+        }
+        // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
+        const reasoningContent = delta
+            .reasoning_content;
+        // Handle absent, null, or non-string
+        if (!reasoningContent || typeof reasoningContent !== 'string') {
+            return null;
+        }
+        // Handle empty string or whitespace-only
+        if (reasoningContent.trim().length === 0) {
+            return null;
+        }
+        return {
+            type: 'thinking',
+            thought: reasoningContent,
+            sourceField: 'reasoning_content',
+            isHidden: false,
+        };
+    }
+    /**
+     * Parse reasoning_content from non-streaming message.
+     *
+     * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
+     * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
+     */
+    parseNonStreamingReasoning(message) {
+        if (!message) {
+            return null;
+        }
+        // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
+        const reasoningContent = message
+            .reasoning_content;
+        // Handle absent, null, or non-string
+        if (!reasoningContent || typeof reasoningContent !== 'string') {
+            return null;
+        }
+        // Handle empty string or whitespace-only
+        if (reasoningContent.trim().length === 0) {
+            return null;
+        }
+        return {
+            type: 'thinking',
+            thought: reasoningContent,
+            sourceField: 'reasoning_content',
+            isHidden: false,
+        };
+    }
 }
 //# sourceMappingURL=OpenAIProvider.js.map