@vybestack/llxprt-code-core 0.5.0-nightly.251109.557a0fe7 → 0.5.0-nightly.251110.c0116408

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/dist/src/core/coreToolScheduler.js +5 -5
  2. package/dist/src/core/coreToolScheduler.js.map +1 -1
  3. package/dist/src/core/prompts.js +53 -0
  4. package/dist/src/core/prompts.js.map +1 -1
  5. package/dist/src/core/turn.js +19 -2
  6. package/dist/src/core/turn.js.map +1 -1
  7. package/dist/src/parsers/TextToolCallParser.d.ts +3 -1
  8. package/dist/src/parsers/TextToolCallParser.js +68 -20
  9. package/dist/src/parsers/TextToolCallParser.js.map +1 -1
  10. package/dist/src/providers/anthropic/AnthropicProvider.js +12 -18
  11. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  12. package/dist/src/providers/gemini/GeminiProvider.js +9 -1
  13. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  14. package/dist/src/providers/openai/OpenAIProvider.d.ts +27 -5
  15. package/dist/src/providers/openai/OpenAIProvider.js +1078 -117
  16. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  17. package/dist/src/providers/openai/ToolCallCollector.d.ts +77 -0
  18. package/dist/src/providers/openai/ToolCallCollector.js +150 -0
  19. package/dist/src/providers/openai/ToolCallCollector.js.map +1 -0
  20. package/dist/src/providers/openai/ToolCallExecutor.d.ts +65 -0
  21. package/dist/src/providers/openai/ToolCallExecutor.js +120 -0
  22. package/dist/src/providers/openai/ToolCallExecutor.js.map +1 -0
  23. package/dist/src/providers/openai/ToolCallNormalizer.d.ts +47 -0
  24. package/dist/src/providers/openai/ToolCallNormalizer.js +101 -0
  25. package/dist/src/providers/openai/ToolCallNormalizer.js.map +1 -0
  26. package/dist/src/providers/openai/ToolCallPipeline.d.ts +80 -0
  27. package/dist/src/providers/openai/ToolCallPipeline.js +137 -0
  28. package/dist/src/providers/openai/ToolCallPipeline.js.map +1 -0
  29. package/dist/src/providers/openai/ToolCallValidator.d.ts +55 -0
  30. package/dist/src/providers/openai/ToolCallValidator.js +108 -0
  31. package/dist/src/providers/openai/ToolCallValidator.js.map +1 -0
  32. package/dist/src/providers/openai/ToolNameValidator.d.ts +38 -0
  33. package/dist/src/providers/openai/ToolNameValidator.js +90 -0
  34. package/dist/src/providers/openai/ToolNameValidator.js.map +1 -0
  35. package/dist/src/providers/openai/test-types.d.ts +37 -0
  36. package/dist/src/providers/openai/test-types.js +3 -0
  37. package/dist/src/providers/openai/test-types.js.map +1 -0
  38. package/dist/src/providers/openai/toolNameUtils.d.ts +57 -0
  39. package/dist/src/providers/openai/toolNameUtils.js +180 -0
  40. package/dist/src/providers/openai/toolNameUtils.js.map +1 -0
  41. package/dist/src/providers/types/IProviderConfig.d.ts +6 -0
  42. package/dist/src/providers/utils/toolResponsePayload.d.ts +18 -0
  43. package/dist/src/providers/utils/toolResponsePayload.js +130 -0
  44. package/dist/src/providers/utils/toolResponsePayload.js.map +1 -0
  45. package/dist/src/runtime/AgentRuntimeLoader.js +5 -5
  46. package/dist/src/runtime/AgentRuntimeLoader.js.map +1 -1
  47. package/dist/src/services/history/HistoryService.js +7 -19
  48. package/dist/src/services/history/HistoryService.js.map +1 -1
  49. package/dist/src/tools/ToolFormatter.js +9 -40
  50. package/dist/src/tools/ToolFormatter.js.map +1 -1
  51. package/dist/src/tools/tool-registry.js +20 -9
  52. package/dist/src/tools/tool-registry.js.map +1 -1
  53. package/dist/src/tools/toolNameUtils.d.ts +43 -0
  54. package/dist/src/tools/toolNameUtils.js +126 -0
  55. package/dist/src/tools/toolNameUtils.js.map +1 -0
  56. package/package.json +1 -1
@@ -18,20 +18,31 @@
  * @requirement REQ-INT-001.1
  */
  import OpenAI from 'openai';
+ import crypto from 'node:crypto';
  import * as http from 'http';
  import * as https from 'https';
  import * as net from 'net';
  import { BaseProvider, } from '../BaseProvider.js';
  import { DebugLogger } from '../../debug/index.js';
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
+ import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
  import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
  import { getCoreSystemPromptAsync } from '../../core/prompts.js';
  import { retryWithBackoff } from '../../utils/retry.js';
  import { resolveUserMemory } from '../utils/userMemory.js';
  import { resolveRuntimeAuthToken } from '../utils/authToken.js';
  import { filterOpenAIRequestParams } from './openaiRequestParams.js';
+ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
+ import { ToolCallPipeline } from './ToolCallPipeline.js';
+ import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
+ const MAX_TOOL_RESPONSE_CHARS = 1024;
+ const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
+ const TOOL_ARGS_PREVIEW_LENGTH = 500;
+ const TEXTUAL_TOOL_REPLAY_MODELS = new Set(['openrouter/polaris-alpha']);
  export class OpenAIProvider extends BaseProvider {
- name = 'openai';
+ textToolParser = new GemmaToolCallParser();
+ toolCallPipeline = new ToolCallPipeline();
+ toolCallProcessingMode;
  getLogger() {
  return new DebugLogger('llxprt:provider:openai');
  }
@@ -75,6 +86,8 @@ export class OpenAIProvider extends BaseProvider {
  oauthProvider: isQwenEndpoint || forceQwenOAuth ? 'qwen' : undefined,
  oauthManager,
  }, config);
+ // Initialize tool call processing mode - default to 'pipeline' (optimized)
+ this.toolCallProcessingMode = config?.toolCallProcessingMode ?? 'pipeline';
  // @plan:PLAN-20251023-STATELESS-HARDENING.P08
  // @requirement:REQ-SP4-002
  // No constructor-captured state - all values sourced from normalized options per call
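Note on the hunk above: the constructor now reads toolCallProcessingMode from the provider config (the field this release adds to IProviderConfig.d.ts) and defaults to 'pipeline'. A minimal opt-out sketch, assuming the same config object the constructor already forwards to BaseProvider; the other constructor arguments are illustrative, not confirmed API:

  // Hypothetical usage; only 'toolCallProcessingMode' is the field this diff adds.
  const provider = new OpenAIProvider(apiKey, baseURL, {
    toolCallProcessingMode: 'legacy', // any other value routes to the new pipeline path
  });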
@@ -385,22 +398,24 @@ export class OpenAIProvider extends BaseProvider {
  * Handles IDs from OpenAI (call_xxx), Anthropic (toolu_xxx), and history (hist_tool_xxx)
  */
  normalizeToOpenAIToolId(id) {
+ const sanitize = (value) => value.replace(/[^a-zA-Z0-9_]/g, '') ||
+ 'call_' + crypto.randomUUID().replace(/-/g, '');
  // If already in OpenAI format, return as-is
  if (id.startsWith('call_')) {
- return id;
+ return sanitize(id);
  }
  // For history format, extract the UUID and add OpenAI prefix
  if (id.startsWith('hist_tool_')) {
  const uuid = id.substring('hist_tool_'.length);
- return 'call_' + uuid;
+ return sanitize('call_' + uuid);
  }
  // For Anthropic format, extract the UUID and add OpenAI prefix
  if (id.startsWith('toolu_')) {
  const uuid = id.substring('toolu_'.length);
- return 'call_' + uuid;
+ return sanitize('call_' + uuid);
  }
  // Unknown format - assume it's a raw UUID
- return 'call_' + id;
+ return sanitize('call_' + id);
  }
  /**
  * Normalize tool IDs from OpenAI format to history format
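The new sanitize closure strips every character outside [a-zA-Z0-9_] and only mints a fresh 'call_' + crypto.randomUUID() id if nothing survives the strip. Expected behavior of the rewritten method, with illustrative inputs:

  provider.normalizeToOpenAIToolId('call_abc-123');   // 'call_abc123' (dash stripped)
  provider.normalizeToOpenAIToolId('hist_tool_9f2e'); // 'call_9f2e'
  provider.normalizeToOpenAIToolId('toolu_01AB$CD');  // 'call_01ABCD'
  provider.normalizeToOpenAIToolId('raw uuid');       // 'call_rawuuid' (unknown format: prefixed, then sanitized)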
@@ -458,10 +473,124 @@ export class OpenAIProvider extends BaseProvider {
  yield item;
  }
  }
+ normalizeToolCallArguments(parameters) {
+ if (parameters === undefined || parameters === null) {
+ return '{}';
+ }
+ if (typeof parameters === 'string') {
+ const trimmed = parameters.trim();
+ if (!trimmed) {
+ return '{}';
+ }
+ try {
+ const parsed = JSON.parse(trimmed);
+ if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
+ return JSON.stringify(parsed);
+ }
+ return JSON.stringify({ value: parsed });
+ }
+ catch {
+ return JSON.stringify({ raw: trimmed });
+ }
+ }
+ if (typeof parameters === 'object') {
+ try {
+ return JSON.stringify(parameters);
+ }
+ catch {
+ return JSON.stringify({ raw: '[unserializable object]' });
+ }
+ }
+ return JSON.stringify({ value: parameters });
+ }
+ determineToolReplayMode(model) {
+ if (!model) {
+ return 'native';
+ }
+ const normalized = model.toLowerCase();
+ if (TEXTUAL_TOOL_REPLAY_MODELS.has(normalized)) {
+ return 'textual';
+ }
+ return 'native';
+ }
+ describeToolCallForText(block) {
+ const normalizedArgs = this.normalizeToolCallArguments(block.parameters);
+ const preview = normalizedArgs.length > MAX_TOOL_RESPONSE_CHARS
+ ? `${normalizedArgs.slice(0, MAX_TOOL_RESPONSE_CHARS)}… [truncated ${normalizedArgs.length - MAX_TOOL_RESPONSE_CHARS} chars]`
+ : normalizedArgs;
+ const callId = block.id ? ` ${this.normalizeToOpenAIToolId(block.id)}` : '';
+ return `[TOOL CALL${callId ? ` ${callId}` : ''}] ${block.name ?? 'unknown_tool'} args=${preview}`;
+ }
+ describeToolResponseForText(block, config) {
+ const payload = buildToolResponsePayload(block, config);
+ const header = `[TOOL RESULT] ${payload.toolName ?? block.toolName ?? 'unknown_tool'} (${payload.status ?? 'unknown'})`;
+ const bodyParts = [];
+ if (payload.error) {
+ bodyParts.push(`error: ${payload.error}`);
+ }
+ if (payload.result && payload.result !== EMPTY_TOOL_RESULT_PLACEHOLDER) {
+ bodyParts.push(payload.result);
+ }
+ if (payload.limitMessage) {
+ bodyParts.push(payload.limitMessage);
+ }
+ return bodyParts.length > 0 ? `${header}\n${bodyParts.join('\n')}` : header;
+ }
+ buildToolResponseContent(block, config) {
+ const payload = buildToolResponsePayload(block, config);
+ return ensureJsonSafe(JSON.stringify(payload));
+ }
+ shouldCompressToolMessages(error, logger) {
+ if (error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ error.status === 400) {
+ const raw = error &&
+ typeof error === 'object' &&
+ 'error' in error &&
+ typeof error.error ===
+ 'object'
+ ? (error.error ?? {})
+ .metadata?.raw
+ : undefined;
+ if (raw === 'ERROR') {
+ logger.debug(() => `[OpenAIProvider] Detected OpenRouter 400 response with raw metadata. Will attempt tool-response compression.`);
+ return true;
+ }
+ }
+ return false;
+ }
+ compressToolMessages(messages, maxLength, logger) {
+ let modified = false;
+ messages.forEach((message, index) => {
+ if (message.role !== 'tool' || typeof message.content !== 'string') {
+ return;
+ }
+ const original = message.content;
+ if (original.length <= maxLength) {
+ return;
+ }
+ let nextContent = original;
+ try {
+ const parsed = JSON.parse(original);
+ parsed.result = `[omitted ${original.length} chars due to provider limits]`;
+ parsed.truncated = true;
+ parsed.originalLength = original.length;
+ nextContent = JSON.stringify(parsed);
+ }
+ catch {
+ nextContent = `${original.slice(0, maxLength)}… [truncated ${original.length - maxLength} chars]`;
+ }
+ message.content = ensureJsonSafe(nextContent);
+ modified = true;
+ logger.debug(() => `[OpenAIProvider] Compressed tool message #${index} from ${original.length} chars to ${message.content.length} chars`);
+ });
+ return modified;
+ }
  /**
  * Convert IContent array to OpenAI ChatCompletionMessageParam array
  */
- convertToOpenAIMessages(contents) {
+ convertToOpenAIMessages(contents, mode = 'native', config) {
  const messages = [];
  for (const content of contents) {
  if (content.speaker === 'human') {
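normalizeToolCallArguments (added above) guarantees the arguments string handed to the API is always a JSON object or an explicit wrapper, never raw text. Its behavior follows directly from the code; inputs here are illustrative:

  normalizeToolCallArguments(undefined);          // '{}'
  normalizeToolCallArguments('   ');              // '{}'
  normalizeToolCallArguments('{"path":"/tmp"}');  // '{"path":"/tmp"}' (re-serialized object)
  normalizeToolCallArguments('42');               // '{"value":42}'   (bare JSON value wrapped)
  normalizeToolCallArguments('["a"]');            // '{"value":["a"]}'
  normalizeToolCallArguments('not json');         // '{"raw":"not json"}'
  normalizeToolCallArguments({ path: '/tmp' });   // '{"path":"/tmp"}'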
@@ -478,28 +607,43 @@ export class OpenAIProvider extends BaseProvider {
  else if (content.speaker === 'ai') {
  // Convert AI messages
  const textBlocks = content.blocks.filter((b) => b.type === 'text');
+ const text = textBlocks.map((b) => b.text).join('\n');
  const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
  if (toolCalls.length > 0) {
- // Assistant message with tool calls
- const text = textBlocks.map((b) => b.text).join('\n');
- messages.push({
- role: 'assistant',
- content: text || null,
- tool_calls: toolCalls.map((tc) => ({
- id: this.normalizeToOpenAIToolId(tc.id),
- type: 'function',
- function: {
- name: tc.name,
- arguments: typeof tc.parameters === 'string'
- ? tc.parameters
- : JSON.stringify(tc.parameters),
- },
- })),
- });
+ if (mode === 'textual') {
+ const segments = [];
+ if (text) {
+ segments.push(text);
+ }
+ for (const tc of toolCalls) {
+ segments.push(this.describeToolCallForText(tc));
+ }
+ const combined = segments.join('\n\n').trim();
+ if (combined) {
+ messages.push({
+ role: 'assistant',
+ content: combined,
+ });
+ }
+ }
+ else {
+ // Assistant message with tool calls
+ messages.push({
+ role: 'assistant',
+ content: text || null,
+ tool_calls: toolCalls.map((tc) => ({
+ id: this.normalizeToOpenAIToolId(tc.id),
+ type: 'function',
+ function: {
+ name: tc.name,
+ arguments: this.normalizeToolCallArguments(tc.parameters),
+ },
+ })),
+ });
+ }
  }
  else if (textBlocks.length > 0) {
  // Plain assistant message
- const text = textBlocks.map((b) => b.text).join('\n');
  messages.push({
  role: 'assistant',
  content: text,
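In textual replay mode (enabled only for models in TEXTUAL_TOOL_REPLAY_MODELS, currently just 'openrouter/polaris-alpha'), a historical assistant turn with tool calls is replayed as one plain-text message built by describeToolCallForText instead of structured tool_calls. Roughly, with illustrative values:

  { role: 'assistant',
    content: 'Let me check that file.\n\n[TOOL CALL  call_9f2e] read_file args={"path":"/tmp/a.txt"}' }

(The doubled space after 'CALL' is real: callId already carries a leading space and the template adds another.)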
@@ -509,27 +653,72 @@ export class OpenAIProvider extends BaseProvider {
  else if (content.speaker === 'tool') {
  // Convert tool responses
  const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
- for (const tr of toolResponses) {
- messages.push({
- role: 'tool',
- content: typeof tr.result === 'string'
- ? tr.result
- : JSON.stringify(tr.result),
- tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
- });
+ if (mode === 'textual') {
+ const segments = toolResponses
+ .map((tr) => this.describeToolResponseForText(tr, config))
+ .filter(Boolean);
+ if (segments.length > 0) {
+ messages.push({
+ role: 'user',
+ content: segments.join('\n\n'),
+ });
+ }
+ }
+ else {
+ for (const tr of toolResponses) {
+ messages.push({
+ role: 'tool',
+ content: this.buildToolResponseContent(tr, config),
+ tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+ });
+ }
  }
  }
  }
  return messages;
  }
+ getContentPreview(content, maxLength = 200) {
+ if (content === null || content === undefined) {
+ return undefined;
+ }
+ if (typeof content === 'string') {
+ if (content.length <= maxLength) {
+ return content;
+ }
+ return `${content.slice(0, maxLength)}…`;
+ }
+ if (Array.isArray(content)) {
+ const textParts = content
+ .filter((part) => typeof part === 'object' && part !== null && 'type' in part)
+ .map((part) => part.type === 'text' && typeof part.text === 'string'
+ ? part.text
+ : JSON.stringify(part));
+ const joined = textParts.join('\n');
+ if (joined.length <= maxLength) {
+ return joined;
+ }
+ return `${joined.slice(0, maxLength)}…`;
+ }
+ try {
+ const serialized = JSON.stringify(content);
+ if (serialized.length <= maxLength) {
+ return serialized;
+ }
+ return `${serialized.slice(0, maxLength)}…`;
+ }
+ catch {
+ return '[unserializable content]';
+ }
+ }
  /**
  * @plan:PLAN-20251023-STATELESS-HARDENING.P08
  * @requirement:REQ-SP4-003
- * Internal implementation for chat completion using normalized options
+ * Legacy implementation for chat completion using accumulated tool calls approach
  */
- async *generateChatCompletionImpl(options, toolFormatter, client, logger) {
+ async *generateLegacyChatCompletionImpl(options, toolFormatter, client, logger) {
  const { contents, tools, metadata } = options;
  const model = options.resolved.model || this.getDefaultModel();
+ const toolReplayMode = this.determineToolReplayMode(model);
  const abortSignal = metadata?.abortSignal;
  const ephemeralSettings = options.invocation?.ephemerals ?? {};
  if (logger.enabled) {
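Tool responses change shape in both modes in this release: the native path now serializes every response through buildToolResponsePayload + ensureJsonSafe instead of passing tr.result through raw, and the textual path replays results as a synthetic user message. A sketch of the two shapes; the payload field names come from the helpers above, the values are illustrative:

  // native mode, role 'tool':
  {"toolName":"read_file","status":"success","result":"...file contents..."}
  // textual mode, role 'user':
  [TOOL RESULT] read_file (success)
  ...file contents...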
@@ -546,7 +735,11 @@ export class OpenAIProvider extends BaseProvider {
  });
  }
  // Convert IContent to OpenAI messages format
- const messages = this.convertToOpenAIMessages(contents);
+ const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
+ const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
+ if (logger.enabled && toolReplayMode !== 'native') {
+ logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
+ }
  // Detect the tool format to use (once at the start of the method)
  const detectedFormat = this.detectToolFormat();
  // Log the detected format for debugging
@@ -581,6 +774,9 @@ export class OpenAIProvider extends BaseProvider {
  outputToolsLength: formattedTools?.length,
  outputToolNames: formattedTools?.map((t) => t.function.name),
  });
+ logger.debug(() => `[OpenAIProvider] Tool conversion detail`, {
+ tools: formattedTools,
+ });
  }
  // Get streaming setting from ephemeral settings (default: enabled)
  const streamingSetting = ephemeralSettings['streaming'];
@@ -602,6 +798,40 @@ export class OpenAIProvider extends BaseProvider {
  { role: 'system', content: systemPrompt },
  ...messages,
  ];
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Chat payload snapshot`, {
+ messageCount: messagesWithSystem.length,
+ messages: messagesWithSystem.map((msg) => ({
+ role: msg.role,
+ contentPreview: this.getContentPreview(msg.content),
+ contentLength: typeof msg.content === 'string' ? msg.content.length : undefined,
+ rawContent: typeof msg.content === 'string' ? msg.content : undefined,
+ toolCallCount: 'tool_calls' in msg && Array.isArray(msg.tool_calls)
+ ? msg.tool_calls.length
+ : undefined,
+ toolCalls: 'tool_calls' in msg && Array.isArray(msg.tool_calls)
+ ? msg.tool_calls.map((call) => {
+ if (call.type === 'function') {
+ const args = call.function.arguments ?? '';
+ const preview = typeof args === 'string' &&
+ args.length > TOOL_ARGS_PREVIEW_LENGTH
+ ? `${args.slice(0, TOOL_ARGS_PREVIEW_LENGTH)}…`
+ : args;
+ return {
+ id: call.id,
+ name: call.function.name,
+ argumentsPreview: preview,
+ };
+ }
+ return { id: call.id, type: call.type };
+ })
+ : undefined,
+ toolCallId: 'tool_call_id' in msg
+ ? msg.tool_call_id
+ : undefined,
+ })),
+ });
+ }
  const maxTokens = metadata?.maxTokens ??
  ephemeralSettings['max-tokens'];
  // Build request - only include tools if they exist and are not empty
@@ -674,8 +904,10 @@ export class OpenAIProvider extends BaseProvider {
  requestHasSystemPrompt: Boolean(systemPrompt?.length),
  messageCount: messagesWithSystem.length,
  });
+ logger.debug(() => `[OpenAIProvider] Request body detail`, {
+ body: requestBody,
+ });
  }
- let response;
  // Debug log throttle tracker status
  logger.debug(() => `Retry configuration:`, {
  hasThrottleTracker: !!this.throttleTracker,
@@ -684,6 +916,11 @@ export class OpenAIProvider extends BaseProvider {
  initialDelayMs,
  });
  const customHeaders = this.getCustomHeaders();
+ if (logger.enabled && customHeaders) {
+ logger.debug(() => `[OpenAIProvider] Applying custom headers`, {
+ headerKeys: Object.keys(customHeaders),
+ });
+ }
  if (logger.enabled) {
  logger.debug(() => `[OpenAIProvider] Request body preview`, {
  model: requestBody.model,
@@ -693,53 +930,80 @@ export class OpenAIProvider extends BaseProvider {
  overrideKeys: requestOverrides ? Object.keys(requestOverrides) : [],
  });
  }
- try {
- response = await retryWithBackoff(() => client.chat.completions.create(requestBody, {
- ...(abortSignal ? { signal: abortSignal } : {}),
- ...(customHeaders ? { headers: customHeaders } : {}),
- }), {
+ const executeRequest = () => client.chat.completions.create(requestBody, {
+ ...(abortSignal ? { signal: abortSignal } : {}),
+ ...(customHeaders ? { headers: customHeaders } : {}),
+ });
+ let response;
+ if (streamingEnabled) {
+ response = await retryWithBackoff(executeRequest, {
  maxAttempts: maxRetries,
  initialDelayMs,
  shouldRetry: this.shouldRetryResponse.bind(this),
  trackThrottleWaitTime: this.throttleTracker,
  });
  }
- catch (error) {
- // Special handling for Cerebras/Qwen "Tool not present" errors
- const errorMessage = String(error);
- if (errorMessage.includes('Tool is not present in the tools list') &&
- (model.toLowerCase().includes('qwen') ||
- this.getBaseURL()?.includes('cerebras'))) {
- logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
- error,
- model,
- toolsProvided: formattedTools?.length || 0,
- toolNames: formattedTools?.map((t) => t.function.name),
- streamingEnabled,
- });
- // Re-throw but with better context
- const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
- enhancedError.originalError =
- error;
- throw enhancedError;
+ else {
+ let compressedOnce = false;
+ while (true) {
+ try {
+ response = (await retryWithBackoff(executeRequest, {
+ maxAttempts: maxRetries,
+ initialDelayMs,
+ shouldRetry: this.shouldRetryResponse.bind(this),
+ trackThrottleWaitTime: this.throttleTracker,
+ }));
+ break;
+ }
+ catch (error) {
+ const errorMessage = String(error);
+ logger.debug(() => `[OpenAIProvider] Chat request error`, {
+ errorType: error?.constructor?.name,
+ status: typeof error === 'object' && error && 'status' in error
+ ? error.status
+ : undefined,
+ errorKeys: error && typeof error === 'object' ? Object.keys(error) : [],
+ });
+ const isCerebrasToolError = errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'));
+ if (isCerebrasToolError) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError = error;
+ throw enhancedError;
+ }
+ if (!compressedOnce &&
+ this.shouldCompressToolMessages(error, logger) &&
+ this.compressToolMessages(requestBody.messages, MAX_TOOL_RESPONSE_RETRY_CHARS, logger)) {
+ compressedOnce = true;
+ logger.warn(() => `[OpenAIProvider] Retrying request after compressing tool responses due to provider 400`);
+ continue;
+ }
+ const capturedErrorMessage = error instanceof Error ? error.message : String(error);
+ const status = typeof error === 'object' &&
+ error !== null &&
+ 'status' in error &&
+ typeof error.status === 'number'
+ ? error.status
+ : undefined;
+ logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ hasTools: formattedTools?.length ?? 0,
+ requestHasSystemPrompt: !!systemPrompt,
+ status,
+ });
+ throw error;
+ }
  }
- // Re-throw other errors as-is
- const capturedErrorMessage = error instanceof Error ? error.message : String(error);
- const status = typeof error === 'object' &&
- error !== null &&
- 'status' in error &&
- typeof error.status === 'number'
- ? error.status
- : undefined;
- logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
- model,
- baseURL: baseURL ?? this.getBaseURL(),
- streamingEnabled,
- hasTools: formattedTools?.length ?? 0,
- requestHasSystemPrompt: !!systemPrompt,
- status,
- });
- throw error;
  }
  // Check if response is streaming or not
  if (streamingEnabled) {
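The non-streaming branch of the rewritten request loop retries at most once: when shouldCompressToolMessages recognizes an OpenRouter-style 400 (error.metadata.raw === 'ERROR'), compressToolMessages shrinks every tool message longer than MAX_TOOL_RESPONSE_RETRY_CHARS (512) and the request is resent. A compressed tool message ends up shaped like this (illustrative lengths):

  {"toolName":"read_file","status":"success","result":"[omitted 20480 chars due to provider limits]","truncated":true,"originalLength":20480}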
@@ -869,13 +1133,12 @@ export class OpenAIProvider extends BaseProvider {
  };
  }
  }
- // Handle tool calls
+ // Handle tool calls using legacy accumulated approach
  const deltaToolCalls = choice.delta?.tool_calls;
  if (deltaToolCalls && deltaToolCalls.length > 0) {
  for (const deltaToolCall of deltaToolCalls) {
  if (deltaToolCall.index === undefined)
  continue;
- // Initialize or update accumulated tool call
  if (!accumulatedToolCalls[deltaToolCall.index]) {
  accumulatedToolCalls[deltaToolCall.index] = {
  id: deltaToolCall.id || '',
@@ -905,37 +1168,16 @@ export class OpenAIProvider extends BaseProvider {
  if (!toolCall || toolCall.type !== 'function') {
  return;
  }
- let targetIndex = index;
- const annotated = toolCall;
- if (typeof annotated.index === 'number') {
- targetIndex = annotated.index;
- }
- else if (toolCall.id) {
- const matchIndex = accumulatedToolCalls.findIndex((existing) => existing && existing.id === toolCall.id);
- if (matchIndex >= 0) {
- targetIndex = matchIndex;
- }
- }
- if (!accumulatedToolCalls[targetIndex]) {
- accumulatedToolCalls[targetIndex] = {
+ if (!accumulatedToolCalls[index]) {
+ accumulatedToolCalls[index] = {
  id: toolCall.id || '',
  type: 'function',
  function: {
  name: toolCall.function?.name || '',
- arguments: '',
+ arguments: toolCall.function?.arguments || '',
  },
  };
  }
- const target = accumulatedToolCalls[targetIndex];
- if (toolCall.id) {
- target.id = toolCall.id;
- }
- if (toolCall.function?.name) {
- target.function.name = toolCall.function.name;
- }
- if (toolCall.function?.arguments !== undefined) {
- target.function.arguments = toolCall.function.arguments ?? '';
- }
  });
  }
  }
@@ -965,23 +1207,52 @@ export class OpenAIProvider extends BaseProvider {
  throw error;
  }
  }
- // Flush any remaining buffered text
+ // Check buffered text for <tool_call> format before flushing as plain text
  if (textBuffer.length > 0) {
- yield {
- speaker: 'ai',
- blocks: [
- {
- type: 'text',
- text: textBuffer,
- },
- ],
- };
+ // Try to parse <tool_call> format from buffered text
+ let parsedToolCalls = [];
+ let cleanedText = textBuffer;
+ try {
+ const parsedResult = this.textToolParser.parse(textBuffer);
+ if (parsedResult.toolCalls.length > 0) {
+ // Convert parsed tool calls to ToolCallBlock format
+ parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ }));
+ cleanedText = parsedResult.cleanedContent;
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+ }
+ // Emit tool calls from text parsing first
+ if (parsedToolCalls.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: parsedToolCalls,
+ };
+ }
+ // Then emit any remaining cleaned text
+ if (cleanedText.trim().length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: cleanedText,
+ },
+ ],
+ };
+ }
  textBuffer = '';
  }
- // Emit accumulated tool calls as IContent if any
+ // Process and emit tool calls using legacy accumulated approach
  if (accumulatedToolCalls.length > 0) {
  const blocks = [];
- // Use the same detected format from earlier for consistency
  for (const tc of accumulatedToolCalls) {
  if (!tc)
  continue;
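The flush step now routes buffered text through GemmaToolCallParser, so models that emit tool calls inline as text still yield real tool_call blocks. Assuming the common <tool_call> convention this parser family targets (the exact grammar accepted lives in TextToolCallParser.js), a buffered chunk like the following would produce one tool_call block plus the surrounding prose as cleaned text:

  I'll list the directory first.
  <tool_call>
  {"name": "list_directory", "arguments": {"path": "/tmp"}}
  </tool_call>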
@@ -1067,17 +1338,57 @@ export class OpenAIProvider extends BaseProvider {
  // Use the same detected format from earlier for consistency
  for (const toolCall of choice.message.tool_calls) {
  if (toolCall.type === 'function') {
+ // Use tool name directly without normalization for legacy compatibility
+ const toolName = toolCall.function.name || '';
  // Process tool parameters with double-escape handling
- const processedParameters = processToolParameters(toolCall.function.arguments || '', toolCall.function.name || '', detectedFormat);
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', toolName, detectedFormat);
  blocks.push({
  type: 'tool_call',
  id: this.normalizeToHistoryToolId(toolCall.id),
- name: toolCall.function.name || '',
+ name: toolName,
  parameters: processedParameters,
  });
  }
  }
  }
+ // Additionally check for <tool_call> format in text content
+ if (choice.message?.content &&
+ typeof choice.message.content === 'string') {
+ try {
+ const parsedResult = this.textToolParser.parse(choice.message.content);
+ if (parsedResult.toolCalls.length > 0) {
+ // Add tool calls found in text content
+ for (const call of parsedResult.toolCalls) {
+ blocks.push({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ });
+ }
+ // Update the text content to remove the tool call parts
+ if (choice.message.content !== parsedResult.cleanedContent) {
+ // Find the text block and update it
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+ if (textBlockIndex >= 0) {
+ blocks[textBlockIndex].text =
+ parsedResult.cleanedContent;
+ }
+ else if (parsedResult.cleanedContent.trim()) {
+ // Add cleaned text if it doesn't exist
+ blocks.unshift({
+ type: 'text',
+ text: parsedResult.cleanedContent,
+ });
+ }
+ }
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+ }
+ }
  // Emit the complete response as a single IContent
  if (blocks.length > 0) {
  const responseContent = {
@@ -1167,8 +1478,655 @@ export class OpenAIProvider extends BaseProvider {
  /**
  * @plan:PLAN-20251023-STATELESS-HARDENING.P08
  * @requirement:REQ-SP4-003
- * Get the tool format for this provider using normalized options
- * @returns The tool format to use
+ * Internal implementation for chat completion using normalized options
+ * Routes to appropriate implementation based on toolCallProcessingMode
+ */
+ async *generateChatCompletionImpl(options, toolFormatter, client, logger) {
+ if (this.toolCallProcessingMode === 'legacy') {
+ yield* this.generateLegacyChatCompletionImpl(options, toolFormatter, client, logger);
+ }
+ else {
+ yield* this.generatePipelineChatCompletionImpl(options, toolFormatter, client, logger);
+ }
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Pipeline implementation for chat completion using optimized tool call pipeline
+ */
+ async *generatePipelineChatCompletionImpl(options, toolFormatter, client, logger) {
+ const { contents, tools, metadata } = options;
+ const model = options.resolved.model || this.getDefaultModel();
+ const abortSignal = metadata?.abortSignal;
+ const ephemeralSettings = options.invocation?.ephemerals ?? {};
+ if (logger.enabled) {
+ const resolved = options.resolved;
+ logger.debug(() => `[OpenAIProvider] Resolved request context`, {
+ provider: this.name,
+ model,
+ resolvedModel: resolved.model,
+ resolvedBaseUrl: resolved.baseURL,
+ authTokenPresent: Boolean(resolved.authToken),
+ messageCount: contents.length,
+ toolCount: tools?.length ?? 0,
+ metadataKeys: Object.keys(metadata ?? {}),
+ });
+ }
+ // Convert IContent to OpenAI messages format
+ const messages = this.convertToOpenAIMessages(contents);
+ // Detect the tool format to use (once at the start of the method)
+ const detectedFormat = this.detectToolFormat();
+ // Log the detected format for debugging
+ logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
+ model,
+ detectedFormat,
+ provider: this.name,
+ });
+ // Convert Gemini format tools to the detected format
+ let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
+ // CRITICAL FIX: Ensure we never pass an empty tools array
+ // The OpenAI API errors when tools=[] but a tool call is attempted
+ if (Array.isArray(formattedTools) && formattedTools.length === 0) {
+ logger.warn(() => `[OpenAIProvider] CRITICAL: Formatted tools is empty array! Setting to undefined to prevent API errors.`, {
+ model,
+ inputTools: tools,
+ inputToolsLength: tools?.length,
+ inputFirstGroup: tools?.[0],
+ stackTrace: new Error().stack,
+ });
+ formattedTools = undefined;
+ }
+ // Debug log the conversion result - enhanced logging for intermittent issues
+ if (logger.enabled && formattedTools) {
+ logger.debug(() => `[OpenAIProvider] Tool conversion summary:`, {
+ detectedFormat,
+ inputHadTools: !!tools,
+ inputToolsLength: tools?.length,
+ inputFirstGroup: tools?.[0],
+ inputFunctionDeclarationsLength: tools?.[0]?.functionDeclarations?.length,
+ outputHasTools: !!formattedTools,
+ outputToolsLength: formattedTools?.length,
+ outputToolNames: formattedTools?.map((t) => t.function.name),
+ });
+ }
+ // Get streaming setting from ephemeral settings (default: enabled)
+ const streamingSetting = ephemeralSettings['streaming'];
+ const streamingEnabled = streamingSetting !== 'disabled';
+ // Get the system prompt
+ const flattenedToolNames = tools?.flatMap((group) => group.functionDeclarations
+ .map((decl) => decl.name)
+ .filter((name) => !!name)) ?? [];
+ const toolNamesArg = tools === undefined ? undefined : Array.from(new Set(flattenedToolNames));
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Source user memory from normalized options instead of global config
+ */
+ const userMemory = await resolveUserMemory(options.userMemory, () => options.invocation?.userMemory);
+ const systemPrompt = await getCoreSystemPromptAsync(userMemory, model, toolNamesArg);
+ // Add system prompt as the first message in the array
+ const messagesWithSystem = [
+ { role: 'system', content: systemPrompt },
+ ...messages,
+ ];
+ const maxTokens = metadata?.maxTokens ??
+ ephemeralSettings['max-tokens'];
+ // Build request - only include tools if they exist and are not empty
+ // IMPORTANT: Create a deep copy of tools to prevent mutation issues
+ const requestBody = {
+ model,
+ messages: messagesWithSystem,
+ stream: streamingEnabled,
+ };
+ if (formattedTools && formattedTools.length > 0) {
+ requestBody.tools = JSON.parse(JSON.stringify(formattedTools));
+ requestBody.tool_choice = 'auto';
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-002
+ * Extract per-call request overrides from normalized options instead of cached state
+ */
+ const requestOverrides = this.extractModelParamsFromOptions(options);
+ if (requestOverrides) {
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Applying request overrides`, {
+ overrideKeys: Object.keys(requestOverrides),
+ });
+ }
+ Object.assign(requestBody, requestOverrides);
+ }
+ if (typeof maxTokens === 'number' && Number.isFinite(maxTokens)) {
+ requestBody.max_tokens = maxTokens;
+ }
+ // Debug log request summary for Cerebras/Qwen
+ const baseURL = options.resolved.baseURL ?? this.getBaseURL();
+ if (logger.enabled &&
+ (model.toLowerCase().includes('qwen') || baseURL?.includes('cerebras'))) {
+ logger.debug(() => `Request to ${baseURL} for model ${model}:`, {
+ baseURL,
+ model,
+ streamingEnabled,
+ hasTools: 'tools' in requestBody,
+ toolCount: formattedTools?.length || 0,
+ messageCount: messages.length,
+ toolsInRequest: 'tools' in requestBody ? requestBody.tools?.length : 'not included',
+ });
+ }
+ // Get retry settings from ephemeral settings
+ const maxRetries = ephemeralSettings['retries'] ?? 6; // Default for OpenAI
+ const initialDelayMs = ephemeralSettings['retrywait'] ?? 4000; // Default for OpenAI
+ // Get stream options from ephemeral settings (default: include usage for token tracking)
+ const streamOptions = ephemeralSettings['stream-options'] || { include_usage: true };
+ // Add stream options to request if streaming is enabled
+ if (streamingEnabled && streamOptions) {
+ Object.assign(requestBody, { stream_options: streamOptions });
+ }
+ // Log the exact tools being sent for debugging
+ if (logger.enabled && 'tools' in requestBody) {
+ logger.debug(() => `[OpenAIProvider] Exact tools being sent to API:`, {
+ toolCount: requestBody.tools?.length,
+ toolNames: requestBody.tools?.map((t) => 'function' in t ? t.function?.name : undefined),
+ firstTool: requestBody.tools?.[0],
+ });
+ }
+ // Wrap the API call with retry logic using centralized retry utility
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Sending chat request`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ toolCount: formattedTools?.length ?? 0,
+ hasAuthToken: Boolean(options.resolved.authToken),
+ requestHasSystemPrompt: Boolean(systemPrompt?.length),
+ messageCount: messagesWithSystem.length,
+ });
+ }
+ let response;
+ // Debug log throttle tracker status
+ logger.debug(() => `Retry configuration:`, {
+ hasThrottleTracker: !!this.throttleTracker,
+ throttleTrackerType: typeof this.throttleTracker,
+ maxRetries,
+ initialDelayMs,
+ });
+ const customHeaders = this.getCustomHeaders();
+ if (logger.enabled) {
+ logger.debug(() => `[OpenAIProvider] Request body preview`, {
+ model: requestBody.model,
+ hasStop: 'stop' in requestBody,
+ hasMaxTokens: 'max_tokens' in requestBody,
+ hasResponseFormat: 'response_format' in requestBody,
+ overrideKeys: requestOverrides ? Object.keys(requestOverrides) : [],
+ });
+ }
+ try {
+ response = await retryWithBackoff(() => client.chat.completions.create(requestBody, {
+ ...(abortSignal ? { signal: abortSignal } : {}),
+ ...(customHeaders ? { headers: customHeaders } : {}),
+ }), {
+ maxAttempts: maxRetries,
+ initialDelayMs,
+ shouldRetry: this.shouldRetryResponse.bind(this),
+ trackThrottleWaitTime: this.throttleTracker,
+ });
+ }
+ catch (error) {
+ // Special handling for Cerebras/Qwen "Tool not present" errors
+ const errorMessage = String(error);
+ if (errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'))) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ // Re-throw but with better context
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError =
+ error;
+ throw enhancedError;
+ }
+ // Re-throw other errors as-is
+ const capturedErrorMessage = error instanceof Error ? error.message : String(error);
+ const status = typeof error === 'object' &&
+ error !== null &&
+ 'status' in error &&
+ typeof error.status === 'number'
+ ? error.status
+ : undefined;
+ logger.error(() => `[OpenAIProvider] Chat completion failed for model '${model}' at '${baseURL ?? this.getBaseURL() ?? 'default'}': ${capturedErrorMessage}`, {
+ model,
+ baseURL: baseURL ?? this.getBaseURL(),
+ streamingEnabled,
+ hasTools: formattedTools?.length ?? 0,
+ requestHasSystemPrompt: !!systemPrompt,
+ status,
+ });
+ throw error;
+ }
+ // Check if response is streaming or not
+ if (streamingEnabled) {
+ // Process streaming response
+ let _accumulatedText = '';
+ // Initialize tool call pipeline for this streaming session
+ this.toolCallPipeline.reset();
+ // Buffer for accumulating text chunks for providers that need it
+ let textBuffer = '';
+ // Use the same detected format from earlier for consistency
+ // Buffer text for Qwen format providers to avoid stanza formatting
+ const shouldBufferText = detectedFormat === 'qwen';
+ // Track token usage from streaming chunks
+ let streamingUsage = null;
+ const allChunks = []; // Collect all chunks first
+ try {
+ // Handle streaming response - collect all chunks
+ for await (const chunk of response) {
+ if (abortSignal?.aborted) {
+ break;
+ }
+ allChunks.push(chunk);
+ }
+ // Now process all collected chunks
+ for (const chunk of allChunks) {
+ const chunkRecord = chunk;
+ let parsedData;
+ const rawData = chunkRecord?.data;
+ if (typeof rawData === 'string') {
+ try {
+ parsedData = JSON.parse(rawData);
+ }
+ catch {
+ parsedData = undefined;
+ }
+ }
+ else if (rawData && typeof rawData === 'object') {
+ parsedData = rawData;
+ }
+ const streamingError = chunkRecord?.error ??
+ parsedData?.error ??
+ parsedData?.data?.error;
+ const streamingEvent = (chunkRecord?.event ?? parsedData?.event);
+ const streamingErrorMessage = streamingError?.message ??
+ streamingError?.error ??
+ parsedData?.message;
+ if (streamingEvent === 'error' ||
+ (streamingError && typeof streamingError === 'object')) {
+ const errorMessage = streamingErrorMessage ??
+ (typeof streamingError === 'string'
+ ? streamingError
+ : 'Streaming response reported an error.');
+ throw new Error(errorMessage);
+ }
+ // Extract usage information if present (typically in final chunk)
+ if (chunk.usage) {
+ streamingUsage = chunk.usage;
+ }
+ const choice = chunk.choices?.[0];
+ if (!choice)
+ continue;
+ // Check for finish_reason to detect proper stream ending
+ if (choice.finish_reason) {
+ logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
+ model,
+ finishReason: choice.finish_reason,
+ hasAccumulatedText: _accumulatedText.length > 0,
+ hasAccumulatedTools: this.toolCallPipeline.getStats().collector.totalCalls > 0,
+ hasBufferedText: textBuffer.length > 0,
+ });
+ // If finish_reason is 'length', the response was cut off
+ if (choice.finish_reason === 'length') {
+ logger.debug(() => `Response truncated due to length limit for model ${model}`);
+ }
+ // Flush any buffered text when stream finishes
+ if (textBuffer.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
+ };
+ textBuffer = '';
+ }
+ }
+ // Handle text content - buffer for Qwen format, emit immediately for others
+ const deltaContent = choice.delta?.content;
+ if (deltaContent) {
+ _accumulatedText += deltaContent;
+ // Debug log for providers that need buffering
+ if (shouldBufferText) {
+ logger.debug(() => `[Streaming] Chunk content for ${detectedFormat} format:`, {
+ deltaContent,
+ length: deltaContent.length,
+ hasNewline: deltaContent.includes('\n'),
+ escaped: JSON.stringify(deltaContent),
+ bufferSize: textBuffer.length,
+ });
+ // Buffer text to avoid stanza formatting
+ textBuffer += deltaContent;
+ // Emit buffered text when we have a complete sentence or paragraph
+ // Look for natural break points
+ if (textBuffer.includes('\n') ||
+ textBuffer.endsWith('. ') ||
+ textBuffer.endsWith('! ') ||
+ textBuffer.endsWith('? ') ||
+ textBuffer.length > 100) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
+ };
+ textBuffer = '';
+ }
+ }
+ else {
+ // For other providers, emit text immediately as before
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: deltaContent,
+ },
+ ],
+ };
+ }
+ }
+ // Handle tool calls using the new pipeline
+ const deltaToolCalls = choice.delta?.tool_calls;
+ if (deltaToolCalls && deltaToolCalls.length > 0) {
+ for (const deltaToolCall of deltaToolCalls) {
+ if (deltaToolCall.index === undefined)
+ continue;
+ // Add fragment to pipeline instead of accumulating strings
+ this.toolCallPipeline.addFragment(deltaToolCall.index, {
+ name: deltaToolCall.function?.name,
+ args: deltaToolCall.function?.arguments,
+ });
+ }
+ }
+ const choiceMessage = choice.message;
+ const messageToolCalls = choiceMessage?.tool_calls;
+ if (messageToolCalls && messageToolCalls.length > 0) {
+ messageToolCalls.forEach((toolCall, index) => {
+ if (!toolCall || toolCall.type !== 'function') {
+ return;
+ }
+ // Add final complete tool call to pipeline
+ this.toolCallPipeline.addFragment(index, {
+ name: toolCall.function?.name,
+ args: toolCall.function?.arguments,
+ });
+ });
+ }
+ }
+ }
+ catch (error) {
+ if (abortSignal?.aborted) {
+ throw error;
+ }
+ else {
+ // Special handling for Cerebras/Qwen "Tool not present" errors
+ const errorMessage = String(error);
+ if (errorMessage.includes('Tool is not present in the tools list') &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'))) {
+ logger.error('Cerebras/Qwen API error: Tool not found despite being in request. This is a known API issue.', {
+ error,
+ model,
+ toolsProvided: formattedTools?.length || 0,
+ toolNames: formattedTools?.map((t) => t.function.name),
+ streamingEnabled,
+ });
+ // Re-throw but with better context
+ const enhancedError = new Error(`Cerebras/Qwen API bug: Tool not found in list during streaming. We sent ${formattedTools?.length || 0} tools. Known API issue.`);
+ enhancedError.originalError = error;
+ throw enhancedError;
+ }
+ logger.error('Error processing streaming response:', error);
+ throw error;
+ }
+ }
+ // Check buffered text for <tool_call> format before flushing as plain text
+ if (textBuffer.length > 0) {
+ // Try to parse <tool_call> format from buffered text
+ let parsedToolCalls = [];
+ let cleanedText = textBuffer;
+ try {
+ const parsedResult = this.textToolParser.parse(textBuffer);
+ if (parsedResult.toolCalls.length > 0) {
+ // Convert parsed tool calls to ToolCallBlock format
+ parsedToolCalls = parsedResult.toolCalls.map((call) => ({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ }));
+ cleanedText = parsedResult.cleanedContent;
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
+ }
+ // Emit tool calls from text parsing first
+ if (parsedToolCalls.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: parsedToolCalls,
+ };
+ }
+ // Then emit any remaining cleaned text
+ if (cleanedText.trim().length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: cleanedText,
+ },
+ ],
+ };
+ }
+ textBuffer = '';
+ }
+ // Process and emit tool calls using the pipeline
+ const pipelineResult = await this.toolCallPipeline.process();
+ if (pipelineResult.executed.length > 0 ||
+ pipelineResult.failed.length > 0) {
+ const blocks = [];
+ // Process successful tool calls
+ for (const normalizedCall of pipelineResult.normalized) {
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(normalizedCall.originalArgs || JSON.stringify(normalizedCall.args), normalizedCall.name, detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
+ name: normalizedCall.name,
+ parameters: processedParameters,
+ });
+ }
+ // Handle failed tool calls (could emit as errors or warnings)
+ for (const failed of pipelineResult.failed) {
+ this.getLogger().warn(`Tool call validation failed for index ${failed.index}: ${failed.validationErrors.join(', ')}`);
+ }
+ if (blocks.length > 0) {
+ const toolCallsContent = {
+ speaker: 'ai',
+ blocks,
+ };
+ // Add usage metadata if we captured it from streaming
+ if (streamingUsage) {
+ toolCallsContent.metadata = {
+ usage: {
+ promptTokens: streamingUsage.prompt_tokens || 0,
+ completionTokens: streamingUsage.completion_tokens || 0,
+ totalTokens: streamingUsage.total_tokens ||
+ (streamingUsage.prompt_tokens || 0) +
+ (streamingUsage.completion_tokens || 0),
+ },
+ };
+ }
+ yield toolCallsContent;
+ }
+ }
+ // If we have usage information but no tool calls, emit a metadata-only response
+ if (streamingUsage &&
+ this.toolCallPipeline.getStats().collector.totalCalls === 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [],
+ metadata: {
+ usage: {
+ promptTokens: streamingUsage.prompt_tokens || 0,
+ completionTokens: streamingUsage.completion_tokens || 0,
+ totalTokens: streamingUsage.total_tokens ||
+ (streamingUsage.prompt_tokens || 0) +
+ (streamingUsage.completion_tokens || 0),
+ },
+ },
+ };
+ }
+ }
+ else {
+ // Handle non-streaming response
+ const completion = response;
+ const choice = completion.choices?.[0];
+ if (!choice) {
+ throw new Error('No choices in completion response');
+ }
+ // Log finish reason for debugging Qwen issues
+ if (choice.finish_reason) {
+ logger.debug(() => `[Non-streaming] Response finish_reason: ${choice.finish_reason}`, {
+ model,
+ finishReason: choice.finish_reason,
+ hasContent: !!choice.message?.content,
+ hasToolCalls: !!(choice.message?.tool_calls && choice.message.tool_calls.length > 0),
+ contentLength: choice.message?.content?.length || 0,
+ toolCallCount: choice.message?.tool_calls?.length || 0,
+ detectedFormat,
+ });
+ // Warn if the response was truncated
+ if (choice.finish_reason === 'length') {
+ logger.warn(() => `Response truncated due to max_tokens limit for model ${model}. Consider increasing max_tokens.`);
+ }
+ }
+ const blocks = [];
+ // Handle text content
+ if (choice.message?.content) {
+ blocks.push({
+ type: 'text',
+ text: choice.message.content,
+ });
+ }
+ // Handle tool calls
+ if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
+ // Use the same detected format from earlier for consistency
+ for (const toolCall of choice.message.tool_calls) {
+ if (toolCall.type === 'function') {
+ // Normalize tool name for consistency with streaming path
+ const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', normalizedName, detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(toolCall.id),
+ name: normalizedName,
+ parameters: processedParameters,
+ });
+ }
+ }
+ }
+ // Additionally check for <tool_call> format in text content
+ if (choice.message?.content &&
+ typeof choice.message.content === 'string') {
+ try {
+ const parsedResult = this.textToolParser.parse(choice.message.content);
+ if (parsedResult.toolCalls.length > 0) {
+ // Add tool calls found in text content
+ for (const call of parsedResult.toolCalls) {
+ blocks.push({
+ type: 'tool_call',
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
+ name: call.name,
+ parameters: call.arguments,
+ });
+ }
+ // Update the text content to remove the tool call parts
+ if (choice.message.content !== parsedResult.cleanedContent) {
+ // Find the text block and update it
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
+ if (textBlockIndex >= 0) {
+ blocks[textBlockIndex].text =
+ parsedResult.cleanedContent;
+ }
+ else if (parsedResult.cleanedContent.trim()) {
+ // Add cleaned text if it doesn't exist
+ blocks.unshift({
+ type: 'text',
+ text: parsedResult.cleanedContent,
+ });
+ }
+ }
+ }
+ }
+ catch (error) {
+ const logger = this.getLogger();
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
+ }
+ }
+ // Emit the complete response as a single IContent
+ if (blocks.length > 0) {
+ const responseContent = {
+ speaker: 'ai',
+ blocks,
+ };
+ // Add usage metadata from non-streaming response
+ if (completion.usage) {
+ responseContent.metadata = {
+ usage: {
+ promptTokens: completion.usage.prompt_tokens || 0,
+ completionTokens: completion.usage.completion_tokens || 0,
+ totalTokens: completion.usage.total_tokens ||
+ (completion.usage.prompt_tokens || 0) +
+ (completion.usage.completion_tokens || 0),
+ },
+ };
+ }
+ yield responseContent;
+ }
+ else if (completion.usage) {
+ // Emit metadata-only response if no content blocks but have usage info
+ yield {
+ speaker: 'ai',
+ blocks: [],
+ metadata: {
+ usage: {
+ promptTokens: completion.usage.prompt_tokens || 0,
+ completionTokens: completion.usage.completion_tokens || 0,
+ totalTokens: completion.usage.total_tokens ||
+ (completion.usage.prompt_tokens || 0) +
+ (completion.usage.completion_tokens || 0),
+ },
+ },
+ };
+ }
+ }
+ }
+ /**
+ * @plan:PLAN-20251023-STATELESS-HARDENING.P08
+ * @requirement:REQ-SP4-003
+ * Legacy implementation for chat completion using accumulated tool calls approach
  */
  getToolFormat() {
  const format = this.detectToolFormat();
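The pipeline implementation above replaces the hand-rolled accumulatedToolCalls array: streaming deltas are fed straight into ToolCallPipeline, and assembly, normalization, and validation happen in process(). Condensed from the code above (no additional API surface is assumed):

  this.toolCallPipeline.reset();                 // once per streaming session
  // for each streamed delta with a defined index:
  this.toolCallPipeline.addFragment(delta.index, {
    name: delta.function?.name,
    args: delta.function?.arguments,
  });
  // after the stream ends:
  const result = await this.toolCallPipeline.process();
  // result.normalized -> { index, name, args, originalArgs } entries, emitted as tool_call blocks
  // result.failed     -> entries logged via their validationErrors; result.executed is also reported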
@@ -1254,6 +2212,9 @@ export class OpenAIProvider extends BaseProvider {
  status,
  errorMessage: error instanceof Error ? error.message : String(error),
  errorKeys: error && typeof error === 'object' ? Object.keys(error) : [],
+ errorData: error && typeof error === 'object' && 'error' in error
+ ? error.error
+ : undefined,
  });
  // Retry on 429 rate limit errors or 5xx server errors
  const shouldRetry = Boolean(status === 429 || status === 503 || status === 504);