@vybestack/llxprt-code-core 0.1.23 → 0.2.2-nightly.250908.fb8099b7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -17
- package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
- package/dist/src/auth/oauth-errors.d.ts +173 -0
- package/dist/src/auth/oauth-errors.js +461 -0
- package/dist/src/auth/oauth-errors.js.map +1 -0
- package/dist/src/auth/precedence.d.ts +1 -5
- package/dist/src/auth/precedence.js +28 -48
- package/dist/src/auth/precedence.js.map +1 -1
- package/dist/src/auth/token-store.js +2 -2
- package/dist/src/auth/token-store.js.map +1 -1
- package/dist/src/auth/types.d.ts +4 -4
- package/dist/src/code_assist/codeAssist.js +19 -6
- package/dist/src/code_assist/codeAssist.js.map +1 -1
- package/dist/src/code_assist/oauth2.d.ts +7 -0
- package/dist/src/code_assist/oauth2.js +82 -32
- package/dist/src/code_assist/oauth2.js.map +1 -1
- package/dist/src/code_assist/server.js +15 -4
- package/dist/src/code_assist/server.js.map +1 -1
- package/dist/src/code_assist/setup.js +9 -0
- package/dist/src/code_assist/setup.js.map +1 -1
- package/dist/src/config/index.d.ts +7 -0
- package/dist/src/config/index.js +8 -0
- package/dist/src/config/index.js.map +1 -0
- package/dist/src/core/client.d.ts +15 -20
- package/dist/src/core/client.js +98 -124
- package/dist/src/core/client.js.map +1 -1
- package/dist/src/core/compression-config.d.ts +10 -0
- package/dist/src/core/compression-config.js +17 -0
- package/dist/src/core/compression-config.js.map +1 -0
- package/dist/src/core/coreToolScheduler.js +50 -15
- package/dist/src/core/coreToolScheduler.js.map +1 -1
- package/dist/src/core/geminiChat.d.ts +68 -9
- package/dist/src/core/geminiChat.js +940 -405
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
- package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
- package/dist/src/core/prompts.js +35 -25
- package/dist/src/core/prompts.js.map +1 -1
- package/dist/src/core/turn.d.ts +1 -0
- package/dist/src/core/turn.js +8 -6
- package/dist/src/core/turn.js.map +1 -1
- package/dist/src/ide/ide-client.d.ts +1 -1
- package/dist/src/ide/ide-client.js +12 -6
- package/dist/src/ide/ide-client.js.map +1 -1
- package/dist/src/index.d.ts +4 -2
- package/dist/src/index.js +5 -2
- package/dist/src/index.js.map +1 -1
- package/dist/src/prompt-config/TemplateEngine.js +17 -0
- package/dist/src/prompt-config/TemplateEngine.js.map +1 -1
- package/dist/src/prompt-config/defaults/core-defaults.js +39 -32
- package/dist/src/prompt-config/defaults/core-defaults.js.map +1 -1
- package/dist/src/prompt-config/defaults/core.md +2 -0
- package/dist/src/prompt-config/defaults/provider-defaults.js +34 -27
- package/dist/src/prompt-config/defaults/provider-defaults.js.map +1 -1
- package/dist/src/prompt-config/defaults/providers/gemini/core.md +270 -0
- package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/core.md +12 -0
- package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/gemini-2-5-flash/core.md +12 -0
- package/dist/src/prompt-config/types.d.ts +2 -0
- package/dist/src/providers/BaseProvider.d.ts +39 -13
- package/dist/src/providers/BaseProvider.js +102 -28
- package/dist/src/providers/BaseProvider.js.map +1 -1
- package/dist/src/providers/IProvider.d.ts +17 -3
- package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
- package/dist/src/providers/LoggingProviderWrapper.js +33 -27
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
- package/dist/src/providers/ProviderContentGenerator.js +9 -6
- package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
- package/dist/src/providers/ProviderManager.d.ts +4 -0
- package/dist/src/providers/ProviderManager.js +6 -0
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.d.ts +34 -21
- package/dist/src/providers/anthropic/AnthropicProvider.js +505 -492
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/providers/gemini/GeminiProvider.d.ts +23 -9
- package/dist/src/providers/gemini/GeminiProvider.js +344 -515
- package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
- package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
- package/dist/src/providers/openai/OpenAIProvider.d.ts +46 -96
- package/dist/src/providers/openai/OpenAIProvider.js +532 -1393
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
- package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
- package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
- package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
- package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
- package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
- package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
- package/dist/src/providers/openai/parseResponsesStream.js +99 -391
- package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
- package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
- package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
- package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +89 -0
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +451 -0
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
- package/dist/src/providers/openai-responses/index.d.ts +1 -0
- package/dist/src/providers/openai-responses/index.js +2 -0
- package/dist/src/providers/openai-responses/index.js.map +1 -0
- package/dist/src/providers/tokenizers/OpenAITokenizer.js +3 -3
- package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
- package/dist/src/providers/types.d.ts +1 -1
- package/dist/src/services/ClipboardService.d.ts +19 -0
- package/dist/src/services/ClipboardService.js +66 -0
- package/dist/src/services/ClipboardService.js.map +1 -0
- package/dist/src/services/history/ContentConverters.d.ts +43 -0
- package/dist/src/services/history/ContentConverters.js +325 -0
- package/dist/src/services/history/ContentConverters.js.map +1 -0
- package/dist/src/{providers/IMessage.d.ts → services/history/HistoryEvents.d.ts} +16 -22
- package/dist/src/{providers/IMessage.js → services/history/HistoryEvents.js} +1 -1
- package/dist/src/services/history/HistoryEvents.js.map +1 -0
- package/dist/src/services/history/HistoryService.d.ts +220 -0
- package/dist/src/services/history/HistoryService.js +673 -0
- package/dist/src/services/history/HistoryService.js.map +1 -0
- package/dist/src/services/history/IContent.d.ts +183 -0
- package/dist/src/services/history/IContent.js +104 -0
- package/dist/src/services/history/IContent.js.map +1 -0
- package/dist/src/services/index.d.ts +1 -0
- package/dist/src/services/index.js +1 -0
- package/dist/src/services/index.js.map +1 -1
- package/dist/src/telemetry/types.d.ts +16 -4
- package/dist/src/telemetry/types.js.map +1 -1
- package/dist/src/tools/IToolFormatter.d.ts +2 -2
- package/dist/src/tools/ToolFormatter.d.ts +42 -4
- package/dist/src/tools/ToolFormatter.js +159 -37
- package/dist/src/tools/ToolFormatter.js.map +1 -1
- package/dist/src/tools/doubleEscapeUtils.d.ts +57 -0
- package/dist/src/tools/doubleEscapeUtils.js +241 -0
- package/dist/src/tools/doubleEscapeUtils.js.map +1 -0
- package/dist/src/tools/read-file.d.ts +6 -1
- package/dist/src/tools/read-file.js +25 -11
- package/dist/src/tools/read-file.js.map +1 -1
- package/dist/src/tools/todo-schemas.d.ts +4 -4
- package/dist/src/tools/tools.js +13 -0
- package/dist/src/tools/tools.js.map +1 -1
- package/dist/src/tools/write-file.d.ts +6 -1
- package/dist/src/tools/write-file.js +48 -26
- package/dist/src/tools/write-file.js.map +1 -1
- package/dist/src/types/modelParams.d.ts +8 -0
- package/dist/src/utils/bfsFileSearch.js +2 -6
- package/dist/src/utils/bfsFileSearch.js.map +1 -1
- package/dist/src/utils/schemaValidator.js +16 -1
- package/dist/src/utils/schemaValidator.js.map +1 -1
- package/package.json +8 -7
- package/dist/src/providers/IMessage.js.map +0 -1
- package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
- package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
- package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
package/dist/src/providers/openai/OpenAIProvider.js
@@ -17,463 +17,157 @@
  * @plan PLAN-20250120-DEBUGLOGGING.P15
  * @requirement REQ-INT-001.1
  */
-import { DebugLogger } from '../../debug/index.js';
-import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
-import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
-import { ToolFormatter } from '../../tools/ToolFormatter.js';
 import OpenAI from 'openai';
-import
-import
-import
-// ConversationContext removed - using inline conversation ID generation
-import { parseResponsesStream, parseErrorResponse, } from './parseResponsesStream.js';
-import { buildResponsesRequest } from './buildResponsesRequest.js';
+import * as http from 'http';
+import * as https from 'https';
+import * as net from 'net';
 import { BaseProvider } from '../BaseProvider.js';
-import {
-import {
+import { DebugLogger } from '../../debug/index.js';
+import { ToolFormatter } from '../../tools/ToolFormatter.js';
+import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
+import { getCoreSystemPromptAsync } from '../../core/prompts.js';
 export class OpenAIProvider extends BaseProvider {
+    name = 'openai';
     logger;
-    openai;
-    currentModel = process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
-    baseURL;
-    providerConfig;
     toolFormatter;
-    toolFormatOverride;
-    conversationCache;
-    modelParams;
     _cachedClient;
     _cachedClientKey;
     constructor(apiKey, baseURL, config, oauthManager) {
+        // Normalize empty string to undefined for proper precedence handling
+        const normalizedApiKey = apiKey && apiKey.trim() !== '' ? apiKey : undefined;
+        // Detect if this is a Qwen endpoint
+        const isQwenEndpoint = !!(baseURL &&
+            (baseURL.includes('dashscope.aliyuncs.com') ||
+                baseURL.includes('api.qwen.com') ||
+                baseURL.includes('qwen')));
         // Initialize base provider with auth configuration
-
-        // Check OAuth enablement from OAuth manager if available
-        let shouldEnableQwenOAuth = false;
-        if (oauthManager) {
-            // Check if OAuth is enabled for qwen in the OAuth manager (from settings)
-            const manager = oauthManager;
-            if (manager.isOAuthEnabled &&
-                typeof manager.isOAuthEnabled === 'function') {
-                shouldEnableQwenOAuth = manager.isOAuthEnabled('qwen');
-            }
-            // Also enable if this looks like a Qwen endpoint
-            if (!shouldEnableQwenOAuth) {
-                shouldEnableQwenOAuth =
-                    isQwenEndpoint(baseURL || '') ||
-                        (!baseURL && (!apiKey || apiKey === '')) ||
-                        baseURL === 'https://portal.qwen.ai/v1';
-            }
-        }
-        const baseConfig = {
+        super({
             name: 'openai',
-            apiKey,
+            apiKey: normalizedApiKey,
             baseURL,
-
-
-
-            oauthProvider: shouldEnableQwenOAuth ? 'qwen' : undefined,
+            envKeyNames: ['OPENAI_API_KEY'], // Support environment variable fallback
+            isOAuthEnabled: isQwenEndpoint && !!oauthManager,
+            oauthProvider: isQwenEndpoint ? 'qwen' : undefined,
             oauthManager,
-        };
-        super(baseConfig);
-        this.logger = new DebugLogger('llxprt:providers:openai');
-        this.logger.debug(() => `Constructor - baseURL: ${baseURL}, apiKey: ${apiKey?.substring(0, 10) || 'none'}, oauthManager: ${!!oauthManager}, shouldEnableQwenOAuth: ${shouldEnableQwenOAuth}`);
-        this.baseURL = baseURL;
-        this.providerConfig = config;
+        }, config);
         this.toolFormatter = new ToolFormatter();
-
-        //
-        this.
-            this.logger.debug(() => `Failed to initialize from SettingsService: ${error}`);
-        });
-        // Set appropriate default model based on the provider
-        if (shouldEnableQwenOAuth || isQwenEndpoint(baseURL || '')) {
-            // Default to Qwen model when using Qwen endpoints
-            this.currentModel = 'qwen3-coder-plus';
-        }
-        else if (process.env.LLXPRT_DEFAULT_MODEL) {
-            // Use environment variable if set
-            this.currentModel = process.env.LLXPRT_DEFAULT_MODEL;
-        }
-        const clientOptions = {
-            apiKey: apiKey || 'placeholder', // OpenAI client requires a string, use placeholder if OAuth will be used
-            // Allow browser environment if explicitly configured
-            dangerouslyAllowBrowser: config?.allowBrowserEnvironment || false,
-        };
-        // Only include baseURL if it's defined
-        if (baseURL) {
-            clientOptions.baseURL = baseURL;
-        }
-        this.openai = new OpenAI(clientOptions);
-        this._cachedClientKey = apiKey; // Track the initial key used
-        // Cached client reserved for future optimization
-        void this._cachedClient;
-    }
-    /**
-     * Implementation of BaseProvider abstract method
-     * Determines if this provider supports OAuth authentication
-     */
-    supportsOAuth() {
-        // Only support Qwen OAuth for Qwen endpoints
-        // Use baseProviderConfig.baseURL if this.baseURL not set yet (during constructor)
-        const baseURL = this.baseURL ||
-            this.baseProviderConfig.baseURL ||
-            'https://api.openai.com/v1';
-        return isQwenEndpoint(baseURL);
+        // new DebugLogger('llxprt:core:toolformatter'), // TODO: Fix ToolFormatter constructor
+        // Setup debug logger
+        this.logger = new DebugLogger('llxprt:provider:openai');
     }
     /**
-     *
+     * Create HTTP/HTTPS agents with socket configuration for local AI servers
+     * Returns undefined if no socket settings are configured
      */
-
-        //
-        const
-
-
-
-
-
-
-
-        if (!resolvedKey) {
-            // Provide specific error message based on endpoint validation
-            const endpoint = this.baseURL || 'https://api.openai.com/v1';
-            if (this.isOAuthEnabled() && !this.supportsOAuth()) {
-                throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
-            }
-            throw new Error('No authentication available for OpenAI API calls');
-        }
-        // Check if we're using Qwen OAuth and need to update the baseURL
-        let effectiveBaseURL = this.baseURL;
-        this.logger.debug(() => `updateClientWithResolvedAuth - OAuth enabled: ${this.isOAuthEnabled()}, OAuth provider: ${this.baseProviderConfig.oauthProvider}, baseURL: ${this.baseURL}, resolvedKey: ${resolvedKey?.substring(0, 10)}...`);
-        if (this.isOAuthEnabled() &&
-            this.baseProviderConfig.oauthProvider === 'qwen') {
-            // Get the OAuth token to check for resource_url
-            const oauthManager = this.baseProviderConfig.oauthManager;
-            if (oauthManager?.getOAuthToken) {
-                const oauthToken = await oauthManager.getOAuthToken('qwen');
-                this.logger.debug(() => `OAuth token retrieved, resource_url: ${oauthToken?.resource_url}, access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
-                if (oauthToken?.resource_url) {
-                    // Use the resource_url from the OAuth token
-                    effectiveBaseURL = `https://${oauthToken.resource_url}/v1`;
-                    this.logger.debug(() => `Using Qwen OAuth endpoint: ${effectiveBaseURL}`);
-                }
-            }
-        }
-        // Only update client if the key or URL has changed
-        if (this._cachedClientKey !== resolvedKey ||
-            this.baseURL !== effectiveBaseURL) {
-            const clientOptions = {
-                apiKey: resolvedKey,
-                // Allow browser environment if explicitly configured
-                dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
-            };
-            // Only include baseURL if it's defined
-            if (effectiveBaseURL) {
-                clientOptions.baseURL = effectiveBaseURL;
-            }
-            this.openai = new OpenAI(clientOptions);
-            this._cachedClientKey = resolvedKey;
-            // Update the baseURL to track changes
-            if (effectiveBaseURL !== this.baseURL) {
-                this.baseURL = effectiveBaseURL;
-            }
-        }
-    }
-    requiresTextToolCallParsing() {
-        if (this.providerConfig?.enableTextToolCallParsing === false) {
-            return false;
-        }
-        // Check if current tool format requires text-based parsing
-        const currentFormat = this.getToolFormat();
-        const textBasedFormats = ['hermes', 'xml', 'llama'];
-        if (textBasedFormats.includes(currentFormat)) {
-            return true;
-        }
-        const configuredModels = this.providerConfig?.textToolCallModels || [];
-        return configuredModels.includes(this.currentModel);
-    }
-    getToolFormat() {
-        // Check manual override first
-        if (this.toolFormatOverride) {
-            return this.toolFormatOverride;
-        }
-        // Check for settings override
-        if (this.providerConfig?.providerToolFormatOverrides?.[this.name]) {
-            return this.providerConfig.providerToolFormatOverrides[this.name];
-        }
-        // Auto-detect tool format based on model or base URL
-        if (this.currentModel.includes('deepseek') ||
-            this.baseURL?.includes('deepseek')) {
-            return 'deepseek';
-        }
-        // Check for Qwen - including OAuth authenticated Qwen
-        if (this.isUsingQwen()) {
-            return 'qwen';
-        }
-        // Default to OpenAI format
-        return 'openai';
-    }
-    shouldUseResponses(model) {
-        // Check env flag override (highest priority)
-        if (process.env.OPENAI_RESPONSES_DISABLE === 'true') {
-            return false;
-        }
-        // Check settings override - if explicitly set to false, always respect that
-        if (this.providerConfig?.openaiResponsesEnabled === false) {
-            return false;
-        }
-        // Never use Responses API for non-OpenAI providers (those with custom base URLs)
-        const baseURL = this.baseURL || 'https://api.openai.com/v1';
-        if (baseURL !== 'https://api.openai.com/v1') {
-            return false;
-        }
-        // Default: Check if model starts with any of the responses API model prefixes
-        return RESPONSES_API_MODELS.some((responsesModel) => model.startsWith(responsesModel));
-    }
-    async callResponsesEndpoint(messages, tools, options) {
-        // Check if API key is available (using resolved authentication)
-        const apiKey = await this.getAuthToken();
-        if (!apiKey) {
-            const endpoint = this.baseURL || 'https://api.openai.com/v1';
-            if (this.isOAuthEnabled() && !this.supportsOAuth()) {
-                throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
-            }
-            throw new Error('OpenAI API key is required to make API calls');
-        }
-        // Remove the stateful mode error to allow O3 to work with conversation IDs
-        // Check context usage and warn if getting close to limit
-        if (options?.conversationId && options?.parentId) {
-            const contextInfo = this.estimateContextUsage(options.conversationId, options.parentId, messages);
-            // Warn if less than 4k tokens remaining
-            if (contextInfo.tokensRemaining < 4000) {
-                this.logger.debug(() => `Warning: Only ${contextInfo.tokensRemaining} tokens remaining (${contextInfo.contextUsedPercent.toFixed(1)}% context used). Consider starting a new conversation.`);
-            }
-        }
-        // Check cache for existing conversation
-        if (options?.conversationId && options?.parentId) {
-            const cachedMessages = this.conversationCache.get(options.conversationId, options.parentId);
-            if (cachedMessages) {
-                // Return cached messages as an async iterable
-                return (async function* () {
-                    for (const message of cachedMessages) {
-                        yield message;
-                    }
-                })();
-            }
+    createHttpAgents() {
+        // Get socket configuration from ephemeral settings
+        const settings = this.providerConfig?.getEphemeralSettings?.() || {};
+        // Check if any socket settings are explicitly configured
+        const hasSocketSettings = 'socket-timeout' in settings ||
+            'socket-keepalive' in settings ||
+            'socket-nodelay' in settings;
+        // Only create custom agents if socket settings are configured
+        if (!hasSocketSettings) {
+            return undefined;
         }
-        //
-        const
-
-
-        //
-        const
-
-
-
-            model: this.currentModel,
-            messages: patchedMessages,
-            tools: formattedTools,
-            stream: options?.stream ?? true,
-            conversationId: options?.conversationId,
-            parentId: options?.parentId,
-            tool_choice: options?.tool_choice,
+        // Socket configuration with defaults for when settings ARE configured
+        const socketTimeout = settings['socket-timeout'] || 60000; // 60 seconds default
+        const socketKeepAlive = settings['socket-keepalive'] !== false; // true by default
+        const socketNoDelay = settings['socket-nodelay'] !== false; // true by default
+        // Create HTTP agent with socket options
+        const httpAgent = new http.Agent({
+            keepAlive: socketKeepAlive,
+            keepAliveMsecs: 1000,
+            timeout: socketTimeout,
         });
-        //
-        const
-
-
-
-        const requestBody = JSON.stringify(request);
-        const bodyBlob = new Blob([requestBody], {
-            type: 'application/json; charset=utf-8',
+        // Create HTTPS agent with socket options
+        const httpsAgent = new https.Agent({
+            keepAlive: socketKeepAlive,
+            keepAliveMsecs: 1000,
+            timeout: socketTimeout,
         });
-
-
-
-
-
-
-
-        });
-        // Handle errors
-        if (!response.ok) {
-            const errorBody = await response.text();
-            // Handle 422 context_length_exceeded error
-            if (response.status === 422 &&
-                errorBody.includes('context_length_exceeded')) {
-                this.logger.debug(() => 'Context length exceeded, invalidating cache and retrying stateless...');
-                // Invalidate the cache for this conversation
-                if (options?.conversationId && options?.parentId) {
-                    this.conversationCache.invalidate(options.conversationId, options.parentId);
-                }
-                // Retry without conversation context (pure stateless)
-                const retryRequest = buildResponsesRequest({
-                    model: this.currentModel,
-                    messages,
-                    tools: formattedTools,
-                    stream: options?.stream ?? true,
-                    // Omit conversationId and parentId for stateless retry
-                    tool_choice: options?.tool_choice,
-                });
-                // Ensure proper UTF-8 encoding for retry request as well
-                const retryRequestBody = JSON.stringify(retryRequest);
-                const retryBodyBlob = new Blob([retryRequestBody], {
-                    type: 'application/json; charset=utf-8',
-                });
-                const retryResponse = await fetch(responsesURL, {
-                    method: 'POST',
-                    headers: {
-                        Authorization: `Bearer ${apiKey}`,
-                        'Content-Type': 'application/json; charset=utf-8',
-                    },
-                    body: retryBodyBlob,
-                });
-                if (!retryResponse.ok) {
-                    const retryErrorBody = await retryResponse.text();
-                    throw parseErrorResponse(retryResponse.status, retryErrorBody, this.name);
-                }
-                // Use the retry response
-                return this.handleResponsesApiResponse(retryResponse, messages, undefined, // No conversation context on retry
-                undefined, options?.stream !== false);
-            }
-            throw parseErrorResponse(response.status, errorBody, this.name);
-        }
-        // Handle the response
-        return this.handleResponsesApiResponse(response, messages, options?.conversationId, options?.parentId, options?.stream !== false);
-    }
-    async handleResponsesApiResponse(response, messages, conversationId, parentId, isStreaming) {
-        // Handle streaming response
-        if (isStreaming && response.body) {
-            const collectedMessages = [];
-            const cache = this.conversationCache;
-            return (async function* () {
-                for await (const message of parseResponsesStream(response.body)) {
-                    // Collect messages for caching
-                    if (message.content || message.tool_calls) {
-                        collectedMessages.push(message);
-                    }
-                    else if (message.usage && collectedMessages.length === 0) {
-                        // If we only got a usage message with no content, add a placeholder
-                        collectedMessages.push({
-                            role: ContentGeneratorRole.ASSISTANT,
-                            content: '',
-                        });
-                    }
-                    // Update the parentId in the context as soon as we get a message ID
-                    if (message.id) {
-                        // ConversationContext.setParentId(message.id);
-                        // TODO: Handle parent ID updates when ConversationContext is available
-                    }
-                    yield message;
-                }
-                // Cache the collected messages with token count
-                if (conversationId && parentId && collectedMessages.length > 0) {
-                    // Get previous accumulated tokens
-                    const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
-                    // Calculate tokens for this request (messages + response)
-                    const requestTokens = estimateMessagesTokens(messages);
-                    const responseTokens = estimateMessagesTokens(collectedMessages);
-                    const totalTokensForRequest = requestTokens + responseTokens;
-                    // Update cache with new accumulated total
-                    cache.set(conversationId, parentId, collectedMessages, previousTokens + totalTokensForRequest);
-                }
-            })();
-        }
-        const data = (await response.json());
-        const resultMessages = [];
-        // DEFENSIVE FIX: Handle potential array response from providers that violate OpenAI spec
-        // Some providers (like Cerebras) may return an array of responses instead of a single response
-        if (Array.isArray(data)) {
-            this.logger.error(() => '[Cerebras Corruption] Detected malformed array response from provider, aggregating...', {
-                provider: this.baseURL,
-                arrayLength: data.length,
-            });
-            const aggregatedContent = [];
-            let aggregatedToolCalls = [];
-            let aggregatedUsage = undefined;
-            for (const item of data) {
-                if (item.choices?.[0]?.message?.content) {
-                    aggregatedContent.push(item.choices[0].message.content);
+        // Apply TCP_NODELAY if enabled (reduces latency for local servers)
+        if (socketNoDelay) {
+            const originalCreateConnection = httpAgent.createConnection;
+            httpAgent.createConnection = function (options, callback) {
+                const socket = originalCreateConnection.call(this, options, callback);
+                if (socket instanceof net.Socket) {
+                    socket.setNoDelay(true);
                 }
-
-                    aggregatedToolCalls = item.choices[0].message.tool_calls;
-                }
-                if (item.usage) {
-                    aggregatedUsage = item.usage;
-                }
-            }
-            const message = {
-                role: ContentGeneratorRole.ASSISTANT,
-                content: aggregatedContent.join(''),
+                return socket;
             };
-
-
-
-
-
-                    prompt_tokens: aggregatedUsage.prompt_tokens || 0,
-                    completion_tokens: aggregatedUsage.completion_tokens || 0,
-                    total_tokens: aggregatedUsage.total_tokens || 0,
-                };
-            }
-            resultMessages.push(message);
-            // Convert to async iterator for consistent return type
-            return (async function* () {
-                for (const msg of resultMessages) {
-                    yield msg;
+            const originalHttpsCreateConnection = httpsAgent.createConnection;
+            httpsAgent.createConnection = function (options, callback) {
+                const socket = originalHttpsCreateConnection.call(this, options, callback);
+                if (socket instanceof net.Socket) {
+                    socket.setNoDelay(true);
                 }
-
-            }
-            if (data.choices && data.choices.length > 0) {
-                const choice = data.choices[0];
-                const message = {
-                    role: choice.message.role,
-                    content: choice.message.content || '',
+                return socket;
             };
-            if (choice.message.tool_calls) {
-                message.tool_calls = choice.message.tool_calls;
-            }
-            if (data.usage) {
-                message.usage = {
-                    prompt_tokens: data.usage.prompt_tokens || 0,
-                    completion_tokens: data.usage.completion_tokens || 0,
-                    total_tokens: data.usage.total_tokens || 0,
-                };
-            }
-            resultMessages.push(message);
         }
-
-
-
-
-
-
-
-
-
-
+        return { httpAgent, httpsAgent };
+    }
+    /**
+     * Get or create OpenAI client instance
+     * Will use the API key from resolved auth
+     * @returns OpenAI client instance
+     */
+    async getClient() {
+        const resolvedKey = await this.getAuthToken();
+        // Use the unified getBaseURL() method from BaseProvider
+        const baseURL = this.getBaseURL();
+        const clientKey = `${baseURL}-${resolvedKey}`;
+        // Return cached client if available and auth hasn't changed
+        if (this._cachedClient && this._cachedClientKey === clientKey) {
+            return this._cachedClient;
+        }
+        // Create HTTP agents with socket configuration (if configured)
+        const agents = this.createHttpAgents();
+        // Build client options - OpenAI SDK accepts httpAgent/httpsAgent at runtime
+        // even though they're not in the TypeScript definitions
+        const baseOptions = {
+            apiKey: resolvedKey || '',
+            baseURL,
+        };
+        // Add socket configuration if available
+        const clientOptions = agents
+            ? {
+                ...baseOptions,
+                httpAgent: agents.httpAgent,
+                httpsAgent: agents.httpsAgent,
+            }
+            : baseOptions;
+        // Create new client with current auth and optional socket configuration
+        // Cast to unknown then to the expected type to bypass TypeScript's structural checking
+        this._cachedClient = new OpenAI(clientOptions);
+        this._cachedClientKey = clientKey;
+        return this._cachedClient;
+    }
+    /**
+     * Check if OAuth is supported for this provider
+     * Qwen endpoints support OAuth, standard OpenAI does not
+     */
+    supportsOAuth() {
+        const baseURL = this.getBaseURL();
+        // Check if this is a Qwen endpoint that supports OAuth
+        if (baseURL &&
+            (baseURL.includes('dashscope.aliyuncs.com') ||
+                baseURL.includes('api.qwen.com') ||
+                baseURL.includes('qwen'))) {
+            return true;
         }
-
-
-                    yield message;
-                }
-            })();
+        // Standard OpenAI endpoints don't support OAuth
+        return false;
     }
     async getModels() {
-        // Check if API key is available (using resolved authentication)
-        const apiKey = await this.getAuthToken();
-        if (!apiKey) {
-            const endpoint = this.baseURL || 'https://api.openai.com/v1';
-            if (this.isOAuthEnabled() && !this.supportsOAuth()) {
-                throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
-            }
-            throw new Error('OpenAI API key is required to fetch models');
-        }
         try {
-            //
-
-            const
+            // Always try to fetch models, regardless of auth status
+            // Local endpoints often work without authentication
+            const client = await this.getClient();
+            const response = await client.models.list();
             const models = [];
             for await (const model of response) {
-                // Filter out non-chat models (embeddings, audio, image,
+                // Filter out non-chat models (embeddings, audio, image, vision, DALL·E, etc.)
                 if (!/embedding|whisper|audio|tts|image|vision|dall[- ]?e|moderation/i.test(model.id)) {
                     models.push({
                        id: model.id,
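
The hunk above replaces the hand-managed `openai` client and Responses-API plumbing with a lazily built, cached client whose transport can be tuned through three new ephemeral settings: `socket-timeout`, `socket-keepalive`, and `socket-nodelay`. A minimal sketch of how those settings translate into Node agents follows; the `SocketSettings` interface and `buildAgents` helper are illustrative stand-ins for the provider's own `getEphemeralSettings()` plumbing, not part of the package API.

```typescript
import * as http from 'http';
import * as https from 'https';

// Illustrative shape; the provider reads these keys from
// providerConfig.getEphemeralSettings() as shown in the hunk above.
interface SocketSettings {
  'socket-timeout'?: number;    // ms; the diff defaults this to 60000
  'socket-keepalive'?: boolean; // the diff defaults this to true
  'socket-nodelay'?: boolean;   // the diff defaults this to true
}

function buildAgents(settings: SocketSettings) {
  const hasSocketSettings =
    'socket-timeout' in settings ||
    'socket-keepalive' in settings ||
    'socket-nodelay' in settings;
  // No explicit socket settings: let the OpenAI SDK use its default transport.
  if (!hasSocketSettings) {
    return undefined;
  }
  const timeout = settings['socket-timeout'] ?? 60000;
  const keepAlive = settings['socket-keepalive'] !== false;
  return {
    httpAgent: new http.Agent({ keepAlive, keepAliveMsecs: 1000, timeout }),
    httpsAgent: new https.Agent({ keepAlive, keepAliveMsecs: 1000, timeout }),
  };
}

// Example: a two-minute socket timeout for a slow local inference server.
const agents = buildAgents({ 'socket-timeout': 120000 });
```

Because agents are only created when a socket setting is explicitly present, default installations keep the SDK's stock connection handling.
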
@@ -488,1029 +182,452 @@ export class OpenAIProvider extends BaseProvider {
|
|
488
182
|
catch (error) {
|
489
183
|
this.logger.debug(() => `Error fetching models from OpenAI: ${error}`);
|
490
184
|
// Return a hardcoded list as fallback
|
491
|
-
|
492
|
-
if (isQwenEndpoint(this.baseURL || '')) {
|
493
|
-
return [
|
494
|
-
{
|
495
|
-
id: 'qwen3-coder-plus',
|
496
|
-
name: 'qwen3-coder-plus',
|
497
|
-
provider: 'openai',
|
498
|
-
supportedToolFormats: ['openai'],
|
499
|
-
},
|
500
|
-
];
|
501
|
-
}
|
502
|
-
// Default OpenAI models
|
503
|
-
return [
|
504
|
-
{
|
505
|
-
id: 'gpt-4o',
|
506
|
-
name: 'gpt-4o',
|
507
|
-
provider: 'openai',
|
508
|
-
supportedToolFormats: ['openai'],
|
509
|
-
},
|
510
|
-
{
|
511
|
-
id: 'gpt-4o-mini',
|
512
|
-
name: 'gpt-4o-mini',
|
513
|
-
provider: 'openai',
|
514
|
-
supportedToolFormats: ['openai'],
|
515
|
-
},
|
516
|
-
{
|
517
|
-
id: 'gpt-4-turbo',
|
518
|
-
name: 'gpt-4-turbo',
|
519
|
-
provider: 'openai',
|
520
|
-
supportedToolFormats: ['openai'],
|
521
|
-
},
|
522
|
-
{
|
523
|
-
id: 'gpt-3.5-turbo',
|
524
|
-
name: 'gpt-3.5-turbo',
|
525
|
-
provider: 'openai',
|
526
|
-
supportedToolFormats: ['openai'],
|
527
|
-
},
|
528
|
-
];
|
185
|
+
return this.getFallbackModels();
|
529
186
|
}
|
530
187
|
}
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
}
|
539
|
-
|
188
|
+
getFallbackModels() {
|
189
|
+
return [
|
190
|
+
{
|
191
|
+
id: 'gpt-5',
|
192
|
+
name: 'GPT-5',
|
193
|
+
provider: 'openai',
|
194
|
+
supportedToolFormats: ['openai'],
|
195
|
+
},
|
196
|
+
{
|
197
|
+
id: 'gpt-4.1',
|
198
|
+
name: 'GPT-4.1',
|
199
|
+
provider: 'openai',
|
200
|
+
supportedToolFormats: ['openai'],
|
201
|
+
},
|
202
|
+
{
|
203
|
+
id: 'gpt-4o',
|
204
|
+
name: 'GPT-4o',
|
205
|
+
provider: 'openai',
|
206
|
+
supportedToolFormats: ['openai'],
|
207
|
+
},
|
208
|
+
{
|
209
|
+
id: 'o3',
|
210
|
+
name: 'O3',
|
211
|
+
provider: 'openai',
|
212
|
+
supportedToolFormats: ['openai'],
|
213
|
+
},
|
214
|
+
{
|
215
|
+
id: 'o4-mini',
|
216
|
+
name: 'O4 Mini',
|
217
|
+
provider: 'openai',
|
218
|
+
supportedToolFormats: ['openai'],
|
219
|
+
},
|
220
|
+
{
|
221
|
+
id: 'gpt-3.5-turbo',
|
222
|
+
name: 'GPT-3.5 Turbo (Legacy)',
|
223
|
+
provider: 'openai',
|
224
|
+
supportedToolFormats: ['openai'],
|
225
|
+
},
|
226
|
+
];
|
227
|
+
}
|
228
|
+
getDefaultModel() {
|
229
|
+
// Return hardcoded default - do NOT call getModel() to avoid circular dependency
|
230
|
+
return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
|
231
|
+
}
|
232
|
+
getServerTools() {
|
233
|
+
// TODO: Implement server tools for OpenAI provider
|
234
|
+
return [];
|
235
|
+
}
|
236
|
+
async invokeServerTool(toolName, _params, _config) {
|
237
|
+
// TODO: Implement server tool invocation for OpenAI provider
|
238
|
+
throw new Error(`Server tool '${toolName}' not supported by OpenAI provider`);
|
239
|
+
}
|
240
|
+
/**
|
241
|
+
* Normalize tool IDs from various formats to OpenAI format
|
242
|
+
* Handles IDs from OpenAI (call_xxx), Anthropic (toolu_xxx), and history (hist_tool_xxx)
|
243
|
+
*/
|
244
|
+
normalizeToOpenAIToolId(id) {
|
245
|
+
// If already in OpenAI format, return as-is
|
246
|
+
if (id.startsWith('call_')) {
|
247
|
+
return id;
|
540
248
|
}
|
541
|
-
//
|
542
|
-
if (
|
543
|
-
|
544
|
-
|
545
|
-
const parentId = undefined;
|
546
|
-
yield* await this.callResponsesEndpoint(messages, tools, {
|
547
|
-
stream: true,
|
548
|
-
tool_choice: tools && tools.length > 0 ? 'auto' : undefined,
|
549
|
-
stateful: false, // Always stateless for Phase 22-01
|
550
|
-
conversationId,
|
551
|
-
parentId,
|
552
|
-
});
|
553
|
-
return;
|
249
|
+
// For history format, extract the UUID and add OpenAI prefix
|
250
|
+
if (id.startsWith('hist_tool_')) {
|
251
|
+
const uuid = id.substring('hist_tool_'.length);
|
252
|
+
return 'call_' + uuid;
|
554
253
|
}
|
555
|
-
//
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
this.logger.debug(() => `[Synthetic] Added ${missingToolIds.length} synthetic responses to conversation history: ${JSON.stringify(missingToolIds)}`);
|
560
|
-
// Log the actual tool calls and their IDs for debugging
|
561
|
-
const assistantMessagesWithTools = messages.filter((m) => m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0);
|
562
|
-
const lastAssistantWithTools = assistantMessagesWithTools[assistantMessagesWithTools.length - 1];
|
563
|
-
if (lastAssistantWithTools?.tool_calls) {
|
564
|
-
this.logger.debug(() => `[Synthetic] Last assistant tool calls: ${JSON.stringify(lastAssistantWithTools.tool_calls?.map((tc) => ({ id: tc.id, name: tc.function.name })) ?? [])}`);
|
565
|
-
}
|
254
|
+
// For Anthropic format, extract the UUID and add OpenAI prefix
|
255
|
+
if (id.startsWith('toolu_')) {
|
256
|
+
const uuid = id.substring('toolu_'.length);
|
257
|
+
return 'call_' + uuid;
|
566
258
|
}
|
567
|
-
//
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
259
|
+
// Unknown format - assume it's a raw UUID
|
260
|
+
return 'call_' + id;
|
261
|
+
}
|
262
|
+
/**
|
263
|
+
* Normalize tool IDs from OpenAI format to history format
|
264
|
+
*/
|
265
|
+
normalizeToHistoryToolId(id) {
|
266
|
+
// If already in history format, return as-is
|
267
|
+
if (id.startsWith('hist_tool_')) {
|
268
|
+
return id;
|
575
269
|
}
|
576
|
-
//
|
577
|
-
|
578
|
-
|
579
|
-
|
270
|
+
// For OpenAI format, extract the UUID and add history prefix
|
271
|
+
if (id.startsWith('call_')) {
|
272
|
+
const uuid = id.substring('call_'.length);
|
273
|
+
return 'hist_tool_' + uuid;
|
580
274
|
}
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
//
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
return cleanMsg;
|
275
|
+
// For Anthropic format, extract the UUID and add history prefix
|
276
|
+
if (id.startsWith('toolu_')) {
|
277
|
+
const uuid = id.substring('toolu_'.length);
|
278
|
+
return 'hist_tool_' + uuid;
|
279
|
+
}
|
280
|
+
// Unknown format - assume it's a raw UUID
|
281
|
+
return 'hist_tool_' + id;
|
282
|
+
}
|
283
|
+
/**
|
284
|
+
* Generate chat completion with IContent interface
|
285
|
+
* Internally converts to OpenAI API format, but only yields IContent
|
286
|
+
* @param contents Array of content blocks (text and tool_call)
|
287
|
+
* @param tools Array of available tools
|
288
|
+
*/
|
289
|
+
async *generateChatCompletion(contents, tools) {
|
290
|
+
// Debug log what we receive
|
291
|
+
this.logger.debug(() => `[OpenAIProvider] generateChatCompletion received tools:`, {
|
292
|
+
hasTools: !!tools,
|
293
|
+
toolsLength: tools?.length,
|
294
|
+
toolsType: typeof tools,
|
295
|
+
isArray: Array.isArray(tools),
|
296
|
+
firstToolName: tools?.[0]?.functionDeclarations?.[0]?.name,
|
297
|
+
toolsStructure: tools
|
298
|
+
? JSON.stringify(tools).substring(0, 200)
|
299
|
+
: 'undefined',
|
607
300
|
});
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
.join(', ')}`);
|
613
|
-
let response;
|
614
|
-
try {
|
615
|
-
// Build request params with exact order from original
|
616
|
-
response = await this.openai.chat.completions.create({
|
617
|
-
model: this.currentModel,
|
618
|
-
messages: cleanedMessages,
|
619
|
-
stream: streamingEnabled,
|
620
|
-
...(streamingEnabled && finalStreamOptions !== null
|
621
|
-
? { stream_options: finalStreamOptions }
|
622
|
-
: {}),
|
623
|
-
tools: formattedTools,
|
624
|
-
tool_choice: this.getToolChoiceForFormat(tools),
|
625
|
-
...this.modelParams,
|
626
|
-
});
|
301
|
+
// Pass tools directly in Gemini format - they'll be converted in generateChatCompletionImpl
|
302
|
+
const generator = this.generateChatCompletionImpl(contents, tools, undefined, undefined, undefined);
|
303
|
+
for await (const item of generator) {
|
304
|
+
yield item;
|
627
305
|
}
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
if (
|
636
|
-
|
637
|
-
|
638
|
-
const
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
}
|
646
|
-
});
|
306
|
+
}
|
307
|
+
/**
|
308
|
+
* Convert IContent array to OpenAI ChatCompletionMessageParam array
|
309
|
+
*/
|
310
|
+
convertToOpenAIMessages(contents) {
|
311
|
+
const messages = [];
|
312
|
+
for (const content of contents) {
|
313
|
+
if (content.speaker === 'human') {
|
314
|
+
// Convert human messages to user messages
|
315
|
+
const textBlocks = content.blocks.filter((b) => b.type === 'text');
|
316
|
+
const text = textBlocks.map((b) => b.text).join('\n');
|
317
|
+
if (text) {
|
318
|
+
messages.push({
|
319
|
+
role: 'user',
|
320
|
+
content: text,
|
321
|
+
});
|
322
|
+
}
|
647
323
|
}
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
324
|
+
else if (content.speaker === 'ai') {
|
325
|
+
// Convert AI messages
|
326
|
+
const textBlocks = content.blocks.filter((b) => b.type === 'text');
|
327
|
+
const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
|
328
|
+
if (toolCalls.length > 0) {
|
329
|
+
// Assistant message with tool calls
|
330
|
+
const text = textBlocks.map((b) => b.text).join('\n');
|
331
|
+
messages.push({
|
332
|
+
role: 'assistant',
|
333
|
+
content: text || null,
|
334
|
+
tool_calls: toolCalls.map((tc) => ({
|
335
|
+
id: this.normalizeToOpenAIToolId(tc.id),
|
336
|
+
type: 'function',
|
337
|
+
function: {
|
338
|
+
name: tc.name,
|
339
|
+
arguments: typeof tc.parameters === 'string'
|
340
|
+
? tc.parameters
|
341
|
+
: JSON.stringify(tc.parameters),
|
342
|
+
},
|
343
|
+
})),
|
344
|
+
});
|
345
|
+
}
|
346
|
+
else if (textBlocks.length > 0) {
|
347
|
+
// Plain assistant message
|
348
|
+
const text = textBlocks.map((b) => b.text).join('\n');
|
349
|
+
messages.push({
|
350
|
+
role: 'assistant',
|
351
|
+
content: text,
|
352
|
+
});
|
353
|
+
}
|
669
354
|
}
|
670
|
-
else {
|
671
|
-
|
672
|
-
|
673
|
-
|
355
|
+
else if (content.speaker === 'tool') {
|
356
|
+
// Convert tool responses
|
357
|
+
const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
|
358
|
+
for (const tr of toolResponses) {
|
359
|
+
messages.push({
|
360
|
+
role: 'tool',
|
361
|
+
content: typeof tr.result === 'string'
|
362
|
+
? tr.result
|
363
|
+
: JSON.stringify(tr.result),
|
364
|
+
tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
|
365
|
+
});
|
366
|
+
}
|
674
367
|
}
|
675
368
|
}
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
//
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
369
|
+
return messages;
|
370
|
+
}
|
371
|
+
/**
|
372
|
+
* Internal implementation for chat completion
|
373
|
+
*/
|
374
|
+
async *generateChatCompletionImpl(contents, tools, maxTokens, abortSignal, modelName) {
|
375
|
+
// Always look up model from SettingsService
|
376
|
+
const model = modelName || this.getModel() || this.getDefaultModel();
|
377
|
+
// Convert IContent to OpenAI messages format
|
378
|
+
const messages = this.convertToOpenAIMessages(contents);
|
379
|
+
// Debug log what we're about to convert
|
380
|
+
this.logger.debug(() => `[OpenAIProvider] Before convertGeminiToOpenAI:`, {
|
381
|
+
inputTools: tools ? JSON.stringify(tools).substring(0, 500) : 'undefined',
|
382
|
+
hasTools: !!tools,
|
383
|
+
toolsLength: tools?.length,
|
384
|
+
firstToolStructure: tools?.[0]
|
385
|
+
? JSON.stringify(tools[0]).substring(0, 300)
|
386
|
+
: 'undefined',
|
387
|
+
});
|
388
|
+
// Convert Gemini format tools directly to OpenAI format using the new method
|
389
|
+
const formattedTools = this.toolFormatter.convertGeminiToOpenAI(tools);
|
390
|
+
// Debug log the conversion result
|
391
|
+
this.logger.debug(() => `[OpenAIProvider] After convertGeminiToOpenAI:`, {
|
392
|
+
inputHadTools: !!tools,
|
393
|
+
outputHasTools: !!formattedTools,
|
394
|
+
outputToolsLength: formattedTools?.length,
|
395
|
+
outputFirstTool: formattedTools?.[0],
|
396
|
+
outputToolNames: formattedTools?.map((t) => t.function.name),
|
397
|
+
firstToolParameters: formattedTools?.[0]
|
398
|
+
? JSON.stringify(formattedTools[0].function.parameters)
|
399
|
+
: 'undefined',
|
400
|
+
});
|
401
|
+
// Get streaming setting from ephemeral settings (default: enabled)
|
402
|
+
const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
|
403
|
+
const streamingEnabled = streamingSetting !== 'disabled';
|
404
|
+
// Get the system prompt
|
405
|
+
const userMemory = this.globalConfig?.getUserMemory
|
406
|
+
? this.globalConfig.getUserMemory()
|
407
|
+
: '';
|
408
|
+
const systemPrompt = await getCoreSystemPromptAsync(userMemory, model, undefined);
|
409
|
+
// Add system prompt as the first message in the array
|
410
|
+
const messagesWithSystem = [
|
411
|
+
{ role: 'system', content: systemPrompt },
|
412
|
+
...messages,
|
413
|
+
];
|
414
|
+
// Build request - only include tools if they exist and are not empty
|
415
|
+
const requestBody = {
|
416
|
+
model,
|
417
|
+
messages: messagesWithSystem,
|
418
|
+
...(formattedTools && formattedTools.length > 0
|
419
|
+
? { tools: formattedTools }
|
420
|
+
: {}),
|
421
|
+
max_tokens: maxTokens,
|
422
|
+
stream: streamingEnabled,
|
423
|
+
};
|
424
|
+
// Debug log the full request for Cerebras/Qwen
|
425
|
+
if (model.toLowerCase().includes('qwen') ||
|
426
|
+
this.getBaseURL()?.includes('cerebras')) {
|
427
|
+
this.logger.debug(() => `Full request to ${this.getBaseURL()} for model ${model}:`, {
|
428
|
+
baseURL: this.getBaseURL(),
|
429
|
+
model,
|
689
430
|
streamingEnabled,
|
431
|
+
hasTools: 'tools' in requestBody,
|
432
|
+
toolCount: formattedTools?.length || 0,
|
433
|
+
messageCount: messages.length,
|
434
|
+
toolsInRequest: 'tools' in requestBody ? requestBody.tools?.length : 'not included',
|
435
|
+
requestBody: {
|
436
|
+
...requestBody,
|
437
|
+
messages: messages.slice(-2), // Only log last 2 messages for brevity
|
438
|
+
tools: requestBody.tools?.slice(0, 2), // Only log first 2 tools for brevity if they exist
|
439
|
+
},
|
440
|
+
});
|
441
|
+
}
|
442
|
+
// Get OpenAI client
|
443
|
+
const client = await this.getClient();
|
444
|
+
// Wrap the API call with retry logic
|
445
|
+
const makeApiCall = async () => {
|
446
|
+
const response = await client.chat.completions.create(requestBody, {
|
447
|
+
signal: abortSignal,
|
690
448
|
});
|
449
|
+
return response;
|
450
|
+
};
|
451
|
+
let retryCount = 0;
|
452
|
+
const maxRetries = 5;
|
453
|
+
let response;
|
454
|
+
while (retryCount <= maxRetries) {
|
691
455
|
try {
|
692
|
-
|
693
|
-
|
694
|
-
// Cerebras and other providers may return immutable JSONResponse objects
|
695
|
-
// Cast to unknown first to bypass type checking, then to our extended type
|
696
|
-
const extendedChunk = chunk;
|
697
|
-
const safeChunk = {
|
698
|
-
choices: extendedChunk.choices?.map((choice) => ({
|
699
|
-
delta: choice.delta
|
700
|
-
? {
|
701
|
-
content: choice.delta.content ?? undefined,
|
702
|
-
role: choice.delta.role,
|
703
|
-
tool_calls: choice.delta.tool_calls?.map((tc, idx) => ({
|
704
|
-
id: tc.id,
|
705
|
-
type: tc.type,
|
706
|
-
function: tc.function
|
707
|
-
? {
|
708
|
-
name: tc.function.name,
|
709
|
-
arguments: tc.function.arguments,
|
710
|
-
}
|
711
|
-
: undefined,
|
712
|
-
index: tc.index !== undefined ? tc.index : idx,
|
713
|
-
})),
|
714
|
-
}
|
715
|
-
: undefined,
|
716
|
-
message: choice.message
|
717
|
-
? {
|
718
|
-
content: choice.message.content ?? undefined,
|
719
|
-
role: choice.message.role,
|
720
|
-
tool_calls: choice.message.tool_calls?.map((tc) => ({
|
721
|
-
id: tc.id,
|
722
|
-
type: tc.type,
|
723
|
-
function: tc.function
|
724
|
-
? {
|
725
|
-
name: tc.function.name,
|
726
|
-
arguments: tc.function.arguments,
|
727
|
-
}
|
728
|
-
: undefined,
|
729
|
-
})),
|
730
|
-
}
|
731
|
-
: undefined,
|
732
|
-
index: choice.index,
|
733
|
-
finish_reason: choice.finish_reason,
|
734
|
-
})),
|
735
|
-
usage: extendedChunk.usage
|
736
|
-
? {
|
737
|
-
prompt_tokens: extendedChunk.usage.prompt_tokens,
|
738
|
-
completion_tokens: extendedChunk.usage.completion_tokens,
|
739
|
-
total_tokens: extendedChunk.usage.total_tokens,
|
740
|
-
}
|
741
|
-
: undefined,
|
742
|
-
};
|
743
|
-
allChunks.push(safeChunk);
|
744
|
-
}
|
456
|
+
response = await makeApiCall();
|
457
|
+
break; // Success, exit retry loop
|
745
458
|
}
|
746
459
|
catch (error) {
|
747
|
-
|
748
|
-
|
749
|
-
if (errorMessage?.includes('JSONResponse') &&
|
750
|
-
errorMessage?.includes('does not support item assignment')) {
|
751
|
-
this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error during stream iteration. This is a known issue with Cerebras. The OpenAI client library is trying to mutate immutable response objects. Falling back to non-streaming mode.', {
|
752
|
-
error: errorMessage,
|
753
|
-
provider: this.baseURL,
|
754
|
-
chunksCollected: allChunks.length,
|
755
|
-
});
|
756
|
-
// Retry the entire request with streaming disabled
|
757
|
-
// This is the nuclear option but ensures we get a response
|
758
|
-
const nonStreamingResponse = await this.openai.chat.completions.create({
|
759
|
-
model: this.currentModel,
|
760
|
-
messages: cleanedMessages,
|
761
|
-
stream: false, // Force non-streaming
|
762
|
-
tools: formattedTools,
|
763
|
-
tool_choice: this.getToolChoiceForFormat(tools),
|
764
|
-
...this.modelParams,
|
765
|
-
});
|
766
|
-
// Handle as non-streaming response
|
767
|
-
const completionResponse = nonStreamingResponse;
|
768
|
-
const choice = completionResponse.choices[0];
|
769
|
-
if (choice?.message.content) {
|
770
|
-
fullContent = choice.message.content;
|
771
|
-
}
|
772
|
-
if (choice?.message.tool_calls) {
|
773
|
-
for (const toolCall of choice.message.tool_calls) {
|
774
|
-
if (toolCall.type === 'function' && toolCall.function) {
|
775
|
-
accumulatedToolCalls.push({
|
776
|
-
id: toolCall.id,
|
777
|
-
type: 'function',
|
778
|
-
function: toolCall.function,
|
779
|
-
});
|
780
|
-
}
|
781
|
-
}
|
782
|
-
}
|
783
|
-
if (completionResponse.usage) {
|
784
|
-
usageData = {
|
785
|
-
prompt_tokens: completionResponse.usage.prompt_tokens,
|
786
|
-
completion_tokens: completionResponse.usage.completion_tokens,
|
787
|
-
total_tokens: completionResponse.usage.total_tokens,
|
788
|
-
};
|
789
|
-
}
|
790
|
-
// Yield the complete response
|
791
|
-
yield {
|
792
|
-
role: ContentGeneratorRole.ASSISTANT,
|
793
|
-
content: fullContent || '',
|
794
|
-
tool_calls: accumulatedToolCalls.length > 0
|
795
|
-
? accumulatedToolCalls
|
796
|
-
: undefined,
|
797
|
-
usage: usageData,
|
798
|
-
};
|
799
|
-
return;
|
460
|
+
if (retryCount === maxRetries) {
|
461
|
+
throw error; // Max retries reached, re-throw error
|
800
462
|
}
|
801
|
-
|
802
|
-
|
463
|
+
retryCount++;
|
464
|
+
this.logger.debug(() => `API call failed (attempt ${retryCount}), retrying...`, error);
|
465
|
+
// Exponential backoff: 4s, 8s, 16s, 32s, 64s
|
466
|
+
const delay = 4000 * Math.pow(2, retryCount - 1);
|
467
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
803
468
|
}
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
let finalUsageData = undefined;
|
819
|
-
// Process all buffered chunks
|
820
|
-
for (const chunk of allChunks) {
|
821
|
-
const message = chunk.choices?.[0]?.message || chunk.choices?.[0]?.delta;
|
822
|
-
if (message?.content) {
|
823
|
-
contentParts.push(message.content);
|
824
|
-
}
|
825
|
-
if (message?.tool_calls) {
|
826
|
-
// Ensure tool_calls match the expected format
|
827
|
-
aggregatedToolCalls = message.tool_calls.map((tc) => ({
|
828
|
-
id: tc.id || `call_${Date.now()}`,
|
829
|
-
type: (tc.type || 'function'),
|
830
|
-
function: {
|
831
|
-
name: tc.function?.name || '',
|
832
|
-
arguments: tc.function?.arguments || '',
|
833
|
-
},
|
834
|
-
}));
|
835
|
-
}
|
836
|
-
if (chunk.usage) {
|
837
|
-
finalUsageData = {
|
838
|
-
prompt_tokens: chunk.usage.prompt_tokens || 0,
|
839
|
-
completion_tokens: chunk.usage.completion_tokens || 0,
|
840
|
-
total_tokens: chunk.usage.total_tokens || 0,
|
841
|
-
};
|
469
|
+
}
|
470
|
+
if (!response) {
|
471
|
+
throw new Error('Failed to get response after retries');
|
472
|
+
}
|
473
|
+
// Check if response is streaming or not
|
474
|
+
if (streamingEnabled) {
|
475
|
+
// Process streaming response
|
476
|
+
let _accumulatedText = '';
|
477
|
+
const accumulatedToolCalls = [];
|
478
|
+
try {
|
479
|
+
// Handle streaming response
|
480
|
+
for await (const chunk of response) {
|
481
|
+
if (abortSignal?.aborted) {
|
482
|
+
break;
|
842
483
|
}
|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    let processedChunk = chunk;
-                    if (chunk.choices?.[0]?.message && !chunk.choices?.[0]?.delta) {
-                        this.logger.error(() => '[Cerebras Corruption] Converting malformed chunk from message to delta format', {
-                            provider: this.baseURL,
-                            hasMessage: true,
-                            hasDelta: false,
-                            messageContent: chunk.choices[0].message?.content?.substring(0, 100),
-                        });
-                        // Convert message format to delta format for consistent processing
-                        const message = chunk.choices[0].message;
-                        processedChunk = {
-                            choices: [
-                                {
-                                    delta: {
-                                        content: message?.content ?? undefined,
-                                        role: message?.role,
-                                        tool_calls: message?.tool_calls,
+                    const choice = chunk.choices?.[0];
+                    if (!choice)
+                        continue;
+                    // Handle text content - emit immediately without buffering
+                    const deltaContent = choice.delta?.content;
+                    if (deltaContent) {
+                        _accumulatedText += deltaContent;
+                        // Emit text immediately without buffering
+                        yield {
+                            speaker: 'ai',
+                            blocks: [
+                                {
+                                    type: 'text',
+                                    text: deltaContent,
                                 },
-
-
-                            usage: chunk.usage,
-                        };
-                    }
-                    const delta = processedChunk.choices?.[0]?.delta;
-                    if (delta?.content) {
-                        // Enhanced debug logging to understand streaming behavior
-                        if (this.isUsingQwen()) {
-                            this.logger.debug(() => `Chunk: ${JSON.stringify({
-                                content: delta.content,
-                                contentLength: delta.content?.length ?? 0,
-                                isWhitespaceOnly: delta.content?.trim() === '',
-                                chunkIndex: 0,
-                            })}`);
+                            ],
+                        };
                     }
-                        //
-
-
-
-                        if (
-                            // Buffer whitespace-only chunk
-                            pendingWhitespace = (pendingWhitespace || '') + delta.content;
-                            this.logger.debug(() => `Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${pendingWhitespace?.length ?? 0}`);
+                    // Handle tool calls
+                    const deltaToolCalls = choice.delta?.tool_calls;
+                    if (deltaToolCalls && deltaToolCalls.length > 0) {
+                        for (const deltaToolCall of deltaToolCalls) {
+                            if (deltaToolCall.index === undefined)
                                 continue;
-
-
-
-
-
-
-
+                            // Initialize or update accumulated tool call
+                            if (!accumulatedToolCalls[deltaToolCall.index]) {
+                                accumulatedToolCalls[deltaToolCall.index] = {
+                                    id: deltaToolCall.id || '',
+                                    type: 'function',
+                                    function: {
+                                        name: deltaToolCall.function?.name || '',
+                                        arguments: '',
+                                    },
                                 };
-
-
-
+                            }
+                            const tc = accumulatedToolCalls[deltaToolCall.index];
+                            if (tc) {
+                                if (deltaToolCall.id)
+                                    tc.id = deltaToolCall.id;
+                                if (deltaToolCall.function?.name)
+                                    tc.function.name = deltaToolCall.function.name;
+                                if (deltaToolCall.function?.arguments) {
+                                    tc.function.arguments += deltaToolCall.function.arguments;
+                                }
                             }
                         }
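The replacement streaming path above drops the old buffer-everything-then-convert strategy: text deltas are yielded to the caller the moment they arrive, while tool-call fragments are accumulated by their `index` until the stream completes. A minimal self-contained sketch of the same pattern, assuming OpenAI-style chunks; the `IContent`/`ContentBlock` shapes are inferred from this diff, and plain `JSON.parse` stands in for the package's `processToolParameters`:

type ContentBlock =
  | { type: 'text'; text: string }
  | { type: 'tool_call'; id: string; name: string; parameters: unknown };
interface IContent { speaker: 'ai'; blocks: ContentBlock[]; }

interface DeltaToolCall {
  index?: number;
  id?: string;
  function?: { name?: string; arguments?: string };
}
interface StreamChunk {
  choices?: Array<{ delta?: { content?: string; tool_calls?: DeltaToolCall[] } }>;
}

async function* toContentStream(stream: AsyncIterable<StreamChunk>): AsyncGenerator<IContent> {
  const accumulated: Array<{ id: string; function: { name: string; arguments: string } }> = [];
  for await (const chunk of stream) {
    const delta = chunk.choices?.[0]?.delta;
    if (!delta) continue;
    // Text is forwarded immediately; nothing is held back for later.
    if (delta.content) {
      yield { speaker: 'ai', blocks: [{ type: 'text', text: delta.content }] };
    }
    // Tool calls arrive in fragments keyed by index; argument strings concatenate.
    for (const tc of delta.tool_calls ?? []) {
      if (tc.index === undefined) continue;
      const slot = (accumulated[tc.index] ??= { id: '', function: { name: '', arguments: '' } });
      if (tc.id) slot.id = tc.id;
      if (tc.function?.name) slot.function.name = tc.function.name;
      if (tc.function?.arguments) slot.function.arguments += tc.function.arguments;
    }
  }
  // Once the stream ends, the completed tool calls go out as a single IContent.
  const blocks: ContentBlock[] = accumulated.filter(Boolean).map((tc) => ({
    type: 'tool_call' as const,
    id: tc.id,
    name: tc.function.name,
    parameters: JSON.parse(tc.function.arguments || '{}'),
  }));
  if (blocks.length > 0) yield { speaker: 'ai', blocks };
}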
-                            yield {
-                                role: ContentGeneratorRole.ASSISTANT,
-                                content: delta.content,
-                            };
-                            hasStreamedContent = true;
-                        }
-                        fullContent += delta.content;
-                    }
-                    if (delta?.tool_calls) {
-                        for (const toolCall of delta.tool_calls) {
-                            this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulatedToolCalls, currentToolFormat);
                     }
                 }
-                    // Check for usage data in the chunk
-                    if (processedChunk.usage) {
-                        usageData = {
-                            prompt_tokens: processedChunk.usage.prompt_tokens || 0,
-                            completion_tokens: processedChunk.usage.completion_tokens || 0,
-                            total_tokens: processedChunk.usage.total_tokens || 0,
-                        };
-                    }
             }
-
-
-
-
-
-
-
-                }
-                if (choice?.message.tool_calls) {
-                    // Convert tool calls to the standard format
-                    for (const toolCall of choice.message.tool_calls) {
-                        if (toolCall.type === 'function' && toolCall.function) {
-                            // Don't fix double stringification here - it's handled later in the final processing
-                            accumulatedToolCalls.push({
-                                id: toolCall.id,
-                                type: 'function',
-                                function: toolCall.function,
-                            });
-                        }
+            catch (error) {
+                if (abortSignal?.aborted) {
+                    throw error;
+                }
+                else {
+                    this.logger.error('Error processing streaming response:', error);
+                    throw error;
                 }
             }
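The added catch block distinguishes a caller-initiated abort (rethrown as-is) from a genuine stream failure (logged, then rethrown). On the consuming side this pairs with a standard AbortController; a usage sketch with illustrative names:

// Usage sketch: cancel a streaming response after a deadline.
async function printFirstSeconds(
  start: (signal: AbortSignal) => AsyncIterable<{ blocks: Array<{ type: string; text?: string }> }>,
  ms: number,
): Promise<void> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ms);
  try {
    for await (const content of start(controller.signal)) {
      for (const block of content.blocks) {
        if (block.type === 'text' && block.text) process.stdout.write(block.text);
      }
    }
  } finally {
    clearTimeout(timer); // always clear the timer, even if the stream throws
  }
}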
-
-
-                    prompt_tokens: completionResponse.usage.prompt_tokens,
-                    completion_tokens: completionResponse.usage.completion_tokens,
-                    total_tokens: completionResponse.usage.total_tokens,
-                };
-            }
-            // For non-streaming, we yield the full content at once if there's no parser
-            if (!parser && fullContent) {
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: fullContent,
-                };
-                hasStreamedContent = true;
-            }
-        }
-        // Flush any remaining pending whitespace for Qwen
-        if (pendingWhitespace && this.isUsingQwen() && !parser) {
-            this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
-            yield {
-                role: ContentGeneratorRole.ASSISTANT,
-                content: pendingWhitespace,
-            };
-            hasStreamedContent = true;
-            fullContent += pendingWhitespace;
-            pendingWhitespace = null;
-        }
-        // After stream ends, parse text-based tool calls if needed
-        if (parser && fullContent) {
-            const { cleanedContent, toolCalls } = parser.parse(fullContent);
-            if (toolCalls.length > 0) {
-                // Convert to standard format
-                const standardToolCalls = toolCalls.map((tc, index) => ({
-                    id: `call_${Date.now()}_${index}`,
-                    type: 'function',
-                    function: {
-                        name: tc.name,
-                        arguments: JSON.stringify(tc.arguments),
-                    },
-                }));
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: cleanedContent,
-                    tool_calls: standardToolCalls,
-                    usage: usageData,
-                };
-            }
-            else {
-                // No tool calls found, yield cleaned content
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: cleanedContent,
-                    usage: usageData,
-                };
-            }
-        }
-        else {
-            // Standard OpenAI tool call handling
+            // No need to flush buffer since we're emitting immediately
+            // Emit accumulated tool calls as IContent if any
             if (accumulatedToolCalls.length > 0) {
-
-
-
-
-
-
-
-
-
-
-
-
-                            })}`);
-                            // For Qwen, check for nested double-stringification
-                            // Qwen models stringify array/object values WITHIN the JSON arguments
-                            if (toolCall.function.arguments &&
-                                typeof toolCall.function.arguments === 'string') {
-                                try {
-                                    // First, parse the arguments to get the JSON object
-                                    const parsedArgs = JSON.parse(toolCall.function.arguments);
-                                    let hasNestedStringification = false;
-                                    // Check each property to see if it's a stringified array/object
-                                    const fixedArgs = {};
-                                    for (const [key, value] of Object.entries(parsedArgs)) {
-                                        if (typeof value === 'string') {
-                                            const trimmed = value.trim();
-                                            // Check if it looks like a stringified array or object
-                                            // Also check for Python-style dictionaries with single quotes
-                                            if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
-                                                (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
-                                                try {
-                                                    // Try to parse it as JSON
-                                                    const nestedParsed = JSON.parse(value);
-                                                    fixedArgs[key] = nestedParsed;
-                                                    hasNestedStringification = true;
-                                                    this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
-                                                }
-                                                catch {
-                                                    // Try to convert Python-style to JSON (single quotes to double quotes)
-                                                    try {
-                                                        const jsonified = value
-                                                            .replace(/'/g, '"')
-                                                            .replace(/: True/g, ': true')
-                                                            .replace(/: False/g, ': false')
-                                                            .replace(/: None/g, ': null');
-                                                        const nestedParsed = JSON.parse(jsonified);
-                                                        fixedArgs[key] = nestedParsed;
-                                                        hasNestedStringification = true;
-                                                        this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
-                                                    }
-                                                    catch {
-                                                        // Not valid JSON even after conversion, keep as string
-                                                        fixedArgs[key] = value;
-                                                    }
-                                                }
-                                            }
-                                            else {
-                                                fixedArgs[key] = value;
-                                            }
-                                        }
-                                        else {
-                                            fixedArgs[key] = value;
-                                        }
-                                    }
-                                    if (hasNestedStringification) {
-                                        this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
-                                        return {
-                                            ...toolCall,
-                                            function: {
-                                                ...toolCall.function,
-                                                arguments: JSON.stringify(fixedArgs),
-                                            },
-                                        };
-                                    }
-                                }
-                                catch (_e) {
-                                    // If parsing fails, check for old-style double-stringification
-                                    if (toolCall.function.arguments.startsWith('"') &&
-                                        toolCall.function.arguments.endsWith('"')) {
-                                        try {
-                                            // Old fix: entire arguments were double-stringified
-                                            const parsedArgs = JSON.parse(toolCall.function.arguments);
-                                            this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
-                                            return {
-                                                ...toolCall,
-                                                function: {
-                                                    ...toolCall.function,
-                                                    arguments: JSON.stringify(parsedArgs),
-                                                },
-                                            };
-                                        }
-                                        catch {
-                                            // Leave as-is if we can't parse
-                                        }
-                                    }
-                                }
-                            }
-                            // No fix needed
-                            this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
-                            return toolCall;
+                const blocks = [];
+                const detectedFormat = this.detectToolFormat();
+                for (const tc of accumulatedToolCalls) {
+                    if (!tc)
+                        continue;
+                    // Process tool parameters with double-escape handling
+                    const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '', detectedFormat);
+                    blocks.push({
+                        type: 'tool_call',
+                        id: this.normalizeToHistoryToolId(tc.id),
+                        name: tc.function.name || '',
+                        parameters: processedParameters,
                     });
                 }
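All of the inline Qwen double-stringification repair removed above is now delegated to a single `processToolParameters(arguments, name, format)` call. Its real implementation lives elsewhere in the package; the following is a hypothetical simplification, reconstructed from the removed logic, of what such a helper has to handle:

type ToolFormat = 'openai' | 'qwen';

// Hypothetical simplification of processToolParameters; only its call
// signature appears in this diff. Based on the removed inline fix above.
function processToolParameters(args: string, _toolName: string, format: ToolFormat): unknown {
  if (!args) return {};
  let parsed: unknown;
  try {
    parsed = JSON.parse(args);
  } catch {
    return args; // not JSON at all; pass the raw string through
  }
  if (format !== 'qwen') return parsed;
  // Qwen-format models sometimes double-stringify: either the whole payload
  // or individual array/object values inside it arrive as JSON strings.
  if (typeof parsed === 'string') {
    try { return JSON.parse(parsed); } catch { return parsed; }
  }
  if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
    const record = parsed as Record<string, unknown>;
    for (const [key, value] of Object.entries(record)) {
      if (typeof value !== 'string') continue;
      const t = value.trim();
      const looksNested = (t.startsWith('[') && t.endsWith(']')) || (t.startsWith('{') && t.endsWith('}'));
      if (!looksNested) continue;
      try { record[key] = JSON.parse(t); } catch { /* leave the string as-is */ }
    }
  }
  return parsed;
}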
-            if (
-            this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
-                contentLength: fullContent.length,
-                content: fullContent.substring(0, 200) +
-                    (fullContent.length > 200 ? '...' : ''),
-                toolCallCount: accumulatedToolCalls.length,
-                hasStreamedContent,
-            })}`);
-            }
-            // For Qwen models, don't duplicate content if we've already streamed it
-            // BUT Cerebras needs at least a space to continue after tool responses
-            const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
-            if (isCerebras) {
-                this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
-                    hasStreamedContent,
-                    willSendSpace: hasStreamedContent,
-                });
-            }
-            const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
-            if (shouldOmitContent) {
-                // Only yield tool calls with empty content to avoid duplication
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: '',
-                    tool_calls: fixedToolCalls,
-                    usage: usageData,
-                };
-            }
-            else if (isCerebras && hasStreamedContent) {
-                // Cerebras: Send just a space to prevent duplication but allow continuation
-                // This prevents the repeated "Let me search..." text
-                this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: ' ', // Single space instead of full content
-                    tool_calls: fixedToolCalls,
-                    usage: usageData,
-                };
-            }
-            else {
-                // Include full content with tool calls
+                if (blocks.length > 0) {
                     yield {
-
-
-                        tool_calls: fixedToolCalls,
-                        usage: usageData,
+                        speaker: 'ai',
+                        blocks,
                     };
                 }
             }
-            else if (usageData) {
-                // Always emit usage data so downstream consumers can update stats
-                yield {
-                    role: ContentGeneratorRole.ASSISTANT,
-                    content: '',
-                    usage: usageData,
-                };
-            }
         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            if (providerSettings.model) {
-                return providerSettings.model;
+        else {
+            // Handle non-streaming response
+            const completion = response;
+            const choice = completion.choices?.[0];
+            if (!choice) {
+                throw new Error('No choices in completion response');
+            }
+            const blocks = [];
+            // Handle text content
+            if (choice.message?.content) {
+                blocks.push({
+                    type: 'text',
+                    text: choice.message.content,
+                });
             }
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-        return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
-    }
-    setApiKey(apiKey) {
-        // Call base provider implementation
-        super.setApiKey?.(apiKey);
-        // Persist to SettingsService if available
-        this.setApiKeyInSettings(apiKey).catch((error) => {
-            this.logger.debug(() => `Failed to persist API key to SettingsService: ${error}`);
-        });
-        // Create a new OpenAI client with the updated API key
-        const clientOptions = {
-            apiKey,
-            dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
-        };
-        // Only include baseURL if it's defined
-        if (this.baseURL) {
-            clientOptions.baseURL = this.baseURL;
-        }
-        this.openai = new OpenAI(clientOptions);
-        this._cachedClientKey = apiKey; // Update cached key
-    }
-    setBaseUrl(baseUrl) {
-        // If no baseUrl is provided, clear to default (undefined)
-        this.baseURL = baseUrl && baseUrl.trim() !== '' ? baseUrl : undefined;
-        // Persist to SettingsService if available
-        this.setBaseUrlInSettings(this.baseURL).catch((error) => {
-            this.logger.debug(() => `Failed to persist base URL to SettingsService: ${error}`);
-        });
-        // Update OAuth configuration based on endpoint validation
-        // Enable OAuth for Qwen endpoints if we have an OAuth manager
-        const shouldEnableQwenOAuth = !!this.baseProviderConfig.oauthManager &&
-            (isQwenEndpoint(this.baseURL || '') ||
-                this.baseURL === 'https://portal.qwen.ai/v1');
-        this.updateOAuthConfig(shouldEnableQwenOAuth, shouldEnableQwenOAuth ? 'qwen' : undefined, this.baseProviderConfig.oauthManager);
-        // Call base provider implementation
-        super.setBaseUrl?.(baseUrl);
-        // Create a new OpenAI client with the updated (or cleared) base URL
-        const clientOptions = {
-            // Use existing key or empty string as placeholder
-            apiKey: this._cachedClientKey || 'placeholder',
-            dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
-        };
-        // Only include baseURL if it's defined
-        if (this.baseURL) {
-            clientOptions.baseURL = this.baseURL;
-        }
-        this.openai = new OpenAI(clientOptions);
-        // Clear cached key to force re-resolution on next API call
-        this._cachedClientKey = undefined;
-    }
-    setConfig(config) {
-        this.providerConfig = config;
-    }
-    setToolFormatOverride(format) {
-        this.toolFormatOverride = format || undefined;
-    }
-    /**
-     * Estimates the remote context usage for the current conversation
-     * @param conversationId The conversation ID
-     * @param parentId The parent message ID
-     * @param promptMessages The messages being sent in the current prompt
-     * @returns Context usage information including remote tokens
-     */
-    estimateContextUsage(conversationId, parentId, promptMessages) {
-        const promptTokens = estimateMessagesTokens(promptMessages);
-        return estimateRemoteTokens(this.currentModel, this.conversationCache, conversationId, parentId, promptTokens);
-    }
-    /**
-     * Get the conversation cache instance
-     * @returns The conversation cache
-     */
-    getConversationCache() {
-        return this.conversationCache;
-    }
-    /**
-     * Identifies and fixes missing tool responses by adding synthetic responses in place.
-     * Similar to AnthropicProvider's validateAndFixMessages approach.
-     * This ensures synthetic responses persist in the conversation history.
-     * @param messages The message array to fix in place
-     * @returns Array of tool call IDs that were fixed
-     */
-    identifyAndFixMissingToolResponses(messages) {
-        const fixedIds = [];
-        const pendingToolCalls = [];
-        // Process messages in order, tracking tool calls and responses
-        for (let i = 0; i < messages.length; i++) {
-            const msg = messages[i];
-            if (msg.role === 'assistant' && msg.tool_calls) {
-                // If we have pending tool calls from a previous assistant message,
-                // add synthetic responses for them before processing this new assistant message
-                if (pendingToolCalls.length > 0) {
-                    const syntheticResponses = pendingToolCalls.map((tc) => ({
-                        role: 'tool',
-                        tool_call_id: tc.id,
-                        content: 'Tool execution cancelled by user',
-                        _synthetic: true,
-                        _cancelled: true,
-                    }));
-                    // Insert synthetic responses before the current assistant message
-                    messages.splice(i, 0, ...syntheticResponses);
-                    // Track what we fixed
-                    fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
-                    // Adjust index to account for inserted messages
-                    i += syntheticResponses.length;
-                    // Clear pending tool calls
-                    pendingToolCalls.length = 0;
-                }
-                // Now track the new tool calls from this assistant message
-                msg.tool_calls.forEach((toolCall) => {
-                    if (toolCall.id) {
-                        pendingToolCalls.push({
-                            id: toolCall.id,
-                            name: toolCall.function.name,
+            // Handle tool calls
+            if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
+                const detectedFormat = this.detectToolFormat();
+                for (const toolCall of choice.message.tool_calls) {
+                    if (toolCall.type === 'function') {
+                        // Process tool parameters with double-escape handling
+                        const processedParameters = processToolParameters(toolCall.function.arguments || '', toolCall.function.name || '', detectedFormat);
+                        blocks.push({
+                            type: 'tool_call',
+                            id: this.normalizeToHistoryToolId(toolCall.id),
+                            name: toolCall.function.name || '',
+                            parameters: processedParameters,
                         });
                     }
-                }
-            }
-            else if (msg.role === 'tool' && pendingToolCalls.length > 0) {
-                // Match tool responses with pending tool calls
-                pendingToolCalls.splice(pendingToolCalls.findIndex((tc) => tc.id === msg.tool_call_id), 1);
-            }
-            else if ((msg.role === 'assistant' || msg.role === 'user') &&
-                pendingToolCalls.length > 0) {
-                // We hit a non-tool message with pending tool calls - need to add synthetic responses
-                const syntheticResponses = pendingToolCalls.map((tc) => ({
-                    role: 'tool',
-                    tool_call_id: tc.id,
-                    content: 'Tool execution cancelled by user',
-                    _synthetic: true,
-                    _cancelled: true,
-                }));
-                // Insert synthetic responses before the current message
-                messages.splice(i, 0, ...syntheticResponses);
-                // Track what we fixed
-                fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
-                // Adjust index to account for inserted messages
-                i += syntheticResponses.length;
-                // Clear pending tool calls
-                pendingToolCalls.length = 0;
-            }
-        }
-        // Handle any remaining pending tool calls at the end
-        if (pendingToolCalls.length > 0) {
-            const syntheticResponses = pendingToolCalls.map((tc) => ({
-                role: 'tool',
-                tool_call_id: tc.id,
-                content: 'Tool execution cancelled by user',
-                _synthetic: true,
-                _cancelled: true,
-            }));
-            // Add to the end of messages
-            messages.push(...syntheticResponses);
-            // Track what we fixed
-            fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
-        }
-        return fixedIds;
-    }
-    /**
-     * OpenAI always requires payment (API key)
-     */
-    isPaidMode() {
-        return true;
-    }
-    clearState() {
-        // Clear the conversation cache to prevent tool call ID mismatches
-        this.conversationCache.clear();
-    }
-    /**
-     * Get the list of server tools supported by this provider
-     */
-    getServerTools() {
-        return [];
-    }
-    /**
-     * Invoke a server tool (native provider tool)
-     */
-    async invokeServerTool(_toolName, _params, _config) {
-        throw new Error('Server tools not supported by OpenAI provider');
-    }
-    /**
-     * Set model parameters to be included in API calls
-     * @param params Parameters to merge with existing, or undefined to clear all
-     */
-    setModelParams(params) {
-        if (params === undefined) {
-            this.modelParams = undefined;
-        }
-        else {
-            this.modelParams = { ...this.modelParams, ...params };
-        }
-        // Persist to SettingsService if available
-        this.setModelParamsInSettings(this.modelParams).catch((error) => {
-            this.logger.debug(() => `Failed to persist model params to SettingsService: ${error}`);
-        });
-    }
-    /**
-     * Get current model parameters
-     * @returns Current parameters or undefined if not set
-     */
-    getModelParams() {
-        return this.modelParams;
-    }
-    /**
-     * Initialize provider configuration from SettingsService
-     */
-    async initializeFromSettings() {
-        try {
-            // Load saved model if available
-            const savedModel = await this.getModelFromSettings();
-            if (savedModel) {
-                this.currentModel = savedModel;
-            }
-            // Load saved base URL if available
-            const savedBaseUrl = await this.getBaseUrlFromSettings();
-            if (savedBaseUrl !== undefined) {
-                this.baseURL = savedBaseUrl;
+                }
             }
-            //
-
-
-
+        // Emit the complete response as a single IContent
+        if (blocks.length > 0) {
+            yield {
+                speaker: 'ai',
+                blocks,
+            };
         }
-            this.logger.debug(() => `Initialized from SettingsService - model: ${this.currentModel}, baseURL: ${this.baseURL}, params: ${JSON.stringify(this.modelParams)}`);
-        }
-        catch (error) {
-            this.logger.debug(() => `Failed to initialize OpenAI provider from SettingsService: ${error}`);
         }
     }
     /**
-     *
-     *
-     */
-    async isAuthenticated() {
-        return super.isAuthenticated();
-    }
-    /**
-     * Detect the appropriate tool format for the current model/configuration
-     * @returns The detected tool format
+     * Detects the tool call format based on the model being used
+     * @returns The detected tool format ('openai' or 'qwen')
      */
     detectToolFormat() {
         try {
-
-
-
-
-            const providerSettings = currentSettings?.providers?.[this.name];
-            const toolFormatOverride = providerSettings?.toolFormat;
-            // If explicitly set to a specific format (not 'auto'), use it
-            if (toolFormatOverride && toolFormatOverride !== 'auto') {
-                return toolFormatOverride;
-            }
-            // Auto-detect based on model name if set to 'auto' or not set
-            const modelName = this.currentModel.toLowerCase();
-            // Check for GLM-4.5 models (glm-4.5, glm-4-5)
-            if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
-                return 'qwen';
+            // Try to get format from SettingsService if available
+            const settings = this.providerConfig?.getEphemeralSettings?.();
+            if (settings && settings['tool-format']) {
+                return settings['tool-format'];
             }
-            // Check for qwen models
-            if (modelName.includes('qwen')) {
-                return 'qwen';
-            }
-            // Default to 'openai' format
-            return 'openai';
         }
         catch (error) {
             this.logger.debug(() => `Failed to detect tool format from SettingsService: ${error}`);
-            // Fallback detection without SettingsService
-            const modelName = this.currentModel.toLowerCase();
-            if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
-                return 'qwen';
-            }
-            if (modelName.includes('qwen')) {
-                return 'qwen';
-            }
-            return 'openai';
         }
-
-
-
-
-     * @returns Appropriate tool_choice value for the current format
-     */
-    getToolChoiceForFormat(tools) {
-        if (!tools || tools.length === 0) {
-            return undefined;
+        // Fallback detection without SettingsService - always look up current model
+        const modelName = (this.getModel() || this.getDefaultModel()).toLowerCase();
+        if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
+            return 'qwen';
         }
-
-
-
-
-    /**
-     * Format tools for API based on detected tool format
-     * @param tools Array of tools to format
-     * @returns Formatted tools for API consumption
-     */
-    formatToolsForAPI(tools) {
-        // For now, always use OpenAI format through OpenRouter
-        // TODO: Investigate if OpenRouter needs special handling for GLM/Qwen
-        // const detectedFormat = this.detectToolFormat();
-        // if (detectedFormat === 'qwen') {
-        //   // Convert OpenAI format to Qwen format: {name, description, parameters} without type/function wrapper
-        //   return tools.map((tool) => ({
-        //     name: tool.function.name,
-        //     description: tool.function.description,
-        //     parameters: tool.function.parameters,
-        //   }));
-        // }
-        // For all formats, use the existing ToolFormatter
-        return this.toolFormatter.toProviderFormat(tools, 'openai');
+        if (modelName.includes('qwen')) {
+            return 'qwen';
+        }
+        return 'openai';
     }
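The rewritten detectToolFormat consults the ephemeral `tool-format` setting first and otherwise derives the format from the model name at call time; the old version read a cached `this.currentModel`, which could go stale after a model switch. The name-based fallback, extracted as a pure function:

type ToolFormat = 'openai' | 'qwen';

// The model-name fallback from the new detectToolFormat, as a pure function.
function toolFormatForModel(modelName: string): ToolFormat {
  const name = modelName.toLowerCase();
  // GLM-4.5 and Qwen models share the 'qwen' tool-call conventions.
  if (name.includes('glm-4.5') || name.includes('glm-4-5')) return 'qwen';
  if (name.includes('qwen')) return 'qwen';
  return 'openai';
}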
     /**
      * Parse tool response from API (placeholder for future response parsing)
@@ -1522,5 +639,27 @@ export class OpenAIProvider extends BaseProvider {
         // For now, return the response as-is
         return response;
     }
+    /**
+     * Determines whether a response should be retried based on error codes
+     * @param error The error object from the API response
+     * @returns true if the request should be retried, false otherwise
+     */
+    shouldRetryResponse(error) {
+        // Don't retry if we're streaming chunks - just continue processing
+        if (error &&
+            typeof error === 'object' &&
+            'status' in error &&
+            error.status === 200) {
+            return false;
+        }
+        // Retry on 429 rate limit errors or 5xx server errors
+        const shouldRetry = Boolean(error &&
+            typeof error === 'object' &&
+            'status' in error &&
+            (error.status === 429 ||
+                (error.status >= 500 &&
+                    error.status < 600)));
+        return shouldRetry;
+    }
 }
 //# sourceMappingURL=OpenAIProvider.js.map
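shouldRetryResponse treats HTTP 429 and 5xx as retryable and explicitly refuses to retry a 200 that fails mid-stream. A sketch of the kind of retry loop such a predicate plugs into (the wrapper itself is not part of this diff; names are illustrative):

// Sketch of a retry wrapper around a predicate like shouldRetryResponse.
async function withRetries<T>(
  call: () => Promise<T>,
  shouldRetry: (error: unknown) => boolean,
  maxAttempts = 3,
): Promise<T> {
  let lastError: unknown;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      return await call();
    } catch (error) {
      lastError = error;
      if (!shouldRetry(error) || attempt === maxAttempts - 1) throw error;
      // Exponential backoff: 1s, 2s, 4s, ...
      await new Promise((resolve) => setTimeout(resolve, 1000 * 2 ** attempt));
    }
  }
  throw lastError; // unreachable, but satisfies the compiler
}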