@vybestack/llxprt-code-core 0.1.23 → 0.2.2-nightly.250908.7b895396

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (153)
  1. package/README.md +21 -17
  2. package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
  3. package/dist/src/auth/oauth-errors.d.ts +173 -0
  4. package/dist/src/auth/oauth-errors.js +461 -0
  5. package/dist/src/auth/oauth-errors.js.map +1 -0
  6. package/dist/src/auth/precedence.d.ts +1 -5
  7. package/dist/src/auth/precedence.js +28 -48
  8. package/dist/src/auth/precedence.js.map +1 -1
  9. package/dist/src/auth/token-store.js +2 -2
  10. package/dist/src/auth/token-store.js.map +1 -1
  11. package/dist/src/auth/types.d.ts +4 -4
  12. package/dist/src/code_assist/codeAssist.js +19 -6
  13. package/dist/src/code_assist/codeAssist.js.map +1 -1
  14. package/dist/src/code_assist/oauth2.d.ts +7 -0
  15. package/dist/src/code_assist/oauth2.js +82 -32
  16. package/dist/src/code_assist/oauth2.js.map +1 -1
  17. package/dist/src/code_assist/server.js +15 -4
  18. package/dist/src/code_assist/server.js.map +1 -1
  19. package/dist/src/code_assist/setup.js +9 -0
  20. package/dist/src/code_assist/setup.js.map +1 -1
  21. package/dist/src/config/index.d.ts +7 -0
  22. package/dist/src/config/index.js +8 -0
  23. package/dist/src/config/index.js.map +1 -0
  24. package/dist/src/core/client.d.ts +15 -20
  25. package/dist/src/core/client.js +98 -124
  26. package/dist/src/core/client.js.map +1 -1
  27. package/dist/src/core/compression-config.d.ts +10 -0
  28. package/dist/src/core/compression-config.js +17 -0
  29. package/dist/src/core/compression-config.js.map +1 -0
  30. package/dist/src/core/coreToolScheduler.js +50 -15
  31. package/dist/src/core/coreToolScheduler.js.map +1 -1
  32. package/dist/src/core/geminiChat.d.ts +68 -9
  33. package/dist/src/core/geminiChat.js +940 -405
  34. package/dist/src/core/geminiChat.js.map +1 -1
  35. package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
  36. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  37. package/dist/src/core/prompts.js +35 -25
  38. package/dist/src/core/prompts.js.map +1 -1
  39. package/dist/src/core/turn.d.ts +1 -0
  40. package/dist/src/core/turn.js +8 -6
  41. package/dist/src/core/turn.js.map +1 -1
  42. package/dist/src/ide/ide-client.d.ts +1 -1
  43. package/dist/src/ide/ide-client.js +12 -6
  44. package/dist/src/ide/ide-client.js.map +1 -1
  45. package/dist/src/index.d.ts +4 -2
  46. package/dist/src/index.js +5 -2
  47. package/dist/src/index.js.map +1 -1
  48. package/dist/src/prompt-config/TemplateEngine.js +17 -0
  49. package/dist/src/prompt-config/TemplateEngine.js.map +1 -1
  50. package/dist/src/prompt-config/defaults/core-defaults.js +39 -32
  51. package/dist/src/prompt-config/defaults/core-defaults.js.map +1 -1
  52. package/dist/src/prompt-config/defaults/core.md +2 -0
  53. package/dist/src/prompt-config/defaults/provider-defaults.js +34 -27
  54. package/dist/src/prompt-config/defaults/provider-defaults.js.map +1 -1
  55. package/dist/src/prompt-config/defaults/providers/gemini/core.md +270 -0
  56. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/core.md +12 -0
  57. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/gemini-2-5-flash/core.md +12 -0
  58. package/dist/src/prompt-config/types.d.ts +2 -0
  59. package/dist/src/providers/BaseProvider.d.ts +39 -13
  60. package/dist/src/providers/BaseProvider.js +102 -28
  61. package/dist/src/providers/BaseProvider.js.map +1 -1
  62. package/dist/src/providers/IProvider.d.ts +17 -3
  63. package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
  64. package/dist/src/providers/LoggingProviderWrapper.js +33 -27
  65. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  66. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  67. package/dist/src/providers/ProviderContentGenerator.js +9 -6
  68. package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
  69. package/dist/src/providers/ProviderManager.d.ts +4 -0
  70. package/dist/src/providers/ProviderManager.js +6 -0
  71. package/dist/src/providers/ProviderManager.js.map +1 -1
  72. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +34 -21
  73. package/dist/src/providers/anthropic/AnthropicProvider.js +505 -492
  74. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  75. package/dist/src/providers/gemini/GeminiProvider.d.ts +23 -9
  76. package/dist/src/providers/gemini/GeminiProvider.js +344 -515
  77. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  78. package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
  79. package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
  80. package/dist/src/providers/openai/OpenAIProvider.d.ts +46 -96
  81. package/dist/src/providers/openai/OpenAIProvider.js +580 -1392
  82. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  83. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  84. package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
  85. package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
  86. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
  87. package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
  88. package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
  89. package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
  90. package/dist/src/providers/openai/parseResponsesStream.js +99 -391
  91. package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
  92. package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
  93. package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
  94. package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
  95. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +89 -0
  96. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +451 -0
  97. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
  98. package/dist/src/providers/openai-responses/index.d.ts +1 -0
  99. package/dist/src/providers/openai-responses/index.js +2 -0
  100. package/dist/src/providers/openai-responses/index.js.map +1 -0
  101. package/dist/src/providers/tokenizers/OpenAITokenizer.js +3 -3
  102. package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
  103. package/dist/src/providers/types.d.ts +1 -1
  104. package/dist/src/services/ClipboardService.d.ts +19 -0
  105. package/dist/src/services/ClipboardService.js +66 -0
  106. package/dist/src/services/ClipboardService.js.map +1 -0
  107. package/dist/src/services/history/ContentConverters.d.ts +43 -0
  108. package/dist/src/services/history/ContentConverters.js +325 -0
  109. package/dist/src/services/history/ContentConverters.js.map +1 -0
  110. package/dist/src/{providers/IMessage.d.ts → services/history/HistoryEvents.d.ts} +16 -22
  111. package/dist/src/{providers/IMessage.js → services/history/HistoryEvents.js} +1 -1
  112. package/dist/src/services/history/HistoryEvents.js.map +1 -0
  113. package/dist/src/services/history/HistoryService.d.ts +220 -0
  114. package/dist/src/services/history/HistoryService.js +673 -0
  115. package/dist/src/services/history/HistoryService.js.map +1 -0
  116. package/dist/src/services/history/IContent.d.ts +183 -0
  117. package/dist/src/services/history/IContent.js +104 -0
  118. package/dist/src/services/history/IContent.js.map +1 -0
  119. package/dist/src/services/index.d.ts +1 -0
  120. package/dist/src/services/index.js +1 -0
  121. package/dist/src/services/index.js.map +1 -1
  122. package/dist/src/settings/SettingsService.js.map +1 -1
  123. package/dist/src/telemetry/types.d.ts +16 -4
  124. package/dist/src/telemetry/types.js.map +1 -1
  125. package/dist/src/tools/IToolFormatter.d.ts +2 -2
  126. package/dist/src/tools/ToolFormatter.d.ts +42 -4
  127. package/dist/src/tools/ToolFormatter.js +151 -64
  128. package/dist/src/tools/ToolFormatter.js.map +1 -1
  129. package/dist/src/tools/doubleEscapeUtils.d.ts +57 -0
  130. package/dist/src/tools/doubleEscapeUtils.js +241 -0
  131. package/dist/src/tools/doubleEscapeUtils.js.map +1 -0
  132. package/dist/src/tools/read-file.d.ts +6 -1
  133. package/dist/src/tools/read-file.js +25 -11
  134. package/dist/src/tools/read-file.js.map +1 -1
  135. package/dist/src/tools/todo-schemas.d.ts +4 -4
  136. package/dist/src/tools/tool-registry.d.ts +8 -1
  137. package/dist/src/tools/tool-registry.js +79 -23
  138. package/dist/src/tools/tool-registry.js.map +1 -1
  139. package/dist/src/tools/tools.js +13 -0
  140. package/dist/src/tools/tools.js.map +1 -1
  141. package/dist/src/tools/write-file.d.ts +6 -1
  142. package/dist/src/tools/write-file.js +48 -26
  143. package/dist/src/tools/write-file.js.map +1 -1
  144. package/dist/src/types/modelParams.d.ts +12 -0
  145. package/dist/src/utils/bfsFileSearch.js +2 -6
  146. package/dist/src/utils/bfsFileSearch.js.map +1 -1
  147. package/dist/src/utils/schemaValidator.js +16 -1
  148. package/dist/src/utils/schemaValidator.js.map +1 -1
  149. package/package.json +8 -7
  150. package/dist/src/providers/IMessage.js.map +0 -1
  151. package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
  152. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
  153. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
package/dist/src/providers/openai/OpenAIProvider.js
@@ -17,463 +17,158 @@
  * @plan PLAN-20250120-DEBUGLOGGING.P15
  * @requirement REQ-INT-001.1
  */
- import { DebugLogger } from '../../debug/index.js';
- import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
- import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
- import { ToolFormatter } from '../../tools/ToolFormatter.js';
  import OpenAI from 'openai';
- import { RESPONSES_API_MODELS } from './RESPONSES_API_MODELS.js';
- import { ConversationCache } from './ConversationCache.js';
- import { estimateMessagesTokens, estimateRemoteTokens, } from './estimateRemoteTokens.js';
- // ConversationContext removed - using inline conversation ID generation
- import { parseResponsesStream, parseErrorResponse, } from './parseResponsesStream.js';
- import { buildResponsesRequest } from './buildResponsesRequest.js';
+ import * as http from 'http';
+ import * as https from 'https';
+ import * as net from 'net';
  import { BaseProvider } from '../BaseProvider.js';
- import { isQwenEndpoint, generateOAuthEndpointMismatchError, } from '../../config/endpoints.js';
- import { getSettingsService } from '../../settings/settingsServiceInstance.js';
+ import { DebugLogger } from '../../debug/index.js';
+ import { ToolFormatter } from '../../tools/ToolFormatter.js';
+ import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
+ import { getCoreSystemPromptAsync } from '../../core/prompts.js';
+ import { retryWithBackoff } from '../../utils/retry.js';
  export class OpenAIProvider extends BaseProvider {
+ name = 'openai';
  logger;
- openai;
- currentModel = process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
- baseURL;
- providerConfig;
  toolFormatter;
- toolFormatOverride;
- conversationCache;
- modelParams;
  _cachedClient;
  _cachedClientKey;
  constructor(apiKey, baseURL, config, oauthManager) {
+ // Normalize empty string to undefined for proper precedence handling
+ const normalizedApiKey = apiKey && apiKey.trim() !== '' ? apiKey : undefined;
+ // Detect if this is a Qwen endpoint
+ const isQwenEndpoint = !!(baseURL &&
+ (baseURL.includes('dashscope.aliyuncs.com') ||
+ baseURL.includes('api.qwen.com') ||
+ baseURL.includes('qwen')));
  // Initialize base provider with auth configuration
- // Check if we should enable OAuth for Qwen
- // Check OAuth enablement from OAuth manager if available
- let shouldEnableQwenOAuth = false;
- if (oauthManager) {
- // Check if OAuth is enabled for qwen in the OAuth manager (from settings)
- const manager = oauthManager;
- if (manager.isOAuthEnabled &&
- typeof manager.isOAuthEnabled === 'function') {
- shouldEnableQwenOAuth = manager.isOAuthEnabled('qwen');
- }
- // Also enable if this looks like a Qwen endpoint
- if (!shouldEnableQwenOAuth) {
- shouldEnableQwenOAuth =
- isQwenEndpoint(baseURL || '') ||
- (!baseURL && (!apiKey || apiKey === '')) ||
- baseURL === 'https://portal.qwen.ai/v1';
- }
- }
- const baseConfig = {
+ super({
  name: 'openai',
- apiKey,
+ apiKey: normalizedApiKey,
  baseURL,
- cliKey: !apiKey || apiKey === '' ? undefined : apiKey, // Don't set cliKey if no API key to allow OAuth
- envKeyNames: ['OPENAI_API_KEY'],
- isOAuthEnabled: shouldEnableQwenOAuth,
- oauthProvider: shouldEnableQwenOAuth ? 'qwen' : undefined,
+ envKeyNames: ['OPENAI_API_KEY'], // Support environment variable fallback
+ isOAuthEnabled: isQwenEndpoint && !!oauthManager,
+ oauthProvider: isQwenEndpoint ? 'qwen' : undefined,
  oauthManager,
- };
- super(baseConfig);
- this.logger = new DebugLogger('llxprt:providers:openai');
- this.logger.debug(() => `Constructor - baseURL: ${baseURL}, apiKey: ${apiKey?.substring(0, 10) || 'none'}, oauthManager: ${!!oauthManager}, shouldEnableQwenOAuth: ${shouldEnableQwenOAuth}`);
- this.baseURL = baseURL;
- this.providerConfig = config;
+ }, config);
  this.toolFormatter = new ToolFormatter();
- this.conversationCache = new ConversationCache();
- // Initialize from SettingsService
- this.initializeFromSettings().catch((error) => {
- this.logger.debug(() => `Failed to initialize from SettingsService: ${error}`);
- });
- // Set appropriate default model based on the provider
- if (shouldEnableQwenOAuth || isQwenEndpoint(baseURL || '')) {
- // Default to Qwen model when using Qwen endpoints
- this.currentModel = 'qwen3-coder-plus';
- }
- else if (process.env.LLXPRT_DEFAULT_MODEL) {
- // Use environment variable if set
- this.currentModel = process.env.LLXPRT_DEFAULT_MODEL;
- }
- const clientOptions = {
- apiKey: apiKey || 'placeholder', // OpenAI client requires a string, use placeholder if OAuth will be used
- // Allow browser environment if explicitly configured
- dangerouslyAllowBrowser: config?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (baseURL) {
- clientOptions.baseURL = baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = apiKey; // Track the initial key used
- // Cached client reserved for future optimization
- void this._cachedClient;
- }
- /**
- * Implementation of BaseProvider abstract method
- * Determines if this provider supports OAuth authentication
- */
- supportsOAuth() {
- // Only support Qwen OAuth for Qwen endpoints
- // Use baseProviderConfig.baseURL if this.baseURL not set yet (during constructor)
- const baseURL = this.baseURL ||
- this.baseProviderConfig.baseURL ||
- 'https://api.openai.com/v1';
- return isQwenEndpoint(baseURL);
+ // new DebugLogger('llxprt:core:toolformatter'), // TODO: Fix ToolFormatter constructor
+ // Setup debug logger
+ this.logger = new DebugLogger('llxprt:provider:openai');
  }
  /**
- * Helper method to determine if we're using Qwen (via OAuth or direct endpoint)
+ * Create HTTP/HTTPS agents with socket configuration for local AI servers
+ * Returns undefined if no socket settings are configured
  */
- isUsingQwen() {
- // Check if we're using qwen format based on tool format detection
- const toolFormat = this.detectToolFormat();
- return toolFormat === 'qwen';
- }
- /**
- * Update the OpenAI client with resolved authentication if needed
- */
- async updateClientWithResolvedAuth() {
- const resolvedKey = await this.getAuthToken();
- if (!resolvedKey) {
- // Provide specific error message based on endpoint validation
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('No authentication available for OpenAI API calls');
- }
- // Check if we're using Qwen OAuth and need to update the baseURL
- let effectiveBaseURL = this.baseURL;
- this.logger.debug(() => `updateClientWithResolvedAuth - OAuth enabled: ${this.isOAuthEnabled()}, OAuth provider: ${this.baseProviderConfig.oauthProvider}, baseURL: ${this.baseURL}, resolvedKey: ${resolvedKey?.substring(0, 10)}...`);
- if (this.isOAuthEnabled() &&
- this.baseProviderConfig.oauthProvider === 'qwen') {
- // Get the OAuth token to check for resource_url
- const oauthManager = this.baseProviderConfig.oauthManager;
- if (oauthManager?.getOAuthToken) {
- const oauthToken = await oauthManager.getOAuthToken('qwen');
- this.logger.debug(() => `OAuth token retrieved, resource_url: ${oauthToken?.resource_url}, access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
- if (oauthToken?.resource_url) {
- // Use the resource_url from the OAuth token
- effectiveBaseURL = `https://${oauthToken.resource_url}/v1`;
- this.logger.debug(() => `Using Qwen OAuth endpoint: ${effectiveBaseURL}`);
- }
- }
- }
- // Only update client if the key or URL has changed
- if (this._cachedClientKey !== resolvedKey ||
- this.baseURL !== effectiveBaseURL) {
- const clientOptions = {
- apiKey: resolvedKey,
- // Allow browser environment if explicitly configured
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (effectiveBaseURL) {
- clientOptions.baseURL = effectiveBaseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = resolvedKey;
- // Update the baseURL to track changes
- if (effectiveBaseURL !== this.baseURL) {
- this.baseURL = effectiveBaseURL;
- }
- }
- }
- requiresTextToolCallParsing() {
- if (this.providerConfig?.enableTextToolCallParsing === false) {
- return false;
- }
- // Check if current tool format requires text-based parsing
- const currentFormat = this.getToolFormat();
- const textBasedFormats = ['hermes', 'xml', 'llama'];
- if (textBasedFormats.includes(currentFormat)) {
- return true;
- }
- const configuredModels = this.providerConfig?.textToolCallModels || [];
- return configuredModels.includes(this.currentModel);
- }
- getToolFormat() {
- // Check manual override first
- if (this.toolFormatOverride) {
- return this.toolFormatOverride;
- }
- // Check for settings override
- if (this.providerConfig?.providerToolFormatOverrides?.[this.name]) {
- return this.providerConfig.providerToolFormatOverrides[this.name];
- }
- // Auto-detect tool format based on model or base URL
- if (this.currentModel.includes('deepseek') ||
- this.baseURL?.includes('deepseek')) {
- return 'deepseek';
- }
- // Check for Qwen - including OAuth authenticated Qwen
- if (this.isUsingQwen()) {
- return 'qwen';
- }
- // Default to OpenAI format
- return 'openai';
- }
- shouldUseResponses(model) {
- // Check env flag override (highest priority)
- if (process.env.OPENAI_RESPONSES_DISABLE === 'true') {
- return false;
- }
- // Check settings override - if explicitly set to false, always respect that
- if (this.providerConfig?.openaiResponsesEnabled === false) {
- return false;
- }
- // Never use Responses API for non-OpenAI providers (those with custom base URLs)
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- if (baseURL !== 'https://api.openai.com/v1') {
- return false;
- }
- // Default: Check if model starts with any of the responses API model prefixes
- return RESPONSES_API_MODELS.some((responsesModel) => model.startsWith(responsesModel));
- }
- async callResponsesEndpoint(messages, tools, options) {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to make API calls');
- }
- // Remove the stateful mode error to allow O3 to work with conversation IDs
- // Check context usage and warn if getting close to limit
- if (options?.conversationId && options?.parentId) {
- const contextInfo = this.estimateContextUsage(options.conversationId, options.parentId, messages);
- // Warn if less than 4k tokens remaining
- if (contextInfo.tokensRemaining < 4000) {
- this.logger.debug(() => `Warning: Only ${contextInfo.tokensRemaining} tokens remaining (${contextInfo.contextUsedPercent.toFixed(1)}% context used). Consider starting a new conversation.`);
- }
- }
- // Check cache for existing conversation
- if (options?.conversationId && options?.parentId) {
- const cachedMessages = this.conversationCache.get(options.conversationId, options.parentId);
- if (cachedMessages) {
- // Return cached messages as an async iterable
- return (async function* () {
- for (const message of cachedMessages) {
- yield message;
- }
- })();
- }
+ createHttpAgents() {
+ // Get socket configuration from ephemeral settings
+ const settings = this.providerConfig?.getEphemeralSettings?.() || {};
+ // Check if any socket settings are explicitly configured
+ const hasSocketSettings = 'socket-timeout' in settings ||
+ 'socket-keepalive' in settings ||
+ 'socket-nodelay' in settings;
+ // Only create custom agents if socket settings are configured
+ if (!hasSocketSettings) {
+ return undefined;
  }
- // Format tools for Responses API
- const formattedTools = tools
- ? this.toolFormatter.toResponsesTool(tools)
- : undefined;
- // Patch messages to include synthetic responses for cancelled tools
- const { SyntheticToolResponseHandler } = await import('./syntheticToolResponses.js');
- const patchedMessages = SyntheticToolResponseHandler.patchMessageHistory(messages);
- // Build the request
- const request = buildResponsesRequest({
- model: this.currentModel,
- messages: patchedMessages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- conversationId: options?.conversationId,
- parentId: options?.parentId,
- tool_choice: options?.tool_choice,
+ // Socket configuration with defaults for when settings ARE configured
+ const socketTimeout = settings['socket-timeout'] || 60000; // 60 seconds default
+ const socketKeepAlive = settings['socket-keepalive'] !== false; // true by default
+ const socketNoDelay = settings['socket-nodelay'] !== false; // true by default
+ // Create HTTP agent with socket options
+ const httpAgent = new http.Agent({
+ keepAlive: socketKeepAlive,
+ keepAliveMsecs: 1000,
+ timeout: socketTimeout,
  });
- // Make the API call
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- const responsesURL = `${baseURL}/responses`;
- // Ensure proper UTF-8 encoding for the request body
- // This is crucial for handling multibyte characters (e.g., Japanese, Chinese)
- const requestBody = JSON.stringify(request);
- const bodyBlob = new Blob([requestBody], {
- type: 'application/json; charset=utf-8',
+ // Create HTTPS agent with socket options
+ const httpsAgent = new https.Agent({
+ keepAlive: socketKeepAlive,
+ keepAliveMsecs: 1000,
+ timeout: socketTimeout,
  });
- const response = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: bodyBlob,
- });
- // Handle errors
- if (!response.ok) {
- const errorBody = await response.text();
- // Handle 422 context_length_exceeded error
- if (response.status === 422 &&
- errorBody.includes('context_length_exceeded')) {
- this.logger.debug(() => 'Context length exceeded, invalidating cache and retrying stateless...');
- // Invalidate the cache for this conversation
- if (options?.conversationId && options?.parentId) {
- this.conversationCache.invalidate(options.conversationId, options.parentId);
+ // Apply TCP_NODELAY if enabled (reduces latency for local servers)
+ if (socketNoDelay) {
+ const originalCreateConnection = httpAgent.createConnection;
+ httpAgent.createConnection = function (options, callback) {
+ const socket = originalCreateConnection.call(this, options, callback);
+ if (socket instanceof net.Socket) {
+ socket.setNoDelay(true);
  }
- // Retry without conversation context (pure stateless)
- const retryRequest = buildResponsesRequest({
- model: this.currentModel,
- messages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- // Omit conversationId and parentId for stateless retry
- tool_choice: options?.tool_choice,
- });
- // Ensure proper UTF-8 encoding for retry request as well
- const retryRequestBody = JSON.stringify(retryRequest);
- const retryBodyBlob = new Blob([retryRequestBody], {
- type: 'application/json; charset=utf-8',
- });
- const retryResponse = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: retryBodyBlob,
- });
- if (!retryResponse.ok) {
- const retryErrorBody = await retryResponse.text();
- throw parseErrorResponse(retryResponse.status, retryErrorBody, this.name);
- }
- // Use the retry response
- return this.handleResponsesApiResponse(retryResponse, messages, undefined, // No conversation context on retry
- undefined, options?.stream !== false);
- }
- throw parseErrorResponse(response.status, errorBody, this.name);
- }
- // Handle the response
- return this.handleResponsesApiResponse(response, messages, options?.conversationId, options?.parentId, options?.stream !== false);
- }
- async handleResponsesApiResponse(response, messages, conversationId, parentId, isStreaming) {
- // Handle streaming response
- if (isStreaming && response.body) {
- const collectedMessages = [];
- const cache = this.conversationCache;
- return (async function* () {
- for await (const message of parseResponsesStream(response.body)) {
- // Collect messages for caching
- if (message.content || message.tool_calls) {
- collectedMessages.push(message);
- }
- else if (message.usage && collectedMessages.length === 0) {
- // If we only got a usage message with no content, add a placeholder
- collectedMessages.push({
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- });
- }
- // Update the parentId in the context as soon as we get a message ID
- if (message.id) {
- // ConversationContext.setParentId(message.id);
- // TODO: Handle parent ID updates when ConversationContext is available
- }
- yield message;
- }
- // Cache the collected messages with token count
- if (conversationId && parentId && collectedMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request (messages + response)
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(collectedMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- cache.set(conversationId, parentId, collectedMessages, previousTokens + totalTokensForRequest);
- }
- })();
- }
- const data = (await response.json());
- const resultMessages = [];
- // DEFENSIVE FIX: Handle potential array response from providers that violate OpenAI spec
- // Some providers (like Cerebras) may return an array of responses instead of a single response
- if (Array.isArray(data)) {
- this.logger.error(() => '[Cerebras Corruption] Detected malformed array response from provider, aggregating...', {
- provider: this.baseURL,
- arrayLength: data.length,
- });
- const aggregatedContent = [];
- let aggregatedToolCalls = [];
- let aggregatedUsage = undefined;
- for (const item of data) {
- if (item.choices?.[0]?.message?.content) {
- aggregatedContent.push(item.choices[0].message.content);
- }
- if (item.choices?.[0]?.message?.tool_calls) {
- aggregatedToolCalls = item.choices[0].message.tool_calls;
- }
- if (item.usage) {
- aggregatedUsage = item.usage;
- }
- }
- const message = {
- role: ContentGeneratorRole.ASSISTANT,
- content: aggregatedContent.join(''),
+ return socket;
  };
- if (aggregatedToolCalls.length > 0) {
- message.tool_calls = aggregatedToolCalls;
- }
- if (aggregatedUsage) {
- message.usage = {
- prompt_tokens: aggregatedUsage.prompt_tokens || 0,
- completion_tokens: aggregatedUsage.completion_tokens || 0,
- total_tokens: aggregatedUsage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
- // Convert to async iterator for consistent return type
- return (async function* () {
- for (const msg of resultMessages) {
- yield msg;
+ const originalHttpsCreateConnection = httpsAgent.createConnection;
+ httpsAgent.createConnection = function (options, callback) {
+ const socket = originalHttpsCreateConnection.call(this, options, callback);
+ if (socket instanceof net.Socket) {
+ socket.setNoDelay(true);
  }
- })();
- }
- if (data.choices && data.choices.length > 0) {
- const choice = data.choices[0];
- const message = {
- role: choice.message.role,
- content: choice.message.content || '',
+ return socket;
  };
- if (choice.message.tool_calls) {
- message.tool_calls = choice.message.tool_calls;
- }
- if (data.usage) {
- message.usage = {
- prompt_tokens: data.usage.prompt_tokens || 0,
- completion_tokens: data.usage.completion_tokens || 0,
- total_tokens: data.usage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
  }
- // Cache the result with token count
- if (conversationId && parentId && resultMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = this.conversationCache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(resultMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- this.conversationCache.set(conversationId, parentId, resultMessages, previousTokens + totalTokensForRequest);
+ return { httpAgent, httpsAgent };
+ }
+ /**
+ * Get or create OpenAI client instance
+ * Will use the API key from resolved auth
+ * @returns OpenAI client instance
+ */
+ async getClient() {
+ const resolvedKey = await this.getAuthToken();
+ // Use the unified getBaseURL() method from BaseProvider
+ const baseURL = this.getBaseURL();
+ const clientKey = `${baseURL}-${resolvedKey}`;
+ // Return cached client if available and auth hasn't changed
+ if (this._cachedClient && this._cachedClientKey === clientKey) {
+ return this._cachedClient;
+ }
+ // Create HTTP agents with socket configuration (if configured)
+ const agents = this.createHttpAgents();
+ // Build client options - OpenAI SDK accepts httpAgent/httpsAgent at runtime
+ // even though they're not in the TypeScript definitions
+ const baseOptions = {
+ apiKey: resolvedKey || '',
+ baseURL,
+ };
+ // Add socket configuration if available
+ const clientOptions = agents
+ ? {
+ ...baseOptions,
+ httpAgent: agents.httpAgent,
+ httpsAgent: agents.httpsAgent,
+ }
+ : baseOptions;
+ // Create new client with current auth and optional socket configuration
+ // Cast to unknown then to the expected type to bypass TypeScript's structural checking
+ this._cachedClient = new OpenAI(clientOptions);
+ this._cachedClientKey = clientKey;
+ return this._cachedClient;
+ }
+ /**
+ * Check if OAuth is supported for this provider
+ * Qwen endpoints support OAuth, standard OpenAI does not
+ */
+ supportsOAuth() {
+ const baseURL = this.getBaseURL();
+ // Check if this is a Qwen endpoint that supports OAuth
+ if (baseURL &&
+ (baseURL.includes('dashscope.aliyuncs.com') ||
+ baseURL.includes('api.qwen.com') ||
+ baseURL.includes('qwen'))) {
+ return true;
  }
- return (async function* () {
- for (const message of resultMessages) {
- yield message;
- }
- })();
+ // Standard OpenAI endpoints don't support OAuth
+ return false;
  }
  async getModels() {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to fetch models');
- }
  try {
- // Get resolved authentication and update client if needed
- await this.updateClientWithResolvedAuth();
- const response = await this.openai.models.list();
+ // Always try to fetch models, regardless of auth status
+ // Local endpoints often work without authentication
+ const client = await this.getClient();
+ const response = await client.models.list();
  const models = [];
  for await (const model of response) {
- // Filter out non-chat models (embeddings, audio, image, moderation, DALL·E, etc.)
+ // Filter out non-chat models (embeddings, audio, image, vision, DALL·E, etc.)
  if (!/embedding|whisper|audio|tts|image|vision|dall[- ]?e|moderation/i.test(model.id)) {
  models.push({
  id: model.id,
@@ -488,1029 +183,500 @@ export class OpenAIProvider extends BaseProvider {
488
183
  catch (error) {
489
184
  this.logger.debug(() => `Error fetching models from OpenAI: ${error}`);
490
185
  // Return a hardcoded list as fallback
491
- // Check if this is a Qwen endpoint
492
- if (isQwenEndpoint(this.baseURL || '')) {
493
- return [
494
- {
495
- id: 'qwen3-coder-plus',
496
- name: 'qwen3-coder-plus',
497
- provider: 'openai',
498
- supportedToolFormats: ['openai'],
499
- },
500
- ];
501
- }
502
- // Default OpenAI models
503
- return [
504
- {
505
- id: 'gpt-4o',
506
- name: 'gpt-4o',
507
- provider: 'openai',
508
- supportedToolFormats: ['openai'],
509
- },
510
- {
511
- id: 'gpt-4o-mini',
512
- name: 'gpt-4o-mini',
513
- provider: 'openai',
514
- supportedToolFormats: ['openai'],
515
- },
516
- {
517
- id: 'gpt-4-turbo',
518
- name: 'gpt-4-turbo',
519
- provider: 'openai',
520
- supportedToolFormats: ['openai'],
521
- },
522
- {
523
- id: 'gpt-3.5-turbo',
524
- name: 'gpt-3.5-turbo',
525
- provider: 'openai',
526
- supportedToolFormats: ['openai'],
527
- },
528
- ];
186
+ return this.getFallbackModels();
529
187
  }
530
188
  }
531
- async *generateChatCompletion(messages, tools, _toolFormat) {
532
- // Check if API key is available (using resolved authentication)
533
- const apiKey = await this.getAuthToken();
534
- if (!apiKey) {
535
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
536
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
537
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
538
- }
539
- throw new Error('OpenAI API key is required to generate completions');
189
+ getFallbackModels() {
190
+ return [
191
+ {
192
+ id: 'gpt-5',
193
+ name: 'GPT-5',
194
+ provider: 'openai',
195
+ supportedToolFormats: ['openai'],
196
+ },
197
+ {
198
+ id: 'gpt-4.1',
199
+ name: 'GPT-4.1',
200
+ provider: 'openai',
201
+ supportedToolFormats: ['openai'],
202
+ },
203
+ {
204
+ id: 'gpt-4o',
205
+ name: 'GPT-4o',
206
+ provider: 'openai',
207
+ supportedToolFormats: ['openai'],
208
+ },
209
+ {
210
+ id: 'o3',
211
+ name: 'O3',
212
+ provider: 'openai',
213
+ supportedToolFormats: ['openai'],
214
+ },
215
+ {
216
+ id: 'o4-mini',
217
+ name: 'O4 Mini',
218
+ provider: 'openai',
219
+ supportedToolFormats: ['openai'],
220
+ },
221
+ {
222
+ id: 'gpt-3.5-turbo',
223
+ name: 'GPT-3.5 Turbo (Legacy)',
224
+ provider: 'openai',
225
+ supportedToolFormats: ['openai'],
226
+ },
227
+ ];
228
+ }
229
+ getDefaultModel() {
230
+ // Return hardcoded default - do NOT call getModel() to avoid circular dependency
231
+ return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
232
+ }
233
+ getServerTools() {
234
+ // TODO: Implement server tools for OpenAI provider
235
+ return [];
236
+ }
237
+ async invokeServerTool(toolName, _params, _config) {
238
+ // TODO: Implement server tool invocation for OpenAI provider
239
+ throw new Error(`Server tool '${toolName}' not supported by OpenAI provider`);
240
+ }
241
+ /**
242
+ * Normalize tool IDs from various formats to OpenAI format
243
+ * Handles IDs from OpenAI (call_xxx), Anthropic (toolu_xxx), and history (hist_tool_xxx)
244
+ */
245
+ normalizeToOpenAIToolId(id) {
246
+ // If already in OpenAI format, return as-is
247
+ if (id.startsWith('call_')) {
248
+ return id;
540
249
  }
541
- // Check if we should use responses endpoint
542
- if (this.shouldUseResponses(this.currentModel)) {
543
- // Generate conversation IDs inline (would normally come from application context)
544
- const conversationId = undefined;
545
- const parentId = undefined;
546
- yield* await this.callResponsesEndpoint(messages, tools, {
547
- stream: true,
548
- tool_choice: tools && tools.length > 0 ? 'auto' : undefined,
549
- stateful: false, // Always stateless for Phase 22-01
550
- conversationId,
551
- parentId,
552
- });
553
- return;
250
+ // For history format, extract the UUID and add OpenAI prefix
251
+ if (id.startsWith('hist_tool_')) {
252
+ const uuid = id.substring('hist_tool_'.length);
253
+ return 'call_' + uuid;
554
254
  }
555
- // Fix messages in place like AnthropicProvider does - this ensures synthetic responses persist
556
- // This is critical for preventing 400 errors on subsequent calls with Qwen/Cerebras
557
- const missingToolIds = this.identifyAndFixMissingToolResponses(messages);
558
- if (missingToolIds.length > 0) {
559
- this.logger.debug(() => `[Synthetic] Added ${missingToolIds.length} synthetic responses to conversation history: ${JSON.stringify(missingToolIds)}`);
560
- // Log the actual tool calls and their IDs for debugging
561
- const assistantMessagesWithTools = messages.filter((m) => m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0);
562
- const lastAssistantWithTools = assistantMessagesWithTools[assistantMessagesWithTools.length - 1];
563
- if (lastAssistantWithTools?.tool_calls) {
564
- this.logger.debug(() => `[Synthetic] Last assistant tool calls: ${JSON.stringify(lastAssistantWithTools.tool_calls?.map((tc) => ({ id: tc.id, name: tc.function.name })) ?? [])}`);
565
- }
255
+ // For Anthropic format, extract the UUID and add OpenAI prefix
256
+ if (id.startsWith('toolu_')) {
257
+ const uuid = id.substring('toolu_'.length);
258
+ return 'call_' + uuid;
259
+ }
260
+ // Unknown format - assume it's a raw UUID
261
+ return 'call_' + id;
262
+ }
263
+ /**
264
+ * Normalize tool IDs from OpenAI format to history format
265
+ */
266
+ normalizeToHistoryToolId(id) {
267
+ // If already in history format, return as-is
268
+ if (id.startsWith('hist_tool_')) {
269
+ return id;
566
270
  }
567
- // Now messages array has been modified in place with synthetic responses
568
- const patchedMessages = messages;
569
- // Validate tool messages have required tool_call_id
570
- const toolMessages = patchedMessages.filter((msg) => msg.role === 'tool');
571
- const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
572
- if (missingIds.length > 0) {
573
- this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
574
- throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
271
+ // For OpenAI format, extract the UUID and add history prefix
272
+ if (id.startsWith('call_')) {
273
+ const uuid = id.substring('call_'.length);
274
+ return 'hist_tool_' + uuid;
575
275
  }
576
- // Log synthetic responses for debugging
577
- const syntheticMessages = patchedMessages.filter((msg) => msg._synthetic);
578
- if (syntheticMessages.length > 0) {
579
- this.logger.debug(() => `[Synthetic] Added ${syntheticMessages.length} synthetic tool responses`);
276
+ // For Anthropic format, extract the UUID and add history prefix
277
+ if (id.startsWith('toolu_')) {
278
+ const uuid = id.substring('toolu_'.length);
279
+ return 'hist_tool_' + uuid;
580
280
  }
581
- const parser = this.requiresTextToolCallParsing()
582
- ? new GemmaToolCallParser()
583
- : null;
584
- // Get current tool format (with override support)
585
- const currentToolFormat = this.getToolFormat();
586
- // Format tools using formatToolsForAPI method
587
- const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
588
- // Get stream_options from ephemeral settings (not model params)
589
- const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
590
- // Default stream_options to { include_usage: true } unless explicitly set
591
- const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
592
- // Get streaming setting from ephemeral settings (default: enabled)
593
- const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
594
- let streamingEnabled = streamingSetting !== 'disabled';
595
- // Get resolved authentication and update client if needed
596
- await this.updateClientWithResolvedAuth();
597
- // Strip internal tracking fields that some APIs don't accept
598
- // We keep the synthetic responses but remove the metadata fields
599
- const cleanedMessages = patchedMessages.map((msg) => {
600
- // Create a shallow copy and remove internal fields
601
- const { _synthetic, _cancelled, ...cleanMsg } = msg;
602
- // Log synthetic tool responses for debugging
603
- if (msg._synthetic) {
604
- this.logger.debug(() => `[Synthetic Tool Response] ${JSON.stringify(cleanMsg)}`);
605
- }
606
- return cleanMsg;
607
- });
608
- this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${streamingEnabled}`);
609
- // Debug: Log message roles being sent
610
- this.logger.debug(() => `Messages being sent to OpenAI (${cleanedMessages.length} total): ${cleanedMessages
611
- .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
612
- .join(', ')}`);
613
- let response;
614
- try {
615
- // Build request params with exact order from original
616
- response = await this.openai.chat.completions.create({
617
- model: this.currentModel,
618
- messages: cleanedMessages,
619
- stream: streamingEnabled,
620
- ...(streamingEnabled && finalStreamOptions !== null
621
- ? { stream_options: finalStreamOptions }
622
- : {}),
623
- tools: formattedTools,
624
- tool_choice: this.getToolChoiceForFormat(tools),
625
- ...this.modelParams,
281
+ // Unknown format - assume it's a raw UUID
282
+ return 'hist_tool_' + id;
283
+ }
284
+ /**
285
+ * Generate chat completion with IContent interface
286
+ * Internally converts to OpenAI API format, but only yields IContent
287
+ * @param contents Array of content blocks (text and tool_call)
288
+ * @param tools Array of available tools
289
+ */
290
+ async *generateChatCompletion(contents, tools) {
291
+ // Debug log what we receive
292
+ if (this.logger.enabled) {
293
+ this.logger.debug(() => `[OpenAIProvider] generateChatCompletion received tools:`, {
294
+ hasTools: !!tools,
295
+ toolsLength: tools?.length,
296
+ toolsType: typeof tools,
297
+ isArray: Array.isArray(tools),
298
+ firstToolName: tools?.[0]?.functionDeclarations?.[0]?.name,
299
+ toolsStructure: tools ? 'available' : 'undefined',
626
300
  });
627
301
  }
628
- catch (error) {
629
- // Debug the error
630
- this.logger.error(() => `[Cancellation 400] Error caught in API call: ${error}`);
631
- this.logger.error(() => `[Cancellation 400] Error type: ${error?.constructor?.name}`);
632
- this.logger.error(() => `[Cancellation 400] Error status: ${error?.status || error?.response?.status}`);
633
- this.logger.error(() => `[Cancellation 400] Error response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
634
- // Log the last few messages to understand what's being sent
635
- if (error?.status === 400 ||
636
- error?.response?.status === 400) {
637
- this.logger.error(() => `[Cancellation 400] Last 5 messages being sent:`);
638
- const lastMessages = cleanedMessages.slice(-5);
639
- lastMessages.forEach((msg, idx) => {
640
- this.logger.error(() => ` [${cleanedMessages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
641
- if (msg.tool_calls) {
642
- msg.tool_calls.forEach((tc) => {
643
- this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
644
- });
645
- }
646
- });
647
- }
648
- // Check for JSONResponse mutation errors
649
- const errorMessage = error instanceof Error ? error.message : String(error);
650
- if (errorMessage?.includes('JSONResponse') &&
651
- errorMessage?.includes('does not support item assignment')) {
652
- this.logger.debug(() => '[Cancellation 400] Detected JSONResponse mutation error, retrying without streaming');
653
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error detected. This typically occurs with certain providers like Cerebras. Falling back to non-streaming mode.', {
654
- errorMessage,
655
- provider: this.baseURL,
656
- streamingEnabled,
657
- });
658
- // Retry with streaming disabled
659
- response = await this.openai.chat.completions.create({
660
- model: this.currentModel,
661
- messages: cleanedMessages,
662
- stream: false, // Force non-streaming
663
- tools: formattedTools,
664
- tool_choice: this.getToolChoiceForFormat(tools),
665
- ...this.modelParams,
666
- });
667
- // Override streamingEnabled for the rest of this function
668
- streamingEnabled = false;
669
- }
670
- else {
671
- this.logger.debug(() => '[Cancellation 400] Re-throwing error (not a JSONResponse mutation)');
672
- // Re-throw other errors
673
- throw error;
674
- }
302
+ // Pass tools directly in Gemini format - they'll be converted in generateChatCompletionImpl
303
+ const generator = this.generateChatCompletionImpl(contents, tools, undefined, undefined, undefined);
304
+ for await (const item of generator) {
305
+ yield item;
675
306
  }
676
- let fullContent = '';
677
- const accumulatedToolCalls = [];
678
- let hasStreamedContent = false;
679
- let usageData;
680
- // For Qwen streaming, buffer whitespace-only chunks to preserve spacing across chunk boundaries
681
- let pendingWhitespace = null;
682
- // Handle streaming vs non-streaming response
683
- if (streamingEnabled) {
684
- // We need to buffer all chunks to detect and handle malformed streams
685
- // Some providers (like Cerebras) send message format instead of delta
686
- const allChunks = [];
687
- this.logger.debug(() => '[Stream Detection] Starting to buffer chunks for corruption detection', {
688
- provider: this.baseURL,
689
- streamingEnabled,
690
- });
691
- try {
692
- for await (const chunk of response) {
693
- // CRITICAL: Create a deep copy to avoid JSONResponse mutation issues
694
- // Cerebras and other providers may return immutable JSONResponse objects
695
- // Cast to unknown first to bypass type checking, then to our extended type
696
- const extendedChunk = chunk;
697
- const safeChunk = {
698
- choices: extendedChunk.choices?.map((choice) => ({
699
- delta: choice.delta
700
- ? {
701
- content: choice.delta.content ?? undefined,
702
- role: choice.delta.role,
703
- tool_calls: choice.delta.tool_calls?.map((tc, idx) => ({
704
- id: tc.id,
705
- type: tc.type,
706
- function: tc.function
707
- ? {
708
- name: tc.function.name,
709
- arguments: tc.function.arguments,
710
- }
711
- : undefined,
712
- index: tc.index !== undefined ? tc.index : idx,
713
- })),
714
- }
715
- : undefined,
716
- message: choice.message
717
- ? {
718
- content: choice.message.content ?? undefined,
719
- role: choice.message.role,
720
- tool_calls: choice.message.tool_calls?.map((tc) => ({
721
- id: tc.id,
722
- type: tc.type,
723
- function: tc.function
724
- ? {
725
- name: tc.function.name,
726
- arguments: tc.function.arguments,
727
- }
728
- : undefined,
729
- })),
730
- }
731
- : undefined,
732
- index: choice.index,
733
- finish_reason: choice.finish_reason,
734
- })),
735
- usage: extendedChunk.usage
736
- ? {
737
- prompt_tokens: extendedChunk.usage.prompt_tokens,
738
- completion_tokens: extendedChunk.usage.completion_tokens,
739
- total_tokens: extendedChunk.usage.total_tokens,
740
- }
741
- : undefined,
742
- };
743
- allChunks.push(safeChunk);
744
- }
745
- }
746
- catch (error) {
747
- // Handle JSONResponse mutation errors that occur during iteration
748
- const errorMessage = error instanceof Error ? error.message : String(error);
749
- if (errorMessage?.includes('JSONResponse') &&
750
- errorMessage?.includes('does not support item assignment')) {
751
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error during stream iteration. This is a known issue with Cerebras. The OpenAI client library is trying to mutate immutable response objects. Falling back to non-streaming mode.', {
752
- error: errorMessage,
753
- provider: this.baseURL,
754
- chunksCollected: allChunks.length,
755
- });
756
- // Retry the entire request with streaming disabled
757
- // This is the nuclear option but ensures we get a response
758
- const nonStreamingResponse = await this.openai.chat.completions.create({
759
- model: this.currentModel,
760
- messages: cleanedMessages,
761
- stream: false, // Force non-streaming
762
- tools: formattedTools,
763
- tool_choice: this.getToolChoiceForFormat(tools),
764
- ...this.modelParams,
307
+ }
308
+ /**
309
+ * Convert IContent array to OpenAI ChatCompletionMessageParam array
310
+ */
311
+ convertToOpenAIMessages(contents) {
312
+ const messages = [];
313
+ for (const content of contents) {
314
+ if (content.speaker === 'human') {
315
+ // Convert human messages to user messages
316
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
317
+ const text = textBlocks.map((b) => b.text).join('\n');
318
+ if (text) {
319
+ messages.push({
320
+ role: 'user',
321
+ content: text,
765
322
  });
766
- // Handle as non-streaming response
767
- const completionResponse = nonStreamingResponse;
768
- const choice = completionResponse.choices[0];
769
- if (choice?.message.content) {
770
- fullContent = choice.message.content;
771
- }
772
- if (choice?.message.tool_calls) {
773
- for (const toolCall of choice.message.tool_calls) {
774
- if (toolCall.type === 'function' && toolCall.function) {
775
- accumulatedToolCalls.push({
776
- id: toolCall.id,
777
- type: 'function',
778
- function: toolCall.function,
779
- });
780
- }
781
- }
782
- }
783
- if (completionResponse.usage) {
784
- usageData = {
785
- prompt_tokens: completionResponse.usage.prompt_tokens,
786
- completion_tokens: completionResponse.usage.completion_tokens,
787
- total_tokens: completionResponse.usage.total_tokens,
788
- };
789
- }
790
- // Yield the complete response
791
- yield {
792
- role: ContentGeneratorRole.ASSISTANT,
793
- content: fullContent || '',
794
- tool_calls: accumulatedToolCalls.length > 0
795
- ? accumulatedToolCalls
796
- : undefined,
797
- usage: usageData,
798
- };
799
- return;
800
323
  }
801
- // Re-throw other errors
802
- throw error;
803
324
  }
804
- // Check first chunk to see if we have malformed stream
805
- let detectedMalformedStream = false;
806
- if (allChunks.length > 0) {
807
- const firstChunk = allChunks[0];
808
- if (firstChunk.choices?.[0]?.message &&
809
- !firstChunk.choices?.[0]?.delta) {
810
- detectedMalformedStream = true;
811
- this.logger.debug(() => 'Detected malformed stream (message instead of delta), using aggregation mode');
812
- }
813
- }
814
- // If we detected issues, aggregate everything
815
- if (detectedMalformedStream) {
816
- const contentParts = [];
817
- let aggregatedToolCalls = [];
818
- let finalUsageData = undefined;
819
- // Process all buffered chunks
820
- for (const chunk of allChunks) {
821
- const message = chunk.choices?.[0]?.message || chunk.choices?.[0]?.delta;
822
- if (message?.content) {
823
- contentParts.push(message.content);
824
- }
825
- if (message?.tool_calls) {
826
- // Ensure tool_calls match the expected format
827
- aggregatedToolCalls = message.tool_calls.map((tc) => ({
828
- id: tc.id || `call_${Date.now()}`,
829
- type: (tc.type || 'function'),
325
+ else if (content.speaker === 'ai') {
326
+ // Convert AI messages
327
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
328
+ const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
329
+ if (toolCalls.length > 0) {
330
+ // Assistant message with tool calls
331
+ const text = textBlocks.map((b) => b.text).join('\n');
332
+ messages.push({
333
+ role: 'assistant',
334
+ content: text || null,
335
+ tool_calls: toolCalls.map((tc) => ({
336
+ id: this.normalizeToOpenAIToolId(tc.id),
337
+ type: 'function',
830
338
  function: {
831
- name: tc.function?.name || '',
832
- arguments: tc.function?.arguments || '',
339
+ name: tc.name,
340
+ arguments: typeof tc.parameters === 'string'
341
+ ? tc.parameters
342
+ : JSON.stringify(tc.parameters),
833
343
  },
834
- }));
835
- }
836
- if (chunk.usage) {
837
- finalUsageData = {
838
- prompt_tokens: chunk.usage.prompt_tokens || 0,
839
- completion_tokens: chunk.usage.completion_tokens || 0,
840
- total_tokens: chunk.usage.total_tokens || 0,
841
- };
842
- }
344
+ })),
345
+ });
346
+ }
347
+ else if (textBlocks.length > 0) {
348
+ // Plain assistant message
349
+ const text = textBlocks.map((b) => b.text).join('\n');
350
+ messages.push({
351
+ role: 'assistant',
352
+ content: text,
353
+ });
843
354
  }
844
- // Yield single reconstructed message
845
- yield {
846
- role: ContentGeneratorRole.ASSISTANT,
847
- content: contentParts.join(''),
848
- tool_calls: aggregatedToolCalls.length > 0 ? aggregatedToolCalls : undefined,
849
- usage: finalUsageData,
850
- };
851
- return;
852
355
  }
- // Process chunks normally - stream them as they come
- for (const chunk of allChunks) {
- // Since we created safe copies during buffering, chunks are now mutable
- // Check if this chunk has message format instead of delta (malformed stream)
- let processedChunk = chunk;
- if (chunk.choices?.[0]?.message && !chunk.choices?.[0]?.delta) {
- this.logger.error(() => '[Cerebras Corruption] Converting malformed chunk from message to delta format', {
- provider: this.baseURL,
- hasMessage: true,
- hasDelta: false,
- messageContent: chunk.choices[0].message?.content?.substring(0, 100),
+ else if (content.speaker === 'tool') {
+ // Convert tool responses
+ const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
+ for (const tr of toolResponses) {
+ messages.push({
+ role: 'tool',
+ content: typeof tr.result === 'string'
+ ? tr.result
+ : JSON.stringify(tr.result),
+ tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
  });
- // Convert message format to delta format for consistent processing
- const message = chunk.choices[0].message;
- processedChunk = {
- choices: [
- {
- delta: {
- content: message?.content ?? undefined,
- role: message?.role,
- tool_calls: message?.tool_calls,
- },
- },
- ],
- usage: chunk.usage,
- };
  }
- const delta = processedChunk.choices?.[0]?.delta;
- if (delta?.content) {
- // Enhanced debug logging to understand streaming behavior
- if (this.isUsingQwen()) {
- this.logger.debug(() => `Chunk: ${JSON.stringify({
- content: delta.content,
- contentLength: delta.content?.length ?? 0,
- isWhitespaceOnly: delta.content?.trim() === '',
- chunkIndex: 0,
- })}`);
+ }
+ }
+ return messages;
+ }
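
The new converter maps the provider-neutral IContent history onto OpenAI's chat schema. As a rough illustration (the IContent shapes and the behavior of `normalizeToOpenAIToolId` are inferred from this diff, and the `hist_1` to `call_1` ID mapping is hypothetical), one tool round trip converts like this:

// Hypothetical IContent turns going into convertToOpenAIMessages.
const history = [
  {
    speaker: 'ai',
    blocks: [
      { type: 'text', text: 'Listing files.' },
      { type: 'tool_call', id: 'hist_1', name: 'list_files', parameters: { path: 'src' } },
    ],
  },
  {
    speaker: 'tool',
    blocks: [{ type: 'tool_response', callId: 'hist_1', result: ['a.ts', 'b.ts'] }],
  },
];
// Roughly the OpenAI-format messages the conversion would produce:
// { role: 'assistant', content: 'Listing files.',
//   tool_calls: [{ id: 'call_1', type: 'function',
//     function: { name: 'list_files', arguments: '{"path":"src"}' } }] }
// { role: 'tool', content: '["a.ts","b.ts"]', tool_call_id: 'call_1' }
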
+ /**
+ * Internal implementation for chat completion
+ */
+ async *generateChatCompletionImpl(contents, tools, maxTokens, abortSignal, modelName) {
+ // Always look up model from SettingsService
+ const model = modelName || this.getModel() || this.getDefaultModel();
+ // Convert IContent to OpenAI messages format
+ const messages = this.convertToOpenAIMessages(contents);
+ // Convert Gemini format tools directly to OpenAI format using the new method
+ const formattedTools = this.toolFormatter.convertGeminiToOpenAI(tools);
+ // Debug log the conversion result
+ if (this.logger.enabled) {
+ this.logger.debug(() => `[OpenAIProvider] Tool conversion summary:`, {
+ inputHadTools: !!tools,
+ inputToolsLength: tools?.length,
+ outputHasTools: !!formattedTools,
+ outputToolsLength: formattedTools?.length,
+ outputToolNames: formattedTools?.map((t) => t.function.name),
+ });
+ }
+ // Get streaming setting from ephemeral settings (default: enabled)
+ const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
+ const streamingEnabled = streamingSetting !== 'disabled';
+ // Get the system prompt
+ const userMemory = this.globalConfig?.getUserMemory
+ ? this.globalConfig.getUserMemory()
+ : '';
+ const systemPrompt = await getCoreSystemPromptAsync(userMemory, model, undefined);
+ // Add system prompt as the first message in the array
+ const messagesWithSystem = [
+ { role: 'system', content: systemPrompt },
+ ...messages,
+ ];
+ // Build request - only include tools if they exist and are not empty
+ const requestBody = {
+ model,
+ messages: messagesWithSystem,
+ ...(formattedTools && formattedTools.length > 0
+ ? {
+ tools: formattedTools,
+ // Add tool_choice for Qwen/Cerebras to ensure proper tool calling
+ tool_choice: 'auto',
+ }
+ : {}),
+ max_tokens: maxTokens,
+ stream: streamingEnabled,
+ };
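
With tools attached and streaming left enabled, the request body assembled above ends up shaped roughly as follows; every value here is an illustrative placeholder, not output captured from the package:

const exampleRequestBody = {
  model: 'qwen3-coder-plus',
  messages: [
    { role: 'system', content: '<core system prompt>' },
    { role: 'user', content: 'List the files in src/' },
  ],
  tools: [
    {
      type: 'function',
      function: { name: 'list_files', description: 'List files', parameters: { type: 'object' } },
    },
  ],
  tool_choice: 'auto', // only present when the tools array is non-empty
  max_tokens: 4096,
  stream: true, // false when the ephemeral 'streaming' setting is 'disabled'
};
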
+ // Debug log request summary for Cerebras/Qwen
+ if (this.logger.enabled &&
+ (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras'))) {
+ this.logger.debug(() => `Request to ${this.getBaseURL()} for model ${model}:`, {
+ baseURL: this.getBaseURL(),
+ model,
+ streamingEnabled,
+ hasTools: 'tools' in requestBody,
+ toolCount: formattedTools?.length || 0,
+ messageCount: messages.length,
+ toolsInRequest: 'tools' in requestBody ? requestBody.tools?.length : 'not included',
+ });
+ }
+ // Get OpenAI client
+ const client = await this.getClient();
+ // Get retry settings from ephemeral settings
+ const ephemeralSettings = this.providerConfig?.getEphemeralSettings?.() || {};
+ const maxRetries = ephemeralSettings['retries'] ?? 6; // Default for OpenAI
+ const initialDelayMs = ephemeralSettings['retrywait'] ?? 4000; // Default for OpenAI
+ // Wrap the API call with retry logic using centralized retry utility
+ const response = await retryWithBackoff(() => client.chat.completions.create(requestBody, { signal: abortSignal }), {
+ maxAttempts: maxRetries,
+ initialDelayMs,
+ maxDelayMs: 30000, // 30 seconds
+ shouldRetry: this.shouldRetryResponse.bind(this),
+ });
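
`retryWithBackoff` is defined elsewhere in the package; from this call site it takes an async factory plus `maxAttempts`, `initialDelayMs`, `maxDelayMs`, and `shouldRetry` options. A minimal sketch under those assumed semantics (exponential backoff with a capped delay), not the package's actual implementation:

async function retryWithBackoff(fn, { maxAttempts, initialDelayMs, maxDelayMs, shouldRetry }) {
  let delay = initialDelayMs;
  for (let attempt = 1; ; attempt++) {
    try {
      return await fn(); // success: hand the result straight back
    } catch (error) {
      if (attempt >= maxAttempts || !shouldRetry(error)) throw error;
      await new Promise((resolve) => setTimeout(resolve, delay));
      delay = Math.min(delay * 2, maxDelayMs); // double the wait, up to the cap
    }
  }
}
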
+ // Check if response is streaming or not
+ if (streamingEnabled) {
+ // Process streaming response
+ let _accumulatedText = '';
+ const accumulatedToolCalls = [];
+ // Buffer for accumulating text chunks for providers that need it
+ let textBuffer = '';
+ const detectedFormat = this.detectToolFormat();
+ // Buffer text for Qwen format providers to avoid stanza formatting
+ const shouldBufferText = detectedFormat === 'qwen';
+ try {
+ // Handle streaming response
+ for await (const chunk of response) {
+ if (abortSignal?.aborted) {
+ break;
  }
- // For text-based models, don't yield content chunks yet
- if (!parser && delta.content) {
- if (this.isUsingQwen()) {
- const isWhitespaceOnly = delta.content.trim() === '';
- if (isWhitespaceOnly) {
- // Buffer whitespace-only chunk
- pendingWhitespace = (pendingWhitespace || '') + delta.content;
- this.logger.debug(() => `Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${pendingWhitespace?.length ?? 0}`);
- continue;
- }
- else if (pendingWhitespace) {
- // Flush buffered whitespace before non-empty chunk to preserve spacing
- this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
+ const choice = chunk.choices?.[0];
+ if (!choice)
+ continue;
+ // Check for finish_reason to detect proper stream ending
+ if (choice.finish_reason) {
+ this.logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
+ model,
+ finishReason: choice.finish_reason,
+ hasAccumulatedText: _accumulatedText.length > 0,
+ hasAccumulatedTools: accumulatedToolCalls.length > 0,
+ hasBufferedText: textBuffer.length > 0,
+ });
+ // If finish_reason is 'length', the response was cut off
+ if (choice.finish_reason === 'length') {
+ this.logger.debug(() => `Response truncated due to length limit for model ${model}`);
+ }
+ // Flush any buffered text when stream finishes
+ if (textBuffer.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
+ };
+ textBuffer = '';
+ }
+ }
+ // Handle text content - buffer for Qwen format, emit immediately for others
+ const deltaContent = choice.delta?.content;
+ if (deltaContent) {
+ _accumulatedText += deltaContent;
+ // Debug log for providers that need buffering
+ if (shouldBufferText) {
+ this.logger.debug(() => `[Streaming] Chunk content for ${detectedFormat} format:`, {
+ deltaContent,
+ length: deltaContent.length,
+ hasNewline: deltaContent.includes('\n'),
+ escaped: JSON.stringify(deltaContent),
+ bufferSize: textBuffer.length,
+ });
+ // Buffer text to avoid stanza formatting
+ textBuffer += deltaContent;
+ // Emit buffered text when we have a complete sentence or paragraph
+ // Look for natural break points
+ if (textBuffer.includes('\n') ||
+ textBuffer.endsWith('. ') ||
+ textBuffer.endsWith('! ') ||
+ textBuffer.endsWith('? ') ||
+ textBuffer.length > 100) {
  yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
  };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
+ textBuffer = '';
  }
  }
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: delta.content,
- };
- hasStreamedContent = true;
+ else {
+ // For other providers, emit text immediately as before
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: deltaContent,
+ },
+ ],
+ };
+ }
  }
- fullContent += delta.content;
- }
- if (delta?.tool_calls) {
- for (const toolCall of delta.tool_calls) {
- this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulatedToolCalls, currentToolFormat);
+ // Handle tool calls
+ const deltaToolCalls = choice.delta?.tool_calls;
+ if (deltaToolCalls && deltaToolCalls.length > 0) {
+ for (const deltaToolCall of deltaToolCalls) {
+ if (deltaToolCall.index === undefined)
+ continue;
+ // Initialize or update accumulated tool call
+ if (!accumulatedToolCalls[deltaToolCall.index]) {
+ accumulatedToolCalls[deltaToolCall.index] = {
+ id: deltaToolCall.id || '',
+ type: 'function',
+ function: {
+ name: deltaToolCall.function?.name || '',
+ arguments: '',
+ },
+ };
+ }
+ const tc = accumulatedToolCalls[deltaToolCall.index];
+ if (tc) {
+ if (deltaToolCall.id)
+ tc.id = deltaToolCall.id;
+ if (deltaToolCall.function?.name)
+ tc.function.name = deltaToolCall.function.name;
+ if (deltaToolCall.function?.arguments) {
+ tc.function.arguments += deltaToolCall.function.arguments;
+ }
+ }
+ }
  }
  }
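
Streaming tool calls arrive as fragments keyed by `index`: the first fragment usually carries the ID and function name, and later fragments append pieces of the JSON `arguments` string. The loop above stitches them back together; the same merge in isolation, with illustrative fragment values:

// Merging OpenAI streaming tool_call fragments by index (illustrative data).
const fragments = [
  { index: 0, id: 'call_abc', function: { name: 'list_files', arguments: '' } },
  { index: 0, function: { arguments: '{"path"' } },
  { index: 0, function: { arguments: ': "src"}' } },
];
const merged = [];
for (const f of fragments) {
  merged[f.index] ??= { id: '', type: 'function', function: { name: '', arguments: '' } };
  const tc = merged[f.index];
  if (f.id) tc.id = f.id;
  if (f.function?.name) tc.function.name = f.function.name;
  if (f.function?.arguments) tc.function.arguments += f.function.arguments;
}
// merged[0].function.arguments === '{"path": "src"}'
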
- // Check for usage data in the chunk
- if (processedChunk.usage) {
- usageData = {
- prompt_tokens: processedChunk.usage.prompt_tokens || 0,
- completion_tokens: processedChunk.usage.completion_tokens || 0,
- total_tokens: processedChunk.usage.total_tokens || 0,
- };
- }
  }
- }
- else {
- // Non-streaming response - handle as a single completion
- const completionResponse = response;
- const choice = completionResponse.choices[0];
- if (choice?.message.content) {
- fullContent = choice.message.content;
- }
- if (choice?.message.tool_calls) {
- // Convert tool calls to the standard format
- for (const toolCall of choice.message.tool_calls) {
- if (toolCall.type === 'function' && toolCall.function) {
- // Don't fix double stringification here - it's handled later in the final processing
- accumulatedToolCalls.push({
- id: toolCall.id,
- type: 'function',
- function: toolCall.function,
- });
- }
+ catch (error) {
+ if (abortSignal?.aborted) {
+ throw error;
+ }
+ else {
+ this.logger.error('Error processing streaming response:', error);
+ throw error;
  }
  }
- if (completionResponse.usage) {
- usageData = {
- prompt_tokens: completionResponse.usage.prompt_tokens,
- completion_tokens: completionResponse.usage.completion_tokens,
- total_tokens: completionResponse.usage.total_tokens,
- };
- }
- // For non-streaming, we yield the full content at once if there's no parser
- if (!parser && fullContent) {
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent,
- };
- hasStreamedContent = true;
- }
- }
- // Flush any remaining pending whitespace for Qwen
- if (pendingWhitespace && this.isUsingQwen() && !parser) {
- this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
- };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
- }
- // After stream ends, parse text-based tool calls if needed
- if (parser && fullContent) {
- const { cleanedContent, toolCalls } = parser.parse(fullContent);
- if (toolCalls.length > 0) {
- // Convert to standard format
- const standardToolCalls = toolCalls.map((tc, index) => ({
- id: `call_${Date.now()}_${index}`,
- type: 'function',
- function: {
- name: tc.name,
- arguments: JSON.stringify(tc.arguments),
- },
- }));
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- tool_calls: standardToolCalls,
- usage: usageData,
- };
- }
- else {
- // No tool calls found, yield cleaned content
+ // Flush any remaining buffered text
+ if (textBuffer.length > 0) {
  yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- usage: usageData,
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: textBuffer,
+ },
+ ],
  };
+ textBuffer = '';
  }
- }
- }
- else {
- // Standard OpenAI tool call handling
+ // Emit accumulated tool calls as IContent if any
  if (accumulatedToolCalls.length > 0) {
- // Fix double stringification for Qwen tool calls
- // Qwen models pre-stringify arguments values, but later in the process
- // they are being JSON.stringify'd again
- let fixedToolCalls = accumulatedToolCalls;
- if (this.isUsingQwen()) {
- this.logger.debug(() => `[Qwen Fix] Processing ${accumulatedToolCalls.length} tool calls for double-stringification fix`);
- fixedToolCalls = accumulatedToolCalls.map((toolCall, index) => {
- this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
- name: toolCall.function.name,
- argumentsType: typeof toolCall.function.arguments,
- argumentsLength: toolCall.function.arguments?.length,
- argumentsSample: toolCall.function.arguments?.substring(0, 100),
- })}`);
- // For Qwen, check for nested double-stringification
- // Qwen models stringify array/object values WITHIN the JSON arguments
- if (toolCall.function.arguments &&
- typeof toolCall.function.arguments === 'string') {
- try {
- // First, parse the arguments to get the JSON object
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- let hasNestedStringification = false;
- // Check each property to see if it's a stringified array/object
- const fixedArgs = {};
- for (const [key, value] of Object.entries(parsedArgs)) {
- if (typeof value === 'string') {
- const trimmed = value.trim();
- // Check if it looks like a stringified array or object
- // Also check for Python-style dictionaries with single quotes
- if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
- (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
- try {
- // Try to parse it as JSON
- const nestedParsed = JSON.parse(value);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Try to convert Python-style to JSON (single quotes to double quotes)
- try {
- const jsonified = value
- .replace(/'/g, '"')
- .replace(/: True/g, ': true')
- .replace(/: False/g, ': false')
- .replace(/: None/g, ': null');
- const nestedParsed = JSON.parse(jsonified);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Not valid JSON even after conversion, keep as string
- fixedArgs[key] = value;
- }
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- if (hasNestedStringification) {
- this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(fixedArgs),
- },
- };
- }
- }
- catch (_e) {
- // If parsing fails, check for old-style double-stringification
- if (toolCall.function.arguments.startsWith('"') &&
- toolCall.function.arguments.endsWith('"')) {
- try {
- // Old fix: entire arguments were double-stringified
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(parsedArgs),
- },
- };
- }
- catch {
- // Leave as-is if we can't parse
- }
- }
- }
- }
- // No fix needed
- this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
- return toolCall;
- });
- }
- if (this.isUsingQwen()) {
1118
- this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
1119
- contentLength: fullContent.length,
1120
- content: fullContent.substring(0, 200) +
1121
- (fullContent.length > 200 ? '...' : ''),
1122
- toolCallCount: accumulatedToolCalls.length,
1123
- hasStreamedContent,
1124
- })}`);
1125
- }
1126
- // For Qwen models, don't duplicate content if we've already streamed it
1127
- // BUT Cerebras needs at least a space to continue after tool responses
1128
- const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
1129
- if (isCerebras) {
1130
- this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
1131
- hasStreamedContent,
1132
- willSendSpace: hasStreamedContent,
594
+ const blocks = [];
595
+ const detectedFormat = this.detectToolFormat();
596
+ for (const tc of accumulatedToolCalls) {
597
+ if (!tc)
598
+ continue;
599
+ // Process tool parameters with double-escape handling
600
+ const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '', detectedFormat);
601
+ blocks.push({
602
+ type: 'tool_call',
603
+ id: this.normalizeToHistoryToolId(tc.id),
604
+ name: tc.function.name || '',
605
+ parameters: processedParameters,
1133
606
  });
1134
607
  }
1135
- const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
1136
- if (shouldOmitContent) {
1137
- // Only yield tool calls with empty content to avoid duplication
1138
- yield {
1139
- role: ContentGeneratorRole.ASSISTANT,
1140
- content: '',
1141
- tool_calls: fixedToolCalls,
1142
- usage: usageData,
1143
- };
1144
- }
1145
- else if (isCerebras && hasStreamedContent) {
1146
- // Cerebras: Send just a space to prevent duplication but allow continuation
1147
- // This prevents the repeated "Let me search..." text
1148
- this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
1149
- yield {
1150
- role: ContentGeneratorRole.ASSISTANT,
1151
- content: ' ', // Single space instead of full content
1152
- tool_calls: fixedToolCalls,
1153
- usage: usageData,
1154
- };
1155
- }
1156
- else {
1157
- // Include full content with tool calls
608
+ if (blocks.length > 0) {
1158
609
  yield {
1159
- role: ContentGeneratorRole.ASSISTANT,
1160
- content: fullContent || '',
1161
- tool_calls: fixedToolCalls,
1162
- usage: usageData,
610
+ speaker: 'ai',
611
+ blocks,
1163
612
  };
1164
613
  }
1165
614
  }
1166
- else if (usageData) {
1167
- // Always emit usage data so downstream consumers can update stats
1168
- yield {
1169
- role: ContentGeneratorRole.ASSISTANT,
1170
- content: '',
1171
- usage: usageData,
1172
- };
1173
- }
1174
615
  }
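
For Qwen-format providers the streaming branch above trades a little latency for readability: text deltas accumulate in `textBuffer` and are only released at a newline, at sentence-final punctuation followed by a space, past 100 characters, or when the stream finishes. The break-point predicate in isolation:

// Test used to decide when buffered streaming text is worth emitting.
function shouldFlush(textBuffer) {
  return textBuffer.includes('\n') ||
    textBuffer.endsWith('. ') ||
    textBuffer.endsWith('! ') ||
    textBuffer.endsWith('? ') ||
    textBuffer.length > 100;
}
// shouldFlush('Reading the file. ') === true
// shouldFlush('Reading the fi')     === false
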
- }
- setModel(modelId) {
- // Update SettingsService as the source of truth
- this.setModelInSettings(modelId).catch((error) => {
- this.logger.debug(() => `Failed to persist model to SettingsService: ${error}`);
- });
- // Keep local cache for performance
- this.currentModel = modelId;
- }
- getCurrentModel() {
- // Try to get from SettingsService first (source of truth)
- try {
- const settingsService = getSettingsService();
- const providerSettings = settingsService.getProviderSettings(this.name);
- if (providerSettings.model) {
- return providerSettings.model;
+ else {
+ // Handle non-streaming response
+ const completion = response;
+ const choice = completion.choices?.[0];
+ if (!choice) {
+ throw new Error('No choices in completion response');
+ }
+ const blocks = [];
+ // Handle text content
+ if (choice.message?.content) {
+ blocks.push({
+ type: 'text',
+ text: choice.message.content,
+ });
  }
- }
- catch (error) {
- this.logger.debug(() => `Failed to get model from SettingsService: ${error}`);
- }
- // Fall back to cached value or default
- return this.currentModel || this.getDefaultModel();
- }
- getDefaultModel() {
- // Return the default model for this provider
- // This can be overridden based on configuration or endpoint
- if (this.isUsingQwen()) {
- return 'qwen3-coder-plus';
- }
- return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
- }
- setApiKey(apiKey) {
- // Call base provider implementation
- super.setApiKey?.(apiKey);
- // Persist to SettingsService if available
- this.setApiKeyInSettings(apiKey).catch((error) => {
- this.logger.debug(() => `Failed to persist API key to SettingsService: ${error}`);
- });
- // Create a new OpenAI client with the updated API key
- const clientOptions = {
- apiKey,
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (this.baseURL) {
- clientOptions.baseURL = this.baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = apiKey; // Update cached key
- }
- setBaseUrl(baseUrl) {
- // If no baseUrl is provided, clear to default (undefined)
- this.baseURL = baseUrl && baseUrl.trim() !== '' ? baseUrl : undefined;
- // Persist to SettingsService if available
- this.setBaseUrlInSettings(this.baseURL).catch((error) => {
- this.logger.debug(() => `Failed to persist base URL to SettingsService: ${error}`);
- });
- // Update OAuth configuration based on endpoint validation
- // Enable OAuth for Qwen endpoints if we have an OAuth manager
- const shouldEnableQwenOAuth = !!this.baseProviderConfig.oauthManager &&
- (isQwenEndpoint(this.baseURL || '') ||
- this.baseURL === 'https://portal.qwen.ai/v1');
- this.updateOAuthConfig(shouldEnableQwenOAuth, shouldEnableQwenOAuth ? 'qwen' : undefined, this.baseProviderConfig.oauthManager);
- // Call base provider implementation
- super.setBaseUrl?.(baseUrl);
- // Create a new OpenAI client with the updated (or cleared) base URL
- const clientOptions = {
- // Use existing key or empty string as placeholder
- apiKey: this._cachedClientKey || 'placeholder',
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (this.baseURL) {
- clientOptions.baseURL = this.baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- // Clear cached key to force re-resolution on next API call
- this._cachedClientKey = undefined;
- }
- setConfig(config) {
- this.providerConfig = config;
- }
- setToolFormatOverride(format) {
- this.toolFormatOverride = format || undefined;
- }
- /**
- * Estimates the remote context usage for the current conversation
- * @param conversationId The conversation ID
- * @param parentId The parent message ID
- * @param promptMessages The messages being sent in the current prompt
- * @returns Context usage information including remote tokens
- */
- estimateContextUsage(conversationId, parentId, promptMessages) {
- const promptTokens = estimateMessagesTokens(promptMessages);
- return estimateRemoteTokens(this.currentModel, this.conversationCache, conversationId, parentId, promptTokens);
- }
- /**
- * Get the conversation cache instance
- * @returns The conversation cache
- */
- getConversationCache() {
- return this.conversationCache;
- }
- /**
- * Identifies and fixes missing tool responses by adding synthetic responses in place.
- * Similar to AnthropicProvider's validateAndFixMessages approach.
- * This ensures synthetic responses persist in the conversation history.
- * @param messages The message array to fix in place
- * @returns Array of tool call IDs that were fixed
- */
- identifyAndFixMissingToolResponses(messages) {
- const fixedIds = [];
- const pendingToolCalls = [];
- // Process messages in order, tracking tool calls and responses
- for (let i = 0; i < messages.length; i++) {
- const msg = messages[i];
- if (msg.role === 'assistant' && msg.tool_calls) {
- // If we have pending tool calls from a previous assistant message,
- // add synthetic responses for them before processing this new assistant message
- if (pendingToolCalls.length > 0) {
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Insert synthetic responses before the current assistant message
- messages.splice(i, 0, ...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- // Adjust index to account for inserted messages
- i += syntheticResponses.length;
- // Clear pending tool calls
- pendingToolCalls.length = 0;
- }
- // Now track the new tool calls from this assistant message
- msg.tool_calls.forEach((toolCall) => {
- if (toolCall.id) {
- pendingToolCalls.push({
- id: toolCall.id,
- name: toolCall.function.name,
+ // Handle tool calls
+ if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
+ const detectedFormat = this.detectToolFormat();
+ for (const toolCall of choice.message.tool_calls) {
+ if (toolCall.type === 'function') {
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', toolCall.function.name || '', detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(toolCall.id),
+ name: toolCall.function.name || '',
+ parameters: processedParameters,
  });
  }
- });
- }
- else if (msg.role === 'tool' && pendingToolCalls.length > 0) {
- // Match tool responses with pending tool calls
- pendingToolCalls.splice(pendingToolCalls.findIndex((tc) => tc.id === msg.tool_call_id), 1);
- }
- else if ((msg.role === 'assistant' || msg.role === 'user') &&
- pendingToolCalls.length > 0) {
- // We hit a non-tool message with pending tool calls - need to add synthetic responses
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Insert synthetic responses before the current message
- messages.splice(i, 0, ...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- // Adjust index to account for inserted messages
- i += syntheticResponses.length;
- // Clear pending tool calls
- pendingToolCalls.length = 0;
- }
- }
- // Handle any remaining pending tool calls at the end
- if (pendingToolCalls.length > 0) {
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Add to the end of messages
- messages.push(...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- }
- return fixedIds;
- }
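
The deleted `identifyAndFixMissingToolResponses` guarded against histories where an assistant message advertises tool calls that never received a `tool` reply, for example after a user cancels mid-run; OpenAI-compatible endpoints generally reject such conversations, so it spliced in synthetic cancelled responses. Before and after, with a hypothetical ID:

// Before: the tool call dangles.
// [{ role: 'assistant', tool_calls: [{ id: 'call_9', ... }] },
//  { role: 'user', content: 'never mind' }]
//
// After the in-place fix:
// [{ role: 'assistant', tool_calls: [{ id: 'call_9', ... }] },
//  { role: 'tool', tool_call_id: 'call_9',
//    content: 'Tool execution cancelled by user', _synthetic: true, _cancelled: true },
//  { role: 'user', content: 'never mind' }]
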
- /**
- * OpenAI always requires payment (API key)
- */
- isPaidMode() {
- return true;
- }
- clearState() {
- // Clear the conversation cache to prevent tool call ID mismatches
- this.conversationCache.clear();
- }
- /**
- * Get the list of server tools supported by this provider
- */
- getServerTools() {
- return [];
- }
- /**
- * Invoke a server tool (native provider tool)
- */
- async invokeServerTool(_toolName, _params, _config) {
- throw new Error('Server tools not supported by OpenAI provider');
- }
- /**
- * Set model parameters to be included in API calls
- * @param params Parameters to merge with existing, or undefined to clear all
- */
- setModelParams(params) {
- if (params === undefined) {
- this.modelParams = undefined;
- }
- else {
- this.modelParams = { ...this.modelParams, ...params };
- }
- // Persist to SettingsService if available
- this.setModelParamsInSettings(this.modelParams).catch((error) => {
- this.logger.debug(() => `Failed to persist model params to SettingsService: ${error}`);
- });
- }
- /**
- * Get current model parameters
- * @returns Current parameters or undefined if not set
- */
- getModelParams() {
- return this.modelParams;
- }
- /**
- * Initialize provider configuration from SettingsService
- */
- async initializeFromSettings() {
- try {
- // Load saved model if available
- const savedModel = await this.getModelFromSettings();
- if (savedModel) {
- this.currentModel = savedModel;
- }
- // Load saved base URL if available
- const savedBaseUrl = await this.getBaseUrlFromSettings();
- if (savedBaseUrl !== undefined) {
- this.baseURL = savedBaseUrl;
+ }
  }
- // Load saved model parameters if available
- const savedParams = await this.getModelParamsFromSettings();
- if (savedParams) {
- this.modelParams = savedParams;
+ // Emit the complete response as a single IContent
+ if (blocks.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks,
+ };
  }
- this.logger.debug(() => `Initialized from SettingsService - model: ${this.currentModel}, baseURL: ${this.baseURL}, params: ${JSON.stringify(this.modelParams)}`);
- }
- catch (error) {
- this.logger.debug(() => `Failed to initialize OpenAI provider from SettingsService: ${error}`);
  }
  }
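
`generateChatCompletionImpl` is an async generator that yields IContent events rather than raw OpenAI chunks, so callers consume text and tool calls uniformly across providers. A hypothetical call site, with argument shapes inferred from this diff (`provider`, `history`, `tools`, and `signal` are placeholders):

// Illustrative consumption of the IContent stream.
for await (const event of provider.generateChatCompletionImpl(history, tools, 4096, signal)) {
  for (const block of event.blocks) {
    if (block.type === 'text') process.stdout.write(block.text);
    if (block.type === 'tool_call') console.log('tool requested:', block.name, block.parameters);
  }
}
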
  /**
- * Check if the provider is authenticated using any available method
- * Uses the base provider's isAuthenticated implementation
- */
- async isAuthenticated() {
- return super.isAuthenticated();
- }
- /**
- * Detect the appropriate tool format for the current model/configuration
- * @returns The detected tool format
+ * Detects the tool call format based on the model being used
+ * @returns The detected tool format ('openai' or 'qwen')
  */
  detectToolFormat() {
  try {
- const settingsService = getSettingsService();
- // First check SettingsService for toolFormat override in provider settings
- // Note: This is synchronous access to cached settings, not async
- const currentSettings = settingsService['settings'];
- const providerSettings = currentSettings?.providers?.[this.name];
- const toolFormatOverride = providerSettings?.toolFormat;
- // If explicitly set to a specific format (not 'auto'), use it
- if (toolFormatOverride && toolFormatOverride !== 'auto') {
- return toolFormatOverride;
- }
- // Auto-detect based on model name if set to 'auto' or not set
- const modelName = this.currentModel.toLowerCase();
- // Check for GLM-4.5 models (glm-4.5, glm-4-5)
- if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
- return 'qwen';
+ // Try to get format from SettingsService if available
+ const settings = this.providerConfig?.getEphemeralSettings?.();
+ if (settings && settings['tool-format']) {
+ return settings['tool-format'];
  }
- // Check for qwen models
- if (modelName.includes('qwen')) {
- return 'qwen';
- }
- // Default to 'openai' format
- return 'openai';
  }
  catch (error) {
  this.logger.debug(() => `Failed to detect tool format from SettingsService: ${error}`);
- // Fallback detection without SettingsService
- const modelName = this.currentModel.toLowerCase();
- if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
- return 'qwen';
- }
- if (modelName.includes('qwen')) {
- return 'qwen';
- }
- return 'openai';
  }
- }
- /**
- * Get appropriate tool_choice value based on detected tool format
- * @param tools Array of tools (if any)
- * @returns Appropriate tool_choice value for the current format
- */
- getToolChoiceForFormat(tools) {
- if (!tools || tools.length === 0) {
- return undefined;
+ // Fallback detection without SettingsService - always look up current model
+ const modelName = (this.getModel() || this.getDefaultModel()).toLowerCase();
+ if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
+ return 'qwen';
  }
- // For all formats, use 'auto' (standard behavior)
- // Future enhancement: different formats may need different tool_choice values
- return 'auto';
- }
- /**
- * Format tools for API based on detected tool format
- * @param tools Array of tools to format
- * @returns Formatted tools for API consumption
- */
- formatToolsForAPI(tools) {
- // For now, always use OpenAI format through OpenRouter
- // TODO: Investigate if OpenRouter needs special handling for GLM/Qwen
- // const detectedFormat = this.detectToolFormat();
- // if (detectedFormat === 'qwen') {
- // // Convert OpenAI format to Qwen format: {name, description, parameters} without type/function wrapper
- // return tools.map((tool) => ({
- // name: tool.function.name,
- // description: tool.function.description,
- // parameters: tool.function.parameters,
- // }));
- // }
- // For all formats, use the existing ToolFormatter
- return this.toolFormatter.toProviderFormat(tools, 'openai');
+ if (modelName.includes('qwen')) {
+ return 'qwen';
+ }
+ return 'openai';
  }
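
Resolution order in the rewritten `detectToolFormat`: an explicit `tool-format` ephemeral setting wins, then model-name heuristics, then `'openai'`. Expected results under that logic, with illustrative model names:

// No 'tool-format' setting present:
// 'qwen3-coder-plus' -> 'qwen'   (name contains 'qwen')
// 'glm-4.5-air'      -> 'qwen'   (GLM-4.5 family uses the qwen format)
// 'gpt-5'            -> 'openai' (default)
// With getEphemeralSettings() returning { 'tool-format': 'openai' }:
// any model          -> 'openai' (explicit override wins)
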
  /**
  * Parse tool response from API (placeholder for future response parsing)
@@ -1522,5 +688,27 @@ export class OpenAIProvider extends BaseProvider {
  // For now, return the response as-is
  return response;
  }
+ /**
+ * Determines whether a response should be retried based on error codes
+ * @param error The error object from the API response
+ * @returns true if the request should be retried, false otherwise
+ */
+ shouldRetryResponse(error) {
+ // Don't retry if we're streaming chunks - just continue processing
+ if (error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ error.status === 200) {
+ return false;
+ }
+ // Retry on 429 rate limit errors or 5xx server errors
+ const shouldRetry = Boolean(error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ (error.status === 429 ||
+ (error.status >= 500 &&
+ error.status < 600)));
+ return shouldRetry;
+ }
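
`shouldRetryResponse` is purely status-code driven: a status of 200 seen mid-stream is never retried, 429 and 5xx are, and anything without a numeric `status` falls through to false. Spot-checking the logic, assuming errors shaped like `{ status: number }`:

// shouldRetryResponse({ status: 200 }) -> false (streaming chunk, keep processing)
// shouldRetryResponse({ status: 429 }) -> true  (rate limited)
// shouldRetryResponse({ status: 503 }) -> true  (server error)
// shouldRetryResponse({ status: 400 }) -> false (client error, retrying won't help)
// shouldRetryResponse(new Error('x'))  -> false (no status field)
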
  }
  //# sourceMappingURL=OpenAIProvider.js.map