@vybestack/llxprt-code-core 0.1.23 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. package/README.md +21 -17
  2. package/dist/src/adapters/IStreamAdapter.d.ts +3 -3
  3. package/dist/src/auth/oauth-errors.d.ts +173 -0
  4. package/dist/src/auth/oauth-errors.js +461 -0
  5. package/dist/src/auth/oauth-errors.js.map +1 -0
  6. package/dist/src/auth/precedence.d.ts +1 -5
  7. package/dist/src/auth/precedence.js +28 -48
  8. package/dist/src/auth/precedence.js.map +1 -1
  9. package/dist/src/auth/token-store.js +2 -2
  10. package/dist/src/auth/token-store.js.map +1 -1
  11. package/dist/src/auth/types.d.ts +4 -4
  12. package/dist/src/code_assist/codeAssist.js +19 -6
  13. package/dist/src/code_assist/codeAssist.js.map +1 -1
  14. package/dist/src/code_assist/oauth2.d.ts +7 -0
  15. package/dist/src/code_assist/oauth2.js +82 -32
  16. package/dist/src/code_assist/oauth2.js.map +1 -1
  17. package/dist/src/code_assist/server.js +15 -4
  18. package/dist/src/code_assist/server.js.map +1 -1
  19. package/dist/src/code_assist/setup.js +9 -0
  20. package/dist/src/code_assist/setup.js.map +1 -1
  21. package/dist/src/config/index.d.ts +7 -0
  22. package/dist/src/config/index.js +8 -0
  23. package/dist/src/config/index.js.map +1 -0
  24. package/dist/src/core/client.d.ts +15 -20
  25. package/dist/src/core/client.js +98 -124
  26. package/dist/src/core/client.js.map +1 -1
  27. package/dist/src/core/compression-config.d.ts +10 -0
  28. package/dist/src/core/compression-config.js +17 -0
  29. package/dist/src/core/compression-config.js.map +1 -0
  30. package/dist/src/core/coreToolScheduler.js +50 -15
  31. package/dist/src/core/coreToolScheduler.js.map +1 -1
  32. package/dist/src/core/geminiChat.d.ts +68 -9
  33. package/dist/src/core/geminiChat.js +940 -405
  34. package/dist/src/core/geminiChat.js.map +1 -1
  35. package/dist/src/core/nonInteractiveToolExecutor.js +70 -19
  36. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  37. package/dist/src/core/prompts.js +35 -25
  38. package/dist/src/core/prompts.js.map +1 -1
  39. package/dist/src/core/turn.d.ts +1 -0
  40. package/dist/src/core/turn.js +8 -6
  41. package/dist/src/core/turn.js.map +1 -1
  42. package/dist/src/ide/ide-client.d.ts +1 -1
  43. package/dist/src/ide/ide-client.js +12 -6
  44. package/dist/src/ide/ide-client.js.map +1 -1
  45. package/dist/src/index.d.ts +4 -2
  46. package/dist/src/index.js +5 -2
  47. package/dist/src/index.js.map +1 -1
  48. package/dist/src/prompt-config/TemplateEngine.js +17 -0
  49. package/dist/src/prompt-config/TemplateEngine.js.map +1 -1
  50. package/dist/src/prompt-config/defaults/core-defaults.js +39 -32
  51. package/dist/src/prompt-config/defaults/core-defaults.js.map +1 -1
  52. package/dist/src/prompt-config/defaults/core.md +2 -0
  53. package/dist/src/prompt-config/defaults/provider-defaults.js +34 -27
  54. package/dist/src/prompt-config/defaults/provider-defaults.js.map +1 -1
  55. package/dist/src/prompt-config/defaults/providers/gemini/core.md +270 -0
  56. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/core.md +12 -0
  57. package/dist/src/prompt-config/defaults/providers/gemini/models/gemini-2.5-flash/gemini-2-5-flash/core.md +12 -0
  58. package/dist/src/prompt-config/types.d.ts +2 -0
  59. package/dist/src/providers/BaseProvider.d.ts +39 -13
  60. package/dist/src/providers/BaseProvider.js +102 -28
  61. package/dist/src/providers/BaseProvider.js.map +1 -1
  62. package/dist/src/providers/IProvider.d.ts +17 -3
  63. package/dist/src/providers/LoggingProviderWrapper.d.ts +10 -3
  64. package/dist/src/providers/LoggingProviderWrapper.js +33 -27
  65. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  66. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  67. package/dist/src/providers/ProviderContentGenerator.js +9 -6
  68. package/dist/src/providers/ProviderContentGenerator.js.map +1 -1
  69. package/dist/src/providers/ProviderManager.d.ts +4 -0
  70. package/dist/src/providers/ProviderManager.js +6 -0
  71. package/dist/src/providers/ProviderManager.js.map +1 -1
  72. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +34 -21
  73. package/dist/src/providers/anthropic/AnthropicProvider.js +505 -492
  74. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  75. package/dist/src/providers/gemini/GeminiProvider.d.ts +23 -9
  76. package/dist/src/providers/gemini/GeminiProvider.js +344 -515
  77. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  78. package/dist/src/providers/openai/ConversationCache.d.ts +3 -3
  79. package/dist/src/providers/openai/IChatGenerateParams.d.ts +9 -4
  80. package/dist/src/providers/openai/OpenAIProvider.d.ts +46 -96
  81. package/dist/src/providers/openai/OpenAIProvider.js +532 -1393
  82. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  83. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  84. package/dist/src/providers/openai/buildResponsesRequest.js +67 -37
  85. package/dist/src/providers/openai/buildResponsesRequest.js.map +1 -1
  86. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +2 -2
  87. package/dist/src/providers/openai/estimateRemoteTokens.js +21 -8
  88. package/dist/src/providers/openai/estimateRemoteTokens.js.map +1 -1
  89. package/dist/src/providers/openai/parseResponsesStream.d.ts +6 -2
  90. package/dist/src/providers/openai/parseResponsesStream.js +99 -391
  91. package/dist/src/providers/openai/parseResponsesStream.js.map +1 -1
  92. package/dist/src/providers/openai/syntheticToolResponses.d.ts +5 -5
  93. package/dist/src/providers/openai/syntheticToolResponses.js +102 -91
  94. package/dist/src/providers/openai/syntheticToolResponses.js.map +1 -1
  95. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +89 -0
  96. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +451 -0
  97. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
  98. package/dist/src/providers/openai-responses/index.d.ts +1 -0
  99. package/dist/src/providers/openai-responses/index.js +2 -0
  100. package/dist/src/providers/openai-responses/index.js.map +1 -0
  101. package/dist/src/providers/tokenizers/OpenAITokenizer.js +3 -3
  102. package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
  103. package/dist/src/providers/types.d.ts +1 -1
  104. package/dist/src/services/ClipboardService.d.ts +19 -0
  105. package/dist/src/services/ClipboardService.js +66 -0
  106. package/dist/src/services/ClipboardService.js.map +1 -0
  107. package/dist/src/services/history/ContentConverters.d.ts +43 -0
  108. package/dist/src/services/history/ContentConverters.js +325 -0
  109. package/dist/src/services/history/ContentConverters.js.map +1 -0
  110. package/dist/src/{providers/IMessage.d.ts → services/history/HistoryEvents.d.ts} +16 -22
  111. package/dist/src/{providers/IMessage.js → services/history/HistoryEvents.js} +1 -1
  112. package/dist/src/services/history/HistoryEvents.js.map +1 -0
  113. package/dist/src/services/history/HistoryService.d.ts +220 -0
  114. package/dist/src/services/history/HistoryService.js +673 -0
  115. package/dist/src/services/history/HistoryService.js.map +1 -0
  116. package/dist/src/services/history/IContent.d.ts +183 -0
  117. package/dist/src/services/history/IContent.js +104 -0
  118. package/dist/src/services/history/IContent.js.map +1 -0
  119. package/dist/src/services/index.d.ts +1 -0
  120. package/dist/src/services/index.js +1 -0
  121. package/dist/src/services/index.js.map +1 -1
  122. package/dist/src/telemetry/types.d.ts +16 -4
  123. package/dist/src/telemetry/types.js.map +1 -1
  124. package/dist/src/tools/IToolFormatter.d.ts +2 -2
  125. package/dist/src/tools/ToolFormatter.d.ts +42 -4
  126. package/dist/src/tools/ToolFormatter.js +159 -37
  127. package/dist/src/tools/ToolFormatter.js.map +1 -1
  128. package/dist/src/tools/doubleEscapeUtils.d.ts +57 -0
  129. package/dist/src/tools/doubleEscapeUtils.js +241 -0
  130. package/dist/src/tools/doubleEscapeUtils.js.map +1 -0
  131. package/dist/src/tools/read-file.d.ts +6 -1
  132. package/dist/src/tools/read-file.js +25 -11
  133. package/dist/src/tools/read-file.js.map +1 -1
  134. package/dist/src/tools/todo-schemas.d.ts +4 -4
  135. package/dist/src/tools/tools.js +13 -0
  136. package/dist/src/tools/tools.js.map +1 -1
  137. package/dist/src/tools/write-file.d.ts +6 -1
  138. package/dist/src/tools/write-file.js +48 -26
  139. package/dist/src/tools/write-file.js.map +1 -1
  140. package/dist/src/types/modelParams.d.ts +8 -0
  141. package/dist/src/utils/bfsFileSearch.js +2 -6
  142. package/dist/src/utils/bfsFileSearch.js.map +1 -1
  143. package/dist/src/utils/schemaValidator.js +16 -1
  144. package/dist/src/utils/schemaValidator.js.map +1 -1
  145. package/package.json +8 -7
  146. package/dist/src/providers/IMessage.js.map +0 -1
  147. package/dist/src/providers/adapters/GeminiCompatibleWrapper.d.ts +0 -69
  148. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js +0 -577
  149. package/dist/src/providers/adapters/GeminiCompatibleWrapper.js.map +0 -1
@@ -17,463 +17,157 @@
   * @plan PLAN-20250120-DEBUGLOGGING.P15
   * @requirement REQ-INT-001.1
   */
- import { DebugLogger } from '../../debug/index.js';
- import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
- import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
- import { ToolFormatter } from '../../tools/ToolFormatter.js';
  import OpenAI from 'openai';
- import { RESPONSES_API_MODELS } from './RESPONSES_API_MODELS.js';
- import { ConversationCache } from './ConversationCache.js';
- import { estimateMessagesTokens, estimateRemoteTokens, } from './estimateRemoteTokens.js';
- // ConversationContext removed - using inline conversation ID generation
- import { parseResponsesStream, parseErrorResponse, } from './parseResponsesStream.js';
- import { buildResponsesRequest } from './buildResponsesRequest.js';
+ import * as http from 'http';
+ import * as https from 'https';
+ import * as net from 'net';
  import { BaseProvider } from '../BaseProvider.js';
- import { isQwenEndpoint, generateOAuthEndpointMismatchError, } from '../../config/endpoints.js';
- import { getSettingsService } from '../../settings/settingsServiceInstance.js';
+ import { DebugLogger } from '../../debug/index.js';
+ import { ToolFormatter } from '../../tools/ToolFormatter.js';
+ import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
+ import { getCoreSystemPromptAsync } from '../../core/prompts.js';
  export class OpenAIProvider extends BaseProvider {
+ name = 'openai';
  logger;
- openai;
- currentModel = process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
- baseURL;
- providerConfig;
  toolFormatter;
- toolFormatOverride;
- conversationCache;
- modelParams;
  _cachedClient;
  _cachedClientKey;
  constructor(apiKey, baseURL, config, oauthManager) {
+ // Normalize empty string to undefined for proper precedence handling
+ const normalizedApiKey = apiKey && apiKey.trim() !== '' ? apiKey : undefined;
+ // Detect if this is a Qwen endpoint
+ const isQwenEndpoint = !!(baseURL &&
+ (baseURL.includes('dashscope.aliyuncs.com') ||
+ baseURL.includes('api.qwen.com') ||
+ baseURL.includes('qwen')));
  // Initialize base provider with auth configuration
- // Check if we should enable OAuth for Qwen
- // Check OAuth enablement from OAuth manager if available
- let shouldEnableQwenOAuth = false;
- if (oauthManager) {
- // Check if OAuth is enabled for qwen in the OAuth manager (from settings)
- const manager = oauthManager;
- if (manager.isOAuthEnabled &&
- typeof manager.isOAuthEnabled === 'function') {
- shouldEnableQwenOAuth = manager.isOAuthEnabled('qwen');
- }
- // Also enable if this looks like a Qwen endpoint
- if (!shouldEnableQwenOAuth) {
- shouldEnableQwenOAuth =
- isQwenEndpoint(baseURL || '') ||
- (!baseURL && (!apiKey || apiKey === '')) ||
- baseURL === 'https://portal.qwen.ai/v1';
- }
- }
- const baseConfig = {
+ super({
  name: 'openai',
- apiKey,
+ apiKey: normalizedApiKey,
  baseURL,
- cliKey: !apiKey || apiKey === '' ? undefined : apiKey, // Don't set cliKey if no API key to allow OAuth
- envKeyNames: ['OPENAI_API_KEY'],
- isOAuthEnabled: shouldEnableQwenOAuth,
- oauthProvider: shouldEnableQwenOAuth ? 'qwen' : undefined,
+ envKeyNames: ['OPENAI_API_KEY'], // Support environment variable fallback
+ isOAuthEnabled: isQwenEndpoint && !!oauthManager,
+ oauthProvider: isQwenEndpoint ? 'qwen' : undefined,
  oauthManager,
- };
- super(baseConfig);
- this.logger = new DebugLogger('llxprt:providers:openai');
- this.logger.debug(() => `Constructor - baseURL: ${baseURL}, apiKey: ${apiKey?.substring(0, 10) || 'none'}, oauthManager: ${!!oauthManager}, shouldEnableQwenOAuth: ${shouldEnableQwenOAuth}`);
- this.baseURL = baseURL;
- this.providerConfig = config;
+ }, config);
  this.toolFormatter = new ToolFormatter();
- this.conversationCache = new ConversationCache();
- // Initialize from SettingsService
- this.initializeFromSettings().catch((error) => {
- this.logger.debug(() => `Failed to initialize from SettingsService: ${error}`);
- });
- // Set appropriate default model based on the provider
- if (shouldEnableQwenOAuth || isQwenEndpoint(baseURL || '')) {
- // Default to Qwen model when using Qwen endpoints
- this.currentModel = 'qwen3-coder-plus';
- }
- else if (process.env.LLXPRT_DEFAULT_MODEL) {
- // Use environment variable if set
- this.currentModel = process.env.LLXPRT_DEFAULT_MODEL;
- }
- const clientOptions = {
- apiKey: apiKey || 'placeholder', // OpenAI client requires a string, use placeholder if OAuth will be used
- // Allow browser environment if explicitly configured
- dangerouslyAllowBrowser: config?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (baseURL) {
- clientOptions.baseURL = baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = apiKey; // Track the initial key used
- // Cached client reserved for future optimization
- void this._cachedClient;
- }
- /**
- * Implementation of BaseProvider abstract method
- * Determines if this provider supports OAuth authentication
- */
- supportsOAuth() {
- // Only support Qwen OAuth for Qwen endpoints
- // Use baseProviderConfig.baseURL if this.baseURL not set yet (during constructor)
- const baseURL = this.baseURL ||
- this.baseProviderConfig.baseURL ||
- 'https://api.openai.com/v1';
- return isQwenEndpoint(baseURL);
+ // new DebugLogger('llxprt:core:toolformatter'), // TODO: Fix ToolFormatter constructor
+ // Setup debug logger
+ this.logger = new DebugLogger('llxprt:provider:openai');
  }
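
The rewritten constructor replaces the old OAuth-manager probing with a plain substring test on the base URL, and normalizes blank API keys to undefined so they no longer shadow environment variables or OAuth in the auth precedence chain. A minimal standalone sketch of those two rules (the function names are illustrative, not part of the package API):

function looksLikeQwenEndpoint(baseURL) {
  // Same substring test the new constructor applies above.
  return !!(baseURL &&
    (baseURL.includes('dashscope.aliyuncs.com') ||
      baseURL.includes('api.qwen.com') ||
      baseURL.includes('qwen')));
}

function normalizeApiKey(apiKey) {
  // Blank or whitespace-only keys fall through to env/OAuth resolution.
  return apiKey && apiKey.trim() !== '' ? apiKey : undefined;
}

console.log(looksLikeQwenEndpoint('https://dashscope.aliyuncs.com/compatible-mode/v1')); // true
console.log(normalizeApiKey('   ')); // undefined

The test is deliberately broad: any base URL containing 'qwen' matches, which keeps detection synchronous at the cost of an occasional false positive.
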
  /**
- * Helper method to determine if we're using Qwen (via OAuth or direct endpoint)
+ * Create HTTP/HTTPS agents with socket configuration for local AI servers
+ * Returns undefined if no socket settings are configured
  */
- isUsingQwen() {
- // Check if we're using qwen format based on tool format detection
- const toolFormat = this.detectToolFormat();
- return toolFormat === 'qwen';
- }
- /**
- * Update the OpenAI client with resolved authentication if needed
- */
- async updateClientWithResolvedAuth() {
- const resolvedKey = await this.getAuthToken();
- if (!resolvedKey) {
- // Provide specific error message based on endpoint validation
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('No authentication available for OpenAI API calls');
- }
- // Check if we're using Qwen OAuth and need to update the baseURL
- let effectiveBaseURL = this.baseURL;
- this.logger.debug(() => `updateClientWithResolvedAuth - OAuth enabled: ${this.isOAuthEnabled()}, OAuth provider: ${this.baseProviderConfig.oauthProvider}, baseURL: ${this.baseURL}, resolvedKey: ${resolvedKey?.substring(0, 10)}...`);
- if (this.isOAuthEnabled() &&
- this.baseProviderConfig.oauthProvider === 'qwen') {
- // Get the OAuth token to check for resource_url
- const oauthManager = this.baseProviderConfig.oauthManager;
- if (oauthManager?.getOAuthToken) {
- const oauthToken = await oauthManager.getOAuthToken('qwen');
- this.logger.debug(() => `OAuth token retrieved, resource_url: ${oauthToken?.resource_url}, access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
- if (oauthToken?.resource_url) {
- // Use the resource_url from the OAuth token
- effectiveBaseURL = `https://${oauthToken.resource_url}/v1`;
- this.logger.debug(() => `Using Qwen OAuth endpoint: ${effectiveBaseURL}`);
- }
- }
- }
- // Only update client if the key or URL has changed
- if (this._cachedClientKey !== resolvedKey ||
- this.baseURL !== effectiveBaseURL) {
- const clientOptions = {
- apiKey: resolvedKey,
- // Allow browser environment if explicitly configured
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (effectiveBaseURL) {
- clientOptions.baseURL = effectiveBaseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = resolvedKey;
- // Update the baseURL to track changes
- if (effectiveBaseURL !== this.baseURL) {
- this.baseURL = effectiveBaseURL;
- }
- }
- }
- requiresTextToolCallParsing() {
- if (this.providerConfig?.enableTextToolCallParsing === false) {
- return false;
- }
- // Check if current tool format requires text-based parsing
- const currentFormat = this.getToolFormat();
- const textBasedFormats = ['hermes', 'xml', 'llama'];
- if (textBasedFormats.includes(currentFormat)) {
- return true;
- }
- const configuredModels = this.providerConfig?.textToolCallModels || [];
- return configuredModels.includes(this.currentModel);
- }
- getToolFormat() {
- // Check manual override first
- if (this.toolFormatOverride) {
- return this.toolFormatOverride;
- }
- // Check for settings override
- if (this.providerConfig?.providerToolFormatOverrides?.[this.name]) {
- return this.providerConfig.providerToolFormatOverrides[this.name];
- }
- // Auto-detect tool format based on model or base URL
- if (this.currentModel.includes('deepseek') ||
- this.baseURL?.includes('deepseek')) {
- return 'deepseek';
- }
- // Check for Qwen - including OAuth authenticated Qwen
- if (this.isUsingQwen()) {
- return 'qwen';
- }
- // Default to OpenAI format
- return 'openai';
- }
- shouldUseResponses(model) {
- // Check env flag override (highest priority)
- if (process.env.OPENAI_RESPONSES_DISABLE === 'true') {
- return false;
- }
- // Check settings override - if explicitly set to false, always respect that
- if (this.providerConfig?.openaiResponsesEnabled === false) {
- return false;
- }
- // Never use Responses API for non-OpenAI providers (those with custom base URLs)
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- if (baseURL !== 'https://api.openai.com/v1') {
- return false;
- }
- // Default: Check if model starts with any of the responses API model prefixes
- return RESPONSES_API_MODELS.some((responsesModel) => model.startsWith(responsesModel));
- }
- async callResponsesEndpoint(messages, tools, options) {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to make API calls');
- }
- // Remove the stateful mode error to allow O3 to work with conversation IDs
- // Check context usage and warn if getting close to limit
- if (options?.conversationId && options?.parentId) {
- const contextInfo = this.estimateContextUsage(options.conversationId, options.parentId, messages);
- // Warn if less than 4k tokens remaining
- if (contextInfo.tokensRemaining < 4000) {
- this.logger.debug(() => `Warning: Only ${contextInfo.tokensRemaining} tokens remaining (${contextInfo.contextUsedPercent.toFixed(1)}% context used). Consider starting a new conversation.`);
- }
- }
- // Check cache for existing conversation
- if (options?.conversationId && options?.parentId) {
- const cachedMessages = this.conversationCache.get(options.conversationId, options.parentId);
- if (cachedMessages) {
- // Return cached messages as an async iterable
- return (async function* () {
- for (const message of cachedMessages) {
- yield message;
- }
- })();
- }
+ createHttpAgents() {
+ // Get socket configuration from ephemeral settings
+ const settings = this.providerConfig?.getEphemeralSettings?.() || {};
+ // Check if any socket settings are explicitly configured
+ const hasSocketSettings = 'socket-timeout' in settings ||
+ 'socket-keepalive' in settings ||
+ 'socket-nodelay' in settings;
+ // Only create custom agents if socket settings are configured
+ if (!hasSocketSettings) {
+ return undefined;
  }
- // Format tools for Responses API
- const formattedTools = tools
- ? this.toolFormatter.toResponsesTool(tools)
- : undefined;
- // Patch messages to include synthetic responses for cancelled tools
- const { SyntheticToolResponseHandler } = await import('./syntheticToolResponses.js');
- const patchedMessages = SyntheticToolResponseHandler.patchMessageHistory(messages);
- // Build the request
- const request = buildResponsesRequest({
- model: this.currentModel,
- messages: patchedMessages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- conversationId: options?.conversationId,
- parentId: options?.parentId,
- tool_choice: options?.tool_choice,
+ // Socket configuration with defaults for when settings ARE configured
+ const socketTimeout = settings['socket-timeout'] || 60000; // 60 seconds default
+ const socketKeepAlive = settings['socket-keepalive'] !== false; // true by default
+ const socketNoDelay = settings['socket-nodelay'] !== false; // true by default
+ // Create HTTP agent with socket options
+ const httpAgent = new http.Agent({
+ keepAlive: socketKeepAlive,
+ keepAliveMsecs: 1000,
+ timeout: socketTimeout,
  });
- // Make the API call
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- const responsesURL = `${baseURL}/responses`;
- // Ensure proper UTF-8 encoding for the request body
- // This is crucial for handling multibyte characters (e.g., Japanese, Chinese)
- const requestBody = JSON.stringify(request);
- const bodyBlob = new Blob([requestBody], {
- type: 'application/json; charset=utf-8',
+ // Create HTTPS agent with socket options
+ const httpsAgent = new https.Agent({
+ keepAlive: socketKeepAlive,
+ keepAliveMsecs: 1000,
+ timeout: socketTimeout,
  });
- const response = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: bodyBlob,
- });
- // Handle errors
- if (!response.ok) {
- const errorBody = await response.text();
- // Handle 422 context_length_exceeded error
- if (response.status === 422 &&
- errorBody.includes('context_length_exceeded')) {
- this.logger.debug(() => 'Context length exceeded, invalidating cache and retrying stateless...');
- // Invalidate the cache for this conversation
- if (options?.conversationId && options?.parentId) {
- this.conversationCache.invalidate(options.conversationId, options.parentId);
- }
- // Retry without conversation context (pure stateless)
- const retryRequest = buildResponsesRequest({
- model: this.currentModel,
- messages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- // Omit conversationId and parentId for stateless retry
- tool_choice: options?.tool_choice,
- });
- // Ensure proper UTF-8 encoding for retry request as well
- const retryRequestBody = JSON.stringify(retryRequest);
- const retryBodyBlob = new Blob([retryRequestBody], {
- type: 'application/json; charset=utf-8',
- });
- const retryResponse = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: retryBodyBlob,
- });
- if (!retryResponse.ok) {
- const retryErrorBody = await retryResponse.text();
- throw parseErrorResponse(retryResponse.status, retryErrorBody, this.name);
- }
- // Use the retry response
- return this.handleResponsesApiResponse(retryResponse, messages, undefined, // No conversation context on retry
- undefined, options?.stream !== false);
- }
- throw parseErrorResponse(response.status, errorBody, this.name);
- }
- // Handle the response
- return this.handleResponsesApiResponse(response, messages, options?.conversationId, options?.parentId, options?.stream !== false);
- }
- async handleResponsesApiResponse(response, messages, conversationId, parentId, isStreaming) {
- // Handle streaming response
- if (isStreaming && response.body) {
- const collectedMessages = [];
- const cache = this.conversationCache;
- return (async function* () {
- for await (const message of parseResponsesStream(response.body)) {
- // Collect messages for caching
- if (message.content || message.tool_calls) {
- collectedMessages.push(message);
- }
- else if (message.usage && collectedMessages.length === 0) {
- // If we only got a usage message with no content, add a placeholder
- collectedMessages.push({
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- });
- }
- // Update the parentId in the context as soon as we get a message ID
- if (message.id) {
- // ConversationContext.setParentId(message.id);
- // TODO: Handle parent ID updates when ConversationContext is available
- }
- yield message;
- }
- // Cache the collected messages with token count
- if (conversationId && parentId && collectedMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request (messages + response)
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(collectedMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- cache.set(conversationId, parentId, collectedMessages, previousTokens + totalTokensForRequest);
- }
- })();
- }
- const data = (await response.json());
- const resultMessages = [];
- // DEFENSIVE FIX: Handle potential array response from providers that violate OpenAI spec
- // Some providers (like Cerebras) may return an array of responses instead of a single response
- if (Array.isArray(data)) {
- this.logger.error(() => '[Cerebras Corruption] Detected malformed array response from provider, aggregating...', {
- provider: this.baseURL,
- arrayLength: data.length,
- });
- const aggregatedContent = [];
- let aggregatedToolCalls = [];
- let aggregatedUsage = undefined;
- for (const item of data) {
- if (item.choices?.[0]?.message?.content) {
- aggregatedContent.push(item.choices[0].message.content);
+ // Apply TCP_NODELAY if enabled (reduces latency for local servers)
+ if (socketNoDelay) {
+ const originalCreateConnection = httpAgent.createConnection;
+ httpAgent.createConnection = function (options, callback) {
+ const socket = originalCreateConnection.call(this, options, callback);
+ if (socket instanceof net.Socket) {
+ socket.setNoDelay(true);
  }
- if (item.choices?.[0]?.message?.tool_calls) {
- aggregatedToolCalls = item.choices[0].message.tool_calls;
- }
- if (item.usage) {
- aggregatedUsage = item.usage;
- }
- }
- const message = {
- role: ContentGeneratorRole.ASSISTANT,
- content: aggregatedContent.join(''),
+ return socket;
  };
- if (aggregatedToolCalls.length > 0) {
- message.tool_calls = aggregatedToolCalls;
- }
- if (aggregatedUsage) {
- message.usage = {
- prompt_tokens: aggregatedUsage.prompt_tokens || 0,
- completion_tokens: aggregatedUsage.completion_tokens || 0,
- total_tokens: aggregatedUsage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
- // Convert to async iterator for consistent return type
- return (async function* () {
- for (const msg of resultMessages) {
- yield msg;
+ const originalHttpsCreateConnection = httpsAgent.createConnection;
+ httpsAgent.createConnection = function (options, callback) {
+ const socket = originalHttpsCreateConnection.call(this, options, callback);
+ if (socket instanceof net.Socket) {
+ socket.setNoDelay(true);
  }
- })();
- }
- if (data.choices && data.choices.length > 0) {
- const choice = data.choices[0];
- const message = {
- role: choice.message.role,
- content: choice.message.content || '',
+ return socket;
  };
- if (choice.message.tool_calls) {
- message.tool_calls = choice.message.tool_calls;
- }
- if (data.usage) {
- message.usage = {
- prompt_tokens: data.usage.prompt_tokens || 0,
- completion_tokens: data.usage.completion_tokens || 0,
- total_tokens: data.usage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
  }
- // Cache the result with token count
- if (conversationId && parentId && resultMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = this.conversationCache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(resultMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- this.conversationCache.set(conversationId, parentId, resultMessages, previousTokens + totalTokensForRequest);
+ return { httpAgent, httpsAgent };
+ }
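
createHttpAgents() only takes effect when at least one of the socket-timeout, socket-keepalive, or socket-nodelay ephemeral settings is present; otherwise the OpenAI SDK keeps its default transport. A minimal sketch of the same keep-alive agent built in isolation (the settings object and the localhost target are invented for illustration):

import * as http from 'http';

const settings = { 'socket-timeout': 30000, 'socket-keepalive': true };

const httpAgent = new http.Agent({
  keepAlive: settings['socket-keepalive'] !== false, // on unless explicitly disabled
  keepAliveMsecs: 1000,
  timeout: settings['socket-timeout'] || 60000, // 60s default, matching the code above
});

// A single keep-alive agent avoids a fresh TCP handshake per request against a
// local OpenAI-compatible server (for example llama.cpp or vLLM on localhost).
http.get(
  { host: 'localhost', port: 8080, path: '/v1/models', agent: httpAgent },
  (res) => { console.log('status', res.statusCode); res.resume(); },
);
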
+ /**
+ * Get or create OpenAI client instance
+ * Will use the API key from resolved auth
+ * @returns OpenAI client instance
+ */
+ async getClient() {
+ const resolvedKey = await this.getAuthToken();
+ // Use the unified getBaseURL() method from BaseProvider
+ const baseURL = this.getBaseURL();
+ const clientKey = `${baseURL}-${resolvedKey}`;
+ // Return cached client if available and auth hasn't changed
+ if (this._cachedClient && this._cachedClientKey === clientKey) {
+ return this._cachedClient;
+ }
+ // Create HTTP agents with socket configuration (if configured)
+ const agents = this.createHttpAgents();
+ // Build client options - OpenAI SDK accepts httpAgent/httpsAgent at runtime
+ // even though they're not in the TypeScript definitions
+ const baseOptions = {
+ apiKey: resolvedKey || '',
+ baseURL,
+ };
+ // Add socket configuration if available
+ const clientOptions = agents
+ ? {
+ ...baseOptions,
+ httpAgent: agents.httpAgent,
+ httpsAgent: agents.httpsAgent,
+ }
+ : baseOptions;
+ // Create new client with current auth and optional socket configuration
+ // Cast to unknown then to the expected type to bypass TypeScript's structural checking
+ this._cachedClient = new OpenAI(clientOptions);
+ this._cachedClientKey = clientKey;
+ return this._cachedClient;
+ }
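
Because the cache key concatenates the base URL with the resolved credential, a rotated OAuth token or a changed base URL forces a fresh client on the next call, while repeat calls reuse the cached instance. A toy restatement of that invalidation rule (names invented):

let cachedClient = null;
let cachedClientKey = null;

function getClientFor(baseURL, resolvedKey, makeClient) {
  const clientKey = `${baseURL}-${resolvedKey}`;
  if (!cachedClient || cachedClientKey !== clientKey) {
    cachedClient = makeClient(); // rebuild only when the URL or key changed
    cachedClientKey = clientKey;
  }
  return cachedClient;
}

const a = getClientFor('https://api.openai.com/v1', 'sk-old', () => ({}));
const b = getClientFor('https://api.openai.com/v1', 'sk-new', () => ({}));
console.log(a === b); // false - the rotated key invalidated the cached client
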
+ /**
+ * Check if OAuth is supported for this provider
+ * Qwen endpoints support OAuth, standard OpenAI does not
+ */
+ supportsOAuth() {
+ const baseURL = this.getBaseURL();
+ // Check if this is a Qwen endpoint that supports OAuth
+ if (baseURL &&
+ (baseURL.includes('dashscope.aliyuncs.com') ||
+ baseURL.includes('api.qwen.com') ||
+ baseURL.includes('qwen'))) {
+ return true;
  }
- return (async function* () {
- for (const message of resultMessages) {
- yield message;
- }
- })();
+ // Standard OpenAI endpoints don't support OAuth
+ return false;
  }
  async getModels() {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to fetch models');
- }
  try {
- // Get resolved authentication and update client if needed
- await this.updateClientWithResolvedAuth();
- const response = await this.openai.models.list();
+ // Always try to fetch models, regardless of auth status
+ // Local endpoints often work without authentication
+ const client = await this.getClient();
+ const response = await client.models.list();
  const models = [];
  for await (const model of response) {
- // Filter out non-chat models (embeddings, audio, image, moderation, DALL·E, etc.)
+ // Filter out non-chat models (embeddings, audio, image, vision, DALL·E, etc.)
  if (!/embedding|whisper|audio|tts|image|vision|dall[- ]?e|moderation/i.test(model.id)) {
  models.push({
  id: model.id,
@@ -488,1029 +182,452 @@ export class OpenAIProvider extends BaseProvider {
  catch (error) {
  this.logger.debug(() => `Error fetching models from OpenAI: ${error}`);
  // Return a hardcoded list as fallback
- // Check if this is a Qwen endpoint
- if (isQwenEndpoint(this.baseURL || '')) {
- return [
- {
- id: 'qwen3-coder-plus',
- name: 'qwen3-coder-plus',
- provider: 'openai',
- supportedToolFormats: ['openai'],
- },
- ];
- }
- // Default OpenAI models
- return [
- {
- id: 'gpt-4o',
- name: 'gpt-4o',
- provider: 'openai',
- supportedToolFormats: ['openai'],
- },
- {
- id: 'gpt-4o-mini',
- name: 'gpt-4o-mini',
- provider: 'openai',
- supportedToolFormats: ['openai'],
- },
- {
- id: 'gpt-4-turbo',
- name: 'gpt-4-turbo',
- provider: 'openai',
- supportedToolFormats: ['openai'],
- },
- {
- id: 'gpt-3.5-turbo',
- name: 'gpt-3.5-turbo',
- provider: 'openai',
- supportedToolFormats: ['openai'],
- },
- ];
+ return this.getFallbackModels();
  }
  }
- async *generateChatCompletion(messages, tools, _toolFormat) {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to generate completions');
+ getFallbackModels() {
+ return [
+ {
+ id: 'gpt-5',
+ name: 'GPT-5',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ {
+ id: 'gpt-4.1',
+ name: 'GPT-4.1',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ {
+ id: 'gpt-4o',
+ name: 'GPT-4o',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ {
+ id: 'o3',
+ name: 'O3',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ {
+ id: 'o4-mini',
+ name: 'O4 Mini',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ {
+ id: 'gpt-3.5-turbo',
+ name: 'GPT-3.5 Turbo (Legacy)',
+ provider: 'openai',
+ supportedToolFormats: ['openai'],
+ },
+ ];
+ }
+ getDefaultModel() {
+ // Return hardcoded default - do NOT call getModel() to avoid circular dependency
+ return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
+ }
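
The comment in getDefaultModel() is load-bearing: generateChatCompletionImpl() resolves the model as modelName || this.getModel() || this.getDefaultModel(), so a getDefaultModel() that called back into getModel() could recurse. A sketch of the fallback chain with the hardcoded default terminating it (the object shape is illustrative, not the package's class):

const provider = {
  settingsModel: undefined, // nothing configured in SettingsService yet
  getDefaultModel() {
    // Hardcoded default terminates the fallback chain.
    return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
  },
  getModel() {
    return this.settingsModel || this.getDefaultModel();
  },
};

console.log(provider.getModel()); // 'gpt-5' unless LLXPRT_DEFAULT_MODEL overrides it
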
+ getServerTools() {
+ // TODO: Implement server tools for OpenAI provider
+ return [];
+ }
+ async invokeServerTool(toolName, _params, _config) {
+ // TODO: Implement server tool invocation for OpenAI provider
+ throw new Error(`Server tool '${toolName}' not supported by OpenAI provider`);
+ }
+ /**
+ * Normalize tool IDs from various formats to OpenAI format
+ * Handles IDs from OpenAI (call_xxx), Anthropic (toolu_xxx), and history (hist_tool_xxx)
+ */
+ normalizeToOpenAIToolId(id) {
+ // If already in OpenAI format, return as-is
+ if (id.startsWith('call_')) {
+ return id;
  }
- // Check if we should use responses endpoint
- if (this.shouldUseResponses(this.currentModel)) {
- // Generate conversation IDs inline (would normally come from application context)
- const conversationId = undefined;
- const parentId = undefined;
- yield* await this.callResponsesEndpoint(messages, tools, {
- stream: true,
- tool_choice: tools && tools.length > 0 ? 'auto' : undefined,
- stateful: false, // Always stateless for Phase 22-01
- conversationId,
- parentId,
- });
- return;
+ // For history format, extract the UUID and add OpenAI prefix
+ if (id.startsWith('hist_tool_')) {
+ const uuid = id.substring('hist_tool_'.length);
+ return 'call_' + uuid;
  }
- // Fix messages in place like AnthropicProvider does - this ensures synthetic responses persist
- // This is critical for preventing 400 errors on subsequent calls with Qwen/Cerebras
- const missingToolIds = this.identifyAndFixMissingToolResponses(messages);
- if (missingToolIds.length > 0) {
- this.logger.debug(() => `[Synthetic] Added ${missingToolIds.length} synthetic responses to conversation history: ${JSON.stringify(missingToolIds)}`);
- // Log the actual tool calls and their IDs for debugging
- const assistantMessagesWithTools = messages.filter((m) => m.role === 'assistant' && m.tool_calls && m.tool_calls.length > 0);
- const lastAssistantWithTools = assistantMessagesWithTools[assistantMessagesWithTools.length - 1];
- if (lastAssistantWithTools?.tool_calls) {
- this.logger.debug(() => `[Synthetic] Last assistant tool calls: ${JSON.stringify(lastAssistantWithTools.tool_calls?.map((tc) => ({ id: tc.id, name: tc.function.name })) ?? [])}`);
- }
+ // For Anthropic format, extract the UUID and add OpenAI prefix
+ if (id.startsWith('toolu_')) {
+ const uuid = id.substring('toolu_'.length);
+ return 'call_' + uuid;
  }
- // Now messages array has been modified in place with synthetic responses
- const patchedMessages = messages;
- // Validate tool messages have required tool_call_id
- const toolMessages = patchedMessages.filter((msg) => msg.role === 'tool');
- const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
- if (missingIds.length > 0) {
- this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
- throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
+ // Unknown format - assume it's a raw UUID
+ return 'call_' + id;
+ }
+ /**
+ * Normalize tool IDs from OpenAI format to history format
+ */
+ normalizeToHistoryToolId(id) {
+ // If already in history format, return as-is
+ if (id.startsWith('hist_tool_')) {
+ return id;
  }
- // Log synthetic responses for debugging
- const syntheticMessages = patchedMessages.filter((msg) => msg._synthetic);
- if (syntheticMessages.length > 0) {
- this.logger.debug(() => `[Synthetic] Added ${syntheticMessages.length} synthetic tool responses`);
+ // For OpenAI format, extract the UUID and add history prefix
+ if (id.startsWith('call_')) {
+ const uuid = id.substring('call_'.length);
+ return 'hist_tool_' + uuid;
  }
- const parser = this.requiresTextToolCallParsing()
- ? new GemmaToolCallParser()
- : null;
- // Get current tool format (with override support)
- const currentToolFormat = this.getToolFormat();
- // Format tools using formatToolsForAPI method
- const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
- // Get stream_options from ephemeral settings (not model params)
- const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
- // Default stream_options to { include_usage: true } unless explicitly set
- const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
- // Get streaming setting from ephemeral settings (default: enabled)
- const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
- let streamingEnabled = streamingSetting !== 'disabled';
- // Get resolved authentication and update client if needed
- await this.updateClientWithResolvedAuth();
- // Strip internal tracking fields that some APIs don't accept
- // We keep the synthetic responses but remove the metadata fields
- const cleanedMessages = patchedMessages.map((msg) => {
- // Create a shallow copy and remove internal fields
- const { _synthetic, _cancelled, ...cleanMsg } = msg;
- // Log synthetic tool responses for debugging
- if (msg._synthetic) {
- this.logger.debug(() => `[Synthetic Tool Response] ${JSON.stringify(cleanMsg)}`);
- }
- return cleanMsg;
+ // For Anthropic format, extract the UUID and add history prefix
+ if (id.startsWith('toolu_')) {
+ const uuid = id.substring('toolu_'.length);
+ return 'hist_tool_' + uuid;
+ }
+ // Unknown format - assume it's a raw UUID
+ return 'hist_tool_' + id;
+ }
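
The two normalizers only swap prefixes and preserve the identifier body, so an ID survives a round trip between the history (hist_tool_), Anthropic (toolu_), and OpenAI (call_) forms. A condensed restatement with a round-trip check:

function toOpenAIToolId(id) {
  if (id.startsWith('call_')) return id;
  if (id.startsWith('hist_tool_')) return 'call_' + id.slice('hist_tool_'.length);
  if (id.startsWith('toolu_')) return 'call_' + id.slice('toolu_'.length);
  return 'call_' + id; // raw UUID
}

function toHistoryToolId(id) {
  if (id.startsWith('hist_tool_')) return id;
  if (id.startsWith('call_')) return 'hist_tool_' + id.slice('call_'.length);
  if (id.startsWith('toolu_')) return 'hist_tool_' + id.slice('toolu_'.length);
  return 'hist_tool_' + id; // raw UUID
}

console.log(toOpenAIToolId('hist_tool_123e4567'));                  // call_123e4567
console.log(toHistoryToolId(toOpenAIToolId('hist_tool_123e4567'))); // hist_tool_123e4567
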
+ /**
+ * Generate chat completion with IContent interface
+ * Internally converts to OpenAI API format, but only yields IContent
+ * @param contents Array of content blocks (text and tool_call)
+ * @param tools Array of available tools
+ */
+ async *generateChatCompletion(contents, tools) {
+ // Debug log what we receive
+ this.logger.debug(() => `[OpenAIProvider] generateChatCompletion received tools:`, {
+ hasTools: !!tools,
+ toolsLength: tools?.length,
+ toolsType: typeof tools,
+ isArray: Array.isArray(tools),
+ firstToolName: tools?.[0]?.functionDeclarations?.[0]?.name,
+ toolsStructure: tools
+ ? JSON.stringify(tools).substring(0, 200)
+ : 'undefined',
  });
- this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${streamingEnabled}`);
- // Debug: Log message roles being sent
- this.logger.debug(() => `Messages being sent to OpenAI (${cleanedMessages.length} total): ${cleanedMessages
- .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
- .join(', ')}`);
- let response;
- try {
- // Build request params with exact order from original
- response = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: streamingEnabled,
- ...(streamingEnabled && finalStreamOptions !== null
- ? { stream_options: finalStreamOptions }
- : {}),
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
+ // Pass tools directly in Gemini format - they'll be converted in generateChatCompletionImpl
+ const generator = this.generateChatCompletionImpl(contents, tools, undefined, undefined, undefined);
+ for await (const item of generator) {
+ yield item;
  }
- catch (error) {
- // Debug the error
- this.logger.error(() => `[Cancellation 400] Error caught in API call: ${error}`);
- this.logger.error(() => `[Cancellation 400] Error type: ${error?.constructor?.name}`);
- this.logger.error(() => `[Cancellation 400] Error status: ${error?.status || error?.response?.status}`);
- this.logger.error(() => `[Cancellation 400] Error response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
- // Log the last few messages to understand what's being sent
- if (error?.status === 400 ||
- error?.response?.status === 400) {
- this.logger.error(() => `[Cancellation 400] Last 5 messages being sent:`);
- const lastMessages = cleanedMessages.slice(-5);
- lastMessages.forEach((msg, idx) => {
- this.logger.error(() => ` [${cleanedMessages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
- if (msg.tool_calls) {
- msg.tool_calls.forEach((tc) => {
- this.logger.error(() => `  - Tool call: ${tc.id} -> ${tc.function.name}`);
- });
- }
- });
+ }
+ /**
+ * Convert IContent array to OpenAI ChatCompletionMessageParam array
+ */
+ convertToOpenAIMessages(contents) {
+ const messages = [];
+ for (const content of contents) {
+ if (content.speaker === 'human') {
+ // Convert human messages to user messages
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
+ const text = textBlocks.map((b) => b.text).join('\n');
+ if (text) {
+ messages.push({
+ role: 'user',
+ content: text,
+ });
+ }
  }
- // Check for JSONResponse mutation errors
- const errorMessage = error instanceof Error ? error.message : String(error);
- if (errorMessage?.includes('JSONResponse') &&
- errorMessage?.includes('does not support item assignment')) {
- this.logger.debug(() => '[Cancellation 400] Detected JSONResponse mutation error, retrying without streaming');
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error detected. This typically occurs with certain providers like Cerebras. Falling back to non-streaming mode.', {
- errorMessage,
- provider: this.baseURL,
- streamingEnabled,
- });
- // Retry with streaming disabled
- response = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: false, // Force non-streaming
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
- // Override streamingEnabled for the rest of this function
- streamingEnabled = false;
+ else if (content.speaker === 'ai') {
+ // Convert AI messages
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
+ const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
+ if (toolCalls.length > 0) {
+ // Assistant message with tool calls
+ const text = textBlocks.map((b) => b.text).join('\n');
+ messages.push({
+ role: 'assistant',
+ content: text || null,
+ tool_calls: toolCalls.map((tc) => ({
+ id: this.normalizeToOpenAIToolId(tc.id),
+ type: 'function',
+ function: {
+ name: tc.name,
+ arguments: typeof tc.parameters === 'string'
+ ? tc.parameters
+ : JSON.stringify(tc.parameters),
+ },
+ })),
+ });
+ }
+ else if (textBlocks.length > 0) {
+ // Plain assistant message
+ const text = textBlocks.map((b) => b.text).join('\n');
+ messages.push({
+ role: 'assistant',
+ content: text,
+ });
+ }
  }
- else {
- this.logger.debug(() => '[Cancellation 400] Re-throwing error (not a JSONResponse mutation)');
- // Re-throw other errors
- throw error;
+ else if (content.speaker === 'tool') {
+ // Convert tool responses
+ const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
+ for (const tr of toolResponses) {
+ messages.push({
+ role: 'tool',
+ content: typeof tr.result === 'string'
+ ? tr.result
+ : JSON.stringify(tr.result),
+ tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+ });
+ }
  }
  }
- let fullContent = '';
- const accumulatedToolCalls = [];
- let hasStreamedContent = false;
- let usageData;
- // For Qwen streaming, buffer whitespace-only chunks to preserve spacing across chunk boundaries
- let pendingWhitespace = null;
- // Handle streaming vs non-streaming response
- if (streamingEnabled) {
- // We need to buffer all chunks to detect and handle malformed streams
- // Some providers (like Cerebras) send message format instead of delta
- const allChunks = [];
- this.logger.debug(() => '[Stream Detection] Starting to buffer chunks for corruption detection', {
- provider: this.baseURL,
+ return messages;
+ }
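
Concretely, a three-turn IContent history maps onto OpenAI messages as follows (the sample data is invented; the block shapes follow the converter above):

const contents = [
  { speaker: 'human', blocks: [{ type: 'text', text: 'List the files' }] },
  { speaker: 'ai', blocks: [{ type: 'tool_call', id: 'hist_tool_abc123',
      name: 'list_files', parameters: { path: '.' } }] },
  { speaker: 'tool', blocks: [{ type: 'tool_response',
      callId: 'hist_tool_abc123', result: 'README.md' }] },
];

// convertToOpenAIMessages(contents) yields, in order:
//   { role: 'user', content: 'List the files' }
//   { role: 'assistant', content: null, tool_calls: [{ id: 'call_abc123',
//       type: 'function', function: { name: 'list_files',
//       arguments: '{"path":"."}' } }] }
//   { role: 'tool', content: 'README.md', tool_call_id: 'call_abc123' }

Note that the assistant content degrades to null when the turn is tool-calls only, and that the structured parameters object is serialized into the arguments string the Chat Completions API expects.
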
371
+ /**
372
+ * Internal implementation for chat completion
373
+ */
374
+ async *generateChatCompletionImpl(contents, tools, maxTokens, abortSignal, modelName) {
375
+ // Always look up model from SettingsService
376
+ const model = modelName || this.getModel() || this.getDefaultModel();
377
+ // Convert IContent to OpenAI messages format
378
+ const messages = this.convertToOpenAIMessages(contents);
379
+ // Debug log what we're about to convert
380
+ this.logger.debug(() => `[OpenAIProvider] Before convertGeminiToOpenAI:`, {
381
+ inputTools: tools ? JSON.stringify(tools).substring(0, 500) : 'undefined',
382
+ hasTools: !!tools,
383
+ toolsLength: tools?.length,
384
+ firstToolStructure: tools?.[0]
385
+ ? JSON.stringify(tools[0]).substring(0, 300)
386
+ : 'undefined',
387
+ });
388
+ // Convert Gemini format tools directly to OpenAI format using the new method
389
+ const formattedTools = this.toolFormatter.convertGeminiToOpenAI(tools);
390
+ // Debug log the conversion result
391
+ this.logger.debug(() => `[OpenAIProvider] After convertGeminiToOpenAI:`, {
392
+ inputHadTools: !!tools,
393
+ outputHasTools: !!formattedTools,
394
+ outputToolsLength: formattedTools?.length,
395
+ outputFirstTool: formattedTools?.[0],
396
+ outputToolNames: formattedTools?.map((t) => t.function.name),
397
+ firstToolParameters: formattedTools?.[0]
398
+ ? JSON.stringify(formattedTools[0].function.parameters)
399
+ : 'undefined',
400
+ });
401
+ // Get streaming setting from ephemeral settings (default: enabled)
402
+ const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
403
+ const streamingEnabled = streamingSetting !== 'disabled';
404
+ // Get the system prompt
405
+ const userMemory = this.globalConfig?.getUserMemory
406
+ ? this.globalConfig.getUserMemory()
407
+ : '';
408
+ const systemPrompt = await getCoreSystemPromptAsync(userMemory, model, undefined);
409
+ // Add system prompt as the first message in the array
410
+ const messagesWithSystem = [
411
+ { role: 'system', content: systemPrompt },
412
+ ...messages,
413
+ ];
414
+ // Build request - only include tools if they exist and are not empty
415
+ const requestBody = {
416
+ model,
417
+ messages: messagesWithSystem,
418
+ ...(formattedTools && formattedTools.length > 0
419
+ ? { tools: formattedTools }
420
+ : {}),
421
+ max_tokens: maxTokens,
422
+ stream: streamingEnabled,
423
+ };
424
+ // Debug log the full request for Cerebras/Qwen
425
+ if (model.toLowerCase().includes('qwen') ||
+ this.getBaseURL()?.includes('cerebras')) {
+ this.logger.debug(() => `Full request to ${this.getBaseURL()} for model ${model}:`, {
+ baseURL: this.getBaseURL(),
+ model,
  streamingEnabled,
+ hasTools: 'tools' in requestBody,
+ toolCount: formattedTools?.length || 0,
+ messageCount: messages.length,
+ toolsInRequest: 'tools' in requestBody ? requestBody.tools?.length : 'not included',
+ requestBody: {
+ ...requestBody,
+ messages: messages.slice(-2), // Only log last 2 messages for brevity
+ tools: requestBody.tools?.slice(0, 2), // Only log first 2 tools for brevity if they exist
+ },
+ });
+ }
+ // Get OpenAI client
+ const client = await this.getClient();
+ // Wrap the API call with retry logic
+ const makeApiCall = async () => {
+ const response = await client.chat.completions.create(requestBody, {
+ signal: abortSignal,
  });
+ return response;
+ };
+ let retryCount = 0;
+ const maxRetries = 5;
+ let response;
+ while (retryCount <= maxRetries) {
  try {
- for await (const chunk of response) {
- // CRITICAL: Create a deep copy to avoid JSONResponse mutation issues
- // Cerebras and other providers may return immutable JSONResponse objects
- // Cast to unknown first to bypass type checking, then to our extended type
- const extendedChunk = chunk;
- const safeChunk = {
- choices: extendedChunk.choices?.map((choice) => ({
- delta: choice.delta
- ? {
- content: choice.delta.content ?? undefined,
- role: choice.delta.role,
- tool_calls: choice.delta.tool_calls?.map((tc, idx) => ({
- id: tc.id,
- type: tc.type,
- function: tc.function
- ? {
- name: tc.function.name,
- arguments: tc.function.arguments,
- }
- : undefined,
- index: tc.index !== undefined ? tc.index : idx,
- })),
- }
- : undefined,
- message: choice.message
- ? {
- content: choice.message.content ?? undefined,
- role: choice.message.role,
- tool_calls: choice.message.tool_calls?.map((tc) => ({
- id: tc.id,
- type: tc.type,
- function: tc.function
- ? {
- name: tc.function.name,
- arguments: tc.function.arguments,
- }
- : undefined,
- })),
- }
- : undefined,
- index: choice.index,
- finish_reason: choice.finish_reason,
- })),
- usage: extendedChunk.usage
- ? {
- prompt_tokens: extendedChunk.usage.prompt_tokens,
- completion_tokens: extendedChunk.usage.completion_tokens,
- total_tokens: extendedChunk.usage.total_tokens,
- }
- : undefined,
- };
- allChunks.push(safeChunk);
- }
+ response = await makeApiCall();
+ break; // Success, exit retry loop
  }
  catch (error) {
- // Handle JSONResponse mutation errors that occur during iteration
- const errorMessage = error instanceof Error ? error.message : String(error);
- if (errorMessage?.includes('JSONResponse') &&
- errorMessage?.includes('does not support item assignment')) {
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error during stream iteration. This is a known issue with Cerebras. The OpenAI client library is trying to mutate immutable response objects. Falling back to non-streaming mode.', {
- error: errorMessage,
- provider: this.baseURL,
- chunksCollected: allChunks.length,
- });
- // Retry the entire request with streaming disabled
- // This is the nuclear option but ensures we get a response
- const nonStreamingResponse = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: false, // Force non-streaming
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
- // Handle as non-streaming response
- const completionResponse = nonStreamingResponse;
- const choice = completionResponse.choices[0];
- if (choice?.message.content) {
- fullContent = choice.message.content;
- }
- if (choice?.message.tool_calls) {
- for (const toolCall of choice.message.tool_calls) {
- if (toolCall.type === 'function' && toolCall.function) {
- accumulatedToolCalls.push({
- id: toolCall.id,
- type: 'function',
- function: toolCall.function,
- });
- }
- }
- }
- if (completionResponse.usage) {
- usageData = {
- prompt_tokens: completionResponse.usage.prompt_tokens,
- completion_tokens: completionResponse.usage.completion_tokens,
- total_tokens: completionResponse.usage.total_tokens,
- };
- }
- // Yield the complete response
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent || '',
- tool_calls: accumulatedToolCalls.length > 0
- ? accumulatedToolCalls
- : undefined,
- usage: usageData,
- };
- return;
+ if (retryCount === maxRetries) {
+ throw error; // Max retries reached, re-throw error
  }
- // Re-throw other errors
- throw error;
+ retryCount++;
+ this.logger.debug(() => `API call failed (attempt ${retryCount}), retrying...`, error);
+ // Exponential backoff: 4s, 8s, 16s, 32s, 64s
+ const delay = 4000 * Math.pow(2, retryCount - 1);
+ await new Promise((resolve) => setTimeout(resolve, delay));
  }
- // Check first chunk to see if we have malformed stream
- let detectedMalformedStream = false;
- if (allChunks.length > 0) {
- const firstChunk = allChunks[0];
- if (firstChunk.choices?.[0]?.message &&
- !firstChunk.choices?.[0]?.delta) {
- detectedMalformedStream = true;
- this.logger.debug(() => 'Detected malformed stream (message instead of delta), using aggregation mode');
- }
- }
- // If we detected issues, aggregate everything
- if (detectedMalformedStream) {
- const contentParts = [];
- let aggregatedToolCalls = [];
- let finalUsageData = undefined;
- // Process all buffered chunks
- for (const chunk of allChunks) {
- const message = chunk.choices?.[0]?.message || chunk.choices?.[0]?.delta;
- if (message?.content) {
- contentParts.push(message.content);
- }
- if (message?.tool_calls) {
- // Ensure tool_calls match the expected format
- aggregatedToolCalls = message.tool_calls.map((tc) => ({
- id: tc.id || `call_${Date.now()}`,
- type: (tc.type || 'function'),
- function: {
- name: tc.function?.name || '',
- arguments: tc.function?.arguments || '',
- },
- }));
- }
- if (chunk.usage) {
- finalUsageData = {
- prompt_tokens: chunk.usage.prompt_tokens || 0,
- completion_tokens: chunk.usage.completion_tokens || 0,
- total_tokens: chunk.usage.total_tokens || 0,
- };
+ }
+ if (!response) {
+ throw new Error('Failed to get response after retries');
+ }
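The new retry loop above wraps a single `client.chat.completions.create` call in `makeApiCall` and backs off exponentially between attempts. With `maxRetries = 5` that is at most six attempts and roughly two minutes of cumulative delay (4 + 8 + 16 + 32 + 64 seconds). A minimal standalone sketch of the same pattern; `callWithRetry` and `fetchCompletion` are hypothetical names, while the constants and control flow come from the diff:

```js
// Sketch of the retry-with-backoff pattern above.
// `fetchCompletion` is a stand-in for client.chat.completions.create(...).
async function callWithRetry(fetchCompletion, maxRetries = 5) {
  let retryCount = 0;
  while (retryCount <= maxRetries) {
    try {
      return await fetchCompletion(); // success: exit the retry loop
    } catch (error) {
      if (retryCount === maxRetries) throw error; // budget exhausted
      retryCount++;
      // Exponential backoff: 4s, 8s, 16s, 32s, 64s
      const delay = 4000 * Math.pow(2, retryCount - 1);
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
  throw new Error('Failed to get response after retries');
}
```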
+ // Check if response is streaming or not
+ if (streamingEnabled) {
+ // Process streaming response
+ let _accumulatedText = '';
+ const accumulatedToolCalls = [];
+ try {
+ // Handle streaming response
+ for await (const chunk of response) {
+ if (abortSignal?.aborted) {
+ break;
  }
- }
- // Yield single reconstructed message
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: contentParts.join(''),
- tool_calls: aggregatedToolCalls.length > 0 ? aggregatedToolCalls : undefined,
- usage: finalUsageData,
- };
- return;
- }
- // Process chunks normally - stream them as they come
- for (const chunk of allChunks) {
- // Since we created safe copies during buffering, chunks are now mutable
- // Check if this chunk has message format instead of delta (malformed stream)
- let processedChunk = chunk;
- if (chunk.choices?.[0]?.message && !chunk.choices?.[0]?.delta) {
- this.logger.error(() => '[Cerebras Corruption] Converting malformed chunk from message to delta format', {
- provider: this.baseURL,
- hasMessage: true,
- hasDelta: false,
- messageContent: chunk.choices[0].message?.content?.substring(0, 100),
- });
- // Convert message format to delta format for consistent processing
- const message = chunk.choices[0].message;
- processedChunk = {
- choices: [
- {
- delta: {
- content: message?.content ?? undefined,
- role: message?.role,
- tool_calls: message?.tool_calls,
+ const choice = chunk.choices?.[0];
+ if (!choice)
+ continue;
+ // Handle text content - emit immediately without buffering
+ const deltaContent = choice.delta?.content;
+ if (deltaContent) {
+ _accumulatedText += deltaContent;
+ // Emit text immediately without buffering
+ yield {
+ speaker: 'ai',
+ blocks: [
+ {
+ type: 'text',
+ text: deltaContent,
  },
- },
- ],
- usage: chunk.usage,
- };
- }
- const delta = processedChunk.choices?.[0]?.delta;
- if (delta?.content) {
- // Enhanced debug logging to understand streaming behavior
- if (this.isUsingQwen()) {
- this.logger.debug(() => `Chunk: ${JSON.stringify({
- content: delta.content,
- contentLength: delta.content?.length ?? 0,
- isWhitespaceOnly: delta.content?.trim() === '',
- chunkIndex: 0,
- })}`);
+ ],
+ };
  }
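The streaming branch no longer buffers whole chunks: each text delta is emitted at once as an IContent-style `{ speaker, blocks }` message. A compact sketch of that shape, assuming any async iterable of OpenAI-style chunks (`textEvents` is a hypothetical name; the event structure mirrors the diff):

```js
// Sketch: forward OpenAI-style stream chunks as IContent text events.
// `stream` is any async iterable of { choices: [{ delta: { content } }] }.
async function* textEvents(stream, abortSignal) {
  for await (const chunk of stream) {
    if (abortSignal?.aborted) break; // caller cancelled: stop consuming
    const deltaContent = chunk.choices?.[0]?.delta?.content;
    if (deltaContent) {
      // Emit immediately, without buffering, as the new code does.
      yield { speaker: 'ai', blocks: [{ type: 'text', text: deltaContent }] };
    }
  }
}
```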
- // For text-based models, don't yield content chunks yet
- if (!parser && delta.content) {
- if (this.isUsingQwen()) {
- const isWhitespaceOnly = delta.content.trim() === '';
- if (isWhitespaceOnly) {
- // Buffer whitespace-only chunk
- pendingWhitespace = (pendingWhitespace || '') + delta.content;
- this.logger.debug(() => `Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${pendingWhitespace?.length ?? 0}`);
+ // Handle tool calls
+ const deltaToolCalls = choice.delta?.tool_calls;
+ if (deltaToolCalls && deltaToolCalls.length > 0) {
+ for (const deltaToolCall of deltaToolCalls) {
+ if (deltaToolCall.index === undefined)
  continue;
- }
- else if (pendingWhitespace) {
- // Flush buffered whitespace before non-empty chunk to preserve spacing
- this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
+ // Initialize or update accumulated tool call
+ if (!accumulatedToolCalls[deltaToolCall.index]) {
+ accumulatedToolCalls[deltaToolCall.index] = {
+ id: deltaToolCall.id || '',
+ type: 'function',
+ function: {
+ name: deltaToolCall.function?.name || '',
+ arguments: '',
+ },
  };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
+ }
+ const tc = accumulatedToolCalls[deltaToolCall.index];
+ if (tc) {
+ if (deltaToolCall.id)
+ tc.id = deltaToolCall.id;
+ if (deltaToolCall.function?.name)
+ tc.function.name = deltaToolCall.function.name;
+ if (deltaToolCall.function?.arguments) {
+ tc.function.arguments += deltaToolCall.function.arguments;
+ }
  }
  }
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: delta.content,
- };
- hasStreamedContent = true;
- }
- fullContent += delta.content;
- }
- if (delta?.tool_calls) {
- for (const toolCall of delta.tool_calls) {
- this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulatedToolCalls, currentToolFormat);
  }
  }
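Tool calls stream as fragments: the first delta for a given `index` typically carries the id and function name, and later deltas append pieces of the JSON `arguments` string. The accumulation step above, isolated into a sketch (`accumulateToolCallDelta` is a hypothetical name; the merge rules are taken from the diff):

```js
// Sketch: merge one streamed tool-call delta into an index-keyed array.
function accumulateToolCallDelta(accumulated, deltaToolCall) {
  if (deltaToolCall.index === undefined) return;
  if (!accumulated[deltaToolCall.index]) {
    accumulated[deltaToolCall.index] = {
      id: deltaToolCall.id || '',
      type: 'function',
      function: { name: deltaToolCall.function?.name || '', arguments: '' },
    };
  }
  const tc = accumulated[deltaToolCall.index];
  if (deltaToolCall.id) tc.id = deltaToolCall.id;
  if (deltaToolCall.function?.name) tc.function.name = deltaToolCall.function.name;
  if (deltaToolCall.function?.arguments) {
    // Arguments arrive as string fragments; concatenate until the stream ends.
    tc.function.arguments += deltaToolCall.function.arguments;
  }
}
```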
- // Check for usage data in the chunk
- if (processedChunk.usage) {
- usageData = {
- prompt_tokens: processedChunk.usage.prompt_tokens || 0,
- completion_tokens: processedChunk.usage.completion_tokens || 0,
- total_tokens: processedChunk.usage.total_tokens || 0,
- };
- }
  }
- }
- else {
- // Non-streaming response - handle as a single completion
- const completionResponse = response;
- const choice = completionResponse.choices[0];
- if (choice?.message.content) {
- fullContent = choice.message.content;
- }
- if (choice?.message.tool_calls) {
- // Convert tool calls to the standard format
- for (const toolCall of choice.message.tool_calls) {
- if (toolCall.type === 'function' && toolCall.function) {
- // Don't fix double stringification here - it's handled later in the final processing
- accumulatedToolCalls.push({
- id: toolCall.id,
- type: 'function',
- function: toolCall.function,
- });
- }
+ catch (error) {
+ if (abortSignal?.aborted) {
+ throw error;
+ }
+ else {
+ this.logger.error('Error processing streaming response:', error);
+ throw error;
  }
  }
- if (completionResponse.usage) {
- usageData = {
- prompt_tokens: completionResponse.usage.prompt_tokens,
- completion_tokens: completionResponse.usage.completion_tokens,
- total_tokens: completionResponse.usage.total_tokens,
- };
- }
- // For non-streaming, we yield the full content at once if there's no parser
- if (!parser && fullContent) {
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent,
- };
- hasStreamedContent = true;
- }
- }
- // Flush any remaining pending whitespace for Qwen
- if (pendingWhitespace && this.isUsingQwen() && !parser) {
- this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
- };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
- }
- // After stream ends, parse text-based tool calls if needed
- if (parser && fullContent) {
- const { cleanedContent, toolCalls } = parser.parse(fullContent);
- if (toolCalls.length > 0) {
- // Convert to standard format
- const standardToolCalls = toolCalls.map((tc, index) => ({
- id: `call_${Date.now()}_${index}`,
- type: 'function',
- function: {
- name: tc.name,
- arguments: JSON.stringify(tc.arguments),
- },
- }));
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- tool_calls: standardToolCalls,
- usage: usageData,
- };
- }
- else {
- // No tool calls found, yield cleaned content
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- usage: usageData,
- };
- }
- }
- else {
- // Standard OpenAI tool call handling
+ // No need to flush buffer since we're emitting immediately
+ // Emit accumulated tool calls as IContent if any
  if (accumulatedToolCalls.length > 0) {
- // Fix double stringification for Qwen tool calls
- // Qwen models pre-stringify arguments values, but later in the process
- // they are being JSON.stringify'd again
- let fixedToolCalls = accumulatedToolCalls;
- if (this.isUsingQwen()) {
- this.logger.debug(() => `[Qwen Fix] Processing ${accumulatedToolCalls.length} tool calls for double-stringification fix`);
- fixedToolCalls = accumulatedToolCalls.map((toolCall, index) => {
- this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
- name: toolCall.function.name,
- argumentsType: typeof toolCall.function.arguments,
- argumentsLength: toolCall.function.arguments?.length,
- argumentsSample: toolCall.function.arguments?.substring(0, 100),
- })}`);
- // For Qwen, check for nested double-stringification
- // Qwen models stringify array/object values WITHIN the JSON arguments
- if (toolCall.function.arguments &&
- typeof toolCall.function.arguments === 'string') {
- try {
- // First, parse the arguments to get the JSON object
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- let hasNestedStringification = false;
- // Check each property to see if it's a stringified array/object
- const fixedArgs = {};
- for (const [key, value] of Object.entries(parsedArgs)) {
- if (typeof value === 'string') {
- const trimmed = value.trim();
- // Check if it looks like a stringified array or object
- // Also check for Python-style dictionaries with single quotes
- if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
- (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
- try {
- // Try to parse it as JSON
- const nestedParsed = JSON.parse(value);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Try to convert Python-style to JSON (single quotes to double quotes)
- try {
- const jsonified = value
- .replace(/'/g, '"')
- .replace(/: True/g, ': true')
- .replace(/: False/g, ': false')
- .replace(/: None/g, ': null');
- const nestedParsed = JSON.parse(jsonified);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Not valid JSON even after conversion, keep as string
- fixedArgs[key] = value;
- }
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- if (hasNestedStringification) {
- this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(fixedArgs),
- },
- };
- }
- }
- catch (_e) {
- // If parsing fails, check for old-style double-stringification
- if (toolCall.function.arguments.startsWith('"') &&
- toolCall.function.arguments.endsWith('"')) {
- try {
- // Old fix: entire arguments were double-stringified
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(parsedArgs),
- },
- };
- }
- catch {
- // Leave as-is if we can't parse
- }
- }
- }
- }
- // No fix needed
- this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
- return toolCall;
+ const blocks = [];
+ const detectedFormat = this.detectToolFormat();
+ for (const tc of accumulatedToolCalls) {
+ if (!tc)
+ continue;
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '', detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(tc.id),
+ name: tc.function.name || '',
+ parameters: processedParameters,
  });
  }
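The long inline "[Qwen Fix]" block on the removed side is replaced by a single call to `processToolParameters(arguments, name, format)`. That helper's body is not part of this hunk; an illustrative reduction of the behavior it supersedes, un-nesting values that arrive as JSON-encoded strings, might look like the following (hypothetical helper, not the package's actual implementation):

```js
// Illustrative only: undo one level of stringification on argument values
// (e.g. "[1,2]" -> [1,2]). The real processToolParameters is imported
// elsewhere in the package and also handles format-specific quirks.
function unescapeToolArguments(rawArguments) {
  let args;
  try {
    args = JSON.parse(rawArguments || '{}');
  } catch {
    return rawArguments; // not JSON at all; let the caller decide
  }
  for (const [key, value] of Object.entries(args)) {
    if (typeof value !== 'string') continue;
    const trimmed = value.trim();
    const looksNested = (trimmed.startsWith('[') && trimmed.endsWith(']')) ||
      (trimmed.startsWith('{') && trimmed.endsWith('}'));
    if (!looksNested) continue;
    try {
      args[key] = JSON.parse(trimmed); // un-nest the stringified value
    } catch {
      // Not valid JSON after all; keep the original string.
    }
  }
  return args;
}
```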
- if (this.isUsingQwen()) {
- this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
- contentLength: fullContent.length,
- content: fullContent.substring(0, 200) +
- (fullContent.length > 200 ? '...' : ''),
- toolCallCount: accumulatedToolCalls.length,
- hasStreamedContent,
- })}`);
- }
- // For Qwen models, don't duplicate content if we've already streamed it
- // BUT Cerebras needs at least a space to continue after tool responses
- const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
- if (isCerebras) {
- this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
- hasStreamedContent,
- willSendSpace: hasStreamedContent,
- });
- }
- const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
- if (shouldOmitContent) {
- // Only yield tool calls with empty content to avoid duplication
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- tool_calls: fixedToolCalls,
- usage: usageData,
- };
- }
- else if (isCerebras && hasStreamedContent) {
- // Cerebras: Send just a space to prevent duplication but allow continuation
- // This prevents the repeated "Let me search..." text
- this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: ' ', // Single space instead of full content
- tool_calls: fixedToolCalls,
- usage: usageData,
- };
- }
- else {
- // Include full content with tool calls
+ if (blocks.length > 0) {
  yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent || '',
- tool_calls: fixedToolCalls,
- usage: usageData,
+ speaker: 'ai',
+ blocks,
  };
  }
  }
- else if (usageData) {
- // Always emit usage data so downstream consumers can update stats
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- usage: usageData,
- };
- }
  }
- }
- setModel(modelId) {
- // Update SettingsService as the source of truth
- this.setModelInSettings(modelId).catch((error) => {
- this.logger.debug(() => `Failed to persist model to SettingsService: ${error}`);
- });
- // Keep local cache for performance
- this.currentModel = modelId;
- }
- getCurrentModel() {
- // Try to get from SettingsService first (source of truth)
- try {
- const settingsService = getSettingsService();
- const providerSettings = settingsService.getProviderSettings(this.name);
- if (providerSettings.model) {
- return providerSettings.model;
+ else {
+ // Handle non-streaming response
+ const completion = response;
+ const choice = completion.choices?.[0];
+ if (!choice) {
+ throw new Error('No choices in completion response');
+ }
+ const blocks = [];
+ // Handle text content
+ if (choice.message?.content) {
+ blocks.push({
+ type: 'text',
+ text: choice.message.content,
+ });
  }
- }
- catch (error) {
- this.logger.debug(() => `Failed to get model from SettingsService: ${error}`);
- }
- // Fall back to cached value or default
- return this.currentModel || this.getDefaultModel();
- }
- getDefaultModel() {
- // Return the default model for this provider
- // This can be overridden based on configuration or endpoint
- if (this.isUsingQwen()) {
- return 'qwen3-coder-plus';
- }
- return process.env.LLXPRT_DEFAULT_MODEL || 'gpt-5';
- }
- setApiKey(apiKey) {
- // Call base provider implementation
- super.setApiKey?.(apiKey);
- // Persist to SettingsService if available
- this.setApiKeyInSettings(apiKey).catch((error) => {
- this.logger.debug(() => `Failed to persist API key to SettingsService: ${error}`);
- });
- // Create a new OpenAI client with the updated API key
- const clientOptions = {
- apiKey,
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (this.baseURL) {
- clientOptions.baseURL = this.baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- this._cachedClientKey = apiKey; // Update cached key
- }
- setBaseUrl(baseUrl) {
- // If no baseUrl is provided, clear to default (undefined)
- this.baseURL = baseUrl && baseUrl.trim() !== '' ? baseUrl : undefined;
- // Persist to SettingsService if available
- this.setBaseUrlInSettings(this.baseURL).catch((error) => {
- this.logger.debug(() => `Failed to persist base URL to SettingsService: ${error}`);
- });
- // Update OAuth configuration based on endpoint validation
- // Enable OAuth for Qwen endpoints if we have an OAuth manager
- const shouldEnableQwenOAuth = !!this.baseProviderConfig.oauthManager &&
- (isQwenEndpoint(this.baseURL || '') ||
- this.baseURL === 'https://portal.qwen.ai/v1');
- this.updateOAuthConfig(shouldEnableQwenOAuth, shouldEnableQwenOAuth ? 'qwen' : undefined, this.baseProviderConfig.oauthManager);
- // Call base provider implementation
- super.setBaseUrl?.(baseUrl);
- // Create a new OpenAI client with the updated (or cleared) base URL
- const clientOptions = {
- // Use existing key or empty string as placeholder
- apiKey: this._cachedClientKey || 'placeholder',
- dangerouslyAllowBrowser: this.providerConfig?.allowBrowserEnvironment || false,
- };
- // Only include baseURL if it's defined
- if (this.baseURL) {
- clientOptions.baseURL = this.baseURL;
- }
- this.openai = new OpenAI(clientOptions);
- // Clear cached key to force re-resolution on next API call
- this._cachedClientKey = undefined;
- }
- setConfig(config) {
- this.providerConfig = config;
- }
- setToolFormatOverride(format) {
- this.toolFormatOverride = format || undefined;
- }
- /**
- * Estimates the remote context usage for the current conversation
- * @param conversationId The conversation ID
- * @param parentId The parent message ID
- * @param promptMessages The messages being sent in the current prompt
- * @returns Context usage information including remote tokens
- */
- estimateContextUsage(conversationId, parentId, promptMessages) {
- const promptTokens = estimateMessagesTokens(promptMessages);
- return estimateRemoteTokens(this.currentModel, this.conversationCache, conversationId, parentId, promptTokens);
- }
- /**
- * Get the conversation cache instance
- * @returns The conversation cache
- */
- getConversationCache() {
- return this.conversationCache;
- }
- /**
- * Identifies and fixes missing tool responses by adding synthetic responses in place.
- * Similar to AnthropicProvider's validateAndFixMessages approach.
- * This ensures synthetic responses persist in the conversation history.
- * @param messages The message array to fix in place
- * @returns Array of tool call IDs that were fixed
- */
- identifyAndFixMissingToolResponses(messages) {
- const fixedIds = [];
- const pendingToolCalls = [];
- // Process messages in order, tracking tool calls and responses
- for (let i = 0; i < messages.length; i++) {
- const msg = messages[i];
- if (msg.role === 'assistant' && msg.tool_calls) {
- // If we have pending tool calls from a previous assistant message,
- // add synthetic responses for them before processing this new assistant message
- if (pendingToolCalls.length > 0) {
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Insert synthetic responses before the current assistant message
- messages.splice(i, 0, ...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- // Adjust index to account for inserted messages
- i += syntheticResponses.length;
- // Clear pending tool calls
- pendingToolCalls.length = 0;
- }
- // Now track the new tool calls from this assistant message
- msg.tool_calls.forEach((toolCall) => {
- if (toolCall.id) {
- pendingToolCalls.push({
- id: toolCall.id,
- name: toolCall.function.name,
+ // Handle tool calls
+ if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
+ const detectedFormat = this.detectToolFormat();
+ for (const toolCall of choice.message.tool_calls) {
+ if (toolCall.type === 'function') {
+ // Process tool parameters with double-escape handling
+ const processedParameters = processToolParameters(toolCall.function.arguments || '', toolCall.function.name || '', detectedFormat);
+ blocks.push({
+ type: 'tool_call',
+ id: this.normalizeToHistoryToolId(toolCall.id),
+ name: toolCall.function.name || '',
+ parameters: processedParameters,
  });
  }
- });
- }
- else if (msg.role === 'tool' && pendingToolCalls.length > 0) {
- // Match tool responses with pending tool calls
- pendingToolCalls.splice(pendingToolCalls.findIndex((tc) => tc.id === msg.tool_call_id), 1);
- }
- else if ((msg.role === 'assistant' || msg.role === 'user') &&
- pendingToolCalls.length > 0) {
- // We hit a non-tool message with pending tool calls - need to add synthetic responses
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Insert synthetic responses before the current message
- messages.splice(i, 0, ...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- // Adjust index to account for inserted messages
- i += syntheticResponses.length;
- // Clear pending tool calls
- pendingToolCalls.length = 0;
- }
- }
- // Handle any remaining pending tool calls at the end
- if (pendingToolCalls.length > 0) {
- const syntheticResponses = pendingToolCalls.map((tc) => ({
- role: 'tool',
- tool_call_id: tc.id,
- content: 'Tool execution cancelled by user',
- _synthetic: true,
- _cancelled: true,
- }));
- // Add to the end of messages
- messages.push(...syntheticResponses);
- // Track what we fixed
- fixedIds.push(...pendingToolCalls.map((tc) => tc.id));
- }
- return fixedIds;
- }
- /**
- * OpenAI always requires payment (API key)
- */
- isPaidMode() {
- return true;
- }
- clearState() {
- // Clear the conversation cache to prevent tool call ID mismatches
- this.conversationCache.clear();
- }
- /**
- * Get the list of server tools supported by this provider
- */
- getServerTools() {
- return [];
- }
- /**
- * Invoke a server tool (native provider tool)
- */
- async invokeServerTool(_toolName, _params, _config) {
- throw new Error('Server tools not supported by OpenAI provider');
- }
- /**
- * Set model parameters to be included in API calls
- * @param params Parameters to merge with existing, or undefined to clear all
- */
- setModelParams(params) {
- if (params === undefined) {
- this.modelParams = undefined;
- }
- else {
- this.modelParams = { ...this.modelParams, ...params };
- }
- // Persist to SettingsService if available
- this.setModelParamsInSettings(this.modelParams).catch((error) => {
- this.logger.debug(() => `Failed to persist model params to SettingsService: ${error}`);
- });
- }
- /**
- * Get current model parameters
- * @returns Current parameters or undefined if not set
- */
- getModelParams() {
- return this.modelParams;
- }
- /**
- * Initialize provider configuration from SettingsService
- */
- async initializeFromSettings() {
- try {
- // Load saved model if available
- const savedModel = await this.getModelFromSettings();
- if (savedModel) {
- this.currentModel = savedModel;
- }
- // Load saved base URL if available
- const savedBaseUrl = await this.getBaseUrlFromSettings();
- if (savedBaseUrl !== undefined) {
- this.baseURL = savedBaseUrl;
+ }
  }
- // Load saved model parameters if available
- const savedParams = await this.getModelParamsFromSettings();
- if (savedParams) {
- this.modelParams = savedParams;
+ // Emit the complete response as a single IContent
+ if (blocks.length > 0) {
+ yield {
+ speaker: 'ai',
+ blocks,
+ };
  }
- this.logger.debug(() => `Initialized from SettingsService - model: ${this.currentModel}, baseURL: ${this.baseURL}, params: ${JSON.stringify(this.modelParams)}`);
- }
- catch (error) {
- this.logger.debug(() => `Failed to initialize OpenAI provider from SettingsService: ${error}`);
  }
  }
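In the non-streaming branch the whole completion is mapped to a single IContent message: one text block for `message.content`, plus one `tool_call` block per function call. Condensed into a sketch (the function parameters stand in for the provider's own `processToolParameters` and `normalizeToHistoryToolId` helpers):

```js
// Sketch: map one non-streaming completion to a single IContent message.
function completionToIContent(completion, processParams, normalizeId) {
  const choice = completion.choices?.[0];
  if (!choice) throw new Error('No choices in completion response');
  const blocks = [];
  if (choice.message?.content) {
    blocks.push({ type: 'text', text: choice.message.content });
  }
  for (const toolCall of choice.message?.tool_calls ?? []) {
    if (toolCall.type !== 'function') continue;
    blocks.push({
      type: 'tool_call',
      id: normalizeId(toolCall.id),
      name: toolCall.function.name || '',
      parameters: processParams(toolCall.function.arguments || '', toolCall.function.name || ''),
    });
  }
  // Only emit when there is something to say, as the diff does.
  return blocks.length > 0 ? { speaker: 'ai', blocks } : null;
}
```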
  /**
- * Check if the provider is authenticated using any available method
- * Uses the base provider's isAuthenticated implementation
- */
- async isAuthenticated() {
- return super.isAuthenticated();
- }
- /**
- * Detect the appropriate tool format for the current model/configuration
- * @returns The detected tool format
+ * Detects the tool call format based on the model being used
+ * @returns The detected tool format ('openai' or 'qwen')
  */
  detectToolFormat() {
  try {
- const settingsService = getSettingsService();
- // First check SettingsService for toolFormat override in provider settings
- // Note: This is synchronous access to cached settings, not async
- const currentSettings = settingsService['settings'];
- const providerSettings = currentSettings?.providers?.[this.name];
- const toolFormatOverride = providerSettings?.toolFormat;
- // If explicitly set to a specific format (not 'auto'), use it
- if (toolFormatOverride && toolFormatOverride !== 'auto') {
- return toolFormatOverride;
- }
- // Auto-detect based on model name if set to 'auto' or not set
- const modelName = this.currentModel.toLowerCase();
- // Check for GLM-4.5 models (glm-4.5, glm-4-5)
- if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
- return 'qwen';
+ // Try to get format from SettingsService if available
+ const settings = this.providerConfig?.getEphemeralSettings?.();
+ if (settings && settings['tool-format']) {
+ return settings['tool-format'];
  }
- // Check for qwen models
- if (modelName.includes('qwen')) {
- return 'qwen';
- }
- // Default to 'openai' format
- return 'openai';
  }
  catch (error) {
  this.logger.debug(() => `Failed to detect tool format from SettingsService: ${error}`);
- // Fallback detection without SettingsService
- const modelName = this.currentModel.toLowerCase();
- if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
- return 'qwen';
- }
- if (modelName.includes('qwen')) {
- return 'qwen';
- }
- return 'openai';
  }
- }
- /**
- * Get appropriate tool_choice value based on detected tool format
- * @param tools Array of tools (if any)
- * @returns Appropriate tool_choice value for the current format
- */
- getToolChoiceForFormat(tools) {
- if (!tools || tools.length === 0) {
- return undefined;
+ // Fallback detection without SettingsService - always look up current model
+ const modelName = (this.getModel() || this.getDefaultModel()).toLowerCase();
+ if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) {
+ return 'qwen';
  }
- // For all formats, use 'auto' (standard behavior)
- // Future enhancement: different formats may need different tool_choice values
- return 'auto';
- }
- /**
- * Format tools for API based on detected tool format
- * @param tools Array of tools to format
- * @returns Formatted tools for API consumption
- */
- formatToolsForAPI(tools) {
- // For now, always use OpenAI format through OpenRouter
- // TODO: Investigate if OpenRouter needs special handling for GLM/Qwen
- // const detectedFormat = this.detectToolFormat();
- // if (detectedFormat === 'qwen') {
- // // Convert OpenAI format to Qwen format: {name, description, parameters} without type/function wrapper
- // return tools.map((tool) => ({
- // name: tool.function.name,
- // description: tool.function.description,
- // parameters: tool.function.parameters,
- // }));
- // }
- // For all formats, use the existing ToolFormatter
- return this.toolFormatter.toProviderFormat(tools, 'openai');
+ if (modelName.includes('qwen')) {
+ return 'qwen';
+ }
+ return 'openai';
  }
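The reworked `detectToolFormat` now honors an explicit `tool-format` ephemeral setting first and, on any failure, falls back to a model-name heuristic that re-reads the current model instead of using the cached `this.currentModel`. The fallback is small enough to restate on its own:

```js
// The model-name fallback from detectToolFormat, restated for clarity.
function detectToolFormatFromModel(modelId) {
  const modelName = modelId.toLowerCase();
  // GLM-4.5 variants and Qwen models expect the 'qwen' tool-call format.
  if (modelName.includes('glm-4.5') || modelName.includes('glm-4-5')) return 'qwen';
  if (modelName.includes('qwen')) return 'qwen';
  return 'openai'; // everything else uses standard OpenAI tool calls
}

// e.g. detectToolFormatFromModel('qwen3-coder-plus') === 'qwen'
```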
  /**
  * Parse tool response from API (placeholder for future response parsing)
@@ -1522,5 +639,27 @@ export class OpenAIProvider extends BaseProvider {
  // For now, return the response as-is
  return response;
  }
+ /**
+ * Determines whether a response should be retried based on error codes
+ * @param error The error object from the API response
+ * @returns true if the request should be retried, false otherwise
+ */
+ shouldRetryResponse(error) {
+ // Don't retry if we're streaming chunks - just continue processing
+ if (error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ error.status === 200) {
+ return false;
+ }
+ // Retry on 429 rate limit errors or 5xx server errors
+ const shouldRetry = Boolean(error &&
+ typeof error === 'object' &&
+ 'status' in error &&
+ (error.status === 429 ||
+ (error.status >= 500 &&
+ error.status < 600)));
+ return shouldRetry;
+ }
  }
  //# sourceMappingURL=OpenAIProvider.js.map
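The added `shouldRetryResponse` encodes a conventional HTTP retry policy: a status of 200 is never retried (an error raised mid-stream is not a failed request), 429 and 5xx are retried, and everything else fails fast. The explicit 200 branch is technically redundant (200 is neither 429 nor 5xx) but documents the streaming intent. A standalone restatement with a few spot checks, assuming plain `{ status }` error objects:

```js
// Standalone restatement of the retry predicate, for quick testing.
function shouldRetryResponse(error) {
  if (error && typeof error === 'object' && 'status' in error && error.status === 200) {
    return false; // mid-stream error on a 200: keep processing, don't re-send
  }
  // Retry on 429 rate limits or any 5xx server error.
  return Boolean(error && typeof error === 'object' && 'status' in error &&
    (error.status === 429 || (error.status >= 500 && error.status < 600)));
}

console.assert(shouldRetryResponse({ status: 429 }) === true);
console.assert(shouldRetryResponse({ status: 503 }) === true);
console.assert(shouldRetryResponse({ status: 400 }) === false);
console.assert(shouldRetryResponse(null) === false);
```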