@vybestack/llxprt-code-core 0.1.23-nightly.250829.6bacfcba → 0.1.23-nightly.250902.c3d3686d

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. package/dist/src/code_assist/codeAssist.js +17 -6
  2. package/dist/src/code_assist/codeAssist.js.map +1 -1
  3. package/dist/src/code_assist/server.js +15 -4
  4. package/dist/src/code_assist/server.js.map +1 -1
  5. package/dist/src/code_assist/setup.js +7 -0
  6. package/dist/src/code_assist/setup.js.map +1 -1
  7. package/dist/src/core/ContentGeneratorAdapter.d.ts +37 -0
  8. package/dist/src/core/ContentGeneratorAdapter.js +58 -0
  9. package/dist/src/core/ContentGeneratorAdapter.js.map +1 -0
  10. package/dist/src/core/client.d.ts +9 -2
  11. package/dist/src/core/client.js +111 -36
  12. package/dist/src/core/client.js.map +1 -1
  13. package/dist/src/core/compression-config.d.ts +10 -0
  14. package/dist/src/core/compression-config.js +18 -0
  15. package/dist/src/core/compression-config.js.map +1 -0
  16. package/dist/src/core/geminiChat.d.ts +8 -2
  17. package/dist/src/core/geminiChat.js +148 -32
  18. package/dist/src/core/geminiChat.js.map +1 -1
  19. package/dist/src/core/prompts.js +4 -2
  20. package/dist/src/core/prompts.js.map +1 -1
  21. package/dist/src/index.d.ts +2 -0
  22. package/dist/src/index.js +2 -0
  23. package/dist/src/index.js.map +1 -1
  24. package/dist/src/providers/BaseProvider.d.ts +1 -1
  25. package/dist/src/providers/BaseProvider.js.map +1 -1
  26. package/dist/src/providers/anthropic/AnthropicProvider.js +1 -1
  27. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  28. package/dist/src/providers/gemini/GeminiProvider.js +100 -36
  29. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  30. package/dist/src/providers/openai/OpenAIProvider.d.ts +54 -25
  31. package/dist/src/providers/openai/OpenAIProvider.js +528 -984
  32. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  33. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +91 -0
  34. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +440 -0
  35. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -0
  36. package/dist/src/providers/openai-responses/index.d.ts +1 -0
  37. package/dist/src/providers/openai-responses/index.js +2 -0
  38. package/dist/src/providers/openai-responses/index.js.map +1 -0
  39. package/dist/src/services/history/ContentConverters.d.ts +38 -0
  40. package/dist/src/services/history/ContentConverters.js +188 -0
  41. package/dist/src/services/history/ContentConverters.js.map +1 -0
  42. package/dist/src/services/history/HistoryEvents.d.ts +32 -0
  43. package/dist/src/services/history/HistoryEvents.js +17 -0
  44. package/dist/src/services/history/HistoryEvents.js.map +1 -0
  45. package/dist/src/services/history/HistoryService.d.ts +168 -0
  46. package/dist/src/services/history/HistoryService.js +521 -0
  47. package/dist/src/services/history/HistoryService.js.map +1 -0
  48. package/dist/src/services/history/IContent.d.ts +179 -0
  49. package/dist/src/services/history/IContent.js +104 -0
  50. package/dist/src/services/history/IContent.js.map +1 -0
  51. package/package.json +1 -1
@@ -22,12 +22,6 @@ import { ContentGeneratorRole } from '../ContentGeneratorRole.js';
  import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
  import OpenAI from 'openai';
- import { RESPONSES_API_MODELS } from './RESPONSES_API_MODELS.js';
- import { ConversationCache } from './ConversationCache.js';
- import { estimateMessagesTokens, estimateRemoteTokens, } from './estimateRemoteTokens.js';
- // ConversationContext removed - using inline conversation ID generation
- import { parseResponsesStream, parseErrorResponse, } from './parseResponsesStream.js';
- import { buildResponsesRequest } from './buildResponsesRequest.js';
  import { BaseProvider } from '../BaseProvider.js';
  import { isQwenEndpoint, generateOAuthEndpointMismatchError, } from '../../config/endpoints.js';
  import { getSettingsService } from '../../settings/settingsServiceInstance.js';
@@ -39,7 +33,6 @@ export class OpenAIProvider extends BaseProvider {
  providerConfig;
  toolFormatter;
  toolFormatOverride;
- conversationCache;
  modelParams;
  _cachedClient;
  _cachedClientKey;
@@ -78,7 +71,6 @@ export class OpenAIProvider extends BaseProvider {
  this.baseURL = baseURL;
  this.providerConfig = config;
  this.toolFormatter = new ToolFormatter();
- this.conversationCache = new ConversationCache();
  // Initialize from SettingsService
  this.initializeFromSettings().catch((error) => {
  this.logger.debug(() => `Failed to initialize from SettingsService: ${error}`);
@@ -148,7 +140,9 @@ export class OpenAIProvider extends BaseProvider {
  const oauthManager = this.baseProviderConfig.oauthManager;
  if (oauthManager?.getOAuthToken) {
  const oauthToken = await oauthManager.getOAuthToken('qwen');
- this.logger.debug(() => `OAuth token retrieved, resource_url: ${oauthToken?.resource_url}, access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
+ this.logger.debug(() => `OAuth token retrieved:\n` +
+ ` resource_url: ${oauthToken?.resource_url}\n` +
+ ` access_token: ${oauthToken?.access_token?.substring(0, 10)}...`);
  if (oauthToken?.resource_url) {
  // Use the resource_url from the OAuth token
  effectiveBaseURL = `https://${oauthToken.resource_url}/v1`;
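For reference, the endpoint resolution visible in this hunk's context lines reduces to the sketch below; resolveQwenBaseURL is a hypothetical name, and the fallback mirrors the `this.baseURL || 'https://api.openai.com/v1'` default used elsewhere in this file.

function resolveQwenBaseURL(oauthToken, configuredBaseURL) {
    if (oauthToken?.resource_url) {
        // Use the resource_url advertised by the OAuth token
        return `https://${oauthToken.resource_url}/v1`;
    }
    // Otherwise keep the configured base URL, defaulting to OpenAI's endpoint
    return configuredBaseURL ?? 'https://api.openai.com/v1';
}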
@@ -210,254 +204,6 @@ export class OpenAIProvider extends BaseProvider {
  // Default to OpenAI format
  return 'openai';
  }
- shouldUseResponses(model) {
- // Check env flag override (highest priority)
- if (process.env.OPENAI_RESPONSES_DISABLE === 'true') {
- return false;
- }
- // Check settings override - if explicitly set to false, always respect that
- if (this.providerConfig?.openaiResponsesEnabled === false) {
- return false;
- }
- // Never use Responses API for non-OpenAI providers (those with custom base URLs)
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- if (baseURL !== 'https://api.openai.com/v1') {
- return false;
- }
- // Default: Check if model starts with any of the responses API model prefixes
- return RESPONSES_API_MODELS.some((responsesModel) => model.startsWith(responsesModel));
- }
- async callResponsesEndpoint(messages, tools, options) {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to make API calls');
- }
- // Remove the stateful mode error to allow O3 to work with conversation IDs
- // Check context usage and warn if getting close to limit
- if (options?.conversationId && options?.parentId) {
- const contextInfo = this.estimateContextUsage(options.conversationId, options.parentId, messages);
- // Warn if less than 4k tokens remaining
- if (contextInfo.tokensRemaining < 4000) {
- this.logger.debug(() => `Warning: Only ${contextInfo.tokensRemaining} tokens remaining (${contextInfo.contextUsedPercent.toFixed(1)}% context used). Consider starting a new conversation.`);
- }
- }
- // Check cache for existing conversation
- if (options?.conversationId && options?.parentId) {
- const cachedMessages = this.conversationCache.get(options.conversationId, options.parentId);
- if (cachedMessages) {
- // Return cached messages as an async iterable
- return (async function* () {
- for (const message of cachedMessages) {
- yield message;
- }
- })();
- }
- }
- // Format tools for Responses API
- const formattedTools = tools
- ? this.toolFormatter.toResponsesTool(tools)
- : undefined;
- // Patch messages to include synthetic responses for cancelled tools
- const { SyntheticToolResponseHandler } = await import('./syntheticToolResponses.js');
- const patchedMessages = SyntheticToolResponseHandler.patchMessageHistory(messages);
- // Build the request
- const request = buildResponsesRequest({
- model: this.currentModel,
- messages: patchedMessages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- conversationId: options?.conversationId,
- parentId: options?.parentId,
- tool_choice: options?.tool_choice,
- ...(this.modelParams || {}),
- });
- // Make the API call
- const baseURL = this.baseURL || 'https://api.openai.com/v1';
- const responsesURL = `${baseURL}/responses`;
- // Ensure proper UTF-8 encoding for the request body
- // This is crucial for handling multibyte characters (e.g., Japanese, Chinese)
- const requestBody = JSON.stringify(request);
- const bodyBlob = new Blob([requestBody], {
- type: 'application/json; charset=utf-8',
- });
- const response = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: bodyBlob,
- });
- // Handle errors
- if (!response.ok) {
- const errorBody = await response.text();
- // Handle 422 context_length_exceeded error
- if (response.status === 422 &&
- errorBody.includes('context_length_exceeded')) {
- this.logger.debug(() => 'Context length exceeded, invalidating cache and retrying stateless...');
- // Invalidate the cache for this conversation
- if (options?.conversationId && options?.parentId) {
- this.conversationCache.invalidate(options.conversationId, options.parentId);
- }
- // Retry without conversation context (pure stateless)
- const retryRequest = buildResponsesRequest({
- model: this.currentModel,
- messages,
- tools: formattedTools,
- stream: options?.stream ?? true,
- // Omit conversationId and parentId for stateless retry
- tool_choice: options?.tool_choice,
- ...(this.modelParams || {}),
- });
- // Ensure proper UTF-8 encoding for retry request as well
- const retryRequestBody = JSON.stringify(retryRequest);
- const retryBodyBlob = new Blob([retryRequestBody], {
- type: 'application/json; charset=utf-8',
- });
- const retryResponse = await fetch(responsesURL, {
- method: 'POST',
- headers: {
- Authorization: `Bearer ${apiKey}`,
- 'Content-Type': 'application/json; charset=utf-8',
- },
- body: retryBodyBlob,
- });
- if (!retryResponse.ok) {
- const retryErrorBody = await retryResponse.text();
- throw parseErrorResponse(retryResponse.status, retryErrorBody, this.name);
- }
- // Use the retry response
- return this.handleResponsesApiResponse(retryResponse, messages, undefined, // No conversation context on retry
- undefined, options?.stream !== false);
- }
- throw parseErrorResponse(response.status, errorBody, this.name);
- }
- // Handle the response
- return this.handleResponsesApiResponse(response, messages, options?.conversationId, options?.parentId, options?.stream !== false);
- }
- async handleResponsesApiResponse(response, messages, conversationId, parentId, isStreaming) {
- // Handle streaming response
- if (isStreaming && response.body) {
- const collectedMessages = [];
- const cache = this.conversationCache;
- return (async function* () {
- for await (const message of parseResponsesStream(response.body)) {
- // Collect messages for caching
- if (message.content || message.tool_calls) {
- collectedMessages.push(message);
- }
- else if (message.usage && collectedMessages.length === 0) {
- // If we only got a usage message with no content, add a placeholder
- collectedMessages.push({
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- });
- }
- // Update the parentId in the context as soon as we get a message ID
- if (message.id) {
- // ConversationContext.setParentId(message.id);
- // TODO: Handle parent ID updates when ConversationContext is available
- }
- yield message;
- }
- // Cache the collected messages with token count
- if (conversationId && parentId && collectedMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request (messages + response)
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(collectedMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- cache.set(conversationId, parentId, collectedMessages, previousTokens + totalTokensForRequest);
- }
- })();
- }
- const data = (await response.json());
- const resultMessages = [];
- // DEFENSIVE FIX: Handle potential array response from providers that violate OpenAI spec
- // Some providers (like Cerebras) may return an array of responses instead of a single response
- if (Array.isArray(data)) {
- this.logger.error(() => '[Cerebras Corruption] Detected malformed array response from provider, aggregating...', {
- provider: this.baseURL,
- arrayLength: data.length,
- });
- const aggregatedContent = [];
- let aggregatedToolCalls = [];
- let aggregatedUsage = undefined;
- for (const item of data) {
- if (item.choices?.[0]?.message?.content) {
- aggregatedContent.push(item.choices[0].message.content);
- }
- if (item.choices?.[0]?.message?.tool_calls) {
- aggregatedToolCalls = item.choices[0].message.tool_calls;
- }
- if (item.usage) {
- aggregatedUsage = item.usage;
- }
- }
- const message = {
- role: ContentGeneratorRole.ASSISTANT,
- content: aggregatedContent.join(''),
- };
- if (aggregatedToolCalls.length > 0) {
- message.tool_calls = aggregatedToolCalls;
- }
- if (aggregatedUsage) {
- message.usage = {
- prompt_tokens: aggregatedUsage.prompt_tokens || 0,
- completion_tokens: aggregatedUsage.completion_tokens || 0,
- total_tokens: aggregatedUsage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
- // Convert to async iterator for consistent return type
- return (async function* () {
- for (const msg of resultMessages) {
- yield msg;
- }
- })();
- }
- if (data.choices && data.choices.length > 0) {
- const choice = data.choices[0];
- const message = {
- role: choice.message.role,
- content: choice.message.content || '',
- };
- if (choice.message.tool_calls) {
- message.tool_calls = choice.message.tool_calls;
- }
- if (data.usage) {
- message.usage = {
- prompt_tokens: data.usage.prompt_tokens || 0,
- completion_tokens: data.usage.completion_tokens || 0,
- total_tokens: data.usage.total_tokens || 0,
- };
- }
- resultMessages.push(message);
- }
- // Cache the result with token count
- if (conversationId && parentId && resultMessages.length > 0) {
- // Get previous accumulated tokens
- const previousTokens = this.conversationCache.getAccumulatedTokens(conversationId, parentId);
- // Calculate tokens for this request
- const requestTokens = estimateMessagesTokens(messages);
- const responseTokens = estimateMessagesTokens(resultMessages);
- const totalTokensForRequest = requestTokens + responseTokens;
- // Update cache with new accumulated total
- this.conversationCache.set(conversationId, parentId, resultMessages, previousTokens + totalTokensForRequest);
- }
- return (async function* () {
- for (const message of resultMessages) {
- yield message;
- }
- })();
- }
  async getModels() {
  // Check if API key is available (using resolved authentication)
  const apiKey = await this.getAuthToken();
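The removed handleResponsesApiResponse above also carried the ConversationCache token accounting that this release drops. Distilled into a standalone sketch (the wrapper function and its parameter names are hypothetical; ConversationCache, getAccumulatedTokens, set, and estimateMessagesTokens are the identifiers from the deleted lines):

function recordTurnInCache(cache, conversationId, parentId, requestMessages, responseMessages) {
    // Running token total so far for this conversation branch
    const previousTokens = cache.getAccumulatedTokens(conversationId, parentId);
    // Each turn adds an estimate of prompt plus response tokens
    const requestTokens = estimateMessagesTokens(requestMessages);
    const responseTokens = estimateMessagesTokens(responseMessages);
    // Store the response together with the new accumulated total;
    // estimateContextUsage compared this total against the model's window
    cache.set(conversationId, parentId, responseMessages, previousTokens + requestTokens + responseTokens);
}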
@@ -530,739 +276,122 @@ export class OpenAIProvider extends BaseProvider {
  }
  }
  async *generateChatCompletion(messages, tools, _toolFormat) {
- // Check if API key is available (using resolved authentication)
- const apiKey = await this.getAuthToken();
- if (!apiKey) {
- const endpoint = this.baseURL || 'https://api.openai.com/v1';
- if (this.isOAuthEnabled() && !this.supportsOAuth()) {
- throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
- }
- throw new Error('OpenAI API key is required to generate completions');
- }
- // Check if we should use responses endpoint
- if (this.shouldUseResponses(this.currentModel)) {
- // Generate conversation IDs inline (would normally come from application context)
- const conversationId = undefined;
- const parentId = undefined;
- yield* await this.callResponsesEndpoint(messages, tools, {
- stream: true,
- tool_choice: tools && tools.length > 0 ? 'auto' : undefined,
- stateful: false, // Always stateless for Phase 22-01
- conversationId,
- parentId,
- });
- return;
- }
- // Don't automatically add synthetic responses - they should only be added when tools are actually cancelled
- // Check if we have any existing synthetic responses (from actual cancellations)
- const existingSyntheticCount = messages.filter((msg) => msg._synthetic).length;
- if (existingSyntheticCount > 0) {
- this.logger.debug(() => `[Synthetic] Found ${existingSyntheticCount} existing synthetic responses in conversation`);
- }
- // Just use the messages as-is without "fixing" them
- const patchedMessages = messages;
- // Validate tool messages have required tool_call_id
- const toolMessages = patchedMessages.filter((msg) => msg.role === 'tool');
- const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
- if (missingIds.length > 0) {
- this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
- throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
- }
- // Log synthetic responses for debugging
- const syntheticMessages = patchedMessages.filter((msg) => msg._synthetic);
- if (syntheticMessages.length > 0) {
- this.logger.debug(() => `[Synthetic] Added ${syntheticMessages.length} synthetic tool responses`);
- // Check for ordering issues - using debug logger which only executes when enabled
- this.logger.debug(() => {
- const orderingErrors = [];
- const orderingWarnings = [];
- for (let i = 0; i < patchedMessages.length - 1; i++) {
- const current = patchedMessages[i];
- const next = patchedMessages[i + 1];
- // Check if a tool response comes before its corresponding tool call
- if (current.role === 'tool' && current.tool_call_id) {
- // Find the assistant message with this tool call
- const callIndex = patchedMessages.findIndex((m) => m.role === 'assistant' &&
- m.tool_calls?.some((tc) => tc.id === current.tool_call_id));
- if (callIndex === -1 || callIndex > i) {
- orderingErrors.push(`Tool response ${current.tool_call_id} appears before its tool call or call not found`);
- }
- }
- // Check if we have consecutive assistant messages with tool calls
- if (current.role === 'assistant' &&
- current.tool_calls &&
- next.role === 'assistant' &&
- next.tool_calls) {
- orderingWarnings.push(`Consecutive assistant messages with tool calls at indices ${i} and ${i + 1}`);
- }
- }
- if (orderingErrors.length > 0) {
- return `[Synthetic Order Check] Errors found: ${orderingErrors.join('; ')}`;
- }
- else if (orderingWarnings.length > 0) {
- return `[Synthetic Order Check] Warnings: ${orderingWarnings.join('; ')}`;
- }
- else {
- return '[Synthetic Order Check] No issues found';
- }
- });
- }
- const parser = this.requiresTextToolCallParsing()
- ? new GemmaToolCallParser()
- : null;
- // Get current tool format (with override support)
- const currentToolFormat = this.getToolFormat();
- // Format tools using formatToolsForAPI method
- const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
- // Get stream_options from ephemeral settings (not model params)
- const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
- // Default stream_options to { include_usage: true } unless explicitly set
- const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
- // Get streaming setting from ephemeral settings (default: enabled)
- const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
- let streamingEnabled = streamingSetting !== 'disabled';
- // Get resolved authentication and update client if needed
- await this.updateClientWithResolvedAuth();
- // Strip internal tracking fields that some APIs don't accept
- // We keep the synthetic responses but remove the metadata fields
- const cleanedMessages = patchedMessages.map((msg) => {
- // Create a shallow copy and remove internal fields
- const { _synthetic, _cancelled, ...cleanMsg } = msg;
- // Log synthetic tool responses for debugging
- if (msg._synthetic) {
- this.logger.debug(() => `[Synthetic Tool Response] ${JSON.stringify(cleanMsg)}`);
- }
- return cleanMsg;
- });
- this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${streamingEnabled}`);
- // Debug: Log message roles being sent
- this.logger.debug(() => `Messages being sent to OpenAI (${cleanedMessages.length} total): ${cleanedMessages
- .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
- .join(', ')}`);
- let response;
- try {
- // Build request params with exact order from original
- response = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: streamingEnabled,
- ...(streamingEnabled && finalStreamOptions
- ? { stream_options: finalStreamOptions }
- : {}),
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
- }
- catch (error) {
- // Debug the error
- const errorStatus = error?.status ||
- error?.response?.status;
- const errorLabel = errorStatus === 400 ? '[API Error 400]' : '[API Error]';
- this.logger.error(() => `${errorLabel} Error caught in API call: ${error}`);
- this.logger.error(() => `${errorLabel} Error type: ${error?.constructor?.name}`);
- this.logger.error(() => `${errorLabel} Error status: ${errorStatus}`);
- this.logger.error(() => `${errorLabel} Error response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
- // Log the last few messages to understand what's being sent
- if (errorStatus === 400) {
- // Log additional diagnostics for 400 errors
- const hasSyntheticMessages = cleanedMessages.some((msg) => msg.role === 'tool' &&
- msg.content === 'Tool execution cancelled by user');
- const hasPendingToolCalls = cleanedMessages.some((msg, idx) => {
- if (msg.role === 'assistant' && msg.tool_calls) {
- // Check if there's a matching tool response
- const toolCallIds = msg.tool_calls.map((tc) => tc.id);
- const hasResponses = toolCallIds.every((id) => cleanedMessages
- .slice(idx + 1)
- .some((m) => m.role === 'tool' && m.tool_call_id === id));
- return !hasResponses;
- }
- return false;
- });
- this.logger.error(() => `${errorLabel} Last 5 messages being sent:`);
- this.logger.error(() => `${errorLabel} Has synthetic messages: ${hasSyntheticMessages}`);
- this.logger.error(() => `${errorLabel} Has pending tool calls without responses: ${hasPendingToolCalls}`);
- const lastMessages = cleanedMessages.slice(-5);
- lastMessages.forEach((msg, idx) => {
- this.logger.error(() => ` [${cleanedMessages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
- if (msg.tool_calls) {
- msg.tool_calls.forEach((tc) => {
- this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
- });
- }
- });
- }
- // Check for JSONResponse mutation errors
- const errorMessage = error instanceof Error ? error.message : String(error);
- if (errorMessage?.includes('JSONResponse') &&
- errorMessage?.includes('does not support item assignment')) {
- this.logger.debug(() => '[JSONResponse Error] Detected JSONResponse mutation error, retrying without streaming');
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error detected. This typically occurs with certain providers like Cerebras. Falling back to non-streaming mode.', {
- errorMessage,
- provider: this.baseURL,
- streamingEnabled,
- });
- // Retry with streaming disabled
- response = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: false, // Force non-streaming
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
- // Override streamingEnabled for the rest of this function
- streamingEnabled = false;
- }
- else {
- this.logger.debug(() => `${errorLabel} Re-throwing error (not a JSONResponse mutation)`);
- // Re-throw other errors
- throw error;
- }
- }
- let fullContent = '';
- const accumulatedToolCalls = [];
- let hasStreamedContent = false;
- let usageData;
- // For Qwen streaming, buffer whitespace-only chunks to preserve spacing across chunk boundaries
- let pendingWhitespace = null;
- // Handle streaming vs non-streaming response
- if (streamingEnabled) {
- // We need to buffer all chunks to detect and handle malformed streams
- // Some providers (like Cerebras) send message format instead of delta
- const allChunks = [];
- this.logger.debug(() => '[Stream Detection] Starting to buffer chunks for corruption detection', {
- provider: this.baseURL,
- streamingEnabled,
- isUsingQwen: this.isUsingQwen(),
- currentModel: this.currentModel,
- });
- try {
- for await (const chunk of response) {
- // CRITICAL: Create a deep copy to avoid JSONResponse mutation issues
- // Cerebras and other providers may return immutable JSONResponse objects
- // Cast to unknown first to bypass type checking, then to our extended type
- const extendedChunk = chunk;
- const safeChunk = {
- choices: extendedChunk.choices?.map((choice) => ({
- delta: choice.delta
- ? {
- content: choice.delta.content ?? undefined,
- role: choice.delta.role,
- tool_calls: choice.delta.tool_calls?.map((tc, idx) => ({
- id: tc.id,
- type: tc.type,
- function: tc.function
- ? {
- name: tc.function.name,
- arguments: tc.function.arguments,
- }
- : undefined,
- index: tc.index !== undefined ? tc.index : idx,
- })),
- }
- : undefined,
- message: choice.message
- ? {
- content: choice.message.content ?? undefined,
- role: choice.message.role,
- tool_calls: choice.message.tool_calls?.map((tc) => ({
- id: tc.id,
- type: tc.type,
- function: tc.function
- ? {
- name: tc.function.name,
- arguments: tc.function.arguments,
- }
- : undefined,
- })),
- }
- : undefined,
- index: choice.index,
- finish_reason: choice.finish_reason,
- })),
- usage: extendedChunk.usage
- ? {
- prompt_tokens: extendedChunk.usage.prompt_tokens,
- completion_tokens: extendedChunk.usage.completion_tokens,
- total_tokens: extendedChunk.usage.total_tokens,
- }
- : undefined,
- };
- allChunks.push(safeChunk);
- }
- this.logger.debug(() => `[Stream Buffering Complete] Collected ${allChunks.length} chunks`, {
- chunkCount: allChunks.length,
- hasContent: allChunks.some((c) => c.choices?.[0]?.delta?.content),
- hasToolCalls: allChunks.some((c) => c.choices?.[0]?.delta?.tool_calls),
- });
- }
- catch (error) {
- // Handle JSONResponse mutation errors that occur during iteration
- const errorMessage = error instanceof Error ? error.message : String(error);
- if (errorMessage?.includes('JSONResponse') &&
- errorMessage?.includes('does not support item assignment')) {
- this.logger.error(() => '[Cerebras Corruption] JSONResponse mutation error during stream iteration. This is a known issue with Cerebras. The OpenAI client library is trying to mutate immutable response objects. Falling back to non-streaming mode.', {
- error: errorMessage,
- provider: this.baseURL,
- chunksCollected: allChunks.length,
- });
- // Retry the entire request with streaming disabled
- // This is the nuclear option but ensures we get a response
- const nonStreamingResponse = await this.openai.chat.completions.create({
- model: this.currentModel,
- messages: cleanedMessages,
- stream: false, // Force non-streaming
- tools: formattedTools,
- tool_choice: this.getToolChoiceForFormat(tools),
- ...this.modelParams,
- });
- // Handle as non-streaming response
- const completionResponse = nonStreamingResponse;
- const choice = completionResponse.choices[0];
- if (choice?.message.content) {
- fullContent = choice.message.content;
- }
- if (choice?.message.tool_calls) {
- for (const toolCall of choice.message.tool_calls) {
- if (toolCall.type === 'function' && toolCall.function) {
- accumulatedToolCalls.push({
- id: toolCall.id,
- type: 'function',
- function: toolCall.function,
- });
- }
+ // 1. Validate authentication and messages
+ await this.validateRequestPreconditions(messages);
+ // 2. Prepare request configuration
+ const requestConfig = this.prepareApiRequest(messages, tools);
+ // 3. Make API call with error handling
+ const response = await this.executeApiCall(messages, tools, requestConfig);
+ // 4. Process response based on streaming mode
+ let processedData = {
+ fullContent: '',
+ accumulatedToolCalls: [],
+ hasStreamedContent: false,
+ usageData: undefined,
+ pendingWhitespace: null,
+ };
+ if (requestConfig.streamingEnabled) {
+ // Need to yield streaming content as it comes
+ const streamResponse = response;
+ for await (const chunk of streamResponse) {
+ const delta = chunk.choices?.[0]?.delta;
+ if (delta?.content && !requestConfig.parser) {
+ if (this.isUsingQwen()) {
+ // Handle Qwen whitespace buffering inline for yielding
+ // This is needed because we yield during streaming
+ // We'll refactor this separately if needed
+ const whitespaceResult = this.handleQwenStreamingWhitespace(delta, processedData.pendingWhitespace, processedData.fullContent);
+ if (whitespaceResult.shouldYield) {
+ yield {
+ role: ContentGeneratorRole.ASSISTANT,
+ content: whitespaceResult.content,
+ };
  }
- }
- if (completionResponse.usage) {
- usageData = {
- prompt_tokens: completionResponse.usage.prompt_tokens,
- completion_tokens: completionResponse.usage.completion_tokens,
- total_tokens: completionResponse.usage.total_tokens,
+ // Update our tracking of processed data
+ processedData = {
+ fullContent: whitespaceResult.updatedFullContent,
+ accumulatedToolCalls: processedData.accumulatedToolCalls,
+ hasStreamedContent: processedData.hasStreamedContent ||
+ whitespaceResult.shouldYield,
+ usageData: processedData.usageData,
+ pendingWhitespace: whitespaceResult.updatedPendingWhitespace,
  };
  }
- // Yield the complete response
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent || '',
- tool_calls: accumulatedToolCalls.length > 0
- ? accumulatedToolCalls
- : undefined,
- usage: usageData,
- };
- return;
- }
- // Re-throw other errors
- throw error;
- }
- // Check first chunk to see if we have malformed stream
- let detectedMalformedStream = false;
- if (allChunks.length > 0) {
- const firstChunk = allChunks[0];
- if (firstChunk.choices?.[0]?.message &&
- !firstChunk.choices?.[0]?.delta) {
- detectedMalformedStream = true;
- this.logger.debug(() => 'Detected malformed stream (message instead of delta), using aggregation mode');
- }
- }
- // If we detected issues, aggregate everything
- if (detectedMalformedStream) {
- const contentParts = [];
- let aggregatedToolCalls = [];
- let finalUsageData = undefined;
- // Process all buffered chunks
- for (const chunk of allChunks) {
- const message = chunk.choices?.[0]?.message || chunk.choices?.[0]?.delta;
- if (message?.content) {
- contentParts.push(message.content);
- }
- if (message?.tool_calls) {
- // Ensure tool_calls match the expected format
- aggregatedToolCalls = message.tool_calls.map((tc) => ({
- id: tc.id || `call_${Date.now()}`,
- type: (tc.type || 'function'),
- function: {
- name: tc.function?.name || '',
- arguments: tc.function?.arguments || '',
- },
- }));
- }
- if (chunk.usage) {
- finalUsageData = {
- prompt_tokens: chunk.usage.prompt_tokens || 0,
- completion_tokens: chunk.usage.completion_tokens || 0,
- total_tokens: chunk.usage.total_tokens || 0,
- };
- }
- }
- // Yield single reconstructed message
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: contentParts.join(''),
- tool_calls: aggregatedToolCalls.length > 0 ? aggregatedToolCalls : undefined,
- usage: finalUsageData,
- };
- return;
- }
- // Process chunks normally - stream them as they come
- this.logger.debug(() => `[Processing Chunks] Starting to process ${allChunks.length} buffered chunks`, {
- isUsingQwen: this.isUsingQwen(),
- });
- let chunkIndex = 0;
- for (const chunk of allChunks) {
- chunkIndex++;
- // Since we created safe copies during buffering, chunks are now mutable
- // Check if this chunk has message format instead of delta (malformed stream)
- let processedChunk = chunk;
- if (chunk.choices?.[0]?.message && !chunk.choices?.[0]?.delta) {
- this.logger.error(() => '[Cerebras Corruption] Converting malformed chunk from message to delta format', {
- provider: this.baseURL,
- hasMessage: true,
- hasDelta: false,
- messageContent: chunk.choices[0].message?.content?.substring(0, 100),
- });
- // Convert message format to delta format for consistent processing
- const message = chunk.choices[0].message;
- processedChunk = {
- choices: [
- {
- delta: {
- content: message?.content ?? undefined,
- role: message?.role,
- tool_calls: message?.tool_calls,
- },
- },
- ],
- usage: chunk.usage,
- };
- }
- const delta = processedChunk.choices?.[0]?.delta;
- if (delta?.content) {
- // Enhanced debug logging to understand streaming behavior
- if (this.isUsingQwen()) {
- this.logger.debug(() => `Chunk: ${JSON.stringify({
- content: delta.content,
- contentLength: delta.content?.length ?? 0,
- isWhitespaceOnly: delta.content?.trim() === '',
- chunkIndex: 0,
- })}`);
- }
- // For text-based models, don't yield content chunks yet
- if (!parser && delta.content) {
- this.logger.debug(() => `[Content Processing] Chunk ${chunkIndex}/${allChunks.length} has content`, {
- contentLength: delta.content.length,
- contentPreview: delta.content.substring(0, 50),
- isUsingQwen: this.isUsingQwen(),
- willBuffer: this.isUsingQwen() && delta.content.trim() === '',
- });
- if (this.isUsingQwen()) {
- const isWhitespaceOnly = delta.content.trim() === '';
- if (isWhitespaceOnly) {
- // Buffer whitespace-only chunk
- pendingWhitespace = (pendingWhitespace || '') + delta.content;
- this.logger.debug(() => `[Whitespace Buffering] Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${pendingWhitespace?.length ?? 0}`, {
- chunkIndex,
- totalChunks: allChunks.length,
- isLastChunk: chunkIndex === allChunks.length,
- contentHex: Buffer.from(delta.content).toString('hex'),
- });
- continue;
- }
- else if (pendingWhitespace) {
- // Flush buffered whitespace before non-empty chunk to preserve spacing
- this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
- };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
- }
- }
- this.logger.debug(() => `[Yielding Content] Yielding chunk ${chunkIndex}/${allChunks.length}`, {
- contentLength: delta.content.length,
- hasStreamedContent,
- });
+ else {
  yield {
  role: ContentGeneratorRole.ASSISTANT,
  content: delta.content,
  };
- hasStreamedContent = true;
+ processedData = {
+ fullContent: processedData.fullContent + delta.content,
+ accumulatedToolCalls: processedData.accumulatedToolCalls,
+ hasStreamedContent: true,
+ usageData: processedData.usageData,
+ pendingWhitespace: null,
+ };
  }
- fullContent += delta.content;
  }
+ else if (delta?.content) {
+ // Parser mode - just accumulate
+ processedData = {
+ fullContent: processedData.fullContent + delta.content,
+ accumulatedToolCalls: processedData.accumulatedToolCalls,
+ hasStreamedContent: processedData.hasStreamedContent,
+ usageData: processedData.usageData,
+ pendingWhitespace: processedData.pendingWhitespace,
+ };
+ }
+ // Handle tool calls
  if (delta?.tool_calls) {
+ const accumulated = processedData.accumulatedToolCalls;
  for (const toolCall of delta.tool_calls) {
- this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulatedToolCalls, currentToolFormat);
+ this.toolFormatter.accumulateStreamingToolCall(toolCall, accumulated, requestConfig.currentToolFormat);
  }
+ processedData = {
+ ...processedData,
+ accumulatedToolCalls: accumulated,
+ };
  }
- // Check for usage data in the chunk
- if (processedChunk.usage) {
- usageData = {
- prompt_tokens: processedChunk.usage.prompt_tokens || 0,
- completion_tokens: processedChunk.usage.completion_tokens || 0,
- total_tokens: processedChunk.usage.total_tokens || 0,
+ // Check for usage data
+ if (chunk.usage) {
+ processedData = {
+ ...processedData,
+ usageData: {
+ prompt_tokens: chunk.usage.prompt_tokens || 0,
+ completion_tokens: chunk.usage.completion_tokens || 0,
+ total_tokens: chunk.usage.total_tokens || 0,
+ },
  };
  }
  }
  }
  else {
- // Non-streaming response - handle as a single completion
- const completionResponse = response;
- const choice = completionResponse.choices[0];
- if (choice?.message.content) {
- fullContent = choice.message.content;
- }
- if (choice?.message.tool_calls) {
- // Convert tool calls to the standard format
- for (const toolCall of choice.message.tool_calls) {
- if (toolCall.type === 'function' && toolCall.function) {
- // Don't fix double stringification here - it's handled later in the final processing
- accumulatedToolCalls.push({
- id: toolCall.id,
- type: 'function',
- function: toolCall.function,
- });
- }
- }
- }
- if (completionResponse.usage) {
- usageData = {
- prompt_tokens: completionResponse.usage.prompt_tokens,
- completion_tokens: completionResponse.usage.completion_tokens,
- total_tokens: completionResponse.usage.total_tokens,
- };
- }
- // For non-streaming, we yield the full content at once if there's no parser
- if (!parser && fullContent) {
+ // Non-streaming response
+ processedData = this.processNonStreamingResponse(response);
+ // For non-streaming, yield content if no parser
+ if (!requestConfig.parser && processedData.fullContent) {
  yield {
  role: ContentGeneratorRole.ASSISTANT,
- content: fullContent,
+ content: processedData.fullContent,
  };
- hasStreamedContent = true;
+ processedData.hasStreamedContent = true;
  }
  }
- // Flush any remaining pending whitespace for Qwen
- if (pendingWhitespace && this.isUsingQwen() && !parser) {
- this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
+ // 5. Flush pending whitespace if needed (for Qwen)
+ if (processedData.pendingWhitespace &&
+ this.isUsingQwen() &&
+ !requestConfig.parser) {
+ this.logger.debug(() => `Flushing trailing pending whitespace (len=${processedData.pendingWhitespace?.length ?? 0}) at stream end`);
  yield {
  role: ContentGeneratorRole.ASSISTANT,
- content: pendingWhitespace,
+ content: processedData.pendingWhitespace,
  };
- hasStreamedContent = true;
- fullContent += pendingWhitespace;
- pendingWhitespace = null;
- }
- // After stream ends, parse text-based tool calls if needed
- if (parser && fullContent) {
- const { cleanedContent, toolCalls } = parser.parse(fullContent);
- if (toolCalls.length > 0) {
- // Convert to standard format
- const standardToolCalls = toolCalls.map((tc, index) => ({
- id: `call_${Date.now()}_${index}`,
- type: 'function',
- function: {
- name: tc.name,
- arguments: JSON.stringify(tc.arguments),
- },
- }));
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- tool_calls: standardToolCalls,
- usage: usageData,
- };
- }
- else {
- // No tool calls found, yield cleaned content
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: cleanedContent,
- usage: usageData,
- };
- }
- }
- else {
- // Standard OpenAI tool call handling
- if (accumulatedToolCalls.length > 0) {
- // Fix double stringification for Qwen tool calls
- // Qwen models pre-stringify arguments values, but later in the process
- // they are being JSON.stringify'd again
- let fixedToolCalls = accumulatedToolCalls;
- if (this.isUsingQwen()) {
- this.logger.debug(() => `[Qwen Fix] Processing ${accumulatedToolCalls.length} tool calls for double-stringification fix`);
- fixedToolCalls = accumulatedToolCalls.map((toolCall, index) => {
- this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
- name: toolCall.function.name,
- argumentsType: typeof toolCall.function.arguments,
- argumentsLength: toolCall.function.arguments?.length,
- argumentsSample: toolCall.function.arguments?.substring(0, 100),
- })}`);
- // For Qwen, check for nested double-stringification
- // Qwen models stringify array/object values WITHIN the JSON arguments
- if (toolCall.function.arguments &&
- typeof toolCall.function.arguments === 'string') {
- try {
- // First, parse the arguments to get the JSON object
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- let hasNestedStringification = false;
- // Check each property to see if it's a stringified array/object/number
- const fixedArgs = {};
- for (const [key, value] of Object.entries(parsedArgs)) {
- if (typeof value === 'string') {
- const trimmed = value.trim();
- // Check if it's a stringified number (integer or float)
- if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
- const numValue = trimmed.includes('.')
- ? parseFloat(trimmed)
- : parseInt(trimmed, 10);
- fixedArgs[key] = numValue;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed stringified number in property '${key}' for ${toolCall.function.name}: "${value}" -> ${numValue}`);
- }
- // Check if it looks like a stringified array or object
- // Also check for Python-style dictionaries with single quotes
- else if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
- (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
- try {
- // Try to parse it as JSON
- const nestedParsed = JSON.parse(value);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Try to convert Python-style to JSON (single quotes to double quotes)
- try {
- const jsonified = value
- .replace(/'/g, '"')
- .replace(/: True/g, ': true')
- .replace(/: False/g, ': false')
- .replace(/: None/g, ': null');
- const nestedParsed = JSON.parse(jsonified);
- fixedArgs[key] = nestedParsed;
- hasNestedStringification = true;
- this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
- }
- catch {
- // Not valid JSON even after conversion, keep as string
- fixedArgs[key] = value;
- }
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- else {
- fixedArgs[key] = value;
- }
- }
- if (hasNestedStringification) {
- this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(fixedArgs),
- },
- };
- }
- }
- catch (_e) {
- // If parsing fails, check for old-style double-stringification
- if (toolCall.function.arguments.startsWith('"') &&
- toolCall.function.arguments.endsWith('"')) {
- try {
- // Old fix: entire arguments were double-stringified
- const parsedArgs = JSON.parse(toolCall.function.arguments);
- this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
- return {
- ...toolCall,
- function: {
- ...toolCall.function,
- arguments: JSON.stringify(parsedArgs),
- },
- };
- }
- catch {
- // Leave as-is if we can't parse
- }
- }
- }
- }
- // No fix needed
- this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
- return toolCall;
- });
- }
- if (this.isUsingQwen()) {
- this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
- contentLength: fullContent.length,
- content: fullContent.substring(0, 200) +
- (fullContent.length > 200 ? '...' : ''),
- toolCallCount: accumulatedToolCalls.length,
- hasStreamedContent,
- })}`);
- }
- // For Qwen models, don't duplicate content if we've already streamed it
- // BUT Cerebras needs at least a space to continue after tool responses
- const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
- if (isCerebras) {
- this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
- hasStreamedContent,
- willSendSpace: hasStreamedContent,
- });
- }
- const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
- this.logger.debug(() => '[Tool Call Handling] Deciding how to yield tool calls', {
- hasStreamedContent,
- isUsingQwen: this.isUsingQwen(),
- isCerebras,
- shouldOmitContent,
- fullContentLength: fullContent.length,
- toolCallCount: fixedToolCalls?.length || 0,
- });
- if (shouldOmitContent) {
- // Qwen: Send just a space (like Cerebras) to prevent stream stopping
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: ' ', // Single space instead of empty to keep stream alive
- tool_calls: fixedToolCalls,
- usage: usageData,
- };
- }
- else if (isCerebras && hasStreamedContent) {
- // Cerebras: Send just a space to prevent duplication but allow continuation
- // This prevents the repeated "Let me search..." text
- this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: ' ', // Single space instead of full content
- tool_calls: fixedToolCalls,
- usage: usageData,
- };
- }
- else {
- // Include full content with tool calls
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: fullContent || '',
- tool_calls: fixedToolCalls,
- usage: usageData,
- };
- }
- }
- else if (usageData) {
- // Always emit usage data so downstream consumers can update stats
- yield {
- role: ContentGeneratorRole.ASSISTANT,
- content: '',
- usage: usageData,
- };
- }
+ processedData.hasStreamedContent = true;
+ processedData.fullContent += processedData.pendingWhitespace;
+ processedData.pendingWhitespace = null;
  }
+ // 6. Process and yield final results
+ yield* this.processFinalResponse(processedData, requestConfig.parser);
  }
  setModel(modelId) {
  // Update SettingsService as the source of truth
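The refactored generateChatCompletion delegates Qwen whitespace handling to a new handleQwenStreamingWhitespace helper. Its contract can be reconstructed from the inline logic deleted in this hunk; the sketch below is inferred from the call site and the old behaviour, not the shipped implementation (the old code yielded buffered whitespace as a separate message, whereas this sketch prepends it to the next chunk).

function bufferQwenWhitespace(deltaContent, pendingWhitespace, fullContent) {
    if (deltaContent.trim() === '') {
        // Whitespace-only chunk: hold it back until real content arrives,
        // so spacing across chunk boundaries is preserved rather than dropped
        return {
            shouldYield: false,
            content: '',
            updatedFullContent: fullContent,
            updatedPendingWhitespace: (pendingWhitespace ?? '') + deltaContent,
        };
    }
    // Non-empty chunk: flush any buffered whitespace ahead of it
    const content = (pendingWhitespace ?? '') + deltaContent;
    return {
        shouldYield: true,
        content,
        updatedFullContent: fullContent + content,
        updatedPendingWhitespace: null,
    };
}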
@@ -1297,7 +426,7 @@ export class OpenAIProvider extends BaseProvider {
  }
  setApiKey(apiKey) {
  // Call base provider implementation
- super.setApiKey?.(apiKey);
+ super.setApiKey(apiKey);
  // Persist to SettingsService if available
  this.setApiKeyInSettings(apiKey).catch((error) => {
  this.logger.debug(() => `Failed to persist API key to SettingsService: ${error}`);
@@ -1349,24 +478,6 @@ export class OpenAIProvider extends BaseProvider {
  setToolFormatOverride(format) {
  this.toolFormatOverride = format || undefined;
  }
- /**
- * Estimates the remote context usage for the current conversation
- * @param conversationId The conversation ID
- * @param parentId The parent message ID
- * @param promptMessages The messages being sent in the current prompt
- * @returns Context usage information including remote tokens
- */
- estimateContextUsage(conversationId, parentId, promptMessages) {
- const promptTokens = estimateMessagesTokens(promptMessages);
- return estimateRemoteTokens(this.currentModel, this.conversationCache, conversationId, parentId, promptTokens);
- }
- /**
- * Get the conversation cache instance
- * @returns The conversation cache
- */
- getConversationCache() {
- return this.conversationCache;
- }
  /**
  * OpenAI always requires payment (API key)
  */
@@ -1374,8 +485,7 @@ export class OpenAIProvider extends BaseProvider {
  return true;
  }
  clearState() {
- // Clear the conversation cache to prevent tool call ID mismatches
- this.conversationCache.clear();
+ // No state to clear in base OpenAI provider
  }
  /**
  * Get the list of server tools supported by this provider
@@ -1530,5 +640,439 @@ export class OpenAIProvider extends BaseProvider {
  // For now, return the response as-is
  return response;
  }
+ /**
+ * Validate authentication and message preconditions for API calls
+ */
+ async validateRequestPreconditions(messages) {
+ // Check if API key is available (using resolved authentication)
+ const apiKey = await this.getAuthToken();
+ if (!apiKey) {
+ const endpoint = this.baseURL || 'https://api.openai.com/v1';
+ if (this.isOAuthEnabled() && !this.supportsOAuth()) {
+ throw new Error(generateOAuthEndpointMismatchError(endpoint, 'qwen'));
+ }
+ throw new Error('OpenAI API key is required to generate completions');
+ }
+ // Validate tool messages have required tool_call_id
+ const toolMessages = messages.filter((msg) => msg.role === 'tool');
+ const missingIds = toolMessages.filter((msg) => !msg.tool_call_id);
+ if (missingIds.length > 0) {
+ this.logger.error(() => `FATAL: Tool messages missing tool_call_id: ${JSON.stringify(missingIds)}`);
+ throw new Error(`OpenAI API requires tool_call_id for all tool messages. Found ${missingIds.length} tool message(s) without IDs.`);
+ }
+ }
+ /**
+ * Prepare API request configuration
+ */
+ prepareApiRequest(messages, tools) {
+ const parser = this.requiresTextToolCallParsing()
+ ? new GemmaToolCallParser()
+ : null;
+ // Get current tool format (with override support)
+ const currentToolFormat = this.getToolFormat();
+ // Format tools using formatToolsForAPI method
+ const formattedTools = tools ? this.formatToolsForAPI(tools) : undefined;
+ // Get stream_options from ephemeral settings (not model params)
+ const streamOptions = this.providerConfig?.getEphemeralSettings?.()?.['stream-options'];
+ // Default stream_options to { include_usage: true } unless explicitly set
+ const finalStreamOptions = streamOptions !== undefined ? streamOptions : { include_usage: true };
+ // Get streaming setting from ephemeral settings (default: enabled)
+ const streamingSetting = this.providerConfig?.getEphemeralSettings?.()?.['streaming'];
+ const streamingEnabled = streamingSetting !== 'disabled';
+ return {
+ parser,
+ currentToolFormat,
+ formattedTools,
+ finalStreamOptions,
+ streamingEnabled,
+ };
+ }
+ /**
+ * Execute API call with error handling
+ */
+ async executeApiCall(messages, tools, requestConfig) {
+ // Get resolved authentication and update client if needed
+ await this.updateClientWithResolvedAuth();
+ this.logger.debug(() => `About to make API call with model: ${this.currentModel}, baseURL: ${this.openai.baseURL}, apiKey: ${this.openai.apiKey?.substring(0, 10)}..., streaming: ${requestConfig.streamingEnabled}, messages (${messages.length} total): ${messages
+ .map((m) => `${m.role}${m.role === 'system' ? ` (length: ${m.content?.length})` : ''}`)
+ .join(', ')}`);
+ try {
+ // Build request params with exact order from original
+ return await this.openai.chat.completions.create({
+ model: this.currentModel,
+ messages: messages,
+ stream: requestConfig.streamingEnabled,
+ ...(requestConfig.streamingEnabled && requestConfig.finalStreamOptions
+ ? { stream_options: requestConfig.finalStreamOptions }
+ : {}),
+ tools: requestConfig.formattedTools,
+ tool_choice: this.getToolChoiceForFormat(tools),
+ ...this.modelParams,
+ });
+ }
+ catch (error) {
+ this.handleApiError(error, messages);
+ throw error; // Re-throw after logging
+ }
+ }
+ /**
+ * Handle and log API errors
+ */
+ handleApiError(error, messages) {
+ const errorStatus = error?.status ||
+ error?.response?.status;
+ const errorLabel = errorStatus === 400 ? '[API Error 400]' : '[API Error]';
+ this.logger.error(() => `${errorLabel} Error caught in API call:\n` +
+ ` Error: ${error}\n` +
+ ` Type: ${error?.constructor?.name}\n` +
+ ` Status: ${errorStatus}\n` +
+ ` Response data: ${JSON.stringify(error?.response?.data, null, 2)}`);
+ // Log the last few messages to understand what's being sent
+ if (errorStatus === 400) {
+ // Log additional diagnostics for 400 errors
+ const hasPendingToolCalls = messages.some((msg, idx) => {
+ if (msg.role === 'assistant' && msg.tool_calls) {
+ // Check if there's a matching tool response
+ const toolCallIds = msg.tool_calls.map((tc) => tc.id);
+ const hasResponses = toolCallIds.every((id) => messages
+ .slice(idx + 1)
+ .some((m) => m.role === 'tool' && m.tool_call_id === id));
+ return !hasResponses;
+ }
+ return false;
+ });
+ this.logger.error(() => `${errorLabel} Last 5 messages being sent:\n` +
+ ` Has pending tool calls without responses: ${hasPendingToolCalls}`);
+ const lastMessages = messages.slice(-5);
+ lastMessages.forEach((msg, idx) => {
+ this.logger.error(() => ` [${messages.length - 5 + idx}] ${msg.role}${msg.tool_call_id ? ` (tool response for ${msg.tool_call_id})` : ''}${msg.tool_calls ? ` (${msg.tool_calls.length} tool calls)` : ''}`);
+ if (msg.tool_calls) {
+ msg.tool_calls.forEach((tc) => {
+ this.logger.error(() => ` - Tool call: ${tc.id} -> ${tc.function.name}`);
+ });
+ }
+ });
+ }
+ }
+ /**
+ * Process non-streaming response
+ */
+ processNonStreamingResponse(response) {
+ const choice = response.choices[0];
+ let fullContent = '';
+ const accumulatedToolCalls = [];
+ let usageData;
+ if (choice?.message.content) {
+ fullContent = choice.message.content;
+ }
+ if (choice?.message.tool_calls) {
+ // Convert tool calls to the standard format
+ for (const toolCall of choice.message.tool_calls) {
+ if (toolCall.type === 'function' && toolCall.function) {
+ // Don't fix double stringification here - it's handled later in the final processing
+ accumulatedToolCalls.push({
+ id: toolCall.id,
+ type: 'function',
+ function: toolCall.function,
+ });
+ }
+ }
+ }
+ if (response.usage) {
+ usageData = {
+ prompt_tokens: response.usage.prompt_tokens,
+ completion_tokens: response.usage.completion_tokens,
+ total_tokens: response.usage.total_tokens,
+ };
+ }
+ return {
+ fullContent,
+ accumulatedToolCalls,
+ hasStreamedContent: false, // Non-streaming never has streamed content
+ usageData,
+ pendingWhitespace: null,
+ };
+ }
+ /**
+ * Process and build final response messages
+ */
+ *processFinalResponse(processedData, parser) {
+ const { fullContent, accumulatedToolCalls, hasStreamedContent, usageData, pendingWhitespace, } = processedData;
+ // Flush any remaining pending whitespace for Qwen
+ let finalFullContent = fullContent;
+ if (pendingWhitespace && this.isUsingQwen() && !parser) {
+ this.logger.debug(() => `Flushing trailing pending whitespace (len=${pendingWhitespace?.length ?? 0}) at stream end`);
805
+ finalFullContent += pendingWhitespace;
806
+ }
807
+ // After stream ends, parse text-based tool calls if needed
808
+ if (parser && finalFullContent) {
809
+ const { cleanedContent, toolCalls } = parser.parse(finalFullContent);
810
+ if (toolCalls.length > 0) {
811
+ // Convert to standard format
812
+ const standardToolCalls = toolCalls.map((tc, index) => ({
813
+ id: `call_${Date.now()}_${index}`,
814
+ type: 'function',
815
+ function: {
816
+ name: tc.name,
817
+ arguments: JSON.stringify(tc.arguments),
818
+ },
819
+ }));
820
+ yield {
821
+ role: ContentGeneratorRole.ASSISTANT,
822
+ content: cleanedContent,
823
+ tool_calls: standardToolCalls,
824
+ usage: usageData,
825
+ };
826
+ }
827
+ else {
828
+ // No tool calls found, yield cleaned content
829
+ yield {
830
+ role: ContentGeneratorRole.ASSISTANT,
831
+ content: cleanedContent,
832
+ usage: usageData,
833
+ };
834
+ }
835
+ }
836
+ else {
837
+ // Standard OpenAI tool call handling
838
+ if (accumulatedToolCalls.length > 0) {
839
+ // Process tool calls with Qwen-specific fixes if needed
840
+ const fixedToolCalls = this.processQwenToolCalls(accumulatedToolCalls);
841
+ if (this.isUsingQwen()) {
842
+ this.logger.debug(() => `Final message with tool calls: ${JSON.stringify({
843
+ contentLength: finalFullContent.length,
844
+ content: finalFullContent.substring(0, 200) +
845
+ (finalFullContent.length > 200 ? '...' : ''),
846
+ toolCallCount: accumulatedToolCalls.length,
847
+ hasStreamedContent,
848
+ })}`);
849
+ }
850
+ // Build the final message based on provider-specific requirements
851
+ const finalMessage = this.buildFinalToolCallMessage(hasStreamedContent, finalFullContent, fixedToolCalls, usageData);
852
+ yield finalMessage;
853
+ }
854
+ else if (usageData) {
855
+ // Always emit usage data so downstream consumers can update stats
856
+ yield {
857
+ role: ContentGeneratorRole.ASSISTANT,
858
+ content: '',
859
+ usage: usageData,
860
+ };
861
+ }
862
+ }
863
+ }
864
+ /**
865
+ * Handle Qwen-specific whitespace buffering during streaming
866
+ * @param delta The stream delta containing content
867
+ * @param pendingWhitespace Current buffered whitespace
868
+ * @param fullContent Accumulated full content
869
+ * @returns Object with updated state and whether to yield content
870
+ */
871
+ handleQwenStreamingWhitespace(delta, pendingWhitespace, fullContent) {
872
+ if (!delta.content) {
873
+ return {
874
+ shouldYield: false,
875
+ content: '',
876
+ updatedPendingWhitespace: pendingWhitespace,
877
+ updatedFullContent: fullContent,
878
+ };
879
+ }
880
+ const isWhitespaceOnly = delta.content.trim() === '';
881
+ if (isWhitespaceOnly) {
882
+ // Buffer whitespace-only chunk
883
+ const newPendingWhitespace = (pendingWhitespace || '') + delta.content;
884
+ this.logger.debug(() => `[Whitespace Buffering] Buffered whitespace-only chunk (len=${delta.content?.length ?? 0}). pendingWhitespace now len=${newPendingWhitespace?.length ?? 0}`);
885
+ return {
886
+ shouldYield: false,
887
+ content: '',
888
+ updatedPendingWhitespace: newPendingWhitespace,
889
+ updatedFullContent: fullContent + delta.content,
890
+ };
891
+ }
892
+ // Non-whitespace content - flush any pending whitespace first
893
+ if (pendingWhitespace) {
894
+ this.logger.debug(() => `Flushing pending whitespace (len=${pendingWhitespace?.length ?? 0}) before non-empty chunk`);
895
+ return {
896
+ shouldYield: true,
897
+ content: pendingWhitespace + delta.content,
898
+ updatedPendingWhitespace: null,
899
+ updatedFullContent: fullContent + pendingWhitespace + delta.content,
900
+ };
901
+ }
902
+ return {
903
+ shouldYield: true,
904
+ content: delta.content,
905
+ updatedPendingWhitespace: null,
906
+ updatedFullContent: fullContent + delta.content,
907
+ };
908
+ }
909
+ /**
910
+ * Process tool calls for Qwen models, fixing double stringification
911
+ * @param toolCalls The tool calls to process
912
+ * @returns Processed tool calls with fixes applied
913
+ */
914
+ processQwenToolCalls(toolCalls) {
915
+ if (!this.isUsingQwen()) {
916
+ return toolCalls;
917
+ }
918
+ this.logger.debug(() => `[Qwen Fix] Processing ${toolCalls.length} tool calls for double-stringification fix`);
919
+ return toolCalls.map((toolCall, index) => {
920
+ this.logger.debug(() => `[Qwen Fix] Tool call ${index}: ${JSON.stringify({
921
+ name: toolCall.function.name,
922
+ argumentsType: typeof toolCall.function.arguments,
923
+ argumentsLength: toolCall.function.arguments?.length,
924
+ argumentsSample: toolCall.function.arguments?.substring(0, 100),
925
+ })}`);
926
+ return this.fixQwenDoubleStringification(toolCall);
927
+ });
928
+ }
929
+ /**
930
+ * Determine how to yield the final message with tool calls based on provider quirks
931
+ * @param hasStreamedContent Whether content was already streamed
932
+ * @param fullContent The complete content
933
+ * @param toolCalls The tool calls to include
934
+ * @param usageData Optional usage statistics
935
+ * @returns The message to yield
936
+ */
937
+ buildFinalToolCallMessage(hasStreamedContent, fullContent, toolCalls, usageData) {
938
+ const isCerebras = this.baseURL?.toLowerCase().includes('cerebras.ai');
939
+ if (isCerebras) {
940
+ this.logger.debug(() => '[Cerebras] Special handling for Cerebras provider after tool responses', {
941
+ hasStreamedContent,
942
+ willSendSpace: hasStreamedContent,
943
+ });
944
+ }
945
+ const shouldOmitContent = hasStreamedContent && this.isUsingQwen() && !isCerebras;
946
+ this.logger.debug(() => '[Tool Call Handling] Deciding how to yield tool calls', {
947
+ hasStreamedContent,
948
+ isUsingQwen: this.isUsingQwen(),
949
+ isCerebras,
950
+ shouldOmitContent,
951
+ fullContentLength: fullContent.length,
952
+ toolCallCount: toolCalls?.length || 0,
953
+ });
954
+ if (shouldOmitContent || (isCerebras && hasStreamedContent)) {
955
+ // Send just a space to prevent stream stopping or duplication
956
+ if (isCerebras && hasStreamedContent) {
957
+ this.logger.debug(() => '[Cerebras] Sending minimal space content to prevent duplication');
958
+ }
959
+ return {
960
+ role: ContentGeneratorRole.ASSISTANT,
961
+ content: ' ',
962
+ tool_calls: toolCalls,
963
+ usage: usageData,
964
+ };
965
+ }
966
+ // Include full content with tool calls
967
+ return {
968
+ role: ContentGeneratorRole.ASSISTANT,
969
+ content: fullContent || '',
970
+ tool_calls: toolCalls,
971
+ usage: usageData,
972
+ };
973
+ }
974
+ /**
975
+ * Fix Qwen's double stringification of tool call arguments
976
+ * Qwen models stringify array/object values WITHIN the JSON arguments
977
+ * @param toolCall The tool call to fix
978
+ * @returns The fixed tool call or the original if no fix is needed
979
+ */
980
+ fixQwenDoubleStringification(toolCall) {
981
+ if (!toolCall.function.arguments ||
982
+ typeof toolCall.function.arguments !== 'string') {
983
+ return toolCall;
984
+ }
985
+ try {
986
+ // First, parse the arguments to get the JSON object
987
+ const parsedArgs = JSON.parse(toolCall.function.arguments);
988
+ let hasNestedStringification = false;
989
+ // Check each property to see if it's a stringified array/object/number
990
+ const fixedArgs = {};
991
+ for (const [key, value] of Object.entries(parsedArgs)) {
992
+ if (typeof value === 'string') {
993
+ const trimmed = value.trim();
994
+ // Check if it's a stringified number (integer or float)
995
+ if (/^-?\d+(\.\d+)?$/.test(trimmed)) {
996
+ const numValue = trimmed.includes('.')
997
+ ? parseFloat(trimmed)
998
+ : parseInt(trimmed, 10);
999
+ fixedArgs[key] = numValue;
1000
+ hasNestedStringification = true;
1001
+ this.logger.debug(() => `[Qwen Fix] Fixed stringified number in property '${key}' for ${toolCall.function.name}: "${value}" -> ${numValue}`);
1002
+ }
1003
+ // Check if it looks like a stringified array or object
1004
+ // Also check for Python-style dictionaries with single quotes
1005
+ else if ((trimmed.startsWith('[') && trimmed.endsWith(']')) ||
1006
+ (trimmed.startsWith('{') && trimmed.endsWith('}'))) {
1007
+ try {
1008
+ // Try to parse it as JSON
1009
+ const nestedParsed = JSON.parse(value);
1010
+ fixedArgs[key] = nestedParsed;
1011
+ hasNestedStringification = true;
1012
+ this.logger.debug(() => `[Qwen Fix] Fixed nested stringification in property '${key}' for ${toolCall.function.name}`);
1013
+ }
1014
+ catch {
1015
+ // Try to convert Python-style to JSON (single quotes to double quotes)
1016
+ try {
1017
+ const jsonified = value
1018
+ .replace(/'/g, '"')
1019
+ .replace(/: True/g, ': true')
1020
+ .replace(/: False/g, ': false')
1021
+ .replace(/: None/g, ': null');
1022
+ const nestedParsed = JSON.parse(jsonified);
1023
+ fixedArgs[key] = nestedParsed;
1024
+ hasNestedStringification = true;
1025
+ this.logger.debug(() => `[Qwen Fix] Fixed Python-style nested stringification in property '${key}' for ${toolCall.function.name}`);
1026
+ }
1027
+ catch {
1028
+ // Not valid JSON even after conversion, keep as string
1029
+ fixedArgs[key] = value;
1030
+ }
1031
+ }
1032
+ }
1033
+ else {
1034
+ fixedArgs[key] = value;
1035
+ }
1036
+ }
1037
+ else {
1038
+ fixedArgs[key] = value;
1039
+ }
1040
+ }
1041
+ if (hasNestedStringification) {
1042
+ this.logger.debug(() => `[Qwen Fix] Fixed nested double-stringification for ${toolCall.function.name}`);
1043
+ return {
1044
+ ...toolCall,
1045
+ function: {
1046
+ ...toolCall.function,
1047
+ arguments: JSON.stringify(fixedArgs),
1048
+ },
1049
+ };
1050
+ }
1051
+ }
1052
+ catch (_e) {
1053
+ // If parsing fails, check for old-style double-stringification
1054
+ if (toolCall.function.arguments.startsWith('"') &&
1055
+ toolCall.function.arguments.endsWith('"')) {
1056
+ try {
1057
+ // Old fix: entire arguments were double-stringified
1058
+ const parsedArgs = JSON.parse(toolCall.function.arguments);
1059
+ this.logger.debug(() => `[Qwen Fix] Fixed whole-argument double-stringification for ${toolCall.function.name}`);
1060
+ return {
1061
+ ...toolCall,
1062
+ function: {
1063
+ ...toolCall.function,
1064
+ arguments: JSON.stringify(parsedArgs),
1065
+ },
1066
+ };
1067
+ }
1068
+ catch {
1069
+ // Leave as-is if we can't parse
1070
+ }
1071
+ }
1072
+ }
1073
+ // No fix needed
1074
+ this.logger.debug(() => `[Qwen Fix] No double-stringification detected for ${toolCall.function.name}, keeping original`);
1075
+ return toolCall;
1076
+ }
1533
1077
  }
1534
1078
  //# sourceMappingURL=OpenAIProvider.js.map
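
Editor's note: the Qwen argument-normalization path above is the subtlest change in this hunk. The following is a minimal standalone sketch (editorial, not part of the published package; names are illustrative) of the transformation fixQwenDoubleStringification applies, assuming tool-call arguments arrive as a JSON string whose property values may themselves be stringified numbers, stringified JSON, or Python-style literals:

// Editorial sketch - mirrors the normalization logic shown in the diff.
const rawArguments = JSON.stringify({
  count: '3', // stringified number
  files: '["a.ts", "b.ts"]', // stringified JSON array
  options: "{'force': True, 'dry_run': None}", // Python-style dict
});
const parsed = JSON.parse(rawArguments);
const fixed = {};
for (const [key, value] of Object.entries(parsed)) {
  const trimmed = typeof value === 'string' ? value.trim() : '';
  if (trimmed && /^-?\d+(\.\d+)?$/.test(trimmed)) {
    fixed[key] = Number(trimmed); // "3" -> 3
  } else if (
    (trimmed.startsWith('[') && trimmed.endsWith(']')) ||
    (trimmed.startsWith('{') && trimmed.endsWith('}'))
  ) {
    try {
      fixed[key] = JSON.parse(trimmed); // plain stringified JSON
    } catch {
      try {
        // Python-style fallback, as in the provider code
        fixed[key] = JSON.parse(
          trimmed
            .replace(/'/g, '"')
            .replace(/: True/g, ': true')
            .replace(/: False/g, ': false')
            .replace(/: None/g, ': null'),
        );
      } catch {
        fixed[key] = value; // keep as string when unparseable
      }
    }
  } else {
    fixed[key] = value;
  }
}
console.log(fixed);
// { count: 3, files: ['a.ts', 'b.ts'], options: { force: true, dry_run: null } }

The design stays conservative: any value that cannot confidently be re-parsed is left as a plain string, so arguments that legitimately contain bracketed text pass through unchanged.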