@vybestack/llxprt-code-core 0.7.0-nightly.251209.0061bd6bf → 0.7.0-nightly.251211.5750c518a

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/dist/index.d.ts +2 -1
  2. package/dist/index.js +1 -1
  3. package/dist/index.js.map +1 -1
  4. package/dist/src/adapters/IStreamAdapter.d.ts +2 -2
  5. package/dist/src/auth/anthropic-device-flow.d.ts +1 -1
  6. package/dist/src/auth/precedence.d.ts +1 -1
  7. package/dist/src/auth/qwen-device-flow.d.ts +1 -1
  8. package/dist/src/auth/token-store.d.ts +1 -1
  9. package/dist/src/auth/token-store.js.map +1 -1
  10. package/dist/src/code_assist/codeAssist.d.ts +1 -1
  11. package/dist/src/code_assist/codeAssist.js.map +1 -1
  12. package/dist/src/code_assist/converter.d.ts +1 -1
  13. package/dist/src/code_assist/server.d.ts +3 -3
  14. package/dist/src/config/config.d.ts +3 -3
  15. package/dist/src/config/config.js +1 -1
  16. package/dist/src/config/config.js.map +1 -1
  17. package/dist/src/config/profileManager.d.ts +1 -1
  18. package/dist/src/config/profileManager.js +2 -0
  19. package/dist/src/config/profileManager.js.map +1 -1
  20. package/dist/src/config/subagentManager.d.ts +1 -1
  21. package/dist/src/confirmation-bus/message-bus.d.ts +1 -1
  22. package/dist/src/core/client.d.ts +3 -3
  23. package/dist/src/core/client.js.map +1 -1
  24. package/dist/src/core/contentGenerator.d.ts +1 -1
  25. package/dist/src/core/coreToolScheduler.d.ts +4 -3
  26. package/dist/src/core/coreToolScheduler.js +28 -0
  27. package/dist/src/core/coreToolScheduler.js.map +1 -1
  28. package/dist/src/core/geminiChat.d.ts +2 -2
  29. package/dist/src/core/googleGenAIWrapper.d.ts +2 -2
  30. package/dist/src/core/logger.d.ts +1 -1
  31. package/dist/src/core/loggingContentGenerator.d.ts +2 -2
  32. package/dist/src/core/nonInteractiveToolExecutor.d.ts +1 -1
  33. package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
  34. package/dist/src/core/subagent.d.ts +1 -1
  35. package/dist/src/core/subagent.js.map +1 -1
  36. package/dist/src/core/turn.d.ts +2 -2
  37. package/dist/src/debug/ConfigurationManager.d.ts +1 -1
  38. package/dist/src/ide/ide-client.d.ts +1 -1
  39. package/dist/src/ide/process-utils.js +45 -25
  40. package/dist/src/ide/process-utils.js.map +1 -1
  41. package/dist/src/index.d.ts +4 -2
  42. package/dist/src/index.js +2 -2
  43. package/dist/src/index.js.map +1 -1
  44. package/dist/src/mcp/file-token-store.d.ts +1 -1
  45. package/dist/src/mcp/google-auth-provider.d.ts +2 -2
  46. package/dist/src/mcp/oauth-provider.d.ts +1 -1
  47. package/dist/src/mcp/oauth-provider.js +1 -1
  48. package/dist/src/mcp/oauth-provider.js.map +1 -1
  49. package/dist/src/mcp/oauth-utils.d.ts +1 -1
  50. package/dist/src/prompt-config/TemplateEngine.d.ts +1 -1
  51. package/dist/src/prompt-config/prompt-cache.d.ts +1 -1
  52. package/dist/src/prompt-config/prompt-resolver.d.ts +1 -1
  53. package/dist/src/prompt-config/prompt-resolver.js +4 -0
  54. package/dist/src/prompt-config/prompt-resolver.js.map +1 -1
  55. package/dist/src/prompts/mcp-prompts.d.ts +1 -1
  56. package/dist/src/prompts/prompt-registry.d.ts +1 -1
  57. package/dist/src/providers/BaseProvider.d.ts +5 -5
  58. package/dist/src/providers/IProvider.d.ts +3 -3
  59. package/dist/src/providers/IProviderManager.d.ts +2 -2
  60. package/dist/src/providers/LoggingProviderWrapper.d.ts +4 -3
  61. package/dist/src/providers/LoggingProviderWrapper.js +16 -4
  62. package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
  63. package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
  64. package/dist/src/providers/ProviderManager.d.ts +9 -6
  65. package/dist/src/providers/ProviderManager.js +16 -4
  66. package/dist/src/providers/ProviderManager.js.map +1 -1
  67. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +5 -5
  68. package/dist/src/providers/anthropic/AnthropicProvider.js +1 -1
  69. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  70. package/dist/src/providers/gemini/GeminiProvider.d.ts +4 -4
  71. package/dist/src/providers/openai/ConversationCache.d.ts +1 -1
  72. package/dist/src/providers/openai/IChatGenerateParams.d.ts +1 -1
  73. package/dist/src/providers/openai/OpenAIProvider.d.ts +24 -8
  74. package/dist/src/providers/openai/OpenAIProvider.js +483 -143
  75. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  76. package/dist/src/providers/openai/ToolCallPipeline.d.ts +2 -2
  77. package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
  78. package/dist/src/providers/openai/estimateRemoteTokens.d.ts +1 -1
  79. package/dist/src/providers/openai/parseResponsesStream.d.ts +1 -1
  80. package/dist/src/providers/openai/syntheticToolResponses.d.ts +1 -1
  81. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +4 -4
  82. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +9 -6
  83. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +255 -22
  84. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -1
  85. package/dist/src/providers/openai-vercel/messageConversion.d.ts +4 -1
  86. package/dist/src/providers/openai-vercel/messageConversion.js +41 -6
  87. package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -1
  88. package/dist/src/providers/reasoning/reasoningUtils.d.ts +26 -1
  89. package/dist/src/providers/reasoning/reasoningUtils.js +157 -0
  90. package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -1
  91. package/dist/src/providers/test-utils/providerTestConfig.d.ts +1 -1
  92. package/dist/src/providers/tokenizers/AnthropicTokenizer.d.ts +1 -1
  93. package/dist/src/providers/tokenizers/OpenAITokenizer.d.ts +1 -1
  94. package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
  95. package/dist/src/providers/utils/cacheMetricsExtractor.d.ts +6 -0
  96. package/dist/src/providers/utils/cacheMetricsExtractor.js +36 -0
  97. package/dist/src/providers/utils/cacheMetricsExtractor.js.map +1 -0
  98. package/dist/src/services/fileDiscoveryService.js +1 -1
  99. package/dist/src/services/fileDiscoveryService.js.map +1 -1
  100. package/dist/src/services/gitService.js.map +1 -1
  101. package/dist/src/services/history/ContentConverters.d.ts +1 -1
  102. package/dist/src/services/history/HistoryService.d.ts +2 -2
  103. package/dist/src/services/history/IContent.d.ts +3 -7
  104. package/dist/src/services/history/IContent.js.map +1 -1
  105. package/dist/src/services/loopDetectionService.d.ts +1 -1
  106. package/dist/src/services/loopDetectionService.js.map +1 -1
  107. package/dist/src/services/shellExecutionService.js.map +1 -1
  108. package/dist/src/services/todo-reminder-service.d.ts +1 -1
  109. package/dist/src/services/tool-call-tracker-service.d.ts +1 -1
  110. package/dist/src/settings/SettingsService.d.ts +1 -1
  111. package/dist/src/telemetry/file-exporters.d.ts +4 -4
  112. package/dist/src/telemetry/file-exporters.js.map +1 -1
  113. package/dist/src/telemetry/index.d.ts +2 -1
  114. package/dist/src/telemetry/index.js.map +1 -1
  115. package/dist/src/telemetry/loggers.js +1 -1
  116. package/dist/src/telemetry/loggers.js.map +1 -1
  117. package/dist/src/telemetry/loggers.test.circular.js.map +1 -1
  118. package/dist/src/telemetry/metrics.d.ts +2 -2
  119. package/dist/src/telemetry/types.d.ts +1 -1
  120. package/dist/src/telemetry/types.js.map +1 -1
  121. package/dist/src/test-utils/config.js.map +1 -1
  122. package/dist/src/test-utils/tools.d.ts +2 -2
  123. package/dist/src/todo/todoFormatter.d.ts +1 -1
  124. package/dist/src/tools/IToolFormatter.d.ts +3 -3
  125. package/dist/src/tools/ToolFormatter.d.ts +3 -3
  126. package/dist/src/tools/ToolIdStrategy.d.ts +25 -0
  127. package/dist/src/tools/ToolIdStrategy.js +108 -0
  128. package/dist/src/tools/ToolIdStrategy.js.map +1 -1
  129. package/dist/src/tools/codesearch.d.ts +1 -1
  130. package/dist/src/tools/delete_line_range.d.ts +1 -1
  131. package/dist/src/tools/diffOptions.d.ts +1 -1
  132. package/dist/src/tools/direct-web-fetch.d.ts +1 -1
  133. package/dist/src/tools/direct-web-fetch.js.map +1 -1
  134. package/dist/src/tools/edit.d.ts +2 -2
  135. package/dist/src/tools/edit.js.map +1 -1
  136. package/dist/src/tools/exa-web-search.d.ts +1 -1
  137. package/dist/src/tools/google-web-fetch.d.ts +1 -1
  138. package/dist/src/tools/google-web-search-invocation.d.ts +2 -2
  139. package/dist/src/tools/google-web-search-invocation.js.map +1 -1
  140. package/dist/src/tools/google-web-search.d.ts +3 -3
  141. package/dist/src/tools/google-web-search.js.map +1 -1
  142. package/dist/src/tools/grep.d.ts +1 -1
  143. package/dist/src/tools/insert_at_line.d.ts +1 -1
  144. package/dist/src/tools/list-subagents.d.ts +1 -1
  145. package/dist/src/tools/ls.d.ts +1 -1
  146. package/dist/src/tools/mcp-tool.d.ts +2 -2
  147. package/dist/src/tools/memoryTool.d.ts +12 -4
  148. package/dist/src/tools/memoryTool.js +81 -29
  149. package/dist/src/tools/memoryTool.js.map +1 -1
  150. package/dist/src/tools/modifiable-tool.d.ts +2 -2
  151. package/dist/src/tools/modifiable-tool.js.map +1 -1
  152. package/dist/src/tools/read-file.d.ts +1 -1
  153. package/dist/src/tools/read-many-files.d.ts +1 -1
  154. package/dist/src/tools/read_line_range.d.ts +1 -1
  155. package/dist/src/tools/ripGrep.d.ts +1 -1
  156. package/dist/src/tools/shell.d.ts +1 -1
  157. package/dist/src/tools/task.d.ts +1 -1
  158. package/dist/src/tools/task.js +14 -2
  159. package/dist/src/tools/task.js.map +1 -1
  160. package/dist/src/tools/todo-events.d.ts +1 -1
  161. package/dist/src/tools/todo-pause.d.ts +1 -1
  162. package/dist/src/tools/todo-pause.js.map +1 -1
  163. package/dist/src/tools/todo-read.d.ts +1 -1
  164. package/dist/src/tools/todo-read.js.map +1 -1
  165. package/dist/src/tools/todo-store.d.ts +1 -1
  166. package/dist/src/tools/todo-store.js.map +1 -1
  167. package/dist/src/tools/todo-write.d.ts +2 -2
  168. package/dist/src/tools/todo-write.js.map +1 -1
  169. package/dist/src/tools/tool-registry.d.ts +3 -3
  170. package/dist/src/tools/tools.d.ts +6 -5
  171. package/dist/src/tools/tools.js +1 -1
  172. package/dist/src/tools/tools.js.map +1 -1
  173. package/dist/src/tools/write-file.d.ts +2 -2
  174. package/dist/src/tools/write-file.js.map +1 -1
  175. package/dist/src/utils/environmentContext.d.ts +1 -1
  176. package/dist/src/utils/errorReporting.d.ts +1 -1
  177. package/dist/src/utils/fileUtils.d.ts +2 -2
  178. package/dist/src/utils/filesearch/fileSearch.js.map +1 -1
  179. package/dist/src/utils/generateContentResponseUtilities.d.ts +1 -1
  180. package/dist/src/utils/generateContentResponseUtilities.js +6 -0
  181. package/dist/src/utils/generateContentResponseUtilities.js.map +1 -1
  182. package/dist/src/utils/messageInspectors.d.ts +1 -1
  183. package/dist/src/utils/partUtils.d.ts +1 -1
  184. package/dist/src/utils/quotaErrorDetection.d.ts +1 -1
  185. package/dist/src/utils/summarizer.d.ts +1 -1
  186. package/package.json +1 -1

package/dist/src/providers/openai/OpenAIProvider.js (+483 -143)
@@ -22,7 +22,7 @@ import crypto from 'node:crypto';
 import * as http from 'http';
 import * as https from 'https';
 import * as net from 'net';
-import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
+import { isKimiModel, isMistralModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
 import { BaseProvider, } from '../BaseProvider.js';
 import { DebugLogger } from '../../debug/index.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
@@ -40,6 +40,7 @@ import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../uti
 import { isLocalEndpoint } from '../utils/localEndpoint.js';
 import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
 import { shouldDumpSDKContext, dumpSDKContext, } from '../utils/dumpSDKContext.js';
+import { extractCacheMetrics } from '../utils/cacheMetricsExtractor.js';
 const MAX_TOOL_RESPONSE_CHARS = 1024;
 const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
 const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -277,13 +278,12 @@ export class OpenAIProvider extends BaseProvider {
 // This preserves meaningful whitespace in regular text chunks during streaming
 // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
 if (hadReasoningTags) {
-// Clean up multiple consecutive spaces/whitespace that may result from stripping
+// Collapse multiple spaces/tabs but preserve newlines for proper paragraph/line breaks
 str = str.replace(/[ \t]+/g, ' ');
 str = str.replace(/\n{3,}/g, '\n\n');
-// Only trim leading whitespace when think tags were at the beginning
-// This prevents leading spaces from "<think>...</think>text" -> " text"
-// but preserves trailing whitespace for streaming chunk concatenation
-str = str.trimStart();
+// Only trim leading horizontal whitespace (spaces/tabs), NOT newlines
+// This preserves line breaks between think tags and content (fixes #721)
+str = str.replace(/^[ \t]+/, '');
 }
 const afterLen = str.length;
 if (hadReasoningTags && afterLen !== beforeLen) {
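
A standalone sketch of the sanitization the hunk above lands on (the function name is illustrative, not the package's internals):

```js
// Minimal sketch of the #721 whitespace fix: collapse runs of spaces/tabs,
// cap blank lines at one, and trim only leading spaces/tabs, so that a
// newline separating "<think>...</think>" from the answer survives.
function sanitizeAfterStrippingThink(str) {
  str = str.replace(/[ \t]+/g, ' ');    // collapse horizontal whitespace
  str = str.replace(/\n{3,}/g, '\n\n'); // at most one blank line
  return str.replace(/^[ \t]+/, '');    // keep leading newlines intact
}

// Before the fix, str.trimStart() also ate the newline after </think>:
console.log(JSON.stringify(sanitizeAfterStrippingThink('\nAnswer  text')));
// => "\nAnswer text"
```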
@@ -438,62 +438,78 @@ export class OpenAIProvider extends BaseProvider {
 * and all tool info is only encoded in the text template.
 */
 extractKimiToolCallsFromText(raw) {
-if (!raw || !raw.includes('<|tool_calls_section_begin|>')) {
+// Return early only if input is null/undefined/empty
+if (!raw) {
 return { cleanedText: raw, toolCalls: [] };
 }
 const logger = this.getLogger();
 const toolCalls = [];
 let text = raw;
-const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
-text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
-try {
-const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
-let m;
-while ((m = callRegex.exec(sectionBody)) !== null) {
-const rawId = m[1].trim();
-const rawArgs = m[2].trim();
-// Infer tool name from ID.
-let toolName = '';
-const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
-/^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
-if (match) {
-toolName = match[1];
-}
-else {
-const colonParts = rawId.split(':');
-const head = colonParts[0] || rawId;
-const dotParts = head.split('.');
-toolName = dotParts[dotParts.length - 1] || head;
+// Extract tool calls from complete sections if present
+if (raw.includes('<|tool_calls_section_begin|>')) {
+const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
+text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
+try {
+const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
+let m;
+while ((m = callRegex.exec(sectionBody)) !== null) {
+const rawId = m[1].trim();
+const rawArgs = m[2].trim();
+// Infer tool name from ID.
+let toolName = '';
+const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
+/^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
+if (match) {
+toolName = match[1];
+}
+else {
+const colonParts = rawId.split(':');
+const head = colonParts[0] || rawId;
+const dotParts = head.split('.');
+toolName = dotParts[dotParts.length - 1] || head;
+}
+// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
+toolName = this.normalizeToolName(toolName);
+const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
+const processedParameters = processToolParameters(sanitizedArgs, toolName);
+toolCalls.push({
+type: 'tool_call',
+id: this.normalizeToHistoryToolId(rawId),
+name: toolName,
+parameters: processedParameters,
+});
 }
-// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
-toolName = this.normalizeToolName(toolName);
-const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
-const processedParameters = processToolParameters(sanitizedArgs, toolName);
-toolCalls.push({
-type: 'tool_call',
-id: this.normalizeToHistoryToolId(rawId),
-name: toolName,
-parameters: processedParameters,
-});
 }
-}
-catch (err) {
-logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
-}
-// Strip the entire tool section from user-visible text
-return '';
-});
-if (toolCalls.length > 0) {
-logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
-toolCallCount: toolCalls.length,
-originalLength: raw.length,
-cleanedLength: text.length,
+catch (err) {
+logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
+}
+// Strip the entire tool section from user-visible text
+return '';
 });
+if (toolCalls.length > 0) {
+logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
+toolCallCount: toolCalls.length,
+originalLength: raw.length,
+cleanedLength: text.length,
+});
+}
 }
+// ALWAYS run stray token cleanup, even if no complete sections were found
+// This handles partial sections, malformed tokens, orphaned markers, etc.
+text = text.replace(/<\|tool_call(?:_(?:begin|end|argument_begin))?\|>/g, '');
+text = text.replace(/<\|tool_calls_section_(?:begin|end)\|>/g, '');
 // Don't trim - preserve leading/trailing newlines that are important for formatting
 // (e.g., numbered lists from Kimi K2 that have newlines between items)
 return { cleanedText: text, toolCalls };
 }
+/**
+* Clean Kimi K2 tool call tokens from thinking content.
+* Used when extracting thinking from <think> tags that may contain embedded tool calls.
+* @issue #749
+*/
+cleanThinkingContent(thought) {
+return this.extractKimiToolCallsFromText(thought).cleanedText;
+}
 /**
 * @plan:PLAN-20251023-STATELESS-HARDENING.P09
 * @requirement:REQ-SP4-002
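
To illustrate the wire format this parser targets, here is a sample reconstructed from the regexes above (not from official Kimi documentation, so treat the exact token layout as an assumption):

```js
// Example of Kimi K2's inline tool-call encoding, as implied by the parser.
const sample =
  'Let me check.' +
  '<|tool_calls_section_begin|>' +
  '<|tool_call_begin|>functions.glob:0<|tool_call_argument_begin|>' +
  '{"pattern":"**/*.ts"}' +
  '<|tool_call_end|>' +
  '<|tool_calls_section_end|>';

// The section regex captures everything between the section markers; the call
// regex then splits each call into an ID ("functions.glob:0") and a JSON
// argument payload. The new "stray token cleanup" pass removes orphaned
// markers even when no complete section exists:
const cleaned = sample
  .replace(/<\|tool_calls_section_begin\|>[\s\S]*?<\|tool_calls_section_end\|>/g, '')
  .replace(/<\|tool_call(?:_(?:begin|end|argument_begin))?\|>/g, '')
  .replace(/<\|tool_calls_section_(?:begin|end)\|>/g, '');
console.log(cleaned); // => 'Let me check.'
```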
@@ -910,9 +926,12 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 messages.push({
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: this.normalizeToOpenAIToolId(tc.id),
 type: 'function',
@@ -948,10 +967,16 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, config),
 tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+name: tr.toolName,
 });
 }
 }
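
Concretely, the two Mistral-safe message shapes produced above look like this (the tool name, ID, and payloads are illustrative values, not package output):

```js
// Assistant turn that only carries tool calls: no content key at all.
const assistantMsg = {
  role: 'assistant',
  tool_calls: [
    {
      id: 'a1b2c3d4e', // 9 alphanumeric chars, per the Mistral format below
      type: 'function',
      function: { name: 'read_file', arguments: '{"path":"README.md"}' },
    },
  ],
};

// Matching tool result: Mistral additionally requires the function name.
const toolMsg = {
  role: 'tool',
  tool_call_id: 'a1b2c3d4e',
  name: 'read_file',
  content: '# README ...',
};

console.log(toolMsg.name === assistantMsg.tool_calls[0].function.name); // => true
```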
@@ -977,8 +1002,9 @@ export class OpenAIProvider extends BaseProvider {
 const messages = [];
 // Create a ToolIdMapper based on the tool format
 // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
-const toolIdMapper = toolFormat === 'kimi'
-? getToolIdStrategy('kimi').createMapper(filteredContents)
+// For Mistral, this generates 9-char alphanumeric IDs
+const toolIdMapper = toolFormat === 'kimi' || toolFormat === 'mistral'
+? getToolIdStrategy(toolFormat).createMapper(filteredContents)
 : null;
 // Helper to resolve tool call IDs based on format
 const resolveToolCallId = (tc) => {
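
A hedged sketch of what a Mistral-style ID strategy could look like; the shipped implementation lives in tools/ToolIdStrategy.js (note the new file in the list above) and may derive IDs deterministically rather than randomly:

```js
// Mistral rejects tool_call_id values that are not exactly 9 alphanumerics,
// which is why the 'mistral' tool format needs its own ID strategy.
function makeMistralToolId() {
  const alphabet =
    'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
  let id = '';
  for (let i = 0; i < 9; i++) {
    id += alphabet[Math.floor(Math.random() * alphabet.length)];
  }
  return id;
}
console.log(/^[a-zA-Z0-9]{9}$/.test(makeMistralToolId())); // => true
```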
@@ -1014,9 +1040,12 @@ export class OpenAIProvider extends BaseProvider {
 const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
 if (toolCalls.length > 0) {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 const baseMessage = {
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: resolveToolCallId(tc),
 type: 'function',
@@ -1057,10 +1086,16 @@ export class OpenAIProvider extends BaseProvider {
 // Convert tool responses
 const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, options.config),
 tool_call_id: resolveToolResponseId(tr),
+name: tr.toolName,
 });
 }
 }
@@ -1506,9 +1541,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const isKimiModel = model.toLowerCase().includes('kimi-k2');
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -1522,6 +1557,8 @@ export class OpenAIProvider extends BaseProvider {
 let streamingUsage = null;
 // Track total chunks for debugging empty responses
 let totalChunksReceived = 0;
+// Track finish_reason for detecting empty responses (issue #584)
+let lastFinishReason = null;
 try {
 // Handle streaming response
 for await (const chunk of response) {
@@ -1575,14 +1612,32 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Phase 16 integration)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+// @requirement REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Accumulate tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+for (const toolCall of reasoningToolCalls) {
+// Convert ToolCallBlock to accumulated format
+const index = accumulatedToolCalls.length;
+accumulatedToolCalls[index] = {
+id: toolCall.id,
+type: 'function',
+function: {
+name: toolCall.name,
+arguments: JSON.stringify(toolCall.parameters),
+},
+};
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
+lastFinishReason = choice.finish_reason;
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
 model,
 finishReason: choice.finish_reason,
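
The failure mode behind #749, illustrated with a made-up delta: some Kimi K2 deployments emit the tool-call token section inside reasoning_content rather than content, so a parser that only inspects content never sees the call:

```js
// Illustrative delta for issue #749 (values invented for the example):
const delta = {
  reasoning_content:
    'I should list the files.' +
    '<|tool_calls_section_begin|>' +
    '<|tool_call_begin|>functions.ls:0<|tool_call_argument_begin|>{}' +
    '<|tool_call_end|><|tool_calls_section_end|>',
};
// parseStreamingReasoningDelta now returns both pieces, roughly:
// { thinking: { thought: 'I should list the files.', ... },
//   toolCalls: [{ type: 'tool_call', name: 'ls', parameters: {} }] }
console.log(delta.reasoning_content.includes('<|tool_call_begin|>')); // => true
```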
@@ -1601,13 +1656,25 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-// This is the model's behavior - we don't filter it here.
+// We now filter duplicates by tracking when content starts matching reasoning_content.
+// fixes #721
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-const deltaContent = isKimiModel
-? rawDeltaContent
-: this.sanitizeProviderText(rawDeltaContent);
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -1623,9 +1690,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
-const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
-const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, but avoid flushing mid Kimi section
 if (!hasOpenKimiSection &&
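
Why the counting change matters: once one section has closed, includes() reports both markers present forever, so a second section that has only just opened would be flushed mid-stream. Restated standalone:

```js
// Counting occurrences keeps the guard correct across multiple sections.
function hasOpenKimiSection(buffer) {
  const begins = (buffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
  const ends = (buffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
  return begins > ends;
}

const buf =
  'a<|tool_calls_section_begin|>x<|tool_calls_section_end|>' +
  'b<|tool_calls_section_begin|>partial...';
console.log(hasOpenKimiSection(buf)); // => true
// The old includes()-based check returned false here, because both markers
// appear somewhere in the buffer, and would have flushed mid-section.
```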
@@ -1642,12 +1709,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += tagBasedThinking.thought;
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -1709,7 +1778,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-if (cleanedText.trim().length > 0) {
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
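
The emit-guard half of the #721 fix, in isolation: a chunk that is only a space must still be yielded, or adjacent chunks concatenate without it:

```js
// Chunks as they might arrive from the stream (illustrative):
const chunks = ['list', ' ', '5'];

// Old guard dropped whitespace-only chunks:
console.log(chunks.filter((c) => c.trim().length > 0).join('')); // => 'list5'
// New guard keeps them:
console.log(chunks.filter((c) => c.length > 0).join(''));        // => 'list 5'
```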
@@ -1828,11 +1900,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += tagBasedThinking.thought;
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -1891,7 +1965,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-if (cleanedText.trim().length > 0) {
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -1923,19 +2000,32 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-yield {
-speaker: 'ai',
-blocks: [
-{
-type: 'thinking',
-thought: accumulatedReasoningContent,
-sourceField: 'reasoning_content',
-isHidden: false,
-},
-],
-};
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using legacy accumulated approach
 if (accumulatedToolCalls.length > 0) {
@@ -1962,6 +2052,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata if we captured it from streaming
 if (streamingUsage) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 toolCallsContent.metadata = {
 usage: {
 promptTokens: streamingUsage.prompt_tokens || 0,
@@ -1969,6 +2060,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
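
A hedged sketch of what extractCacheMetrics plausibly computes; the shipped version is the new providers/utils/cacheMetricsExtractor.js above and may read more vendor-specific fields. OpenAI-compatible usage objects expose cached prompt tokens under prompt_tokens_details.cached_tokens; the cache_creation_input_tokens field is an Anthropic-style assumption here:

```js
// Sketch only: derive the three cache counters the metadata blocks consume.
function extractCacheMetricsSketch(usage) {
  const cachedTokens = usage?.prompt_tokens_details?.cached_tokens ?? 0;
  // Assumed: some gateways forward Anthropic-style cache creation counts.
  const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
  // Tokens that had to be processed fresh this request.
  const cacheMissTokens = Math.max(0, (usage?.prompt_tokens ?? 0) - cachedTokens);
  return { cachedTokens, cacheCreationTokens, cacheMissTokens };
}

console.log(extractCacheMetricsSketch({
  prompt_tokens: 1200,
  prompt_tokens_details: { cached_tokens: 1000 },
}));
// => { cachedTokens: 1000, cacheCreationTokens: 0, cacheMissTokens: 200 }
```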
@@ -1977,6 +2071,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 // If we have usage information but no tool calls, emit a metadata-only response
 if (streamingUsage && accumulatedToolCalls.length === 0) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -1987,10 +2082,35 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
 }
+// Detect and handle empty streaming responses after tool calls (issue #584)
+// Some models (like gpt-oss-120b on OpenRouter) return finish_reason=stop with tools but no text
+const hasToolsButNoText = lastFinishReason === 'stop' &&
+accumulatedToolCalls.length > 0 &&
+_accumulatedText.length === 0 &&
+textBuffer.length === 0 &&
+accumulatedReasoningContent.length === 0 &&
+accumulatedThinkingContent.length === 0;
+if (hasToolsButNoText) {
+logger.log(() => `[OpenAIProvider] Model returned tool calls but no text (finish_reason=stop). Requesting continuation for model '${model}'.`, {
+model,
+toolCallCount: accumulatedToolCalls.length,
+baseURL: baseURL ?? this.getBaseURL(),
+});
+// Request continuation after tool calls (delegated to shared method)
+const toolCallsForContinuation = accumulatedToolCalls.map((tc) => ({
+id: tc.id,
+type: tc.type,
+function: tc.function,
+}));
+yield* this.requestContinuationAfterToolCalls(toolCallsForContinuation, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders);
+}
 // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
 // Only warn if we truly got nothing - not even reasoning content
 if (_accumulatedText.length === 0 &&
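
The #584 detection predicate, restated outside the generator (field names are illustrative; the real checks read the generator's local accumulators directly):

```js
// A stream that stopped normally, produced tool calls, but emitted no text,
// reasoning, or thinking is treated as incomplete and continued.
function needsContinuation(s) {
  return (
    s.finishReason === 'stop' &&
    s.toolCallCount > 0 &&
    s.textLength === 0 &&
    s.bufferLength === 0 &&
    s.reasoningLength === 0 &&
    s.thinkingLength === 0
  );
}

console.log(needsContinuation({
  finishReason: 'stop', toolCallCount: 2, textLength: 0,
  bufferLength: 0, reasoningLength: 0, thinkingLength: 0,
})); // => true
```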
@@ -2051,8 +2171,10 @@ export class OpenAIProvider extends BaseProvider {
 }
 const blocks = [];
 // Parse reasoning_content from response (Phase 16 integration)
-const reasoningBlock = this.parseNonStreamingReasoning(choice.message);
-logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}`, {
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
+// @requirement REQ-KIMI-REASONING-001.2
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseNonStreamingReasoning(choice.message);
+logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}, tool calls: ${reasoningToolCalls.length}`, {
 hasReasoningContent: 'reasoning_content' in
 (choice.message ?? {}),
 messageKeys: Object.keys(choice.message ?? {}),
@@ -2060,6 +2182,11 @@ export class OpenAIProvider extends BaseProvider {
 if (reasoningBlock) {
 blocks.push(reasoningBlock);
 }
+// Add tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+blocks.push(...reasoningToolCalls);
+logger.debug(() => `[Non-streaming] Added ${reasoningToolCalls.length} tool calls from reasoning_content`);
+}
 // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
 const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
 let kimiCleanContent;
@@ -2163,6 +2290,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata from non-streaming response
 if (completion.usage) {
+const cacheMetrics = extractCacheMetrics(completion.usage);
 responseContent.metadata = {
 usage: {
 promptTokens: completion.usage.prompt_tokens || 0,
@@ -2170,6 +2298,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -2177,6 +2308,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 else if (completion.usage) {
 // Emit metadata-only response if no content blocks but have usage info
+const cacheMetrics = extractCacheMetrics(completion.usage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -2187,6 +2319,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -2459,7 +2594,7 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Dump successful streaming request if enabled
 if (shouldDumpSuccess) {
-await dumpSDKContext('openai', '/v1/chat/completions', requestBody, { streaming: true }, false, baseURL || 'https://api.openai.com');
+await dumpSDKContext('openai', '/chat/completions', requestBody, { streaming: true }, false, baseURL || 'https://api.openai.com/v1');
 }
 break;
 }
@@ -2492,7 +2627,7 @@ export class OpenAIProvider extends BaseProvider {
 // Dump error if enabled
 if (shouldDumpError) {
 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-await dumpSDKContext('openai', '/v1/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com');
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
 }
 // Re-throw other errors as-is
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
@@ -2530,7 +2665,7 @@ export class OpenAIProvider extends BaseProvider {
 }));
 // Dump successful non-streaming request if enabled
 if (shouldDumpSuccess) {
-await dumpSDKContext('openai', '/v1/chat/completions', requestBody, response, false, baseURL || 'https://api.openai.com');
+await dumpSDKContext('openai', '/chat/completions', requestBody, response, false, baseURL || 'https://api.openai.com/v1');
 }
 break;
 }
@@ -2569,7 +2704,7 @@ export class OpenAIProvider extends BaseProvider {
 // Dump error if enabled
 if (shouldDumpError) {
 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-await dumpSDKContext('openai', '/v1/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com');
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
 }
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
 const status = typeof error === 'object' &&
@@ -2599,9 +2734,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const isKimiModel = model.toLowerCase().includes('kimi-k2');
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -2613,6 +2748,10 @@ export class OpenAIProvider extends BaseProvider {
 let accumulatedReasoningContent = '';
 // Track token usage from streaming chunks
 let streamingUsage = null;
+// Track finish_reason for detecting empty responses (issue #584)
+let lastFinishReason = null;
+// Store pipeline result to avoid duplicate process() calls (CodeRabbit review #764)
+let cachedPipelineResult = null;
 const allChunks = []; // Collect all chunks first
 try {
 // Handle streaming response - collect all chunks
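
Why cachedPipelineResult exists, per the CodeRabbit note above: process() drains the pipeline's collector, so asking it again later returns nothing. An illustrative reduction (not the package's classes):

```js
// A pipeline whose process() consumes its internal collector.
class OneShotPipeline {
  constructor() { this.calls = ['glob', 'read_file']; }
  process() {
    const normalized = this.calls;
    this.calls = []; // the collector is reset by processing
    return { normalized, failed: [] };
  }
}

const p = new OneShotPipeline();
console.log(p.process().normalized.length); // => 2
console.log(p.process().normalized.length); // => 0, hence the cached result
```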
@@ -2671,15 +2810,31 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Pipeline path)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-// @requirement REQ-THINK-003.1
-const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+// @requirement REQ-THINK-003.1, REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Add tool calls extracted from reasoning_content to pipeline
+if (reasoningToolCalls.length > 0) {
+// Get current pipeline stats to determine next index
+const stats = this.toolCallPipeline.getStats();
+let baseIndex = stats.collector.totalCalls;
+for (const toolCall of reasoningToolCalls) {
+// Add complete tool call as fragments to pipeline
+this.toolCallPipeline.addFragment(baseIndex, {
+name: toolCall.name,
+args: JSON.stringify(toolCall.parameters),
+});
+baseIndex++;
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
+lastFinishReason = choice.finish_reason;
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
 model,
 finishReason: choice.finish_reason,
@@ -2698,13 +2853,24 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-// This is the model's behavior - we don't filter it here.
+// This is the model's behavior - we don't filter it here as detection is unreliable.
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-const deltaContent = isKimiModel
-? rawDeltaContent
-: this.sanitizeProviderText(rawDeltaContent);
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -2720,9 +2886,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
-const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
-const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, avoiding flush mid Kimi section
 if (!hasOpenKimiSection &&
@@ -2739,12 +2905,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += tagBasedThinking.thought;
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -2806,7 +2974,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-if (cleanedText.trim().length > 0) {
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -2906,11 +3077,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += tagBasedThinking.thought;
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -2969,7 +3142,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-if (cleanedText.trim().length > 0) {
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -3001,27 +3177,40 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-yield {
-speaker: 'ai',
-blocks: [
-{
-type: 'thinking',
-thought: accumulatedReasoningContent,
-sourceField: 'reasoning_content',
-isHidden: false,
-},
-],
-};
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using the pipeline
-const pipelineResult = await this.toolCallPipeline.process(abortSignal);
-if (pipelineResult.normalized.length > 0 ||
-pipelineResult.failed.length > 0) {
+cachedPipelineResult = await this.toolCallPipeline.process(abortSignal);
+if (cachedPipelineResult.normalized.length > 0 ||
+cachedPipelineResult.failed.length > 0) {
 const blocks = [];
 // Process successful tool calls
-for (const normalizedCall of pipelineResult.normalized) {
+for (const normalizedCall of cachedPipelineResult.normalized) {
 const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
 // Process tool parameters with double-escape handling
 const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
@@ -3033,7 +3222,7 @@ export class OpenAIProvider extends BaseProvider {
 });
 }
 // Handle failed tool calls (could emit as errors or warnings)
-for (const failed of pipelineResult.failed) {
+for (const failed of cachedPipelineResult.failed) {
 this.getLogger().warn(`Tool call validation failed for index ${failed.index}: ${failed.validationErrors.join(', ')}`);
 }
 if (blocks.length > 0) {
@@ -3043,6 +3232,7 @@ export class OpenAIProvider extends BaseProvider {
3043
3232
  };
3044
3233
  // Add usage metadata if we captured it from streaming
3045
3234
  if (streamingUsage) {
3235
+ const cacheMetrics = extractCacheMetrics(streamingUsage);
3046
3236
  toolCallsContent.metadata = {
3047
3237
  usage: {
3048
3238
  promptTokens: streamingUsage.prompt_tokens || 0,
@@ -3050,6 +3240,9 @@ export class OpenAIProvider extends BaseProvider {
3050
3240
  totalTokens: streamingUsage.total_tokens ||
3051
3241
  (streamingUsage.prompt_tokens || 0) +
3052
3242
  (streamingUsage.completion_tokens || 0),
3243
+ cachedTokens: cacheMetrics.cachedTokens,
3244
+ cacheCreationTokens: cacheMetrics.cacheCreationTokens,
3245
+ cacheMissTokens: cacheMetrics.cacheMissTokens,
3053
3246
  },
3054
3247
  };
3055
3248
  }
@@ -3059,6 +3252,7 @@ export class OpenAIProvider extends BaseProvider {
3059
3252
  // If we have usage information but no tool calls, emit a metadata-only response
3060
3253
  if (streamingUsage &&
3061
3254
  this.toolCallPipeline.getStats().collector.totalCalls === 0) {
3255
+ const cacheMetrics = extractCacheMetrics(streamingUsage);
3062
3256
  yield {
3063
3257
  speaker: 'ai',
3064
3258
  blocks: [],
@@ -3069,15 +3263,51 @@ export class OpenAIProvider extends BaseProvider {
                  totalTokens: streamingUsage.total_tokens ||
                      (streamingUsage.prompt_tokens || 0) +
                          (streamingUsage.completion_tokens || 0),
+                 cachedTokens: cacheMetrics.cachedTokens,
+                 cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                 cacheMissTokens: cacheMetrics.cacheMissTokens,
              },
          },
      };
  }
+ // Detect and handle empty streaming responses after tool calls (issue #584)
+ // Some models (like gpt-oss-120b on OpenRouter) return finish_reason=stop with tools but no text
+ // Use cachedPipelineResult instead of pipelineStats.collector.totalCalls since process() resets the collector (CodeRabbit review #764)
+ const toolCallCount = (cachedPipelineResult?.normalized.length ?? 0) +
+     (cachedPipelineResult?.failed.length ?? 0);
+ const hasToolsButNoText = lastFinishReason === 'stop' &&
+     toolCallCount > 0 &&
+     _accumulatedText.length === 0 &&
+     textBuffer.length === 0 &&
+     accumulatedReasoningContent.length === 0 &&
+     accumulatedThinkingContent.length === 0;
+ if (hasToolsButNoText) {
+     logger.log(() => `[OpenAIProvider] Model returned tool calls but no text (finish_reason=stop). Requesting continuation for model '${model}'.`, {
+         model,
+         toolCallCount,
+         baseURL: baseURL ?? this.getBaseURL(),
+     });
+     // Note: In pipeline mode, tool calls have already been processed.
+     // We need to get the normalized tool calls from the cached pipeline result to build continuation messages.
+     // Use cached result to avoid duplicate process() call that would return empty results (CodeRabbit review #764)
+     if (!cachedPipelineResult) {
+         throw new Error('Pipeline result not cached - this should not happen in pipeline mode');
+     }
+     const toolCallsForHistory = cachedPipelineResult.normalized.map((normalizedCall, index) => ({
+         id: `call_${index}`,
+         type: 'function',
+         function: {
+             name: normalizedCall.name,
+             arguments: JSON.stringify(normalizedCall.args),
+         },
+     }));
+     // Request continuation after tool calls (delegated to shared method)
+     yield* this.requestContinuationAfterToolCalls(toolCallsForHistory, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders);
+ }
  // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
  // Only warn if we truly got nothing - not even reasoning content
- const pipelineStats = this.toolCallPipeline.getStats();
  if (_accumulatedText.length === 0 &&
-     pipelineStats.collector.totalCalls === 0 &&
+     toolCallCount === 0 &&
      textBuffer.length === 0 &&
      accumulatedReasoningContent.length === 0 &&
      accumulatedThinkingContent.length === 0) {
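
Note: the comment in the hunk above explains why toolCallCount is derived from cachedPipelineResult rather than from this.toolCallPipeline.getStats(): process() drains and resets the collector, so stats read after processing always come back empty. A runnable stub illustrating the hazard (names are simplified stand-ins, not the package's real pipeline API):

    // Stub pipeline demonstrating why post-process() stats cannot be trusted.
    const pipeline = {
        calls: ['list_files', 'read_file'],
        process() {
            const drained = { normalized: this.calls, failed: [] };
            this.calls = []; // process() resets the internal collector
            return drained;
        },
        getStats() {
            return { collector: { totalCalls: this.calls.length } };
        },
    };
    const cachedPipelineResult = pipeline.process();
    console.log(pipeline.getStats().collector.totalCalls); // 0 - reset, too late to count
    console.log(cachedPipelineResult.normalized.length +
        cachedPipelineResult.failed.length); // 2 - count from the cached result instead
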
@@ -3101,7 +3331,7 @@ export class OpenAIProvider extends BaseProvider {
      // Log what we DID get for debugging
      logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
          textLength: _accumulatedText.length,
-         toolCallCount: pipelineStats.collector.totalCalls,
+         toolCallCount,
          textBufferLength: textBuffer.length,
          reasoningLength: accumulatedReasoningContent.length,
          thinkingLength: accumulatedThinkingContent.length,
@@ -3221,6 +3451,7 @@ export class OpenAIProvider extends BaseProvider {
      };
      // Add usage metadata from non-streaming response
      if (completion.usage) {
+         const cacheMetrics = extractCacheMetrics(completion.usage);
          responseContent.metadata = {
              usage: {
                  promptTokens: completion.usage.prompt_tokens || 0,
@@ -3228,6 +3459,9 @@ export class OpenAIProvider extends BaseProvider {
                  totalTokens: completion.usage.total_tokens ||
                      (completion.usage.prompt_tokens || 0) +
                          (completion.usage.completion_tokens || 0),
+                 cachedTokens: cacheMetrics.cachedTokens,
+                 cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                 cacheMissTokens: cacheMetrics.cacheMissTokens,
              },
          };
      }
@@ -3235,6 +3469,7 @@ export class OpenAIProvider extends BaseProvider {
      }
      else if (completion.usage) {
          // Emit metadata-only response if no content blocks but have usage info
+         const cacheMetrics = extractCacheMetrics(completion.usage);
          yield {
              speaker: 'ai',
              blocks: [],
@@ -3245,6 +3480,9 @@ export class OpenAIProvider extends BaseProvider {
                  totalTokens: completion.usage.total_tokens ||
                      (completion.usage.prompt_tokens || 0) +
                          (completion.usage.completion_tokens || 0),
+                 cachedTokens: cacheMetrics.cachedTokens,
+                 cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                 cacheMissTokens: cacheMetrics.cacheMissTokens,
              },
          },
      };
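
Note: after these hunks, the streaming and non-streaming paths emit the same usage shape, so downstream consumers can read cache accounting uniformly. A sketch of a consumer, assuming stream is the async iterable of content objects yielded above:

    // Sketch only - `stream` stands in for the provider's emitted content objects.
    for await (const message of stream) {
        const usage = message.metadata?.usage;
        if (!usage) continue;
        console.log(`prompt=${usage.promptTokens} total=${usage.totalTokens}`);
        console.log(`cached=${usage.cachedTokens} miss=${usage.cacheMissTokens} ` +
            `created=${usage.cacheCreationTokens}`);
    }
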
@@ -3279,6 +3517,12 @@ export class OpenAIProvider extends BaseProvider {
          logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
          return 'kimi';
      }
+     // Check for Mistral models (requires 9-char alphanumeric IDs)
+     // This applies to both hosted API and self-hosted Mistral models
+     if (isMistralModel(modelName)) {
+         logger.debug(() => `Auto-detected 'mistral' format for Mistral model: ${modelName}`);
+         return 'mistral';
+     }
      const lowerModelName = modelName.toLowerCase();
      // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
      if (lowerModelName.includes('glm-4')) {
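
Note: isMistralModel is imported from elsewhere in the package and its body is not part of this diff. The "9-char alphanumeric IDs" comment presumably refers to Mistral's API requirement that tool-call IDs be exactly nine alphanumeric characters, which the 'mistral' tool format would have to enforce. A plausible sketch of the detector, assuming simple name matching:

    // Hypothetical sketch - the real isMistralModel may match differently.
    function isMistralModel(modelName) {
        // Covers hosted IDs (mistral-large-latest, ministral-8b-latest,
        // codestral-latest) and self-hosted names containing mistral/mixtral.
        return /mistral|mixtral|ministral|codestral/i.test(modelName);
    }
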
@@ -3361,57 +3605,153 @@ export class OpenAIProvider extends BaseProvider {
      * Parse reasoning_content from streaming delta.
      *
      * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-     * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
+     * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.1
+     * @issue #749
      */
     parseStreamingReasoningDelta(delta) {
         if (!delta) {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
         // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
         const reasoningContent = delta
             .reasoning_content;
         // Handle absent, null, or non-string
         if (!reasoningContent || typeof reasoningContent !== 'string') {
-            return null;
-        }
-        // Handle empty string or whitespace-only
-        if (reasoningContent.trim().length === 0) {
-            return null;
-        }
-        return {
-            type: 'thinking',
-            thought: reasoningContent,
-            sourceField: 'reasoning_content',
-            isHidden: false,
-        };
+            return { thinking: null, toolCalls: [] };
+        }
+        // Handle empty string only - preserve whitespace-only content (spaces, tabs)
+        // to maintain proper formatting in accumulated reasoning (fixes issue #721)
+        if (reasoningContent.length === 0) {
+            return { thinking: null, toolCalls: [] };
+        }
+        // Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+        const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+        // For streaming, preserve whitespace-only content for proper formatting (issue #721)
+        // Only return null if the cleaned text is empty (length 0)
+        const thinkingBlock = cleanedText.length === 0
+            ? null
+            : {
+                type: 'thinking',
+                thought: cleanedText,
+                sourceField: 'reasoning_content',
+                isHidden: false,
+            };
+        return { thinking: thinkingBlock, toolCalls };
     }
     /**
      * Parse reasoning_content from non-streaming message.
      *
      * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-     * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
+     * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.2
+     * @issue #749
      */
     parseNonStreamingReasoning(message) {
         if (!message) {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
         // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
         const reasoningContent = message
             .reasoning_content;
         // Handle absent, null, or non-string
         if (!reasoningContent || typeof reasoningContent !== 'string') {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
-        // Handle empty string or whitespace-only
+        // Handle empty string or whitespace-only - for non-streaming complete responses,
+        // whitespace-only reasoning is unusual and should be treated as no reasoning
         if (reasoningContent.trim().length === 0) {
-            return null;
+            return { thinking: null, toolCalls: [] };
+        }
+        // Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+        const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+        // For non-streaming, trim whitespace after extraction
+        const trimmedText = cleanedText.trim();
+        const thinkingBlock = trimmedText.length === 0
+            ? null
+            : {
+                type: 'thinking',
+                thought: trimmedText,
+                sourceField: 'reasoning_content',
+                isHidden: false,
+            };
+        return { thinking: thinkingBlock, toolCalls };
+    }
3679
+ * Request continuation after tool calls when model returned no text.
3680
+ * This is a helper to avoid code duplication between legacy and pipeline paths.
3681
+ *
3682
+ * @plan PLAN-20250120-DEBUGLOGGING.P15
3683
+ * @issue #584, #764 (CodeRabbit review)
3684
+ */
3685
+ async *requestContinuationAfterToolCalls(toolCalls, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders) {
3686
+ // Build continuation messages
3687
+ const continuationMessages = [
3688
+ ...messagesWithSystem,
3689
+ // Add the assistant's tool calls
3690
+ {
3691
+ role: 'assistant',
3692
+ tool_calls: toolCalls,
3693
+ },
3694
+ // Add placeholder tool responses (tools have NOT been executed yet - only acknowledged)
3695
+ ...toolCalls.map((tc) => ({
3696
+ role: 'tool',
3697
+ tool_call_id: tc.id,
3698
+ content: '[Tool call acknowledged - awaiting execution]',
3699
+ })),
3700
+ // Add continuation prompt
3701
+ {
3702
+ role: 'user',
3703
+ content: 'The tool calls above have been registered. Please continue with your response.',
3704
+ },
3705
+ ];
3706
+ // Make a continuation request (wrap in try-catch since tools were already yielded)
3707
+ try {
3708
+ const continuationResponse = await client.chat.completions.create({
3709
+ ...requestBody,
3710
+ messages: continuationMessages,
3711
+ stream: true, // Always stream for consistency
3712
+ }, {
3713
+ ...(abortSignal ? { signal: abortSignal } : {}),
3714
+ ...(customHeaders ? { headers: customHeaders } : {}),
3715
+ });
3716
+ let accumulatedText = '';
3717
+ // Process the continuation response
3718
+ for await (const chunk of continuationResponse) {
3719
+ if (abortSignal?.aborted) {
3720
+ break;
3721
+ }
3722
+ const choice = chunk.choices?.[0];
3723
+ if (!choice)
3724
+ continue;
3725
+ const deltaContent = this.coerceMessageContentToString(choice.delta?.content);
3726
+ if (deltaContent) {
3727
+ const sanitized = this.sanitizeProviderText(deltaContent);
3728
+ if (sanitized) {
3729
+ accumulatedText += sanitized;
3730
+ yield {
3731
+ speaker: 'ai',
3732
+ blocks: [
3733
+ {
3734
+ type: 'text',
3735
+ text: sanitized,
3736
+ },
3737
+ ],
3738
+ };
3739
+ }
3740
+ }
3741
+ }
3742
+ logger.debug(() => `[OpenAIProvider] Continuation request completed, received ${accumulatedText.length} chars`, {
3743
+ model,
3744
+ accumulatedTextLength: accumulatedText.length,
3745
+ });
3746
+ }
3747
+ catch (continuationError) {
3748
+ // Tool calls were already successfully yielded, so log warning and continue
3749
+ logger.warn(() => `[OpenAIProvider] Continuation request failed, but tool calls were already emitted: ${continuationError instanceof Error ? continuationError.message : String(continuationError)}`, {
3750
+ model,
3751
+ error: continuationError,
3752
+ });
3753
+ // Don't re-throw - tool calls were already successful
3408
3754
  }
3409
- return {
3410
- type: 'thinking',
3411
- thought: reasoningContent,
3412
- sourceField: 'reasoning_content',
3413
- isHidden: false,
3414
- };
3415
3755
  }
3416
3756
  }
3417
3757
  //# sourceMappingURL=OpenAIProvider.js.map
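
Note: parseStreamingReasoningDelta and parseNonStreamingReasoning now return { thinking, toolCalls } instead of a bare thinking block or null, so every call site has to destructure both and route any Kimi K2 tool calls recovered from reasoning_content into the tool pipeline. The actual call sites are outside this diff; a sketch of the expected shape, where provider, choice, accumulatedThinkingContent, and routeToolCall are stand-ins:

    // Sketch only - call-site names here are assumptions, not the package's API.
    const { thinking, toolCalls } = provider.parseStreamingReasoningDelta(choice.delta);
    if (thinking) {
        accumulatedThinkingContent += thinking.thought; // whitespace preserved (issue #721)
    }
    for (const call of toolCalls) {
        routeToolCall(call); // embedded Kimi K2 calls recovered per issue #749
    }

Note also that requestContinuationAfterToolCalls deliberately swallows continuation failures: by the time it runs, the tool calls have already been yielded downstream, so a failed follow-up request degrades to a missing text summary rather than a hard error.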