@vybestack/llxprt-code-core 0.6.1-nightly.251203.b119e390d → 0.6.1-nightly.251205.deee6dfc3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/dist/src/auth/types.d.ts +6 -6
  2. package/dist/src/core/geminiChat.d.ts +8 -0
  3. package/dist/src/core/geminiChat.js +63 -5
  4. package/dist/src/core/geminiChat.js.map +1 -1
  5. package/dist/src/core/turn.js +12 -8
  6. package/dist/src/core/turn.js.map +1 -1
  7. package/dist/src/ide/ide-client.js +4 -2
  8. package/dist/src/ide/ide-client.js.map +1 -1
  9. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +0 -1
  10. package/dist/src/providers/anthropic/AnthropicProvider.js +6 -14
  11. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  12. package/dist/src/providers/anthropic/schemaConverter.d.ts +63 -0
  13. package/dist/src/providers/anthropic/schemaConverter.js +189 -0
  14. package/dist/src/providers/anthropic/schemaConverter.js.map +1 -0
  15. package/dist/src/providers/openai/OpenAIProvider.d.ts +77 -0
  16. package/dist/src/providers/openai/OpenAIProvider.js +1087 -175
  17. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  18. package/dist/src/providers/openai/ToolCallNormalizer.d.ts +6 -0
  19. package/dist/src/providers/openai/ToolCallNormalizer.js +16 -2
  20. package/dist/src/providers/openai/ToolCallNormalizer.js.map +1 -1
  21. package/dist/src/providers/openai/schemaConverter.d.ts +67 -0
  22. package/dist/src/providers/openai/schemaConverter.js +191 -0
  23. package/dist/src/providers/openai/schemaConverter.js.map +1 -0
  24. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +0 -4
  25. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +3 -75
  26. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -1
  27. package/dist/src/providers/openai-responses/schemaConverter.d.ts +65 -0
  28. package/dist/src/providers/openai-responses/schemaConverter.js +195 -0
  29. package/dist/src/providers/openai-responses/schemaConverter.js.map +1 -0
  30. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +1 -6
  31. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +4 -14
  32. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -1
  33. package/dist/src/providers/openai-vercel/schemaConverter.d.ts +66 -0
  34. package/dist/src/providers/openai-vercel/schemaConverter.js +191 -0
  35. package/dist/src/providers/openai-vercel/schemaConverter.js.map +1 -0
  36. package/dist/src/providers/reasoning/reasoningUtils.d.ts +43 -0
  37. package/dist/src/providers/reasoning/reasoningUtils.js +92 -0
  38. package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -0
  39. package/dist/src/runtime/AgentRuntimeContext.d.ts +27 -0
  40. package/dist/src/runtime/AgentRuntimeContext.js.map +1 -1
  41. package/dist/src/runtime/createAgentRuntimeContext.js +27 -0
  42. package/dist/src/runtime/createAgentRuntimeContext.js.map +1 -1
  43. package/dist/src/services/history/IContent.d.ts +6 -0
  44. package/dist/src/services/history/IContent.js.map +1 -1
  45. package/dist/src/tools/ToolFormatter.js +6 -0
  46. package/dist/src/tools/ToolFormatter.js.map +1 -1
  47. package/dist/src/tools/todo-schemas.d.ts +4 -4
  48. package/package.json +1 -1
@@ -25,6 +25,7 @@ import * as net from 'net';
25
25
  import { BaseProvider, } from '../BaseProvider.js';
26
26
  import { DebugLogger } from '../../debug/index.js';
27
27
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
28
+ import { convertToolsToOpenAI } from './schemaConverter.js';
28
29
  import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
29
30
  import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
30
31
  import { getCoreSystemPromptAsync } from '../../core/prompts.js';
@@ -36,6 +37,7 @@ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
36
37
  import { ToolCallPipeline } from './ToolCallPipeline.js';
37
38
  import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
38
39
  import { isLocalEndpoint } from '../utils/localEndpoint.js';
40
+ import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
39
41
  const MAX_TOOL_RESPONSE_CHARS = 1024;
40
42
  const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
41
43
  const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -218,6 +220,255 @@ export class OpenAIProvider extends BaseProvider {
218
220
  }
219
221
  return new OpenAI(clientOptions);
220
222
  }
223
+ /**
224
+ * Coerce provider "content" (which may be a string or an array-of-parts)
225
+ * into a plain string. Defensive for OpenAI-compatible providers that emit
226
+ * structured content blocks.
227
+ */
228
+ coerceMessageContentToString(content) {
229
+ if (typeof content === 'string') {
230
+ return content;
231
+ }
232
+ if (Array.isArray(content)) {
233
+ const parts = [];
234
+ for (const part of content) {
235
+ if (!part)
236
+ continue;
237
+ if (typeof part === 'string') {
238
+ parts.push(part);
239
+ }
240
+ else if (typeof part === 'object' &&
241
+ part !== null &&
242
+ 'text' in part &&
243
+ typeof part.text === 'string') {
244
+ parts.push(part.text);
245
+ }
246
+ }
247
+ return parts.length ? parts.join('') : undefined;
248
+ }
249
+ return undefined;
250
+ }
251
+ /**
252
+ * Strip provider-specific "thinking" / reasoning markup from visible text.
253
+ * This prevents DeepSeek / Kimi-style <think> blocks from leaking into
254
+ * user-visible output or tool arguments.
255
+ */
256
+ sanitizeProviderText(text) {
257
+ if (text === null || text === undefined) {
258
+ return '';
259
+ }
260
+ const logger = this.getLogger();
261
+ let str = typeof text === 'string' ? text : String(text);
262
+ const beforeLen = str.length;
263
+ const hadReasoningTags = /<(?:think|thinking|analysis)>|<\/(?:think|thinking|analysis)>/i.test(str);
264
+ // DeepSeek / generic <think>...</think> blocks.
265
+ str = str.replace(/<think>[\s\S]*?<\/think>/gi, '');
266
+ // Alternative reasoning tags some providers use.
267
+ str = str.replace(/<thinking>[\s\S]*?<\/thinking>/gi, '');
268
+ str = str.replace(/<analysis>[\s\S]*?<\/analysis>/gi, '');
269
+ // Clean up stray unmatched tags.
270
+ str = str.replace(/<\/?(?:think|thinking|analysis)>/gi, '');
271
+ const afterLen = str.length;
272
+ if (hadReasoningTags && afterLen !== beforeLen) {
273
+ logger.debug(() => `[OpenAIProvider] Stripped reasoning tags`, {
274
+ beforeLen,
275
+ afterLen,
276
+ });
277
+ }
278
+ return str;
279
+ }
280
+ /**
281
+ * Extract thinking content from <think>, <thinking>, or <analysis> tags
282
+ * and return it as a ThinkingBlock. Returns null if no thinking tags found.
283
+ *
284
+ * This must be called BEFORE sanitizeProviderText which strips these tags.
285
+ *
286
+ * Handles two formats:
287
+ * 1. Standard: <think>Full thinking paragraph here...</think>
288
+ * 2. Fragmented (Synthetic API): <think>word</think><think>word</think>...
289
+ *
290
+ * For fragmented format, joins with spaces. For standard, joins with newlines.
291
+ *
292
+ * @plan PLAN-20251202-THINKING.P16
293
+ * @requirement REQ-THINK-003
294
+ */
295
+ extractThinkTagsAsBlock(text) {
296
+ if (!text) {
297
+ return null;
298
+ }
299
+ // Collect all thinking content from various tag formats
300
+ const thinkingParts = [];
301
+ // Match <think>...</think>
302
+ const thinkMatches = text.matchAll(/<think>([\s\S]*?)<\/think>/gi);
303
+ for (const match of thinkMatches) {
304
+ if (match[1]?.trim()) {
305
+ thinkingParts.push(match[1].trim());
306
+ }
307
+ }
308
+ // Match <thinking>...</thinking>
309
+ const thinkingMatches = text.matchAll(/<thinking>([\s\S]*?)<\/thinking>/gi);
310
+ for (const match of thinkingMatches) {
311
+ if (match[1]?.trim()) {
312
+ thinkingParts.push(match[1].trim());
313
+ }
314
+ }
315
+ // Match <analysis>...</analysis>
316
+ const analysisMatches = text.matchAll(/<analysis>([\s\S]*?)<\/analysis>/gi);
317
+ for (const match of analysisMatches) {
318
+ if (match[1]?.trim()) {
319
+ thinkingParts.push(match[1].trim());
320
+ }
321
+ }
322
+ if (thinkingParts.length === 0) {
323
+ return null;
324
+ }
325
+ // Detect fragmented format: many short parts (likely token-by-token streaming)
326
+ // If average part length is very short (< 10 chars) and we have many parts,
327
+ // it's likely fragmented and should be joined with spaces
328
+ const avgPartLength = thinkingParts.reduce((sum, p) => sum + p.length, 0) /
329
+ thinkingParts.length;
330
+ const isFragmented = thinkingParts.length > 5 && avgPartLength < 15;
331
+ // Join with space for fragmented, newlines for standard multi-paragraph thinking
332
+ const combinedThought = isFragmented
333
+ ? thinkingParts.join(' ')
334
+ : thinkingParts.join('\n\n');
335
+ this.getLogger().debug(() => `[OpenAIProvider] Extracted thinking from tags: ${combinedThought.length} chars`, { tagCount: thinkingParts.length, isFragmented, avgPartLength });
336
+ return {
337
+ type: 'thinking',
338
+ thought: combinedThought,
339
+ sourceField: 'think_tags',
340
+ isHidden: false,
341
+ };
342
+ }
343
+ /**
344
+ * Normalize tool name by stripping Kimi-K2 style prefixes.
345
+ *
346
+ * Handles malformed tool names where the model concatenates prefixes like
347
+ * "functions" or "call_functions" with the actual tool name:
348
+ * - "functionslist_directory" -> "list_directory"
349
+ * - "call_functionslist_directory6" -> "list_directory"
350
+ * - "call_functionsglob7" -> "glob"
351
+ */
352
+ normalizeToolName(name) {
353
+ let normalized = (name || '').trim();
354
+ // Strip Kimi-K2 style prefixes where model concatenates "functions" or "call_functions"
355
+ // with the actual tool name (e.g., "functionslist_directory" -> "list_directory")
356
+ // Pattern: (call_)?functions<actual_tool_name><optional_number>
357
+ const kimiPrefixMatch = /^(?:call_)?functions([a-z_]+[a-z])(\d*)$/i.exec(normalized);
358
+ if (kimiPrefixMatch) {
359
+ const originalName = normalized;
360
+ normalized = kimiPrefixMatch[1];
361
+ this.getLogger().debug(() => `[OpenAIProvider] Stripped Kimi-style prefix from tool name: "${originalName}" -> "${normalized}"`);
362
+ }
363
+ return normalized.toLowerCase();
364
+ }
365
+ /**
366
+ * Sanitize raw tool argument payloads before JSON parsing:
367
+ * - Remove thinking blocks (<think>...</think>, etc.).
368
+ * - Strip Markdown code fences (```json ... ```).
369
+ * - Try to isolate the main JSON object if wrapped in prose.
370
+ */
371
+ sanitizeToolArgumentsString(raw) {
372
+ if (raw === null || raw === undefined) {
373
+ return '{}';
374
+ }
375
+ let text;
376
+ if (typeof raw === 'string') {
377
+ text = raw;
378
+ }
379
+ else {
380
+ try {
381
+ text = JSON.stringify(raw);
382
+ }
383
+ catch {
384
+ text = String(raw);
385
+ }
386
+ }
387
+ text = text.trim();
388
+ // Strip fenced code blocks like ```json { ... } ```.
389
+ if (text.startsWith('```')) {
390
+ text = text.replace(/^```[a-zA-Z0-9_-]*\s*/m, '');
391
+ text = text.replace(/```$/m, '');
392
+ text = text.trim();
393
+ }
394
+ // Remove provider reasoning / thinking markup.
395
+ text = this.sanitizeProviderText(text);
396
+ // If provider wrapped JSON in explanation text, try to isolate the object.
397
+ const firstBrace = text.indexOf('{');
398
+ const lastBrace = text.lastIndexOf('}');
399
+ if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
400
+ const candidate = text.slice(firstBrace, lastBrace + 1).trim();
401
+ if (candidate.startsWith('{') && candidate.endsWith('}')) {
402
+ return candidate;
403
+ }
404
+ }
405
+ return text.length ? text : '{}';
406
+ }
407
+ /**
408
+ * Parse Kimi-K2 `<|tool_calls_section_begin|> ... <|tool_calls_section_end|>`
409
+ * blocks out of a text string.
410
+ *
411
+ * - Returns cleanedText with the whole section removed.
412
+ * - Returns ToolCallBlock[] constructed from the section contents.
413
+ *
414
+ * This is used for HF/vLLM-style Kimi deployments where `tool_calls` is empty
415
+ * and all tool info is only encoded in the text template.
416
+ */
417
+ extractKimiToolCallsFromText(raw) {
418
+ if (!raw || !raw.includes('<|tool_calls_section_begin|>')) {
419
+ return { cleanedText: raw, toolCalls: [] };
420
+ }
421
+ const logger = this.getLogger();
422
+ const toolCalls = [];
423
+ let text = raw;
424
+ const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
425
+ text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
426
+ try {
427
+ const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
428
+ let m;
429
+ while ((m = callRegex.exec(sectionBody)) !== null) {
430
+ const rawId = m[1].trim();
431
+ const rawArgs = m[2].trim();
432
+ // Infer tool name from ID.
433
+ let toolName = '';
434
+ const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
435
+ /^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
436
+ if (match) {
437
+ toolName = match[1];
438
+ }
439
+ else {
440
+ const colonParts = rawId.split(':');
441
+ const head = colonParts[0] || rawId;
442
+ const dotParts = head.split('.');
443
+ toolName = dotParts[dotParts.length - 1] || head;
444
+ }
445
+ // Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
446
+ toolName = this.normalizeToolName(toolName);
447
+ const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
448
+ const processedParameters = processToolParameters(sanitizedArgs, toolName);
449
+ toolCalls.push({
450
+ type: 'tool_call',
451
+ id: this.normalizeToHistoryToolId(rawId),
452
+ name: toolName,
453
+ parameters: processedParameters,
454
+ });
455
+ }
456
+ }
457
+ catch (err) {
458
+ logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
459
+ }
460
+ // Strip the entire tool section from user-visible text
461
+ return '';
462
+ });
463
+ if (toolCalls.length > 0) {
464
+ logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
465
+ toolCallCount: toolCalls.length,
466
+ originalLength: raw.length,
467
+ cleanedLength: text.length,
468
+ });
469
+ }
470
+ return { cleanedText: text.trim(), toolCalls };
471
+ }
221
472
  /**
222
473
  * @plan:PLAN-20251023-STATELESS-HARDENING.P09
223
474
  * @requirement:REQ-SP4-002
@@ -685,6 +936,94 @@ export class OpenAIProvider extends BaseProvider {
685
936
  // This ensures each tool message has a corresponding tool_calls in previous message
686
937
  return this.validateToolMessageSequence(messages);
687
938
  }
939
+ /**
940
+ * Build messages with optional reasoning_content based on settings.
941
+ *
942
+ * @plan PLAN-20251202-THINKING.P14
943
+ * @requirement REQ-THINK-004, REQ-THINK-006
944
+ */
945
+ buildMessagesWithReasoning(contents, options) {
946
+ // Read settings with defaults
947
+ const stripPolicy = options.settings.get('reasoning.stripFromContext') ??
948
+ 'none';
949
+ const includeInContext = options.settings.get('reasoning.includeInContext') ?? false;
950
+ // Apply strip policy first
951
+ const filteredContents = filterThinkingForContext(contents, stripPolicy);
952
+ const messages = [];
953
+ for (const content of filteredContents) {
954
+ if (content.speaker === 'human') {
955
+ // Convert human messages to user messages
956
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
957
+ const text = textBlocks.map((b) => b.text).join('\n');
958
+ if (text) {
959
+ messages.push({
960
+ role: 'user',
961
+ content: text,
962
+ });
963
+ }
964
+ }
965
+ else if (content.speaker === 'ai') {
966
+ // Convert AI messages with optional reasoning_content
967
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
968
+ const text = textBlocks.map((b) => b.text).join('\n');
969
+ const thinkingBlocks = extractThinkingBlocks(content);
970
+ const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
971
+ if (toolCalls.length > 0) {
972
+ // Assistant message with tool calls
973
+ const baseMessage = {
974
+ role: 'assistant',
975
+ content: text || null,
976
+ tool_calls: toolCalls.map((tc) => ({
977
+ id: this.normalizeToOpenAIToolId(tc.id),
978
+ type: 'function',
979
+ function: {
980
+ name: tc.name,
981
+ arguments: this.normalizeToolCallArguments(tc.parameters),
982
+ },
983
+ })),
984
+ };
985
+ if (includeInContext && thinkingBlocks.length > 0) {
986
+ const messageWithReasoning = baseMessage;
987
+ messageWithReasoning.reasoning_content =
988
+ thinkingToReasoningField(thinkingBlocks);
989
+ messages.push(messageWithReasoning);
990
+ }
991
+ else {
992
+ messages.push(baseMessage);
993
+ }
994
+ }
995
+ else if (textBlocks.length > 0 || thinkingBlocks.length > 0) {
996
+ // Plain assistant message
997
+ const baseMessage = {
998
+ role: 'assistant',
999
+ content: text,
1000
+ };
1001
+ if (includeInContext && thinkingBlocks.length > 0) {
1002
+ const messageWithReasoning = baseMessage;
1003
+ messageWithReasoning.reasoning_content =
1004
+ thinkingToReasoningField(thinkingBlocks);
1005
+ messages.push(messageWithReasoning);
1006
+ }
1007
+ else {
1008
+ messages.push(baseMessage);
1009
+ }
1010
+ }
1011
+ }
1012
+ else if (content.speaker === 'tool') {
1013
+ // Convert tool responses
1014
+ const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
1015
+ for (const tr of toolResponses) {
1016
+ messages.push({
1017
+ role: 'tool',
1018
+ content: this.buildToolResponseContent(tr, options.config),
1019
+ tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
1020
+ });
1021
+ }
1022
+ }
1023
+ }
1024
+ // Validate tool message sequence to prevent API errors
1025
+ return this.validateToolMessageSequence(messages);
1026
+ }
688
1027
  /**
689
1028
  * Validates tool message sequence to ensure each tool message has a corresponding tool_calls
690
1029
  * This prevents "messages with role 'tool' must be a response to a preceeding message with 'tool_calls'" errors
@@ -698,6 +1037,18 @@ export class OpenAIProvider extends BaseProvider {
698
1037
  const logger = this.getLogger();
699
1038
  const validatedMessages = [...messages];
700
1039
  let removedCount = 0;
1040
+ // Debug: Log the full message sequence for tool call analysis
1041
+ logger.debug(() => `[OpenAIProvider] validateToolMessageSequence: analyzing ${messages.length} messages`, {
1042
+ messageRoles: messages.map((m) => m.role),
1043
+ toolCallIds: messages
1044
+ .filter((m) => m.role === 'assistant' &&
1045
+ 'tool_calls' in m &&
1046
+ Array.isArray(m.tool_calls))
1047
+ .flatMap((m) => m.tool_calls?.map((tc) => tc.id) ?? []),
1048
+ toolResponseIds: messages
1049
+ .filter((m) => m.role === 'tool')
1050
+ .map((m) => m.tool_call_id),
1051
+ });
701
1052
  // Check if there are any tool_calls in conversation
702
1053
  // If no tool_calls exist, this might be isolated tool response testing - skip validation
703
1054
  const hasToolCallsInConversation = validatedMessages.some((msg) => msg.role === 'assistant' &&
@@ -827,8 +1178,10 @@ export class OpenAIProvider extends BaseProvider {
827
1178
  });
828
1179
  }
829
1180
  // Convert IContent to OpenAI messages format
830
- const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
831
- const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
1181
+ // Use buildMessagesWithReasoning for reasoning-aware message building
1182
+ const messages = toolReplayMode === 'native'
1183
+ ? this.buildMessagesWithReasoning(contents, options)
1184
+ : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
832
1185
  if (logger.enabled && toolReplayMode !== 'native') {
833
1186
  logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
834
1187
  }
@@ -840,8 +1193,9 @@ export class OpenAIProvider extends BaseProvider {
840
1193
  detectedFormat,
841
1194
  provider: this.name,
842
1195
  });
843
- // Convert Gemini format tools to the detected format
844
- let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
1196
+ // Convert Gemini format tools to OpenAI format using the schema converter
1197
+ // This ensures required fields are always present in tool schemas
1198
+ let formattedTools = convertToolsToOpenAI(tools);
845
1199
  // CRITICAL FIX: Ensure we never pass an empty tools array
846
1200
  // The OpenAI API errors when tools=[] but a tool call is attempted
847
1201
  if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1106,16 +1460,37 @@ export class OpenAIProvider extends BaseProvider {
1106
1460
  // Buffer for accumulating text chunks for providers that need it
1107
1461
  let textBuffer = '';
1108
1462
  // Use the same detected format from earlier for consistency
1109
- // Buffer text for Qwen format providers to avoid stanza formatting
1110
- const shouldBufferText = detectedFormat === 'qwen';
1463
+ const isKimiModel = model.toLowerCase().includes('kimi-k2');
1464
+ // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
1465
+ const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
1466
+ // Accumulate thinking content across the entire stream to emit as ONE block
1467
+ // This handles fragmented <think>word</think> streaming from Synthetic API
1468
+ // @plan PLAN-20251202-THINKING.P16
1469
+ let accumulatedThinkingContent = '';
1470
+ let hasEmittedThinking = false;
1471
+ // Accumulate reasoning_content from streaming deltas (legacy path)
1472
+ // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
1473
+ // @plan PLAN-20251202-THINKING.P16
1474
+ let accumulatedReasoningContent = '';
1111
1475
  // Track token usage from streaming chunks
1112
1476
  let streamingUsage = null;
1477
+ // Track total chunks for debugging empty responses
1478
+ let totalChunksReceived = 0;
1113
1479
  try {
1114
1480
  // Handle streaming response
1115
1481
  for await (const chunk of response) {
1482
+ totalChunksReceived++;
1116
1483
  if (abortSignal?.aborted) {
1117
1484
  break;
1118
1485
  }
1486
+ // Debug: Log first few chunks and every 10th chunk to understand stream behavior
1487
+ if (totalChunksReceived <= 3 || totalChunksReceived % 10 === 0) {
1488
+ logger.debug(() => `[Streaming legacy] Chunk #${totalChunksReceived} received`, {
1489
+ hasChoices: !!chunk.choices?.length,
1490
+ firstChoiceDelta: chunk.choices?.[0]?.delta,
1491
+ finishReason: chunk.choices?.[0]?.finish_reason,
1492
+ });
1493
+ }
1119
1494
  const chunkRecord = chunk;
1120
1495
  let parsedData;
1121
1496
  const rawData = chunkRecord?.data;
@@ -1152,6 +1527,14 @@ export class OpenAIProvider extends BaseProvider {
1152
1527
  const choice = chunk.choices?.[0];
1153
1528
  if (!choice)
1154
1529
  continue;
1530
+ // Parse reasoning_content from streaming delta (Phase 16 integration)
1531
+ // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
1532
+ // @plan PLAN-20251202-THINKING.P16
1533
+ const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
1534
+ if (reasoningBlock) {
1535
+ // Accumulate reasoning content - will emit ONE block later
1536
+ accumulatedReasoningContent += reasoningBlock.thought;
1537
+ }
1155
1538
  // Check for finish_reason to detect proper stream ending
1156
1539
  if (choice.finish_reason) {
1157
1540
  logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1165,23 +1548,23 @@ export class OpenAIProvider extends BaseProvider {
1165
1548
  if (choice.finish_reason === 'length') {
1166
1549
  logger.debug(() => `Response truncated due to length limit for model ${model}`);
1167
1550
  }
1168
- // Flush any buffered text when stream finishes
1169
- if (textBuffer.length > 0) {
1170
- yield {
1171
- speaker: 'ai',
1172
- blocks: [
1173
- {
1174
- type: 'text',
1175
- text: textBuffer,
1176
- },
1177
- ],
1178
- };
1179
- textBuffer = '';
1180
- }
1551
+ // Don't flush buffer here on finish - let the final buffer handling
1552
+ // after the loop process it with proper sanitization and think tag extraction
1553
+ // This was causing unsanitized <think> tags to leak into output (legacy path)
1554
+ // @plan PLAN-20251202-THINKING.P16
1181
1555
  }
1182
1556
  // Handle text content - buffer for Qwen format, emit immediately for others
1183
- const deltaContent = choice.delta?.content;
1184
- if (deltaContent) {
1557
+ // Note: Synthetic API sends content that may duplicate reasoning_content.
1558
+ // This is the model's behavior - we don't filter it here.
1559
+ // @plan PLAN-20251202-THINKING.P16
1560
+ const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
1561
+ if (rawDeltaContent) {
1562
+ const deltaContent = isKimiModel
1563
+ ? rawDeltaContent
1564
+ : this.sanitizeProviderText(rawDeltaContent);
1565
+ if (!deltaContent) {
1566
+ continue;
1567
+ }
1185
1568
  _accumulatedText += deltaContent;
1186
1569
  // Debug log for providers that need buffering
1187
1570
  if (shouldBufferText) {
@@ -1194,22 +1577,102 @@ export class OpenAIProvider extends BaseProvider {
1194
1577
  });
1195
1578
  // Buffer text to avoid stanza formatting
1196
1579
  textBuffer += deltaContent;
1580
+ const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
1581
+ const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
1582
+ const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
1197
1583
  // Emit buffered text when we have a complete sentence or paragraph
1198
- // Look for natural break points
1199
- if (textBuffer.includes('\n') ||
1200
- textBuffer.endsWith('. ') ||
1201
- textBuffer.endsWith('! ') ||
1202
- textBuffer.endsWith('? ') ||
1203
- textBuffer.length > 100) {
1204
- yield {
1205
- speaker: 'ai',
1206
- blocks: [
1207
- {
1208
- type: 'text',
1209
- text: textBuffer,
1210
- },
1211
- ],
1212
- };
1584
+ // Look for natural break points, but avoid flushing mid Kimi section
1585
+ if (!hasOpenKimiSection &&
1586
+ (textBuffer.includes('\n') ||
1587
+ textBuffer.endsWith('. ') ||
1588
+ textBuffer.endsWith('! ') ||
1589
+ textBuffer.endsWith('? ') ||
1590
+ textBuffer.length > 100)) {
1591
+ const parsedToolCalls = [];
1592
+ let workingText = textBuffer;
1593
+ // Extract <think> tags and ACCUMULATE instead of emitting immediately (legacy path)
1594
+ // This handles fragmented <think>word</think> streaming from Synthetic API
1595
+ // @plan PLAN-20251202-THINKING.P16
1596
+ // @requirement REQ-THINK-003
1597
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
1598
+ if (tagBasedThinking) {
1599
+ // Accumulate thinking content - don't emit yet
1600
+ if (accumulatedThinkingContent.length > 0) {
1601
+ accumulatedThinkingContent += ' ';
1602
+ }
1603
+ accumulatedThinkingContent += tagBasedThinking.thought;
1604
+ logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
1605
+ }
1606
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
1607
+ if (kimiParsed.toolCalls.length > 0) {
1608
+ parsedToolCalls.push(...kimiParsed.toolCalls);
1609
+ logger.debug(() => `[OpenAIProvider] Streaming buffer (legacy) parsed Kimi tool calls`, {
1610
+ count: kimiParsed.toolCalls.length,
1611
+ bufferLength: workingText.length,
1612
+ cleanedLength: kimiParsed.cleanedText.length,
1613
+ });
1614
+ }
1615
+ workingText = kimiParsed.cleanedText;
1616
+ const parsingText = this.sanitizeProviderText(workingText);
1617
+ let cleanedText = parsingText;
1618
+ try {
1619
+ const parsedResult = this.textToolParser.parse(parsingText);
1620
+ if (parsedResult.toolCalls.length > 0) {
1621
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
1622
+ type: 'tool_call',
1623
+ id: `text_tool_${Date.now()}_${Math.random()
1624
+ .toString(36)
1625
+ .substring(7)}`,
1626
+ name: this.normalizeToolName(call.name),
1627
+ parameters: call.arguments,
1628
+ })));
1629
+ cleanedText = parsedResult.cleanedContent;
1630
+ }
1631
+ }
1632
+ catch (error) {
1633
+ const logger = this.getLogger();
1634
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
1635
+ }
1636
+ // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
1637
+ // This ensures thinking appears first in the response
1638
+ // @plan PLAN-20251202-THINKING.P16
1639
+ if (!hasEmittedThinking &&
1640
+ accumulatedThinkingContent.length > 0 &&
1641
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
1642
+ yield {
1643
+ speaker: 'ai',
1644
+ blocks: [
1645
+ {
1646
+ type: 'thinking',
1647
+ thought: accumulatedThinkingContent,
1648
+ sourceField: 'think_tags',
1649
+ isHidden: false,
1650
+ },
1651
+ ],
1652
+ };
1653
+ hasEmittedThinking = true;
1654
+ logger.debug(() => `[Streaming legacy] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
1655
+ }
1656
+ if (parsedToolCalls.length > 0) {
1657
+ yield {
1658
+ speaker: 'ai',
1659
+ blocks: parsedToolCalls,
1660
+ };
1661
+ }
1662
+ // Always use sanitized text to strip <think> tags (legacy streaming)
1663
+ // Bug fix: Previously Kimi used unsanitized workingText
1664
+ // @plan PLAN-20251202-THINKING.P16
1665
+ if (cleanedText.trim().length > 0) {
1666
+ yield {
1667
+ speaker: 'ai',
1668
+ blocks: [
1669
+ {
1670
+ type: 'text',
1671
+ text: cleanedText,
1672
+ },
1673
+ ],
1674
+ };
1675
+ }
1213
1676
  textBuffer = '';
1214
1677
  }
1215
1678
  }
@@ -1308,19 +1771,44 @@ export class OpenAIProvider extends BaseProvider {
1308
1771
  }
1309
1772
  // Check buffered text for <tool_call> format before flushing as plain text
1310
1773
  if (textBuffer.length > 0) {
1311
- // Try to parse <tool_call> format from buffered text
1312
- let parsedToolCalls = [];
1313
- let cleanedText = textBuffer;
1774
+ const parsedToolCalls = [];
1775
+ let workingText = textBuffer;
1776
+ // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
1777
+ // This is the model's behavior - we don't strip it since the model is the source.
1778
+ // The user can configure reasoning display settings if they don't want duplicates.
1779
+ // @plan PLAN-20251202-THINKING.P16
1780
+ // Extract any remaining <think> tags from final buffer (legacy path)
1781
+ // @plan PLAN-20251202-THINKING.P16
1782
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
1783
+ if (tagBasedThinking) {
1784
+ if (accumulatedThinkingContent.length > 0) {
1785
+ accumulatedThinkingContent += ' ';
1786
+ }
1787
+ accumulatedThinkingContent += tagBasedThinking.thought;
1788
+ }
1789
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
1790
+ if (kimiParsed.toolCalls.length > 0) {
1791
+ parsedToolCalls.push(...kimiParsed.toolCalls);
1792
+ this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (legacy) parsed Kimi tool calls`, {
1793
+ count: kimiParsed.toolCalls.length,
1794
+ bufferLength: workingText.length,
1795
+ cleanedLength: kimiParsed.cleanedText.length,
1796
+ });
1797
+ }
1798
+ workingText = kimiParsed.cleanedText;
1799
+ const parsingText = this.sanitizeProviderText(workingText);
1800
+ let cleanedText = parsingText;
1314
1801
  try {
1315
- const parsedResult = this.textToolParser.parse(textBuffer);
1802
+ const parsedResult = this.textToolParser.parse(parsingText);
1316
1803
  if (parsedResult.toolCalls.length > 0) {
1317
- // Convert parsed tool calls to ToolCallBlock format
1318
- parsedToolCalls = parsedResult.toolCalls.map((call) => ({
1804
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
1319
1805
  type: 'tool_call',
1320
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
1321
- name: call.name,
1806
+ id: `text_tool_${Date.now()}_${Math.random()
1807
+ .toString(36)
1808
+ .substring(7)}`,
1809
+ name: this.normalizeToolName(call.name),
1322
1810
  parameters: call.arguments,
1323
- }));
1811
+ })));
1324
1812
  cleanedText = parsedResult.cleanedContent;
1325
1813
  }
1326
1814
  }
@@ -1328,14 +1816,33 @@ export class OpenAIProvider extends BaseProvider {
1328
1816
  const logger = this.getLogger();
1329
1817
  logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
1330
1818
  }
1331
- // Emit tool calls from text parsing first
1819
+ // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
1820
+ // @plan PLAN-20251202-THINKING.P16
1821
+ if (!hasEmittedThinking &&
1822
+ accumulatedThinkingContent.length > 0 &&
1823
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
1824
+ yield {
1825
+ speaker: 'ai',
1826
+ blocks: [
1827
+ {
1828
+ type: 'thinking',
1829
+ thought: accumulatedThinkingContent,
1830
+ sourceField: 'think_tags',
1831
+ isHidden: false,
1832
+ },
1833
+ ],
1834
+ };
1835
+ hasEmittedThinking = true;
1836
+ }
1332
1837
  if (parsedToolCalls.length > 0) {
1333
1838
  yield {
1334
1839
  speaker: 'ai',
1335
1840
  blocks: parsedToolCalls,
1336
1841
  };
1337
1842
  }
1338
- // Then emit any remaining cleaned text
1843
+ // Always use sanitized text to strip <think> tags (legacy final buffer)
1844
+ // Bug fix: Previously Kimi used unsanitized workingText
1845
+ // @plan PLAN-20251202-THINKING.P16
1339
1846
  if (cleanedText.trim().length > 0) {
1340
1847
  yield {
1341
1848
  speaker: 'ai',
@@ -1349,18 +1856,54 @@ export class OpenAIProvider extends BaseProvider {
1349
1856
  }
1350
1857
  textBuffer = '';
1351
1858
  }
1859
+ // Emit any remaining accumulated thinking that wasn't emitted yet (legacy path)
1860
+ // (e.g., if entire response was just thinking with no content)
1861
+ // @plan PLAN-20251202-THINKING.P16
1862
+ if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
1863
+ yield {
1864
+ speaker: 'ai',
1865
+ blocks: [
1866
+ {
1867
+ type: 'thinking',
1868
+ thought: accumulatedThinkingContent,
1869
+ sourceField: 'think_tags',
1870
+ isHidden: false,
1871
+ },
1872
+ ],
1873
+ };
1874
+ hasEmittedThinking = true;
1875
+ }
1876
+ // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
1877
+ // This consolidates token-by-token reasoning from Synthetic API into a single block
1878
+ // @plan PLAN-20251202-THINKING.P16
1879
+ if (accumulatedReasoningContent.length > 0) {
1880
+ yield {
1881
+ speaker: 'ai',
1882
+ blocks: [
1883
+ {
1884
+ type: 'thinking',
1885
+ thought: accumulatedReasoningContent,
1886
+ sourceField: 'reasoning_content',
1887
+ isHidden: false,
1888
+ },
1889
+ ],
1890
+ };
1891
+ }
1352
1892
  // Process and emit tool calls using legacy accumulated approach
1353
1893
  if (accumulatedToolCalls.length > 0) {
1354
1894
  const blocks = [];
1355
1895
  for (const tc of accumulatedToolCalls) {
1356
1896
  if (!tc)
1357
1897
  continue;
1898
+ const sanitizedArgs = this.sanitizeToolArgumentsString(tc.function.arguments);
1899
+ // Normalize tool name (handles Kimi-K2 style prefixes)
1900
+ const normalizedName = this.normalizeToolName(tc.function.name || '');
1358
1901
  // Process tool parameters with double-escape handling
1359
- const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '');
1902
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
1360
1903
  blocks.push({
1361
1904
  type: 'tool_call',
1362
1905
  id: this.normalizeToHistoryToolId(tc.id),
1363
- name: tc.function.name || '',
1906
+ name: normalizedName,
1364
1907
  parameters: processedParameters,
1365
1908
  });
1366
1909
  }
@@ -1400,6 +1943,40 @@ export class OpenAIProvider extends BaseProvider {
1400
1943
  },
1401
1944
  };
1402
1945
  }
1946
+ // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
1947
+ // Only warn if we truly got nothing - not even reasoning content
1948
+ if (_accumulatedText.length === 0 &&
1949
+ accumulatedToolCalls.length === 0 &&
1950
+ textBuffer.length === 0 &&
1951
+ accumulatedReasoningContent.length === 0 &&
1952
+ accumulatedThinkingContent.length === 0) {
1953
+ // Provide actionable guidance for users
1954
+ const isKimi = model.toLowerCase().includes('kimi');
1955
+ const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
1956
+ const troubleshooting = isKimi
1957
+ ? isSynthetic
1958
+ ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
1959
+ : ' This provider may not support streaming with tool calls.'
1960
+ : ' Consider using streaming: "disabled" in your profile settings.';
1961
+ logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${totalChunksReceived} chunks with no content).${troubleshooting}`, {
1962
+ model,
1963
+ baseURL: baseURL ?? this.getBaseURL(),
1964
+ isKimiModel: isKimi,
1965
+ isSyntheticAPI: isSynthetic,
1966
+ totalChunksReceived,
1967
+ });
1968
+ }
1969
+ else {
1970
+ // Log what we DID get for debugging
1971
+ logger.debug(() => `[Streaming legacy] Stream completed with accumulated content`, {
1972
+ textLength: _accumulatedText.length,
1973
+ toolCallCount: accumulatedToolCalls.length,
1974
+ textBufferLength: textBuffer.length,
1975
+ reasoningLength: accumulatedReasoningContent.length,
1976
+ thinkingLength: accumulatedThinkingContent.length,
1977
+ totalChunksReceived,
1978
+ });
1979
+ }
1403
1980
  }
1404
1981
  else {
1405
1982
  // Handle non-streaming response
@@ -1425,22 +2002,57 @@ export class OpenAIProvider extends BaseProvider {
1425
2002
  }
1426
2003
  }
1427
2004
  const blocks = [];
1428
- // Handle text content
1429
- if (choice.message?.content) {
1430
- blocks.push({
1431
- type: 'text',
1432
- text: choice.message.content,
1433
- });
2005
+ // Parse reasoning_content from response (Phase 16 integration)
2006
+ const reasoningBlock = this.parseNonStreamingReasoning(choice.message);
2007
+ logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}`, {
2008
+ hasReasoningContent: 'reasoning_content' in
2009
+ (choice.message ?? {}),
2010
+ messageKeys: Object.keys(choice.message ?? {}),
2011
+ });
2012
+ if (reasoningBlock) {
2013
+ blocks.push(reasoningBlock);
2014
+ }
2015
+ // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
2016
+ const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
2017
+ let kimiCleanContent;
2018
+ let kimiToolBlocks = [];
2019
+ if (rawMessageContent) {
2020
+ // Extract <think> tags as ThinkingBlock BEFORE stripping them
2021
+ // Only do this if we didn't already get reasoning from reasoning_content field
2022
+ // @plan PLAN-20251202-THINKING.P16
2023
+ // @requirement REQ-THINK-003
2024
+ if (!reasoningBlock) {
2025
+ const tagBasedThinking = this.extractThinkTagsAsBlock(rawMessageContent);
2026
+ if (tagBasedThinking) {
2027
+ blocks.push(tagBasedThinking);
2028
+ logger.debug(() => `[Non-streaming] Extracted thinking from <think> tags: ${tagBasedThinking.thought.length} chars`);
2029
+ }
2030
+ }
2031
+ const kimiParsed = this.extractKimiToolCallsFromText(rawMessageContent);
2032
+ kimiCleanContent = kimiParsed.cleanedText;
2033
+ kimiToolBlocks = kimiParsed.toolCalls;
2034
+ // Always sanitize text content to remove <think> tags
2035
+ // Bug fix: Previously Kimi-K2 used unsanitized kimiCleanContent,
2036
+ // which caused <think> tags to leak into visible output
2037
+ // @plan PLAN-20251202-THINKING.P16
2038
+ const cleanedText = this.sanitizeProviderText(kimiCleanContent);
2039
+ if (cleanedText) {
2040
+ blocks.push({
2041
+ type: 'text',
2042
+ text: cleanedText,
2043
+ });
2044
+ }
1434
2045
  }
1435
2046
  // Handle tool calls
1436
2047
  if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
1437
2048
  // Use the same detected format from earlier for consistency
1438
2049
  for (const toolCall of choice.message.tool_calls) {
1439
2050
  if (toolCall.type === 'function') {
1440
- // Use tool name directly without normalization for legacy compatibility
1441
- const toolName = toolCall.function.name || '';
2051
+ // Normalize tool name (handles Kimi-K2 style prefixes)
2052
+ const toolName = this.normalizeToolName(toolCall.function.name || '');
2053
+ const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
1442
2054
  // Process tool parameters with double-escape handling
1443
- const processedParameters = processToolParameters(toolCall.function.arguments || '', toolName);
2055
+ const processedParameters = processToolParameters(sanitizedArgs, toolName);
1444
2056
  blocks.push({
1445
2057
  type: 'tool_call',
1446
2058
  id: this.normalizeToHistoryToolId(toolCall.id),
@@ -1450,42 +2062,49 @@ export class OpenAIProvider extends BaseProvider {
1450
2062
  }
1451
2063
  }
1452
2064
  }
2065
+ // Add any tool calls parsed from Kimi inline sections
2066
+ if (kimiToolBlocks.length > 0) {
2067
+ blocks.push(...kimiToolBlocks);
2068
+ this.getLogger().debug(() => `[OpenAIProvider] Non-stream legacy added Kimi tool calls from text`, { count: kimiToolBlocks.length });
2069
+ }
1453
2070
  // Additionally check for <tool_call> format in text content
1454
- if (choice.message?.content &&
1455
- typeof choice.message.content === 'string') {
1456
- try {
1457
- const parsedResult = this.textToolParser.parse(choice.message.content);
1458
- if (parsedResult.toolCalls.length > 0) {
1459
- // Add tool calls found in text content
1460
- for (const call of parsedResult.toolCalls) {
1461
- blocks.push({
1462
- type: 'tool_call',
1463
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
1464
- name: call.name,
1465
- parameters: call.arguments,
1466
- });
1467
- }
1468
- // Update the text content to remove the tool call parts
1469
- if (choice.message.content !== parsedResult.cleanedContent) {
1470
- // Find the text block and update it
1471
- const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
1472
- if (textBlockIndex >= 0) {
1473
- blocks[textBlockIndex].text =
1474
- parsedResult.cleanedContent;
1475
- }
1476
- else if (parsedResult.cleanedContent.trim()) {
1477
- // Add cleaned text if it doesn't exist
1478
- blocks.unshift({
1479
- type: 'text',
1480
- text: parsedResult.cleanedContent,
2071
+ if (kimiCleanContent) {
2072
+ const cleanedSource = this.sanitizeProviderText(kimiCleanContent);
2073
+ if (cleanedSource) {
2074
+ try {
2075
+ const parsedResult = this.textToolParser.parse(cleanedSource);
2076
+ if (parsedResult.toolCalls.length > 0) {
2077
+ // Add tool calls found in text content
2078
+ for (const call of parsedResult.toolCalls) {
2079
+ blocks.push({
2080
+ type: 'tool_call',
2081
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2082
+ name: this.normalizeToolName(call.name),
2083
+ parameters: call.arguments,
1481
2084
  });
1482
2085
  }
2086
+ // Update the text content to remove the tool call parts
2087
+ if (choice.message.content !== parsedResult.cleanedContent) {
2088
+ // Find the text block and update it
2089
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
2090
+ if (textBlockIndex >= 0) {
2091
+ blocks[textBlockIndex].text =
2092
+ parsedResult.cleanedContent;
2093
+ }
2094
+ else if (parsedResult.cleanedContent.trim()) {
2095
+ // Add cleaned text if it doesn't exist
2096
+ blocks.unshift({
2097
+ type: 'text',
2098
+ text: parsedResult.cleanedContent,
2099
+ });
2100
+ }
2101
+ }
1483
2102
  }
1484
2103
  }
1485
- }
1486
- catch (error) {
1487
- const logger = this.getLogger();
1488
- logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
2104
+ catch (error) {
2105
+ const logger = this.getLogger();
2106
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
2107
+ }
1489
2108
  }
1490
2109
  }
1491
2110
  // Emit the complete response as a single IContent
@@ -1614,8 +2233,10 @@ export class OpenAIProvider extends BaseProvider {
1614
2233
  // Determine tool replay mode for model compatibility (e.g., polaris-alpha)
1615
2234
  const toolReplayMode = this.determineToolReplayMode(model);
1616
2235
  // Convert IContent to OpenAI messages format
1617
- const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
1618
- const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
2236
+ // Use buildMessagesWithReasoning for reasoning-aware message building
2237
+ const messages = toolReplayMode === 'native'
2238
+ ? this.buildMessagesWithReasoning(contents, options)
2239
+ : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
1619
2240
  // Log tool replay mode usage for debugging
1620
2241
  if (logger.enabled && toolReplayMode !== 'native') {
1621
2242
  logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
@@ -1628,8 +2249,9 @@ export class OpenAIProvider extends BaseProvider {
1628
2249
  detectedFormat,
1629
2250
  provider: this.name,
1630
2251
  });
1631
- // Convert Gemini format tools to the detected format
1632
- let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
2252
+ // Convert Gemini format tools to OpenAI format using the schema converter
2253
+ // This ensures required fields are always present in tool schemas
2254
+ let formattedTools = convertToolsToOpenAI(tools);
1633
2255
  // CRITICAL FIX: Ensure we never pass an empty tools array
1634
2256
  // The OpenAI API errors when tools=[] but a tool call is attempted
1635
2257
  if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1905,8 +2527,18 @@ export class OpenAIProvider extends BaseProvider {
1905
2527
  // Buffer for accumulating text chunks for providers that need it
1906
2528
  let textBuffer = '';
1907
2529
  // Use the same detected format from earlier for consistency
1908
- // Buffer text for Qwen format providers to avoid stanza formatting
1909
- const shouldBufferText = detectedFormat === 'qwen';
2530
+ const isKimiModel = model.toLowerCase().includes('kimi-k2');
2531
+ // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
2532
+ const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
2533
+ // Accumulate thinking content across the entire stream to emit as ONE block
2534
+ // This handles fragmented <think>word</think> streaming from Synthetic API
2535
+ // @plan PLAN-20251202-THINKING.P16
2536
+ let accumulatedThinkingContent = '';
2537
+ let hasEmittedThinking = false;
2538
+ // Accumulate reasoning_content from streaming deltas (pipeline path)
2539
+ // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
2540
+ // @plan PLAN-20251202-THINKING.P16
2541
+ let accumulatedReasoningContent = '';
1910
2542
  // Track token usage from streaming chunks
1911
2543
  let streamingUsage = null;
1912
2544
  const allChunks = []; // Collect all chunks first
@@ -1918,6 +2550,11 @@ export class OpenAIProvider extends BaseProvider {
1918
2550
  }
1919
2551
  allChunks.push(chunk);
1920
2552
  }
2553
+ // Debug: Log how many chunks were received
2554
+ logger.debug(() => `[Streaming pipeline] Collected ${allChunks.length} chunks from stream`, {
2555
+ firstChunkDelta: allChunks[0]?.choices?.[0]?.delta,
2556
+ lastChunkFinishReason: allChunks[allChunks.length - 1]?.choices?.[0]?.finish_reason,
2557
+ });
1921
2558
  // Now process all collected chunks
1922
2559
  for (const chunk of allChunks) {
1923
2560
  // Check for cancellation during chunk processing
@@ -1960,6 +2597,15 @@ export class OpenAIProvider extends BaseProvider {
1960
2597
  const choice = chunk.choices?.[0];
1961
2598
  if (!choice)
1962
2599
  continue;
2600
+ // Parse reasoning_content from streaming delta (Pipeline path)
2601
+ // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
2602
+ // @plan PLAN-20251202-THINKING.P16
2603
+ // @requirement REQ-THINK-003.1
2604
+ const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
2605
+ if (reasoningBlock) {
2606
+ // Accumulate reasoning content - will emit ONE block later
2607
+ accumulatedReasoningContent += reasoningBlock.thought;
2608
+ }
1963
2609
  // Check for finish_reason to detect proper stream ending
1964
2610
  if (choice.finish_reason) {
1965
2611
  logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1973,23 +2619,23 @@ export class OpenAIProvider extends BaseProvider {
1973
2619
  if (choice.finish_reason === 'length') {
1974
2620
  logger.debug(() => `Response truncated due to length limit for model ${model}`);
1975
2621
  }
1976
- // Flush any buffered text when stream finishes
1977
- if (textBuffer.length > 0) {
1978
- yield {
1979
- speaker: 'ai',
1980
- blocks: [
1981
- {
1982
- type: 'text',
1983
- text: textBuffer,
1984
- },
1985
- ],
1986
- };
1987
- textBuffer = '';
1988
- }
2622
+ // Don't flush buffer here on finish - let the final buffer handling
2623
+ // after the loop process it with proper sanitization and think tag extraction
2624
+ // This was causing unsanitized <think> tags to leak into output (pipeline path)
2625
+ // @plan PLAN-20251202-THINKING.P16
1989
2626
  }
1990
2627
  // Handle text content - buffer for Qwen format, emit immediately for others
1991
- const deltaContent = choice.delta?.content;
1992
- if (deltaContent) {
2628
+ // Note: Synthetic API sends content that may duplicate reasoning_content.
2629
+ // This is the model's behavior - we don't filter it here.
2630
+ // @plan PLAN-20251202-THINKING.P16
2631
+ const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
2632
+ if (rawDeltaContent) {
2633
+ const deltaContent = isKimiModel
2634
+ ? rawDeltaContent
2635
+ : this.sanitizeProviderText(rawDeltaContent);
2636
+ if (!deltaContent) {
2637
+ continue;
2638
+ }
1993
2639
  _accumulatedText += deltaContent;
1994
2640
  // Debug log for providers that need buffering
1995
2641
  if (shouldBufferText) {
@@ -2002,22 +2648,102 @@ export class OpenAIProvider extends BaseProvider {
2002
2648
  });
2003
2649
  // Buffer text to avoid stanza formatting
2004
2650
  textBuffer += deltaContent;
2651
+ const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
2652
+ const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
2653
+ const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
2005
2654
  // Emit buffered text when we have a complete sentence or paragraph
2006
- // Look for natural break points
2007
- if (textBuffer.includes('\n') ||
2008
- textBuffer.endsWith('. ') ||
2009
- textBuffer.endsWith('! ') ||
2010
- textBuffer.endsWith('? ') ||
2011
- textBuffer.length > 100) {
2012
- yield {
2013
- speaker: 'ai',
2014
- blocks: [
2015
- {
2016
- type: 'text',
2017
- text: textBuffer,
2018
- },
2019
- ],
2020
- };
2655
+ // Look for natural break points, avoiding flush mid Kimi section
2656
+ if (!hasOpenKimiSection &&
2657
+ (textBuffer.includes('\n') ||
2658
+ textBuffer.endsWith('. ') ||
2659
+ textBuffer.endsWith('! ') ||
2660
+ textBuffer.endsWith('? ') ||
2661
+ textBuffer.length > 100)) {
2662
+ const parsedToolCalls = [];
2663
+ let workingText = textBuffer;
2664
+ // Extract <think> tags and ACCUMULATE instead of emitting immediately
2665
+ // This handles fragmented <think>word</think> streaming from Synthetic API
2666
+ // @plan PLAN-20251202-THINKING.P16
2667
+ // @requirement REQ-THINK-003
2668
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
2669
+ if (tagBasedThinking) {
2670
+ // Accumulate thinking content - don't emit yet
2671
+ if (accumulatedThinkingContent.length > 0) {
2672
+ accumulatedThinkingContent += ' ';
2673
+ }
2674
+ accumulatedThinkingContent += tagBasedThinking.thought;
2675
+ logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
2676
+ }
2677
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
2678
+ if (kimiParsed.toolCalls.length > 0) {
2679
+ parsedToolCalls.push(...kimiParsed.toolCalls);
2680
+ logger.debug(() => `[OpenAIProvider] Streaming buffer (pipeline) parsed Kimi tool calls`, {
2681
+ count: kimiParsed.toolCalls.length,
2682
+ bufferLength: workingText.length,
2683
+ cleanedLength: kimiParsed.cleanedText.length,
2684
+ });
2685
+ }
2686
+ workingText = kimiParsed.cleanedText;
2687
+ const parsingText = this.sanitizeProviderText(workingText);
2688
+ let cleanedText = parsingText;
2689
+ try {
2690
+ const parsedResult = this.textToolParser.parse(parsingText);
2691
+ if (parsedResult.toolCalls.length > 0) {
2692
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
2693
+ type: 'tool_call',
2694
+ id: `text_tool_${Date.now()}_${Math.random()
2695
+ .toString(36)
2696
+ .substring(7)}`,
2697
+ name: this.normalizeToolName(call.name),
2698
+ parameters: call.arguments,
2699
+ })));
2700
+ cleanedText = parsedResult.cleanedContent;
2701
+ }
2702
+ }
2703
+ catch (error) {
2704
+ const logger = this.getLogger();
2705
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
2706
+ }
2707
+ // Emit accumulated thinking BEFORE tool calls or text content
2708
+ // This ensures thinking appears first in the response
2709
+ // @plan PLAN-20251202-THINKING.P16
2710
+ if (!hasEmittedThinking &&
2711
+ accumulatedThinkingContent.length > 0 &&
2712
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
2713
+ yield {
2714
+ speaker: 'ai',
2715
+ blocks: [
2716
+ {
2717
+ type: 'thinking',
2718
+ thought: accumulatedThinkingContent,
2719
+ sourceField: 'think_tags',
2720
+ isHidden: false,
2721
+ },
2722
+ ],
2723
+ };
2724
+ hasEmittedThinking = true;
2725
+ logger.debug(() => `[Streaming pipeline] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
2726
+ }
2727
+ if (parsedToolCalls.length > 0) {
2728
+ yield {
2729
+ speaker: 'ai',
2730
+ blocks: parsedToolCalls,
2731
+ };
2732
+ }
2733
+ // Always use sanitized text to strip <think> tags (pipeline streaming)
2734
+ // Bug fix: Previously Kimi used unsanitized workingText
2735
+ // @plan PLAN-20251202-THINKING.P16
2736
+ if (cleanedText.trim().length > 0) {
2737
+ yield {
2738
+ speaker: 'ai',
2739
+ blocks: [
2740
+ {
2741
+ type: 'text',
2742
+ text: cleanedText,
2743
+ },
2744
+ ],
2745
+ };
2746
+ }
2021
2747
  textBuffer = '';
2022
2748
  }
2023
2749
  }
@@ -2097,19 +2823,44 @@ export class OpenAIProvider extends BaseProvider {
2097
2823
  }
2098
2824
  // Check buffered text for <tool_call> format before flushing as plain text
2099
2825
  if (textBuffer.length > 0) {
2100
- // Try to parse <tool_call> format from buffered text
2101
- let parsedToolCalls = [];
2102
- let cleanedText = textBuffer;
2826
+ const parsedToolCalls = [];
2827
+ let workingText = textBuffer;
2828
+ // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
2829
+ // This is the model's behavior - we don't strip it since the model is the source.
2830
+ // The user can configure reasoning display settings if they don't want duplicates.
2831
+ // @plan PLAN-20251202-THINKING.P16
2832
+ // Extract any remaining <think> tags from final buffer
2833
+ // @plan PLAN-20251202-THINKING.P16
2834
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
2835
+ if (tagBasedThinking) {
2836
+ if (accumulatedThinkingContent.length > 0) {
2837
+ accumulatedThinkingContent += ' ';
2838
+ }
2839
+ accumulatedThinkingContent += tagBasedThinking.thought;
2840
+ }
2841
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
2842
+ if (kimiParsed.toolCalls.length > 0) {
2843
+ parsedToolCalls.push(...kimiParsed.toolCalls);
2844
+ this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (pipeline) parsed Kimi tool calls`, {
2845
+ count: kimiParsed.toolCalls.length,
2846
+ bufferLength: workingText.length,
2847
+ cleanedLength: kimiParsed.cleanedText.length,
2848
+ });
2849
+ }
2850
+ workingText = kimiParsed.cleanedText;
2851
+ const parsingText = this.sanitizeProviderText(workingText);
2852
+ let cleanedText = parsingText;
2103
2853
  try {
2104
- const parsedResult = this.textToolParser.parse(textBuffer);
2854
+ const parsedResult = this.textToolParser.parse(parsingText);
2105
2855
  if (parsedResult.toolCalls.length > 0) {
2106
- // Convert parsed tool calls to ToolCallBlock format
2107
- parsedToolCalls = parsedResult.toolCalls.map((call) => ({
2856
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
2108
2857
  type: 'tool_call',
2109
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2110
- name: call.name,
2858
+ id: `text_tool_${Date.now()}_${Math.random()
2859
+ .toString(36)
2860
+ .substring(7)}`,
2861
+ name: this.normalizeToolName(call.name),
2111
2862
  parameters: call.arguments,
2112
- }));
2863
+ })));
2113
2864
  cleanedText = parsedResult.cleanedContent;
2114
2865
  }
2115
2866
  }
@@ -2117,14 +2868,33 @@ export class OpenAIProvider extends BaseProvider {
2117
2868
  const logger = this.getLogger();
2118
2869
  logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
2119
2870
  }
2120
- // Emit tool calls from text parsing first
2871
+ // Emit accumulated thinking BEFORE tool calls or text content
2872
+ // @plan PLAN-20251202-THINKING.P16
2873
+ if (!hasEmittedThinking &&
2874
+ accumulatedThinkingContent.length > 0 &&
2875
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
2876
+ yield {
2877
+ speaker: 'ai',
2878
+ blocks: [
2879
+ {
2880
+ type: 'thinking',
2881
+ thought: accumulatedThinkingContent,
2882
+ sourceField: 'think_tags',
2883
+ isHidden: false,
2884
+ },
2885
+ ],
2886
+ };
2887
+ hasEmittedThinking = true;
2888
+ }
2121
2889
  if (parsedToolCalls.length > 0) {
2122
2890
  yield {
2123
2891
  speaker: 'ai',
2124
2892
  blocks: parsedToolCalls,
2125
2893
  };
2126
2894
  }
2127
- // Then emit any remaining cleaned text
2895
+ // Always use sanitized text to strip <think> tags (pipeline final buffer)
2896
+ // Bug fix: Previously Kimi used unsanitized workingText
2897
+ // @plan PLAN-20251202-THINKING.P16
2128
2898
  if (cleanedText.trim().length > 0) {
2129
2899
  yield {
2130
2900
  speaker: 'ai',
@@ -2138,6 +2908,39 @@ export class OpenAIProvider extends BaseProvider {
2138
2908
  }
2139
2909
  textBuffer = '';
2140
2910
  }
2911
+ // Emit any remaining accumulated thinking that wasn't emitted yet
2912
+ // (e.g., if entire response was just thinking with no content)
2913
+ // @plan PLAN-20251202-THINKING.P16
2914
+ if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
2915
+ yield {
2916
+ speaker: 'ai',
2917
+ blocks: [
2918
+ {
2919
+ type: 'thinking',
2920
+ thought: accumulatedThinkingContent,
2921
+ sourceField: 'think_tags',
2922
+ isHidden: false,
2923
+ },
2924
+ ],
2925
+ };
2926
+ hasEmittedThinking = true;
2927
+ }
2928
+ // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
2929
+ // This consolidates token-by-token reasoning from Synthetic API into a single block
2930
+ // @plan PLAN-20251202-THINKING.P16
2931
+ if (accumulatedReasoningContent.length > 0) {
2932
+ yield {
2933
+ speaker: 'ai',
2934
+ blocks: [
2935
+ {
2936
+ type: 'thinking',
2937
+ thought: accumulatedReasoningContent,
2938
+ sourceField: 'reasoning_content',
2939
+ isHidden: false,
2940
+ },
2941
+ ],
2942
+ };
2943
+ }
2141
2944
  // Process and emit tool calls using the pipeline
2142
2945
  const pipelineResult = await this.toolCallPipeline.process(abortSignal);
2143
2946
  if (pipelineResult.normalized.length > 0 ||
@@ -2145,8 +2948,9 @@ export class OpenAIProvider extends BaseProvider {
2145
2948
  const blocks = [];
2146
2949
  // Process successful tool calls
2147
2950
  for (const normalizedCall of pipelineResult.normalized) {
2951
+ const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
2148
2952
  // Process tool parameters with double-escape handling
2149
- const processedParameters = processToolParameters(normalizedCall.originalArgs || JSON.stringify(normalizedCall.args), normalizedCall.name);
2953
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
2150
2954
  blocks.push({
2151
2955
  type: 'tool_call',
2152
2956
  id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
@@ -2195,6 +2999,41 @@ export class OpenAIProvider extends BaseProvider {
2195
2999
  },
2196
3000
  };
2197
3001
  }
3002
+ // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
3003
+ // Only warn if we truly got nothing - not even reasoning content
3004
+ const pipelineStats = this.toolCallPipeline.getStats();
3005
+ if (_accumulatedText.length === 0 &&
3006
+ pipelineStats.collector.totalCalls === 0 &&
3007
+ textBuffer.length === 0 &&
3008
+ accumulatedReasoningContent.length === 0 &&
3009
+ accumulatedThinkingContent.length === 0) {
3010
+ // Provide actionable guidance for users
3011
+ const isKimi = model.toLowerCase().includes('kimi');
3012
+ const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
3013
+ const troubleshooting = isKimi
3014
+ ? isSynthetic
3015
+ ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
3016
+ : ' This provider may not support streaming with tool calls.'
3017
+ : ' Consider using streaming: "disabled" in your profile settings.';
3018
+ logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${allChunks.length} chunks with no content).${troubleshooting}`, {
3019
+ model,
3020
+ baseURL: baseURL ?? this.getBaseURL(),
3021
+ isKimiModel: isKimi,
3022
+ isSyntheticAPI: isSynthetic,
3023
+ totalChunksReceived: allChunks.length,
3024
+ });
3025
+ }
3026
+ else {
3027
+ // Log what we DID get for debugging
3028
+ logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
3029
+ textLength: _accumulatedText.length,
3030
+ toolCallCount: pipelineStats.collector.totalCalls,
3031
+ textBufferLength: textBuffer.length,
3032
+ reasoningLength: accumulatedReasoningContent.length,
3033
+ thinkingLength: accumulatedThinkingContent.length,
3034
+ totalChunksReceived: allChunks.length,
3035
+ });
3036
+ }
2198
3037
  }
2199
3038
  else {
2200
3039
  // Handle non-streaming response
@@ -2220,12 +3059,22 @@ export class OpenAIProvider extends BaseProvider {
2220
3059
  }
2221
3060
  }
2222
3061
  const blocks = [];
2223
- // Handle text content
2224
- if (choice.message?.content) {
2225
- blocks.push({
2226
- type: 'text',
2227
- text: choice.message.content,
2228
- });
3062
+ // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
3063
+ const pipelineRawMessageContent = this.coerceMessageContentToString(choice.message?.content);
3064
+ let pipelineKimiCleanContent;
3065
+ let pipelineKimiToolBlocks = [];
3066
+ if (pipelineRawMessageContent) {
3067
+ const kimiParsed = this.extractKimiToolCallsFromText(pipelineRawMessageContent);
3068
+ pipelineKimiCleanContent = kimiParsed.cleanedText;
3069
+ pipelineKimiToolBlocks = kimiParsed.toolCalls;
3070
+ // Always use sanitized text - even Kimi-K2 should have consistent tag stripping
3071
+ const cleanedText = this.sanitizeProviderText(pipelineKimiCleanContent);
3072
+ if (cleanedText) {
3073
+ blocks.push({
3074
+ type: 'text',
3075
+ text: cleanedText,
3076
+ });
3077
+ }
2229
3078
  }
2230
3079
  // Handle tool calls
2231
3080
  if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
@@ -2234,8 +3083,9 @@ export class OpenAIProvider extends BaseProvider {
2234
3083
  if (toolCall.type === 'function') {
2235
3084
  // Normalize tool name for consistency with streaming path
2236
3085
  const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
3086
+ const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
2237
3087
  // Process tool parameters with double-escape handling
2238
- const processedParameters = processToolParameters(toolCall.function.arguments || '', normalizedName);
3088
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
2239
3089
  blocks.push({
2240
3090
  type: 'tool_call',
2241
3091
  id: this.normalizeToHistoryToolId(toolCall.id),
@@ -2245,42 +3095,48 @@ export class OpenAIProvider extends BaseProvider {
2245
3095
  }
2246
3096
  }
2247
3097
  }
3098
+ if (pipelineKimiToolBlocks.length > 0) {
3099
+ blocks.push(...pipelineKimiToolBlocks);
3100
+ this.getLogger().debug(() => `[OpenAIProvider] Non-stream pipeline added Kimi tool calls from text`, { count: pipelineKimiToolBlocks.length });
3101
+ }
2248
3102
  // Additionally check for <tool_call> format in text content
2249
- if (choice.message?.content &&
2250
- typeof choice.message.content === 'string') {
2251
- try {
2252
- const parsedResult = this.textToolParser.parse(choice.message.content);
2253
- if (parsedResult.toolCalls.length > 0) {
2254
- // Add tool calls found in text content
2255
- for (const call of parsedResult.toolCalls) {
2256
- blocks.push({
2257
- type: 'tool_call',
2258
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2259
- name: call.name,
2260
- parameters: call.arguments,
2261
- });
2262
- }
2263
- // Update the text content to remove the tool call parts
2264
- if (choice.message.content !== parsedResult.cleanedContent) {
2265
- // Find the text block and update it
2266
- const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
2267
- if (textBlockIndex >= 0) {
2268
- blocks[textBlockIndex].text =
2269
- parsedResult.cleanedContent;
2270
- }
2271
- else if (parsedResult.cleanedContent.trim()) {
2272
- // Add cleaned text if it doesn't exist
2273
- blocks.unshift({
2274
- type: 'text',
2275
- text: parsedResult.cleanedContent,
3103
+ if (pipelineKimiCleanContent) {
3104
+ const cleanedSource = this.sanitizeProviderText(pipelineKimiCleanContent);
3105
+ if (cleanedSource) {
3106
+ try {
3107
+ const parsedResult = this.textToolParser.parse(cleanedSource);
3108
+ if (parsedResult.toolCalls.length > 0) {
3109
+ // Add tool calls found in text content
3110
+ for (const call of parsedResult.toolCalls) {
3111
+ blocks.push({
3112
+ type: 'tool_call',
3113
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
3114
+ name: this.normalizeToolName(call.name),
3115
+ parameters: call.arguments,
2276
3116
  });
2277
3117
  }
3118
+ // Update the text content to remove the tool call parts
3119
+ if (choice.message.content !== parsedResult.cleanedContent) {
3120
+ // Find the text block and update it
3121
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
3122
+ if (textBlockIndex >= 0) {
3123
+ blocks[textBlockIndex].text =
3124
+ parsedResult.cleanedContent;
3125
+ }
3126
+ else if (parsedResult.cleanedContent.trim()) {
3127
+ // Add cleaned text if it doesn't exist
3128
+ blocks.unshift({
3129
+ type: 'text',
3130
+ text: parsedResult.cleanedContent,
3131
+ });
3132
+ }
3133
+ }
2278
3134
  }
2279
3135
  }
2280
- }
2281
- catch (error) {
2282
- const logger = this.getLogger();
2283
- logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
3136
+ catch (error) {
3137
+ const logger = this.getLogger();
3138
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
3139
+ }
2284
3140
  }
2285
3141
  }
2286
3142
  // Emit the complete response as a single IContent
@@ -2421,5 +3277,61 @@ export class OpenAIProvider extends BaseProvider {
2421
3277
  }
2422
3278
  return shouldRetry;
2423
3279
  }
3280
+ /**
3281
+ * Parse reasoning_content from streaming delta.
3282
+ *
3283
+ * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
3284
+ * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
3285
+ */
3286
+ parseStreamingReasoningDelta(delta) {
3287
+ if (!delta) {
3288
+ return null;
3289
+ }
3290
+ // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
3291
+ const reasoningContent = delta
3292
+ .reasoning_content;
3293
+ // Handle absent, null, or non-string
3294
+ if (!reasoningContent || typeof reasoningContent !== 'string') {
3295
+ return null;
3296
+ }
3297
+ // Handle empty string or whitespace-only
3298
+ if (reasoningContent.trim().length === 0) {
3299
+ return null;
3300
+ }
3301
+ return {
3302
+ type: 'thinking',
3303
+ thought: reasoningContent,
3304
+ sourceField: 'reasoning_content',
3305
+ isHidden: false,
3306
+ };
3307
+ }
3308
+ /**
3309
+ * Parse reasoning_content from non-streaming message.
3310
+ *
3311
+ * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
3312
+ * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
3313
+ */
3314
+ parseNonStreamingReasoning(message) {
3315
+ if (!message) {
3316
+ return null;
3317
+ }
3318
+ // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
3319
+ const reasoningContent = message
3320
+ .reasoning_content;
3321
+ // Handle absent, null, or non-string
3322
+ if (!reasoningContent || typeof reasoningContent !== 'string') {
3323
+ return null;
3324
+ }
3325
+ // Handle empty string or whitespace-only
3326
+ if (reasoningContent.trim().length === 0) {
3327
+ return null;
3328
+ }
3329
+ return {
3330
+ type: 'thinking',
3331
+ thought: reasoningContent,
3332
+ sourceField: 'reasoning_content',
3333
+ isHidden: false,
3334
+ };
3335
+ }
2424
3336
  }
2425
3337
  //# sourceMappingURL=OpenAIProvider.js.map