@vybestack/llxprt-code-core 0.6.2 → 0.7.0-nightly.251206.43b97dbf4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/dist/src/auth/precedence.js +9 -10
  2. package/dist/src/auth/precedence.js.map +1 -1
  3. package/dist/src/auth/types.d.ts +6 -6
  4. package/dist/src/core/geminiChat.d.ts +8 -0
  5. package/dist/src/core/geminiChat.js +63 -5
  6. package/dist/src/core/geminiChat.js.map +1 -1
  7. package/dist/src/core/turn.js +12 -8
  8. package/dist/src/core/turn.js.map +1 -1
  9. package/dist/src/ide/ide-client.js +4 -2
  10. package/dist/src/ide/ide-client.js.map +1 -1
  11. package/dist/src/index.d.ts +1 -0
  12. package/dist/src/index.js +1 -0
  13. package/dist/src/index.js.map +1 -1
  14. package/dist/src/parsers/TextToolCallParser.d.ts +0 -15
  15. package/dist/src/parsers/TextToolCallParser.js +21 -5
  16. package/dist/src/parsers/TextToolCallParser.js.map +1 -1
  17. package/dist/src/providers/BaseProvider.d.ts +3 -0
  18. package/dist/src/providers/BaseProvider.js +11 -0
  19. package/dist/src/providers/BaseProvider.js.map +1 -1
  20. package/dist/src/providers/IProvider.d.ts +3 -0
  21. package/dist/src/providers/ProviderManager.js +6 -0
  22. package/dist/src/providers/ProviderManager.js.map +1 -1
  23. package/dist/src/providers/anthropic/AnthropicProvider.d.ts +0 -1
  24. package/dist/src/providers/anthropic/AnthropicProvider.js +233 -22
  25. package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
  26. package/dist/src/providers/anthropic/schemaConverter.d.ts +63 -0
  27. package/dist/src/providers/anthropic/schemaConverter.js +189 -0
  28. package/dist/src/providers/anthropic/schemaConverter.js.map +1 -0
  29. package/dist/src/providers/gemini/GeminiProvider.js +108 -11
  30. package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
  31. package/dist/src/providers/gemini/thoughtSignatures.d.ts +51 -0
  32. package/dist/src/providers/gemini/thoughtSignatures.js +189 -0
  33. package/dist/src/providers/gemini/thoughtSignatures.js.map +1 -0
  34. package/dist/src/providers/openai/OpenAIProvider.d.ts +78 -1
  35. package/dist/src/providers/openai/OpenAIProvider.js +1159 -190
  36. package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
  37. package/dist/src/providers/openai/ToolCallNormalizer.d.ts +6 -0
  38. package/dist/src/providers/openai/ToolCallNormalizer.js +16 -2
  39. package/dist/src/providers/openai/ToolCallNormalizer.js.map +1 -1
  40. package/dist/src/providers/openai/schemaConverter.d.ts +67 -0
  41. package/dist/src/providers/openai/schemaConverter.js +191 -0
  42. package/dist/src/providers/openai/schemaConverter.js.map +1 -0
  43. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +0 -4
  44. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +3 -75
  45. package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -1
  46. package/dist/src/providers/openai-responses/schemaConverter.d.ts +65 -0
  47. package/dist/src/providers/openai-responses/schemaConverter.js +195 -0
  48. package/dist/src/providers/openai-responses/schemaConverter.js.map +1 -0
  49. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +146 -0
  50. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +1177 -0
  51. package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -0
  52. package/dist/src/providers/openai-vercel/errors.d.ts +46 -0
  53. package/dist/src/providers/openai-vercel/errors.js +137 -0
  54. package/dist/src/providers/openai-vercel/errors.js.map +1 -0
  55. package/dist/src/providers/openai-vercel/index.d.ts +22 -0
  56. package/dist/src/providers/openai-vercel/index.js +23 -0
  57. package/dist/src/providers/openai-vercel/index.js.map +1 -0
  58. package/dist/src/providers/openai-vercel/messageConversion.d.ts +36 -0
  59. package/dist/src/providers/openai-vercel/messageConversion.js +410 -0
  60. package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -0
  61. package/dist/src/providers/openai-vercel/schemaConverter.d.ts +66 -0
  62. package/dist/src/providers/openai-vercel/schemaConverter.js +191 -0
  63. package/dist/src/providers/openai-vercel/schemaConverter.js.map +1 -0
  64. package/dist/src/providers/openai-vercel/toolIdUtils.d.ts +33 -0
  65. package/dist/src/providers/openai-vercel/toolIdUtils.js +117 -0
  66. package/dist/src/providers/openai-vercel/toolIdUtils.js.map +1 -0
  67. package/dist/src/providers/reasoning/reasoningUtils.d.ts +43 -0
  68. package/dist/src/providers/reasoning/reasoningUtils.js +92 -0
  69. package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -0
  70. package/dist/src/providers/utils/localEndpoint.js +6 -2
  71. package/dist/src/providers/utils/localEndpoint.js.map +1 -1
  72. package/dist/src/runtime/AgentRuntimeContext.d.ts +27 -0
  73. package/dist/src/runtime/AgentRuntimeContext.js.map +1 -1
  74. package/dist/src/runtime/createAgentRuntimeContext.js +27 -1
  75. package/dist/src/runtime/createAgentRuntimeContext.js.map +1 -1
  76. package/dist/src/services/history/IContent.d.ts +6 -0
  77. package/dist/src/services/history/IContent.js.map +1 -1
  78. package/dist/src/settings/types.d.ts +1 -1
  79. package/dist/src/tools/IToolFormatter.d.ts +1 -1
  80. package/dist/src/tools/ToolFormatter.js +14 -2
  81. package/dist/src/tools/ToolFormatter.js.map +1 -1
  82. package/dist/src/tools/ToolIdStrategy.d.ts +72 -0
  83. package/dist/src/tools/ToolIdStrategy.js +107 -0
  84. package/dist/src/tools/ToolIdStrategy.js.map +1 -0
  85. package/dist/src/tools/todo-schemas.d.ts +4 -4
  86. package/dist/src/utils/filesearch/ignore.js +3 -2
  87. package/dist/src/utils/filesearch/ignore.js.map +1 -1
  88. package/dist/src/utils/gitIgnoreParser.js +2 -1
  89. package/dist/src/utils/gitIgnoreParser.js.map +1 -1
  90. package/dist/src/utils/schemaValidator.js +41 -6
  91. package/dist/src/utils/schemaValidator.js.map +1 -1
  92. package/package.json +3 -1
@@ -22,9 +22,11 @@ import crypto from 'node:crypto';
22
22
  import * as http from 'http';
23
23
  import * as https from 'https';
24
24
  import * as net from 'net';
25
+ import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
25
26
  import { BaseProvider, } from '../BaseProvider.js';
26
27
  import { DebugLogger } from '../../debug/index.js';
27
28
  import { ToolFormatter } from '../../tools/ToolFormatter.js';
29
+ import { convertToolsToOpenAI } from './schemaConverter.js';
28
30
  import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
29
31
  import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
30
32
  import { getCoreSystemPromptAsync } from '../../core/prompts.js';
@@ -36,6 +38,7 @@ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
36
38
  import { ToolCallPipeline } from './ToolCallPipeline.js';
37
39
  import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
38
40
  import { isLocalEndpoint } from '../utils/localEndpoint.js';
41
+ import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
39
42
  const MAX_TOOL_RESPONSE_CHARS = 1024;
40
43
  const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
41
44
  const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -218,6 +221,278 @@ export class OpenAIProvider extends BaseProvider {
218
221
  }
219
222
  return new OpenAI(clientOptions);
220
223
  }
224
+ /**
225
+ * Coerce provider "content" (which may be a string or an array-of-parts)
226
+ * into a plain string. Defensive for OpenAI-compatible providers that emit
227
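+ * structured content blocks. Returns undefined when content is neither a string nor an array, or when the array yields no text parts.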
228
+ */
229
+ coerceMessageContentToString(content) {
230
+ if (typeof content === 'string') {
231
+ return content;
232
+ }
233
+ if (Array.isArray(content)) {
234
+ const parts = [];
235
+ for (const part of content) {
236
+ if (!part)
237
+ continue;
238
+ if (typeof part === 'string') {
239
+ parts.push(part);
240
+ }
241
+ else if (typeof part === 'object' &&
242
+ part !== null &&
243
+ 'text' in part &&
244
+ typeof part.text === 'string') {
245
+ parts.push(part.text);
246
+ }
247
+ }
248
+ return parts.length ? parts.join('') : undefined;
249
+ }
250
+ return undefined;
251
+ }
252
+ /**
253
+ * Strip provider-specific "thinking" / reasoning markup from visible text.
254
+ * This prevents DeepSeek / Kimi-style <think> blocks from leaking into
255
+ * user-visible output or tool arguments.
256
+ */
257
+ sanitizeProviderText(text) {
258
+ if (text === null || text === undefined) {
259
+ return '';
260
+ }
261
+ const logger = this.getLogger();
262
+ let str = typeof text === 'string' ? text : String(text);
263
+ const beforeLen = str.length;
264
+ const hadReasoningTags = /<(?:think|thinking|analysis)>|<\/(?:think|thinking|analysis)>/i.test(str);
265
+ // DeepSeek / generic <think>...</think> blocks.
266
+ // Replace with a single space to preserve word spacing when tags appear mid-sentence.
267
+ // This prevents "these<think>...</think>5" from becoming "these5" instead of "these 5".
268
+ // Multiple consecutive spaces will be collapsed below.
269
+ str = str.replace(/<think>[\s\S]*?<\/think>/gi, ' ');
270
+ // Alternative reasoning tags some providers use.
271
+ str = str.replace(/<thinking>[\s\S]*?<\/thinking>/gi, ' ');
272
+ str = str.replace(/<analysis>[\s\S]*?<\/analysis>/gi, ' ');
273
+ // Clean up stray unmatched tags - replace with space to preserve word separation.
274
+ str = str.replace(/<\/?(?:think|thinking|analysis)>/gi, ' ');
275
+ // Only clean up whitespace if we had reasoning tags to strip
276
+ // This preserves meaningful whitespace in regular text chunks during streaming
277
+ // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
278
+ if (hadReasoningTags) {
279
+ // Clean up multiple consecutive spaces/whitespace that may result from stripping
280
+ str = str.replace(/[ \t]+/g, ' ');
281
+ str = str.replace(/\n{3,}/g, '\n\n');
282
+ // Trim leading whitespace whenever reasoning tags were found, wherever they appeared in the text
283
+ // This prevents leading spaces from "<think>...</think>text" -> " text"
284
+ // but preserves trailing whitespace for streaming chunk concatenation
285
+ str = str.trimStart();
286
+ }
287
+ const afterLen = str.length;
288
+ if (hadReasoningTags && afterLen !== beforeLen) {
289
+ logger.debug(() => `[OpenAIProvider] Stripped reasoning tags`, {
290
+ beforeLen,
291
+ afterLen,
292
+ });
293
+ }
294
+ return str;
295
+ }
296
+ /**
297
+ * Extract thinking content from <think>, <thinking>, or <analysis> tags
298
+ * and return it as a ThinkingBlock. Returns null if no thinking tags found.
299
+ *
300
+ * This must be called BEFORE sanitizeProviderText which strips these tags.
301
+ *
302
+ * Handles two formats:
303
+ * 1. Standard: <think>Full thinking paragraph here...</think>
304
+ * 2. Fragmented (Synthetic API): <think>word</think><think>word</think>...
305
+ *
306
+ * For fragmented format, joins with spaces. For standard, joins with blank lines ("\n\n").
307
+ *
308
+ * @plan PLAN-20251202-THINKING.P16
309
+ * @requirement REQ-THINK-003
310
+ */
311
+ extractThinkTagsAsBlock(text) {
312
+ if (!text) {
313
+ return null;
314
+ }
315
+ // Collect all thinking content from various tag formats
316
+ // Note: We only trim leading/trailing whitespace from each part, not internal newlines
317
+ // This preserves formatting like numbered lists within thinking content
318
+ const thinkingParts = [];
319
+ // Match <think>...</think>
320
+ const thinkMatches = text.matchAll(/<think>([\s\S]*?)<\/think>/gi);
321
+ for (const match of thinkMatches) {
322
+ const content = match[1];
323
+ if (content?.trim()) {
324
+ // Preserve internal newlines but remove leading/trailing whitespace
325
+ thinkingParts.push(content.trim());
326
+ }
327
+ }
328
+ // Match <thinking>...</thinking>
329
+ const thinkingMatches = text.matchAll(/<thinking>([\s\S]*?)<\/thinking>/gi);
330
+ for (const match of thinkingMatches) {
331
+ const content = match[1];
332
+ if (content?.trim()) {
333
+ thinkingParts.push(content.trim());
334
+ }
335
+ }
336
+ // Match <analysis>...</analysis>
337
+ const analysisMatches = text.matchAll(/<analysis>([\s\S]*?)<\/analysis>/gi);
338
+ for (const match of analysisMatches) {
339
+ const content = match[1];
340
+ if (content?.trim()) {
341
+ thinkingParts.push(content.trim());
342
+ }
343
+ }
344
+ if (thinkingParts.length === 0) {
345
+ return null;
346
+ }
347
+ // Detect fragmented format: many short parts (likely token-by-token streaming)
348
+ // If average part length is very short (< 15 chars) and we have many parts (> 5),
349
+ // it's likely fragmented and should be joined with spaces
350
+ const avgPartLength = thinkingParts.reduce((sum, p) => sum + p.length, 0) /
351
+ thinkingParts.length;
352
+ const isFragmented = thinkingParts.length > 5 && avgPartLength < 15;
353
+ // Join with space for fragmented, newlines for standard multi-paragraph thinking
354
+ const combinedThought = isFragmented
355
+ ? thinkingParts.join(' ')
356
+ : thinkingParts.join('\n\n');
357
+ this.getLogger().debug(() => `[OpenAIProvider] Extracted thinking from tags: ${combinedThought.length} chars`, { tagCount: thinkingParts.length, isFragmented, avgPartLength });
358
+ return {
359
+ type: 'thinking',
360
+ thought: combinedThought,
361
+ sourceField: 'think_tags',
362
+ isHidden: false,
363
+ };
364
+ }
365
+ /**
366
+ * Normalize tool name by trimming it, stripping Kimi-K2 style prefixes, and lower-casing the result.
367
+ *
368
+ * Handles malformed tool names where the model concatenates prefixes like
369
+ * "functions" or "call_functions" with the actual tool name:
370
+ * - "functionslist_directory" -> "list_directory"
371
+ * - "call_functionslist_directory6" -> "list_directory"
372
+ * - "call_functionsglob7" -> "glob"
373
+ */
374
+ normalizeToolName(name) {
375
+ let normalized = (name || '').trim();
376
+ // Strip Kimi-K2 style prefixes where model concatenates "functions" or "call_functions"
377
+ // with the actual tool name (e.g., "functionslist_directory" -> "list_directory")
378
+ // Pattern: (call_)?functions<actual_tool_name><optional_number>
379
+ const kimiPrefixMatch = /^(?:call_)?functions([a-z_]+[a-z])(\d*)$/i.exec(normalized);
380
+ if (kimiPrefixMatch) {
381
+ const originalName = normalized;
382
+ normalized = kimiPrefixMatch[1];
383
+ this.getLogger().debug(() => `[OpenAIProvider] Stripped Kimi-style prefix from tool name: "${originalName}" -> "${normalized}"`);
384
+ }
385
+ return normalized.toLowerCase();
386
+ }
387
+ /**
388
+ * Sanitize raw tool argument payloads before JSON parsing:
389
+ * - Remove thinking blocks (<think>...</think>, etc.).
390
+ * - Strip Markdown code fences (```json ... ```).
391
+ * - Try to isolate the main JSON object if wrapped in prose.
392
+ */
393
+ sanitizeToolArgumentsString(raw) {
394
+ if (raw === null || raw === undefined) {
395
+ return '{}';
396
+ }
397
+ let text;
398
+ if (typeof raw === 'string') {
399
+ text = raw;
400
+ }
401
+ else {
402
+ try {
403
+ text = JSON.stringify(raw);
404
+ }
405
+ catch {
406
+ text = String(raw);
407
+ }
408
+ }
409
+ text = text.trim();
410
+ // Strip fenced code blocks like ```json { ... } ```.
411
+ if (text.startsWith('```')) {
412
+ text = text.replace(/^```[a-zA-Z0-9_-]*\s*/m, '');
413
+ text = text.replace(/```$/m, '');
414
+ text = text.trim();
415
+ }
416
+ // Remove provider reasoning / thinking markup.
417
+ text = this.sanitizeProviderText(text);
418
+ // If provider wrapped JSON in explanation text, try to isolate the object.
419
+ const firstBrace = text.indexOf('{');
420
+ const lastBrace = text.lastIndexOf('}');
421
+ if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
422
+ const candidate = text.slice(firstBrace, lastBrace + 1).trim();
423
+ if (candidate.startsWith('{') && candidate.endsWith('}')) {
424
+ return candidate;
425
+ }
426
+ }
427
+ return text.length ? text : '{}';
428
+ }
429
+ /**
430
+ * Parse Kimi-K2 `<|tool_calls_section_begin|> ... <|tool_calls_section_end|>`
431
+ * blocks out of a text string.
432
+ *
433
+ * - Returns cleanedText with the whole section removed.
434
+ * - Returns ToolCallBlock[] constructed from the section contents.
435
+ *
436
+ * This is used for HF/vLLM-style Kimi deployments where `tool_calls` is empty
437
+ * and all tool info is only encoded in the text template.
438
+ */
439
+ extractKimiToolCallsFromText(raw) {
440
+ if (!raw || !raw.includes('<|tool_calls_section_begin|>')) {
441
+ return { cleanedText: raw, toolCalls: [] };
442
+ }
443
+ const logger = this.getLogger();
444
+ const toolCalls = [];
445
+ let text = raw;
446
+ const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
447
+ text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
448
+ try {
449
+ const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
450
+ let m;
451
+ while ((m = callRegex.exec(sectionBody)) !== null) {
452
+ const rawId = m[1].trim();
453
+ const rawArgs = m[2].trim();
454
+ // Infer tool name from ID.
455
+ let toolName = '';
456
+ const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
457
+ /^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
458
+ if (match) {
459
+ toolName = match[1];
460
+ }
461
+ else {
462
+ const colonParts = rawId.split(':');
463
+ const head = colonParts[0] || rawId;
464
+ const dotParts = head.split('.');
465
+ toolName = dotParts[dotParts.length - 1] || head;
466
+ }
467
+ // Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
468
+ toolName = this.normalizeToolName(toolName);
469
+ const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
470
+ const processedParameters = processToolParameters(sanitizedArgs, toolName);
471
+ toolCalls.push({
472
+ type: 'tool_call',
473
+ id: this.normalizeToHistoryToolId(rawId),
474
+ name: toolName,
475
+ parameters: processedParameters,
476
+ });
477
+ }
478
+ }
479
+ catch (err) {
480
+ logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
481
+ }
482
+ // Strip the entire tool section from user-visible text
483
+ return '';
484
+ });
485
+ if (toolCalls.length > 0) {
486
+ logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
487
+ toolCallCount: toolCalls.length,
488
+ originalLength: raw.length,
489
+ cleanedLength: text.length,
490
+ });
491
+ }
492
+ // Don't trim - preserve leading/trailing newlines that are important for formatting
493
+ // (e.g., numbered lists from Kimi K2 that have newlines between items)
494
+ return { cleanedText: text, toolCalls };
495
+ }
221
496
  /**
222
497
  * @plan:PLAN-20251023-STATELESS-HARDENING.P09
223
498
  * @requirement:REQ-SP4-002
@@ -685,6 +960,113 @@ export class OpenAIProvider extends BaseProvider {
685
960
  // This ensures each tool message has a corresponding tool_calls in previous message
686
961
  return this.validateToolMessageSequence(messages);
687
962
  }
963
+ /**
964
+ * Build messages with optional reasoning_content based on settings.
965
+ *
966
+ * @plan PLAN-20251202-THINKING.P14
967
+ * @requirement REQ-THINK-004, REQ-THINK-006
968
+ */
969
+ buildMessagesWithReasoning(contents, options, toolFormat) {
970
+ // Read settings with defaults
971
+ const stripPolicy = options.settings.get('reasoning.stripFromContext') ??
972
+ 'none';
973
+ const includeInContext = options.settings.get('reasoning.includeInContext') ?? false;
974
+ // Apply strip policy first
975
+ const filteredContents = filterThinkingForContext(contents, stripPolicy);
976
+ const messages = [];
977
+ // Create a ToolIdMapper based on the tool format
978
+ // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
979
+ const toolIdMapper = toolFormat === 'kimi'
980
+ ? getToolIdStrategy('kimi').createMapper(filteredContents)
981
+ : null;
982
+ // Helper to resolve tool call IDs based on format
983
+ const resolveToolCallId = (tc) => {
984
+ if (toolIdMapper) {
985
+ return toolIdMapper.resolveToolCallId(tc);
986
+ }
987
+ return this.normalizeToOpenAIToolId(tc.id);
988
+ };
989
+ // Helper to resolve tool response IDs based on format
990
+ const resolveToolResponseId = (tr) => {
991
+ if (toolIdMapper) {
992
+ return toolIdMapper.resolveToolResponseId(tr);
993
+ }
994
+ return this.normalizeToOpenAIToolId(tr.callId);
995
+ };
996
+ for (const content of filteredContents) {
997
+ if (content.speaker === 'human') {
998
+ // Convert human messages to user messages
999
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
1000
+ const text = textBlocks.map((b) => b.text).join('\n');
1001
+ if (text) {
1002
+ messages.push({
1003
+ role: 'user',
1004
+ content: text,
1005
+ });
1006
+ }
1007
+ }
1008
+ else if (content.speaker === 'ai') {
1009
+ // Convert AI messages with optional reasoning_content
1010
+ const textBlocks = content.blocks.filter((b) => b.type === 'text');
1011
+ const text = textBlocks.map((b) => b.text).join('\n');
1012
+ const thinkingBlocks = extractThinkingBlocks(content);
1013
+ const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
1014
+ if (toolCalls.length > 0) {
1015
+ // Assistant message with tool calls
1016
+ const baseMessage = {
1017
+ role: 'assistant',
1018
+ content: text || null,
1019
+ tool_calls: toolCalls.map((tc) => ({
1020
+ id: resolveToolCallId(tc),
1021
+ type: 'function',
1022
+ function: {
1023
+ name: tc.name,
1024
+ arguments: this.normalizeToolCallArguments(tc.parameters),
1025
+ },
1026
+ })),
1027
+ };
1028
+ if (includeInContext && thinkingBlocks.length > 0) {
1029
+ const messageWithReasoning = baseMessage;
1030
+ messageWithReasoning.reasoning_content =
1031
+ thinkingToReasoningField(thinkingBlocks);
1032
+ messages.push(messageWithReasoning);
1033
+ }
1034
+ else {
1035
+ messages.push(baseMessage);
1036
+ }
1037
+ }
1038
+ else if (textBlocks.length > 0 || thinkingBlocks.length > 0) {
1039
+ // Plain assistant message
1040
+ const baseMessage = {
1041
+ role: 'assistant',
1042
+ content: text,
1043
+ };
1044
+ if (includeInContext && thinkingBlocks.length > 0) {
1045
+ const messageWithReasoning = baseMessage;
1046
+ messageWithReasoning.reasoning_content =
1047
+ thinkingToReasoningField(thinkingBlocks);
1048
+ messages.push(messageWithReasoning);
1049
+ }
1050
+ else {
1051
+ messages.push(baseMessage);
1052
+ }
1053
+ }
1054
+ }
1055
+ else if (content.speaker === 'tool') {
1056
+ // Convert tool responses
1057
+ const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
1058
+ for (const tr of toolResponses) {
1059
+ messages.push({
1060
+ role: 'tool',
1061
+ content: this.buildToolResponseContent(tr, options.config),
1062
+ tool_call_id: resolveToolResponseId(tr),
1063
+ });
1064
+ }
1065
+ }
1066
+ }
1067
+ // Validate tool message sequence to prevent API errors
1068
+ return this.validateToolMessageSequence(messages);
1069
+ }
688
1070
  /**
689
1071
  * Validates tool message sequence to ensure each tool message has a corresponding tool_calls
690
1072
  * This prevents "messages with role 'tool' must be a response to a preceeding message with 'tool_calls'" errors
@@ -698,6 +1080,18 @@ export class OpenAIProvider extends BaseProvider {
698
1080
  const logger = this.getLogger();
699
1081
  const validatedMessages = [...messages];
700
1082
  let removedCount = 0;
1083
+ // Debug: Log the full message sequence for tool call analysis
1084
+ logger.debug(() => `[OpenAIProvider] validateToolMessageSequence: analyzing ${messages.length} messages`, {
1085
+ messageRoles: messages.map((m) => m.role),
1086
+ toolCallIds: messages
1087
+ .filter((m) => m.role === 'assistant' &&
1088
+ 'tool_calls' in m &&
1089
+ Array.isArray(m.tool_calls))
1090
+ .flatMap((m) => m.tool_calls?.map((tc) => tc.id) ?? []),
1091
+ toolResponseIds: messages
1092
+ .filter((m) => m.role === 'tool')
1093
+ .map((m) => m.tool_call_id),
1094
+ });
701
1095
  // Check if there are any tool_calls in conversation
702
1096
  // If no tool_calls exist, this might be isolated tool response testing - skip validation
703
1097
  const hasToolCallsInConversation = validatedMessages.some((msg) => msg.role === 'assistant' &&
@@ -826,13 +1220,8 @@ export class OpenAIProvider extends BaseProvider {
826
1220
  metadataKeys: Object.keys(metadata ?? {}),
827
1221
  });
828
1222
  }
829
- // Convert IContent to OpenAI messages format
830
- const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
831
- const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
832
- if (logger.enabled && toolReplayMode !== 'native') {
833
- logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
834
- }
835
- // Detect the tool format to use (once at the start of the method)
1223
+ // Detect the tool format to use BEFORE building messages
1224
+ // This is needed so that Kimi K2 tool IDs can be generated in the correct format
836
1225
  const detectedFormat = this.detectToolFormat();
837
1226
  // Log the detected format for debugging
838
1227
  logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
@@ -840,8 +1229,18 @@ export class OpenAIProvider extends BaseProvider {
840
1229
  detectedFormat,
841
1230
  provider: this.name,
842
1231
  });
843
- // Convert Gemini format tools to the detected format
844
- let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
1232
+ // Convert IContent to OpenAI messages format
1233
+ // Use buildMessagesWithReasoning for reasoning-aware message building
1234
+ // Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
1235
+ const messages = toolReplayMode === 'native'
1236
+ ? this.buildMessagesWithReasoning(contents, options, detectedFormat)
1237
+ : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
1238
+ if (logger.enabled && toolReplayMode !== 'native') {
1239
+ logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
1240
+ }
1241
+ // Convert Gemini format tools to OpenAI format using the schema converter
1242
+ // This ensures required fields are always present in tool schemas
1243
+ let formattedTools = convertToolsToOpenAI(tools);
845
1244
  // CRITICAL FIX: Ensure we never pass an empty tools array
846
1245
  // The OpenAI API errors when tools=[] but a tool call is attempted
847
1246
  if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1106,16 +1505,37 @@ export class OpenAIProvider extends BaseProvider {
1106
1505
  // Buffer for accumulating text chunks for providers that need it
1107
1506
  let textBuffer = '';
1108
1507
  // Use the same detected format from earlier for consistency
1109
- // Buffer text for Qwen format providers to avoid stanza formatting
1110
- const shouldBufferText = detectedFormat === 'qwen';
1508
+ const isKimiModel = model.toLowerCase().includes('kimi-k2');
1509
+ // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
1510
+ const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
1511
+ // Accumulate thinking content across the entire stream to emit as ONE block
1512
+ // This handles fragmented <think>word</think> streaming from Synthetic API
1513
+ // @plan PLAN-20251202-THINKING.P16
1514
+ let accumulatedThinkingContent = '';
1515
+ let hasEmittedThinking = false;
1516
+ // Accumulate reasoning_content from streaming deltas (legacy path)
1517
+ // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
1518
+ // @plan PLAN-20251202-THINKING.P16
1519
+ let accumulatedReasoningContent = '';
1111
1520
  // Track token usage from streaming chunks
1112
1521
  let streamingUsage = null;
1522
+ // Track total chunks for debugging empty responses
1523
+ let totalChunksReceived = 0;
1113
1524
  try {
1114
1525
  // Handle streaming response
1115
1526
  for await (const chunk of response) {
1527
+ totalChunksReceived++;
1116
1528
  if (abortSignal?.aborted) {
1117
1529
  break;
1118
1530
  }
1531
+ // Debug: Log first few chunks and every 10th chunk to understand stream behavior
1532
+ if (totalChunksReceived <= 3 || totalChunksReceived % 10 === 0) {
1533
+ logger.debug(() => `[Streaming legacy] Chunk #${totalChunksReceived} received`, {
1534
+ hasChoices: !!chunk.choices?.length,
1535
+ firstChoiceDelta: chunk.choices?.[0]?.delta,
1536
+ finishReason: chunk.choices?.[0]?.finish_reason,
1537
+ });
1538
+ }
1119
1539
  const chunkRecord = chunk;
1120
1540
  let parsedData;
1121
1541
  const rawData = chunkRecord?.data;
@@ -1152,6 +1572,14 @@ export class OpenAIProvider extends BaseProvider {
1152
1572
  const choice = chunk.choices?.[0];
1153
1573
  if (!choice)
1154
1574
  continue;
1575
+ // Parse reasoning_content from streaming delta (Phase 16 integration)
1576
+ // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
1577
+ // @plan PLAN-20251202-THINKING.P16
1578
+ const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
1579
+ if (reasoningBlock) {
1580
+ // Accumulate reasoning content - will emit ONE block later
1581
+ accumulatedReasoningContent += reasoningBlock.thought;
1582
+ }
1155
1583
  // Check for finish_reason to detect proper stream ending
1156
1584
  if (choice.finish_reason) {
1157
1585
  logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1165,23 +1593,23 @@ export class OpenAIProvider extends BaseProvider {
1165
1593
  if (choice.finish_reason === 'length') {
1166
1594
  logger.debug(() => `Response truncated due to length limit for model ${model}`);
1167
1595
  }
1168
- // Flush any buffered text when stream finishes
1169
- if (textBuffer.length > 0) {
1170
- yield {
1171
- speaker: 'ai',
1172
- blocks: [
1173
- {
1174
- type: 'text',
1175
- text: textBuffer,
1176
- },
1177
- ],
1178
- };
1179
- textBuffer = '';
1180
- }
1596
+ // Don't flush buffer here on finish - let the final buffer handling
1597
+ // after the loop process it with proper sanitization and think tag extraction
1598
+ // This was causing unsanitized <think> tags to leak into output (legacy path)
1599
+ // @plan PLAN-20251202-THINKING.P16
1181
1600
  }
1182
1601
  // Handle text content - buffer for Qwen format, emit immediately for others
1183
- const deltaContent = choice.delta?.content;
1184
- if (deltaContent) {
1602
+ // Note: Synthetic API sends content that may duplicate reasoning_content.
1603
+ // This is the model's behavior - we don't filter it here.
1604
+ // @plan PLAN-20251202-THINKING.P16
1605
+ const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
1606
+ if (rawDeltaContent) {
1607
+ const deltaContent = isKimiModel
1608
+ ? rawDeltaContent
1609
+ : this.sanitizeProviderText(rawDeltaContent);
1610
+ if (!deltaContent) {
1611
+ continue;
1612
+ }
1185
1613
  _accumulatedText += deltaContent;
1186
1614
  // Debug log for providers that need buffering
1187
1615
  if (shouldBufferText) {
@@ -1194,22 +1622,103 @@ export class OpenAIProvider extends BaseProvider {
1194
1622
  });
1195
1623
  // Buffer text to avoid stanza formatting
1196
1624
  textBuffer += deltaContent;
1625
+ const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
1626
+ const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
1627
+ const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
1197
1628
  // Emit buffered text when we have a complete sentence or paragraph
1198
- // Look for natural break points
1199
- if (textBuffer.includes('\n') ||
1200
- textBuffer.endsWith('. ') ||
1201
- textBuffer.endsWith('! ') ||
1202
- textBuffer.endsWith('? ') ||
1203
- textBuffer.length > 100) {
1204
- yield {
1205
- speaker: 'ai',
1206
- blocks: [
1207
- {
1208
- type: 'text',
1209
- text: textBuffer,
1210
- },
1211
- ],
1212
- };
1629
+ // Look for natural break points, but avoid flushing mid Kimi section
1630
+ if (!hasOpenKimiSection &&
1631
+ (textBuffer.includes('\n') ||
1632
+ textBuffer.endsWith('. ') ||
1633
+ textBuffer.endsWith('! ') ||
1634
+ textBuffer.endsWith('? ') ||
1635
+ textBuffer.length > 100)) {
1636
+ const parsedToolCalls = [];
1637
+ let workingText = textBuffer;
1638
+ // Extract <think> tags and ACCUMULATE instead of emitting immediately (legacy path)
1639
+ // This handles fragmented <think>word</think> streaming from Synthetic API
1640
+ // @plan PLAN-20251202-THINKING.P16
1641
+ // @requirement REQ-THINK-003
1642
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
1643
+ if (tagBasedThinking) {
1644
+ // Accumulate thinking content - don't emit yet
1645
+ // Use newline to preserve formatting between chunks (not space)
1646
+ if (accumulatedThinkingContent.length > 0) {
1647
+ accumulatedThinkingContent += '\n';
1648
+ }
1649
+ accumulatedThinkingContent += tagBasedThinking.thought;
1650
+ logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
1651
+ }
1652
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
1653
+ if (kimiParsed.toolCalls.length > 0) {
1654
+ parsedToolCalls.push(...kimiParsed.toolCalls);
1655
+ logger.debug(() => `[OpenAIProvider] Streaming buffer (legacy) parsed Kimi tool calls`, {
1656
+ count: kimiParsed.toolCalls.length,
1657
+ bufferLength: workingText.length,
1658
+ cleanedLength: kimiParsed.cleanedText.length,
1659
+ });
1660
+ }
1661
+ workingText = kimiParsed.cleanedText;
1662
+ const parsingText = this.sanitizeProviderText(workingText);
1663
+ let cleanedText = parsingText;
1664
+ try {
1665
+ const parsedResult = this.textToolParser.parse(parsingText);
1666
+ if (parsedResult.toolCalls.length > 0) {
1667
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
1668
+ type: 'tool_call',
1669
+ id: `text_tool_${Date.now()}_${Math.random()
1670
+ .toString(36)
1671
+ .substring(7)}`,
1672
+ name: this.normalizeToolName(call.name),
1673
+ parameters: call.arguments,
1674
+ })));
1675
+ cleanedText = parsedResult.cleanedContent;
1676
+ }
1677
+ }
1678
+ catch (error) {
1679
+ const logger = this.getLogger();
1680
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
1681
+ }
1682
+ // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
1683
+ // This ensures thinking appears first in the response
1684
+ // @plan PLAN-20251202-THINKING.P16
1685
+ if (!hasEmittedThinking &&
1686
+ accumulatedThinkingContent.length > 0 &&
1687
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
1688
+ yield {
1689
+ speaker: 'ai',
1690
+ blocks: [
1691
+ {
1692
+ type: 'thinking',
1693
+ thought: accumulatedThinkingContent,
1694
+ sourceField: 'think_tags',
1695
+ isHidden: false,
1696
+ },
1697
+ ],
1698
+ };
1699
+ hasEmittedThinking = true;
1700
+ logger.debug(() => `[Streaming legacy] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
1701
+ }
1702
+ if (parsedToolCalls.length > 0) {
1703
+ yield {
1704
+ speaker: 'ai',
1705
+ blocks: parsedToolCalls,
1706
+ };
1707
+ }
1708
+ // Always use sanitized text to strip <think> tags (legacy streaming)
1709
+ // Bug fix: Previously Kimi used unsanitized workingText
1710
+ // @plan PLAN-20251202-THINKING.P16
1711
+ if (cleanedText.trim().length > 0) {
1712
+ yield {
1713
+ speaker: 'ai',
1714
+ blocks: [
1715
+ {
1716
+ type: 'text',
1717
+ text: cleanedText,
1718
+ },
1719
+ ],
1720
+ };
1721
+ }
1213
1722
  textBuffer = '';
1214
1723
  }
1215
1724
  }
@@ -1308,19 +1817,45 @@ export class OpenAIProvider extends BaseProvider {
1308
1817
  }
1309
1818
  // Check buffered text for <tool_call> format before flushing as plain text
1310
1819
  if (textBuffer.length > 0) {
1311
- // Try to parse <tool_call> format from buffered text
1312
- let parsedToolCalls = [];
1313
- let cleanedText = textBuffer;
1820
+ const parsedToolCalls = [];
1821
+ let workingText = textBuffer;
1822
+ // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
1823
+ // This is the model's behavior - we don't strip it since the model is the source.
1824
+ // The user can configure reasoning display settings if they don't want duplicates.
1825
+ // @plan PLAN-20251202-THINKING.P16
1826
+ // Extract any remaining <think> tags from final buffer (legacy path)
1827
+ // @plan PLAN-20251202-THINKING.P16
1828
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
1829
+ if (tagBasedThinking) {
1830
+ // Use newline to preserve formatting between chunks (not space)
1831
+ if (accumulatedThinkingContent.length > 0) {
1832
+ accumulatedThinkingContent += '\n';
1833
+ }
1834
+ accumulatedThinkingContent += tagBasedThinking.thought;
1835
+ }
1836
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
1837
+ if (kimiParsed.toolCalls.length > 0) {
1838
+ parsedToolCalls.push(...kimiParsed.toolCalls);
1839
+ this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (legacy) parsed Kimi tool calls`, {
1840
+ count: kimiParsed.toolCalls.length,
1841
+ bufferLength: workingText.length,
1842
+ cleanedLength: kimiParsed.cleanedText.length,
1843
+ });
1844
+ }
1845
+ workingText = kimiParsed.cleanedText;
1846
+ const parsingText = this.sanitizeProviderText(workingText);
1847
+ let cleanedText = parsingText;
1314
1848
  try {
1315
- const parsedResult = this.textToolParser.parse(textBuffer);
1849
+ const parsedResult = this.textToolParser.parse(parsingText);
1316
1850
  if (parsedResult.toolCalls.length > 0) {
1317
- // Convert parsed tool calls to ToolCallBlock format
1318
- parsedToolCalls = parsedResult.toolCalls.map((call) => ({
1851
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
1319
1852
  type: 'tool_call',
1320
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
1321
- name: call.name,
1853
+ id: `text_tool_${Date.now()}_${Math.random()
1854
+ .toString(36)
1855
+ .substring(7)}`,
1856
+ name: this.normalizeToolName(call.name),
1322
1857
  parameters: call.arguments,
1323
- }));
1858
+ })));
1324
1859
  cleanedText = parsedResult.cleanedContent;
1325
1860
  }
1326
1861
  }
@@ -1328,14 +1863,33 @@ export class OpenAIProvider extends BaseProvider {
1328
1863
  const logger = this.getLogger();
1329
1864
  logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
1330
1865
  }
1331
- // Emit tool calls from text parsing first
1866
+ // Emit accumulated thinking BEFORE tool calls or text content (legacy path)
1867
+ // @plan PLAN-20251202-THINKING.P16
1868
+ if (!hasEmittedThinking &&
1869
+ accumulatedThinkingContent.length > 0 &&
1870
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
1871
+ yield {
1872
+ speaker: 'ai',
1873
+ blocks: [
1874
+ {
1875
+ type: 'thinking',
1876
+ thought: accumulatedThinkingContent,
1877
+ sourceField: 'think_tags',
1878
+ isHidden: false,
1879
+ },
1880
+ ],
1881
+ };
1882
+ hasEmittedThinking = true;
1883
+ }
1332
1884
  if (parsedToolCalls.length > 0) {
1333
1885
  yield {
1334
1886
  speaker: 'ai',
1335
1887
  blocks: parsedToolCalls,
1336
1888
  };
1337
1889
  }
1338
- // Then emit any remaining cleaned text
1890
+ // Always use sanitized text to strip <think> tags (legacy final buffer)
1891
+ // Bug fix: Previously Kimi used unsanitized workingText
1892
+ // @plan PLAN-20251202-THINKING.P16
1339
1893
  if (cleanedText.trim().length > 0) {
1340
1894
  yield {
1341
1895
  speaker: 'ai',
@@ -1349,18 +1903,54 @@ export class OpenAIProvider extends BaseProvider {
1349
1903
  }
1350
1904
  textBuffer = '';
1351
1905
  }
1906
+ // Emit any remaining accumulated thinking that wasn't emitted yet (legacy path)
1907
+ // (e.g., if entire response was just thinking with no content)
1908
+ // @plan PLAN-20251202-THINKING.P16
1909
+ if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
1910
+ yield {
1911
+ speaker: 'ai',
1912
+ blocks: [
1913
+ {
1914
+ type: 'thinking',
1915
+ thought: accumulatedThinkingContent,
1916
+ sourceField: 'think_tags',
1917
+ isHidden: false,
1918
+ },
1919
+ ],
1920
+ };
1921
+ hasEmittedThinking = true;
1922
+ }
1923
+ // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
1924
+ // This consolidates token-by-token reasoning from Synthetic API into a single block
1925
+ // @plan PLAN-20251202-THINKING.P16
1926
+ if (accumulatedReasoningContent.length > 0) {
1927
+ yield {
1928
+ speaker: 'ai',
1929
+ blocks: [
1930
+ {
1931
+ type: 'thinking',
1932
+ thought: accumulatedReasoningContent,
1933
+ sourceField: 'reasoning_content',
1934
+ isHidden: false,
1935
+ },
1936
+ ],
1937
+ };
1938
+ }
1352
1939
  // Process and emit tool calls using legacy accumulated approach
1353
1940
  if (accumulatedToolCalls.length > 0) {
1354
1941
  const blocks = [];
1355
1942
  for (const tc of accumulatedToolCalls) {
1356
1943
  if (!tc)
1357
1944
  continue;
1945
+ const sanitizedArgs = this.sanitizeToolArgumentsString(tc.function.arguments);
1946
+ // Normalize tool name (handles Kimi-K2 style prefixes)
1947
+ const normalizedName = this.normalizeToolName(tc.function.name || '');
1358
1948
  // Process tool parameters with double-escape handling
1359
- const processedParameters = processToolParameters(tc.function.arguments || '', tc.function.name || '');
1949
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
1360
1950
  blocks.push({
1361
1951
  type: 'tool_call',
1362
1952
  id: this.normalizeToHistoryToolId(tc.id),
1363
- name: tc.function.name || '',
1953
+ name: normalizedName,
1364
1954
  parameters: processedParameters,
1365
1955
  });
1366
1956
  }
@@ -1400,6 +1990,40 @@ export class OpenAIProvider extends BaseProvider {
1400
1990
  },
1401
1991
  };
1402
1992
  }
1993
+ // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
1994
+ // Only warn if we truly got nothing - not even reasoning content
1995
+ if (_accumulatedText.length === 0 &&
1996
+ accumulatedToolCalls.length === 0 &&
1997
+ textBuffer.length === 0 &&
1998
+ accumulatedReasoningContent.length === 0 &&
1999
+ accumulatedThinkingContent.length === 0) {
2000
+ // Provide actionable guidance for users
2001
+ const isKimi = model.toLowerCase().includes('kimi');
2002
+ const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
2003
+ const troubleshooting = isKimi
2004
+ ? isSynthetic
2005
+ ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
2006
+ : ' This provider may not support streaming with tool calls.'
2007
+ : ' Consider using streaming: "disabled" in your profile settings.';
2008
+ logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${totalChunksReceived} chunks with no content).${troubleshooting}`, {
2009
+ model,
2010
+ baseURL: baseURL ?? this.getBaseURL(),
2011
+ isKimiModel: isKimi,
2012
+ isSyntheticAPI: isSynthetic,
2013
+ totalChunksReceived,
2014
+ });
2015
+ }
2016
+ else {
2017
+ // Log what we DID get for debugging
2018
+ logger.debug(() => `[Streaming legacy] Stream completed with accumulated content`, {
2019
+ textLength: _accumulatedText.length,
2020
+ toolCallCount: accumulatedToolCalls.length,
2021
+ textBufferLength: textBuffer.length,
2022
+ reasoningLength: accumulatedReasoningContent.length,
2023
+ thinkingLength: accumulatedThinkingContent.length,
2024
+ totalChunksReceived,
2025
+ });
2026
+ }
1403
2027
  }
1404
2028
  else {
1405
2029
  // Handle non-streaming response
@@ -1425,22 +2049,57 @@ export class OpenAIProvider extends BaseProvider {
1425
2049
  }
1426
2050
  }
1427
2051
  const blocks = [];
1428
- // Handle text content
1429
- if (choice.message?.content) {
1430
- blocks.push({
1431
- type: 'text',
1432
- text: choice.message.content,
1433
- });
2052
+ // Parse reasoning_content from response (Phase 16 integration)
2053
+ const reasoningBlock = this.parseNonStreamingReasoning(choice.message);
2054
+ logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}`, {
2055
+ hasReasoningContent: 'reasoning_content' in
2056
+ (choice.message ?? {}),
2057
+ messageKeys: Object.keys(choice.message ?? {}),
2058
+ });
2059
+ if (reasoningBlock) {
2060
+ blocks.push(reasoningBlock);
2061
+ }
2062
+ // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
2063
+ const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
2064
+ let kimiCleanContent;
2065
+ let kimiToolBlocks = [];
2066
+ if (rawMessageContent) {
2067
+ // Extract <think> tags as ThinkingBlock BEFORE stripping them
2068
+ // Only do this if we didn't already get reasoning from reasoning_content field
2069
+ // @plan PLAN-20251202-THINKING.P16
2070
+ // @requirement REQ-THINK-003
2071
+ if (!reasoningBlock) {
2072
+ const tagBasedThinking = this.extractThinkTagsAsBlock(rawMessageContent);
2073
+ if (tagBasedThinking) {
2074
+ blocks.push(tagBasedThinking);
2075
+ logger.debug(() => `[Non-streaming] Extracted thinking from <think> tags: ${tagBasedThinking.thought.length} chars`);
2076
+ }
2077
+ }
2078
+ const kimiParsed = this.extractKimiToolCallsFromText(rawMessageContent);
2079
+ kimiCleanContent = kimiParsed.cleanedText;
2080
+ kimiToolBlocks = kimiParsed.toolCalls;
2081
+ // Always sanitize text content to remove <think> tags
2082
+ // Bug fix: Previously Kimi-K2 used unsanitized kimiCleanContent,
2083
+ // which caused <think> tags to leak into visible output
2084
+ // @plan PLAN-20251202-THINKING.P16
2085
+ const cleanedText = this.sanitizeProviderText(kimiCleanContent);
2086
+ if (cleanedText) {
2087
+ blocks.push({
2088
+ type: 'text',
2089
+ text: cleanedText,
2090
+ });
2091
+ }
1434
2092
  }
1435
2093
  // Handle tool calls
1436
2094
  if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
1437
2095
  // Use the same detected format from earlier for consistency
1438
2096
  for (const toolCall of choice.message.tool_calls) {
1439
2097
  if (toolCall.type === 'function') {
1440
- // Use tool name directly without normalization for legacy compatibility
1441
- const toolName = toolCall.function.name || '';
2098
+ // Normalize tool name (handles Kimi-K2 style prefixes)
2099
+ const toolName = this.normalizeToolName(toolCall.function.name || '');
2100
+ const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
1442
2101
  // Process tool parameters with double-escape handling
1443
- const processedParameters = processToolParameters(toolCall.function.arguments || '', toolName);
2102
+ const processedParameters = processToolParameters(sanitizedArgs, toolName);
1444
2103
  blocks.push({
1445
2104
  type: 'tool_call',
1446
2105
  id: this.normalizeToHistoryToolId(toolCall.id),
@@ -1450,42 +2109,49 @@ export class OpenAIProvider extends BaseProvider {
1450
2109
  }
1451
2110
  }
1452
2111
  }
2112
+ // Add any tool calls parsed from Kimi inline sections
2113
+ if (kimiToolBlocks.length > 0) {
2114
+ blocks.push(...kimiToolBlocks);
2115
+ this.getLogger().debug(() => `[OpenAIProvider] Non-stream legacy added Kimi tool calls from text`, { count: kimiToolBlocks.length });
2116
+ }
1453
2117
  // Additionally check for <tool_call> format in text content
1454
- if (choice.message?.content &&
1455
- typeof choice.message.content === 'string') {
1456
- try {
1457
- const parsedResult = this.textToolParser.parse(choice.message.content);
1458
- if (parsedResult.toolCalls.length > 0) {
1459
- // Add tool calls found in text content
1460
- for (const call of parsedResult.toolCalls) {
1461
- blocks.push({
1462
- type: 'tool_call',
1463
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
1464
- name: call.name,
1465
- parameters: call.arguments,
1466
- });
1467
- }
1468
- // Update the text content to remove the tool call parts
1469
- if (choice.message.content !== parsedResult.cleanedContent) {
1470
- // Find the text block and update it
1471
- const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
1472
- if (textBlockIndex >= 0) {
1473
- blocks[textBlockIndex].text =
1474
- parsedResult.cleanedContent;
1475
- }
1476
- else if (parsedResult.cleanedContent.trim()) {
1477
- // Add cleaned text if it doesn't exist
1478
- blocks.unshift({
1479
- type: 'text',
1480
- text: parsedResult.cleanedContent,
2118
+ if (kimiCleanContent) {
2119
+ const cleanedSource = this.sanitizeProviderText(kimiCleanContent);
2120
+ if (cleanedSource) {
2121
+ try {
2122
+ const parsedResult = this.textToolParser.parse(cleanedSource);
2123
+ if (parsedResult.toolCalls.length > 0) {
2124
+ // Add tool calls found in text content
2125
+ for (const call of parsedResult.toolCalls) {
2126
+ blocks.push({
2127
+ type: 'tool_call',
2128
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2129
+ name: this.normalizeToolName(call.name),
2130
+ parameters: call.arguments,
1481
2131
  });
1482
2132
  }
2133
+ // Update the text content to remove the tool call parts
2134
+ if (choice.message.content !== parsedResult.cleanedContent) {
2135
+ // Find the text block and update it
2136
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
2137
+ if (textBlockIndex >= 0) {
2138
+ blocks[textBlockIndex].text =
2139
+ parsedResult.cleanedContent;
2140
+ }
2141
+ else if (parsedResult.cleanedContent.trim()) {
2142
+ // Add cleaned text if it doesn't exist
2143
+ blocks.unshift({
2144
+ type: 'text',
2145
+ text: parsedResult.cleanedContent,
2146
+ });
2147
+ }
2148
+ }
1483
2149
  }
1484
2150
  }
1485
- }
1486
- catch (error) {
1487
- const logger = this.getLogger();
1488
- logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
2151
+ catch (error) {
2152
+ const logger = this.getLogger();
2153
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
2154
+ }
1489
2155
  }
1490
2156
  }
1491
2157
  // Emit the complete response as a single IContent
@@ -1613,14 +2279,8 @@ export class OpenAIProvider extends BaseProvider {
1613
2279
  }
1614
2280
  // Determine tool replay mode for model compatibility (e.g., polaris-alpha)
1615
2281
  const toolReplayMode = this.determineToolReplayMode(model);
1616
- // Convert IContent to OpenAI messages format
1617
- const configForMessages = options.config ?? options.runtime?.config ?? this.globalConfig;
1618
- const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
1619
- // Log tool replay mode usage for debugging
1620
- if (logger.enabled && toolReplayMode !== 'native') {
1621
- logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
1622
- }
1623
- // Detect the tool format to use (once at the start of the method)
2282
+ // Detect the tool format to use BEFORE building messages
2283
+ // This is needed so that Kimi K2 tool IDs can be generated in the correct format
1624
2284
  const detectedFormat = this.detectToolFormat();
1625
2285
  // Log the detected format for debugging
1626
2286
  logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
@@ -1628,8 +2288,19 @@ export class OpenAIProvider extends BaseProvider {
1628
2288
  detectedFormat,
1629
2289
  provider: this.name,
1630
2290
  });
1631
- // Convert Gemini format tools to the detected format
1632
- let formattedTools = toolFormatter.convertGeminiToFormat(tools, detectedFormat);
2291
+ // Convert IContent to OpenAI messages format
2292
+ // Use buildMessagesWithReasoning for reasoning-aware message building
2293
+ // Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
2294
+ const messages = toolReplayMode === 'native'
2295
+ ? this.buildMessagesWithReasoning(contents, options, detectedFormat)
2296
+ : this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
2297
+ // Log tool replay mode usage for debugging
2298
+ if (logger.enabled && toolReplayMode !== 'native') {
2299
+ logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
2300
+ }
2301
+ // Convert Gemini format tools to OpenAI format using the schema converter
2302
+ // This ensures required fields are always present in tool schemas
2303
+ let formattedTools = convertToolsToOpenAI(tools);
1633
2304
  // CRITICAL FIX: Ensure we never pass an empty tools array
1634
2305
  // The OpenAI API errors when tools=[] but a tool call is attempted
1635
2306
  if (Array.isArray(formattedTools) && formattedTools.length === 0) {
@@ -1905,8 +2576,18 @@ export class OpenAIProvider extends BaseProvider {
1905
2576
  // Buffer for accumulating text chunks for providers that need it
1906
2577
  let textBuffer = '';
1907
2578
  // Use the same detected format from earlier for consistency
1908
- // Buffer text for Qwen format providers to avoid stanza formatting
1909
- const shouldBufferText = detectedFormat === 'qwen';
2579
+ const isKimiModel = model.toLowerCase().includes('kimi-k2');
2580
+ // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
2581
+ const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
2582
+ // Accumulate thinking content across the entire stream to emit as ONE block
2583
+ // This handles fragmented <think>word</think> streaming from Synthetic API
2584
+ // @plan PLAN-20251202-THINKING.P16
2585
+ let accumulatedThinkingContent = '';
2586
+ let hasEmittedThinking = false;
2587
+ // Accumulate reasoning_content from streaming deltas (pipeline path)
2588
+ // Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
2589
+ // @plan PLAN-20251202-THINKING.P16
2590
+ let accumulatedReasoningContent = '';
1910
2591
  // Track token usage from streaming chunks
1911
2592
  let streamingUsage = null;
1912
2593
  const allChunks = []; // Collect all chunks first
@@ -1918,6 +2599,11 @@ export class OpenAIProvider extends BaseProvider {
1918
2599
  }
1919
2600
  allChunks.push(chunk);
1920
2601
  }
2602
+ // Debug: Log how many chunks were received
2603
+ logger.debug(() => `[Streaming pipeline] Collected ${allChunks.length} chunks from stream`, {
2604
+ firstChunkDelta: allChunks[0]?.choices?.[0]?.delta,
2605
+ lastChunkFinishReason: allChunks[allChunks.length - 1]?.choices?.[0]?.finish_reason,
2606
+ });
1921
2607
  // Now process all collected chunks
1922
2608
  for (const chunk of allChunks) {
1923
2609
  // Check for cancellation during chunk processing
@@ -1960,6 +2646,15 @@ export class OpenAIProvider extends BaseProvider {
1960
2646
  const choice = chunk.choices?.[0];
1961
2647
  if (!choice)
1962
2648
  continue;
2649
+ // Parse reasoning_content from streaming delta (Pipeline path)
2650
+ // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
2651
+ // @plan PLAN-20251202-THINKING.P16
2652
+ // @requirement REQ-THINK-003.1
2653
+ const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
2654
+ if (reasoningBlock) {
2655
+ // Accumulate reasoning content - will emit ONE block later
2656
+ accumulatedReasoningContent += reasoningBlock.thought;
2657
+ }
1963
2658
  // Check for finish_reason to detect proper stream ending
1964
2659
  if (choice.finish_reason) {
1965
2660
  logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1973,23 +2668,23 @@ export class OpenAIProvider extends BaseProvider {
1973
2668
  if (choice.finish_reason === 'length') {
1974
2669
  logger.debug(() => `Response truncated due to length limit for model ${model}`);
1975
2670
  }
1976
- // Flush any buffered text when stream finishes
1977
- if (textBuffer.length > 0) {
1978
- yield {
1979
- speaker: 'ai',
1980
- blocks: [
1981
- {
1982
- type: 'text',
1983
- text: textBuffer,
1984
- },
1985
- ],
1986
- };
1987
- textBuffer = '';
1988
- }
2671
+ // Don't flush buffer here on finish - let the final buffer handling
2672
+ // after the loop process it with proper sanitization and think tag extraction
2673
+ // This was causing unsanitized <think> tags to leak into output (pipeline path)
2674
+ // @plan PLAN-20251202-THINKING.P16
1989
2675
  }
1990
2676
  // Handle text content - buffer for Qwen format and Kimi-K2 models, emit immediately for others
1991
- const deltaContent = choice.delta?.content;
1992
- if (deltaContent) {
2677
+ // Note: Synthetic API sends content that may duplicate reasoning_content.
2678
+ // This is the model's behavior - we don't filter it here.
2679
+ // @plan PLAN-20251202-THINKING.P16
2680
+ const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
2681
+ if (rawDeltaContent) {
2682
+ const deltaContent = isKimiModel
2683
+ ? rawDeltaContent
2684
+ : this.sanitizeProviderText(rawDeltaContent);
2685
+ if (!deltaContent) {
2686
+ continue;
2687
+ }
1993
2688
  _accumulatedText += deltaContent;
1994
2689
  // Debug log for providers that need buffering
1995
2690
  if (shouldBufferText) {
@@ -2002,22 +2697,103 @@ export class OpenAIProvider extends BaseProvider {
2002
2697
  });
2003
2698
  // Buffer text to avoid stanza formatting
2004
2699
  textBuffer += deltaContent;
2700
+ const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
2701
+ const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
2702
+ const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
2005
2703
  // Emit buffered text when we have a complete sentence or paragraph
2006
- // Look for natural break points
2007
- if (textBuffer.includes('\n') ||
2008
- textBuffer.endsWith('. ') ||
2009
- textBuffer.endsWith('! ') ||
2010
- textBuffer.endsWith('? ') ||
2011
- textBuffer.length > 100) {
2012
- yield {
2013
- speaker: 'ai',
2014
- blocks: [
2015
- {
2016
- type: 'text',
2017
- text: textBuffer,
2018
- },
2019
- ],
2020
- };
2704
+ // Look for natural break points, avoiding flush mid Kimi section
2705
+ if (!hasOpenKimiSection &&
2706
+ (textBuffer.includes('\n') ||
2707
+ textBuffer.endsWith('. ') ||
2708
+ textBuffer.endsWith('! ') ||
2709
+ textBuffer.endsWith('? ') ||
2710
+ textBuffer.length > 100)) {
2711
+ const parsedToolCalls = [];
2712
+ let workingText = textBuffer;
2713
+ // Extract <think> tags and ACCUMULATE instead of emitting immediately
2714
+ // This handles fragmented <think>word</think> streaming from Synthetic API
2715
+ // @plan PLAN-20251202-THINKING.P16
2716
+ // @requirement REQ-THINK-003
2717
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
2718
+ if (tagBasedThinking) {
2719
+ // Accumulate thinking content - don't emit yet
2720
+ // Use newline to preserve formatting between chunks (not space)
2721
+ if (accumulatedThinkingContent.length > 0) {
2722
+ accumulatedThinkingContent += '\n';
2723
+ }
2724
+ accumulatedThinkingContent += tagBasedThinking.thought;
2725
+ logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
2726
+ }
2727
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
2728
+ if (kimiParsed.toolCalls.length > 0) {
2729
+ parsedToolCalls.push(...kimiParsed.toolCalls);
2730
+ logger.debug(() => `[OpenAIProvider] Streaming buffer (pipeline) parsed Kimi tool calls`, {
2731
+ count: kimiParsed.toolCalls.length,
2732
+ bufferLength: workingText.length,
2733
+ cleanedLength: kimiParsed.cleanedText.length,
2734
+ });
2735
+ }
2736
+ workingText = kimiParsed.cleanedText;
2737
+ const parsingText = this.sanitizeProviderText(workingText);
2738
+ let cleanedText = parsingText;
2739
+ try {
2740
+ const parsedResult = this.textToolParser.parse(parsingText);
2741
+ if (parsedResult.toolCalls.length > 0) {
2742
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
2743
+ type: 'tool_call',
2744
+ id: `text_tool_${Date.now()}_${Math.random()
2745
+ .toString(36)
2746
+ .substring(7)}`,
2747
+ name: this.normalizeToolName(call.name),
2748
+ parameters: call.arguments,
2749
+ })));
2750
+ cleanedText = parsedResult.cleanedContent;
2751
+ }
2752
+ }
2753
+ catch (error) {
2754
+ const logger = this.getLogger();
2755
+ logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
2756
+ }
2757
+ // Emit accumulated thinking BEFORE tool calls or text content
2758
+ // This ensures thinking appears first in the response
2759
+ // @plan PLAN-20251202-THINKING.P16
2760
+ if (!hasEmittedThinking &&
2761
+ accumulatedThinkingContent.length > 0 &&
2762
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
2763
+ yield {
2764
+ speaker: 'ai',
2765
+ blocks: [
2766
+ {
2767
+ type: 'thinking',
2768
+ thought: accumulatedThinkingContent,
2769
+ sourceField: 'think_tags',
2770
+ isHidden: false,
2771
+ },
2772
+ ],
2773
+ };
2774
+ hasEmittedThinking = true;
2775
+ logger.debug(() => `[Streaming pipeline] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
2776
+ }
2777
+ if (parsedToolCalls.length > 0) {
2778
+ yield {
2779
+ speaker: 'ai',
2780
+ blocks: parsedToolCalls,
2781
+ };
2782
+ }
2783
+ // Always use sanitized text to strip <think> tags (pipeline streaming)
2784
+ // Bug fix: Previously Kimi used unsanitized workingText
2785
+ // @plan PLAN-20251202-THINKING.P16
2786
+ if (cleanedText.trim().length > 0) {
2787
+ yield {
2788
+ speaker: 'ai',
2789
+ blocks: [
2790
+ {
2791
+ type: 'text',
2792
+ text: cleanedText,
2793
+ },
2794
+ ],
2795
+ };
2796
+ }
2021
2797
  textBuffer = '';
2022
2798
  }
2023
2799
  }
@@ -2097,19 +2873,45 @@ export class OpenAIProvider extends BaseProvider {
2097
2873
  }
2098
2874
  // Check buffered text for <tool_call> format before flushing as plain text
2099
2875
  if (textBuffer.length > 0) {
2100
- // Try to parse <tool_call> format from buffered text
2101
- let parsedToolCalls = [];
2102
- let cleanedText = textBuffer;
2876
+ const parsedToolCalls = [];
2877
+ let workingText = textBuffer;
2878
+ // Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
2879
+ // This is the model's behavior - we don't strip it since the model is the source.
2880
+ // The user can configure reasoning display settings if they don't want duplicates.
2881
+ // @plan PLAN-20251202-THINKING.P16
2882
+ // Extract any remaining <think> tags from final buffer
2883
+ // @plan PLAN-20251202-THINKING.P16
2884
+ const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
2885
+ if (tagBasedThinking) {
2886
+ // Use newline to preserve formatting between chunks (not space)
2887
+ if (accumulatedThinkingContent.length > 0) {
2888
+ accumulatedThinkingContent += '\n';
2889
+ }
2890
+ accumulatedThinkingContent += tagBasedThinking.thought;
2891
+ }
2892
+ const kimiParsed = this.extractKimiToolCallsFromText(workingText);
2893
+ if (kimiParsed.toolCalls.length > 0) {
2894
+ parsedToolCalls.push(...kimiParsed.toolCalls);
2895
+ this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (pipeline) parsed Kimi tool calls`, {
2896
+ count: kimiParsed.toolCalls.length,
2897
+ bufferLength: workingText.length,
2898
+ cleanedLength: kimiParsed.cleanedText.length,
2899
+ });
2900
+ }
2901
+ workingText = kimiParsed.cleanedText;
2902
+ const parsingText = this.sanitizeProviderText(workingText);
2903
+ let cleanedText = parsingText;
2103
2904
  try {
2104
- const parsedResult = this.textToolParser.parse(textBuffer);
2905
+ const parsedResult = this.textToolParser.parse(parsingText);
2105
2906
  if (parsedResult.toolCalls.length > 0) {
2106
- // Convert parsed tool calls to ToolCallBlock format
2107
- parsedToolCalls = parsedResult.toolCalls.map((call) => ({
2907
+ parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
2108
2908
  type: 'tool_call',
2109
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2110
- name: call.name,
2909
+ id: `text_tool_${Date.now()}_${Math.random()
2910
+ .toString(36)
2911
+ .substring(7)}`,
2912
+ name: this.normalizeToolName(call.name),
2111
2913
  parameters: call.arguments,
2112
- }));
2914
+ })));
2113
2915
  cleanedText = parsedResult.cleanedContent;
2114
2916
  }
2115
2917
  }
@@ -2117,14 +2919,33 @@ export class OpenAIProvider extends BaseProvider {
2117
2919
  const logger = this.getLogger();
2118
2920
  logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
2119
2921
  }
2120
- // Emit tool calls from text parsing first
2922
+ // Emit accumulated thinking BEFORE tool calls or text content
2923
+ // @plan PLAN-20251202-THINKING.P16
2924
+ if (!hasEmittedThinking &&
2925
+ accumulatedThinkingContent.length > 0 &&
2926
+ (parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
2927
+ yield {
2928
+ speaker: 'ai',
2929
+ blocks: [
2930
+ {
2931
+ type: 'thinking',
2932
+ thought: accumulatedThinkingContent,
2933
+ sourceField: 'think_tags',
2934
+ isHidden: false,
2935
+ },
2936
+ ],
2937
+ };
2938
+ hasEmittedThinking = true;
2939
+ }
2121
2940
  if (parsedToolCalls.length > 0) {
2122
2941
  yield {
2123
2942
  speaker: 'ai',
2124
2943
  blocks: parsedToolCalls,
2125
2944
  };
2126
2945
  }
2127
- // Then emit any remaining cleaned text
2946
+ // Always use sanitized text to strip <think> tags (pipeline final buffer)
2947
+ // Bug fix: Previously Kimi used unsanitized workingText
2948
+ // @plan PLAN-20251202-THINKING.P16
2128
2949
  if (cleanedText.trim().length > 0) {
2129
2950
  yield {
2130
2951
  speaker: 'ai',
@@ -2138,6 +2959,39 @@ export class OpenAIProvider extends BaseProvider {
2138
2959
  }
2139
2960
  textBuffer = '';
2140
2961
  }
2962
+ // Emit any remaining accumulated thinking that wasn't emitted yet
2963
+ // (e.g., if entire response was just thinking with no content)
2964
+ // @plan PLAN-20251202-THINKING.P16
2965
+ if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
2966
+ yield {
2967
+ speaker: 'ai',
2968
+ blocks: [
2969
+ {
2970
+ type: 'thinking',
2971
+ thought: accumulatedThinkingContent,
2972
+ sourceField: 'think_tags',
2973
+ isHidden: false,
2974
+ },
2975
+ ],
2976
+ };
2977
+ hasEmittedThinking = true;
2978
+ }
2979
+ // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
2980
+ // This consolidates token-by-token reasoning from Synthetic API into a single block
2981
+ // @plan PLAN-20251202-THINKING.P16
2982
+ if (accumulatedReasoningContent.length > 0) {
2983
+ yield {
2984
+ speaker: 'ai',
2985
+ blocks: [
2986
+ {
2987
+ type: 'thinking',
2988
+ thought: accumulatedReasoningContent,
2989
+ sourceField: 'reasoning_content',
2990
+ isHidden: false,
2991
+ },
2992
+ ],
2993
+ };
2994
+ }
2141
2995
  // Process and emit tool calls using the pipeline
2142
2996
  const pipelineResult = await this.toolCallPipeline.process(abortSignal);
2143
2997
  if (pipelineResult.normalized.length > 0 ||
@@ -2145,8 +2999,9 @@ export class OpenAIProvider extends BaseProvider {
2145
2999
  const blocks = [];
2146
3000
  // Process successful tool calls
2147
3001
  for (const normalizedCall of pipelineResult.normalized) {
3002
+ const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
2148
3003
  // Process tool parameters with double-escape handling
2149
- const processedParameters = processToolParameters(normalizedCall.originalArgs || JSON.stringify(normalizedCall.args), normalizedCall.name);
3004
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
2150
3005
  blocks.push({
2151
3006
  type: 'tool_call',
2152
3007
  id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
@@ -2195,6 +3050,41 @@ export class OpenAIProvider extends BaseProvider {
2195
3050
  },
2196
3051
  };
2197
3052
  }
3053
+ // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
3054
+ // Only warn if we truly got nothing - not even reasoning content
3055
+ const pipelineStats = this.toolCallPipeline.getStats();
3056
+ if (_accumulatedText.length === 0 &&
3057
+ pipelineStats.collector.totalCalls === 0 &&
3058
+ textBuffer.length === 0 &&
3059
+ accumulatedReasoningContent.length === 0 &&
3060
+ accumulatedThinkingContent.length === 0) {
3061
+ // Provide actionable guidance for users
3062
+ const isKimi = model.toLowerCase().includes('kimi');
3063
+ const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
3064
+ const troubleshooting = isKimi
3065
+ ? isSynthetic
3066
+ ? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
3067
+ : ' This provider may not support streaming with tool calls.'
3068
+ : ' Consider using streaming: "disabled" in your profile settings.';
3069
+ logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${allChunks.length} chunks with no content).${troubleshooting}`, {
3070
+ model,
3071
+ baseURL: baseURL ?? this.getBaseURL(),
3072
+ isKimiModel: isKimi,
3073
+ isSyntheticAPI: isSynthetic,
3074
+ totalChunksReceived: allChunks.length,
3075
+ });
3076
+ }
3077
+ else {
3078
+ // Log what we DID get for debugging
3079
+ logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
3080
+ textLength: _accumulatedText.length,
3081
+ toolCallCount: pipelineStats.collector.totalCalls,
3082
+ textBufferLength: textBuffer.length,
3083
+ reasoningLength: accumulatedReasoningContent.length,
3084
+ thinkingLength: accumulatedThinkingContent.length,
3085
+ totalChunksReceived: allChunks.length,
3086
+ });
3087
+ }
2198
3088
  }
2199
3089
  else {
2200
3090
  // Handle non-streaming response
@@ -2220,12 +3110,22 @@ export class OpenAIProvider extends BaseProvider {
2220
3110
  }
2221
3111
  }
2222
3112
  const blocks = [];
2223
- // Handle text content
2224
- if (choice.message?.content) {
2225
- blocks.push({
2226
- type: 'text',
2227
- text: choice.message.content,
2228
- });
3113
+ // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
3114
+ const pipelineRawMessageContent = this.coerceMessageContentToString(choice.message?.content);
3115
+ let pipelineKimiCleanContent;
3116
+ let pipelineKimiToolBlocks = [];
3117
+ if (pipelineRawMessageContent) {
3118
+ const kimiParsed = this.extractKimiToolCallsFromText(pipelineRawMessageContent);
3119
+ pipelineKimiCleanContent = kimiParsed.cleanedText;
3120
+ pipelineKimiToolBlocks = kimiParsed.toolCalls;
3121
+ // Always use sanitized text - even Kimi-K2 should have consistent tag stripping
3122
+ const cleanedText = this.sanitizeProviderText(pipelineKimiCleanContent);
3123
+ if (cleanedText) {
3124
+ blocks.push({
3125
+ type: 'text',
3126
+ text: cleanedText,
3127
+ });
3128
+ }
2229
3129
  }
2230
3130
  // Handle tool calls
2231
3131
  if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
@@ -2234,8 +3134,9 @@ export class OpenAIProvider extends BaseProvider {
2234
3134
  if (toolCall.type === 'function') {
2235
3135
  // Normalize tool name for consistency with streaming path
2236
3136
  const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
3137
+ const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
2237
3138
  // Process tool parameters with double-escape handling
2238
- const processedParameters = processToolParameters(toolCall.function.arguments || '', normalizedName);
3139
+ const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
2239
3140
  blocks.push({
2240
3141
  type: 'tool_call',
2241
3142
  id: this.normalizeToHistoryToolId(toolCall.id),
@@ -2245,42 +3146,48 @@ export class OpenAIProvider extends BaseProvider {
2245
3146
  }
2246
3147
  }
2247
3148
  }
3149
+ if (pipelineKimiToolBlocks.length > 0) {
3150
+ blocks.push(...pipelineKimiToolBlocks);
3151
+ this.getLogger().debug(() => `[OpenAIProvider] Non-stream pipeline added Kimi tool calls from text`, { count: pipelineKimiToolBlocks.length });
3152
+ }
2248
3153
  // Additionally check for <tool_call> format in text content
2249
- if (choice.message?.content &&
2250
- typeof choice.message.content === 'string') {
2251
- try {
2252
- const parsedResult = this.textToolParser.parse(choice.message.content);
2253
- if (parsedResult.toolCalls.length > 0) {
2254
- // Add tool calls found in text content
2255
- for (const call of parsedResult.toolCalls) {
2256
- blocks.push({
2257
- type: 'tool_call',
2258
- id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
2259
- name: call.name,
2260
- parameters: call.arguments,
2261
- });
2262
- }
2263
- // Update the text content to remove the tool call parts
2264
- if (choice.message.content !== parsedResult.cleanedContent) {
2265
- // Find the text block and update it
2266
- const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
2267
- if (textBlockIndex >= 0) {
2268
- blocks[textBlockIndex].text =
2269
- parsedResult.cleanedContent;
2270
- }
2271
- else if (parsedResult.cleanedContent.trim()) {
2272
- // Add cleaned text if it doesn't exist
2273
- blocks.unshift({
2274
- type: 'text',
2275
- text: parsedResult.cleanedContent,
3154
+ if (pipelineKimiCleanContent) {
3155
+ const cleanedSource = this.sanitizeProviderText(pipelineKimiCleanContent);
3156
+ if (cleanedSource) {
3157
+ try {
3158
+ const parsedResult = this.textToolParser.parse(cleanedSource);
3159
+ if (parsedResult.toolCalls.length > 0) {
3160
+ // Add tool calls found in text content
3161
+ for (const call of parsedResult.toolCalls) {
3162
+ blocks.push({
3163
+ type: 'tool_call',
3164
+ id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
3165
+ name: this.normalizeToolName(call.name),
3166
+ parameters: call.arguments,
2276
3167
  });
2277
3168
  }
3169
+ // Update the text content to remove the tool call parts
3170
+ if (choice.message.content !== parsedResult.cleanedContent) {
3171
+ // Find the text block and update it
3172
+ const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
3173
+ if (textBlockIndex >= 0) {
3174
+ blocks[textBlockIndex].text =
3175
+ parsedResult.cleanedContent;
3176
+ }
3177
+ else if (parsedResult.cleanedContent.trim()) {
3178
+ // Add cleaned text if it doesn't exist
3179
+ blocks.unshift({
3180
+ type: 'text',
3181
+ text: parsedResult.cleanedContent,
3182
+ });
3183
+ }
3184
+ }
2278
3185
  }
2279
3186
  }
2280
- }
2281
- catch (error) {
2282
- const logger = this.getLogger();
2283
- logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
3187
+ catch (error) {
3188
+ const logger = this.getLogger();
3189
+ logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
3190
+ }
2284
3191
  }
2285
3192
  }
2286
3193
  // Emit the complete response as a single IContent
@@ -2338,19 +3245,25 @@ export class OpenAIProvider extends BaseProvider {
2338
3245
  }
2339
3246
  /**
2340
3247
  * Detects the tool call format based on the model being used
2341
- * @returns The detected tool format ('openai' or 'qwen')
3248
+ * @returns The detected tool format ('openai', 'qwen', or 'kimi')
2342
3249
  */
2343
3250
  detectToolFormat() {
2344
3251
  // Auto-detect based on model name if set to 'auto' or not set
2345
- const modelName = (this.getModel() || this.getDefaultModel()).toLowerCase();
3252
+ const modelName = this.getModel() || this.getDefaultModel();
2346
3253
  const logger = new DebugLogger('llxprt:provider:openai');
3254
+ // Check for Kimi K2 models (requires special ID format: functions.{name}:{index})
3255
+ if (isKimiModel(modelName)) {
3256
+ logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
3257
+ return 'kimi';
3258
+ }
3259
+ const lowerModelName = modelName.toLowerCase();
2347
3260
  // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
2348
- if (modelName.includes('glm-4')) {
3261
+ if (lowerModelName.includes('glm-4')) {
2349
3262
  logger.debug(() => `Auto-detected 'qwen' format for GLM-4.x model: ${modelName}`);
2350
3263
  return 'qwen';
2351
3264
  }
2352
3265
  // Check for qwen models
2353
- if (modelName.includes('qwen')) {
3266
+ if (lowerModelName.includes('qwen')) {
2354
3267
  logger.debug(() => `Auto-detected 'qwen' format for Qwen model: ${modelName}`);
2355
3268
  return 'qwen';
2356
3269
  }
@@ -2421,5 +3334,61 @@ export class OpenAIProvider extends BaseProvider {
2421
3334
  }
2422
3335
  return shouldRetry;
2423
3336
  }
3337
+ /**
3338
+ * Parse reasoning_content from streaming delta.
3339
+ *
3340
+ * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
3341
+ * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
3342
+ */
3343
+ parseStreamingReasoningDelta(delta) {
3344
+ if (!delta) {
3345
+ return null;
3346
+ }
3347
+ // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
3348
+ const reasoningContent = delta
3349
+ .reasoning_content;
3350
+ // Handle absent, null, or non-string
3351
+ if (!reasoningContent || typeof reasoningContent !== 'string') {
3352
+ return null;
3353
+ }
3354
+ // Handle empty string or whitespace-only
3355
+ if (reasoningContent.trim().length === 0) {
3356
+ return null;
3357
+ }
3358
+ return {
3359
+ type: 'thinking',
3360
+ thought: reasoningContent,
3361
+ sourceField: 'reasoning_content',
3362
+ isHidden: false,
3363
+ };
3364
+ }
3365
+ /**
3366
+ * Parse reasoning_content from non-streaming message.
3367
+ *
3368
+ * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
3369
+ * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
3370
+ */
3371
+ parseNonStreamingReasoning(message) {
3372
+ if (!message) {
3373
+ return null;
3374
+ }
3375
+ // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
3376
+ const reasoningContent = message
3377
+ .reasoning_content;
3378
+ // Handle absent, null, or non-string
3379
+ if (!reasoningContent || typeof reasoningContent !== 'string') {
3380
+ return null;
3381
+ }
3382
+ // Handle empty string or whitespace-only
3383
+ if (reasoningContent.trim().length === 0) {
3384
+ return null;
3385
+ }
3386
+ return {
3387
+ type: 'thinking',
3388
+ thought: reasoningContent,
3389
+ sourceField: 'reasoning_content',
3390
+ isHidden: false,
3391
+ };
3392
+ }
2424
3393
  }
2425
3394
  //# sourceMappingURL=OpenAIProvider.js.map