@vybestack/llxprt-code-core 0.7.0-nightly.251209.0061bd6bf → 0.7.0-nightly.251211.134f1920b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/src/config/profileManager.js +2 -0
- package/dist/src/config/profileManager.js.map +1 -1
- package/dist/src/prompt-config/prompt-resolver.js +4 -0
- package/dist/src/prompt-config/prompt-resolver.js.map +1 -1
- package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -1
- package/dist/src/providers/LoggingProviderWrapper.js +16 -4
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/ProviderManager.d.ts +6 -3
- package/dist/src/providers/ProviderManager.js +16 -4
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/openai/OpenAIProvider.d.ts +10 -2
- package/dist/src/providers/openai/OpenAIProvider.js +335 -136
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +3 -0
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +255 -22
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -1
- package/dist/src/providers/openai-vercel/messageConversion.d.ts +4 -1
- package/dist/src/providers/openai-vercel/messageConversion.js +41 -6
- package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -1
- package/dist/src/providers/reasoning/reasoningUtils.d.ts +26 -1
- package/dist/src/providers/reasoning/reasoningUtils.js +157 -0
- package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -1
- package/dist/src/providers/utils/cacheMetricsExtractor.d.ts +6 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js +36 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js.map +1 -0
- package/dist/src/services/history/IContent.d.ts +3 -7
- package/dist/src/services/history/IContent.js.map +1 -1
- package/dist/src/tools/IToolFormatter.d.ts +1 -1
- package/dist/src/tools/ToolIdStrategy.d.ts +25 -0
- package/dist/src/tools/ToolIdStrategy.js +108 -0
- package/dist/src/tools/ToolIdStrategy.js.map +1 -1
- package/dist/src/tools/task.js +14 -2
- package/dist/src/tools/task.js.map +1 -1
- package/dist/src/utils/generateContentResponseUtilities.js +6 -0
- package/dist/src/utils/generateContentResponseUtilities.js.map +1 -1
- package/package.json +1 -1
@@ -22,7 +22,7 @@ import crypto from 'node:crypto';
 import * as http from 'http';
 import * as https from 'https';
 import * as net from 'net';
-import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
+import { isKimiModel, isMistralModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
 import { BaseProvider, } from '../BaseProvider.js';
 import { DebugLogger } from '../../debug/index.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
@@ -40,6 +40,7 @@ import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../uti
 import { isLocalEndpoint } from '../utils/localEndpoint.js';
 import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
 import { shouldDumpSDKContext, dumpSDKContext, } from '../utils/dumpSDKContext.js';
+import { extractCacheMetrics } from '../utils/cacheMetricsExtractor.js';
 const MAX_TOOL_RESPONSE_CHARS = 1024;
 const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
 const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -277,13 +278,12 @@ export class OpenAIProvider extends BaseProvider {
 // This preserves meaningful whitespace in regular text chunks during streaming
 // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
 if (hadReasoningTags) {
-//
+// Collapse multiple spaces/tabs but preserve newlines for proper paragraph/line breaks
 str = str.replace(/[ \t]+/g, ' ');
 str = str.replace(/\n{3,}/g, '\n\n');
-// Only trim leading whitespace
-// This
-
-str = str.trimStart();
+// Only trim leading horizontal whitespace (spaces/tabs), NOT newlines
+// This preserves line breaks between think tags and content (fixes #721)
+str = str.replace(/^[ \t]+/, '');
 }
 const afterLen = str.length;
 if (hadReasoningTags && afterLen !== beforeLen) {
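For context on the #721 change above, a small illustrative snippet (not part of the package) shows why switching from trimStart() to a horizontal-whitespace regex preserves the line break that separates a </think> tag from the following text:

// Illustration only: trimStart() also eats the leading newline, the new regex does not.
const chunk = '\n  5 Biggest Lakes';
console.log(JSON.stringify(chunk.trimStart()));            // "5 Biggest Lakes"      (newline lost)
console.log(JSON.stringify(chunk.replace(/^[ \t]+/, ''))); // "\n  5 Biggest Lakes"  (newline kept)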
@@ -438,62 +438,78 @@ export class OpenAIProvider extends BaseProvider {
 * and all tool info is only encoded in the text template.
 */
 extractKimiToolCallsFromText(raw) {
-if
+// Return early only if input is null/undefined/empty
+if (!raw) {
 return { cleanedText: raw, toolCalls: [] };
 }
 const logger = this.getLogger();
 const toolCalls = [];
 let text = raw;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+// Extract tool calls from complete sections if present
+if (raw.includes('<|tool_calls_section_begin|>')) {
+const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
+text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
+try {
+const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
+let m;
+while ((m = callRegex.exec(sectionBody)) !== null) {
+const rawId = m[1].trim();
+const rawArgs = m[2].trim();
+// Infer tool name from ID.
+let toolName = '';
+const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
+/^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
+if (match) {
+toolName = match[1];
+}
+else {
+const colonParts = rawId.split(':');
+const head = colonParts[0] || rawId;
+const dotParts = head.split('.');
+toolName = dotParts[dotParts.length - 1] || head;
+}
+// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
+toolName = this.normalizeToolName(toolName);
+const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
+const processedParameters = processToolParameters(sanitizedArgs, toolName);
+toolCalls.push({
+type: 'tool_call',
+id: this.normalizeToHistoryToolId(rawId),
+name: toolName,
+parameters: processedParameters,
+});
+}
 }
-// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
-toolName = this.normalizeToolName(toolName);
-const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
-const processedParameters = processToolParameters(sanitizedArgs, toolName);
-toolCalls.push({
-type: 'tool_call',
-id: this.normalizeToHistoryToolId(rawId),
-name: toolName,
-parameters: processedParameters,
-});
 }
-
-
-
-
-
-return '';
-});
-if (toolCalls.length > 0) {
-logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
-toolCallCount: toolCalls.length,
-originalLength: raw.length,
-cleanedLength: text.length,
+catch (err) {
+logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
+}
+// Strip the entire tool section from user-visible text
+return '';
 });
+if (toolCalls.length > 0) {
+logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
+toolCallCount: toolCalls.length,
+originalLength: raw.length,
+cleanedLength: text.length,
+});
+}
 }
+// ALWAYS run stray token cleanup, even if no complete sections were found
+// This handles partial sections, malformed tokens, orphaned markers, etc.
+text = text.replace(/<\|tool_call(?:_(?:begin|end|argument_begin))?\|>/g, '');
+text = text.replace(/<\|tool_calls_section_(?:begin|end)\|>/g, '');
 // Don't trim - preserve leading/trailing newlines that are important for formatting
 // (e.g., numbered lists from Kimi K2 that have newlines between items)
 return { cleanedText: text, toolCalls };
 }
+/**
+ * Clean Kimi K2 tool call tokens from thinking content.
+ * Used when extracting thinking from <think> tags that may contain embedded tool calls.
+ * @issue #749
+ */
+cleanThinkingContent(thought) {
+return this.extractKimiToolCallsFromText(thought).cleanedText;
+}
 /**
  * @plan:PLAN-20251023-STATELESS-HARDENING.P09
  * @requirement:REQ-SP4-002
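To make the Kimi K2 token format handled above concrete, here is a hypothetical input/output pair; the tool name and arguments are invented, and `provider` stands for an OpenAIProvider instance:

// Invented example of the Kimi K2 tool-call token format parsed above.
const raw = 'Let me search.' +
    '<|tool_calls_section_begin|>' +
    '<|tool_call_begin|>functions.glob:0<|tool_call_argument_begin|>{"pattern":"**/*.ts"}<|tool_call_end|>' +
    '<|tool_calls_section_end|>';
const { cleanedText, toolCalls } = provider.extractKimiToolCallsFromText(raw);
// cleanedText -> 'Let me search.'  (the whole tool section is stripped from user-visible text)
// toolCalls   -> roughly [{ type: 'tool_call', name: 'glob', parameters: { pattern: '**/*.ts' }, id: ... }]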
@@ -910,9 +926,12 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 messages.push({
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: this.normalizeToOpenAIToolId(tc.id),
 type: 'function',
@@ -948,10 +967,16 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, config),
 tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+name: tr.toolName,
 });
 }
 }
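The two Mistral-compatibility changes above boil down to the following message shapes (values invented for illustration; see the Mistral function-calling docs referenced in the comments):

// Assistant turn that carries tool calls: no `content` key at all (not even null).
const assistantMsg = {
    role: 'assistant',
    tool_calls: [{
        id: 'Ab3xY9QkZ', // 9-char alphanumeric ID expected by Mistral
        type: 'function',
        function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
    }],
};
// Matching tool-result turn: Mistral additionally requires the function name here.
const toolMsg = {
    role: 'tool',
    tool_call_id: 'Ab3xY9QkZ',
    name: 'get_weather',
    content: '{"temp_c":12}',
};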
@@ -977,8 +1002,9 @@ export class OpenAIProvider extends BaseProvider {
 const messages = [];
 // Create a ToolIdMapper based on the tool format
 // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
-
-
+// For Mistral, this generates 9-char alphanumeric IDs
+const toolIdMapper = toolFormat === 'kimi' || toolFormat === 'mistral'
+? getToolIdStrategy(toolFormat).createMapper(filteredContents)
 : null;
 // Helper to resolve tool call IDs based on format
 const resolveToolCallId = (tc) => {
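For orientation, a sketch of the two ID shapes the mapper has to produce; the real implementation lives in tools/ToolIdStrategy.js, which is changed in this release but not shown in this section, so treat these helpers as illustrative:

// Illustrative only - the actual mapper comes from getToolIdStrategy(toolFormat).createMapper(...).
const kimiToolId = (name, index) => `functions.${name}:${index}`; // e.g. 'functions.glob:0'
const mistralToolId = () => {
    const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
    let id = '';
    for (let i = 0; i < 9; i++) {
        id += alphabet[Math.floor(Math.random() * alphabet.length)];
    }
    return id; // e.g. 'Ab3xY9QkZ' - 9-char alphanumeric, as the comment above requires
};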
@@ -1014,9 +1040,12 @@ export class OpenAIProvider extends BaseProvider {
 const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
 if (toolCalls.length > 0) {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 const baseMessage = {
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: resolveToolCallId(tc),
 type: 'function',
@@ -1057,10 +1086,16 @@ export class OpenAIProvider extends BaseProvider {
 // Convert tool responses
 const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, options.config),
 tool_call_id: resolveToolResponseId(tr),
+name: tr.toolName,
 });
 }
 }
@@ -1506,9 +1541,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' ||
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -1575,12 +1610,29 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Phase 16 integration)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-
+// @requirement REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Accumulate tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+for (const toolCall of reasoningToolCalls) {
+// Convert ToolCallBlock to accumulated format
+const index = accumulatedToolCalls.length;
+accumulatedToolCalls[index] = {
+id: toolCall.id,
+type: 'function',
+function: {
+name: toolCall.name,
+arguments: JSON.stringify(toolCall.parameters),
+},
+};
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1601,13 +1653,25 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-//
+// We now filter duplicates by tracking when content starts matching reasoning_content.
+// fixes #721
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-
-
-
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -1623,9 +1687,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const
-const
-const hasOpenKimiSection =
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, but avoid flushing mid Kimi section
 if (!hasOpenKimiSection &&
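A short worked example of the new open-section check (buffer contents invented):

// If a tool section has been opened but not yet closed, keep buffering instead of flushing.
const textBuffer = 'Searching now <|tool_calls_section_begin|><|tool_call_begin|>functions.glob:0';
const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length; // 1
const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;     // 0
const hasOpenKimiSection = kimiBeginCount > kimiEndCount; // true -> do not flush mid-section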
@@ -1642,12 +1706,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent +=
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -1709,7 +1775,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -1828,11 +1897,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent +=
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -1891,7 +1962,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -1923,19 +1997,32 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-
-
-
-
-
-
-
-
-
-
-
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using legacy accumulated approach
 if (accumulatedToolCalls.length > 0) {
@@ -1962,6 +2049,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata if we captured it from streaming
 if (streamingUsage) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 toolCallsContent.metadata = {
 usage: {
 promptTokens: streamingUsage.prompt_tokens || 0,
@@ -1969,6 +2057,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -1977,6 +2068,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 // If we have usage information but no tool calls, emit a metadata-only response
 if (streamingUsage && accumulatedToolCalls.length === 0) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -1987,6 +2079,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
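The new cacheMetricsExtractor.js module appears in the file summary above but its body is not shown in this diff; judging only from how its result is consumed here, a plausible sketch looks like this (the field mapping is an assumption):

// Hypothetical sketch of extractCacheMetrics(usage); the real module is
// providers/utils/cacheMetricsExtractor.js and may read different fields.
function extractCacheMetricsSketch(usage) {
    const promptTokens = usage?.prompt_tokens ?? 0;
    const cachedTokens = usage?.prompt_tokens_details?.cached_tokens ?? 0; // OpenAI-style field
    return {
        cachedTokens,
        cacheCreationTokens: 0, // some providers report cache writes separately
        cacheMissTokens: Math.max(promptTokens - cachedTokens, 0),
    };
}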
@@ -2051,8 +2146,10 @@ export class OpenAIProvider extends BaseProvider {
 }
 const blocks = [];
 // Parse reasoning_content from response (Phase 16 integration)
-
-
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
+// @requirement REQ-KIMI-REASONING-001.2
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseNonStreamingReasoning(choice.message);
+logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}, tool calls: ${reasoningToolCalls.length}`, {
 hasReasoningContent: 'reasoning_content' in
 (choice.message ?? {}),
 messageKeys: Object.keys(choice.message ?? {}),
@@ -2060,6 +2157,11 @@ export class OpenAIProvider extends BaseProvider {
 if (reasoningBlock) {
 blocks.push(reasoningBlock);
 }
+// Add tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+blocks.push(...reasoningToolCalls);
+logger.debug(() => `[Non-streaming] Added ${reasoningToolCalls.length} tool calls from reasoning_content`);
+}
 // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
 const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
 let kimiCleanContent;
@@ -2163,6 +2265,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata from non-streaming response
 if (completion.usage) {
+const cacheMetrics = extractCacheMetrics(completion.usage);
 responseContent.metadata = {
 usage: {
 promptTokens: completion.usage.prompt_tokens || 0,
@@ -2170,6 +2273,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -2177,6 +2283,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 else if (completion.usage) {
 // Emit metadata-only response if no content blocks but have usage info
+const cacheMetrics = extractCacheMetrics(completion.usage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -2187,6 +2294,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -2459,7 +2569,7 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Dump successful streaming request if enabled
 if (shouldDumpSuccess) {
-await dumpSDKContext('openai', '/
+await dumpSDKContext('openai', '/chat/completions', requestBody, { streaming: true }, false, baseURL || 'https://api.openai.com/v1');
 }
 break;
 }
@@ -2492,7 +2602,7 @@ export class OpenAIProvider extends BaseProvider {
 // Dump error if enabled
 if (shouldDumpError) {
 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-await dumpSDKContext('openai', '/
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
 }
 // Re-throw other errors as-is
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
@@ -2530,7 +2640,7 @@ export class OpenAIProvider extends BaseProvider {
 }));
 // Dump successful non-streaming request if enabled
 if (shouldDumpSuccess) {
-await dumpSDKContext('openai', '/
+await dumpSDKContext('openai', '/chat/completions', requestBody, response, false, baseURL || 'https://api.openai.com/v1');
 }
 break;
 }
@@ -2569,7 +2679,7 @@ export class OpenAIProvider extends BaseProvider {
 // Dump error if enabled
 if (shouldDumpError) {
 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-await dumpSDKContext('openai', '/
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
 }
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
 const status = typeof error === 'object' &&
@@ -2599,9 +2709,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' ||
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -2671,13 +2781,28 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Pipeline path)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-// @requirement REQ-THINK-003.1
-const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+// @requirement REQ-THINK-003.1, REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Add tool calls extracted from reasoning_content to pipeline
+if (reasoningToolCalls.length > 0) {
+// Get current pipeline stats to determine next index
+const stats = this.toolCallPipeline.getStats();
+let baseIndex = stats.collector.totalCalls;
+for (const toolCall of reasoningToolCalls) {
+// Add complete tool call as fragments to pipeline
+this.toolCallPipeline.addFragment(baseIndex, {
+name: toolCall.name,
+args: JSON.stringify(toolCall.parameters),
+});
+baseIndex++;
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -2698,13 +2823,24 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-// This is the model's behavior - we don't filter it here.
+// This is the model's behavior - we don't filter it here as detection is unreliable.
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-
-
-
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -2720,9 +2856,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const
-const
-const hasOpenKimiSection =
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, avoiding flush mid Kimi section
 if (!hasOpenKimiSection &&
@@ -2739,12 +2875,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent +=
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -2806,7 +2944,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -2906,11 +3047,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent +=
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -2969,7 +3112,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -3001,19 +3147,32 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-
-
-
-
-
-
-
-
-
-
-
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using the pipeline
 const pipelineResult = await this.toolCallPipeline.process(abortSignal);
@@ -3043,6 +3202,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata if we captured it from streaming
 if (streamingUsage) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 toolCallsContent.metadata = {
 usage: {
 promptTokens: streamingUsage.prompt_tokens || 0,
@@ -3050,6 +3210,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -3059,6 +3222,7 @@ export class OpenAIProvider extends BaseProvider {
 // If we have usage information but no tool calls, emit a metadata-only response
 if (streamingUsage &&
 this.toolCallPipeline.getStats().collector.totalCalls === 0) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -3069,6 +3233,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -3221,6 +3388,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata from non-streaming response
 if (completion.usage) {
+const cacheMetrics = extractCacheMetrics(completion.usage);
 responseContent.metadata = {
 usage: {
 promptTokens: completion.usage.prompt_tokens || 0,
@@ -3228,6 +3396,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -3235,6 +3406,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 else if (completion.usage) {
 // Emit metadata-only response if no content blocks but have usage info
+const cacheMetrics = extractCacheMetrics(completion.usage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -3245,6 +3417,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -3279,6 +3454,12 @@ export class OpenAIProvider extends BaseProvider {
 logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
 return 'kimi';
 }
+// Check for Mistral models (requires 9-char alphanumeric IDs)
+// This applies to both hosted API and self-hosted Mistral models
+if (isMistralModel(modelName)) {
+logger.debug(() => `Auto-detected 'mistral' format for Mistral model: ${modelName}`);
+return 'mistral';
+}
 const lowerModelName = modelName.toLowerCase();
 // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
 if (lowerModelName.includes('glm-4')) {
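isMistralModel() is exported from tools/ToolIdStrategy.js, whose implementation is not part of this section, so the matching rule below is an assumption; the intent of the new branch is:

// Assumption: isMistralModel() matches by model name, similar to the existing
// 'kimi-k2' substring check for Kimi. With this hunk, format auto-detection
// would behave roughly like:
//   'kimi-k2-instruct'     -> 'kimi'     (sequential functions.{name}:{index} tool IDs)
//   'mistral-large-latest' -> 'mistral'  (9-character alphanumeric tool IDs)
//   'glm-4.6'              -> handled by the GLM-4 branch that follows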
@@ -3361,57 +3542,75 @@ export class OpenAIProvider extends BaseProvider {
 * Parse reasoning_content from streaming delta.
 *
 * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-* @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
+* @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.1
+* @issue #749
 */
 parseStreamingReasoningDelta(delta) {
 if (!delta) {
-return null;
+return { thinking: null, toolCalls: [] };
 }
 // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
 const reasoningContent = delta
 .reasoning_content;
 // Handle absent, null, or non-string
 if (!reasoningContent || typeof reasoningContent !== 'string') {
-return null;
-}
-// Handle empty string
-
-
-
-
-
-
-
-
-
+return { thinking: null, toolCalls: [] };
+}
+// Handle empty string only - preserve whitespace-only content (spaces, tabs)
+// to maintain proper formatting in accumulated reasoning (fixes issue #721)
+if (reasoningContent.length === 0) {
+return { thinking: null, toolCalls: [] };
+}
+// Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+// For streaming, preserve whitespace-only content for proper formatting (issue #721)
+// Only return null if the cleaned text is empty (length 0)
+const thinkingBlock = cleanedText.length === 0
+? null
+: {
+type: 'thinking',
+thought: cleanedText,
+sourceField: 'reasoning_content',
+isHidden: false,
+};
+return { thinking: thinkingBlock, toolCalls };
 }
 /**
 * Parse reasoning_content from non-streaming message.
 *
 * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-* @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
+* @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.2
+* @issue #749
 */
 parseNonStreamingReasoning(message) {
 if (!message) {
-return null;
+return { thinking: null, toolCalls: [] };
 }
 // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
 const reasoningContent = message
 .reasoning_content;
 // Handle absent, null, or non-string
 if (!reasoningContent || typeof reasoningContent !== 'string') {
-return null;
+return { thinking: null, toolCalls: [] };
 }
-// Handle empty string or whitespace-only
+// Handle empty string or whitespace-only - for non-streaming complete responses,
+// whitespace-only reasoning is unusual and should be treated as no reasoning
 if (reasoningContent.trim().length === 0) {
-return null;
-}
-
-
-
-
-
-
+return { thinking: null, toolCalls: [] };
+}
+// Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+// For non-streaming, trim whitespace after extraction
+const trimmedText = cleanedText.trim();
+const thinkingBlock = trimmedText.length === 0
+? null
+: {
+type: 'thinking',
+thought: trimmedText,
+sourceField: 'reasoning_content',
+isHidden: false,
+};
+return { thinking: thinkingBlock, toolCalls };
 }
 }
 //# sourceMappingURL=OpenAIProvider.js.map
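To summarize the contract change in the two reasoning parsers above, a consumer-side sketch (the delta value and tool name are invented; `provider` stands for an OpenAIProvider instance):

// parseStreamingReasoningDelta() used to return a ThinkingBlock or null;
// it now returns { thinking, toolCalls } so embedded Kimi K2 tool calls survive.
const delta = {
    reasoning_content: 'Considering options. ' +
        '<|tool_calls_section_begin|>' +
        '<|tool_call_begin|>functions.list_files:0<|tool_call_argument_begin|>{"dir":"src"}<|tool_call_end|>' +
        '<|tool_calls_section_end|>',
};
const { thinking, toolCalls } = provider.parseStreamingReasoningDelta(delta);
// thinking.thought -> roughly 'Considering options. ' (tool tokens stripped, whitespace preserved)
// toolCalls[0]     -> roughly { type: 'tool_call', name: 'list_files', parameters: { dir: 'src' }, ... }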