@vybestack/llxprt-code-core 0.7.0-nightly.251208.a6190e71e → 0.7.0-nightly.251211.134f1920b
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/prompt-config/defaults/default-prompts.json +1 -1
- package/dist/src/config/profileManager.js +6 -0
- package/dist/src/config/profileManager.js.map +1 -1
- package/dist/src/core/geminiChat.d.ts +15 -1
- package/dist/src/core/geminiChat.js +35 -10
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/prompts.js +29 -9
- package/dist/src/core/prompts.js.map +1 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/sa-impersonation-provider.js.map +1 -1
- package/dist/src/mcp/token-storage/file-token-storage.js +2 -1
- package/dist/src/mcp/token-storage/file-token-storage.js.map +1 -1
- package/dist/src/mcp/token-storage/hybrid-token-storage.js.map +1 -1
- package/dist/src/prompt-config/defaults/core.md +0 -3
- package/dist/src/prompt-config/prompt-installer.d.ts +33 -2
- package/dist/src/prompt-config/prompt-installer.js +163 -31
- package/dist/src/prompt-config/prompt-installer.js.map +1 -1
- package/dist/src/prompt-config/prompt-resolver.js +49 -41
- package/dist/src/prompt-config/prompt-resolver.js.map +1 -1
- package/dist/src/prompt-config/types.d.ts +1 -0
- package/dist/src/providers/LoggingProviderWrapper.d.ts +2 -1
- package/dist/src/providers/LoggingProviderWrapper.js +16 -4
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/ProviderManager.d.ts +6 -3
- package/dist/src/providers/ProviderManager.js +16 -4
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.js +164 -145
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/providers/gemini/GeminiProvider.js +91 -30
- package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/src/providers/openai/OpenAIProvider.d.ts +10 -2
- package/dist/src/providers/openai/OpenAIProvider.js +354 -132
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +3 -0
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +255 -22
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -1
- package/dist/src/providers/openai-vercel/messageConversion.d.ts +4 -1
- package/dist/src/providers/openai-vercel/messageConversion.js +41 -6
- package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -1
- package/dist/src/providers/reasoning/reasoningUtils.d.ts +26 -1
- package/dist/src/providers/reasoning/reasoningUtils.js +157 -0
- package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -1
- package/dist/src/providers/utils/cacheMetricsExtractor.d.ts +6 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js +36 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js.map +1 -0
- package/dist/src/providers/utils/dumpContext.d.ts +36 -0
- package/dist/src/providers/utils/dumpContext.js +93 -0
- package/dist/src/providers/utils/dumpContext.js.map +1 -0
- package/dist/src/providers/utils/dumpSDKContext.d.ts +13 -0
- package/dist/src/providers/utils/dumpSDKContext.js +39 -0
- package/dist/src/providers/utils/dumpSDKContext.js.map +1 -0
- package/dist/src/services/history/IContent.d.ts +3 -7
- package/dist/src/services/history/IContent.js.map +1 -1
- package/dist/src/settings/types.d.ts +4 -2
- package/dist/src/tools/IToolFormatter.d.ts +1 -1
- package/dist/src/tools/ToolIdStrategy.d.ts +25 -0
- package/dist/src/tools/ToolIdStrategy.js +108 -0
- package/dist/src/tools/ToolIdStrategy.js.map +1 -1
- package/dist/src/tools/modifiable-tool.js.map +1 -1
- package/dist/src/tools/task.js +14 -2
- package/dist/src/tools/task.js.map +1 -1
- package/dist/src/tools/tools.js.map +1 -1
- package/dist/src/types/modelParams.d.ts +6 -0
- package/dist/src/utils/generateContentResponseUtilities.js +6 -0
- package/dist/src/utils/generateContentResponseUtilities.js.map +1 -1
- package/dist/src/utils/retry.js +1 -0
- package/dist/src/utils/retry.js.map +1 -1
- package/package.json +1 -1
package/dist/src/providers/openai/OpenAIProvider.js

@@ -22,7 +22,7 @@ import crypto from 'node:crypto';
 import * as http from 'http';
 import * as https from 'https';
 import * as net from 'net';
-import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
+import { isKimiModel, isMistralModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
 import { BaseProvider, } from '../BaseProvider.js';
 import { DebugLogger } from '../../debug/index.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
@@ -39,6 +39,8 @@ import { ToolCallPipeline } from './ToolCallPipeline.js';
 import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
 import { isLocalEndpoint } from '../utils/localEndpoint.js';
 import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
+import { shouldDumpSDKContext, dumpSDKContext, } from '../utils/dumpSDKContext.js';
+import { extractCacheMetrics } from '../utils/cacheMetricsExtractor.js';
 const MAX_TOOL_RESPONSE_CHARS = 1024;
 const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
 const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -276,13 +278,12 @@ export class OpenAIProvider extends BaseProvider {
 // This preserves meaningful whitespace in regular text chunks during streaming
 // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
 if (hadReasoningTags) {
-// …
+// Collapse multiple spaces/tabs but preserve newlines for proper paragraph/line breaks
 str = str.replace(/[ \t]+/g, ' ');
 str = str.replace(/\n{3,}/g, '\n\n');
-// Only trim leading whitespace
-// This …
-
-str = str.trimStart();
+// Only trim leading horizontal whitespace (spaces/tabs), NOT newlines
+// This preserves line breaks between think tags and content (fixes #721)
+str = str.replace(/^[ \t]+/, '');
 }
 const afterLen = str.length;
 if (hadReasoningTags && afterLen !== beforeLen) {
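For illustration only (not part of the diff): the #721 fix above swaps trimStart() for a regex that strips only leading spaces and tabs, so a newline separating a closing </think> tag from the answer survives sanitization. The strings below are made-up examples.

// Made-up input: a newline separates the reasoning tag from the visible answer.
const chunk = '\nHere are the 5 biggest cities:';

// Old behaviour: trimStart() removes the newline along with spaces/tabs,
// gluing the answer onto whatever was emitted before it.
const oldResult = chunk.trimStart();            // 'Here are the 5 biggest cities:'

// New behaviour: only horizontal whitespace is stripped; the newline survives,
// so the line break between think tags and content is preserved (#721).
const newResult = chunk.replace(/^[ \t]+/, ''); // '\nHere are the 5 biggest cities:'

console.log(JSON.stringify(oldResult), JSON.stringify(newResult));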
@@ -437,62 +438,78 @@ export class OpenAIProvider extends BaseProvider {
 * and all tool info is only encoded in the text template.
 */
 extractKimiToolCallsFromText(raw) {
-if …
+// Return early only if input is null/undefined/empty
+if (!raw) {
 return { cleanedText: raw, toolCalls: [] };
 }
 const logger = this.getLogger();
 const toolCalls = [];
 let text = raw;
-… (removed lines 446-465 not shown in the source view)
+// Extract tool calls from complete sections if present
+if (raw.includes('<|tool_calls_section_begin|>')) {
+const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
+text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
+try {
+const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
+let m;
+while ((m = callRegex.exec(sectionBody)) !== null) {
+const rawId = m[1].trim();
+const rawArgs = m[2].trim();
+// Infer tool name from ID.
+let toolName = '';
+const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
+/^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
+if (match) {
+toolName = match[1];
+}
+else {
+const colonParts = rawId.split(':');
+const head = colonParts[0] || rawId;
+const dotParts = head.split('.');
+toolName = dotParts[dotParts.length - 1] || head;
+}
+// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
+toolName = this.normalizeToolName(toolName);
+const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
+const processedParameters = processToolParameters(sanitizedArgs, toolName);
+toolCalls.push({
+type: 'tool_call',
+id: this.normalizeToHistoryToolId(rawId),
+name: toolName,
+parameters: processedParameters,
+});
 }
-// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
-toolName = this.normalizeToolName(toolName);
-const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
-const processedParameters = processToolParameters(sanitizedArgs, toolName);
-toolCalls.push({
-type: 'tool_call',
-id: this.normalizeToHistoryToolId(rawId),
-name: toolName,
-parameters: processedParameters,
-});
 }
-… (removed lines 478-482 not shown in the source view)
-return '';
-});
-if (toolCalls.length > 0) {
-logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
-toolCallCount: toolCalls.length,
-originalLength: raw.length,
-cleanedLength: text.length,
+catch (err) {
+logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
+}
+// Strip the entire tool section from user-visible text
+return '';
 });
+if (toolCalls.length > 0) {
+logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
+toolCallCount: toolCalls.length,
+originalLength: raw.length,
+cleanedLength: text.length,
+});
+}
 }
+// ALWAYS run stray token cleanup, even if no complete sections were found
+// This handles partial sections, malformed tokens, orphaned markers, etc.
+text = text.replace(/<\|tool_call(?:_(?:begin|end|argument_begin))?\|>/g, '');
+text = text.replace(/<\|tool_calls_section_(?:begin|end)\|>/g, '');
 // Don't trim - preserve leading/trailing newlines that are important for formatting
 // (e.g., numbered lists from Kimi K2 that have newlines between items)
 return { cleanedText: text, toolCalls };
 }
+/**
+* Clean Kimi K2 tool call tokens from thinking content.
+* Used when extracting thinking from <think> tags that may contain embedded tool calls.
+* @issue #749
+*/
+cleanThinkingContent(thought) {
+return this.extractKimiToolCallsFromText(thought).cleanedText;
+}
 /**
 * @plan:PLAN-20251023-STATELESS-HARDENING.P09
 * @requirement:REQ-SP4-002
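For orientation, here is a rough standalone sketch of the Kimi K2 text format that the reworked extractKimiToolCallsFromText handles: tool calls arrive embedded in plain text between special section tokens rather than in the OpenAI tool_calls array. The sample string and the simplified parse loop below are illustrative; the package's real method also normalizes names and sanitizes arguments.

// Illustrative Kimi K2 output with one embedded tool call.
const sample =
  'Let me check that.' +
  '<|tool_calls_section_begin|>' +
  '<|tool_call_begin|>functions.glob:0<|tool_call_argument_begin|>{"pattern":"**/*.ts"}<|tool_call_end|>' +
  '<|tool_calls_section_end|>';

// Simplified version of the extraction the diff adds: pull out (id, args)
// pairs from each section and strip the whole section from visible text.
const calls = [];
const cleaned = sample.replace(
  /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g,
  (_all, body) => {
    const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
    let m;
    while ((m = callRegex.exec(body)) !== null) {
      calls.push({ id: m[1].trim(), args: JSON.parse(m[2]) });
    }
    return ''; // the section never reaches user-visible text
  },
);

console.log(cleaned); // 'Let me check that.'
console.log(calls);   // [ { id: 'functions.glob:0', args: { pattern: '**/*.ts' } } ]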
@@ -909,9 +926,12 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 messages.push({
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: this.normalizeToOpenAIToolId(tc.id),
 type: 'function',
@@ -947,10 +967,16 @@ export class OpenAIProvider extends BaseProvider {
 }
 else {
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, config),
 tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+name: tr.toolName,
 });
 }
 }
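A minimal sketch of the message shapes these two hunks produce for Mistral's OpenAI-compatible endpoint; the field values are made up, and the name field is added even though the OpenAI SDK's tool-message type does not declare it.

// Assistant turn that issued a tool call: note there is no content key at all.
const assistantMessage = {
  role: 'assistant',
  tool_calls: [
    {
      id: 'AbC123XyZ', // Mistral expects 9-char alphanumeric tool-call IDs
      type: 'function',
      function: { name: 'glob', arguments: '{"pattern":"**/*.ts"}' },
    },
  ],
};

// Matching tool result: tool_call_id must echo the ID above, and Mistral also
// wants a name field that repeats the function name.
const toolMessage = {
  role: 'tool',
  tool_call_id: 'AbC123XyZ',
  name: 'glob',
  content: '["src/index.ts"]',
};

console.log(JSON.stringify([assistantMessage, toolMessage], null, 2));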
@@ -976,8 +1002,9 @@ export class OpenAIProvider extends BaseProvider {
 const messages = [];
 // Create a ToolIdMapper based on the tool format
 // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
-… (removed lines 979-980 not shown in the source view)
+// For Mistral, this generates 9-char alphanumeric IDs
+const toolIdMapper = toolFormat === 'kimi' || toolFormat === 'mistral'
+? getToolIdStrategy(toolFormat).createMapper(filteredContents)
 : null;
 // Helper to resolve tool call IDs based on format
 const resolveToolCallId = (tc) => {
@@ -1013,9 +1040,12 @@ export class OpenAIProvider extends BaseProvider {
 const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
 if (toolCalls.length > 0) {
 // Assistant message with tool calls
+// CRITICAL for Mistral API compatibility (#760):
+// When tool_calls are present, we must NOT include a content property at all
+// (not even null). Mistral's OpenAI-compatible API requires this.
+// See: https://docs.mistral.ai/capabilities/function_calling
 const baseMessage = {
 role: 'assistant',
-content: text || null,
 tool_calls: toolCalls.map((tc) => ({
 id: resolveToolCallId(tc),
 type: 'function',
@@ -1056,10 +1086,16 @@ export class OpenAIProvider extends BaseProvider {
 // Convert tool responses
 const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
 for (const tr of toolResponses) {
+// CRITICAL for Mistral API compatibility (#760):
+// Tool messages must include a name field matching the function name.
+// See: https://docs.mistral.ai/capabilities/function_calling
+// Note: The OpenAI SDK types don't include name, but Mistral requires it.
+// We use a type assertion to add this required field.
 messages.push({
 role: 'tool',
 content: this.buildToolResponseContent(tr, options.config),
 tool_call_id: resolveToolResponseId(tr),
+name: tr.toolName,
 });
 }
 }
@@ -1505,9 +1541,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const …
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' || …
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -1574,12 +1610,29 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Phase 16 integration)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-… (removed line 1578 not shown in the source view)
+// @requirement REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Accumulate tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+for (const toolCall of reasoningToolCalls) {
+// Convert ToolCallBlock to accumulated format
+const index = accumulatedToolCalls.length;
+accumulatedToolCalls[index] = {
+id: toolCall.id,
+type: 'function',
+function: {
+name: toolCall.name,
+arguments: JSON.stringify(toolCall.parameters),
+},
+};
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -1600,13 +1653,25 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-// …
+// We now filter duplicates by tracking when content starts matching reasoning_content.
+// fixes #721
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-… (removed lines 1607-1609 not shown in the source view)
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -1622,9 +1687,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const …
-const …
-const hasOpenKimiSection = …
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, but avoid flushing mid Kimi section
 if (!hasOpenKimiSection &&
@@ -1641,12 +1706,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += …
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -1708,7 +1775,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-… (removed line not shown in the source view)
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -1827,11 +1897,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += …
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -1890,7 +1962,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (legacy final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-… (removed line not shown in the source view)
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -1922,19 +1997,32 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-… (removed lines 1927-1937 not shown in the source view)
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using legacy accumulated approach
 if (accumulatedToolCalls.length > 0) {
|
|
@@ -1961,6 +2049,7 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1961
2049
|
};
|
|
1962
2050
|
// Add usage metadata if we captured it from streaming
|
|
1963
2051
|
if (streamingUsage) {
|
|
2052
|
+
const cacheMetrics = extractCacheMetrics(streamingUsage);
|
|
1964
2053
|
toolCallsContent.metadata = {
|
|
1965
2054
|
usage: {
|
|
1966
2055
|
promptTokens: streamingUsage.prompt_tokens || 0,
|
|
@@ -1968,6 +2057,9 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1968
2057
|
totalTokens: streamingUsage.total_tokens ||
|
|
1969
2058
|
(streamingUsage.prompt_tokens || 0) +
|
|
1970
2059
|
(streamingUsage.completion_tokens || 0),
|
|
2060
|
+
cachedTokens: cacheMetrics.cachedTokens,
|
|
2061
|
+
cacheCreationTokens: cacheMetrics.cacheCreationTokens,
|
|
2062
|
+
cacheMissTokens: cacheMetrics.cacheMissTokens,
|
|
1971
2063
|
},
|
|
1972
2064
|
};
|
|
1973
2065
|
}
|
|
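The new cacheMetricsExtractor module itself is not shown in this diff, so the helper below is only a guess at its behaviour: a sketch that maps OpenAI-style usage fields (prompt_tokens_details.cached_tokens, plus an Anthropic-style cache_creation_input_tokens where present) onto the three fields the hunks above attach to usage metadata. The function name and field choices are assumptions.

// Hypothetical sketch of what an extractCacheMetrics(usage) helper could do;
// the real implementation lives in providers/utils/cacheMetricsExtractor.js
// and may differ.
function extractCacheMetricsSketch(usage) {
  const cachedTokens = usage?.prompt_tokens_details?.cached_tokens ?? 0;
  const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
  const promptTokens = usage?.prompt_tokens ?? 0;
  // Whatever part of the prompt was not served from cache counts as a miss.
  const cacheMissTokens = Math.max(promptTokens - cachedTokens, 0);
  return { cachedTokens, cacheCreationTokens, cacheMissTokens };
}

console.log(extractCacheMetricsSketch({
  prompt_tokens: 1200,
  completion_tokens: 80,
  prompt_tokens_details: { cached_tokens: 1000 },
}));
// -> { cachedTokens: 1000, cacheCreationTokens: 0, cacheMissTokens: 200 }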
@@ -1976,6 +2068,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 // If we have usage information but no tool calls, emit a metadata-only response
 if (streamingUsage && accumulatedToolCalls.length === 0) {
+const cacheMetrics = extractCacheMetrics(streamingUsage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -1986,6 +2079,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: streamingUsage.total_tokens ||
 (streamingUsage.prompt_tokens || 0) +
 (streamingUsage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -2050,8 +2146,10 @@ export class OpenAIProvider extends BaseProvider {
 }
 const blocks = [];
 // Parse reasoning_content from response (Phase 16 integration)
-… (removed lines 2053-2054 not shown in the source view)
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
+// @requirement REQ-KIMI-REASONING-001.2
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseNonStreamingReasoning(choice.message);
+logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}, tool calls: ${reasoningToolCalls.length}`, {
 hasReasoningContent: 'reasoning_content' in
 (choice.message ?? {}),
 messageKeys: Object.keys(choice.message ?? {}),
@@ -2059,6 +2157,11 @@ export class OpenAIProvider extends BaseProvider {
 if (reasoningBlock) {
 blocks.push(reasoningBlock);
 }
+// Add tool calls extracted from reasoning_content
+if (reasoningToolCalls.length > 0) {
+blocks.push(...reasoningToolCalls);
+logger.debug(() => `[Non-streaming] Added ${reasoningToolCalls.length} tool calls from reasoning_content`);
+}
 // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
 const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
 let kimiCleanContent;
@@ -2162,6 +2265,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata from non-streaming response
 if (completion.usage) {
+const cacheMetrics = extractCacheMetrics(completion.usage);
 responseContent.metadata = {
 usage: {
 promptTokens: completion.usage.prompt_tokens || 0,
@@ -2169,6 +2273,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -2176,6 +2283,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 else if (completion.usage) {
 // Emit metadata-only response if no content blocks but have usage info
+const cacheMetrics = extractCacheMetrics(completion.usage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -2186,6 +2294,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -2438,6 +2549,10 @@ export class OpenAIProvider extends BaseProvider {
 overrideKeys: requestOverrides ? Object.keys(requestOverrides) : [],
 });
 }
+// Get dump mode from ephemeral settings
+const dumpMode = ephemeralSettings.dumpcontext;
+const shouldDumpSuccess = shouldDumpSDKContext(dumpMode, false);
+const shouldDumpError = shouldDumpSDKContext(dumpMode, true);
 if (streamingEnabled) {
 // Streaming mode - use retry loop with compression support
 let compressedOnce = false;
@@ -2452,6 +2567,10 @@ export class OpenAIProvider extends BaseProvider {
 shouldRetryOnError: this.shouldRetryResponse.bind(this),
 trackThrottleWaitTime: this.throttleTracker,
 });
+// Dump successful streaming request if enabled
+if (shouldDumpSuccess) {
+await dumpSDKContext('openai', '/chat/completions', requestBody, { streaming: true }, false, baseURL || 'https://api.openai.com/v1');
+}
 break;
 }
 catch (error) {
@@ -2480,6 +2599,11 @@ export class OpenAIProvider extends BaseProvider {
 logger.warn(() => `[OpenAIProvider] Retrying streaming request after compressing tool responses due to provider 400`);
 continue;
 }
+// Dump error if enabled
+if (shouldDumpError) {
+const dumpErrorMessage = error instanceof Error ? error.message : String(error);
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
+}
 // Re-throw other errors as-is
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
 const status = typeof error === 'object' &&
@@ -2514,6 +2638,10 @@ export class OpenAIProvider extends BaseProvider {
 shouldRetryOnError: this.shouldRetryResponse.bind(this),
 trackThrottleWaitTime: this.throttleTracker,
 }));
+// Dump successful non-streaming request if enabled
+if (shouldDumpSuccess) {
+await dumpSDKContext('openai', '/chat/completions', requestBody, response, false, baseURL || 'https://api.openai.com/v1');
+}
 break;
 }
 catch (error) {
@@ -2548,6 +2676,11 @@ export class OpenAIProvider extends BaseProvider {
 logger.warn(() => `[OpenAIProvider] Retrying request after compressing tool responses due to provider 400`);
 continue;
 }
+// Dump error if enabled
+if (shouldDumpError) {
+const dumpErrorMessage = error instanceof Error ? error.message : String(error);
+await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
+}
 const capturedErrorMessage = error instanceof Error ? error.message : String(error);
 const status = typeof error === 'object' &&
 error !== null &&
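The dumpSDKContext helpers are new in this release and their implementation is not part of this diff, so the sketch below only illustrates how the gating seen above could behave, assuming a dumpcontext ephemeral setting with values like 'off', 'error', or 'all'; those mode names are assumptions.

// Hypothetical gating logic consistent with the calls above; the real
// shouldDumpSDKContext lives in providers/utils/dumpSDKContext.js and may differ.
function shouldDumpSketch(dumpMode, isError) {
  if (!dumpMode || dumpMode === 'off') return false;
  if (dumpMode === 'error') return isError; // only dump failed requests
  return true;                              // e.g. 'all': dump every request
}

// Mirrors how the provider decides up front, then dumps on success or failure.
const dumpMode = 'error';
const shouldDumpSuccess = shouldDumpSketch(dumpMode, false); // false
const shouldDumpError = shouldDumpSketch(dumpMode, true);    // true
console.log({ shouldDumpSuccess, shouldDumpError });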
@@ -2576,9 +2709,9 @@ export class OpenAIProvider extends BaseProvider {
 // Buffer for accumulating text chunks for providers that need it
 let textBuffer = '';
 // Use the same detected format from earlier for consistency
-const …
+const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
 // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-const shouldBufferText = detectedFormat === 'qwen' || …
+const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
 // Accumulate thinking content across the entire stream to emit as ONE block
 // This handles fragmented <think>word</think> streaming from Synthetic API
 // @plan PLAN-20251202-THINKING.P16
@@ -2648,13 +2781,28 @@ export class OpenAIProvider extends BaseProvider {
 continue;
 // Parse reasoning_content from streaming delta (Pipeline path)
 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+// Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
 // @plan PLAN-20251202-THINKING.P16
-// @requirement REQ-THINK-003.1
-const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+// @requirement REQ-THINK-003.1, REQ-KIMI-REASONING-001.1
+const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
 if (reasoningBlock) {
 // Accumulate reasoning content - will emit ONE block later
 accumulatedReasoningContent += reasoningBlock.thought;
 }
+// Add tool calls extracted from reasoning_content to pipeline
+if (reasoningToolCalls.length > 0) {
+// Get current pipeline stats to determine next index
+const stats = this.toolCallPipeline.getStats();
+let baseIndex = stats.collector.totalCalls;
+for (const toolCall of reasoningToolCalls) {
+// Add complete tool call as fragments to pipeline
+this.toolCallPipeline.addFragment(baseIndex, {
+name: toolCall.name,
+args: JSON.stringify(toolCall.parameters),
+});
+baseIndex++;
+}
+}
 // Check for finish_reason to detect proper stream ending
 if (choice.finish_reason) {
 logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
@@ -2675,13 +2823,24 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Handle text content - buffer for Qwen format, emit immediately for others
 // Note: Synthetic API sends content that may duplicate reasoning_content.
-// This is the model's behavior - we don't filter it here.
+// This is the model's behavior - we don't filter it here as detection is unreliable.
 // @plan PLAN-20251202-THINKING.P16
 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
 if (rawDeltaContent) {
-… (removed lines 2682-2684 not shown in the source view)
+// For Kimi models, we need to buffer the RAW content without processing
+// because Kimi tokens stream incrementally and partial tokens would leak
+// through if we try to process them immediately. The buffer will be
+// processed when flushed (at sentence boundaries or end of stream).
+let deltaContent;
+if (isKimiK2Model) {
+// For Kimi: Don't process yet - just pass through and let buffering handle it
+// We'll extract tool calls and sanitize when we flush the buffer
+deltaContent = rawDeltaContent;
+}
+else {
+// For non-Kimi models: sanitize immediately as before
+deltaContent = this.sanitizeProviderText(rawDeltaContent);
+}
 if (!deltaContent) {
 continue;
 }
@@ -2697,9 +2856,9 @@ export class OpenAIProvider extends BaseProvider {
 });
 // Buffer text to avoid stanza formatting
 textBuffer += deltaContent;
-const …
-const …
-const hasOpenKimiSection = …
+const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
 // Emit buffered text when we have a complete sentence or paragraph
 // Look for natural break points, avoiding flush mid Kimi section
 if (!hasOpenKimiSection &&
@@ -2716,12 +2875,14 @@ export class OpenAIProvider extends BaseProvider {
 // @requirement REQ-THINK-003
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Accumulate thinking content - don't emit yet
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += …
+accumulatedThinkingContent += cleanedThought;
 logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -2783,7 +2944,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline streaming)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-… (removed line not shown in the source view)
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -2883,11 +3047,13 @@ export class OpenAIProvider extends BaseProvider {
 // @plan PLAN-20251202-THINKING.P16
 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
 if (tagBasedThinking) {
+// Clean Kimi tokens from thinking content before accumulating
+const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
 // Use newline to preserve formatting between chunks (not space)
 if (accumulatedThinkingContent.length > 0) {
 accumulatedThinkingContent += '\n';
 }
-accumulatedThinkingContent += …
+accumulatedThinkingContent += cleanedThought;
 }
 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
 if (kimiParsed.toolCalls.length > 0) {
@@ -2946,7 +3112,10 @@ export class OpenAIProvider extends BaseProvider {
 // Always use sanitized text to strip <think> tags (pipeline final buffer)
 // Bug fix: Previously Kimi used unsanitized workingText
 // @plan PLAN-20251202-THINKING.P16
-… (removed line not shown in the source view)
+// Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+// Previously we used cleanedText.trim().length > 0 which dropped spaces,
+// causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+if (cleanedText.length > 0) {
 yield {
 speaker: 'ai',
 blocks: [
@@ -2978,19 +3147,32 @@ export class OpenAIProvider extends BaseProvider {
 }
 // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
 // This consolidates token-by-token reasoning from Synthetic API into a single block
+// Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
 // @plan PLAN-20251202-THINKING.P16
 if (accumulatedReasoningContent.length > 0) {
-… (removed lines 2983-2993 not shown in the source view)
+// Extract Kimi tool calls from the complete accumulated reasoning content
+const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+// Emit the cleaned thinking block
+if (cleanedReasoning.length > 0) {
+yield {
+speaker: 'ai',
+blocks: [
+{
+type: 'thinking',
+thought: cleanedReasoning,
+sourceField: 'reasoning_content',
+isHidden: false,
+},
+],
+};
+}
+// Emit any tool calls extracted from reasoning content
+if (reasoningToolCalls.length > 0) {
+yield {
+speaker: 'ai',
+blocks: reasoningToolCalls,
+};
+}
 }
 // Process and emit tool calls using the pipeline
 const pipelineResult = await this.toolCallPipeline.process(abortSignal);
|
|
@@ -3020,6 +3202,7 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
3020
3202
|
};
|
|
3021
3203
|
// Add usage metadata if we captured it from streaming
|
|
3022
3204
|
if (streamingUsage) {
|
|
3205
|
+
const cacheMetrics = extractCacheMetrics(streamingUsage);
|
|
3023
3206
|
toolCallsContent.metadata = {
|
|
3024
3207
|
usage: {
|
|
3025
3208
|
promptTokens: streamingUsage.prompt_tokens || 0,
|
|
@@ -3027,6 +3210,9 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
3027
3210
|
totalTokens: streamingUsage.total_tokens ||
|
|
3028
3211
|
(streamingUsage.prompt_tokens || 0) +
|
|
3029
3212
|
(streamingUsage.completion_tokens || 0),
|
|
3213
|
+
cachedTokens: cacheMetrics.cachedTokens,
|
|
3214
|
+
cacheCreationTokens: cacheMetrics.cacheCreationTokens,
|
|
3215
|
+
cacheMissTokens: cacheMetrics.cacheMissTokens,
|
|
3030
3216
|
},
|
|
3031
3217
|
};
|
|
3032
3218
|
}
|
|
@@ -3036,6 +3222,7 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
3036
3222
|
// If we have usage information but no tool calls, emit a metadata-only response
|
|
3037
3223
|
if (streamingUsage &&
|
|
3038
3224
|
this.toolCallPipeline.getStats().collector.totalCalls === 0) {
|
|
3225
|
+
const cacheMetrics = extractCacheMetrics(streamingUsage);
|
|
3039
3226
|
yield {
|
|
3040
3227
|
speaker: 'ai',
|
|
3041
3228
|
blocks: [],
|
|
@@ -3046,6 +3233,9 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
3046
3233
|
totalTokens: streamingUsage.total_tokens ||
|
|
3047
3234
|
(streamingUsage.prompt_tokens || 0) +
|
|
3048
3235
|
(streamingUsage.completion_tokens || 0),
|
|
3236
|
+
cachedTokens: cacheMetrics.cachedTokens,
|
|
3237
|
+
cacheCreationTokens: cacheMetrics.cacheCreationTokens,
|
|
3238
|
+
cacheMissTokens: cacheMetrics.cacheMissTokens,
|
|
3049
3239
|
},
|
|
3050
3240
|
},
|
|
3051
3241
|
};
|
|
@@ -3198,6 +3388,7 @@ export class OpenAIProvider extends BaseProvider {
 };
 // Add usage metadata from non-streaming response
 if (completion.usage) {
+const cacheMetrics = extractCacheMetrics(completion.usage);
 responseContent.metadata = {
 usage: {
 promptTokens: completion.usage.prompt_tokens || 0,
@@ -3205,6 +3396,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 };
 }
@@ -3212,6 +3406,7 @@ export class OpenAIProvider extends BaseProvider {
 }
 else if (completion.usage) {
 // Emit metadata-only response if no content blocks but have usage info
+const cacheMetrics = extractCacheMetrics(completion.usage);
 yield {
 speaker: 'ai',
 blocks: [],
@@ -3222,6 +3417,9 @@ export class OpenAIProvider extends BaseProvider {
 totalTokens: completion.usage.total_tokens ||
 (completion.usage.prompt_tokens || 0) +
 (completion.usage.completion_tokens || 0),
+cachedTokens: cacheMetrics.cachedTokens,
+cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+cacheMissTokens: cacheMetrics.cacheMissTokens,
 },
 },
 };
@@ -3256,6 +3454,12 @@ export class OpenAIProvider extends BaseProvider {
 logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
 return 'kimi';
 }
+// Check for Mistral models (requires 9-char alphanumeric IDs)
+// This applies to both hosted API and self-hosted Mistral models
+if (isMistralModel(modelName)) {
+logger.debug(() => `Auto-detected 'mistral' format for Mistral model: ${modelName}`);
+return 'mistral';
+}
 const lowerModelName = modelName.toLowerCase();
 // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
 if (lowerModelName.includes('glm-4')) {
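ToolIdStrategy.js, also changed in this release, is not included in this excerpt, so the snippet below only illustrates the behaviour the comments describe: detecting a Mistral model name and minting 9-character alphanumeric tool-call IDs. The function names and the detection heuristic are assumptions, not the package's code.

// Hypothetical sketches of the two helpers referenced above; the real versions
// live in tools/ToolIdStrategy.js and may detect models and format IDs differently.
function isMistralModelSketch(modelName) {
  const lower = modelName.toLowerCase();
  return lower.includes('mistral') || lower.includes('mixtral') || lower.includes('magistral');
}

function makeMistralToolCallIdSketch() {
  // Mistral's OpenAI-compatible API expects tool_call ids of exactly 9
  // alphanumeric characters (e.g. "AbC123XyZ").
  const chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789';
  let id = '';
  for (let i = 0; i < 9; i++) {
    id += chars[Math.floor(Math.random() * chars.length)];
  }
  return id;
}

console.log(isMistralModelSketch('mistral-large-latest')); // true
console.log(makeMistralToolCallIdSketch());                // e.g. 'q7ZpA03kD'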
@@ -3338,57 +3542,75 @@ export class OpenAIProvider extends BaseProvider {
 * Parse reasoning_content from streaming delta.
 *
 * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-* @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
+* @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.1
+* @issue #749
 */
 parseStreamingReasoningDelta(delta) {
 if (!delta) {
-return null;
+return { thinking: null, toolCalls: [] };
 }
 // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
 const reasoningContent = delta
 .reasoning_content;
 // Handle absent, null, or non-string
 if (!reasoningContent || typeof reasoningContent !== 'string') {
-return null;
-}
-// Handle empty string
-… (removed lines 3355-3363 not shown in the source view)
+return { thinking: null, toolCalls: [] };
+}
+// Handle empty string only - preserve whitespace-only content (spaces, tabs)
+// to maintain proper formatting in accumulated reasoning (fixes issue #721)
+if (reasoningContent.length === 0) {
+return { thinking: null, toolCalls: [] };
+}
+// Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+// For streaming, preserve whitespace-only content for proper formatting (issue #721)
+// Only return null if the cleaned text is empty (length 0)
+const thinkingBlock = cleanedText.length === 0
+? null
+: {
+type: 'thinking',
+thought: cleanedText,
+sourceField: 'reasoning_content',
+isHidden: false,
+};
+return { thinking: thinkingBlock, toolCalls };
 }
 /**
 * Parse reasoning_content from non-streaming message.
 *
 * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-* @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
+* @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.2
+* @issue #749
 */
 parseNonStreamingReasoning(message) {
 if (!message) {
-return null;
+return { thinking: null, toolCalls: [] };
 }
 // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
 const reasoningContent = message
 .reasoning_content;
 // Handle absent, null, or non-string
 if (!reasoningContent || typeof reasoningContent !== 'string') {
-return null;
+return { thinking: null, toolCalls: [] };
 }
-// Handle empty string or whitespace-only
+// Handle empty string or whitespace-only - for non-streaming complete responses,
+// whitespace-only reasoning is unusual and should be treated as no reasoning
 if (reasoningContent.trim().length === 0) {
-return null;
-}
-… (removed lines 3386-3391 not shown in the source view)
+return { thinking: null, toolCalls: [] };
+}
+// Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+// For non-streaming, trim whitespace after extraction
+const trimmedText = cleanedText.trim();
+const thinkingBlock = trimmedText.length === 0
+? null
+: {
+type: 'thinking',
+thought: trimmedText,
+sourceField: 'reasoning_content',
+isHidden: false,
+};
+return { thinking: thinkingBlock, toolCalls };
 }
 }
 //# sourceMappingURL=OpenAIProvider.js.map
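To summarize the shape change in this last hunk: both reasoning parsers used to return a ThinkingBlock or null, and they now always return an object carrying the thinking block plus any Kimi tool calls found inside reasoning_content. The caller-side sketch below uses field names taken from the diff; the provider instance and accumulator state are supplied by the caller and are illustrative only.

// Assume `provider` is an instance of the package's OpenAIProvider and `state`
// holds the caller's accumulators; both are passed in so the sketch stays self-contained.
function handleReasoningDelta(provider, delta, state) {
  // New contract: always an object, never null, so callers can destructure safely.
  const { thinking, toolCalls } = provider.parseStreamingReasoningDelta(delta);
  if (thinking) {
    // One ThinkingBlock with the Kimi tool-call tokens already stripped out.
    state.accumulatedReasoningContent += thinking.thought;
  }
  for (const call of toolCalls) {
    // Each entry is a tool_call block: { type: 'tool_call', id, name, parameters }.
    state.toolCallBlocks.push(call);
  }
  return state;
}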