@vybestack/llxprt-code-core 0.7.0-nightly.251209.0061bd6bf → 0.7.0-nightly.251211.5750c518a
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +2 -1
- package/dist/index.js +1 -1
- package/dist/index.js.map +1 -1
- package/dist/src/adapters/IStreamAdapter.d.ts +2 -2
- package/dist/src/auth/anthropic-device-flow.d.ts +1 -1
- package/dist/src/auth/precedence.d.ts +1 -1
- package/dist/src/auth/qwen-device-flow.d.ts +1 -1
- package/dist/src/auth/token-store.d.ts +1 -1
- package/dist/src/auth/token-store.js.map +1 -1
- package/dist/src/code_assist/codeAssist.d.ts +1 -1
- package/dist/src/code_assist/codeAssist.js.map +1 -1
- package/dist/src/code_assist/converter.d.ts +1 -1
- package/dist/src/code_assist/server.d.ts +3 -3
- package/dist/src/config/config.d.ts +3 -3
- package/dist/src/config/config.js +1 -1
- package/dist/src/config/config.js.map +1 -1
- package/dist/src/config/profileManager.d.ts +1 -1
- package/dist/src/config/profileManager.js +2 -0
- package/dist/src/config/profileManager.js.map +1 -1
- package/dist/src/config/subagentManager.d.ts +1 -1
- package/dist/src/confirmation-bus/message-bus.d.ts +1 -1
- package/dist/src/core/client.d.ts +3 -3
- package/dist/src/core/client.js.map +1 -1
- package/dist/src/core/contentGenerator.d.ts +1 -1
- package/dist/src/core/coreToolScheduler.d.ts +4 -3
- package/dist/src/core/coreToolScheduler.js +28 -0
- package/dist/src/core/coreToolScheduler.js.map +1 -1
- package/dist/src/core/geminiChat.d.ts +2 -2
- package/dist/src/core/googleGenAIWrapper.d.ts +2 -2
- package/dist/src/core/logger.d.ts +1 -1
- package/dist/src/core/loggingContentGenerator.d.ts +2 -2
- package/dist/src/core/nonInteractiveToolExecutor.d.ts +1 -1
- package/dist/src/core/nonInteractiveToolExecutor.js.map +1 -1
- package/dist/src/core/subagent.d.ts +1 -1
- package/dist/src/core/subagent.js.map +1 -1
- package/dist/src/core/turn.d.ts +2 -2
- package/dist/src/debug/ConfigurationManager.d.ts +1 -1
- package/dist/src/ide/ide-client.d.ts +1 -1
- package/dist/src/ide/process-utils.js +45 -25
- package/dist/src/ide/process-utils.js.map +1 -1
- package/dist/src/index.d.ts +4 -2
- package/dist/src/index.js +2 -2
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/file-token-store.d.ts +1 -1
- package/dist/src/mcp/google-auth-provider.d.ts +2 -2
- package/dist/src/mcp/oauth-provider.d.ts +1 -1
- package/dist/src/mcp/oauth-provider.js +1 -1
- package/dist/src/mcp/oauth-provider.js.map +1 -1
- package/dist/src/mcp/oauth-utils.d.ts +1 -1
- package/dist/src/prompt-config/TemplateEngine.d.ts +1 -1
- package/dist/src/prompt-config/prompt-cache.d.ts +1 -1
- package/dist/src/prompt-config/prompt-resolver.d.ts +1 -1
- package/dist/src/prompt-config/prompt-resolver.js +4 -0
- package/dist/src/prompt-config/prompt-resolver.js.map +1 -1
- package/dist/src/prompts/mcp-prompts.d.ts +1 -1
- package/dist/src/prompts/prompt-registry.d.ts +1 -1
- package/dist/src/providers/BaseProvider.d.ts +5 -5
- package/dist/src/providers/IProvider.d.ts +3 -3
- package/dist/src/providers/IProviderManager.d.ts +2 -2
- package/dist/src/providers/LoggingProviderWrapper.d.ts +4 -3
- package/dist/src/providers/LoggingProviderWrapper.js +16 -4
- package/dist/src/providers/LoggingProviderWrapper.js.map +1 -1
- package/dist/src/providers/ProviderContentGenerator.d.ts +2 -2
- package/dist/src/providers/ProviderManager.d.ts +9 -6
- package/dist/src/providers/ProviderManager.js +16 -4
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.d.ts +5 -5
- package/dist/src/providers/anthropic/AnthropicProvider.js +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/providers/gemini/GeminiProvider.d.ts +4 -4
- package/dist/src/providers/openai/ConversationCache.d.ts +1 -1
- package/dist/src/providers/openai/IChatGenerateParams.d.ts +1 -1
- package/dist/src/providers/openai/OpenAIProvider.d.ts +24 -8
- package/dist/src/providers/openai/OpenAIProvider.js +483 -143
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai/ToolCallPipeline.d.ts +2 -2
- package/dist/src/providers/openai/buildResponsesRequest.d.ts +3 -3
- package/dist/src/providers/openai/estimateRemoteTokens.d.ts +1 -1
- package/dist/src/providers/openai/parseResponsesStream.d.ts +1 -1
- package/dist/src/providers/openai/syntheticToolResponses.d.ts +1 -1
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +4 -4
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +9 -6
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +255 -22
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -1
- package/dist/src/providers/openai-vercel/messageConversion.d.ts +4 -1
- package/dist/src/providers/openai-vercel/messageConversion.js +41 -6
- package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -1
- package/dist/src/providers/reasoning/reasoningUtils.d.ts +26 -1
- package/dist/src/providers/reasoning/reasoningUtils.js +157 -0
- package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -1
- package/dist/src/providers/test-utils/providerTestConfig.d.ts +1 -1
- package/dist/src/providers/tokenizers/AnthropicTokenizer.d.ts +1 -1
- package/dist/src/providers/tokenizers/OpenAITokenizer.d.ts +1 -1
- package/dist/src/providers/tokenizers/OpenAITokenizer.js.map +1 -1
- package/dist/src/providers/utils/cacheMetricsExtractor.d.ts +6 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js +36 -0
- package/dist/src/providers/utils/cacheMetricsExtractor.js.map +1 -0
- package/dist/src/services/fileDiscoveryService.js +1 -1
- package/dist/src/services/fileDiscoveryService.js.map +1 -1
- package/dist/src/services/gitService.js.map +1 -1
- package/dist/src/services/history/ContentConverters.d.ts +1 -1
- package/dist/src/services/history/HistoryService.d.ts +2 -2
- package/dist/src/services/history/IContent.d.ts +3 -7
- package/dist/src/services/history/IContent.js.map +1 -1
- package/dist/src/services/loopDetectionService.d.ts +1 -1
- package/dist/src/services/loopDetectionService.js.map +1 -1
- package/dist/src/services/shellExecutionService.js.map +1 -1
- package/dist/src/services/todo-reminder-service.d.ts +1 -1
- package/dist/src/services/tool-call-tracker-service.d.ts +1 -1
- package/dist/src/settings/SettingsService.d.ts +1 -1
- package/dist/src/telemetry/file-exporters.d.ts +4 -4
- package/dist/src/telemetry/file-exporters.js.map +1 -1
- package/dist/src/telemetry/index.d.ts +2 -1
- package/dist/src/telemetry/index.js.map +1 -1
- package/dist/src/telemetry/loggers.js +1 -1
- package/dist/src/telemetry/loggers.js.map +1 -1
- package/dist/src/telemetry/loggers.test.circular.js.map +1 -1
- package/dist/src/telemetry/metrics.d.ts +2 -2
- package/dist/src/telemetry/types.d.ts +1 -1
- package/dist/src/telemetry/types.js.map +1 -1
- package/dist/src/test-utils/config.js.map +1 -1
- package/dist/src/test-utils/tools.d.ts +2 -2
- package/dist/src/todo/todoFormatter.d.ts +1 -1
- package/dist/src/tools/IToolFormatter.d.ts +3 -3
- package/dist/src/tools/ToolFormatter.d.ts +3 -3
- package/dist/src/tools/ToolIdStrategy.d.ts +25 -0
- package/dist/src/tools/ToolIdStrategy.js +108 -0
- package/dist/src/tools/ToolIdStrategy.js.map +1 -1
- package/dist/src/tools/codesearch.d.ts +1 -1
- package/dist/src/tools/delete_line_range.d.ts +1 -1
- package/dist/src/tools/diffOptions.d.ts +1 -1
- package/dist/src/tools/direct-web-fetch.d.ts +1 -1
- package/dist/src/tools/direct-web-fetch.js.map +1 -1
- package/dist/src/tools/edit.d.ts +2 -2
- package/dist/src/tools/edit.js.map +1 -1
- package/dist/src/tools/exa-web-search.d.ts +1 -1
- package/dist/src/tools/google-web-fetch.d.ts +1 -1
- package/dist/src/tools/google-web-search-invocation.d.ts +2 -2
- package/dist/src/tools/google-web-search-invocation.js.map +1 -1
- package/dist/src/tools/google-web-search.d.ts +3 -3
- package/dist/src/tools/google-web-search.js.map +1 -1
- package/dist/src/tools/grep.d.ts +1 -1
- package/dist/src/tools/insert_at_line.d.ts +1 -1
- package/dist/src/tools/list-subagents.d.ts +1 -1
- package/dist/src/tools/ls.d.ts +1 -1
- package/dist/src/tools/mcp-tool.d.ts +2 -2
- package/dist/src/tools/memoryTool.d.ts +12 -4
- package/dist/src/tools/memoryTool.js +81 -29
- package/dist/src/tools/memoryTool.js.map +1 -1
- package/dist/src/tools/modifiable-tool.d.ts +2 -2
- package/dist/src/tools/modifiable-tool.js.map +1 -1
- package/dist/src/tools/read-file.d.ts +1 -1
- package/dist/src/tools/read-many-files.d.ts +1 -1
- package/dist/src/tools/read_line_range.d.ts +1 -1
- package/dist/src/tools/ripGrep.d.ts +1 -1
- package/dist/src/tools/shell.d.ts +1 -1
- package/dist/src/tools/task.d.ts +1 -1
- package/dist/src/tools/task.js +14 -2
- package/dist/src/tools/task.js.map +1 -1
- package/dist/src/tools/todo-events.d.ts +1 -1
- package/dist/src/tools/todo-pause.d.ts +1 -1
- package/dist/src/tools/todo-pause.js.map +1 -1
- package/dist/src/tools/todo-read.d.ts +1 -1
- package/dist/src/tools/todo-read.js.map +1 -1
- package/dist/src/tools/todo-store.d.ts +1 -1
- package/dist/src/tools/todo-store.js.map +1 -1
- package/dist/src/tools/todo-write.d.ts +2 -2
- package/dist/src/tools/todo-write.js.map +1 -1
- package/dist/src/tools/tool-registry.d.ts +3 -3
- package/dist/src/tools/tools.d.ts +6 -5
- package/dist/src/tools/tools.js +1 -1
- package/dist/src/tools/tools.js.map +1 -1
- package/dist/src/tools/write-file.d.ts +2 -2
- package/dist/src/tools/write-file.js.map +1 -1
- package/dist/src/utils/environmentContext.d.ts +1 -1
- package/dist/src/utils/errorReporting.d.ts +1 -1
- package/dist/src/utils/fileUtils.d.ts +2 -2
- package/dist/src/utils/filesearch/fileSearch.js.map +1 -1
- package/dist/src/utils/generateContentResponseUtilities.d.ts +1 -1
- package/dist/src/utils/generateContentResponseUtilities.js +6 -0
- package/dist/src/utils/generateContentResponseUtilities.js.map +1 -1
- package/dist/src/utils/messageInspectors.d.ts +1 -1
- package/dist/src/utils/partUtils.d.ts +1 -1
- package/dist/src/utils/quotaErrorDetection.d.ts +1 -1
- package/dist/src/utils/summarizer.d.ts +1 -1
- package/package.json +1 -1
@@ -22,7 +22,7 @@ import crypto from 'node:crypto';
 import * as http from 'http';
 import * as https from 'https';
 import * as net from 'net';
-import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
+import { isKimiModel, isMistralModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
 import { BaseProvider, } from '../BaseProvider.js';
 import { DebugLogger } from '../../debug/index.js';
 import { ToolFormatter } from '../../tools/ToolFormatter.js';
@@ -40,6 +40,7 @@ import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../uti
 import { isLocalEndpoint } from '../utils/localEndpoint.js';
 import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
 import { shouldDumpSDKContext, dumpSDKContext, } from '../utils/dumpSDKContext.js';
+import { extractCacheMetrics } from '../utils/cacheMetricsExtractor.js';
 const MAX_TOOL_RESPONSE_CHARS = 1024;
 const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
 const TOOL_ARGS_PREVIEW_LENGTH = 500;
@@ -277,13 +278,12 @@ export class OpenAIProvider extends BaseProvider {
         // This preserves meaningful whitespace in regular text chunks during streaming
         // (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
         if (hadReasoningTags) {
-            //
+            // Collapse multiple spaces/tabs but preserve newlines for proper paragraph/line breaks
             str = str.replace(/[ \t]+/g, ' ');
             str = str.replace(/\n{3,}/g, '\n\n');
-            // Only trim leading whitespace
-            // This
-
-            str = str.trimStart();
+            // Only trim leading horizontal whitespace (spaces/tabs), NOT newlines
+            // This preserves line breaks between think tags and content (fixes #721)
+            str = str.replace(/^[ \t]+/, '');
         }
         const afterLen = str.length;
         if (hadReasoningTags && afterLen !== beforeLen) {
@@ -438,62 +438,78 @@ export class OpenAIProvider extends BaseProvider {
      * and all tool info is only encoded in the text template.
      */
     extractKimiToolCallsFromText(raw) {
-        if
+        // Return early only if input is null/undefined/empty
+        if (!raw) {
             return { cleanedText: raw, toolCalls: [] };
         }
         const logger = this.getLogger();
         const toolCalls = [];
         let text = raw;
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        // Extract tool calls from complete sections if present
+        if (raw.includes('<|tool_calls_section_begin|>')) {
+            const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
+            text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
+                try {
+                    const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
+                    let m;
+                    while ((m = callRegex.exec(sectionBody)) !== null) {
+                        const rawId = m[1].trim();
+                        const rawArgs = m[2].trim();
+                        // Infer tool name from ID.
+                        let toolName = '';
+                        const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
+                            /^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
+                        if (match) {
+                            toolName = match[1];
+                        }
+                        else {
+                            const colonParts = rawId.split(':');
+                            const head = colonParts[0] || rawId;
+                            const dotParts = head.split('.');
+                            toolName = dotParts[dotParts.length - 1] || head;
+                        }
+                        // Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
+                        toolName = this.normalizeToolName(toolName);
+                        const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
+                        const processedParameters = processToolParameters(sanitizedArgs, toolName);
+                        toolCalls.push({
+                            type: 'tool_call',
+                            id: this.normalizeToHistoryToolId(rawId),
+                            name: toolName,
+                            parameters: processedParameters,
+                        });
                     }
-                        // Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
-                        toolName = this.normalizeToolName(toolName);
-                        const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
-                        const processedParameters = processToolParameters(sanitizedArgs, toolName);
-                        toolCalls.push({
-                            type: 'tool_call',
-                            id: this.normalizeToHistoryToolId(rawId),
-                            name: toolName,
-                            parameters: processedParameters,
-                        });
                 }
-
-
-
-
-        return '';
-        });
-        if (toolCalls.length > 0) {
-            logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
-                toolCallCount: toolCalls.length,
-                originalLength: raw.length,
-                cleanedLength: text.length,
+                catch (err) {
+                    logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
+                }
+                // Strip the entire tool section from user-visible text
+                return '';
             });
+            if (toolCalls.length > 0) {
+                logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
+                    toolCallCount: toolCalls.length,
+                    originalLength: raw.length,
+                    cleanedLength: text.length,
+                });
+            }
         }
+        // ALWAYS run stray token cleanup, even if no complete sections were found
+        // This handles partial sections, malformed tokens, orphaned markers, etc.
+        text = text.replace(/<\|tool_call(?:_(?:begin|end|argument_begin))?\|>/g, '');
+        text = text.replace(/<\|tool_calls_section_(?:begin|end)\|>/g, '');
         // Don't trim - preserve leading/trailing newlines that are important for formatting
         // (e.g., numbered lists from Kimi K2 that have newlines between items)
         return { cleanedText: text, toolCalls };
     }
+    /**
+     * Clean Kimi K2 tool call tokens from thinking content.
+     * Used when extracting thinking from <think> tags that may contain embedded tool calls.
+     * @issue #749
+     */
+    cleanThinkingContent(thought) {
+        return this.extractKimiToolCallsFromText(thought).cleanedText;
+    }
     /**
      * @plan:PLAN-20251023-STATELESS-HARDENING.P09
      * @requirement:REQ-SP4-002
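Context: Kimi K2 emits tool calls inline in its text stream using the special tokens parsed in the hunk above. A minimal, runnable sketch of the same extraction technique follows; the sample text and the glob tool name are illustrative, not taken from the package:

// Kimi K2 wraps tool calls in section/call tokens inside ordinary text.
const sample = 'Before.<|tool_calls_section_begin|>' +
    '<|tool_call_begin|>functions.glob:0<|tool_call_argument_begin|>' +
    '{"pattern":"*.ts"}<|tool_call_end|><|tool_calls_section_end|>After.';
const sectionRe = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
const callRe = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
const calls = [];
const cleaned = sample.replace(sectionRe, (_m, body) => {
    let m;
    while ((m = callRe.exec(body)) !== null) {
        // The call ID 'functions.glob:0' encodes the tool name plus a sequence index.
        calls.push({ id: m[1].trim(), args: JSON.parse(m[2].trim()) });
    }
    return ''; // strip the whole section from user-visible text
});
// cleaned === 'Before.After.'; calls[0].id === 'functions.glob:0'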
@@ -910,9 +926,12 @@ export class OpenAIProvider extends BaseProvider {
             }
             else {
                 // Assistant message with tool calls
+                // CRITICAL for Mistral API compatibility (#760):
+                // When tool_calls are present, we must NOT include a content property at all
+                // (not even null). Mistral's OpenAI-compatible API requires this.
+                // See: https://docs.mistral.ai/capabilities/function_calling
                 messages.push({
                     role: 'assistant',
-                    content: text || null,
                     tool_calls: toolCalls.map((tc) => ({
                         id: this.normalizeToOpenAIToolId(tc.id),
                         type: 'function',
@@ -948,10 +967,16 @@ export class OpenAIProvider extends BaseProvider {
             }
             else {
                 for (const tr of toolResponses) {
+                    // CRITICAL for Mistral API compatibility (#760):
+                    // Tool messages must include a name field matching the function name.
+                    // See: https://docs.mistral.ai/capabilities/function_calling
+                    // Note: The OpenAI SDK types don't include name, but Mistral requires it.
+                    // We use a type assertion to add this required field.
                     messages.push({
                         role: 'tool',
                         content: this.buildToolResponseContent(tr, config),
                         tool_call_id: this.normalizeToOpenAIToolId(tr.callId),
+                        name: tr.toolName,
                     });
                 }
             }
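Context: the two Mistral-compatibility hunks above amount to two wire-format rules, sketched below with illustrative values (the get_weather tool and the ID value are made up; the 9-character alphanumeric ID shape comes from the ToolIdStrategy change later in this diff):

// Rule 1: assistant turns that carry tool_calls omit `content` entirely -
// Mistral's OpenAI-compatible endpoint rejects content: null here.
const assistantMsg = {
    role: 'assistant',
    tool_calls: [{
        id: 'AbC123XyZ', // 9-char alphanumeric, as Mistral expects
        type: 'function',
        function: { name: 'get_weather', arguments: '{"city":"Paris"}' },
    }],
};
// Rule 2: the matching tool result must also carry `name`, a field the
// upstream OpenAI SDK types do not declare.
const toolMsg = {
    role: 'tool',
    tool_call_id: 'AbC123XyZ',
    name: 'get_weather',
    content: '{"tempC":12}',
};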
@@ -977,8 +1002,9 @@ export class OpenAIProvider extends BaseProvider {
         const messages = [];
         // Create a ToolIdMapper based on the tool format
         // For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
-
-
+        // For Mistral, this generates 9-char alphanumeric IDs
+        const toolIdMapper = toolFormat === 'kimi' || toolFormat === 'mistral'
+            ? getToolIdStrategy(toolFormat).createMapper(filteredContents)
             : null;
         // Helper to resolve tool call IDs based on format
         const resolveToolCallId = (tc) => {
@@ -1014,9 +1040,12 @@ export class OpenAIProvider extends BaseProvider {
             const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
             if (toolCalls.length > 0) {
                 // Assistant message with tool calls
+                // CRITICAL for Mistral API compatibility (#760):
+                // When tool_calls are present, we must NOT include a content property at all
+                // (not even null). Mistral's OpenAI-compatible API requires this.
+                // See: https://docs.mistral.ai/capabilities/function_calling
                 const baseMessage = {
                     role: 'assistant',
-                    content: text || null,
                     tool_calls: toolCalls.map((tc) => ({
                         id: resolveToolCallId(tc),
                         type: 'function',
@@ -1057,10 +1086,16 @@ export class OpenAIProvider extends BaseProvider {
             // Convert tool responses
             const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
             for (const tr of toolResponses) {
+                // CRITICAL for Mistral API compatibility (#760):
+                // Tool messages must include a name field matching the function name.
+                // See: https://docs.mistral.ai/capabilities/function_calling
+                // Note: The OpenAI SDK types don't include name, but Mistral requires it.
+                // We use a type assertion to add this required field.
                 messages.push({
                     role: 'tool',
                     content: this.buildToolResponseContent(tr, options.config),
                     tool_call_id: resolveToolResponseId(tr),
+                    name: tr.toolName,
                 });
             }
         }
@@ -1506,9 +1541,9 @@ export class OpenAIProvider extends BaseProvider {
         // Buffer for accumulating text chunks for providers that need it
         let textBuffer = '';
         // Use the same detected format from earlier for consistency
-        const
+        const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
         // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-        const shouldBufferText = detectedFormat === 'qwen' ||
+        const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
         // Accumulate thinking content across the entire stream to emit as ONE block
         // This handles fragmented <think>word</think> streaming from Synthetic API
         // @plan PLAN-20251202-THINKING.P16
@@ -1522,6 +1557,8 @@ export class OpenAIProvider extends BaseProvider {
         let streamingUsage = null;
         // Track total chunks for debugging empty responses
         let totalChunksReceived = 0;
+        // Track finish_reason for detecting empty responses (issue #584)
+        let lastFinishReason = null;
         try {
             // Handle streaming response
             for await (const chunk of response) {
@@ -1575,14 +1612,32 @@ export class OpenAIProvider extends BaseProvider {
                     continue;
                 // Parse reasoning_content from streaming delta (Phase 16 integration)
                 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+                // Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
                 // @plan PLAN-20251202-THINKING.P16
-
+                // @requirement REQ-KIMI-REASONING-001.1
+                const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
                 if (reasoningBlock) {
                     // Accumulate reasoning content - will emit ONE block later
                     accumulatedReasoningContent += reasoningBlock.thought;
                 }
+                // Accumulate tool calls extracted from reasoning_content
+                if (reasoningToolCalls.length > 0) {
+                    for (const toolCall of reasoningToolCalls) {
+                        // Convert ToolCallBlock to accumulated format
+                        const index = accumulatedToolCalls.length;
+                        accumulatedToolCalls[index] = {
+                            id: toolCall.id,
+                            type: 'function',
+                            function: {
+                                name: toolCall.name,
+                                arguments: JSON.stringify(toolCall.parameters),
+                            },
+                        };
+                    }
+                }
                 // Check for finish_reason to detect proper stream ending
                 if (choice.finish_reason) {
+                    lastFinishReason = choice.finish_reason;
                     logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
                         model,
                         finishReason: choice.finish_reason,
@@ -1601,13 +1656,25 @@ export class OpenAIProvider extends BaseProvider {
                 }
                 // Handle text content - buffer for Qwen format, emit immediately for others
                 // Note: Synthetic API sends content that may duplicate reasoning_content.
-                //
+                // We now filter duplicates by tracking when content starts matching reasoning_content.
+                // fixes #721
                 // @plan PLAN-20251202-THINKING.P16
                 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
                 if (rawDeltaContent) {
-
-
-
+                    // For Kimi models, we need to buffer the RAW content without processing
+                    // because Kimi tokens stream incrementally and partial tokens would leak
+                    // through if we try to process them immediately. The buffer will be
+                    // processed when flushed (at sentence boundaries or end of stream).
+                    let deltaContent;
+                    if (isKimiK2Model) {
+                        // For Kimi: Don't process yet - just pass through and let buffering handle it
+                        // We'll extract tool calls and sanitize when we flush the buffer
+                        deltaContent = rawDeltaContent;
+                    }
+                    else {
+                        // For non-Kimi models: sanitize immediately as before
+                        deltaContent = this.sanitizeProviderText(rawDeltaContent);
+                    }
                     if (!deltaContent) {
                         continue;
                     }
@@ -1623,9 +1690,9 @@ export class OpenAIProvider extends BaseProvider {
                     });
                     // Buffer text to avoid stanza formatting
                     textBuffer += deltaContent;
-                    const
-                    const
-                    const hasOpenKimiSection =
+                    const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+                    const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+                    const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
                     // Emit buffered text when we have a complete sentence or paragraph
                     // Look for natural break points, but avoid flushing mid Kimi section
                     if (!hasOpenKimiSection &&
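Context: the marker-counting guard above is what keeps the streaming flush from splitting a Kimi tool section in half; a section is still open whenever more begin markers than end markers have arrived in the buffer. A self-contained illustration:

// Counting (rather than just testing for presence) matters because the
// stream can pause mid-section; flushing then would leak partial tokens.
function hasOpenSection(buffer) {
    const begins = (buffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
    const ends = (buffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
    return begins > ends;
}
console.log(hasOpenSection('a <|tool_calls_section_begin|>...')); // true - hold the flush
console.log(hasOpenSection('a <|tool_calls_section_begin|>x<|tool_calls_section_end|> b')); // false - safe to flush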
@@ -1642,12 +1709,14 @@ export class OpenAIProvider extends BaseProvider {
                         // @requirement REQ-THINK-003
                         const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
                         if (tagBasedThinking) {
+                            // Clean Kimi tokens from thinking content before accumulating
+                            const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
                             // Accumulate thinking content - don't emit yet
                             // Use newline to preserve formatting between chunks (not space)
                             if (accumulatedThinkingContent.length > 0) {
                                 accumulatedThinkingContent += '\n';
                             }
-                            accumulatedThinkingContent +=
+                            accumulatedThinkingContent += cleanedThought;
                             logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
                         }
                         const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -1709,7 +1778,10 @@ export class OpenAIProvider extends BaseProvider {
                         // Always use sanitized text to strip <think> tags (legacy streaming)
                         // Bug fix: Previously Kimi used unsanitized workingText
                         // @plan PLAN-20251202-THINKING.P16
-
+                        // Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+                        // Previously we used cleanedText.trim().length > 0 which dropped spaces,
+                        // causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+                        if (cleanedText.length > 0) {
                             yield {
                                 speaker: 'ai',
                                 blocks: [
@@ -1828,11 +1900,13 @@ export class OpenAIProvider extends BaseProvider {
                 // @plan PLAN-20251202-THINKING.P16
                 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
                 if (tagBasedThinking) {
+                    // Clean Kimi tokens from thinking content before accumulating
+                    const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
                     // Use newline to preserve formatting between chunks (not space)
                     if (accumulatedThinkingContent.length > 0) {
                         accumulatedThinkingContent += '\n';
                     }
-                    accumulatedThinkingContent +=
+                    accumulatedThinkingContent += cleanedThought;
                 }
                 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
                 if (kimiParsed.toolCalls.length > 0) {
@@ -1891,7 +1965,10 @@ export class OpenAIProvider extends BaseProvider {
                 // Always use sanitized text to strip <think> tags (legacy final buffer)
                 // Bug fix: Previously Kimi used unsanitized workingText
                 // @plan PLAN-20251202-THINKING.P16
-
+                // Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+                // Previously we used cleanedText.trim().length > 0 which dropped spaces,
+                // causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+                if (cleanedText.length > 0) {
                     yield {
                         speaker: 'ai',
                         blocks: [
@@ -1923,19 +2000,32 @@ export class OpenAIProvider extends BaseProvider {
             }
             // Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
             // This consolidates token-by-token reasoning from Synthetic API into a single block
+            // Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
             // @plan PLAN-20251202-THINKING.P16
             if (accumulatedReasoningContent.length > 0) {
-
-
-
-
-
-
-
-
-
-
-
+                // Extract Kimi tool calls from the complete accumulated reasoning content
+                const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+                // Emit the cleaned thinking block
+                if (cleanedReasoning.length > 0) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: [
+                            {
+                                type: 'thinking',
+                                thought: cleanedReasoning,
+                                sourceField: 'reasoning_content',
+                                isHidden: false,
+                            },
+                        ],
+                    };
+                }
+                // Emit any tool calls extracted from reasoning content
+                if (reasoningToolCalls.length > 0) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: reasoningToolCalls,
+                    };
+                }
             }
             // Process and emit tool calls using legacy accumulated approach
             if (accumulatedToolCalls.length > 0) {
@@ -1962,6 +2052,7 @@ export class OpenAIProvider extends BaseProvider {
             };
             // Add usage metadata if we captured it from streaming
             if (streamingUsage) {
+                const cacheMetrics = extractCacheMetrics(streamingUsage);
                 toolCallsContent.metadata = {
                     usage: {
                         promptTokens: streamingUsage.prompt_tokens || 0,
@@ -1969,6 +2060,9 @@ export class OpenAIProvider extends BaseProvider {
                         totalTokens: streamingUsage.total_tokens ||
                             (streamingUsage.prompt_tokens || 0) +
                                 (streamingUsage.completion_tokens || 0),
+                        cachedTokens: cacheMetrics.cachedTokens,
+                        cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                        cacheMissTokens: cacheMetrics.cacheMissTokens,
                     },
                 };
             }
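Context: the new cacheMetricsExtractor helper (added as package/dist/src/providers/utils/cacheMetricsExtractor.js in this release) is not shown in this excerpt. Judging only from the three fields it populates, it plausibly normalizes provider-specific usage fields along these lines; this is a sketch under that assumption, not the shipped implementation:

// Assumed field sources: OpenAI-compatible servers report cache hits under
// usage.prompt_tokens_details.cached_tokens, while Anthropic-style gateways
// use cache_creation_input_tokens / cache_read_input_tokens. The real
// extractor may read different fields.
function extractCacheMetricsSketch(usage) {
    const cachedTokens = usage?.prompt_tokens_details?.cached_tokens ??
        usage?.cache_read_input_tokens ?? 0;
    const cacheCreationTokens = usage?.cache_creation_input_tokens ?? 0;
    const promptTokens = usage?.prompt_tokens ?? 0;
    return {
        cachedTokens,
        cacheCreationTokens,
        cacheMissTokens: Math.max(0, promptTokens - cachedTokens),
    };
}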
@@ -1977,6 +2071,7 @@ export class OpenAIProvider extends BaseProvider {
             }
             // If we have usage information but no tool calls, emit a metadata-only response
             if (streamingUsage && accumulatedToolCalls.length === 0) {
+                const cacheMetrics = extractCacheMetrics(streamingUsage);
                 yield {
                     speaker: 'ai',
                     blocks: [],
@@ -1987,10 +2082,35 @@ export class OpenAIProvider extends BaseProvider {
                             totalTokens: streamingUsage.total_tokens ||
                                 (streamingUsage.prompt_tokens || 0) +
                                     (streamingUsage.completion_tokens || 0),
+                            cachedTokens: cacheMetrics.cachedTokens,
+                            cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                            cacheMissTokens: cacheMetrics.cacheMissTokens,
                         },
                     },
                 };
             }
+            // Detect and handle empty streaming responses after tool calls (issue #584)
+            // Some models (like gpt-oss-120b on OpenRouter) return finish_reason=stop with tools but no text
+            const hasToolsButNoText = lastFinishReason === 'stop' &&
+                accumulatedToolCalls.length > 0 &&
+                _accumulatedText.length === 0 &&
+                textBuffer.length === 0 &&
+                accumulatedReasoningContent.length === 0 &&
+                accumulatedThinkingContent.length === 0;
+            if (hasToolsButNoText) {
+                logger.log(() => `[OpenAIProvider] Model returned tool calls but no text (finish_reason=stop). Requesting continuation for model '${model}'.`, {
+                    model,
+                    toolCallCount: accumulatedToolCalls.length,
+                    baseURL: baseURL ?? this.getBaseURL(),
+                });
+                // Request continuation after tool calls (delegated to shared method)
+                const toolCallsForContinuation = accumulatedToolCalls.map((tc) => ({
+                    id: tc.id,
+                    type: tc.type,
+                    function: tc.function,
+                }));
+                yield* this.requestContinuationAfterToolCalls(toolCallsForContinuation, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders);
+            }
             // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
             // Only warn if we truly got nothing - not even reasoning content
             if (_accumulatedText.length === 0 &&
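Context: the issue #584 guard above reduces to a pure predicate over what the stream produced, which makes the symptom easy to test in isolation (the state shape below is illustrative):

// A turn with finish_reason 'stop', tool calls, and no text, buffered text,
// reasoning, or thinking is "empty" - the provider then asks the model to
// continue instead of surfacing a blank response.
function needsContinuation(state) {
    return state.finishReason === 'stop' &&
        state.toolCallCount > 0 &&
        state.textLength === 0 &&
        state.bufferedLength === 0 &&
        state.reasoningLength === 0 &&
        state.thinkingLength === 0;
}
console.log(needsContinuation({ finishReason: 'stop', toolCallCount: 2,
    textLength: 0, bufferedLength: 0, reasoningLength: 0, thinkingLength: 0 })); // true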
@@ -2051,8 +2171,10 @@ export class OpenAIProvider extends BaseProvider {
         }
         const blocks = [];
         // Parse reasoning_content from response (Phase 16 integration)
-
-
+        // Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
+        // @requirement REQ-KIMI-REASONING-001.2
+        const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseNonStreamingReasoning(choice.message);
+        logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}, tool calls: ${reasoningToolCalls.length}`, {
             hasReasoningContent: 'reasoning_content' in
                 (choice.message ?? {}),
             messageKeys: Object.keys(choice.message ?? {}),
@@ -2060,6 +2182,11 @@ export class OpenAIProvider extends BaseProvider {
         if (reasoningBlock) {
             blocks.push(reasoningBlock);
         }
+        // Add tool calls extracted from reasoning_content
+        if (reasoningToolCalls.length > 0) {
+            blocks.push(...reasoningToolCalls);
+            logger.debug(() => `[Non-streaming] Added ${reasoningToolCalls.length} tool calls from reasoning_content`);
+        }
         // Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
         const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
         let kimiCleanContent;
@@ -2163,6 +2290,7 @@ export class OpenAIProvider extends BaseProvider {
         };
         // Add usage metadata from non-streaming response
         if (completion.usage) {
+            const cacheMetrics = extractCacheMetrics(completion.usage);
             responseContent.metadata = {
                 usage: {
                     promptTokens: completion.usage.prompt_tokens || 0,
@@ -2170,6 +2298,9 @@ export class OpenAIProvider extends BaseProvider {
                     totalTokens: completion.usage.total_tokens ||
                         (completion.usage.prompt_tokens || 0) +
                             (completion.usage.completion_tokens || 0),
+                    cachedTokens: cacheMetrics.cachedTokens,
+                    cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                    cacheMissTokens: cacheMetrics.cacheMissTokens,
                 },
             };
         }
@@ -2177,6 +2308,7 @@ export class OpenAIProvider extends BaseProvider {
         }
         else if (completion.usage) {
             // Emit metadata-only response if no content blocks but have usage info
+            const cacheMetrics = extractCacheMetrics(completion.usage);
             yield {
                 speaker: 'ai',
                 blocks: [],
@@ -2187,6 +2319,9 @@ export class OpenAIProvider extends BaseProvider {
                     totalTokens: completion.usage.total_tokens ||
                         (completion.usage.prompt_tokens || 0) +
                             (completion.usage.completion_tokens || 0),
+                    cachedTokens: cacheMetrics.cachedTokens,
+                    cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                    cacheMissTokens: cacheMetrics.cacheMissTokens,
                 },
                 },
             };
@@ -2459,7 +2594,7 @@ export class OpenAIProvider extends BaseProvider {
             });
             // Dump successful streaming request if enabled
             if (shouldDumpSuccess) {
-                await dumpSDKContext('openai', '/
+                await dumpSDKContext('openai', '/chat/completions', requestBody, { streaming: true }, false, baseURL || 'https://api.openai.com/v1');
             }
             break;
         }
@@ -2492,7 +2627,7 @@ export class OpenAIProvider extends BaseProvider {
             // Dump error if enabled
             if (shouldDumpError) {
                 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-                await dumpSDKContext('openai', '/
+                await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
             }
             // Re-throw other errors as-is
             const capturedErrorMessage = error instanceof Error ? error.message : String(error);
@@ -2530,7 +2665,7 @@ export class OpenAIProvider extends BaseProvider {
             }));
             // Dump successful non-streaming request if enabled
             if (shouldDumpSuccess) {
-                await dumpSDKContext('openai', '/
+                await dumpSDKContext('openai', '/chat/completions', requestBody, response, false, baseURL || 'https://api.openai.com/v1');
             }
             break;
         }
@@ -2569,7 +2704,7 @@ export class OpenAIProvider extends BaseProvider {
             // Dump error if enabled
             if (shouldDumpError) {
                 const dumpErrorMessage = error instanceof Error ? error.message : String(error);
-                await dumpSDKContext('openai', '/
+                await dumpSDKContext('openai', '/chat/completions', requestBody, { error: dumpErrorMessage }, true, baseURL || 'https://api.openai.com/v1');
             }
             const capturedErrorMessage = error instanceof Error ? error.message : String(error);
             const status = typeof error === 'object' &&
@@ -2599,9 +2734,9 @@ export class OpenAIProvider extends BaseProvider {
         // Buffer for accumulating text chunks for providers that need it
         let textBuffer = '';
         // Use the same detected format from earlier for consistency
-        const
+        const isKimiK2Model = model.toLowerCase().includes('kimi-k2');
         // Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
-        const shouldBufferText = detectedFormat === 'qwen' ||
+        const shouldBufferText = detectedFormat === 'qwen' || isKimiK2Model;
         // Accumulate thinking content across the entire stream to emit as ONE block
         // This handles fragmented <think>word</think> streaming from Synthetic API
         // @plan PLAN-20251202-THINKING.P16
@@ -2613,6 +2748,10 @@ export class OpenAIProvider extends BaseProvider {
         let accumulatedReasoningContent = '';
         // Track token usage from streaming chunks
         let streamingUsage = null;
+        // Track finish_reason for detecting empty responses (issue #584)
+        let lastFinishReason = null;
+        // Store pipeline result to avoid duplicate process() calls (CodeRabbit review #764)
+        let cachedPipelineResult = null;
         const allChunks = []; // Collect all chunks first
         try {
             // Handle streaming response - collect all chunks
@@ -2671,15 +2810,31 @@ export class OpenAIProvider extends BaseProvider {
                     continue;
                 // Parse reasoning_content from streaming delta (Pipeline path)
                 // ACCUMULATE instead of yielding immediately to handle token-by-token streaming
+                // Extract embedded Kimi K2 tool calls from reasoning_content (fixes #749)
                 // @plan PLAN-20251202-THINKING.P16
-                // @requirement REQ-THINK-003.1
-                const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
+                // @requirement REQ-THINK-003.1, REQ-KIMI-REASONING-001.1
+                const { thinking: reasoningBlock, toolCalls: reasoningToolCalls } = this.parseStreamingReasoningDelta(choice.delta);
                 if (reasoningBlock) {
                     // Accumulate reasoning content - will emit ONE block later
                     accumulatedReasoningContent += reasoningBlock.thought;
                 }
+                // Add tool calls extracted from reasoning_content to pipeline
+                if (reasoningToolCalls.length > 0) {
+                    // Get current pipeline stats to determine next index
+                    const stats = this.toolCallPipeline.getStats();
+                    let baseIndex = stats.collector.totalCalls;
+                    for (const toolCall of reasoningToolCalls) {
+                        // Add complete tool call as fragments to pipeline
+                        this.toolCallPipeline.addFragment(baseIndex, {
+                            name: toolCall.name,
+                            args: JSON.stringify(toolCall.parameters),
+                        });
+                        baseIndex++;
+                    }
+                }
                 // Check for finish_reason to detect proper stream ending
                 if (choice.finish_reason) {
+                    lastFinishReason = choice.finish_reason;
                     logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
                         model,
                         finishReason: choice.finish_reason,
@@ -2698,13 +2853,24 @@ export class OpenAIProvider extends BaseProvider {
                 }
                 // Handle text content - buffer for Qwen format, emit immediately for others
                 // Note: Synthetic API sends content that may duplicate reasoning_content.
-                // This is the model's behavior - we don't filter it here.
+                // This is the model's behavior - we don't filter it here as detection is unreliable.
                 // @plan PLAN-20251202-THINKING.P16
                 const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
                 if (rawDeltaContent) {
-
-
-
+                    // For Kimi models, we need to buffer the RAW content without processing
+                    // because Kimi tokens stream incrementally and partial tokens would leak
+                    // through if we try to process them immediately. The buffer will be
+                    // processed when flushed (at sentence boundaries or end of stream).
+                    let deltaContent;
+                    if (isKimiK2Model) {
+                        // For Kimi: Don't process yet - just pass through and let buffering handle it
+                        // We'll extract tool calls and sanitize when we flush the buffer
+                        deltaContent = rawDeltaContent;
+                    }
+                    else {
+                        // For non-Kimi models: sanitize immediately as before
+                        deltaContent = this.sanitizeProviderText(rawDeltaContent);
+                    }
                     if (!deltaContent) {
                         continue;
                     }
@@ -2720,9 +2886,9 @@ export class OpenAIProvider extends BaseProvider {
                     });
                     // Buffer text to avoid stanza formatting
                     textBuffer += deltaContent;
-                    const
-                    const
-                    const hasOpenKimiSection =
+                    const kimiBeginCount = (textBuffer.match(/<\|tool_calls_section_begin\|>/g) || []).length;
+                    const kimiEndCount = (textBuffer.match(/<\|tool_calls_section_end\|>/g) || []).length;
+                    const hasOpenKimiSection = kimiBeginCount > kimiEndCount;
                     // Emit buffered text when we have a complete sentence or paragraph
                     // Look for natural break points, avoiding flush mid Kimi section
                     if (!hasOpenKimiSection &&
@@ -2739,12 +2905,14 @@ export class OpenAIProvider extends BaseProvider {
                         // @requirement REQ-THINK-003
                         const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
                         if (tagBasedThinking) {
+                            // Clean Kimi tokens from thinking content before accumulating
+                            const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
                             // Accumulate thinking content - don't emit yet
                             // Use newline to preserve formatting between chunks (not space)
                             if (accumulatedThinkingContent.length > 0) {
                                 accumulatedThinkingContent += '\n';
                             }
-                            accumulatedThinkingContent +=
+                            accumulatedThinkingContent += cleanedThought;
                             logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
                         }
                         const kimiParsed = this.extractKimiToolCallsFromText(workingText);
@@ -2806,7 +2974,10 @@ export class OpenAIProvider extends BaseProvider {
                         // Always use sanitized text to strip <think> tags (pipeline streaming)
                         // Bug fix: Previously Kimi used unsanitized workingText
                         // @plan PLAN-20251202-THINKING.P16
-
+                        // Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+                        // Previously we used cleanedText.trim().length > 0 which dropped spaces,
+                        // causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+                        if (cleanedText.length > 0) {
                             yield {
                                 speaker: 'ai',
                                 blocks: [
@@ -2906,11 +3077,13 @@ export class OpenAIProvider extends BaseProvider {
                 // @plan PLAN-20251202-THINKING.P16
                 const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
                 if (tagBasedThinking) {
+                    // Clean Kimi tokens from thinking content before accumulating
+                    const cleanedThought = this.cleanThinkingContent(tagBasedThinking.thought);
                     // Use newline to preserve formatting between chunks (not space)
                     if (accumulatedThinkingContent.length > 0) {
                         accumulatedThinkingContent += '\n';
                     }
-                    accumulatedThinkingContent +=
+                    accumulatedThinkingContent += cleanedThought;
                 }
                 const kimiParsed = this.extractKimiToolCallsFromText(workingText);
                 if (kimiParsed.toolCalls.length > 0) {
@@ -2969,7 +3142,10 @@ export class OpenAIProvider extends BaseProvider {
                 // Always use sanitized text to strip <think> tags (pipeline final buffer)
                 // Bug fix: Previously Kimi used unsanitized workingText
                 // @plan PLAN-20251202-THINKING.P16
-
+                // Bug fix #721: Emit whitespace-only chunks (e.g., " " between words)
+                // Previously we used cleanedText.trim().length > 0 which dropped spaces,
+                // causing "list 5" to become "list5". Now we emit any non-empty cleanedText.
+                if (cleanedText.length > 0) {
                     yield {
                         speaker: 'ai',
                         blocks: [
@@ -3001,27 +3177,40 @@ export class OpenAIProvider extends BaseProvider {
             }
             // Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
             // This consolidates token-by-token reasoning from Synthetic API into a single block
+            // Clean Kimi tokens from the accumulated content (not per-chunk) to handle split tokens
             // @plan PLAN-20251202-THINKING.P16
             if (accumulatedReasoningContent.length > 0) {
-
-
-
-
-
-
-
-
-
-
-
+                // Extract Kimi tool calls from the complete accumulated reasoning content
+                const { cleanedText: cleanedReasoning, toolCalls: reasoningToolCalls } = this.extractKimiToolCallsFromText(accumulatedReasoningContent);
+                // Emit the cleaned thinking block
+                if (cleanedReasoning.length > 0) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: [
+                            {
+                                type: 'thinking',
+                                thought: cleanedReasoning,
+                                sourceField: 'reasoning_content',
+                                isHidden: false,
+                            },
+                        ],
+                    };
+                }
+                // Emit any tool calls extracted from reasoning content
+                if (reasoningToolCalls.length > 0) {
+                    yield {
+                        speaker: 'ai',
+                        blocks: reasoningToolCalls,
+                    };
+                }
             }
             // Process and emit tool calls using the pipeline
-
-            if (
-
+            cachedPipelineResult = await this.toolCallPipeline.process(abortSignal);
+            if (cachedPipelineResult.normalized.length > 0 ||
+                cachedPipelineResult.failed.length > 0) {
                 const blocks = [];
                 // Process successful tool calls
-                for (const normalizedCall of
+                for (const normalizedCall of cachedPipelineResult.normalized) {
                     const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
                     // Process tool parameters with double-escape handling
                     const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
@@ -3033,7 +3222,7 @@ export class OpenAIProvider extends BaseProvider {
                     });
                 }
                 // Handle failed tool calls (could emit as errors or warnings)
-                for (const failed of
+                for (const failed of cachedPipelineResult.failed) {
                     this.getLogger().warn(`Tool call validation failed for index ${failed.index}: ${failed.validationErrors.join(', ')}`);
                 }
                 if (blocks.length > 0) {
@@ -3043,6 +3232,7 @@ export class OpenAIProvider extends BaseProvider {
                 };
                 // Add usage metadata if we captured it from streaming
                 if (streamingUsage) {
+                    const cacheMetrics = extractCacheMetrics(streamingUsage);
                     toolCallsContent.metadata = {
                         usage: {
                             promptTokens: streamingUsage.prompt_tokens || 0,
@@ -3050,6 +3240,9 @@ export class OpenAIProvider extends BaseProvider {
                             totalTokens: streamingUsage.total_tokens ||
                                 (streamingUsage.prompt_tokens || 0) +
                                     (streamingUsage.completion_tokens || 0),
+                            cachedTokens: cacheMetrics.cachedTokens,
+                            cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                            cacheMissTokens: cacheMetrics.cacheMissTokens,
                         },
                     };
                 }
@@ -3059,6 +3252,7 @@ export class OpenAIProvider extends BaseProvider {
             // If we have usage information but no tool calls, emit a metadata-only response
             if (streamingUsage &&
                 this.toolCallPipeline.getStats().collector.totalCalls === 0) {
+                const cacheMetrics = extractCacheMetrics(streamingUsage);
                 yield {
                     speaker: 'ai',
                     blocks: [],
@@ -3069,15 +3263,51 @@ export class OpenAIProvider extends BaseProvider {
                     totalTokens: streamingUsage.total_tokens ||
                         (streamingUsage.prompt_tokens || 0) +
                         (streamingUsage.completion_tokens || 0),
+                    cachedTokens: cacheMetrics.cachedTokens,
+                    cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                    cacheMissTokens: cacheMetrics.cacheMissTokens,
                 },
             },
         };
     }
+    // Detect and handle empty streaming responses after tool calls (issue #584)
+    // Some models (like gpt-oss-120b on OpenRouter) return finish_reason=stop with tools but no text
+    // Use cachedPipelineResult instead of pipelineStats.collector.totalCalls since process() resets the collector (CodeRabbit review #764)
+    const toolCallCount = (cachedPipelineResult?.normalized.length ?? 0) +
+        (cachedPipelineResult?.failed.length ?? 0);
+    const hasToolsButNoText = lastFinishReason === 'stop' &&
+        toolCallCount > 0 &&
+        _accumulatedText.length === 0 &&
+        textBuffer.length === 0 &&
+        accumulatedReasoningContent.length === 0 &&
+        accumulatedThinkingContent.length === 0;
+    if (hasToolsButNoText) {
+        logger.log(() => `[OpenAIProvider] Model returned tool calls but no text (finish_reason=stop). Requesting continuation for model '${model}'.`, {
+            model,
+            toolCallCount,
+            baseURL: baseURL ?? this.getBaseURL(),
+        });
+        // Note: In pipeline mode, tool calls have already been processed.
+        // We need to get the normalized tool calls from the cached pipeline result to build continuation messages.
+        // Use cached result to avoid duplicate process() call that would return empty results (CodeRabbit review #764)
+        if (!cachedPipelineResult) {
+            throw new Error('Pipeline result not cached - this should not happen in pipeline mode');
+        }
+        const toolCallsForHistory = cachedPipelineResult.normalized.map((normalizedCall, index) => ({
+            id: `call_${index}`,
+            type: 'function',
+            function: {
+                name: normalizedCall.name,
+                arguments: JSON.stringify(normalizedCall.args),
+            },
+        }));
+        // Request continuation after tool calls (delegated to shared method)
+        yield* this.requestContinuationAfterToolCalls(toolCallsForHistory, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders);
+    }
     // Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
     // Only warn if we truly got nothing - not even reasoning content
-    const pipelineStats = this.toolCallPipeline.getStats();
     if (_accumulatedText.length === 0 &&
-        pipelineStats.collector.totalCalls === 0 &&
+        toolCallCount === 0 &&
         textBuffer.length === 0 &&
         accumulatedReasoningContent.length === 0 &&
         accumulatedThinkingContent.length === 0) {
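
Note: when hasToolsButNoText fires, requestContinuationAfterToolCalls (added later in this diff) replays the conversation with the assistant's tool calls, placeholder tool results, and a user nudge. A worked example of the messages appended for a single cached tool call, following the helper's body as shown in this diff (the function name and arguments are an illustrative payload only):

    // Messages appended by the continuation path for one tool call at index 0.
    const appended = [
      {
        role: 'assistant',
        tool_calls: [
          {
            id: 'call_0', // from `call_${index}`
            type: 'function',
            function: { name: 'read_file', arguments: '{"path":"src/index.ts"}' }, // example payload
          },
        ],
      },
      { role: 'tool', tool_call_id: 'call_0', content: '[Tool call acknowledged - awaiting execution]' },
      { role: 'user', content: 'The tool calls above have been registered. Please continue with your response.' },
    ];
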
@@ -3101,7 +3331,7 @@ export class OpenAIProvider extends BaseProvider {
         // Log what we DID get for debugging
         logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
             textLength: _accumulatedText.length,
-            toolCallCount
+            toolCallCount,
             textBufferLength: textBuffer.length,
             reasoningLength: accumulatedReasoningContent.length,
             thinkingLength: accumulatedThinkingContent.length,
@@ -3221,6 +3451,7 @@ export class OpenAIProvider extends BaseProvider {
         };
         // Add usage metadata from non-streaming response
         if (completion.usage) {
+            const cacheMetrics = extractCacheMetrics(completion.usage);
             responseContent.metadata = {
                 usage: {
                     promptTokens: completion.usage.prompt_tokens || 0,
@@ -3228,6 +3459,9 @@ export class OpenAIProvider extends BaseProvider {
                     totalTokens: completion.usage.total_tokens ||
                         (completion.usage.prompt_tokens || 0) +
                         (completion.usage.completion_tokens || 0),
+                    cachedTokens: cacheMetrics.cachedTokens,
+                    cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                    cacheMissTokens: cacheMetrics.cacheMissTokens,
                 },
             };
         }
@@ -3235,6 +3469,7 @@ export class OpenAIProvider extends BaseProvider {
         }
         else if (completion.usage) {
             // Emit metadata-only response if no content blocks but have usage info
+            const cacheMetrics = extractCacheMetrics(completion.usage);
             yield {
                 speaker: 'ai',
                 blocks: [],
@@ -3245,6 +3480,9 @@ export class OpenAIProvider extends BaseProvider {
                     totalTokens: completion.usage.total_tokens ||
                         (completion.usage.prompt_tokens || 0) +
                         (completion.usage.completion_tokens || 0),
+                    cachedTokens: cacheMetrics.cachedTokens,
+                    cacheCreationTokens: cacheMetrics.cacheCreationTokens,
+                    cacheMissTokens: cacheMetrics.cacheMissTokens,
                 },
             },
         };
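
Note: taken together, the usage hunks above give the streaming and non-streaming paths the same three cache fields. An illustrative shape of the metadata a consumer can now expect (values are made up; completionTokens sits in unchanged lines outside this diff, and the promptTokens = cachedTokens + cacheMissTokens relationship is presumed from the sketch above):

    // Example metadata emitted after this change (illustrative values).
    const metadata = {
      usage: {
        promptTokens: 1200,
        totalTokens: 1450,      // falls back to prompt + completion when total is absent
        cachedTokens: 1000,     // served from the provider cache
        cacheCreationTokens: 0,
        cacheMissTokens: 200,   // presumably promptTokens - cachedTokens
      },
    };
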
@@ -3279,6 +3517,12 @@ export class OpenAIProvider extends BaseProvider {
             logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
             return 'kimi';
         }
+        // Check for Mistral models (requires 9-char alphanumeric IDs)
+        // This applies to both hosted API and self-hosted Mistral models
+        if (isMistralModel(modelName)) {
+            logger.debug(() => `Auto-detected 'mistral' format for Mistral model: ${modelName}`);
+            return 'mistral';
+        }
         const lowerModelName = modelName.toLowerCase();
         // Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
         if (lowerModelName.includes('glm-4')) {
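
Note: isMistralModel is imported from elsewhere in the package and is not defined in this diff; the "9-char alphanumeric IDs" comment refers to Mistral's API requirement that tool-call IDs be exactly nine characters of [a-zA-Z0-9]. A hypothetical sketch of a matching predicate and a conforming ID builder, assuming simple name-prefix detection (the prefix list and both function bodies are assumptions, not the package's actual implementation):

    // Hypothetical sketch - the real isMistralModel lives outside this diff.
    const MISTRAL_PREFIXES = ['mistral', 'ministral', 'codestral', 'devstral', 'magistral', 'pixtral'];

    function isMistralModel(modelName: string): boolean {
      const name = modelName.toLowerCase();
      // Handle provider-prefixed ids such as "mistralai/mistral-large-latest".
      const base = name.split('/').pop() ?? name;
      return MISTRAL_PREFIXES.some((prefix) => base.startsWith(prefix));
    }

    // Mistral rejects tool_call ids that are not exactly 9 alphanumeric chars.
    function toMistralToolCallId(index: number): string {
      return `call${String(index).padStart(5, '0')}`; // e.g. "call00000" - 9 chars
    }
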
@@ -3361,57 +3605,153 @@ export class OpenAIProvider extends BaseProvider {
      * Parse reasoning_content from streaming delta.
      *
      * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-     * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
+     * @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.1
+     * @issue #749
      */
     parseStreamingReasoningDelta(delta) {
         if (!delta) {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
         // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
         const reasoningContent = delta
             .reasoning_content;
         // Handle absent, null, or non-string
         if (!reasoningContent || typeof reasoningContent !== 'string') {
-            return null;
-        }
-        // Handle empty string
-        if (reasoningContent.length === 0) {
-            return null;
-        }
-        return {
-            type: 'thinking',
-            thought: reasoningContent,
-            sourceField: 'reasoning_content',
-            isHidden: false,
-        };
+            return { thinking: null, toolCalls: [] };
+        }
+        // Handle empty string only - preserve whitespace-only content (spaces, tabs)
+        // to maintain proper formatting in accumulated reasoning (fixes issue #721)
+        if (reasoningContent.length === 0) {
+            return { thinking: null, toolCalls: [] };
+        }
+        // Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+        const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+        // For streaming, preserve whitespace-only content for proper formatting (issue #721)
+        // Only return null if the cleaned text is empty (length 0)
+        const thinkingBlock = cleanedText.length === 0
+            ? null
+            : {
+                type: 'thinking',
+                thought: cleanedText,
+                sourceField: 'reasoning_content',
+                isHidden: false,
+            };
+        return { thinking: thinkingBlock, toolCalls };
     }
     /**
      * Parse reasoning_content from non-streaming message.
      *
      * @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
-     * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
+     * @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4, REQ-KIMI-REASONING-001.2
+     * @issue #749
      */
     parseNonStreamingReasoning(message) {
         if (!message) {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
         // Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
         const reasoningContent = message
             .reasoning_content;
         // Handle absent, null, or non-string
         if (!reasoningContent || typeof reasoningContent !== 'string') {
-            return null;
+            return { thinking: null, toolCalls: [] };
         }
-        // Handle empty string or whitespace-only
+        // Handle empty string or whitespace-only - for non-streaming complete responses,
+        // whitespace-only reasoning is unusual and should be treated as no reasoning
         if (reasoningContent.trim().length === 0) {
-            return null;
+            return { thinking: null, toolCalls: [] };
+        }
+        // Extract Kimi K2 tool calls embedded in reasoning_content (fixes issue #749)
+        const { cleanedText, toolCalls } = this.extractKimiToolCallsFromText(reasoningContent);
+        // For non-streaming, trim whitespace after extraction
+        const trimmedText = cleanedText.trim();
+        const thinkingBlock = trimmedText.length === 0
+            ? null
+            : {
+                type: 'thinking',
+                thought: trimmedText,
+                sourceField: 'reasoning_content',
+                isHidden: false,
+            };
+        return { thinking: thinkingBlock, toolCalls };
+    }
+    /**
+     * Request continuation after tool calls when model returned no text.
+     * This is a helper to avoid code duplication between legacy and pipeline paths.
+     *
+     * @plan PLAN-20250120-DEBUGLOGGING.P15
+     * @issue #584, #764 (CodeRabbit review)
+     */
+    async *requestContinuationAfterToolCalls(toolCalls, messagesWithSystem, requestBody, client, abortSignal, model, logger, customHeaders) {
+        // Build continuation messages
+        const continuationMessages = [
+            ...messagesWithSystem,
+            // Add the assistant's tool calls
+            {
+                role: 'assistant',
+                tool_calls: toolCalls,
+            },
+            // Add placeholder tool responses (tools have NOT been executed yet - only acknowledged)
+            ...toolCalls.map((tc) => ({
+                role: 'tool',
+                tool_call_id: tc.id,
+                content: '[Tool call acknowledged - awaiting execution]',
+            })),
+            // Add continuation prompt
+            {
+                role: 'user',
+                content: 'The tool calls above have been registered. Please continue with your response.',
+            },
+        ];
+        // Make a continuation request (wrap in try-catch since tools were already yielded)
+        try {
+            const continuationResponse = await client.chat.completions.create({
+                ...requestBody,
+                messages: continuationMessages,
+                stream: true, // Always stream for consistency
+            }, {
+                ...(abortSignal ? { signal: abortSignal } : {}),
+                ...(customHeaders ? { headers: customHeaders } : {}),
+            });
+            let accumulatedText = '';
+            // Process the continuation response
+            for await (const chunk of continuationResponse) {
+                if (abortSignal?.aborted) {
+                    break;
+                }
+                const choice = chunk.choices?.[0];
+                if (!choice)
+                    continue;
+                const deltaContent = this.coerceMessageContentToString(choice.delta?.content);
+                if (deltaContent) {
+                    const sanitized = this.sanitizeProviderText(deltaContent);
+                    if (sanitized) {
+                        accumulatedText += sanitized;
+                        yield {
+                            speaker: 'ai',
+                            blocks: [
+                                {
+                                    type: 'text',
+                                    text: sanitized,
+                                },
+                            ],
+                        };
+                    }
+                }
+            }
+            logger.debug(() => `[OpenAIProvider] Continuation request completed, received ${accumulatedText.length} chars`, {
+                model,
+                accumulatedTextLength: accumulatedText.length,
+            });
+        }
+        catch (continuationError) {
+            // Tool calls were already successfully yielded, so log warning and continue
+            logger.warn(() => `[OpenAIProvider] Continuation request failed, but tool calls were already emitted: ${continuationError instanceof Error ? continuationError.message : String(continuationError)}`, {
+                model,
+                error: continuationError,
+            });
+            // Don't re-throw - tool calls were already successful
         }
-        return {
-            type: 'thinking',
-            thought: reasoningContent,
-            sourceField: 'reasoning_content',
-            isHidden: false,
-        };
     }
 }
 //# sourceMappingURL=OpenAIProvider.js.map
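
Note: the parser rewrite above changes the return contract of both reasoning parsers from a nullable thinking block to { thinking, toolCalls }, so Kimi K2 tool calls embedded in reasoning_content are no longer dropped. A sketch of how a caller might consume the new shape, with types inferred from this diff (the ParsedReasoning and ThinkingBlock names here are assumptions for illustration; the field layout matches the returned literals above):

    // Consuming the new { thinking, toolCalls } contract (type names assumed).
    interface ThinkingBlock {
      type: 'thinking';
      thought: string;
      sourceField: 'reasoning_content';
      isHidden: boolean;
    }
    interface ParsedReasoning<TToolCall> {
      thinking: ThinkingBlock | null;
      toolCalls: TToolCall[];
    }

    function handleReasoning<T>(
      parsed: ParsedReasoning<T>,
      emitThinking: (block: ThinkingBlock) => void,
      queueToolCall: (call: T) => void,
    ): void {
      // Streaming preserves whitespace-only thought text (issue #721), so emit as-is.
      if (parsed.thinking) {
        emitThinking(parsed.thinking);
      }
      // Tool calls extracted from reasoning_content (issue #749) surface here.
      for (const call of parsed.toolCalls) {
        queueToolCall(call);
      }
    }
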