@vybestack/llxprt-code-core 0.6.1 → 0.7.0-nightly.251206.43b97dbf4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/prompt-config/defaults/default-prompts.json +5 -2
- package/dist/src/agents/executor.js +2 -2
- package/dist/src/agents/executor.js.map +1 -1
- package/dist/src/auth/precedence.js +9 -10
- package/dist/src/auth/precedence.js.map +1 -1
- package/dist/src/auth/types.d.ts +6 -6
- package/dist/src/config/config.js +10 -4
- package/dist/src/config/config.js.map +1 -1
- package/dist/src/core/coreToolScheduler.d.ts +7 -0
- package/dist/src/core/coreToolScheduler.js +67 -0
- package/dist/src/core/coreToolScheduler.js.map +1 -1
- package/dist/src/core/geminiChat.d.ts +8 -0
- package/dist/src/core/geminiChat.js +63 -5
- package/dist/src/core/geminiChat.js.map +1 -1
- package/dist/src/core/prompts.js +9 -4
- package/dist/src/core/prompts.js.map +1 -1
- package/dist/src/core/turn.js +12 -8
- package/dist/src/core/turn.js.map +1 -1
- package/dist/src/ide/ide-client.js +4 -2
- package/dist/src/ide/ide-client.js.map +1 -1
- package/dist/src/index.d.ts +6 -2
- package/dist/src/index.js +6 -2
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/token-storage/file-token-storage.js +1 -6
- package/dist/src/mcp/token-storage/file-token-storage.js.map +1 -1
- package/dist/src/mcp/token-storage/keychain-token-storage.js +3 -6
- package/dist/src/mcp/token-storage/keychain-token-storage.js.map +1 -1
- package/dist/src/parsers/TextToolCallParser.d.ts +0 -15
- package/dist/src/parsers/TextToolCallParser.js +21 -5
- package/dist/src/parsers/TextToolCallParser.js.map +1 -1
- package/dist/src/prompt-config/defaults/tool-defaults.js +5 -2
- package/dist/src/prompt-config/defaults/tool-defaults.js.map +1 -1
- package/dist/src/prompt-config/defaults/tools/code-search.md +3 -0
- package/dist/src/prompt-config/defaults/tools/direct-web-fetch.md +3 -0
- package/dist/src/prompt-config/defaults/tools/exa-web-search.md +14 -0
- package/dist/src/prompt-config/defaults/tools/{web-fetch.md → google-web-fetch.md} +1 -1
- package/dist/src/prompt-config/defaults/tools/{web-search.md → google-web-search.md} +1 -1
- package/dist/src/providers/BaseProvider.d.ts +3 -0
- package/dist/src/providers/BaseProvider.js +11 -0
- package/dist/src/providers/BaseProvider.js.map +1 -1
- package/dist/src/providers/IProvider.d.ts +3 -0
- package/dist/src/providers/ProviderManager.js +6 -0
- package/dist/src/providers/ProviderManager.js.map +1 -1
- package/dist/src/providers/anthropic/AnthropicProvider.d.ts +0 -1
- package/dist/src/providers/anthropic/AnthropicProvider.js +233 -22
- package/dist/src/providers/anthropic/AnthropicProvider.js.map +1 -1
- package/dist/src/providers/anthropic/schemaConverter.d.ts +63 -0
- package/dist/src/providers/anthropic/schemaConverter.js +189 -0
- package/dist/src/providers/anthropic/schemaConverter.js.map +1 -0
- package/dist/src/providers/gemini/GeminiProvider.js +110 -13
- package/dist/src/providers/gemini/GeminiProvider.js.map +1 -1
- package/dist/src/providers/gemini/thoughtSignatures.d.ts +51 -0
- package/dist/src/providers/gemini/thoughtSignatures.js +189 -0
- package/dist/src/providers/gemini/thoughtSignatures.js.map +1 -0
- package/dist/src/providers/openai/OpenAIProvider.d.ts +78 -1
- package/dist/src/providers/openai/OpenAIProvider.js +1159 -190
- package/dist/src/providers/openai/OpenAIProvider.js.map +1 -1
- package/dist/src/providers/openai/ToolCallNormalizer.d.ts +6 -0
- package/dist/src/providers/openai/ToolCallNormalizer.js +16 -2
- package/dist/src/providers/openai/ToolCallNormalizer.js.map +1 -1
- package/dist/src/providers/openai/schemaConverter.d.ts +67 -0
- package/dist/src/providers/openai/schemaConverter.js +191 -0
- package/dist/src/providers/openai/schemaConverter.js.map +1 -0
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.d.ts +0 -4
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js +3 -75
- package/dist/src/providers/openai-responses/OpenAIResponsesProvider.js.map +1 -1
- package/dist/src/providers/openai-responses/schemaConverter.d.ts +65 -0
- package/dist/src/providers/openai-responses/schemaConverter.js +195 -0
- package/dist/src/providers/openai-responses/schemaConverter.js.map +1 -0
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.d.ts +146 -0
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js +1177 -0
- package/dist/src/providers/openai-vercel/OpenAIVercelProvider.js.map +1 -0
- package/dist/src/providers/openai-vercel/errors.d.ts +46 -0
- package/dist/src/providers/openai-vercel/errors.js +137 -0
- package/dist/src/providers/openai-vercel/errors.js.map +1 -0
- package/dist/src/providers/openai-vercel/index.d.ts +22 -0
- package/dist/src/providers/openai-vercel/index.js +23 -0
- package/dist/src/providers/openai-vercel/index.js.map +1 -0
- package/dist/src/providers/openai-vercel/messageConversion.d.ts +36 -0
- package/dist/src/providers/openai-vercel/messageConversion.js +410 -0
- package/dist/src/providers/openai-vercel/messageConversion.js.map +1 -0
- package/dist/src/providers/openai-vercel/schemaConverter.d.ts +66 -0
- package/dist/src/providers/openai-vercel/schemaConverter.js +191 -0
- package/dist/src/providers/openai-vercel/schemaConverter.js.map +1 -0
- package/dist/src/providers/openai-vercel/toolIdUtils.d.ts +33 -0
- package/dist/src/providers/openai-vercel/toolIdUtils.js +117 -0
- package/dist/src/providers/openai-vercel/toolIdUtils.js.map +1 -0
- package/dist/src/providers/reasoning/reasoningUtils.d.ts +43 -0
- package/dist/src/providers/reasoning/reasoningUtils.js +92 -0
- package/dist/src/providers/reasoning/reasoningUtils.js.map +1 -0
- package/dist/src/providers/utils/localEndpoint.js +6 -2
- package/dist/src/providers/utils/localEndpoint.js.map +1 -1
- package/dist/src/runtime/AgentRuntimeContext.d.ts +27 -0
- package/dist/src/runtime/AgentRuntimeContext.js.map +1 -1
- package/dist/src/runtime/createAgentRuntimeContext.js +27 -1
- package/dist/src/runtime/createAgentRuntimeContext.js.map +1 -1
- package/dist/src/services/history/IContent.d.ts +6 -0
- package/dist/src/services/history/IContent.js.map +1 -1
- package/dist/src/settings/types.d.ts +1 -1
- package/dist/src/tools/IToolFormatter.d.ts +1 -1
- package/dist/src/tools/ToolFormatter.js +14 -2
- package/dist/src/tools/ToolFormatter.js.map +1 -1
- package/dist/src/tools/ToolIdStrategy.d.ts +72 -0
- package/dist/src/tools/ToolIdStrategy.js +107 -0
- package/dist/src/tools/ToolIdStrategy.js.map +1 -0
- package/dist/src/tools/codesearch.d.ts +21 -0
- package/dist/src/tools/codesearch.js +145 -0
- package/dist/src/tools/codesearch.js.map +1 -0
- package/dist/src/tools/direct-web-fetch.d.ts +22 -0
- package/dist/src/tools/direct-web-fetch.js +169 -0
- package/dist/src/tools/direct-web-fetch.js.map +1 -0
- package/dist/src/tools/exa-web-search.d.ts +24 -0
- package/dist/src/tools/exa-web-search.js +137 -0
- package/dist/src/tools/exa-web-search.js.map +1 -0
- package/dist/src/tools/{web-fetch.d.ts → google-web-fetch.d.ts} +4 -4
- package/dist/src/tools/{web-fetch.js → google-web-fetch.js} +29 -16
- package/dist/src/tools/google-web-fetch.js.map +1 -0
- package/dist/src/tools/{web-search-invocation.d.ts → google-web-search-invocation.d.ts} +1 -1
- package/dist/src/tools/{web-search-invocation.js → google-web-search-invocation.js} +2 -2
- package/dist/src/tools/google-web-search-invocation.js.map +1 -0
- package/dist/src/tools/{web-search.d.ts → google-web-search.d.ts} +4 -4
- package/dist/src/tools/{web-search.js → google-web-search.js} +6 -6
- package/dist/src/tools/google-web-search.js.map +1 -0
- package/dist/src/tools/todo-schemas.d.ts +4 -4
- package/dist/src/tools/tool-error.d.ts +4 -1
- package/dist/src/tools/tool-error.js +5 -0
- package/dist/src/tools/tool-error.js.map +1 -1
- package/dist/src/utils/fetch.d.ts +1 -1
- package/dist/src/utils/fetch.js +24 -2
- package/dist/src/utils/fetch.js.map +1 -1
- package/dist/src/utils/filesearch/ignore.js +3 -2
- package/dist/src/utils/filesearch/ignore.js.map +1 -1
- package/dist/src/utils/gitIgnoreParser.js +2 -1
- package/dist/src/utils/gitIgnoreParser.js.map +1 -1
- package/dist/src/utils/schemaValidator.js +41 -6
- package/dist/src/utils/schemaValidator.js.map +1 -1
- package/package.json +7 -1
- package/dist/src/tools/web-fetch.js.map +0 -1
- package/dist/src/tools/web-search-invocation.js.map +0 -1
- package/dist/src/tools/web-search.js.map +0 -1
|
@@ -22,9 +22,11 @@ import crypto from 'node:crypto';
|
|
|
22
22
|
import * as http from 'http';
|
|
23
23
|
import * as https from 'https';
|
|
24
24
|
import * as net from 'net';
|
|
25
|
+
import { isKimiModel, getToolIdStrategy, } from '../../tools/ToolIdStrategy.js';
|
|
25
26
|
import { BaseProvider, } from '../BaseProvider.js';
|
|
26
27
|
import { DebugLogger } from '../../debug/index.js';
|
|
27
28
|
import { ToolFormatter } from '../../tools/ToolFormatter.js';
|
|
29
|
+
import { convertToolsToOpenAI } from './schemaConverter.js';
|
|
28
30
|
import { GemmaToolCallParser } from '../../parsers/TextToolCallParser.js';
|
|
29
31
|
import { processToolParameters } from '../../tools/doubleEscapeUtils.js';
|
|
30
32
|
import { getCoreSystemPromptAsync } from '../../core/prompts.js';
|
|
@@ -36,6 +38,7 @@ import { ensureJsonSafe } from '../../utils/unicodeUtils.js';
|
|
|
36
38
|
import { ToolCallPipeline } from './ToolCallPipeline.js';
|
|
37
39
|
import { buildToolResponsePayload, EMPTY_TOOL_RESULT_PLACEHOLDER, } from '../utils/toolResponsePayload.js';
|
|
38
40
|
import { isLocalEndpoint } from '../utils/localEndpoint.js';
|
|
41
|
+
import { filterThinkingForContext, thinkingToReasoningField, extractThinkingBlocks, } from '../reasoning/reasoningUtils.js';
|
|
39
42
|
const MAX_TOOL_RESPONSE_CHARS = 1024;
|
|
40
43
|
const MAX_TOOL_RESPONSE_RETRY_CHARS = 512;
|
|
41
44
|
const TOOL_ARGS_PREVIEW_LENGTH = 500;
|
|
@@ -218,6 +221,278 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
218
221
|
}
|
|
219
222
|
return new OpenAI(clientOptions);
|
|
220
223
|
}
|
|
224
|
+
/**
|
|
225
|
+
* Coerce provider "content" (which may be a string or an array-of-parts)
|
|
226
|
+
* into a plain string. Defensive for OpenAI-compatible providers that emit
|
|
227
|
+
* structured content blocks.
|
|
228
|
+
*/
|
|
229
|
+
coerceMessageContentToString(content) {
|
|
230
|
+
if (typeof content === 'string') {
|
|
231
|
+
return content;
|
|
232
|
+
}
|
|
233
|
+
if (Array.isArray(content)) {
|
|
234
|
+
const parts = [];
|
|
235
|
+
for (const part of content) {
|
|
236
|
+
if (!part)
|
|
237
|
+
continue;
|
|
238
|
+
if (typeof part === 'string') {
|
|
239
|
+
parts.push(part);
|
|
240
|
+
}
|
|
241
|
+
else if (typeof part === 'object' &&
|
|
242
|
+
part !== null &&
|
|
243
|
+
'text' in part &&
|
|
244
|
+
typeof part.text === 'string') {
|
|
245
|
+
parts.push(part.text);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
return parts.length ? parts.join('') : undefined;
|
|
249
|
+
}
|
|
250
|
+
return undefined;
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Strip provider-specific "thinking" / reasoning markup from visible text.
|
|
254
|
+
* This prevents DeepSeek / Kimi-style <think> blocks from leaking into
|
|
255
|
+
* user-visible output or tool arguments.
|
|
256
|
+
*/
|
|
257
|
+
sanitizeProviderText(text) {
|
|
258
|
+
if (text === null || text === undefined) {
|
|
259
|
+
return '';
|
|
260
|
+
}
|
|
261
|
+
const logger = this.getLogger();
|
|
262
|
+
let str = typeof text === 'string' ? text : String(text);
|
|
263
|
+
const beforeLen = str.length;
|
|
264
|
+
const hadReasoningTags = /<(?:think|thinking|analysis)>|<\/(?:think|thinking|analysis)>/i.test(str);
|
|
265
|
+
// DeepSeek / generic <think>...</think> blocks.
|
|
266
|
+
// Replace with a single space to preserve word spacing when tags appear mid-sentence.
|
|
267
|
+
// This prevents "these<think>...</think>5" from becoming "these5" instead of "these 5".
|
|
268
|
+
// Multiple consecutive spaces will be collapsed below.
|
|
269
|
+
str = str.replace(/<think>[\s\S]*?<\/think>/gi, ' ');
|
|
270
|
+
// Alternative reasoning tags some providers use.
|
|
271
|
+
str = str.replace(/<thinking>[\s\S]*?<\/thinking>/gi, ' ');
|
|
272
|
+
str = str.replace(/<analysis>[\s\S]*?<\/analysis>/gi, ' ');
|
|
273
|
+
// Clean up stray unmatched tags - replace with space to preserve word separation.
|
|
274
|
+
str = str.replace(/<\/?(?:think|thinking|analysis)>/gi, ' ');
|
|
275
|
+
// Only clean up whitespace if we had reasoning tags to strip
|
|
276
|
+
// This preserves meaningful whitespace in regular text chunks during streaming
|
|
277
|
+
// (e.g., " 5 Biggest" should remain " 5 Biggest", not become "5 Biggest")
|
|
278
|
+
if (hadReasoningTags) {
|
|
279
|
+
// Clean up multiple consecutive spaces/whitespace that may result from stripping
|
|
280
|
+
str = str.replace(/[ \t]+/g, ' ');
|
|
281
|
+
str = str.replace(/\n{3,}/g, '\n\n');
|
|
282
|
+
// Only trim leading whitespace when think tags were at the beginning
|
|
283
|
+
// This prevents leading spaces from "<think>...</think>text" -> " text"
|
|
284
|
+
// but preserves trailing whitespace for streaming chunk concatenation
|
|
285
|
+
str = str.trimStart();
|
|
286
|
+
}
|
|
287
|
+
const afterLen = str.length;
|
|
288
|
+
if (hadReasoningTags && afterLen !== beforeLen) {
|
|
289
|
+
logger.debug(() => `[OpenAIProvider] Stripped reasoning tags`, {
|
|
290
|
+
beforeLen,
|
|
291
|
+
afterLen,
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
return str;
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* Extract thinking content from <think>, <thinking>, or <analysis> tags
|
|
298
|
+
* and return it as a ThinkingBlock. Returns null if no thinking tags found.
|
|
299
|
+
*
|
|
300
|
+
* This must be called BEFORE sanitizeProviderText which strips these tags.
|
|
301
|
+
*
|
|
302
|
+
* Handles two formats:
|
|
303
|
+
* 1. Standard: <think>Full thinking paragraph here...</think>
|
|
304
|
+
* 2. Fragmented (Synthetic API): <think>word</think><think>word</think>...
|
|
305
|
+
*
|
|
306
|
+
* For fragmented format, joins with spaces. For standard, joins with newlines.
|
|
307
|
+
*
|
|
308
|
+
* @plan PLAN-20251202-THINKING.P16
|
|
309
|
+
* @requirement REQ-THINK-003
|
|
310
|
+
*/
|
|
311
|
+
extractThinkTagsAsBlock(text) {
|
|
312
|
+
if (!text) {
|
|
313
|
+
return null;
|
|
314
|
+
}
|
|
315
|
+
// Collect all thinking content from various tag formats
|
|
316
|
+
// Note: We only trim leading/trailing whitespace from each part, not internal newlines
|
|
317
|
+
// This preserves formatting like numbered lists within thinking content
|
|
318
|
+
const thinkingParts = [];
|
|
319
|
+
// Match <think>...</think>
|
|
320
|
+
const thinkMatches = text.matchAll(/<think>([\s\S]*?)<\/think>/gi);
|
|
321
|
+
for (const match of thinkMatches) {
|
|
322
|
+
const content = match[1];
|
|
323
|
+
if (content?.trim()) {
|
|
324
|
+
// Preserve internal newlines but remove leading/trailing whitespace
|
|
325
|
+
thinkingParts.push(content.trim());
|
|
326
|
+
}
|
|
327
|
+
}
|
|
328
|
+
// Match <thinking>...</thinking>
|
|
329
|
+
const thinkingMatches = text.matchAll(/<thinking>([\s\S]*?)<\/thinking>/gi);
|
|
330
|
+
for (const match of thinkingMatches) {
|
|
331
|
+
const content = match[1];
|
|
332
|
+
if (content?.trim()) {
|
|
333
|
+
thinkingParts.push(content.trim());
|
|
334
|
+
}
|
|
335
|
+
}
|
|
336
|
+
// Match <analysis>...</analysis>
|
|
337
|
+
const analysisMatches = text.matchAll(/<analysis>([\s\S]*?)<\/analysis>/gi);
|
|
338
|
+
for (const match of analysisMatches) {
|
|
339
|
+
const content = match[1];
|
|
340
|
+
if (content?.trim()) {
|
|
341
|
+
thinkingParts.push(content.trim());
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
if (thinkingParts.length === 0) {
|
|
345
|
+
return null;
|
|
346
|
+
}
|
|
347
|
+
// Detect fragmented format: many short parts (likely token-by-token streaming)
|
|
348
|
+
// If average part length is very short (< 10 chars) and we have many parts,
|
|
349
|
+
// it's likely fragmented and should be joined with spaces
|
|
350
|
+
const avgPartLength = thinkingParts.reduce((sum, p) => sum + p.length, 0) /
|
|
351
|
+
thinkingParts.length;
|
|
352
|
+
const isFragmented = thinkingParts.length > 5 && avgPartLength < 15;
|
|
353
|
+
// Join with space for fragmented, newlines for standard multi-paragraph thinking
|
|
354
|
+
const combinedThought = isFragmented
|
|
355
|
+
? thinkingParts.join(' ')
|
|
356
|
+
: thinkingParts.join('\n\n');
|
|
357
|
+
this.getLogger().debug(() => `[OpenAIProvider] Extracted thinking from tags: ${combinedThought.length} chars`, { tagCount: thinkingParts.length, isFragmented, avgPartLength });
|
|
358
|
+
return {
|
|
359
|
+
type: 'thinking',
|
|
360
|
+
thought: combinedThought,
|
|
361
|
+
sourceField: 'think_tags',
|
|
362
|
+
isHidden: false,
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
366
|
+
* Normalize tool name by stripping Kimi-K2 style prefixes.
|
|
367
|
+
*
|
|
368
|
+
* Handles malformed tool names where the model concatenates prefixes like
|
|
369
|
+
* "functions" or "call_functions" with the actual tool name:
|
|
370
|
+
* - "functionslist_directory" -> "list_directory"
|
|
371
|
+
* - "call_functionslist_directory6" -> "list_directory"
|
|
372
|
+
* - "call_functionsglob7" -> "glob"
|
|
373
|
+
*/
|
|
374
|
+
normalizeToolName(name) {
|
|
375
|
+
let normalized = (name || '').trim();
|
|
376
|
+
// Strip Kimi-K2 style prefixes where model concatenates "functions" or "call_functions"
|
|
377
|
+
// with the actual tool name (e.g., "functionslist_directory" -> "list_directory")
|
|
378
|
+
// Pattern: (call_)?functions<actual_tool_name><optional_number>
|
|
379
|
+
const kimiPrefixMatch = /^(?:call_)?functions([a-z_]+[a-z])(\d*)$/i.exec(normalized);
|
|
380
|
+
if (kimiPrefixMatch) {
|
|
381
|
+
const originalName = normalized;
|
|
382
|
+
normalized = kimiPrefixMatch[1];
|
|
383
|
+
this.getLogger().debug(() => `[OpenAIProvider] Stripped Kimi-style prefix from tool name: "${originalName}" -> "${normalized}"`);
|
|
384
|
+
}
|
|
385
|
+
return normalized.toLowerCase();
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Sanitize raw tool argument payloads before JSON parsing:
|
|
389
|
+
* - Remove thinking blocks (<think>...</think>, etc.).
|
|
390
|
+
* - Strip Markdown code fences (```json ... ```).
|
|
391
|
+
* - Try to isolate the main JSON object if wrapped in prose.
|
|
392
|
+
*/
|
|
393
|
+
sanitizeToolArgumentsString(raw) {
|
|
394
|
+
if (raw === null || raw === undefined) {
|
|
395
|
+
return '{}';
|
|
396
|
+
}
|
|
397
|
+
let text;
|
|
398
|
+
if (typeof raw === 'string') {
|
|
399
|
+
text = raw;
|
|
400
|
+
}
|
|
401
|
+
else {
|
|
402
|
+
try {
|
|
403
|
+
text = JSON.stringify(raw);
|
|
404
|
+
}
|
|
405
|
+
catch {
|
|
406
|
+
text = String(raw);
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
text = text.trim();
|
|
410
|
+
// Strip fenced code blocks like ```json { ... } ```.
|
|
411
|
+
if (text.startsWith('```')) {
|
|
412
|
+
text = text.replace(/^```[a-zA-Z0-9_-]*\s*/m, '');
|
|
413
|
+
text = text.replace(/```$/m, '');
|
|
414
|
+
text = text.trim();
|
|
415
|
+
}
|
|
416
|
+
// Remove provider reasoning / thinking markup.
|
|
417
|
+
text = this.sanitizeProviderText(text);
|
|
418
|
+
// If provider wrapped JSON in explanation text, try to isolate the object.
|
|
419
|
+
const firstBrace = text.indexOf('{');
|
|
420
|
+
const lastBrace = text.lastIndexOf('}');
|
|
421
|
+
if (firstBrace !== -1 && lastBrace !== -1 && lastBrace > firstBrace) {
|
|
422
|
+
const candidate = text.slice(firstBrace, lastBrace + 1).trim();
|
|
423
|
+
if (candidate.startsWith('{') && candidate.endsWith('}')) {
|
|
424
|
+
return candidate;
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
return text.length ? text : '{}';
|
|
428
|
+
}
|
|
429
|
+
/**
|
|
430
|
+
* Parse Kimi-K2 `<|tool_calls_section_begin|> ... <|tool_calls_section_end|>`
|
|
431
|
+
* blocks out of a text string.
|
|
432
|
+
*
|
|
433
|
+
* - Returns cleanedText with the whole section removed.
|
|
434
|
+
* - Returns ToolCallBlock[] constructed from the section contents.
|
|
435
|
+
*
|
|
436
|
+
* This is used for HF/vLLM-style Kimi deployments where `tool_calls` is empty
|
|
437
|
+
* and all tool info is only encoded in the text template.
|
|
438
|
+
*/
|
|
439
|
+
extractKimiToolCallsFromText(raw) {
|
|
440
|
+
if (!raw || !raw.includes('<|tool_calls_section_begin|>')) {
|
|
441
|
+
return { cleanedText: raw, toolCalls: [] };
|
|
442
|
+
}
|
|
443
|
+
const logger = this.getLogger();
|
|
444
|
+
const toolCalls = [];
|
|
445
|
+
let text = raw;
|
|
446
|
+
const sectionRegex = /<\|tool_calls_section_begin\|>([\s\S]*?)<\|tool_calls_section_end\|>/g;
|
|
447
|
+
text = text.replace(sectionRegex, (_sectionMatch, sectionBody) => {
|
|
448
|
+
try {
|
|
449
|
+
const callRegex = /<\|tool_call_begin\|>\s*([^<]+?)\s*<\|tool_call_argument_begin\|>\s*([\s\S]*?)\s*<\|tool_call_end\|>/g;
|
|
450
|
+
let m;
|
|
451
|
+
while ((m = callRegex.exec(sectionBody)) !== null) {
|
|
452
|
+
const rawId = m[1].trim();
|
|
453
|
+
const rawArgs = m[2].trim();
|
|
454
|
+
// Infer tool name from ID.
|
|
455
|
+
let toolName = '';
|
|
456
|
+
const match = /^functions\.([A-Za-z0-9_]+):\d+/i.exec(rawId) ||
|
|
457
|
+
/^[A-Za-z0-9_]+\.([A-Za-z0-9_]+):\d+/.exec(rawId);
|
|
458
|
+
if (match) {
|
|
459
|
+
toolName = match[1];
|
|
460
|
+
}
|
|
461
|
+
else {
|
|
462
|
+
const colonParts = rawId.split(':');
|
|
463
|
+
const head = colonParts[0] || rawId;
|
|
464
|
+
const dotParts = head.split('.');
|
|
465
|
+
toolName = dotParts[dotParts.length - 1] || head;
|
|
466
|
+
}
|
|
467
|
+
// Normalize tool name (handles Kimi-K2 style prefixes like call_functionsglob7)
|
|
468
|
+
toolName = this.normalizeToolName(toolName);
|
|
469
|
+
const sanitizedArgs = this.sanitizeToolArgumentsString(rawArgs);
|
|
470
|
+
const processedParameters = processToolParameters(sanitizedArgs, toolName);
|
|
471
|
+
toolCalls.push({
|
|
472
|
+
type: 'tool_call',
|
|
473
|
+
id: this.normalizeToHistoryToolId(rawId),
|
|
474
|
+
name: toolName,
|
|
475
|
+
parameters: processedParameters,
|
|
476
|
+
});
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
catch (err) {
|
|
480
|
+
logger.debug(() => `[OpenAIProvider] Failed to parse Kimi tool_calls_section: ${err}`);
|
|
481
|
+
}
|
|
482
|
+
// Strip the entire tool section from user-visible text
|
|
483
|
+
return '';
|
|
484
|
+
});
|
|
485
|
+
if (toolCalls.length > 0) {
|
|
486
|
+
logger.debug(() => `[OpenAIProvider] Parsed Kimi tool_calls_section`, {
|
|
487
|
+
toolCallCount: toolCalls.length,
|
|
488
|
+
originalLength: raw.length,
|
|
489
|
+
cleanedLength: text.length,
|
|
490
|
+
});
|
|
491
|
+
}
|
|
492
|
+
// Don't trim - preserve leading/trailing newlines that are important for formatting
|
|
493
|
+
// (e.g., numbered lists from Kimi K2 that have newlines between items)
|
|
494
|
+
return { cleanedText: text, toolCalls };
|
|
495
|
+
}
|
|
221
496
|
/**
|
|
222
497
|
* @plan:PLAN-20251023-STATELESS-HARDENING.P09
|
|
223
498
|
* @requirement:REQ-SP4-002
|
|
@@ -685,6 +960,113 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
685
960
|
// This ensures each tool message has a corresponding tool_calls in previous message
|
|
686
961
|
return this.validateToolMessageSequence(messages);
|
|
687
962
|
}
|
|
963
|
+
/**
|
|
964
|
+
* Build messages with optional reasoning_content based on settings.
|
|
965
|
+
*
|
|
966
|
+
* @plan PLAN-20251202-THINKING.P14
|
|
967
|
+
* @requirement REQ-THINK-004, REQ-THINK-006
|
|
968
|
+
*/
|
|
969
|
+
buildMessagesWithReasoning(contents, options, toolFormat) {
|
|
970
|
+
// Read settings with defaults
|
|
971
|
+
const stripPolicy = options.settings.get('reasoning.stripFromContext') ??
|
|
972
|
+
'none';
|
|
973
|
+
const includeInContext = options.settings.get('reasoning.includeInContext') ?? false;
|
|
974
|
+
// Apply strip policy first
|
|
975
|
+
const filteredContents = filterThinkingForContext(contents, stripPolicy);
|
|
976
|
+
const messages = [];
|
|
977
|
+
// Create a ToolIdMapper based on the tool format
|
|
978
|
+
// For Kimi K2, this generates sequential IDs in the format functions.{name}:{index}
|
|
979
|
+
const toolIdMapper = toolFormat === 'kimi'
|
|
980
|
+
? getToolIdStrategy('kimi').createMapper(filteredContents)
|
|
981
|
+
: null;
|
|
982
|
+
// Helper to resolve tool call IDs based on format
|
|
983
|
+
const resolveToolCallId = (tc) => {
|
|
984
|
+
if (toolIdMapper) {
|
|
985
|
+
return toolIdMapper.resolveToolCallId(tc);
|
|
986
|
+
}
|
|
987
|
+
return this.normalizeToOpenAIToolId(tc.id);
|
|
988
|
+
};
|
|
989
|
+
// Helper to resolve tool response IDs based on format
|
|
990
|
+
const resolveToolResponseId = (tr) => {
|
|
991
|
+
if (toolIdMapper) {
|
|
992
|
+
return toolIdMapper.resolveToolResponseId(tr);
|
|
993
|
+
}
|
|
994
|
+
return this.normalizeToOpenAIToolId(tr.callId);
|
|
995
|
+
};
|
|
996
|
+
for (const content of filteredContents) {
|
|
997
|
+
if (content.speaker === 'human') {
|
|
998
|
+
// Convert human messages to user messages
|
|
999
|
+
const textBlocks = content.blocks.filter((b) => b.type === 'text');
|
|
1000
|
+
const text = textBlocks.map((b) => b.text).join('\n');
|
|
1001
|
+
if (text) {
|
|
1002
|
+
messages.push({
|
|
1003
|
+
role: 'user',
|
|
1004
|
+
content: text,
|
|
1005
|
+
});
|
|
1006
|
+
}
|
|
1007
|
+
}
|
|
1008
|
+
else if (content.speaker === 'ai') {
|
|
1009
|
+
// Convert AI messages with optional reasoning_content
|
|
1010
|
+
const textBlocks = content.blocks.filter((b) => b.type === 'text');
|
|
1011
|
+
const text = textBlocks.map((b) => b.text).join('\n');
|
|
1012
|
+
const thinkingBlocks = extractThinkingBlocks(content);
|
|
1013
|
+
const toolCalls = content.blocks.filter((b) => b.type === 'tool_call');
|
|
1014
|
+
if (toolCalls.length > 0) {
|
|
1015
|
+
// Assistant message with tool calls
|
|
1016
|
+
const baseMessage = {
|
|
1017
|
+
role: 'assistant',
|
|
1018
|
+
content: text || null,
|
|
1019
|
+
tool_calls: toolCalls.map((tc) => ({
|
|
1020
|
+
id: resolveToolCallId(tc),
|
|
1021
|
+
type: 'function',
|
|
1022
|
+
function: {
|
|
1023
|
+
name: tc.name,
|
|
1024
|
+
arguments: this.normalizeToolCallArguments(tc.parameters),
|
|
1025
|
+
},
|
|
1026
|
+
})),
|
|
1027
|
+
};
|
|
1028
|
+
if (includeInContext && thinkingBlocks.length > 0) {
|
|
1029
|
+
const messageWithReasoning = baseMessage;
|
|
1030
|
+
messageWithReasoning.reasoning_content =
|
|
1031
|
+
thinkingToReasoningField(thinkingBlocks);
|
|
1032
|
+
messages.push(messageWithReasoning);
|
|
1033
|
+
}
|
|
1034
|
+
else {
|
|
1035
|
+
messages.push(baseMessage);
|
|
1036
|
+
}
|
|
1037
|
+
}
|
|
1038
|
+
else if (textBlocks.length > 0 || thinkingBlocks.length > 0) {
|
|
1039
|
+
// Plain assistant message
|
|
1040
|
+
const baseMessage = {
|
|
1041
|
+
role: 'assistant',
|
|
1042
|
+
content: text,
|
|
1043
|
+
};
|
|
1044
|
+
if (includeInContext && thinkingBlocks.length > 0) {
|
|
1045
|
+
const messageWithReasoning = baseMessage;
|
|
1046
|
+
messageWithReasoning.reasoning_content =
|
|
1047
|
+
thinkingToReasoningField(thinkingBlocks);
|
|
1048
|
+
messages.push(messageWithReasoning);
|
|
1049
|
+
}
|
|
1050
|
+
else {
|
|
1051
|
+
messages.push(baseMessage);
|
|
1052
|
+
}
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
else if (content.speaker === 'tool') {
|
|
1056
|
+
// Convert tool responses
|
|
1057
|
+
const toolResponses = content.blocks.filter((b) => b.type === 'tool_response');
|
|
1058
|
+
for (const tr of toolResponses) {
|
|
1059
|
+
messages.push({
|
|
1060
|
+
role: 'tool',
|
|
1061
|
+
content: this.buildToolResponseContent(tr, options.config),
|
|
1062
|
+
tool_call_id: resolveToolResponseId(tr),
|
|
1063
|
+
});
|
|
1064
|
+
}
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
// Validate tool message sequence to prevent API errors
|
|
1068
|
+
return this.validateToolMessageSequence(messages);
|
|
1069
|
+
}
|
|
688
1070
|
/**
|
|
689
1071
|
* Validates tool message sequence to ensure each tool message has a corresponding tool_calls
|
|
690
1072
|
* This prevents "messages with role 'tool' must be a response to a preceeding message with 'tool_calls'" errors
|
|
@@ -698,6 +1080,18 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
698
1080
|
const logger = this.getLogger();
|
|
699
1081
|
const validatedMessages = [...messages];
|
|
700
1082
|
let removedCount = 0;
|
|
1083
|
+
// Debug: Log the full message sequence for tool call analysis
|
|
1084
|
+
logger.debug(() => `[OpenAIProvider] validateToolMessageSequence: analyzing ${messages.length} messages`, {
|
|
1085
|
+
messageRoles: messages.map((m) => m.role),
|
|
1086
|
+
toolCallIds: messages
|
|
1087
|
+
.filter((m) => m.role === 'assistant' &&
|
|
1088
|
+
'tool_calls' in m &&
|
|
1089
|
+
Array.isArray(m.tool_calls))
|
|
1090
|
+
.flatMap((m) => m.tool_calls?.map((tc) => tc.id) ?? []),
|
|
1091
|
+
toolResponseIds: messages
|
|
1092
|
+
.filter((m) => m.role === 'tool')
|
|
1093
|
+
.map((m) => m.tool_call_id),
|
|
1094
|
+
});
|
|
701
1095
|
// Check if there are any tool_calls in conversation
|
|
702
1096
|
// If no tool_calls exist, this might be isolated tool response testing - skip validation
|
|
703
1097
|
const hasToolCallsInConversation = validatedMessages.some((msg) => msg.role === 'assistant' &&
|
|
@@ -826,13 +1220,8 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
826
1220
|
metadataKeys: Object.keys(metadata ?? {}),
|
|
827
1221
|
});
|
|
828
1222
|
}
|
|
829
|
-
//
|
|
830
|
-
|
|
831
|
-
const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
|
|
832
|
-
if (logger.enabled && toolReplayMode !== 'native') {
|
|
833
|
-
logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
|
|
834
|
-
}
|
|
835
|
-
// Detect the tool format to use (once at the start of the method)
|
|
1223
|
+
// Detect the tool format to use BEFORE building messages
|
|
1224
|
+
// This is needed so that Kimi K2 tool IDs can be generated in the correct format
|
|
836
1225
|
const detectedFormat = this.detectToolFormat();
|
|
837
1226
|
// Log the detected format for debugging
|
|
838
1227
|
logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
|
|
@@ -840,8 +1229,18 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
840
1229
|
detectedFormat,
|
|
841
1230
|
provider: this.name,
|
|
842
1231
|
});
|
|
843
|
-
// Convert
|
|
844
|
-
|
|
1232
|
+
// Convert IContent to OpenAI messages format
|
|
1233
|
+
// Use buildMessagesWithReasoning for reasoning-aware message building
|
|
1234
|
+
// Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
|
|
1235
|
+
const messages = toolReplayMode === 'native'
|
|
1236
|
+
? this.buildMessagesWithReasoning(contents, options, detectedFormat)
|
|
1237
|
+
: this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
|
|
1238
|
+
if (logger.enabled && toolReplayMode !== 'native') {
|
|
1239
|
+
logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
|
|
1240
|
+
}
|
|
1241
|
+
// Convert Gemini format tools to OpenAI format using the schema converter
|
|
1242
|
+
// This ensures required fields are always present in tool schemas
|
|
1243
|
+
let formattedTools = convertToolsToOpenAI(tools);
|
|
845
1244
|
// CRITICAL FIX: Ensure we never pass an empty tools array
|
|
846
1245
|
// The OpenAI API errors when tools=[] but a tool call is attempted
|
|
847
1246
|
if (Array.isArray(formattedTools) && formattedTools.length === 0) {
|
|
@@ -1106,16 +1505,37 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1106
1505
|
// Buffer for accumulating text chunks for providers that need it
|
|
1107
1506
|
let textBuffer = '';
|
|
1108
1507
|
// Use the same detected format from earlier for consistency
|
|
1109
|
-
|
|
1110
|
-
|
|
1508
|
+
const isKimiModel = model.toLowerCase().includes('kimi-k2');
|
|
1509
|
+
// Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
|
|
1510
|
+
const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
|
|
1511
|
+
// Accumulate thinking content across the entire stream to emit as ONE block
|
|
1512
|
+
// This handles fragmented <think>word</think> streaming from Synthetic API
|
|
1513
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1514
|
+
let accumulatedThinkingContent = '';
|
|
1515
|
+
let hasEmittedThinking = false;
|
|
1516
|
+
// Accumulate reasoning_content from streaming deltas (legacy path)
|
|
1517
|
+
// Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
|
|
1518
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1519
|
+
let accumulatedReasoningContent = '';
|
|
1111
1520
|
// Track token usage from streaming chunks
|
|
1112
1521
|
let streamingUsage = null;
|
|
1522
|
+
// Track total chunks for debugging empty responses
|
|
1523
|
+
let totalChunksReceived = 0;
|
|
1113
1524
|
try {
|
|
1114
1525
|
// Handle streaming response
|
|
1115
1526
|
for await (const chunk of response) {
|
|
1527
|
+
totalChunksReceived++;
|
|
1116
1528
|
if (abortSignal?.aborted) {
|
|
1117
1529
|
break;
|
|
1118
1530
|
}
|
|
1531
|
+
// Debug: Log first few chunks and every 10th chunk to understand stream behavior
|
|
1532
|
+
if (totalChunksReceived <= 3 || totalChunksReceived % 10 === 0) {
|
|
1533
|
+
logger.debug(() => `[Streaming legacy] Chunk #${totalChunksReceived} received`, {
|
|
1534
|
+
hasChoices: !!chunk.choices?.length,
|
|
1535
|
+
firstChoiceDelta: chunk.choices?.[0]?.delta,
|
|
1536
|
+
finishReason: chunk.choices?.[0]?.finish_reason,
|
|
1537
|
+
});
|
|
1538
|
+
}
|
|
1119
1539
|
const chunkRecord = chunk;
|
|
1120
1540
|
let parsedData;
|
|
1121
1541
|
const rawData = chunkRecord?.data;
|
|
@@ -1152,6 +1572,14 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1152
1572
|
const choice = chunk.choices?.[0];
|
|
1153
1573
|
if (!choice)
|
|
1154
1574
|
continue;
|
|
1575
|
+
// Parse reasoning_content from streaming delta (Phase 16 integration)
|
|
1576
|
+
// ACCUMULATE instead of yielding immediately to handle token-by-token streaming
|
|
1577
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1578
|
+
const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
|
|
1579
|
+
if (reasoningBlock) {
|
|
1580
|
+
// Accumulate reasoning content - will emit ONE block later
|
|
1581
|
+
accumulatedReasoningContent += reasoningBlock.thought;
|
|
1582
|
+
}
|
|
1155
1583
|
// Check for finish_reason to detect proper stream ending
|
|
1156
1584
|
if (choice.finish_reason) {
|
|
1157
1585
|
logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
|
|
@@ -1165,23 +1593,23 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1165
1593
|
if (choice.finish_reason === 'length') {
|
|
1166
1594
|
logger.debug(() => `Response truncated due to length limit for model ${model}`);
|
|
1167
1595
|
}
|
|
1168
|
-
//
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
blocks: [
|
|
1173
|
-
{
|
|
1174
|
-
type: 'text',
|
|
1175
|
-
text: textBuffer,
|
|
1176
|
-
},
|
|
1177
|
-
],
|
|
1178
|
-
};
|
|
1179
|
-
textBuffer = '';
|
|
1180
|
-
}
|
|
1596
|
+
// Don't flush buffer here on finish - let the final buffer handling
|
|
1597
|
+
// after the loop process it with proper sanitization and think tag extraction
|
|
1598
|
+
// This was causing unsanitized <think> tags to leak into output (legacy path)
|
|
1599
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1181
1600
|
}
|
|
1182
1601
|
// Handle text content - buffer for Qwen format, emit immediately for others
|
|
1183
|
-
|
|
1184
|
-
|
|
1602
|
+
// Note: Synthetic API sends content that may duplicate reasoning_content.
|
|
1603
|
+
// This is the model's behavior - we don't filter it here.
|
|
1604
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1605
|
+
const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
|
|
1606
|
+
if (rawDeltaContent) {
|
|
1607
|
+
const deltaContent = isKimiModel
|
|
1608
|
+
? rawDeltaContent
|
|
1609
|
+
: this.sanitizeProviderText(rawDeltaContent);
|
|
1610
|
+
if (!deltaContent) {
|
|
1611
|
+
continue;
|
|
1612
|
+
}
|
|
1185
1613
|
_accumulatedText += deltaContent;
|
|
1186
1614
|
// Debug log for providers that need buffering
|
|
1187
1615
|
if (shouldBufferText) {
|
|
@@ -1194,22 +1622,103 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1194
1622
|
});
|
|
1195
1623
|
// Buffer text to avoid stanza formatting
|
|
1196
1624
|
textBuffer += deltaContent;
|
|
1625
|
+
const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
|
|
1626
|
+
const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
|
|
1627
|
+
const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
|
|
1197
1628
|
// Emit buffered text when we have a complete sentence or paragraph
|
|
1198
|
-
// Look for natural break points
|
|
1199
|
-
if (
|
|
1200
|
-
textBuffer.
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1629
|
+
// Look for natural break points, but avoid flushing mid Kimi section
|
|
1630
|
+
if (!hasOpenKimiSection &&
|
|
1631
|
+
(textBuffer.includes('\n') ||
|
|
1632
|
+
textBuffer.endsWith('. ') ||
|
|
1633
|
+
textBuffer.endsWith('! ') ||
|
|
1634
|
+
textBuffer.endsWith('? ') ||
|
|
1635
|
+
textBuffer.length > 100)) {
|
|
1636
|
+
const parsedToolCalls = [];
|
|
1637
|
+
let workingText = textBuffer;
|
|
1638
|
+
// Extract <think> tags and ACCUMULATE instead of emitting immediately (legacy path)
|
|
1639
|
+
// This handles fragmented <think>word</think> streaming from Synthetic API
|
|
1640
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1641
|
+
// @requirement REQ-THINK-003
|
|
1642
|
+
const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
|
|
1643
|
+
if (tagBasedThinking) {
|
|
1644
|
+
// Accumulate thinking content - don't emit yet
|
|
1645
|
+
// Use newline to preserve formatting between chunks (not space)
|
|
1646
|
+
if (accumulatedThinkingContent.length > 0) {
|
|
1647
|
+
accumulatedThinkingContent += '\n';
|
|
1648
|
+
}
|
|
1649
|
+
accumulatedThinkingContent += tagBasedThinking.thought;
|
|
1650
|
+
logger.debug(() => `[Streaming legacy] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
|
|
1651
|
+
}
|
|
1652
|
+
const kimiParsed = this.extractKimiToolCallsFromText(workingText);
|
|
1653
|
+
if (kimiParsed.toolCalls.length > 0) {
|
|
1654
|
+
parsedToolCalls.push(...kimiParsed.toolCalls);
|
|
1655
|
+
logger.debug(() => `[OpenAIProvider] Streaming buffer (legacy) parsed Kimi tool calls`, {
|
|
1656
|
+
count: kimiParsed.toolCalls.length,
|
|
1657
|
+
bufferLength: workingText.length,
|
|
1658
|
+
cleanedLength: kimiParsed.cleanedText.length,
|
|
1659
|
+
});
|
|
1660
|
+
}
|
|
1661
|
+
workingText = kimiParsed.cleanedText;
|
|
1662
|
+
const parsingText = this.sanitizeProviderText(workingText);
|
|
1663
|
+
let cleanedText = parsingText;
|
|
1664
|
+
try {
|
|
1665
|
+
const parsedResult = this.textToolParser.parse(parsingText);
|
|
1666
|
+
if (parsedResult.toolCalls.length > 0) {
|
|
1667
|
+
parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
|
|
1668
|
+
type: 'tool_call',
|
|
1669
|
+
id: `text_tool_${Date.now()}_${Math.random()
|
|
1670
|
+
.toString(36)
|
|
1671
|
+
.substring(7)}`,
|
|
1672
|
+
name: this.normalizeToolName(call.name),
|
|
1673
|
+
parameters: call.arguments,
|
|
1674
|
+
})));
|
|
1675
|
+
cleanedText = parsedResult.cleanedContent;
|
|
1676
|
+
}
|
|
1677
|
+
}
|
|
1678
|
+
catch (error) {
|
|
1679
|
+
const logger = this.getLogger();
|
|
1680
|
+
logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
|
|
1681
|
+
}
|
|
1682
|
+
// Emit accumulated thinking BEFORE tool calls or text content (legacy path)
|
|
1683
|
+
// This ensures thinking appears first in the response
|
|
1684
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1685
|
+
if (!hasEmittedThinking &&
|
|
1686
|
+
accumulatedThinkingContent.length > 0 &&
|
|
1687
|
+
(parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
|
|
1688
|
+
yield {
|
|
1689
|
+
speaker: 'ai',
|
|
1690
|
+
blocks: [
|
|
1691
|
+
{
|
|
1692
|
+
type: 'thinking',
|
|
1693
|
+
thought: accumulatedThinkingContent,
|
|
1694
|
+
sourceField: 'think_tags',
|
|
1695
|
+
isHidden: false,
|
|
1696
|
+
},
|
|
1697
|
+
],
|
|
1698
|
+
};
|
|
1699
|
+
hasEmittedThinking = true;
|
|
1700
|
+
logger.debug(() => `[Streaming legacy] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
|
|
1701
|
+
}
|
|
1702
|
+
if (parsedToolCalls.length > 0) {
|
|
1703
|
+
yield {
|
|
1704
|
+
speaker: 'ai',
|
|
1705
|
+
blocks: parsedToolCalls,
|
|
1706
|
+
};
|
|
1707
|
+
}
|
|
1708
|
+
// Always use sanitized text to strip <think> tags (legacy streaming)
|
|
1709
|
+
// Bug fix: Previously Kimi used unsanitized workingText
|
|
1710
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1711
|
+
if (cleanedText.trim().length > 0) {
|
|
1712
|
+
yield {
|
|
1713
|
+
speaker: 'ai',
|
|
1714
|
+
blocks: [
|
|
1715
|
+
{
|
|
1716
|
+
type: 'text',
|
|
1717
|
+
text: cleanedText,
|
|
1718
|
+
},
|
|
1719
|
+
],
|
|
1720
|
+
};
|
|
1721
|
+
}
|
|
1213
1722
|
textBuffer = '';
|
|
1214
1723
|
}
|
|
1215
1724
|
}
|
|
@@ -1308,19 +1817,45 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1308
1817
|
}
|
|
1309
1818
|
// Check buffered text for <tool_call> format before flushing as plain text
|
|
1310
1819
|
if (textBuffer.length > 0) {
|
|
1311
|
-
|
|
1312
|
-
let
|
|
1313
|
-
|
|
1820
|
+
const parsedToolCalls = [];
|
|
1821
|
+
let workingText = textBuffer;
|
|
1822
|
+
// Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
|
|
1823
|
+
// This is the model's behavior - we don't strip it since the model is the source.
|
|
1824
|
+
// The user can configure reasoning display settings if they don't want duplicates.
|
|
1825
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1826
|
+
// Extract any remaining <think> tags from final buffer (legacy path)
|
|
1827
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1828
|
+
const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
|
|
1829
|
+
if (tagBasedThinking) {
|
|
1830
|
+
// Use newline to preserve formatting between chunks (not space)
|
|
1831
|
+
if (accumulatedThinkingContent.length > 0) {
|
|
1832
|
+
accumulatedThinkingContent += '\n';
|
|
1833
|
+
}
|
|
1834
|
+
accumulatedThinkingContent += tagBasedThinking.thought;
|
|
1835
|
+
}
|
|
1836
|
+
const kimiParsed = this.extractKimiToolCallsFromText(workingText);
|
|
1837
|
+
if (kimiParsed.toolCalls.length > 0) {
|
|
1838
|
+
parsedToolCalls.push(...kimiParsed.toolCalls);
|
|
1839
|
+
this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (legacy) parsed Kimi tool calls`, {
|
|
1840
|
+
count: kimiParsed.toolCalls.length,
|
|
1841
|
+
bufferLength: workingText.length,
|
|
1842
|
+
cleanedLength: kimiParsed.cleanedText.length,
|
|
1843
|
+
});
|
|
1844
|
+
}
|
|
1845
|
+
workingText = kimiParsed.cleanedText;
|
|
1846
|
+
const parsingText = this.sanitizeProviderText(workingText);
|
|
1847
|
+
let cleanedText = parsingText;
|
|
1314
1848
|
try {
|
|
1315
|
-
const parsedResult = this.textToolParser.parse(
|
|
1849
|
+
const parsedResult = this.textToolParser.parse(parsingText);
|
|
1316
1850
|
if (parsedResult.toolCalls.length > 0) {
|
|
1317
|
-
|
|
1318
|
-
parsedToolCalls = parsedResult.toolCalls.map((call) => ({
|
|
1851
|
+
parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
|
|
1319
1852
|
type: 'tool_call',
|
|
1320
|
-
id: `text_tool_${Date.now()}_${Math.random()
|
|
1321
|
-
|
|
1853
|
+
id: `text_tool_${Date.now()}_${Math.random()
|
|
1854
|
+
.toString(36)
|
|
1855
|
+
.substring(7)}`,
|
|
1856
|
+
name: this.normalizeToolName(call.name),
|
|
1322
1857
|
parameters: call.arguments,
|
|
1323
|
-
}));
|
|
1858
|
+
})));
|
|
1324
1859
|
cleanedText = parsedResult.cleanedContent;
|
|
1325
1860
|
}
|
|
1326
1861
|
}
|
|
@@ -1328,14 +1863,33 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1328
1863
|
const logger = this.getLogger();
|
|
1329
1864
|
logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
|
|
1330
1865
|
}
|
|
1331
|
-
// Emit tool calls
|
|
1866
|
+
// Emit accumulated thinking BEFORE tool calls or text content (legacy path)
|
|
1867
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1868
|
+
if (!hasEmittedThinking &&
|
|
1869
|
+
accumulatedThinkingContent.length > 0 &&
|
|
1870
|
+
(parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
|
|
1871
|
+
yield {
|
|
1872
|
+
speaker: 'ai',
|
|
1873
|
+
blocks: [
|
|
1874
|
+
{
|
|
1875
|
+
type: 'thinking',
|
|
1876
|
+
thought: accumulatedThinkingContent,
|
|
1877
|
+
sourceField: 'think_tags',
|
|
1878
|
+
isHidden: false,
|
|
1879
|
+
},
|
|
1880
|
+
],
|
|
1881
|
+
};
|
|
1882
|
+
hasEmittedThinking = true;
|
|
1883
|
+
}
|
|
1332
1884
|
if (parsedToolCalls.length > 0) {
|
|
1333
1885
|
yield {
|
|
1334
1886
|
speaker: 'ai',
|
|
1335
1887
|
blocks: parsedToolCalls,
|
|
1336
1888
|
};
|
|
1337
1889
|
}
|
|
1338
|
-
//
|
|
1890
|
+
// Always use sanitized text to strip <think> tags (legacy final buffer)
|
|
1891
|
+
// Bug fix: Previously Kimi used unsanitized workingText
|
|
1892
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1339
1893
|
if (cleanedText.trim().length > 0) {
|
|
1340
1894
|
yield {
|
|
1341
1895
|
speaker: 'ai',
|
|
@@ -1349,18 +1903,54 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1349
1903
|
}
|
|
1350
1904
|
textBuffer = '';
|
|
1351
1905
|
}
|
|
1906
|
+
// Emit any remaining accumulated thinking that wasn't emitted yet (legacy path)
|
|
1907
|
+
// (e.g., if entire response was just thinking with no content)
|
|
1908
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1909
|
+
if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
|
|
1910
|
+
yield {
|
|
1911
|
+
speaker: 'ai',
|
|
1912
|
+
blocks: [
|
|
1913
|
+
{
|
|
1914
|
+
type: 'thinking',
|
|
1915
|
+
thought: accumulatedThinkingContent,
|
|
1916
|
+
sourceField: 'think_tags',
|
|
1917
|
+
isHidden: false,
|
|
1918
|
+
},
|
|
1919
|
+
],
|
|
1920
|
+
};
|
|
1921
|
+
hasEmittedThinking = true;
|
|
1922
|
+
}
|
|
1923
|
+
// Emit accumulated reasoning_content as ONE ThinkingBlock (legacy path)
|
|
1924
|
+
// This consolidates token-by-token reasoning from Synthetic API into a single block
|
|
1925
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1926
|
+
if (accumulatedReasoningContent.length > 0) {
|
|
1927
|
+
yield {
|
|
1928
|
+
speaker: 'ai',
|
|
1929
|
+
blocks: [
|
|
1930
|
+
{
|
|
1931
|
+
type: 'thinking',
|
|
1932
|
+
thought: accumulatedReasoningContent,
|
|
1933
|
+
sourceField: 'reasoning_content',
|
|
1934
|
+
isHidden: false,
|
|
1935
|
+
},
|
|
1936
|
+
],
|
|
1937
|
+
};
|
|
1938
|
+
}
|
|
1352
1939
|
// Process and emit tool calls using legacy accumulated approach
|
|
1353
1940
|
if (accumulatedToolCalls.length > 0) {
|
|
1354
1941
|
const blocks = [];
|
|
1355
1942
|
for (const tc of accumulatedToolCalls) {
|
|
1356
1943
|
if (!tc)
|
|
1357
1944
|
continue;
|
|
1945
|
+
const sanitizedArgs = this.sanitizeToolArgumentsString(tc.function.arguments);
|
|
1946
|
+
// Normalize tool name (handles Kimi-K2 style prefixes)
|
|
1947
|
+
const normalizedName = this.normalizeToolName(tc.function.name || '');
|
|
1358
1948
|
// Process tool parameters with double-escape handling
|
|
1359
|
-
const processedParameters = processToolParameters(
|
|
1949
|
+
const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
|
|
1360
1950
|
blocks.push({
|
|
1361
1951
|
type: 'tool_call',
|
|
1362
1952
|
id: this.normalizeToHistoryToolId(tc.id),
|
|
1363
|
-
name:
|
|
1953
|
+
name: normalizedName,
|
|
1364
1954
|
parameters: processedParameters,
|
|
1365
1955
|
});
|
|
1366
1956
|
}
|
|
@@ -1400,6 +1990,40 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1400
1990
|
},
|
|
1401
1991
|
};
|
|
1402
1992
|
}
|
|
1993
|
+
// Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
|
|
1994
|
+
// Only warn if we truly got nothing - not even reasoning content
|
|
1995
|
+
if (_accumulatedText.length === 0 &&
|
|
1996
|
+
accumulatedToolCalls.length === 0 &&
|
|
1997
|
+
textBuffer.length === 0 &&
|
|
1998
|
+
accumulatedReasoningContent.length === 0 &&
|
|
1999
|
+
accumulatedThinkingContent.length === 0) {
|
|
2000
|
+
// Provide actionable guidance for users
|
|
2001
|
+
const isKimi = model.toLowerCase().includes('kimi');
|
|
2002
|
+
const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
|
|
2003
|
+
const troubleshooting = isKimi
|
|
2004
|
+
? isSynthetic
|
|
2005
|
+
? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
|
|
2006
|
+
: ' This provider may not support streaming with tool calls.'
|
|
2007
|
+
: ' Consider using streaming: "disabled" in your profile settings.';
|
|
2008
|
+
logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${totalChunksReceived} chunks with no content).${troubleshooting}`, {
|
|
2009
|
+
model,
|
|
2010
|
+
baseURL: baseURL ?? this.getBaseURL(),
|
|
2011
|
+
isKimiModel: isKimi,
|
|
2012
|
+
isSyntheticAPI: isSynthetic,
|
|
2013
|
+
totalChunksReceived,
|
|
2014
|
+
});
|
|
2015
|
+
}
|
|
2016
|
+
else {
|
|
2017
|
+
// Log what we DID get for debugging
|
|
2018
|
+
logger.debug(() => `[Streaming legacy] Stream completed with accumulated content`, {
|
|
2019
|
+
textLength: _accumulatedText.length,
|
|
2020
|
+
toolCallCount: accumulatedToolCalls.length,
|
|
2021
|
+
textBufferLength: textBuffer.length,
|
|
2022
|
+
reasoningLength: accumulatedReasoningContent.length,
|
|
2023
|
+
thinkingLength: accumulatedThinkingContent.length,
|
|
2024
|
+
totalChunksReceived,
|
|
2025
|
+
});
|
|
2026
|
+
}
|
|
1403
2027
|
}
|
|
1404
2028
|
else {
|
|
1405
2029
|
// Handle non-streaming response
|
|
@@ -1425,22 +2049,57 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1425
2049
|
}
|
|
1426
2050
|
}
|
|
1427
2051
|
const blocks = [];
|
|
1428
|
-
//
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
})
|
|
2052
|
+
// Parse reasoning_content from response (Phase 16 integration)
|
|
2053
|
+
const reasoningBlock = this.parseNonStreamingReasoning(choice.message);
|
|
2054
|
+
logger.debug(() => `[Non-streaming] parseNonStreamingReasoning result: ${reasoningBlock ? `found (${reasoningBlock.thought?.length} chars)` : 'not found'}`, {
|
|
2055
|
+
hasReasoningContent: 'reasoning_content' in
|
|
2056
|
+
(choice.message ?? {}),
|
|
2057
|
+
messageKeys: Object.keys(choice.message ?? {}),
|
|
2058
|
+
});
|
|
2059
|
+
if (reasoningBlock) {
|
|
2060
|
+
blocks.push(reasoningBlock);
|
|
2061
|
+
}
|
|
2062
|
+
// Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
|
|
2063
|
+
const rawMessageContent = this.coerceMessageContentToString(choice.message?.content);
|
|
2064
|
+
let kimiCleanContent;
|
|
2065
|
+
let kimiToolBlocks = [];
|
|
2066
|
+
if (rawMessageContent) {
|
|
2067
|
+
// Extract <think> tags as ThinkingBlock BEFORE stripping them
|
|
2068
|
+
// Only do this if we didn't already get reasoning from reasoning_content field
|
|
2069
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2070
|
+
// @requirement REQ-THINK-003
|
|
2071
|
+
if (!reasoningBlock) {
|
|
2072
|
+
const tagBasedThinking = this.extractThinkTagsAsBlock(rawMessageContent);
|
|
2073
|
+
if (tagBasedThinking) {
|
|
2074
|
+
blocks.push(tagBasedThinking);
|
|
2075
|
+
logger.debug(() => `[Non-streaming] Extracted thinking from <think> tags: ${tagBasedThinking.thought.length} chars`);
|
|
2076
|
+
}
|
|
2077
|
+
}
|
|
2078
|
+
const kimiParsed = this.extractKimiToolCallsFromText(rawMessageContent);
|
|
2079
|
+
kimiCleanContent = kimiParsed.cleanedText;
|
|
2080
|
+
kimiToolBlocks = kimiParsed.toolCalls;
|
|
2081
|
+
// Always sanitize text content to remove <think> tags
|
|
2082
|
+
// Bug fix: Previously Kimi-K2 used unsanitized kimiCleanContent,
|
|
2083
|
+
// which caused <think> tags to leak into visible output
|
|
2084
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2085
|
+
const cleanedText = this.sanitizeProviderText(kimiCleanContent);
|
|
2086
|
+
if (cleanedText) {
|
|
2087
|
+
blocks.push({
|
|
2088
|
+
type: 'text',
|
|
2089
|
+
text: cleanedText,
|
|
2090
|
+
});
|
|
2091
|
+
}
|
|
1434
2092
|
}
|
|
1435
2093
|
// Handle tool calls
|
|
1436
2094
|
if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
|
|
1437
2095
|
// Use the same detected format from earlier for consistency
|
|
1438
2096
|
for (const toolCall of choice.message.tool_calls) {
|
|
1439
2097
|
if (toolCall.type === 'function') {
|
|
1440
|
-
//
|
|
1441
|
-
const toolName = toolCall.function.name || '';
|
|
2098
|
+
// Normalize tool name (handles Kimi-K2 style prefixes)
|
|
2099
|
+
const toolName = this.normalizeToolName(toolCall.function.name || '');
|
|
2100
|
+
const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
|
|
1442
2101
|
// Process tool parameters with double-escape handling
|
|
1443
|
-
const processedParameters = processToolParameters(
|
|
2102
|
+
const processedParameters = processToolParameters(sanitizedArgs, toolName);
|
|
1444
2103
|
blocks.push({
|
|
1445
2104
|
type: 'tool_call',
|
|
1446
2105
|
id: this.normalizeToHistoryToolId(toolCall.id),
|
|
@@ -1450,42 +2109,49 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1450
2109
|
}
|
|
1451
2110
|
}
|
|
1452
2111
|
}
|
|
2112
|
+
// Add any tool calls parsed from Kimi inline sections
|
|
2113
|
+
if (kimiToolBlocks.length > 0) {
|
|
2114
|
+
blocks.push(...kimiToolBlocks);
|
|
2115
|
+
this.getLogger().debug(() => `[OpenAIProvider] Non-stream legacy added Kimi tool calls from text`, { count: kimiToolBlocks.length });
|
|
2116
|
+
}
|
|
1453
2117
|
// Additionally check for <tool_call> format in text content
|
|
1454
|
-
if (
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
}
|
|
1468
|
-
// Update the text content to remove the tool call parts
|
|
1469
|
-
if (choice.message.content !== parsedResult.cleanedContent) {
|
|
1470
|
-
// Find the text block and update it
|
|
1471
|
-
const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
|
|
1472
|
-
if (textBlockIndex >= 0) {
|
|
1473
|
-
blocks[textBlockIndex].text =
|
|
1474
|
-
parsedResult.cleanedContent;
|
|
1475
|
-
}
|
|
1476
|
-
else if (parsedResult.cleanedContent.trim()) {
|
|
1477
|
-
// Add cleaned text if it doesn't exist
|
|
1478
|
-
blocks.unshift({
|
|
1479
|
-
type: 'text',
|
|
1480
|
-
text: parsedResult.cleanedContent,
|
|
2118
|
+
if (kimiCleanContent) {
|
|
2119
|
+
const cleanedSource = this.sanitizeProviderText(kimiCleanContent);
|
|
2120
|
+
if (cleanedSource) {
|
|
2121
|
+
try {
|
|
2122
|
+
const parsedResult = this.textToolParser.parse(cleanedSource);
|
|
2123
|
+
if (parsedResult.toolCalls.length > 0) {
|
|
2124
|
+
// Add tool calls found in text content
|
|
2125
|
+
for (const call of parsedResult.toolCalls) {
|
|
2126
|
+
blocks.push({
|
|
2127
|
+
type: 'tool_call',
|
|
2128
|
+
id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
|
|
2129
|
+
name: this.normalizeToolName(call.name),
|
|
2130
|
+
parameters: call.arguments,
|
|
1481
2131
|
});
|
|
1482
2132
|
}
|
|
2133
|
+
// Update the text content to remove the tool call parts
|
|
2134
|
+
if (choice.message.content !== parsedResult.cleanedContent) {
|
|
2135
|
+
// Find the text block and update it
|
|
2136
|
+
const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
|
|
2137
|
+
if (textBlockIndex >= 0) {
|
|
2138
|
+
blocks[textBlockIndex].text =
|
|
2139
|
+
parsedResult.cleanedContent;
|
|
2140
|
+
}
|
|
2141
|
+
else if (parsedResult.cleanedContent.trim()) {
|
|
2142
|
+
// Add cleaned text if it doesn't exist
|
|
2143
|
+
blocks.unshift({
|
|
2144
|
+
type: 'text',
|
|
2145
|
+
text: parsedResult.cleanedContent,
|
|
2146
|
+
});
|
|
2147
|
+
}
|
|
2148
|
+
}
|
|
1483
2149
|
}
|
|
1484
2150
|
}
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
2151
|
+
catch (error) {
|
|
2152
|
+
const logger = this.getLogger();
|
|
2153
|
+
logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
|
|
2154
|
+
}
|
|
1489
2155
|
}
|
|
1490
2156
|
}
|
|
1491
2157
|
// Emit the complete response as a single IContent
|
|
@@ -1613,14 +2279,8 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1613
2279
|
}
|
|
1614
2280
|
// Determine tool replay mode for model compatibility (e.g., polaris-alpha)
|
|
1615
2281
|
const toolReplayMode = this.determineToolReplayMode(model);
|
|
1616
|
-
//
|
|
1617
|
-
|
|
1618
|
-
const messages = this.convertToOpenAIMessages(contents, toolReplayMode, configForMessages);
|
|
1619
|
-
// Log tool replay mode usage for debugging
|
|
1620
|
-
if (logger.enabled && toolReplayMode !== 'native') {
|
|
1621
|
-
logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
|
|
1622
|
-
}
|
|
1623
|
-
// Detect the tool format to use (once at the start of the method)
|
|
2282
|
+
// Detect the tool format to use BEFORE building messages
|
|
2283
|
+
// This is needed so that Kimi K2 tool IDs can be generated in the correct format
|
|
1624
2284
|
const detectedFormat = this.detectToolFormat();
|
|
1625
2285
|
// Log the detected format for debugging
|
|
1626
2286
|
logger.debug(() => `[OpenAIProvider] Using tool format '${detectedFormat}' for model '${model}'`, {
|
|
@@ -1628,8 +2288,19 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1628
2288
|
detectedFormat,
|
|
1629
2289
|
provider: this.name,
|
|
1630
2290
|
});
|
|
1631
|
-
// Convert
|
|
1632
|
-
|
|
2291
|
+
// Convert IContent to OpenAI messages format
|
|
2292
|
+
// Use buildMessagesWithReasoning for reasoning-aware message building
|
|
2293
|
+
// Pass detectedFormat so that Kimi K2 tool IDs are generated correctly
|
|
2294
|
+
const messages = toolReplayMode === 'native'
|
|
2295
|
+
? this.buildMessagesWithReasoning(contents, options, detectedFormat)
|
|
2296
|
+
: this.convertToOpenAIMessages(contents, toolReplayMode, options.config ?? options.runtime?.config ?? this.globalConfig);
|
|
2297
|
+
// Log tool replay mode usage for debugging
|
|
2298
|
+
if (logger.enabled && toolReplayMode !== 'native') {
|
|
2299
|
+
logger.debug(() => `[OpenAIProvider] Using textual tool replay mode for model '${model}'`);
|
|
2300
|
+
}
|
|
2301
|
+
// Convert Gemini format tools to OpenAI format using the schema converter
|
|
2302
|
+
// This ensures required fields are always present in tool schemas
|
|
2303
|
+
let formattedTools = convertToolsToOpenAI(tools);
|
|
1633
2304
|
// CRITICAL FIX: Ensure we never pass an empty tools array
|
|
1634
2305
|
// The OpenAI API errors when tools=[] but a tool call is attempted
|
|
1635
2306
|
if (Array.isArray(formattedTools) && formattedTools.length === 0) {
|
|
@@ -1905,8 +2576,18 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1905
2576
|
// Buffer for accumulating text chunks for providers that need it
|
|
1906
2577
|
let textBuffer = '';
|
|
1907
2578
|
// Use the same detected format from earlier for consistency
|
|
1908
|
-
|
|
1909
|
-
|
|
2579
|
+
const isKimiModel = model.toLowerCase().includes('kimi-k2');
|
|
2580
|
+
// Buffer text for Qwen format providers and Kimi-K2 to avoid stanza formatting
|
|
2581
|
+
const shouldBufferText = detectedFormat === 'qwen' || isKimiModel;
|
|
2582
|
+
// Accumulate thinking content across the entire stream to emit as ONE block
|
|
2583
|
+
// This handles fragmented <think>word</think> streaming from Synthetic API
|
|
2584
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2585
|
+
let accumulatedThinkingContent = '';
|
|
2586
|
+
let hasEmittedThinking = false;
|
|
2587
|
+
// Accumulate reasoning_content from streaming deltas (pipeline path)
|
|
2588
|
+
// Synthetic API sends reasoning token-by-token, so we accumulate to emit ONE block
|
|
2589
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2590
|
+
let accumulatedReasoningContent = '';
|
|
1910
2591
|
// Track token usage from streaming chunks
|
|
1911
2592
|
let streamingUsage = null;
|
|
1912
2593
|
const allChunks = []; // Collect all chunks first
|
|
@@ -1918,6 +2599,11 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1918
2599
|
}
|
|
1919
2600
|
allChunks.push(chunk);
|
|
1920
2601
|
}
|
|
2602
|
+
// Debug: Log how many chunks were received
|
|
2603
|
+
logger.debug(() => `[Streaming pipeline] Collected ${allChunks.length} chunks from stream`, {
|
|
2604
|
+
firstChunkDelta: allChunks[0]?.choices?.[0]?.delta,
|
|
2605
|
+
lastChunkFinishReason: allChunks[allChunks.length - 1]?.choices?.[0]?.finish_reason,
|
|
2606
|
+
});
|
|
1921
2607
|
// Now process all collected chunks
|
|
1922
2608
|
for (const chunk of allChunks) {
|
|
1923
2609
|
// Check for cancellation during chunk processing
|
|
@@ -1960,6 +2646,15 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1960
2646
|
const choice = chunk.choices?.[0];
|
|
1961
2647
|
if (!choice)
|
|
1962
2648
|
continue;
|
|
2649
|
+
// Parse reasoning_content from streaming delta (Pipeline path)
|
|
2650
|
+
// ACCUMULATE instead of yielding immediately to handle token-by-token streaming
|
|
2651
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2652
|
+
// @requirement REQ-THINK-003.1
|
|
2653
|
+
const reasoningBlock = this.parseStreamingReasoningDelta(choice.delta);
|
|
2654
|
+
if (reasoningBlock) {
|
|
2655
|
+
// Accumulate reasoning content - will emit ONE block later
|
|
2656
|
+
accumulatedReasoningContent += reasoningBlock.thought;
|
|
2657
|
+
}
|
|
1963
2658
|
// Check for finish_reason to detect proper stream ending
|
|
1964
2659
|
if (choice.finish_reason) {
|
|
1965
2660
|
logger.debug(() => `[Streaming] Stream finished with reason: ${choice.finish_reason}`, {
|
|
@@ -1973,23 +2668,23 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
1973
2668
|
if (choice.finish_reason === 'length') {
|
|
1974
2669
|
logger.debug(() => `Response truncated due to length limit for model ${model}`);
|
|
1975
2670
|
}
|
|
1976
|
-
//
|
|
1977
|
-
|
|
1978
|
-
|
|
1979
|
-
|
|
1980
|
-
blocks: [
|
|
1981
|
-
{
|
|
1982
|
-
type: 'text',
|
|
1983
|
-
text: textBuffer,
|
|
1984
|
-
},
|
|
1985
|
-
],
|
|
1986
|
-
};
|
|
1987
|
-
textBuffer = '';
|
|
1988
|
-
}
|
|
2671
|
+
// Don't flush buffer here on finish - let the final buffer handling
|
|
2672
|
+
// after the loop process it with proper sanitization and think tag extraction
|
|
2673
|
+
// This was causing unsanitized <think> tags to leak into output (pipeline path)
|
|
2674
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
1989
2675
|
}
|
|
1990
2676
|
// Handle text content - buffer for Qwen format, emit immediately for others
|
|
1991
|
-
|
|
1992
|
-
|
|
2677
|
+
// Note: Synthetic API sends content that may duplicate reasoning_content.
|
|
2678
|
+
// This is the model's behavior - we don't filter it here.
|
|
2679
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2680
|
+
const rawDeltaContent = this.coerceMessageContentToString(choice.delta?.content);
|
|
2681
|
+
if (rawDeltaContent) {
|
|
2682
|
+
const deltaContent = isKimiModel
|
|
2683
|
+
? rawDeltaContent
|
|
2684
|
+
: this.sanitizeProviderText(rawDeltaContent);
|
|
2685
|
+
if (!deltaContent) {
|
|
2686
|
+
continue;
|
|
2687
|
+
}
|
|
1993
2688
|
_accumulatedText += deltaContent;
|
|
1994
2689
|
// Debug log for providers that need buffering
|
|
1995
2690
|
if (shouldBufferText) {
|
|
@@ -2002,22 +2697,103 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2002
2697
|
});
|
|
2003
2698
|
// Buffer text to avoid stanza formatting
|
|
2004
2699
|
textBuffer += deltaContent;
|
|
2700
|
+
const hasKimiBegin = textBuffer.includes('<|tool_calls_section_begin|>');
|
|
2701
|
+
const hasKimiEnd = textBuffer.includes('<|tool_calls_section_end|>');
|
|
2702
|
+
const hasOpenKimiSection = hasKimiBegin && !hasKimiEnd;
|
|
2005
2703
|
// Emit buffered text when we have a complete sentence or paragraph
|
|
2006
|
-
// Look for natural break points
|
|
2007
|
-
if (
|
|
2008
|
-
textBuffer.
|
|
2009
|
-
|
|
2010
|
-
|
|
2011
|
-
|
|
2012
|
-
|
|
2013
|
-
|
|
2014
|
-
|
|
2015
|
-
|
|
2016
|
-
|
|
2017
|
-
|
|
2018
|
-
|
|
2019
|
-
|
|
2020
|
-
|
|
2704
|
+
// Look for natural break points, avoiding flush mid Kimi section
|
|
2705
|
+
if (!hasOpenKimiSection &&
|
|
2706
|
+
(textBuffer.includes('\n') ||
|
|
2707
|
+
textBuffer.endsWith('. ') ||
|
|
2708
|
+
textBuffer.endsWith('! ') ||
|
|
2709
|
+
textBuffer.endsWith('? ') ||
|
|
2710
|
+
textBuffer.length > 100)) {
|
|
2711
|
+
const parsedToolCalls = [];
|
|
2712
|
+
let workingText = textBuffer;
|
|
2713
|
+
// Extract <think> tags and ACCUMULATE instead of emitting immediately
|
|
2714
|
+
// This handles fragmented <think>word</think> streaming from Synthetic API
|
|
2715
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2716
|
+
// @requirement REQ-THINK-003
|
|
2717
|
+
const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
|
|
2718
|
+
if (tagBasedThinking) {
|
|
2719
|
+
// Accumulate thinking content - don't emit yet
|
|
2720
|
+
// Use newline to preserve formatting between chunks (not space)
|
|
2721
|
+
if (accumulatedThinkingContent.length > 0) {
|
|
2722
|
+
accumulatedThinkingContent += '\n';
|
|
2723
|
+
}
|
|
2724
|
+
accumulatedThinkingContent += tagBasedThinking.thought;
|
|
2725
|
+
logger.debug(() => `[Streaming] Accumulated thinking: ${accumulatedThinkingContent.length} chars total`);
|
|
2726
|
+
}
|
|
2727
|
+
const kimiParsed = this.extractKimiToolCallsFromText(workingText);
|
|
2728
|
+
if (kimiParsed.toolCalls.length > 0) {
|
|
2729
|
+
parsedToolCalls.push(...kimiParsed.toolCalls);
|
|
2730
|
+
logger.debug(() => `[OpenAIProvider] Streaming buffer (pipeline) parsed Kimi tool calls`, {
|
|
2731
|
+
count: kimiParsed.toolCalls.length,
|
|
2732
|
+
bufferLength: workingText.length,
|
|
2733
|
+
cleanedLength: kimiParsed.cleanedText.length,
|
|
2734
|
+
});
|
|
2735
|
+
}
|
|
2736
|
+
workingText = kimiParsed.cleanedText;
|
|
2737
|
+
const parsingText = this.sanitizeProviderText(workingText);
|
|
2738
|
+
let cleanedText = parsingText;
|
|
2739
|
+
try {
|
|
2740
|
+
const parsedResult = this.textToolParser.parse(parsingText);
|
|
2741
|
+
if (parsedResult.toolCalls.length > 0) {
|
|
2742
|
+
parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
|
|
2743
|
+
type: 'tool_call',
|
|
2744
|
+
id: `text_tool_${Date.now()}_${Math.random()
|
|
2745
|
+
.toString(36)
|
|
2746
|
+
.substring(7)}`,
|
|
2747
|
+
name: this.normalizeToolName(call.name),
|
|
2748
|
+
parameters: call.arguments,
|
|
2749
|
+
})));
|
|
2750
|
+
cleanedText = parsedResult.cleanedContent;
|
|
2751
|
+
}
|
|
2752
|
+
}
|
|
2753
|
+
catch (error) {
|
|
2754
|
+
const logger = this.getLogger();
|
|
2755
|
+
logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
|
|
2756
|
+
}
|
|
2757
|
+
// Emit accumulated thinking BEFORE tool calls or text content
|
|
2758
|
+
// This ensures thinking appears first in the response
|
|
2759
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2760
|
+
if (!hasEmittedThinking &&
|
|
2761
|
+
accumulatedThinkingContent.length > 0 &&
|
|
2762
|
+
(parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
|
|
2763
|
+
yield {
|
|
2764
|
+
speaker: 'ai',
|
|
2765
|
+
blocks: [
|
|
2766
|
+
{
|
|
2767
|
+
type: 'thinking',
|
|
2768
|
+
thought: accumulatedThinkingContent,
|
|
2769
|
+
sourceField: 'think_tags',
|
|
2770
|
+
isHidden: false,
|
|
2771
|
+
},
|
|
2772
|
+
],
|
|
2773
|
+
};
|
|
2774
|
+
hasEmittedThinking = true;
|
|
2775
|
+
logger.debug(() => `[Streaming pipeline] Emitted accumulated thinking: ${accumulatedThinkingContent.length} chars`);
|
|
2776
|
+
}
|
|
2777
|
+
if (parsedToolCalls.length > 0) {
|
|
2778
|
+
yield {
|
|
2779
|
+
speaker: 'ai',
|
|
2780
|
+
blocks: parsedToolCalls,
|
|
2781
|
+
};
|
|
2782
|
+
}
|
|
2783
|
+
// Always use sanitized text to strip <think> tags (pipeline streaming)
|
|
2784
|
+
// Bug fix: Previously Kimi used unsanitized workingText
|
|
2785
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2786
|
+
if (cleanedText.trim().length > 0) {
|
|
2787
|
+
yield {
|
|
2788
|
+
speaker: 'ai',
|
|
2789
|
+
blocks: [
|
|
2790
|
+
{
|
|
2791
|
+
type: 'text',
|
|
2792
|
+
text: cleanedText,
|
|
2793
|
+
},
|
|
2794
|
+
],
|
|
2795
|
+
};
|
|
2796
|
+
}
|
|
2021
2797
|
textBuffer = '';
|
|
2022
2798
|
}
|
|
2023
2799
|
}
|
|
@@ -2097,19 +2873,45 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2097
2873
|
}
|
|
2098
2874
|
// Check buffered text for <tool_call> format before flushing as plain text
|
|
2099
2875
|
if (textBuffer.length > 0) {
|
|
2100
|
-
|
|
2101
|
-
let
|
|
2102
|
-
|
|
2876
|
+
const parsedToolCalls = [];
|
|
2877
|
+
let workingText = textBuffer;
|
|
2878
|
+
// Note: Synthetic API sends reasoning via both reasoning_content AND content fields.
|
|
2879
|
+
// This is the model's behavior - we don't strip it since the model is the source.
|
|
2880
|
+
// The user can configure reasoning display settings if they don't want duplicates.
|
|
2881
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2882
|
+
// Extract any remaining <think> tags from final buffer
|
|
2883
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2884
|
+
const tagBasedThinking = this.extractThinkTagsAsBlock(workingText);
|
|
2885
|
+
if (tagBasedThinking) {
|
|
2886
|
+
// Use newline to preserve formatting between chunks (not space)
|
|
2887
|
+
if (accumulatedThinkingContent.length > 0) {
|
|
2888
|
+
accumulatedThinkingContent += '\n';
|
|
2889
|
+
}
|
|
2890
|
+
accumulatedThinkingContent += tagBasedThinking.thought;
|
|
2891
|
+
}
|
|
2892
|
+
const kimiParsed = this.extractKimiToolCallsFromText(workingText);
|
|
2893
|
+
if (kimiParsed.toolCalls.length > 0) {
|
|
2894
|
+
parsedToolCalls.push(...kimiParsed.toolCalls);
|
|
2895
|
+
this.getLogger().debug(() => `[OpenAIProvider] Final buffer flush (pipeline) parsed Kimi tool calls`, {
|
|
2896
|
+
count: kimiParsed.toolCalls.length,
|
|
2897
|
+
bufferLength: workingText.length,
|
|
2898
|
+
cleanedLength: kimiParsed.cleanedText.length,
|
|
2899
|
+
});
|
|
2900
|
+
}
|
|
2901
|
+
workingText = kimiParsed.cleanedText;
|
|
2902
|
+
const parsingText = this.sanitizeProviderText(workingText);
|
|
2903
|
+
let cleanedText = parsingText;
|
|
2103
2904
|
try {
|
|
2104
|
-
const parsedResult = this.textToolParser.parse(
|
|
2905
|
+
const parsedResult = this.textToolParser.parse(parsingText);
|
|
2105
2906
|
if (parsedResult.toolCalls.length > 0) {
|
|
2106
|
-
|
|
2107
|
-
parsedToolCalls = parsedResult.toolCalls.map((call) => ({
|
|
2907
|
+
parsedToolCalls.push(...parsedResult.toolCalls.map((call) => ({
|
|
2108
2908
|
type: 'tool_call',
|
|
2109
|
-
id: `text_tool_${Date.now()}_${Math.random()
|
|
2110
|
-
|
|
2909
|
+
id: `text_tool_${Date.now()}_${Math.random()
|
|
2910
|
+
.toString(36)
|
|
2911
|
+
.substring(7)}`,
|
|
2912
|
+
name: this.normalizeToolName(call.name),
|
|
2111
2913
|
parameters: call.arguments,
|
|
2112
|
-
}));
|
|
2914
|
+
})));
|
|
2113
2915
|
cleanedText = parsedResult.cleanedContent;
|
|
2114
2916
|
}
|
|
2115
2917
|
}
|
|
@@ -2117,14 +2919,33 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2117
2919
|
const logger = this.getLogger();
|
|
2118
2920
|
logger.debug(() => `TextToolCallParser failed on buffered text: ${error}`);
|
|
2119
2921
|
}
|
|
2120
|
-
// Emit tool calls
|
|
2922
|
+
// Emit accumulated thinking BEFORE tool calls or text content
|
|
2923
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2924
|
+
if (!hasEmittedThinking &&
|
|
2925
|
+
accumulatedThinkingContent.length > 0 &&
|
|
2926
|
+
(parsedToolCalls.length > 0 || cleanedText.trim().length > 0)) {
|
|
2927
|
+
yield {
|
|
2928
|
+
speaker: 'ai',
|
|
2929
|
+
blocks: [
|
|
2930
|
+
{
|
|
2931
|
+
type: 'thinking',
|
|
2932
|
+
thought: accumulatedThinkingContent,
|
|
2933
|
+
sourceField: 'think_tags',
|
|
2934
|
+
isHidden: false,
|
|
2935
|
+
},
|
|
2936
|
+
],
|
|
2937
|
+
};
|
|
2938
|
+
hasEmittedThinking = true;
|
|
2939
|
+
}
|
|
2121
2940
|
if (parsedToolCalls.length > 0) {
|
|
2122
2941
|
yield {
|
|
2123
2942
|
speaker: 'ai',
|
|
2124
2943
|
blocks: parsedToolCalls,
|
|
2125
2944
|
};
|
|
2126
2945
|
}
|
|
2127
|
-
//
|
|
2946
|
+
// Always use sanitized text to strip <think> tags (pipeline final buffer)
|
|
2947
|
+
// Bug fix: Previously Kimi used unsanitized workingText
|
|
2948
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2128
2949
|
if (cleanedText.trim().length > 0) {
|
|
2129
2950
|
yield {
|
|
2130
2951
|
speaker: 'ai',
|
|
@@ -2138,6 +2959,39 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2138
2959
|
}
|
|
2139
2960
|
textBuffer = '';
|
|
2140
2961
|
}
|
|
2962
|
+
// Emit any remaining accumulated thinking that wasn't emitted yet
|
|
2963
|
+
// (e.g., if entire response was just thinking with no content)
|
|
2964
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2965
|
+
if (!hasEmittedThinking && accumulatedThinkingContent.length > 0) {
|
|
2966
|
+
yield {
|
|
2967
|
+
speaker: 'ai',
|
|
2968
|
+
blocks: [
|
|
2969
|
+
{
|
|
2970
|
+
type: 'thinking',
|
|
2971
|
+
thought: accumulatedThinkingContent,
|
|
2972
|
+
sourceField: 'think_tags',
|
|
2973
|
+
isHidden: false,
|
|
2974
|
+
},
|
|
2975
|
+
],
|
|
2976
|
+
};
|
|
2977
|
+
hasEmittedThinking = true;
|
|
2978
|
+
}
|
|
2979
|
+
// Emit accumulated reasoning_content as ONE ThinkingBlock (pipeline path)
|
|
2980
|
+
// This consolidates token-by-token reasoning from Synthetic API into a single block
|
|
2981
|
+
// @plan PLAN-20251202-THINKING.P16
|
|
2982
|
+
if (accumulatedReasoningContent.length > 0) {
|
|
2983
|
+
yield {
|
|
2984
|
+
speaker: 'ai',
|
|
2985
|
+
blocks: [
|
|
2986
|
+
{
|
|
2987
|
+
type: 'thinking',
|
|
2988
|
+
thought: accumulatedReasoningContent,
|
|
2989
|
+
sourceField: 'reasoning_content',
|
|
2990
|
+
isHidden: false,
|
|
2991
|
+
},
|
|
2992
|
+
],
|
|
2993
|
+
};
|
|
2994
|
+
}
|
|
2141
2995
|
// Process and emit tool calls using the pipeline
|
|
2142
2996
|
const pipelineResult = await this.toolCallPipeline.process(abortSignal);
|
|
2143
2997
|
if (pipelineResult.normalized.length > 0 ||
|
|
@@ -2145,8 +2999,9 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2145
2999
|
const blocks = [];
|
|
2146
3000
|
// Process successful tool calls
|
|
2147
3001
|
for (const normalizedCall of pipelineResult.normalized) {
|
|
3002
|
+
const sanitizedArgs = this.sanitizeToolArgumentsString(normalizedCall.originalArgs ?? normalizedCall.args);
|
|
2148
3003
|
// Process tool parameters with double-escape handling
|
|
2149
|
-
const processedParameters = processToolParameters(
|
|
3004
|
+
const processedParameters = processToolParameters(sanitizedArgs, normalizedCall.name);
|
|
2150
3005
|
blocks.push({
|
|
2151
3006
|
type: 'tool_call',
|
|
2152
3007
|
id: this.normalizeToHistoryToolId(`call_${normalizedCall.index}`),
|
|
@@ -2195,6 +3050,41 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2195
3050
|
},
|
|
2196
3051
|
};
|
|
2197
3052
|
}
|
|
3053
|
+
// Detect and warn about empty streaming responses (common with Kimi K2 after tool calls)
|
|
3054
|
+
// Only warn if we truly got nothing - not even reasoning content
|
|
3055
|
+
const pipelineStats = this.toolCallPipeline.getStats();
|
|
3056
|
+
if (_accumulatedText.length === 0 &&
|
|
3057
|
+
pipelineStats.collector.totalCalls === 0 &&
|
|
3058
|
+
textBuffer.length === 0 &&
|
|
3059
|
+
accumulatedReasoningContent.length === 0 &&
|
|
3060
|
+
accumulatedThinkingContent.length === 0) {
|
|
3061
|
+
// Provide actionable guidance for users
|
|
3062
|
+
const isKimi = model.toLowerCase().includes('kimi');
|
|
3063
|
+
const isSynthetic = (baseURL ?? this.getBaseURL())?.includes('synthetic') ?? false;
|
|
3064
|
+
const troubleshooting = isKimi
|
|
3065
|
+
? isSynthetic
|
|
3066
|
+
? ' To fix: use streaming: "disabled" in your profile settings. Synthetic API streaming does not work reliably with tool calls.'
|
|
3067
|
+
: ' This provider may not support streaming with tool calls.'
|
|
3068
|
+
: ' Consider using streaming: "disabled" in your profile settings.';
|
|
3069
|
+
logger.warn(() => `[OpenAIProvider] Empty streaming response for model '${model}' (received ${allChunks.length} chunks with no content).${troubleshooting}`, {
|
|
3070
|
+
model,
|
|
3071
|
+
baseURL: baseURL ?? this.getBaseURL(),
|
|
3072
|
+
isKimiModel: isKimi,
|
|
3073
|
+
isSyntheticAPI: isSynthetic,
|
|
3074
|
+
totalChunksReceived: allChunks.length,
|
|
3075
|
+
});
|
|
3076
|
+
}
|
|
3077
|
+
else {
|
|
3078
|
+
// Log what we DID get for debugging
|
|
3079
|
+
logger.debug(() => `[Streaming pipeline] Stream completed with accumulated content`, {
|
|
3080
|
+
textLength: _accumulatedText.length,
|
|
3081
|
+
toolCallCount: pipelineStats.collector.totalCalls,
|
|
3082
|
+
textBufferLength: textBuffer.length,
|
|
3083
|
+
reasoningLength: accumulatedReasoningContent.length,
|
|
3084
|
+
thinkingLength: accumulatedThinkingContent.length,
|
|
3085
|
+
totalChunksReceived: allChunks.length,
|
|
3086
|
+
});
|
|
3087
|
+
}
|
|
2198
3088
|
}
|
|
2199
3089
|
else {
|
|
2200
3090
|
// Handle non-streaming response
|
|
@@ -2220,12 +3110,22 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2220
3110
|
}
|
|
2221
3111
|
}
|
|
2222
3112
|
const blocks = [];
|
|
2223
|
-
// Handle text content
|
|
2224
|
-
|
|
2225
|
-
|
|
2226
|
-
|
|
2227
|
-
|
|
2228
|
-
|
|
3113
|
+
// Handle text content (strip thinking / reasoning blocks) and Kimi tool sections
|
|
3114
|
+
const pipelineRawMessageContent = this.coerceMessageContentToString(choice.message?.content);
|
|
3115
|
+
let pipelineKimiCleanContent;
|
|
3116
|
+
let pipelineKimiToolBlocks = [];
|
|
3117
|
+
if (pipelineRawMessageContent) {
|
|
3118
|
+
const kimiParsed = this.extractKimiToolCallsFromText(pipelineRawMessageContent);
|
|
3119
|
+
pipelineKimiCleanContent = kimiParsed.cleanedText;
|
|
3120
|
+
pipelineKimiToolBlocks = kimiParsed.toolCalls;
|
|
3121
|
+
// Always use sanitized text - even Kimi-K2 should have consistent tag stripping
|
|
3122
|
+
const cleanedText = this.sanitizeProviderText(pipelineKimiCleanContent);
|
|
3123
|
+
if (cleanedText) {
|
|
3124
|
+
blocks.push({
|
|
3125
|
+
type: 'text',
|
|
3126
|
+
text: cleanedText,
|
|
3127
|
+
});
|
|
3128
|
+
}
|
|
2229
3129
|
}
|
|
2230
3130
|
// Handle tool calls
|
|
2231
3131
|
if (choice.message?.tool_calls && choice.message.tool_calls.length > 0) {
|
|
@@ -2234,8 +3134,9 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2234
3134
|
if (toolCall.type === 'function') {
|
|
2235
3135
|
// Normalize tool name for consistency with streaming path
|
|
2236
3136
|
const normalizedName = this.toolCallPipeline.normalizeToolName(toolCall.function.name, toolCall.function.arguments);
|
|
3137
|
+
const sanitizedArgs = this.sanitizeToolArgumentsString(toolCall.function.arguments);
|
|
2237
3138
|
// Process tool parameters with double-escape handling
|
|
2238
|
-
const processedParameters = processToolParameters(
|
|
3139
|
+
const processedParameters = processToolParameters(sanitizedArgs, normalizedName);
|
|
2239
3140
|
blocks.push({
|
|
2240
3141
|
type: 'tool_call',
|
|
2241
3142
|
id: this.normalizeToHistoryToolId(toolCall.id),
|
|
@@ -2245,42 +3146,48 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2245
3146
|
}
|
|
2246
3147
|
}
|
|
2247
3148
|
}
|
|
3149
|
+
if (pipelineKimiToolBlocks.length > 0) {
|
|
3150
|
+
blocks.push(...pipelineKimiToolBlocks);
|
|
3151
|
+
this.getLogger().debug(() => `[OpenAIProvider] Non-stream pipeline added Kimi tool calls from text`, { count: pipelineKimiToolBlocks.length });
|
|
3152
|
+
}
|
|
2248
3153
|
// Additionally check for <tool_call> format in text content
|
|
2249
|
-
if (
|
|
2250
|
-
|
|
2251
|
-
|
|
2252
|
-
|
|
2253
|
-
|
|
2254
|
-
|
|
2255
|
-
|
|
2256
|
-
|
|
2257
|
-
|
|
2258
|
-
|
|
2259
|
-
|
|
2260
|
-
|
|
2261
|
-
|
|
2262
|
-
}
|
|
2263
|
-
// Update the text content to remove the tool call parts
|
|
2264
|
-
if (choice.message.content !== parsedResult.cleanedContent) {
|
|
2265
|
-
// Find the text block and update it
|
|
2266
|
-
const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
|
|
2267
|
-
if (textBlockIndex >= 0) {
|
|
2268
|
-
blocks[textBlockIndex].text =
|
|
2269
|
-
parsedResult.cleanedContent;
|
|
2270
|
-
}
|
|
2271
|
-
else if (parsedResult.cleanedContent.trim()) {
|
|
2272
|
-
// Add cleaned text if it doesn't exist
|
|
2273
|
-
blocks.unshift({
|
|
2274
|
-
type: 'text',
|
|
2275
|
-
text: parsedResult.cleanedContent,
|
|
3154
|
+
if (pipelineKimiCleanContent) {
|
|
3155
|
+
const cleanedSource = this.sanitizeProviderText(pipelineKimiCleanContent);
|
|
3156
|
+
if (cleanedSource) {
|
|
3157
|
+
try {
|
|
3158
|
+
const parsedResult = this.textToolParser.parse(cleanedSource);
|
|
3159
|
+
if (parsedResult.toolCalls.length > 0) {
|
|
3160
|
+
// Add tool calls found in text content
|
|
3161
|
+
for (const call of parsedResult.toolCalls) {
|
|
3162
|
+
blocks.push({
|
|
3163
|
+
type: 'tool_call',
|
|
3164
|
+
id: `text_tool_${Date.now()}_${Math.random().toString(36).substring(7)}`,
|
|
3165
|
+
name: this.normalizeToolName(call.name),
|
|
3166
|
+
parameters: call.arguments,
|
|
2276
3167
|
});
|
|
2277
3168
|
}
|
|
3169
|
+
// Update the text content to remove the tool call parts
|
|
3170
|
+
if (choice.message.content !== parsedResult.cleanedContent) {
|
|
3171
|
+
// Find the text block and update it
|
|
3172
|
+
const textBlockIndex = blocks.findIndex((block) => block.type === 'text');
|
|
3173
|
+
if (textBlockIndex >= 0) {
|
|
3174
|
+
blocks[textBlockIndex].text =
|
|
3175
|
+
parsedResult.cleanedContent;
|
|
3176
|
+
}
|
|
3177
|
+
else if (parsedResult.cleanedContent.trim()) {
|
|
3178
|
+
// Add cleaned text if it doesn't exist
|
|
3179
|
+
blocks.unshift({
|
|
3180
|
+
type: 'text',
|
|
3181
|
+
text: parsedResult.cleanedContent,
|
|
3182
|
+
});
|
|
3183
|
+
}
|
|
3184
|
+
}
|
|
2278
3185
|
}
|
|
2279
3186
|
}
|
|
2280
|
-
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
3187
|
+
catch (error) {
|
|
3188
|
+
const logger = this.getLogger();
|
|
3189
|
+
logger.debug(() => `TextToolCallParser failed on message content: ${error}`);
|
|
3190
|
+
}
|
|
2284
3191
|
}
|
|
2285
3192
|
}
|
|
2286
3193
|
// Emit the complete response as a single IContent
|
|
@@ -2338,19 +3245,25 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2338
3245
|
}
|
|
2339
3246
|
/**
|
|
2340
3247
|
* Detects the tool call format based on the model being used
|
|
2341
|
-
* @returns The detected tool format ('openai' or '
|
|
3248
|
+
* @returns The detected tool format ('openai', 'qwen', or 'kimi')
|
|
2342
3249
|
*/
|
|
2343
3250
|
detectToolFormat() {
|
|
2344
3251
|
// Auto-detect based on model name if set to 'auto' or not set
|
|
2345
|
-
const modelName =
|
|
3252
|
+
const modelName = this.getModel() || this.getDefaultModel();
|
|
2346
3253
|
const logger = new DebugLogger('llxprt:provider:openai');
|
|
3254
|
+
// Check for Kimi K2 models (requires special ID format: functions.{name}:{index})
|
|
3255
|
+
if (isKimiModel(modelName)) {
|
|
3256
|
+
logger.debug(() => `Auto-detected 'kimi' format for K2 model: ${modelName}`);
|
|
3257
|
+
return 'kimi';
|
|
3258
|
+
}
|
|
3259
|
+
const lowerModelName = modelName.toLowerCase();
|
|
2347
3260
|
// Check for GLM-4 models (glm-4, glm-4.5, glm-4.6, glm-4-5, etc.)
|
|
2348
|
-
if (
|
|
3261
|
+
if (lowerModelName.includes('glm-4')) {
|
|
2349
3262
|
logger.debug(() => `Auto-detected 'qwen' format for GLM-4.x model: ${modelName}`);
|
|
2350
3263
|
return 'qwen';
|
|
2351
3264
|
}
|
|
2352
3265
|
// Check for qwen models
|
|
2353
|
-
if (
|
|
3266
|
+
if (lowerModelName.includes('qwen')) {
|
|
2354
3267
|
logger.debug(() => `Auto-detected 'qwen' format for Qwen model: ${modelName}`);
|
|
2355
3268
|
return 'qwen';
|
|
2356
3269
|
}
|
|
@@ -2421,5 +3334,61 @@ export class OpenAIProvider extends BaseProvider {
|
|
|
2421
3334
|
}
|
|
2422
3335
|
return shouldRetry;
|
|
2423
3336
|
}
|
|
3337
|
+
/**
|
|
3338
|
+
* Parse reasoning_content from streaming delta.
|
|
3339
|
+
*
|
|
3340
|
+
* @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
|
|
3341
|
+
* @requirement REQ-THINK-003.1, REQ-THINK-003.3, REQ-THINK-003.4
|
|
3342
|
+
*/
|
|
3343
|
+
parseStreamingReasoningDelta(delta) {
|
|
3344
|
+
if (!delta) {
|
|
3345
|
+
return null;
|
|
3346
|
+
}
|
|
3347
|
+
// Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
|
|
3348
|
+
const reasoningContent = delta
|
|
3349
|
+
.reasoning_content;
|
|
3350
|
+
// Handle absent, null, or non-string
|
|
3351
|
+
if (!reasoningContent || typeof reasoningContent !== 'string') {
|
|
3352
|
+
return null;
|
|
3353
|
+
}
|
|
3354
|
+
// Handle empty string or whitespace-only
|
|
3355
|
+
if (reasoningContent.trim().length === 0) {
|
|
3356
|
+
return null;
|
|
3357
|
+
}
|
|
3358
|
+
return {
|
|
3359
|
+
type: 'thinking',
|
|
3360
|
+
thought: reasoningContent,
|
|
3361
|
+
sourceField: 'reasoning_content',
|
|
3362
|
+
isHidden: false,
|
|
3363
|
+
};
|
|
3364
|
+
}
|
|
3365
|
+
/**
|
|
3366
|
+
* Parse reasoning_content from non-streaming message.
|
|
3367
|
+
*
|
|
3368
|
+
* @plan PLAN-20251202-THINKING.P11, PLAN-20251202-THINKING.P16
|
|
3369
|
+
* @requirement REQ-THINK-003.2, REQ-THINK-003.3, REQ-THINK-003.4
|
|
3370
|
+
*/
|
|
3371
|
+
parseNonStreamingReasoning(message) {
|
|
3372
|
+
if (!message) {
|
|
3373
|
+
return null;
|
|
3374
|
+
}
|
|
3375
|
+
// Access reasoning_content via type assertion since OpenAI SDK doesn't declare it
|
|
3376
|
+
const reasoningContent = message
|
|
3377
|
+
.reasoning_content;
|
|
3378
|
+
// Handle absent, null, or non-string
|
|
3379
|
+
if (!reasoningContent || typeof reasoningContent !== 'string') {
|
|
3380
|
+
return null;
|
|
3381
|
+
}
|
|
3382
|
+
// Handle empty string or whitespace-only
|
|
3383
|
+
if (reasoningContent.trim().length === 0) {
|
|
3384
|
+
return null;
|
|
3385
|
+
}
|
|
3386
|
+
return {
|
|
3387
|
+
type: 'thinking',
|
|
3388
|
+
thought: reasoningContent,
|
|
3389
|
+
sourceField: 'reasoning_content',
|
|
3390
|
+
isHidden: false,
|
|
3391
|
+
};
|
|
3392
|
+
}
|
|
2424
3393
|
}
|
|
2425
3394
|
//# sourceMappingURL=OpenAIProvider.js.map
|