graphlit-client 1.0.20250622007 → 1.0.20250627001
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -5
- package/dist/client.d.ts +26 -1
- package/dist/client.js +293 -55
- package/dist/generated/graphql-documents.d.ts +21 -0
- package/dist/generated/graphql-documents.js +1378 -0
- package/dist/generated/graphql-types.d.ts +2660 -53
- package/dist/generated/graphql-types.js +119 -0
- package/dist/streaming/llm-formatters.js +68 -5
- package/dist/streaming/providers.d.ts +18 -13
- package/dist/streaming/providers.js +690 -167
- package/dist/streaming/ui-event-adapter.d.ts +7 -0
- package/dist/streaming/ui-event-adapter.js +55 -0
- package/dist/types/internal.d.ts +11 -0
- package/dist/types/ui-events.d.ts +9 -0
- package/package.json +1 -1
@@ -1,3 +1,4 @@
|
|
1
|
+
import * as Types from "../generated/graphql-types.js";
|
1
2
|
import { getModelName } from "../model-mapping.js";
|
2
3
|
/**
|
3
4
|
* Helper to check if a string is valid JSON
|
@@ -75,7 +76,7 @@ function cleanSchemaForGoogle(schema) {
|
|
75
76
|
* Stream with OpenAI SDK
|
76
77
|
*/
|
77
78
|
export async function streamWithOpenAI(specification, messages, tools, openaiClient, // OpenAI client instance
|
78
|
-
onEvent, onComplete) {
|
79
|
+
onEvent, onComplete, abortSignal) {
|
79
80
|
let fullMessage = "";
|
80
81
|
let toolCalls = [];
|
81
82
|
// Performance metrics
|
@@ -131,7 +132,10 @@ onEvent, onComplete) {
|
|
131
132
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
132
133
|
console.log(`⏱️ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`);
|
133
134
|
}
|
134
|
-
const stream = await openaiClient.chat.completions.create(
|
135
|
+
const stream = await openaiClient.chat.completions.create({
|
136
|
+
...streamConfig,
|
137
|
+
...(abortSignal && { signal: abortSignal }),
|
138
|
+
});
|
135
139
|
for await (const chunk of stream) {
|
136
140
|
const delta = chunk.choices[0]?.delta;
|
137
141
|
// Debug log chunk details
|
@@ -379,11 +383,8 @@ onEvent, onComplete) {
|
|
379
383
|
throw error;
|
380
384
|
}
|
381
385
|
}
|
382
|
-
|
383
|
-
|
384
|
-
*/
|
385
|
-
export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Anthropic client instance
|
386
|
-
onEvent, onComplete) {
|
386
|
+
export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Properly typed Anthropic client
|
387
|
+
onEvent, onComplete, abortSignal, thinkingConfig) {
|
387
388
|
let fullMessage = "";
|
388
389
|
let toolCalls = [];
|
389
390
|
// Performance metrics
|
@@ -413,14 +414,29 @@ onEvent, onComplete) {
|
|
413
414
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
414
415
|
console.log(`🤖 [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || 8192} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
|
415
416
|
}
|
417
|
+
// Use proper Anthropic SDK types for the config
|
416
418
|
const streamConfig = {
|
417
419
|
model: modelName,
|
418
420
|
messages,
|
419
421
|
stream: true,
|
420
|
-
temperature: specification.anthropic?.temperature,
|
421
|
-
//top_p: specification.anthropic?.probability,
|
422
422
|
max_tokens: specification.anthropic?.completionTokenLimit || 8192, // required
|
423
423
|
};
|
424
|
+
// Handle temperature based on thinking configuration
|
425
|
+
if (thinkingConfig) {
|
426
|
+
// When thinking is enabled, temperature must be 1
|
427
|
+
streamConfig.temperature = 1;
|
428
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
429
|
+
console.log(`🧠 [Anthropic] Setting temperature to 1 (required for extended thinking)`);
|
430
|
+
}
|
431
|
+
}
|
432
|
+
else {
|
433
|
+
// Only add temperature if it's defined and valid for non-thinking requests
|
434
|
+
if (specification.anthropic?.temperature !== undefined &&
|
435
|
+
specification.anthropic?.temperature !== null &&
|
436
|
+
typeof specification.anthropic?.temperature === "number") {
|
437
|
+
streamConfig.temperature = specification.anthropic.temperature;
|
438
|
+
}
|
439
|
+
}
|
424
440
|
if (systemPrompt) {
|
425
441
|
streamConfig.system = systemPrompt;
|
426
442
|
}
|
@@ -432,11 +448,31 @@ onEvent, onComplete) {
|
|
432
448
|
input_schema: tool.schema ? JSON.parse(tool.schema) : {},
|
433
449
|
}));
|
434
450
|
}
|
451
|
+
// Add thinking config if provided
|
452
|
+
if (thinkingConfig) {
|
453
|
+
streamConfig.thinking = thinkingConfig;
|
454
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
455
|
+
console.log(`🧠 [Anthropic] Extended thinking enabled | Budget: ${thinkingConfig.budget_tokens} tokens`);
|
456
|
+
}
|
457
|
+
// Adjust max_tokens to account for thinking budget
|
458
|
+
const totalTokens = streamConfig.max_tokens + thinkingConfig.budget_tokens;
|
459
|
+
if (totalTokens > 200000) {
|
460
|
+
// Claude's context window limit
|
461
|
+
console.warn(`⚠️ [Anthropic] Total tokens (${totalTokens}) exceeds context window, adjusting completion tokens...`);
|
462
|
+
streamConfig.max_tokens = Math.max(1000, 200000 - thinkingConfig.budget_tokens);
|
463
|
+
}
|
464
|
+
}
|
435
465
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
436
466
|
console.log(`⏱️ [Anthropic] Starting LLM call at: ${new Date().toISOString()}`);
|
437
467
|
}
|
438
|
-
const stream = await anthropicClient.messages.create(streamConfig);
|
468
|
+
const stream = await anthropicClient.messages.create(streamConfig, abortSignal ? { signal: abortSignal } : undefined);
|
439
469
|
let activeContentBlock = false;
|
470
|
+
let currentContentBlockIndex;
|
471
|
+
let currentContentBlockType;
|
472
|
+
let thinkingContent = "";
|
473
|
+
let thinkingSignature = "";
|
474
|
+
let completeThinkingContent = ""; // Accumulate all thinking content for conversation history
|
475
|
+
let completeThinkingSignature = ""; // Accumulate signature for conversation history
|
440
476
|
for await (const chunk of stream) {
|
441
477
|
// Debug log all chunk types
|
442
478
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
@@ -444,7 +480,21 @@ onEvent, onComplete) {
|
|
444
480
|
}
|
445
481
|
if (chunk.type === "content_block_start") {
|
446
482
|
activeContentBlock = true;
|
447
|
-
|
483
|
+
currentContentBlockIndex = chunk.index;
|
484
|
+
currentContentBlockType = chunk.content_block.type;
|
485
|
+
if (chunk.content_block.type === "thinking") {
|
486
|
+
// Start of thinking block (native extended thinking)
|
487
|
+
thinkingContent = "";
|
488
|
+
thinkingSignature = "";
|
489
|
+
onEvent({
|
490
|
+
type: "reasoning_start",
|
491
|
+
format: "thinking_tag",
|
492
|
+
});
|
493
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
494
|
+
console.log("[Anthropic] Extended thinking block started");
|
495
|
+
}
|
496
|
+
}
|
497
|
+
else if (chunk.content_block.type === "tool_use") {
|
448
498
|
const toolCall = {
|
449
499
|
id: chunk.content_block.id,
|
450
500
|
name: chunk.content_block.name,
|
@@ -477,7 +527,33 @@ onEvent, onComplete) {
|
|
477
527
|
}
|
478
528
|
}
|
479
529
|
else if (chunk.type === "content_block_delta") {
|
480
|
-
|
530
|
+
// Handle thinking blocks with native extended thinking
|
531
|
+
if (chunk.delta.type === "thinking_delta" &&
|
532
|
+
"thinking" in chunk.delta) {
|
533
|
+
// Accumulate thinking content
|
534
|
+
thinkingContent += chunk.delta.thinking;
|
535
|
+
// Track first token time
|
536
|
+
if (firstTokenTime === 0) {
|
537
|
+
firstTokenTime = Date.now() - startTime;
|
538
|
+
}
|
539
|
+
onEvent({
|
540
|
+
type: "reasoning_delta",
|
541
|
+
content: chunk.delta.thinking,
|
542
|
+
format: "thinking_tag",
|
543
|
+
});
|
544
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
545
|
+
console.log(`[Anthropic] Thinking delta: "${chunk.delta.thinking}"`);
|
546
|
+
}
|
547
|
+
}
|
548
|
+
else if (chunk.delta.type === "signature_delta" &&
|
549
|
+
"signature" in chunk.delta) {
|
550
|
+
// Handle signature for thinking blocks
|
551
|
+
thinkingSignature += chunk.delta.signature;
|
552
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
553
|
+
console.log(`[Anthropic] Signature delta: "${chunk.delta.signature}"`);
|
554
|
+
}
|
555
|
+
}
|
556
|
+
else if (chunk.delta.type === "text_delta" && "text" in chunk.delta) {
|
481
557
|
fullMessage += chunk.delta.text;
|
482
558
|
tokenCount++;
|
483
559
|
const currentTime = Date.now();
|
@@ -531,9 +607,39 @@ onEvent, onComplete) {
|
|
531
607
|
}
|
532
608
|
else if (chunk.type === "content_block_stop") {
|
533
609
|
activeContentBlock = false;
|
610
|
+
// Check if we're stopping a thinking block
|
611
|
+
if (currentContentBlockType === "thinking" &&
|
612
|
+
chunk.index === currentContentBlockIndex) {
|
613
|
+
// Emit the complete thinking block with signature
|
614
|
+
onEvent({
|
615
|
+
type: "reasoning_end",
|
616
|
+
fullContent: thinkingContent,
|
617
|
+
signature: thinkingSignature || undefined,
|
618
|
+
});
|
619
|
+
// Accumulate thinking content and signature for conversation history preservation
|
620
|
+
if (thinkingContent.trim()) {
|
621
|
+
completeThinkingContent += thinkingContent;
|
622
|
+
}
|
623
|
+
if (thinkingSignature.trim()) {
|
624
|
+
completeThinkingSignature = thinkingSignature; // Use the last signature
|
625
|
+
}
|
626
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
627
|
+
console.log(`[Anthropic] Thinking block completed:`, {
|
628
|
+
contentLength: thinkingContent.length,
|
629
|
+
hasSignature: !!thinkingSignature,
|
630
|
+
signature: thinkingSignature,
|
631
|
+
totalThinkingLength: completeThinkingContent.length,
|
632
|
+
});
|
633
|
+
}
|
634
|
+
// Reset current thinking state (but keep completeThinkingContent)
|
635
|
+
thinkingContent = "";
|
636
|
+
thinkingSignature = "";
|
637
|
+
}
|
638
|
+
currentContentBlockType = undefined;
|
639
|
+
currentContentBlockIndex = undefined;
|
534
640
|
// Tool call complete
|
535
641
|
const currentTool = toolCalls[toolCalls.length - 1];
|
536
|
-
if (currentTool) {
|
642
|
+
if (currentTool && chunk.content_block?.type === "tool_use") {
|
537
643
|
const currentTime = Date.now();
|
538
644
|
// Update tool metrics
|
539
645
|
const toolIndex = toolCalls.length - 1;
|
@@ -682,7 +788,19 @@ onEvent, onComplete) {
|
|
682
788
|
}
|
683
789
|
console.log(`✅ [Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`);
|
684
790
|
}
|
685
|
-
|
791
|
+
// Include thinking content in the final message for conversation history preservation
|
792
|
+
let finalMessage = fullMessage;
|
793
|
+
if (completeThinkingContent.trim()) {
|
794
|
+
// Wrap thinking content with signature in special tags that formatMessagesForAnthropic can parse
|
795
|
+
const thinkingBlock = completeThinkingSignature.trim()
|
796
|
+
? `<thinking signature="${completeThinkingSignature}">${completeThinkingContent}</thinking>`
|
797
|
+
: `<thinking>${completeThinkingContent}</thinking>`;
|
798
|
+
finalMessage = `${thinkingBlock}${fullMessage}`;
|
799
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
800
|
+
console.log(`🧠 [Anthropic] Including thinking content (${completeThinkingContent.length} chars) and signature (${completeThinkingSignature.length} chars) in conversation history`);
|
801
|
+
}
|
802
|
+
}
|
803
|
+
onComplete(finalMessage, validToolCalls);
|
686
804
|
}
|
687
805
|
catch (error) {
|
688
806
|
// Handle Anthropic-specific errors
|
@@ -717,7 +835,7 @@ onEvent, onComplete) {
|
|
717
835
|
* Stream with Google SDK
|
718
836
|
*/
|
719
837
|
export async function streamWithGoogle(specification, messages, systemPrompt, tools, googleClient, // Google GenerativeAI client instance
|
720
|
-
onEvent, onComplete) {
|
838
|
+
onEvent, onComplete, abortSignal) {
|
721
839
|
let fullMessage = "";
|
722
840
|
let toolCalls = [];
|
723
841
|
// Performance metrics
|
@@ -1070,19 +1188,35 @@ onEvent, onComplete) {
|
|
1070
1188
|
* Stream with Groq SDK (OpenAI-compatible)
|
1071
1189
|
*/
|
1072
1190
|
export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
|
1073
|
-
onEvent, onComplete) {
|
1191
|
+
onEvent, onComplete, abortSignal) {
|
1074
1192
|
try {
|
1075
1193
|
const modelName = getModelName(specification);
|
1076
1194
|
// Filter or simplify tools for Groq models that have issues
|
1077
1195
|
let groqTools = tools;
|
1078
1196
|
if (tools && tools.length > 0) {
|
1079
|
-
//
|
1197
|
+
// Some models have tool calling issues - provide fallback prompt
|
1198
|
+
const problemModels = [
|
1199
|
+
"llama-3.3",
|
1200
|
+
"LLAMA_3_3",
|
1201
|
+
"llama3-groq-70b",
|
1202
|
+
"llama3-groq-8b",
|
1203
|
+
];
|
1080
1204
|
if (modelName &&
|
1081
|
-
|
1205
|
+
problemModels.some((model) => modelName.toLowerCase().includes(model.toLowerCase()))) {
|
1082
1206
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1083
|
-
console.log(`⚠️ [Groq]
|
1207
|
+
console.log(`⚠️ [Groq] Model ${modelName} has limited tool support - using simplified schemas`);
|
1084
1208
|
}
|
1085
|
-
|
1209
|
+
// Don't disable tools entirely, but simplify them more aggressively
|
1210
|
+
groqTools = tools.map((tool) => ({
|
1211
|
+
...tool,
|
1212
|
+
schema: tool.schema
|
1213
|
+
? JSON.stringify({
|
1214
|
+
type: "object",
|
1215
|
+
properties: JSON.parse(tool.schema).properties || {},
|
1216
|
+
required: JSON.parse(tool.schema).required || [],
|
1217
|
+
})
|
1218
|
+
: tool.schema,
|
1219
|
+
}));
|
1086
1220
|
}
|
1087
1221
|
else {
|
1088
1222
|
// For other models, simplify complex schemas
|
@@ -1095,7 +1229,7 @@ onEvent, onComplete) {
|
|
1095
1229
|
}
|
1096
1230
|
}
|
1097
1231
|
// Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
|
1098
|
-
return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete);
|
1232
|
+
return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete, abortSignal);
|
1099
1233
|
}
|
1100
1234
|
catch (error) {
|
1101
1235
|
// Handle Groq-specific errors
|
@@ -1126,10 +1260,42 @@ onEvent, onComplete) {
|
|
1126
1260
|
* Stream with Cerebras SDK (OpenAI-compatible)
|
1127
1261
|
*/
|
1128
1262
|
export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
|
1129
|
-
onEvent, onComplete) {
|
1263
|
+
onEvent, onComplete, abortSignal) {
|
1130
1264
|
try {
|
1265
|
+
const modelName = getModelName(specification);
|
1266
|
+
// Cerebras has very limited tool support
|
1267
|
+
let cerebrasTools = tools;
|
1268
|
+
let filteredMessages = messages;
|
1269
|
+
if (modelName) {
|
1270
|
+
const isQwen = modelName.toLowerCase().includes("qwen-3-32b");
|
1271
|
+
if (tools && tools.length > 0) {
|
1272
|
+
if (!isQwen) {
|
1273
|
+
// Only qwen-3-32b supports tools
|
1274
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1275
|
+
console.log(`⚠️ [Cerebras] Disabling tools for ${modelName} - only qwen-3-32b supports tools`);
|
1276
|
+
}
|
1277
|
+
cerebrasTools = undefined;
|
1278
|
+
}
|
1279
|
+
}
|
1280
|
+
// For non-qwen models, we need to filter out any assistant messages with tool_calls
|
1281
|
+
if (!isQwen) {
|
1282
|
+
filteredMessages = messages.map((msg) => {
|
1283
|
+
if (msg.role === "assistant" &&
|
1284
|
+
msg.tool_calls &&
|
1285
|
+
msg.tool_calls.length > 0) {
|
1286
|
+
// Remove tool_calls from assistant messages for non-qwen models
|
1287
|
+
const { tool_calls, ...msgWithoutTools } = msg;
|
1288
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1289
|
+
console.log(`⚠️ [Cerebras] Removing tool_calls from assistant message for ${modelName}`);
|
1290
|
+
}
|
1291
|
+
return msgWithoutTools;
|
1292
|
+
}
|
1293
|
+
return msg;
|
1294
|
+
});
|
1295
|
+
}
|
1296
|
+
}
|
1131
1297
|
// Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
|
1132
|
-
return await streamWithOpenAI(specification,
|
1298
|
+
return await streamWithOpenAI(specification, filteredMessages, cerebrasTools, cerebrasClient, onEvent, onComplete, abortSignal);
|
1133
1299
|
}
|
1134
1300
|
catch (error) {
|
1135
1301
|
// Handle Cerebras-specific 429 errors
|
@@ -1149,9 +1315,22 @@ onEvent, onComplete) {
|
|
1149
1315
|
* Stream with Deepseek SDK (OpenAI-compatible)
|
1150
1316
|
*/
|
1151
1317
|
export async function streamWithDeepseek(specification, messages, tools, deepseekClient, // OpenAI client instance configured for Deepseek
|
1152
|
-
onEvent, onComplete) {
|
1318
|
+
onEvent, onComplete, abortSignal) {
|
1153
1319
|
let fullMessage = "";
|
1154
1320
|
let toolCalls = [];
|
1321
|
+
// Reasoning detection state
|
1322
|
+
let reasoningLines = [];
|
1323
|
+
let currentLine = "";
|
1324
|
+
const REASONING_PATTERNS = [
|
1325
|
+
/^🤔\s*Reasoning:/i,
|
1326
|
+
/^\*\*Step\s+\d+:/i,
|
1327
|
+
/^\*\*Reasoning:/i,
|
1328
|
+
/^\*\*Analysis:/i,
|
1329
|
+
/^\*\*Thought\s+\d+:/i,
|
1330
|
+
/^\*\*Consideration:/i,
|
1331
|
+
];
|
1332
|
+
let isInReasoning = false;
|
1333
|
+
let hasEmittedReasoningStart = false;
|
1155
1334
|
// Performance metrics
|
1156
1335
|
const startTime = Date.now();
|
1157
1336
|
let firstTokenTime = 0;
|
@@ -1219,7 +1398,10 @@ onEvent, onComplete) {
|
|
1219
1398
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1220
1399
|
console.log(`⏱️ [Deepseek] Starting LLM call at: ${new Date().toISOString()}`);
|
1221
1400
|
}
|
1222
|
-
const stream = await deepseekClient.chat.completions.create(
|
1401
|
+
const stream = await deepseekClient.chat.completions.create({
|
1402
|
+
...streamConfig,
|
1403
|
+
...(abortSignal && { signal: abortSignal }),
|
1404
|
+
});
|
1223
1405
|
for await (const chunk of stream) {
|
1224
1406
|
const delta = chunk.choices[0]?.delta;
|
1225
1407
|
if (!delta)
|
@@ -1238,15 +1420,78 @@ onEvent, onComplete) {
|
|
1238
1420
|
// Handle message content
|
1239
1421
|
if (delta.content) {
|
1240
1422
|
tokenCount++;
|
1241
|
-
fullMessage += delta.content;
|
1242
1423
|
// Track first meaningful content
|
1243
1424
|
if (firstMeaningfulContentTime === 0 && fullMessage.trim().length > 0) {
|
1244
1425
|
firstMeaningfulContentTime = currentTime - startTime;
|
1245
1426
|
}
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1427
|
+
// Process content for reasoning detection
|
1428
|
+
const content = delta.content;
|
1429
|
+
// Build current line for pattern matching
|
1430
|
+
for (const char of content) {
|
1431
|
+
if (char === "\n") {
|
1432
|
+
// Check if this line starts a reasoning section
|
1433
|
+
const trimmedLine = currentLine.trim();
|
1434
|
+
const isReasoningLine = REASONING_PATTERNS.some((pattern) => pattern.test(trimmedLine));
|
1435
|
+
if (isReasoningLine && !isInReasoning) {
|
1436
|
+
// Start reasoning mode
|
1437
|
+
isInReasoning = true;
|
1438
|
+
if (!hasEmittedReasoningStart) {
|
1439
|
+
onEvent({ type: "reasoning_start", format: "markdown" });
|
1440
|
+
hasEmittedReasoningStart = true;
|
1441
|
+
}
|
1442
|
+
reasoningLines.push(currentLine);
|
1443
|
+
onEvent({
|
1444
|
+
type: "reasoning_delta",
|
1445
|
+
content: currentLine + "\n",
|
1446
|
+
format: "markdown",
|
1447
|
+
});
|
1448
|
+
}
|
1449
|
+
else if (isInReasoning) {
|
1450
|
+
// Continue reasoning if line is indented or continues the pattern
|
1451
|
+
if (currentLine.startsWith(" ") ||
|
1452
|
+
currentLine.startsWith("\t") ||
|
1453
|
+
currentLine.trim().startsWith("**") ||
|
1454
|
+
currentLine.trim() === "") {
|
1455
|
+
reasoningLines.push(currentLine);
|
1456
|
+
onEvent({
|
1457
|
+
type: "reasoning_delta",
|
1458
|
+
content: currentLine + "\n",
|
1459
|
+
format: "markdown",
|
1460
|
+
});
|
1461
|
+
}
|
1462
|
+
else {
|
1463
|
+
// End reasoning mode
|
1464
|
+
isInReasoning = false;
|
1465
|
+
onEvent({
|
1466
|
+
type: "reasoning_end",
|
1467
|
+
fullContent: reasoningLines.join("\n"),
|
1468
|
+
});
|
1469
|
+
// This line is normal content
|
1470
|
+
fullMessage += currentLine + "\n";
|
1471
|
+
onEvent({ type: "token", token: currentLine + "\n" });
|
1472
|
+
}
|
1473
|
+
}
|
1474
|
+
else {
|
1475
|
+
// Normal content
|
1476
|
+
fullMessage += currentLine + "\n";
|
1477
|
+
onEvent({ type: "token", token: currentLine + "\n" });
|
1478
|
+
}
|
1479
|
+
currentLine = "";
|
1480
|
+
}
|
1481
|
+
else {
|
1482
|
+
currentLine += char;
|
1483
|
+
}
|
1484
|
+
}
|
1485
|
+
// Handle partial line
|
1486
|
+
if (currentLine && !isInReasoning) {
|
1487
|
+
// For partial lines, emit as normal content
|
1488
|
+
fullMessage += currentLine;
|
1489
|
+
onEvent({ type: "token", token: currentLine });
|
1490
|
+
currentLine = "";
|
1491
|
+
}
|
1492
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1493
|
+
console.log(`[Deepseek] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`);
|
1494
|
+
}
|
1250
1495
|
// Performance metrics tracking (internal only)
|
1251
1496
|
if (tokenCount % 10 === 0) {
|
1252
1497
|
const totalTokens = tokenCount + toolArgumentTokens;
|
@@ -1299,6 +1544,25 @@ onEvent, onComplete) {
|
|
1299
1544
|
}
|
1300
1545
|
}
|
1301
1546
|
}
|
1547
|
+
// Handle any remaining content
|
1548
|
+
if (currentLine) {
|
1549
|
+
if (isInReasoning) {
|
1550
|
+
reasoningLines.push(currentLine);
|
1551
|
+
onEvent({
|
1552
|
+
type: "reasoning_delta",
|
1553
|
+
content: currentLine,
|
1554
|
+
format: "markdown",
|
1555
|
+
});
|
1556
|
+
onEvent({
|
1557
|
+
type: "reasoning_end",
|
1558
|
+
fullContent: reasoningLines.join("\n"),
|
1559
|
+
});
|
1560
|
+
}
|
1561
|
+
else {
|
1562
|
+
fullMessage += currentLine;
|
1563
|
+
onEvent({ type: "token", token: currentLine });
|
1564
|
+
}
|
1565
|
+
}
|
1302
1566
|
// Process completed tool calls
|
1303
1567
|
const validToolCalls = toolCalls.filter((tc, idx) => {
|
1304
1568
|
if (!isValidJSON(tc.arguments)) {
|
@@ -1353,7 +1617,7 @@ onEvent, onComplete) {
|
|
1353
1617
|
* Stream with Cohere SDK
|
1354
1618
|
*/
|
1355
1619
|
export async function streamWithCohere(specification, messages, tools, cohereClient, // CohereClient instance
|
1356
|
-
onEvent, onComplete) {
|
1620
|
+
onEvent, onComplete, abortSignal) {
|
1357
1621
|
let fullMessage = "";
|
1358
1622
|
let toolCalls = [];
|
1359
1623
|
// Performance metrics
|
@@ -1372,107 +1636,89 @@ onEvent, onComplete) {
|
|
1372
1636
|
console.log(`🔍 [Cohere] Messages array length: ${messages.length}`);
|
1373
1637
|
console.log(`🔍 [Cohere] All messages:`, JSON.stringify(messages, null, 2));
|
1374
1638
|
}
|
1639
|
+
// V2 API validation
|
1375
1640
|
if (messages.length === 0) {
|
1376
1641
|
throw new Error("No messages found for Cohere streaming");
|
1377
1642
|
}
|
1378
|
-
|
1379
|
-
//
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1643
|
+
const v2Messages = [];
|
1644
|
+
// Map our GraphQL role types to Cohere v2 role strings
|
1645
|
+
messages.forEach((msg) => {
|
1646
|
+
switch (msg.role) {
|
1647
|
+
case Types.ConversationRoleTypes.System:
|
1648
|
+
v2Messages.push({
|
1649
|
+
role: "system",
|
1650
|
+
content: msg.message || "",
|
1651
|
+
});
|
1652
|
+
break;
|
1653
|
+
case Types.ConversationRoleTypes.User:
|
1654
|
+
v2Messages.push({
|
1655
|
+
role: "user",
|
1656
|
+
content: msg.message || "",
|
1657
|
+
});
|
1658
|
+
break;
|
1659
|
+
case Types.ConversationRoleTypes.Assistant:
|
1660
|
+
const assistantMsg = {
|
1661
|
+
role: "assistant",
|
1662
|
+
content: msg.message || "",
|
1663
|
+
};
|
1664
|
+
// V2 uses camelCase toolCalls
|
1665
|
+
if (msg.toolCalls && msg.toolCalls.length > 0) {
|
1666
|
+
// Convert our internal tool call format to Cohere V2 format
|
1667
|
+
assistantMsg.toolCalls = msg.toolCalls
|
1668
|
+
.filter((tc) => tc !== null)
|
1669
|
+
.map((tc) => ({
|
1670
|
+
id: tc.id,
|
1671
|
+
type: "function",
|
1672
|
+
function: {
|
1673
|
+
name: tc.name,
|
1674
|
+
arguments: tc.arguments,
|
1675
|
+
},
|
1676
|
+
}));
|
1677
|
+
}
|
1678
|
+
v2Messages.push(assistantMsg);
|
1679
|
+
break;
|
1680
|
+
case Types.ConversationRoleTypes.Tool:
|
1681
|
+
// Tool messages need the tool call ID
|
1682
|
+
const toolCallId = msg.toolCallId || "";
|
1683
|
+
if (!toolCallId && process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1684
|
+
console.warn(`[Cohere] Tool message missing toolCallId:`, {
|
1685
|
+
message: msg.message?.substring(0, 50),
|
1686
|
+
});
|
1687
|
+
}
|
1688
|
+
v2Messages.push({
|
1689
|
+
role: "tool",
|
1690
|
+
content: msg.message || "",
|
1691
|
+
toolCallId: toolCallId,
|
1692
|
+
});
|
1693
|
+
break;
|
1694
|
+
default:
|
1695
|
+
console.warn(`[Cohere] Unknown message role: ${msg.role}, treating as user`);
|
1696
|
+
v2Messages.push({
|
1697
|
+
role: "user",
|
1698
|
+
content: msg.message || "",
|
1699
|
+
});
|
1700
|
+
}
|
1701
|
+
});
|
1389
1702
|
const streamConfig = {
|
1390
1703
|
model: modelName,
|
1391
|
-
|
1704
|
+
messages: v2Messages,
|
1705
|
+
stream: true,
|
1392
1706
|
};
|
1393
|
-
// Add system message as preamble if present
|
1394
|
-
if (systemMessages.length > 0) {
|
1395
|
-
// Combine all system messages into preamble
|
1396
|
-
streamConfig.preamble = systemMessages
|
1397
|
-
.map((msg) => msg.message)
|
1398
|
-
.join("\n\n");
|
1399
|
-
}
|
1400
|
-
// Add chat history if there are previous messages
|
1401
|
-
if (chatHistory.length > 0) {
|
1402
|
-
// Build properly typed chat history using Cohere SDK Message types
|
1403
|
-
// Note: SYSTEM messages are already filtered out and handled as preamble
|
1404
|
-
const cohereHistory = chatHistory.map((msg) => {
|
1405
|
-
switch (msg.role) {
|
1406
|
-
case "USER":
|
1407
|
-
return {
|
1408
|
-
role: "USER",
|
1409
|
-
message: msg.message,
|
1410
|
-
};
|
1411
|
-
case "CHATBOT":
|
1412
|
-
const chatbotMsg = {
|
1413
|
-
role: "CHATBOT",
|
1414
|
-
message: msg.message,
|
1415
|
-
};
|
1416
|
-
// Add tool calls if present
|
1417
|
-
if (msg.tool_calls && msg.tool_calls.length > 0) {
|
1418
|
-
chatbotMsg.toolCalls = msg.tool_calls.map((tc) => ({
|
1419
|
-
name: tc.name,
|
1420
|
-
parameters: tc.parameters || {},
|
1421
|
-
}));
|
1422
|
-
}
|
1423
|
-
return chatbotMsg;
|
1424
|
-
case "TOOL":
|
1425
|
-
return {
|
1426
|
-
role: "TOOL",
|
1427
|
-
toolResults: msg.tool_results || [],
|
1428
|
-
};
|
1429
|
-
default:
|
1430
|
-
// Fallback - treat as USER
|
1431
|
-
return {
|
1432
|
-
role: "USER",
|
1433
|
-
message: msg.message,
|
1434
|
-
};
|
1435
|
-
}
|
1436
|
-
});
|
1437
|
-
streamConfig.chatHistory = cohereHistory;
|
1438
|
-
}
|
1439
1707
|
// Only add temperature if it's defined
|
1440
1708
|
if (specification.cohere?.temperature !== undefined &&
|
1441
1709
|
specification.cohere.temperature !== null) {
|
1442
1710
|
streamConfig.temperature = specification.cohere.temperature;
|
1443
1711
|
}
|
1444
|
-
// Add tools if provided
|
1712
|
+
// Add tools if provided - V2 format is different
|
1445
1713
|
if (tools && tools.length > 0) {
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
name: tool.name || "",
|
1450
|
-
description: tool.description || "",
|
1451
|
-
parameterDefinitions: {},
|
1452
|
-
};
|
1453
|
-
}
|
1454
|
-
// Parse the JSON schema
|
1455
|
-
const schema = JSON.parse(tool.schema);
|
1456
|
-
// Convert JSON Schema to Cohere's expected format
|
1457
|
-
const parameterDefinitions = {};
|
1458
|
-
if (schema.properties) {
|
1459
|
-
for (const [key, value] of Object.entries(schema.properties)) {
|
1460
|
-
const prop = value;
|
1461
|
-
const paramDef = {
|
1462
|
-
type: prop.type || "str",
|
1463
|
-
description: prop.description || "",
|
1464
|
-
required: schema.required?.includes(key) || false,
|
1465
|
-
};
|
1466
|
-
parameterDefinitions[key] = paramDef;
|
1467
|
-
}
|
1468
|
-
}
|
1469
|
-
return {
|
1714
|
+
streamConfig.tools = tools.map((tool) => ({
|
1715
|
+
type: "function",
|
1716
|
+
function: {
|
1470
1717
|
name: tool.name || "",
|
1471
1718
|
description: tool.description || "",
|
1472
|
-
|
1473
|
-
}
|
1474
|
-
});
|
1475
|
-
streamConfig.tools = cohereTools;
|
1719
|
+
parameters: tool.schema ? JSON.parse(tool.schema) : {},
|
1720
|
+
},
|
1721
|
+
}));
|
1476
1722
|
}
|
1477
1723
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1478
1724
|
console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
|
@@ -1488,7 +1734,10 @@ onEvent, onComplete) {
|
|
1488
1734
|
process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1489
1735
|
console.log(`🔍 [Cohere] Full streamConfig for ${modelName}:`, JSON.stringify(streamConfig, null, 2));
|
1490
1736
|
}
|
1491
|
-
stream = await cohereClient.chatStream(
|
1737
|
+
stream = await cohereClient.chatStream({
|
1738
|
+
...streamConfig,
|
1739
|
+
...(abortSignal && { signal: abortSignal }),
|
1740
|
+
});
|
1492
1741
|
}
|
1493
1742
|
catch (streamError) {
|
1494
1743
|
// Enhanced error logging
|
@@ -1523,9 +1772,17 @@ onEvent, onComplete) {
|
|
1523
1772
|
}
|
1524
1773
|
throw streamError;
|
1525
1774
|
}
|
1775
|
+
// Track current tool call being built
|
1776
|
+
let currentToolCallIndex = -1;
|
1777
|
+
let currentToolCall = null;
|
1526
1778
|
for await (const chunk of stream) {
|
1527
|
-
if (
|
1528
|
-
|
1779
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1780
|
+
console.log(`[Cohere] Event type: ${chunk.type}`);
|
1781
|
+
}
|
1782
|
+
// Handle v2 API event types
|
1783
|
+
if (chunk.type === "content-delta") {
|
1784
|
+
// Content streaming in response generation step
|
1785
|
+
const text = chunk.delta?.message?.content?.text;
|
1529
1786
|
if (text) {
|
1530
1787
|
fullMessage += text;
|
1531
1788
|
tokenCount++;
|
@@ -1539,34 +1796,92 @@ onEvent, onComplete) {
|
|
1539
1796
|
type: "token",
|
1540
1797
|
token: text,
|
1541
1798
|
});
|
1799
|
+
// Also emit message update
|
1800
|
+
onEvent({
|
1801
|
+
type: "message",
|
1802
|
+
message: fullMessage,
|
1803
|
+
});
|
1542
1804
|
}
|
1543
1805
|
}
|
1544
|
-
else if (chunk.
|
1545
|
-
//
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1554
|
-
|
1555
|
-
|
1556
|
-
|
1557
|
-
|
1558
|
-
|
1806
|
+
else if (chunk.type === "tool-call-start") {
|
1807
|
+
// Start of a tool call
|
1808
|
+
currentToolCallIndex = chunk.index || 0;
|
1809
|
+
const toolCallData = chunk.delta?.message?.toolCalls; // Note: toolCalls not tool_calls
|
1810
|
+
if (toolCallData) {
|
1811
|
+
currentToolCall = {
|
1812
|
+
id: toolCallData.id ||
|
1813
|
+
`cohere_tool_${Date.now()}_${currentToolCallIndex}`,
|
1814
|
+
name: toolCallData.function?.name || "",
|
1815
|
+
arguments: "",
|
1816
|
+
};
|
1817
|
+
onEvent({
|
1818
|
+
type: "tool_call_start",
|
1819
|
+
toolCall: {
|
1820
|
+
id: currentToolCall.id,
|
1821
|
+
name: currentToolCall.name,
|
1822
|
+
},
|
1823
|
+
});
|
1824
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1825
|
+
console.log(`[Cohere] Tool call started: ${currentToolCall.name}`);
|
1826
|
+
}
|
1827
|
+
}
|
1828
|
+
}
|
1829
|
+
else if (chunk.type === "tool-call-delta") {
|
1830
|
+
// Tool call argument streaming
|
1831
|
+
if (currentToolCall && chunk.index === currentToolCallIndex) {
|
1832
|
+
const argDelta = chunk.delta?.message?.toolCalls?.function?.arguments;
|
1833
|
+
if (argDelta) {
|
1834
|
+
currentToolCall.arguments += argDelta;
|
1559
1835
|
onEvent({
|
1560
|
-
type: "
|
1561
|
-
|
1836
|
+
type: "tool_call_delta",
|
1837
|
+
toolCallId: currentToolCall.id,
|
1838
|
+
argumentDelta: argDelta,
|
1562
1839
|
});
|
1563
1840
|
}
|
1564
1841
|
}
|
1565
1842
|
}
|
1843
|
+
else if (chunk.type === "tool-call-end") {
|
1844
|
+
// Tool call complete
|
1845
|
+
if (currentToolCall && chunk.index === currentToolCallIndex) {
|
1846
|
+
toolCalls.push(currentToolCall);
|
1847
|
+
onEvent({
|
1848
|
+
type: "tool_call_parsed",
|
1849
|
+
toolCall: currentToolCall,
|
1850
|
+
});
|
1851
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1852
|
+
console.log(`[Cohere] Tool call completed: ${currentToolCall.name}`);
|
1853
|
+
}
|
1854
|
+
currentToolCall = null;
|
1855
|
+
currentToolCallIndex = -1;
|
1856
|
+
}
|
1857
|
+
}
|
1858
|
+
else if (chunk.type === "tool-plan-delta") {
|
1859
|
+
// Handle tool plan delta - Cohere might send this before tool calls
|
1860
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1861
|
+
console.log(`[Cohere] Tool plan delta received`, chunk);
|
1862
|
+
}
|
1863
|
+
}
|
1864
|
+
else if (chunk.type === "message-start") {
|
1865
|
+
// Handle message start event
|
1866
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1867
|
+
console.log(`[Cohere] Message start event received`, chunk);
|
1868
|
+
}
|
1869
|
+
}
|
1870
|
+
else if (chunk.type === "message-end") {
|
1871
|
+
// Handle message end event
|
1872
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1873
|
+
console.log(`[Cohere] Message end event received`, chunk);
|
1874
|
+
}
|
1875
|
+
}
|
1566
1876
|
}
|
1567
1877
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1568
|
-
console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
|
1878
|
+
console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
|
1569
1879
|
}
|
1880
|
+
// Emit final complete event
|
1881
|
+
onEvent({
|
1882
|
+
type: "complete",
|
1883
|
+
tokens: tokenCount,
|
1884
|
+
});
|
1570
1885
|
onComplete(fullMessage, toolCalls);
|
1571
1886
|
}
|
1572
1887
|
catch (error) {
|
@@ -1589,7 +1904,7 @@ onEvent, onComplete) {
|
|
1589
1904
|
* Stream with Mistral SDK
|
1590
1905
|
*/
|
1591
1906
|
export async function streamWithMistral(specification, messages, tools, mistralClient, // Mistral client instance
|
1592
|
-
onEvent, onComplete) {
|
1907
|
+
onEvent, onComplete, abortSignal) {
|
1593
1908
|
let fullMessage = "";
|
1594
1909
|
let toolCalls = [];
|
1595
1910
|
// Performance metrics
|
@@ -1603,6 +1918,19 @@ onEvent, onComplete) {
|
|
1603
1918
|
}
|
1604
1919
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1605
1920
|
console.log(`🤖 [Mistral] Model Config: Service=Mistral | Model=${modelName} | Temperature=${specification.mistral?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
|
1921
|
+
console.log(`🔍 [Mistral] Messages being sent (${messages.length} total):`);
|
1922
|
+
messages.forEach((msg, idx) => {
|
1923
|
+
const msgWithTools = msg;
|
1924
|
+
console.log(` Message ${idx}: role=${msg.role}, hasContent=${!!msg.content}, hasToolCalls=${!!msgWithTools.tool_calls}, tool_call_id=${msgWithTools.tool_call_id}`);
|
1925
|
+
if (msgWithTools.tool_calls) {
|
1926
|
+
console.log(` Tool calls: ${JSON.stringify(msgWithTools.tool_calls)}`);
|
1927
|
+
}
|
1928
|
+
if (msgWithTools.tool_call_id) {
|
1929
|
+
console.log(` Tool call ID: ${msgWithTools.tool_call_id}`);
|
1930
|
+
}
|
1931
|
+
// Log full message for debugging
|
1932
|
+
console.log(` Full message: ${JSON.stringify(msg)}`);
|
1933
|
+
});
|
1606
1934
|
}
|
1607
1935
|
const streamConfig = {
|
1608
1936
|
model: modelName,
|
@@ -1620,8 +1948,100 @@ onEvent, onComplete) {
|
|
1620
1948
|
},
|
1621
1949
|
}));
|
1622
1950
|
}
|
1623
|
-
|
1951
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1952
|
+
console.log(`[Mistral] Stream config:`, JSON.stringify({
|
1953
|
+
...streamConfig,
|
1954
|
+
messages: streamConfig.messages.map((m) => ({
|
1955
|
+
role: m.role,
|
1956
|
+
contentLength: typeof m.content === "string"
|
1957
|
+
? m.content.length
|
1958
|
+
: m.content?.length || 0,
|
1959
|
+
hasToolCalls: !!m.tool_calls,
|
1960
|
+
toolCallsCount: m.tool_calls?.length || 0,
|
1961
|
+
toolCallId: m.tool_call_id,
|
1962
|
+
})),
|
1963
|
+
}, null, 2));
|
1964
|
+
// Log full messages for debugging tool issues
|
1965
|
+
if (messages.some((m) => m.role === "tool" || m.tool_calls)) {
|
1966
|
+
console.log(`[Mistral] Full messages for tool debugging:`, JSON.stringify(messages, null, 2));
|
1967
|
+
}
|
1968
|
+
}
|
1969
|
+
let stream;
|
1970
|
+
try {
|
1971
|
+
// Log the full config for debugging tool issues
|
1972
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1973
|
+
console.log(`[Mistral] About to call stream with:`, {
|
1974
|
+
model: streamConfig.model,
|
1975
|
+
messageCount: streamConfig.messages.length,
|
1976
|
+
hasTools: !!(streamConfig.tools && streamConfig.tools.length > 0),
|
1977
|
+
toolCount: streamConfig.tools?.length || 0,
|
1978
|
+
});
|
1979
|
+
// Log the EXACT payload being sent to Mistral API
|
1980
|
+
console.log(`[Mistral] EXACT API payload:`, JSON.stringify(streamConfig, null, 2));
|
1981
|
+
// Check for tool call/result mismatches
|
1982
|
+
const toolCallMessages = streamConfig.messages.filter((m) => m.tool_calls?.length > 0);
|
1983
|
+
const toolResultMessages = streamConfig.messages.filter((m) => m.role === "tool");
|
1984
|
+
if (toolCallMessages.length > 0 || toolResultMessages.length > 0) {
|
1985
|
+
console.log(`[Mistral] Tool message analysis:`, {
|
1986
|
+
toolCallMessages: toolCallMessages.length,
|
1987
|
+
toolResultMessages: toolResultMessages.length,
|
1988
|
+
toolCallsTotal: toolCallMessages.reduce((sum, m) => sum + (m.tool_calls?.length || 0), 0),
|
1989
|
+
});
|
1990
|
+
}
|
1991
|
+
}
|
1992
|
+
stream = await mistralClient.chat.stream({
|
1993
|
+
...streamConfig,
|
1994
|
+
...(abortSignal && { signal: abortSignal }),
|
1995
|
+
});
|
1996
|
+
}
|
1997
|
+
catch (error) {
|
1998
|
+
console.error(`[Mistral] Failed to create stream:`, error);
|
1999
|
+
// Better error handling for tool mismatch
|
2000
|
+
if (error.message?.includes("Not the same number of function calls and responses")) {
|
2001
|
+
console.error(`[Mistral] Tool call/response mismatch detected. This usually happens when there are unmatched tool calls in the conversation history.`);
|
2002
|
+
// Analyze the messages to find the mismatch
|
2003
|
+
const toolCallIds = new Set();
|
2004
|
+
const toolResponseIds = new Set();
|
2005
|
+
messages.forEach((msg, idx) => {
|
2006
|
+
const msgWithTools = msg;
|
2007
|
+
if (msg.role === "assistant" && msgWithTools.tool_calls) {
|
2008
|
+
msgWithTools.tool_calls.forEach((tc) => {
|
2009
|
+
toolCallIds.add(tc.id);
|
2010
|
+
console.error(` Message ${idx}: Assistant has tool call with id: ${tc.id}`);
|
2011
|
+
});
|
2012
|
+
}
|
2013
|
+
if (msg.role === "tool") {
|
2014
|
+
// Only the snake_case `tool_call_id` field is read here; a camelCase `toolCallId` is not checked
|
2015
|
+
const toolId = msgWithTools.tool_call_id;
|
2016
|
+
if (toolId) {
|
2017
|
+
toolResponseIds.add(toolId);
|
2018
|
+
console.error(` Message ${idx}: Tool response for id: ${toolId}`);
|
2019
|
+
}
|
2020
|
+
else {
|
2021
|
+
console.error(` Message ${idx}: Tool response missing ID!`);
|
2022
|
+
}
|
2023
|
+
}
|
2024
|
+
});
|
2025
|
+
console.error(`[Mistral] Tool call IDs: ${Array.from(toolCallIds).join(", ")}`);
|
2026
|
+
console.error(`[Mistral] Tool response IDs: ${Array.from(toolResponseIds).join(", ")}`);
|
2027
|
+
// Find mismatches
|
2028
|
+
const unmatchedCalls = Array.from(toolCallIds).filter((id) => !toolResponseIds.has(id));
|
2029
|
+
const unmatchedResponses = Array.from(toolResponseIds).filter((id) => !toolCallIds.has(id));
|
2030
|
+
if (unmatchedCalls.length > 0) {
|
2031
|
+
console.error(`[Mistral] Tool calls without responses: ${unmatchedCalls.join(", ")}`);
|
2032
|
+
}
|
2033
|
+
if (unmatchedResponses.length > 0) {
|
2034
|
+
console.error(`[Mistral] Tool responses without calls: ${unmatchedResponses.join(", ")}`);
|
2035
|
+
}
|
2036
|
+
}
|
2037
|
+
throw new Error(`Mistral streaming failed to start: ${error.message || "Unknown error"}`);
|
2038
|
+
}
|
2039
|
+
let chunkCount = 0;
|
1624
2040
|
for await (const chunk of stream) {
|
2041
|
+
chunkCount++;
|
2042
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
2043
|
+
console.log(`[Mistral] Raw chunk:`, JSON.stringify(chunk, null, 2));
|
2044
|
+
}
|
1625
2045
|
const delta = chunk.data.choices[0]?.delta;
|
1626
2046
|
if (delta?.content) {
|
1627
2047
|
fullMessage += delta.content;
|
@@ -1637,34 +2057,53 @@ onEvent, onComplete) {
|
|
1637
2057
|
token: delta.content,
|
1638
2058
|
});
|
1639
2059
|
}
|
1640
|
-
// Handle tool calls
|
1641
|
-
if (delta?.tool_calls) {
|
1642
|
-
|
2060
|
+
// Handle tool calls (Mistral uses camelCase 'toolCalls' not 'tool_calls')
|
2061
|
+
if (delta?.toolCalls || delta?.tool_calls) {
|
2062
|
+
const toolCallsArray = delta.toolCalls || delta.tool_calls;
|
2063
|
+
for (const toolCallDelta of toolCallsArray) {
|
1643
2064
|
const index = toolCallDelta.index || 0;
|
2065
|
+
// Mistral sends complete tool calls in one chunk
|
1644
2066
|
if (!toolCalls[index]) {
|
1645
2067
|
toolCalls[index] = {
|
1646
|
-
id: toolCallDelta.id ||
|
1647
|
-
|
1648
|
-
|
2068
|
+
id: toolCallDelta.id ||
|
2069
|
+
toolCallDelta.function?.id ||
|
2070
|
+
`tool_${Date.now()}_${index}`,
|
2071
|
+
name: toolCallDelta.function?.name || "",
|
2072
|
+
arguments: toolCallDelta.function?.arguments || "",
|
1649
2073
|
};
|
2074
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
2075
|
+
console.log(`[Mistral] Tool call received:`, toolCalls[index]);
|
2076
|
+
}
|
2077
|
+
// Emit start event
|
1650
2078
|
onEvent({
|
1651
2079
|
type: "tool_call_start",
|
1652
2080
|
toolCall: {
|
1653
2081
|
id: toolCalls[index].id,
|
1654
|
-
name:
|
2082
|
+
name: toolCalls[index].name,
|
1655
2083
|
},
|
1656
2084
|
});
|
2085
|
+
// If arguments are already complete (Mistral sends them all at once)
|
2086
|
+
if (toolCalls[index].arguments) {
|
2087
|
+
onEvent({
|
2088
|
+
type: "tool_call_delta",
|
2089
|
+
toolCallId: toolCalls[index].id,
|
2090
|
+
argumentDelta: toolCalls[index].arguments,
|
2091
|
+
});
|
2092
|
+
}
|
1657
2093
|
}
|
1658
|
-
|
1659
|
-
|
1660
|
-
|
1661
|
-
|
1662
|
-
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
2094
|
+
else {
|
2095
|
+
// Update existing tool call (though Mistral typically sends complete calls)
|
2096
|
+
if (toolCallDelta.function?.name) {
|
2097
|
+
toolCalls[index].name = toolCallDelta.function.name;
|
2098
|
+
}
|
2099
|
+
if (toolCallDelta.function?.arguments) {
|
2100
|
+
toolCalls[index].arguments += toolCallDelta.function.arguments;
|
2101
|
+
onEvent({
|
2102
|
+
type: "tool_call_delta",
|
2103
|
+
toolCallId: toolCalls[index].id,
|
2104
|
+
argumentDelta: toolCallDelta.function.arguments,
|
2105
|
+
});
|
2106
|
+
}
|
1668
2107
|
}
|
1669
2108
|
}
|
1670
2109
|
}
|
@@ -1677,21 +2116,39 @@ onEvent, onComplete) {
|
|
1677
2116
|
toolCall,
|
1678
2117
|
});
|
1679
2118
|
}
|
2119
|
+
else {
|
2120
|
+
console.warn(`[Mistral] Skipping tool call with invalid JSON: ${toolCall.name}`, toolCall.arguments);
|
2121
|
+
}
|
1680
2122
|
}
|
1681
2123
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1682
|
-
console.log(`✅ [Mistral] Complete.
|
2124
|
+
console.log(`✅ [Mistral] Complete. Chunks: ${chunkCount} | Tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
|
1683
2125
|
}
|
1684
2126
|
onComplete(fullMessage, toolCalls);
|
1685
2127
|
}
|
1686
2128
|
catch (error) {
|
1687
|
-
|
2129
|
+
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
2130
|
+
console.error(`❌ [Mistral] Streaming error:`, error.message || error, error.stack);
|
2131
|
+
}
|
2132
|
+
// Check for common Mistral errors
|
2133
|
+
if (error.message?.includes("401") ||
|
2134
|
+
error.message?.includes("Unauthorized")) {
|
2135
|
+
throw new Error("Mistral API authentication failed. Please check your MISTRAL_API_KEY.");
|
2136
|
+
}
|
2137
|
+
if (error.message?.includes("429") ||
|
2138
|
+
error.message?.includes("rate limit")) {
|
2139
|
+
const rateLimitError = new Error("Mistral API rate limit exceeded. Please try again later.");
|
2140
|
+
rateLimitError.statusCode = 429;
|
2141
|
+
throw rateLimitError;
|
2142
|
+
}
|
2143
|
+
// Re-throw with more context
|
2144
|
+
throw new Error(`Mistral streaming failed: ${error.message || "Unknown error"}`);
|
1688
2145
|
}
|
1689
2146
|
}
|
1690
2147
|
/**
|
1691
2148
|
* Stream with Bedrock SDK (for Claude models)
|
1692
2149
|
*/
|
1693
2150
|
export async function streamWithBedrock(specification, messages, systemPrompt, tools, bedrockClient, // BedrockRuntimeClient instance
|
1694
|
-
onEvent, onComplete) {
|
2151
|
+
onEvent, onComplete, abortSignal) {
|
1695
2152
|
let fullMessage = "";
|
1696
2153
|
let toolCalls = [];
|
1697
2154
|
// Map contentBlockIndex to tool calls for proper correlation
|
@@ -1700,6 +2157,12 @@ onEvent, onComplete) {
|
|
1700
2157
|
const startTime = Date.now();
|
1701
2158
|
let firstTokenTime = 0;
|
1702
2159
|
let tokenCount = 0;
|
2160
|
+
// Reasoning detection state
|
2161
|
+
let isInThinkingTag = false;
|
2162
|
+
let reasoningContent = "";
|
2163
|
+
let currentContent = "";
|
2164
|
+
const THINKING_START = "<thinking>";
|
2165
|
+
const THINKING_END = "</thinking>";
|
1703
2166
|
try {
|
1704
2167
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
1705
2168
|
console.log(`🔍 [Bedrock] Specification object:`, JSON.stringify(specification, null, 2));
|
@@ -1767,7 +2230,9 @@ onEvent, onComplete) {
|
|
1767
2230
|
console.log(`🔍 [Bedrock] Converse request:`, JSON.stringify(request, null, 2));
|
1768
2231
|
}
|
1769
2232
|
const command = new ConverseStreamCommand(request);
|
1770
|
-
const response = await bedrockClient.send(command
|
2233
|
+
const response = await bedrockClient.send(command, {
|
2234
|
+
...(abortSignal && { abortSignal }),
|
2235
|
+
});
|
1771
2236
|
if (response.stream) {
|
1772
2237
|
for await (const event of response.stream) {
|
1773
2238
|
if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
|
@@ -1779,7 +2244,6 @@ onEvent, onComplete) {
|
|
1779
2244
|
const contentIndex = event.contentBlockDelta.contentBlockIndex;
|
1780
2245
|
if (delta?.text) {
|
1781
2246
|
const text = delta.text;
|
1782
|
-
fullMessage += text;
|
1783
2247
|
tokenCount++;
|
1784
2248
|
if (firstTokenTime === 0) {
|
1785
2249
|
firstTokenTime = Date.now() - startTime;
|
@@ -1787,10 +2251,69 @@ onEvent, onComplete) {
|
|
1787
2251
|
console.log(`⚡ [Bedrock] Time to First Token: ${firstTokenTime}ms`);
|
1788
2252
|
}
|
1789
2253
|
}
|
1790
|
-
|
1791
|
-
|
1792
|
-
|
1793
|
-
|
2254
|
+
// Accumulate content for thinking tag detection
|
2255
|
+
currentContent += text;
|
2256
|
+
// Check for thinking tags
|
2257
|
+
if (!isInThinkingTag && currentContent.includes(THINKING_START)) {
|
2258
|
+
const startIdx = currentContent.indexOf(THINKING_START);
|
2259
|
+
// Emit any content before the thinking tag
|
2260
|
+
const beforeThinking = currentContent.substring(0, startIdx);
|
2261
|
+
if (beforeThinking) {
|
2262
|
+
fullMessage += beforeThinking;
|
2263
|
+
onEvent({ type: "token", token: beforeThinking });
|
2264
|
+
}
|
2265
|
+
// Start reasoning mode
|
2266
|
+
isInThinkingTag = true;
|
2267
|
+
onEvent({ type: "reasoning_start", format: "thinking_tag" });
|
2268
|
+
// Process any content after the tag
|
2269
|
+
currentContent = currentContent.substring(startIdx + THINKING_START.length);
|
2270
|
+
reasoningContent = "";
|
2271
|
+
}
|
2272
|
+
if (isInThinkingTag) {
|
2273
|
+
// Check for end of thinking
|
2274
|
+
const endIdx = currentContent.indexOf(THINKING_END);
|
2275
|
+
if (endIdx !== -1) {
|
2276
|
+
// Add content up to the end tag
|
2277
|
+
reasoningContent += currentContent.substring(0, endIdx);
|
2278
|
+
// Emit final reasoning update
|
2279
|
+
onEvent({
|
2280
|
+
type: "reasoning_delta",
|
2281
|
+
content: currentContent.substring(0, endIdx),
|
2282
|
+
format: "thinking_tag",
|
2283
|
+
});
|
2284
|
+
onEvent({
|
2285
|
+
type: "reasoning_end",
|
2286
|
+
fullContent: reasoningContent,
|
2287
|
+
});
|
2288
|
+
// Exit reasoning mode
|
2289
|
+
isInThinkingTag = false;
|
2290
|
+
// Continue with remaining content
|
2291
|
+
currentContent = currentContent.substring(endIdx + THINKING_END.length);
|
2292
|
+
// Process any remaining text as normal content
|
2293
|
+
if (currentContent) {
|
2294
|
+
fullMessage += currentContent;
|
2295
|
+
onEvent({ type: "token", token: currentContent });
|
2296
|
+
currentContent = "";
|
2297
|
+
}
|
2298
|
+
}
|
2299
|
+
else {
|
2300
|
+
// Still in thinking mode, accumulate reasoning
|
2301
|
+
reasoningContent += currentContent;
|
2302
|
+
onEvent({
|
2303
|
+
type: "reasoning_delta",
|
2304
|
+
content: currentContent,
|
2305
|
+
format: "thinking_tag",
|
2306
|
+
});
|
2307
|
+
currentContent = "";
|
2308
|
+
}
|
2309
|
+
}
|
2310
|
+
else {
|
2311
|
+
// Normal content mode
|
2312
|
+
fullMessage += currentContent;
|
2313
|
+
onEvent({ type: "token", token: currentContent });
|
2314
|
+
currentContent = "";
|
2315
|
+
}
|
2316
|
+
// Always emit the current full message (excluding reasoning)
|
1794
2317
|
onEvent({
|
1795
2318
|
type: "message",
|
1796
2319
|
message: fullMessage,
|