graphlit-client 1.0.20250625001 → 1.0.20250627002

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import * as Types from "../generated/graphql-types.js";
1
2
  import { getModelName } from "../model-mapping.js";
2
3
  /**
3
4
  * Helper to check if a string is valid JSON
@@ -75,7 +76,7 @@ function cleanSchemaForGoogle(schema) {
75
76
  * Stream with OpenAI SDK
76
77
  */
77
78
  export async function streamWithOpenAI(specification, messages, tools, openaiClient, // OpenAI client instance
78
- onEvent, onComplete) {
79
+ onEvent, onComplete, abortSignal) {
79
80
  let fullMessage = "";
80
81
  let toolCalls = [];
81
82
  // Performance metrics
@@ -131,7 +132,10 @@ onEvent, onComplete) {
131
132
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
132
133
  console.log(`⏱️ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`);
133
134
  }
134
- const stream = await openaiClient.chat.completions.create(streamConfig);
135
+ const stream = await openaiClient.chat.completions.create({
136
+ ...streamConfig,
137
+ ...(abortSignal && { signal: abortSignal }),
138
+ });
135
139
  for await (const chunk of stream) {
136
140
  const delta = chunk.choices[0]?.delta;
137
141
  // Debug log chunk details
@@ -379,11 +383,8 @@ onEvent, onComplete) {
379
383
  throw error;
380
384
  }
381
385
  }
382
- /**
383
- * Stream with Anthropic SDK
384
- */
385
- export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Anthropic client instance
386
- onEvent, onComplete) {
386
+ export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Properly typed Anthropic client
387
+ onEvent, onComplete, abortSignal, thinkingConfig) {
387
388
  let fullMessage = "";
388
389
  let toolCalls = [];
389
390
  // Performance metrics
@@ -413,14 +414,29 @@ onEvent, onComplete) {
413
414
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
414
415
  console.log(`🤖 [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || 8192} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
415
416
  }
417
+ // Use proper Anthropic SDK types for the config
416
418
  const streamConfig = {
417
419
  model: modelName,
418
420
  messages,
419
421
  stream: true,
420
- temperature: specification.anthropic?.temperature,
421
- //top_p: specification.anthropic?.probability,
422
422
  max_tokens: specification.anthropic?.completionTokenLimit || 8192, // required
423
423
  };
424
+ // Handle temperature based on thinking configuration
425
+ if (thinkingConfig) {
426
+ // When thinking is enabled, temperature must be 1
427
+ streamConfig.temperature = 1;
428
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
429
+ console.log(`🧠 [Anthropic] Setting temperature to 1 (required for extended thinking)`);
430
+ }
431
+ }
432
+ else {
433
+ // Only add temperature if it's defined and valid for non-thinking requests
434
+ if (specification.anthropic?.temperature !== undefined &&
435
+ specification.anthropic?.temperature !== null &&
436
+ typeof specification.anthropic?.temperature === "number") {
437
+ streamConfig.temperature = specification.anthropic.temperature;
438
+ }
439
+ }
424
440
  if (systemPrompt) {
425
441
  streamConfig.system = systemPrompt;
426
442
  }
@@ -432,11 +448,31 @@ onEvent, onComplete) {
432
448
  input_schema: tool.schema ? JSON.parse(tool.schema) : {},
433
449
  }));
434
450
  }
451
+ // Add thinking config if provided
452
+ if (thinkingConfig) {
453
+ streamConfig.thinking = thinkingConfig;
454
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
455
+ console.log(`🧠 [Anthropic] Extended thinking enabled | Budget: ${thinkingConfig.budget_tokens} tokens`);
456
+ }
457
+ // Adjust max_tokens to account for thinking budget
458
+ const totalTokens = streamConfig.max_tokens + thinkingConfig.budget_tokens;
459
+ if (totalTokens > 200000) {
460
+ // Claude's context window limit
461
+ console.warn(`⚠️ [Anthropic] Total tokens (${totalTokens}) exceeds context window, adjusting completion tokens...`);
462
+ streamConfig.max_tokens = Math.max(1000, 200000 - thinkingConfig.budget_tokens);
463
+ }
464
+ }
435
465
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
436
466
  console.log(`⏱️ [Anthropic] Starting LLM call at: ${new Date().toISOString()}`);
437
467
  }
438
- const stream = await anthropicClient.messages.create(streamConfig);
468
+ const stream = await anthropicClient.messages.create(streamConfig, abortSignal ? { signal: abortSignal } : undefined);
439
469
  let activeContentBlock = false;
470
+ let currentContentBlockIndex;
471
+ let currentContentBlockType;
472
+ let thinkingContent = "";
473
+ let thinkingSignature = "";
474
+ let completeThinkingContent = ""; // Accumulate all thinking content for conversation history
475
+ let completeThinkingSignature = ""; // Accumulate signature for conversation history
440
476
  for await (const chunk of stream) {
441
477
  // Debug log all chunk types
442
478
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -444,7 +480,21 @@ onEvent, onComplete) {
444
480
  }
445
481
  if (chunk.type === "content_block_start") {
446
482
  activeContentBlock = true;
447
- if (chunk.content_block.type === "tool_use") {
483
+ currentContentBlockIndex = chunk.index;
484
+ currentContentBlockType = chunk.content_block.type;
485
+ if (chunk.content_block.type === "thinking") {
486
+ // Start of thinking block (native extended thinking)
487
+ thinkingContent = "";
488
+ thinkingSignature = "";
489
+ onEvent({
490
+ type: "reasoning_start",
491
+ format: "thinking_tag",
492
+ });
493
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
494
+ console.log("[Anthropic] Extended thinking block started");
495
+ }
496
+ }
497
+ else if (chunk.content_block.type === "tool_use") {
448
498
  const toolCall = {
449
499
  id: chunk.content_block.id,
450
500
  name: chunk.content_block.name,
@@ -477,7 +527,33 @@ onEvent, onComplete) {
477
527
  }
478
528
  }
479
529
  else if (chunk.type === "content_block_delta") {
480
- if (chunk.delta.type === "text_delta") {
530
+ // Handle thinking blocks with native extended thinking
531
+ if (chunk.delta.type === "thinking_delta" &&
532
+ "thinking" in chunk.delta) {
533
+ // Accumulate thinking content
534
+ thinkingContent += chunk.delta.thinking;
535
+ // Track first token time
536
+ if (firstTokenTime === 0) {
537
+ firstTokenTime = Date.now() - startTime;
538
+ }
539
+ onEvent({
540
+ type: "reasoning_delta",
541
+ content: chunk.delta.thinking,
542
+ format: "thinking_tag",
543
+ });
544
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
545
+ console.log(`[Anthropic] Thinking delta: "${chunk.delta.thinking}"`);
546
+ }
547
+ }
548
+ else if (chunk.delta.type === "signature_delta" &&
549
+ "signature" in chunk.delta) {
550
+ // Handle signature for thinking blocks
551
+ thinkingSignature += chunk.delta.signature;
552
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
553
+ console.log(`[Anthropic] Signature delta: "${chunk.delta.signature}"`);
554
+ }
555
+ }
556
+ else if (chunk.delta.type === "text_delta" && "text" in chunk.delta) {
481
557
  fullMessage += chunk.delta.text;
482
558
  tokenCount++;
483
559
  const currentTime = Date.now();
@@ -531,9 +607,39 @@ onEvent, onComplete) {
531
607
  }
532
608
  else if (chunk.type === "content_block_stop") {
533
609
  activeContentBlock = false;
610
+ // Check if we're stopping a thinking block
611
+ if (currentContentBlockType === "thinking" &&
612
+ chunk.index === currentContentBlockIndex) {
613
+ // Emit the complete thinking block with signature
614
+ onEvent({
615
+ type: "reasoning_end",
616
+ fullContent: thinkingContent,
617
+ signature: thinkingSignature || undefined,
618
+ });
619
+ // Accumulate thinking content and signature for conversation history preservation
620
+ if (thinkingContent.trim()) {
621
+ completeThinkingContent += thinkingContent;
622
+ }
623
+ if (thinkingSignature.trim()) {
624
+ completeThinkingSignature = thinkingSignature; // Use the last signature
625
+ }
626
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
627
+ console.log(`[Anthropic] Thinking block completed:`, {
628
+ contentLength: thinkingContent.length,
629
+ hasSignature: !!thinkingSignature,
630
+ signature: thinkingSignature,
631
+ totalThinkingLength: completeThinkingContent.length,
632
+ });
633
+ }
634
+ // Reset current thinking state (but keep completeThinkingContent)
635
+ thinkingContent = "";
636
+ thinkingSignature = "";
637
+ }
638
+ currentContentBlockType = undefined;
639
+ currentContentBlockIndex = undefined;
534
640
  // Tool call complete
535
641
  const currentTool = toolCalls[toolCalls.length - 1];
536
- if (currentTool) {
642
+ if (currentTool && chunk.content_block?.type === "tool_use") {
537
643
  const currentTime = Date.now();
538
644
  // Update tool metrics
539
645
  const toolIndex = toolCalls.length - 1;
@@ -682,7 +788,19 @@ onEvent, onComplete) {
682
788
  }
683
789
  console.log(`✅ [Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`);
684
790
  }
685
- onComplete(fullMessage, validToolCalls);
791
+ // Include thinking content in the final message for conversation history preservation
792
+ let finalMessage = fullMessage;
793
+ if (completeThinkingContent.trim()) {
794
+ // Wrap thinking content with signature in special tags that formatMessagesForAnthropic can parse
795
+ const thinkingBlock = completeThinkingSignature.trim()
796
+ ? `<thinking signature="${completeThinkingSignature}">${completeThinkingContent}</thinking>`
797
+ : `<thinking>${completeThinkingContent}</thinking>`;
798
+ finalMessage = `${thinkingBlock}${fullMessage}`;
799
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
800
+ console.log(`🧠 [Anthropic] Including thinking content (${completeThinkingContent.length} chars) and signature (${completeThinkingSignature.length} chars) in conversation history`);
801
+ }
802
+ }
803
+ onComplete(finalMessage, validToolCalls);
686
804
  }
687
805
  catch (error) {
688
806
  // Handle Anthropic-specific errors
@@ -717,7 +835,7 @@ onEvent, onComplete) {
717
835
  * Stream with Google SDK
718
836
  */
719
837
  export async function streamWithGoogle(specification, messages, systemPrompt, tools, googleClient, // Google GenerativeAI client instance
720
- onEvent, onComplete) {
838
+ onEvent, onComplete, abortSignal) {
721
839
  let fullMessage = "";
722
840
  let toolCalls = [];
723
841
  // Performance metrics
@@ -1070,19 +1188,35 @@ onEvent, onComplete) {
1070
1188
  * Stream with Groq SDK (OpenAI-compatible)
1071
1189
  */
1072
1190
  export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
1073
- onEvent, onComplete) {
1191
+ onEvent, onComplete, abortSignal) {
1074
1192
  try {
1075
1193
  const modelName = getModelName(specification);
1076
1194
  // Filter or simplify tools for Groq models that have issues
1077
1195
  let groqTools = tools;
1078
1196
  if (tools && tools.length > 0) {
1079
- // LLaMA 3.3 70B seems to have tool calling issues - disable tools for this model
1197
+ // Some models have tool calling issues - provide fallback prompt
1198
+ const problemModels = [
1199
+ "llama-3.3",
1200
+ "LLAMA_3_3",
1201
+ "llama3-groq-70b",
1202
+ "llama3-groq-8b",
1203
+ ];
1080
1204
  if (modelName &&
1081
- (modelName.includes("llama-3.3") || modelName.includes("LLAMA_3_3"))) {
1205
+ problemModels.some((model) => modelName.toLowerCase().includes(model.toLowerCase()))) {
1082
1206
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1083
- console.log(`⚠️ [Groq] Disabling tools for ${modelName} due to known compatibility issues`);
1207
+ console.log(`⚠️ [Groq] Model ${modelName} has limited tool support - using simplified schemas`);
1084
1208
  }
1085
- groqTools = undefined;
1209
+ // Don't disable tools entirely, but simplify them more aggressively
1210
+ groqTools = tools.map((tool) => ({
1211
+ ...tool,
1212
+ schema: tool.schema
1213
+ ? JSON.stringify({
1214
+ type: "object",
1215
+ properties: JSON.parse(tool.schema).properties || {},
1216
+ required: JSON.parse(tool.schema).required || [],
1217
+ })
1218
+ : tool.schema,
1219
+ }));
1086
1220
  }
1087
1221
  else {
1088
1222
  // For other models, simplify complex schemas
@@ -1095,7 +1229,7 @@ onEvent, onComplete) {
1095
1229
  }
1096
1230
  }
1097
1231
  // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
1098
- return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete);
1232
+ return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete, abortSignal);
1099
1233
  }
1100
1234
  catch (error) {
1101
1235
  // Handle Groq-specific errors
@@ -1126,10 +1260,42 @@ onEvent, onComplete) {
1126
1260
  * Stream with Cerebras SDK (OpenAI-compatible)
1127
1261
  */
1128
1262
  export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
1129
- onEvent, onComplete) {
1263
+ onEvent, onComplete, abortSignal) {
1130
1264
  try {
1265
+ const modelName = getModelName(specification);
1266
+ // Cerebras has very limited tool support
1267
+ let cerebrasTools = tools;
1268
+ let filteredMessages = messages;
1269
+ if (modelName) {
1270
+ const isQwen = modelName.toLowerCase().includes("qwen-3-32b");
1271
+ if (tools && tools.length > 0) {
1272
+ if (!isQwen) {
1273
+ // Only qwen-3-32b supports tools
1274
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1275
+ console.log(`⚠️ [Cerebras] Disabling tools for ${modelName} - only qwen-3-32b supports tools`);
1276
+ }
1277
+ cerebrasTools = undefined;
1278
+ }
1279
+ }
1280
+ // For non-qwen models, we need to filter out any assistant messages with tool_calls
1281
+ if (!isQwen) {
1282
+ filteredMessages = messages.map((msg) => {
1283
+ if (msg.role === "assistant" &&
1284
+ msg.tool_calls &&
1285
+ msg.tool_calls.length > 0) {
1286
+ // Remove tool_calls from assistant messages for non-qwen models
1287
+ const { tool_calls, ...msgWithoutTools } = msg;
1288
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1289
+ console.log(`⚠️ [Cerebras] Removing tool_calls from assistant message for ${modelName}`);
1290
+ }
1291
+ return msgWithoutTools;
1292
+ }
1293
+ return msg;
1294
+ });
1295
+ }
1296
+ }
1131
1297
  // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
1132
- return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
1298
+ return await streamWithOpenAI(specification, filteredMessages, cerebrasTools, cerebrasClient, onEvent, onComplete, abortSignal);
1133
1299
  }
1134
1300
  catch (error) {
1135
1301
  // Handle Cerebras-specific 429 errors
@@ -1149,9 +1315,22 @@ onEvent, onComplete) {
1149
1315
  * Stream with Deepseek SDK (OpenAI-compatible)
1150
1316
  */
1151
1317
  export async function streamWithDeepseek(specification, messages, tools, deepseekClient, // OpenAI client instance configured for Deepseek
1152
- onEvent, onComplete) {
1318
+ onEvent, onComplete, abortSignal) {
1153
1319
  let fullMessage = "";
1154
1320
  let toolCalls = [];
1321
+ // Reasoning detection state
1322
+ let reasoningLines = [];
1323
+ let currentLine = "";
1324
+ const REASONING_PATTERNS = [
1325
+ /^🤔\s*Reasoning:/i,
1326
+ /^\*\*Step\s+\d+:/i,
1327
+ /^\*\*Reasoning:/i,
1328
+ /^\*\*Analysis:/i,
1329
+ /^\*\*Thought\s+\d+:/i,
1330
+ /^\*\*Consideration:/i,
1331
+ ];
1332
+ let isInReasoning = false;
1333
+ let hasEmittedReasoningStart = false;
1155
1334
  // Performance metrics
1156
1335
  const startTime = Date.now();
1157
1336
  let firstTokenTime = 0;
@@ -1219,7 +1398,10 @@ onEvent, onComplete) {
1219
1398
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1220
1399
  console.log(`⏱️ [Deepseek] Starting LLM call at: ${new Date().toISOString()}`);
1221
1400
  }
1222
- const stream = await deepseekClient.chat.completions.create(streamConfig);
1401
+ const stream = await deepseekClient.chat.completions.create({
1402
+ ...streamConfig,
1403
+ ...(abortSignal && { signal: abortSignal }),
1404
+ });
1223
1405
  for await (const chunk of stream) {
1224
1406
  const delta = chunk.choices[0]?.delta;
1225
1407
  if (!delta)
@@ -1238,15 +1420,78 @@ onEvent, onComplete) {
1238
1420
  // Handle message content
1239
1421
  if (delta.content) {
1240
1422
  tokenCount++;
1241
- fullMessage += delta.content;
1242
1423
  // Track first meaningful content
1243
1424
  if (firstMeaningfulContentTime === 0 && fullMessage.trim().length > 0) {
1244
1425
  firstMeaningfulContentTime = currentTime - startTime;
1245
1426
  }
1246
- onEvent({
1247
- type: "message",
1248
- message: fullMessage,
1249
- });
1427
+ // Process content for reasoning detection
1428
+ const content = delta.content;
1429
+ // Build current line for pattern matching
1430
+ for (const char of content) {
1431
+ if (char === "\n") {
1432
+ // Check if this line starts a reasoning section
1433
+ const trimmedLine = currentLine.trim();
1434
+ const isReasoningLine = REASONING_PATTERNS.some((pattern) => pattern.test(trimmedLine));
1435
+ if (isReasoningLine && !isInReasoning) {
1436
+ // Start reasoning mode
1437
+ isInReasoning = true;
1438
+ if (!hasEmittedReasoningStart) {
1439
+ onEvent({ type: "reasoning_start", format: "markdown" });
1440
+ hasEmittedReasoningStart = true;
1441
+ }
1442
+ reasoningLines.push(currentLine);
1443
+ onEvent({
1444
+ type: "reasoning_delta",
1445
+ content: currentLine + "\n",
1446
+ format: "markdown",
1447
+ });
1448
+ }
1449
+ else if (isInReasoning) {
1450
+ // Continue reasoning if line is indented or continues the pattern
1451
+ if (currentLine.startsWith(" ") ||
1452
+ currentLine.startsWith("\t") ||
1453
+ currentLine.trim().startsWith("**") ||
1454
+ currentLine.trim() === "") {
1455
+ reasoningLines.push(currentLine);
1456
+ onEvent({
1457
+ type: "reasoning_delta",
1458
+ content: currentLine + "\n",
1459
+ format: "markdown",
1460
+ });
1461
+ }
1462
+ else {
1463
+ // End reasoning mode
1464
+ isInReasoning = false;
1465
+ onEvent({
1466
+ type: "reasoning_end",
1467
+ fullContent: reasoningLines.join("\n"),
1468
+ });
1469
+ // This line is normal content
1470
+ fullMessage += currentLine + "\n";
1471
+ onEvent({ type: "token", token: currentLine + "\n" });
1472
+ }
1473
+ }
1474
+ else {
1475
+ // Normal content
1476
+ fullMessage += currentLine + "\n";
1477
+ onEvent({ type: "token", token: currentLine + "\n" });
1478
+ }
1479
+ currentLine = "";
1480
+ }
1481
+ else {
1482
+ currentLine += char;
1483
+ }
1484
+ }
1485
+ // Handle partial line
1486
+ if (currentLine && !isInReasoning) {
1487
+ // For partial lines, emit as normal content
1488
+ fullMessage += currentLine;
1489
+ onEvent({ type: "token", token: currentLine });
1490
+ currentLine = "";
1491
+ }
1492
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1493
+ console.log(`[Deepseek] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`);
1494
+ }
1250
1495
  // Performance metrics tracking (internal only)
1251
1496
  if (tokenCount % 10 === 0) {
1252
1497
  const totalTokens = tokenCount + toolArgumentTokens;
@@ -1299,6 +1544,25 @@ onEvent, onComplete) {
1299
1544
  }
1300
1545
  }
1301
1546
  }
1547
+ // Handle any remaining content
1548
+ if (currentLine) {
1549
+ if (isInReasoning) {
1550
+ reasoningLines.push(currentLine);
1551
+ onEvent({
1552
+ type: "reasoning_delta",
1553
+ content: currentLine,
1554
+ format: "markdown",
1555
+ });
1556
+ onEvent({
1557
+ type: "reasoning_end",
1558
+ fullContent: reasoningLines.join("\n"),
1559
+ });
1560
+ }
1561
+ else {
1562
+ fullMessage += currentLine;
1563
+ onEvent({ type: "token", token: currentLine });
1564
+ }
1565
+ }
1302
1566
  // Process completed tool calls
1303
1567
  const validToolCalls = toolCalls.filter((tc, idx) => {
1304
1568
  if (!isValidJSON(tc.arguments)) {
@@ -1353,7 +1617,7 @@ onEvent, onComplete) {
1353
1617
  * Stream with Cohere SDK
1354
1618
  */
1355
1619
  export async function streamWithCohere(specification, messages, tools, cohereClient, // CohereClient instance
1356
- onEvent, onComplete) {
1620
+ onEvent, onComplete, abortSignal) {
1357
1621
  let fullMessage = "";
1358
1622
  let toolCalls = [];
1359
1623
  // Performance metrics
@@ -1372,107 +1636,89 @@ onEvent, onComplete) {
1372
1636
  console.log(`🔍 [Cohere] Messages array length: ${messages.length}`);
1373
1637
  console.log(`🔍 [Cohere] All messages:`, JSON.stringify(messages, null, 2));
1374
1638
  }
1639
+ // V2 API validation
1375
1640
  if (messages.length === 0) {
1376
1641
  throw new Error("No messages found for Cohere streaming");
1377
1642
  }
1378
- // Cohere v7 expects a single message and optional chatHistory
1379
- // Extract system messages for preamble and filter them out of history
1380
- const systemMessages = messages.filter((msg) => msg.role === "SYSTEM");
1381
- const nonSystemMessages = messages.filter((msg) => msg.role !== "SYSTEM");
1382
- // Extract the last non-system message as the current message
1383
- const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
1384
- const chatHistory = nonSystemMessages.slice(0, -1);
1385
- if (!lastMessage || !lastMessage.message) {
1386
- throw new Error("Last message must have message property for Cohere streaming");
1387
- }
1388
- // Build properly typed request using Cohere SDK types
1643
+ const v2Messages = [];
1644
+ // Map our GraphQL role types to Cohere v2 role strings
1645
+ messages.forEach((msg) => {
1646
+ switch (msg.role) {
1647
+ case Types.ConversationRoleTypes.System:
1648
+ v2Messages.push({
1649
+ role: "system",
1650
+ content: msg.message || "",
1651
+ });
1652
+ break;
1653
+ case Types.ConversationRoleTypes.User:
1654
+ v2Messages.push({
1655
+ role: "user",
1656
+ content: msg.message || "",
1657
+ });
1658
+ break;
1659
+ case Types.ConversationRoleTypes.Assistant:
1660
+ const assistantMsg = {
1661
+ role: "assistant",
1662
+ content: msg.message || "",
1663
+ };
1664
+ // V2 uses camelCase toolCalls
1665
+ if (msg.toolCalls && msg.toolCalls.length > 0) {
1666
+ // Convert our internal tool call format to Cohere V2 format
1667
+ assistantMsg.toolCalls = msg.toolCalls
1668
+ .filter((tc) => tc !== null)
1669
+ .map((tc) => ({
1670
+ id: tc.id,
1671
+ type: "function",
1672
+ function: {
1673
+ name: tc.name,
1674
+ arguments: tc.arguments,
1675
+ },
1676
+ }));
1677
+ }
1678
+ v2Messages.push(assistantMsg);
1679
+ break;
1680
+ case Types.ConversationRoleTypes.Tool:
1681
+ // Tool messages need the tool call ID
1682
+ const toolCallId = msg.toolCallId || "";
1683
+ if (!toolCallId && process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1684
+ console.warn(`[Cohere] Tool message missing toolCallId:`, {
1685
+ message: msg.message?.substring(0, 50),
1686
+ });
1687
+ }
1688
+ v2Messages.push({
1689
+ role: "tool",
1690
+ content: msg.message || "",
1691
+ toolCallId: toolCallId,
1692
+ });
1693
+ break;
1694
+ default:
1695
+ console.warn(`[Cohere] Unknown message role: ${msg.role}, treating as user`);
1696
+ v2Messages.push({
1697
+ role: "user",
1698
+ content: msg.message || "",
1699
+ });
1700
+ }
1701
+ });
1389
1702
  const streamConfig = {
1390
1703
  model: modelName,
1391
- message: lastMessage.message, // Current message (singular)
1704
+ messages: v2Messages,
1705
+ stream: true,
1392
1706
  };
1393
- // Add system message as preamble if present
1394
- if (systemMessages.length > 0) {
1395
- // Combine all system messages into preamble
1396
- streamConfig.preamble = systemMessages
1397
- .map((msg) => msg.message)
1398
- .join("\n\n");
1399
- }
1400
- // Add chat history if there are previous messages
1401
- if (chatHistory.length > 0) {
1402
- // Build properly typed chat history using Cohere SDK Message types
1403
- // Note: SYSTEM messages are already filtered out and handled as preamble
1404
- const cohereHistory = chatHistory.map((msg) => {
1405
- switch (msg.role) {
1406
- case "USER":
1407
- return {
1408
- role: "USER",
1409
- message: msg.message,
1410
- };
1411
- case "CHATBOT":
1412
- const chatbotMsg = {
1413
- role: "CHATBOT",
1414
- message: msg.message,
1415
- };
1416
- // Add tool calls if present
1417
- if (msg.tool_calls && msg.tool_calls.length > 0) {
1418
- chatbotMsg.toolCalls = msg.tool_calls.map((tc) => ({
1419
- name: tc.name,
1420
- parameters: tc.parameters || {},
1421
- }));
1422
- }
1423
- return chatbotMsg;
1424
- case "TOOL":
1425
- return {
1426
- role: "TOOL",
1427
- toolResults: msg.tool_results || [],
1428
- };
1429
- default:
1430
- // Fallback - treat as USER
1431
- return {
1432
- role: "USER",
1433
- message: msg.message,
1434
- };
1435
- }
1436
- });
1437
- streamConfig.chatHistory = cohereHistory;
1438
- }
1439
1707
  // Only add temperature if it's defined
1440
1708
  if (specification.cohere?.temperature !== undefined &&
1441
1709
  specification.cohere.temperature !== null) {
1442
1710
  streamConfig.temperature = specification.cohere.temperature;
1443
1711
  }
1444
- // Add tools if provided
1712
+ // Add tools if provided - V2 format is different
1445
1713
  if (tools && tools.length > 0) {
1446
- const cohereTools = tools.map((tool) => {
1447
- if (!tool.schema) {
1448
- return {
1449
- name: tool.name || "",
1450
- description: tool.description || "",
1451
- parameterDefinitions: {},
1452
- };
1453
- }
1454
- // Parse the JSON schema
1455
- const schema = JSON.parse(tool.schema);
1456
- // Convert JSON Schema to Cohere's expected format
1457
- const parameterDefinitions = {};
1458
- if (schema.properties) {
1459
- for (const [key, value] of Object.entries(schema.properties)) {
1460
- const prop = value;
1461
- const paramDef = {
1462
- type: prop.type || "str",
1463
- description: prop.description || "",
1464
- required: schema.required?.includes(key) || false,
1465
- };
1466
- parameterDefinitions[key] = paramDef;
1467
- }
1468
- }
1469
- return {
1714
+ streamConfig.tools = tools.map((tool) => ({
1715
+ type: "function",
1716
+ function: {
1470
1717
  name: tool.name || "",
1471
1718
  description: tool.description || "",
1472
- parameterDefinitions, // Use camelCase as expected by Cohere SDK
1473
- };
1474
- });
1475
- streamConfig.tools = cohereTools;
1719
+ parameters: tool.schema ? JSON.parse(tool.schema) : {},
1720
+ },
1721
+ }));
1476
1722
  }
1477
1723
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1478
1724
  console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
@@ -1488,7 +1734,10 @@ onEvent, onComplete) {
1488
1734
  process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1489
1735
  console.log(`🔍 [Cohere] Full streamConfig for ${modelName}:`, JSON.stringify(streamConfig, null, 2));
1490
1736
  }
1491
- stream = await cohereClient.chatStream(streamConfig);
1737
+ stream = await cohereClient.chatStream({
1738
+ ...streamConfig,
1739
+ ...(abortSignal && { signal: abortSignal }),
1740
+ });
1492
1741
  }
1493
1742
  catch (streamError) {
1494
1743
  // Enhanced error logging
@@ -1523,9 +1772,17 @@ onEvent, onComplete) {
1523
1772
  }
1524
1773
  throw streamError;
1525
1774
  }
1775
+ // Track current tool call being built
1776
+ let currentToolCallIndex = -1;
1777
+ let currentToolCall = null;
1526
1778
  for await (const chunk of stream) {
1527
- if (chunk.eventType === "text-generation") {
1528
- const text = chunk.text;
1779
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1780
+ console.log(`[Cohere] Event type: ${chunk.type}`);
1781
+ }
1782
+ // Handle v2 API event types
1783
+ if (chunk.type === "content-delta") {
1784
+ // Content streaming in response generation step
1785
+ const text = chunk.delta?.message?.content?.text;
1529
1786
  if (text) {
1530
1787
  fullMessage += text;
1531
1788
  tokenCount++;
@@ -1539,34 +1796,92 @@ onEvent, onComplete) {
1539
1796
  type: "token",
1540
1797
  token: text,
1541
1798
  });
1799
+ // Also emit message update
1800
+ onEvent({
1801
+ type: "message",
1802
+ message: fullMessage,
1803
+ });
1542
1804
  }
1543
1805
  }
1544
- else if (chunk.eventType === "tool-calls-generation") {
1545
- // Handle tool calls
1546
- if (chunk.toolCalls) {
1547
- for (const toolCall of chunk.toolCalls) {
1548
- const id = `tool_${Date.now()}_${toolCalls.length}`;
1549
- const formattedToolCall = {
1550
- id,
1551
- name: toolCall.name,
1552
- arguments: JSON.stringify(toolCall.parameters),
1553
- };
1554
- toolCalls.push(formattedToolCall);
1555
- onEvent({
1556
- type: "tool_call_start",
1557
- toolCall: { id, name: toolCall.name },
1558
- });
1806
+ else if (chunk.type === "tool-call-start") {
1807
+ // Start of a tool call
1808
+ currentToolCallIndex = chunk.index || 0;
1809
+ const toolCallData = chunk.delta?.message?.toolCalls; // Note: toolCalls not tool_calls
1810
+ if (toolCallData) {
1811
+ currentToolCall = {
1812
+ id: toolCallData.id ||
1813
+ `cohere_tool_${Date.now()}_${currentToolCallIndex}`,
1814
+ name: toolCallData.function?.name || "",
1815
+ arguments: "",
1816
+ };
1817
+ onEvent({
1818
+ type: "tool_call_start",
1819
+ toolCall: {
1820
+ id: currentToolCall.id,
1821
+ name: currentToolCall.name,
1822
+ },
1823
+ });
1824
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1825
+ console.log(`[Cohere] Tool call started: ${currentToolCall.name}`);
1826
+ }
1827
+ }
1828
+ }
1829
+ else if (chunk.type === "tool-call-delta") {
1830
+ // Tool call argument streaming
1831
+ if (currentToolCall && chunk.index === currentToolCallIndex) {
1832
+ const argDelta = chunk.delta?.message?.toolCalls?.function?.arguments;
1833
+ if (argDelta) {
1834
+ currentToolCall.arguments += argDelta;
1559
1835
  onEvent({
1560
- type: "tool_call_parsed",
1561
- toolCall: formattedToolCall,
1836
+ type: "tool_call_delta",
1837
+ toolCallId: currentToolCall.id,
1838
+ argumentDelta: argDelta,
1562
1839
  });
1563
1840
  }
1564
1841
  }
1565
1842
  }
1843
+ else if (chunk.type === "tool-call-end") {
1844
+ // Tool call complete
1845
+ if (currentToolCall && chunk.index === currentToolCallIndex) {
1846
+ toolCalls.push(currentToolCall);
1847
+ onEvent({
1848
+ type: "tool_call_parsed",
1849
+ toolCall: currentToolCall,
1850
+ });
1851
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1852
+ console.log(`[Cohere] Tool call completed: ${currentToolCall.name}`);
1853
+ }
1854
+ currentToolCall = null;
1855
+ currentToolCallIndex = -1;
1856
+ }
1857
+ }
1858
+ else if (chunk.type === "tool-plan-delta") {
1859
+ // Handle tool plan delta - Cohere might send this before tool calls
1860
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1861
+ console.log(`[Cohere] Tool plan delta received`, chunk);
1862
+ }
1863
+ }
1864
+ else if (chunk.type === "message-start") {
1865
+ // Handle message start event
1866
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1867
+ console.log(`[Cohere] Message start event received`, chunk);
1868
+ }
1869
+ }
1870
+ else if (chunk.type === "message-end") {
1871
+ // Handle message end event
1872
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1873
+ console.log(`[Cohere] Message end event received`, chunk);
1874
+ }
1875
+ }
1566
1876
  }
1567
1877
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1568
- console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
1878
+ console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
1569
1879
  }
1880
+ // Emit final complete event
1881
+ onEvent({
1882
+ type: "complete",
1883
+ tokens: tokenCount,
1884
+ });
1570
1885
  onComplete(fullMessage, toolCalls);
1571
1886
  }
1572
1887
  catch (error) {
@@ -1589,7 +1904,7 @@ onEvent, onComplete) {
1589
1904
  * Stream with Mistral SDK
1590
1905
  */
1591
1906
  export async function streamWithMistral(specification, messages, tools, mistralClient, // Mistral client instance
1592
- onEvent, onComplete) {
1907
+ onEvent, onComplete, abortSignal) {
1593
1908
  let fullMessage = "";
1594
1909
  let toolCalls = [];
1595
1910
  // Performance metrics
@@ -1603,6 +1918,19 @@ onEvent, onComplete) {
1603
1918
  }
1604
1919
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1605
1920
  console.log(`🤖 [Mistral] Model Config: Service=Mistral | Model=${modelName} | Temperature=${specification.mistral?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
1921
+ console.log(`🔍 [Mistral] Messages being sent (${messages.length} total):`);
1922
+ messages.forEach((msg, idx) => {
1923
+ const msgWithTools = msg;
1924
+ console.log(` Message ${idx}: role=${msg.role}, hasContent=${!!msg.content}, hasToolCalls=${!!msgWithTools.tool_calls}, tool_call_id=${msgWithTools.tool_call_id}`);
1925
+ if (msgWithTools.tool_calls) {
1926
+ console.log(` Tool calls: ${JSON.stringify(msgWithTools.tool_calls)}`);
1927
+ }
1928
+ if (msgWithTools.tool_call_id) {
1929
+ console.log(` Tool call ID: ${msgWithTools.tool_call_id}`);
1930
+ }
1931
+ // Log full message for debugging
1932
+ console.log(` Full message: ${JSON.stringify(msg)}`);
1933
+ });
1606
1934
  }
1607
1935
  const streamConfig = {
1608
1936
  model: modelName,
@@ -1620,8 +1948,100 @@ onEvent, onComplete) {
1620
1948
  },
1621
1949
  }));
1622
1950
  }
1623
- const stream = await mistralClient.chat.stream(streamConfig);
1951
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1952
+ console.log(`[Mistral] Stream config:`, JSON.stringify({
1953
+ ...streamConfig,
1954
+ messages: streamConfig.messages.map((m) => ({
1955
+ role: m.role,
1956
+ contentLength: typeof m.content === "string"
1957
+ ? m.content.length
1958
+ : m.content?.length || 0,
1959
+ hasToolCalls: !!m.tool_calls,
1960
+ toolCallsCount: m.tool_calls?.length || 0,
1961
+ toolCallId: m.tool_call_id,
1962
+ })),
1963
+ }, null, 2));
1964
+ // Log full messages for debugging tool issues
1965
+ if (messages.some((m) => m.role === "tool" || m.tool_calls)) {
1966
+ console.log(`[Mistral] Full messages for tool debugging:`, JSON.stringify(messages, null, 2));
1967
+ }
1968
+ }
1969
+ let stream;
1970
+ try {
1971
+ // Log the full config for debugging tool issues
1972
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1973
+ console.log(`[Mistral] About to call stream with:`, {
1974
+ model: streamConfig.model,
1975
+ messageCount: streamConfig.messages.length,
1976
+ hasTools: !!(streamConfig.tools && streamConfig.tools.length > 0),
1977
+ toolCount: streamConfig.tools?.length || 0,
1978
+ });
1979
+ // Log the EXACT payload being sent to Mistral API
1980
+ console.log(`[Mistral] EXACT API payload:`, JSON.stringify(streamConfig, null, 2));
1981
+ // Check for tool call/result mismatches
1982
+ const toolCallMessages = streamConfig.messages.filter((m) => m.tool_calls?.length > 0);
1983
+ const toolResultMessages = streamConfig.messages.filter((m) => m.role === "tool");
1984
+ if (toolCallMessages.length > 0 || toolResultMessages.length > 0) {
1985
+ console.log(`[Mistral] Tool message analysis:`, {
1986
+ toolCallMessages: toolCallMessages.length,
1987
+ toolResultMessages: toolResultMessages.length,
1988
+ toolCallsTotal: toolCallMessages.reduce((sum, m) => sum + (m.tool_calls?.length || 0), 0),
1989
+ });
1990
+ }
1991
+ }
1992
+ stream = await mistralClient.chat.stream({
1993
+ ...streamConfig,
1994
+ ...(abortSignal && { signal: abortSignal }),
1995
+ });
1996
+ }
1997
+ catch (error) {
1998
+ console.error(`[Mistral] Failed to create stream:`, error);
1999
+ // Better error handling for tool mismatch
2000
+ if (error.message?.includes("Not the same number of function calls and responses")) {
2001
+ console.error(`[Mistral] Tool call/response mismatch detected. This usually happens when there are unmatched tool calls in the conversation history.`);
2002
+ // Analyze the messages to find the mismatch
2003
+ const toolCallIds = new Set();
2004
+ const toolResponseIds = new Set();
2005
+ messages.forEach((msg, idx) => {
2006
+ const msgWithTools = msg;
2007
+ if (msg.role === "assistant" && msgWithTools.tool_calls) {
2008
+ msgWithTools.tool_calls.forEach((tc) => {
2009
+ toolCallIds.add(tc.id);
2010
+ console.error(` Message ${idx}: Assistant has tool call with id: ${tc.id}`);
2011
+ });
2012
+ }
2013
+ if (msg.role === "tool") {
2014
+ // Check both camelCase and snake_case versions
2015
+ const toolId = msgWithTools.tool_call_id;
2016
+ if (toolId) {
2017
+ toolResponseIds.add(toolId);
2018
+ console.error(` Message ${idx}: Tool response for id: ${toolId}`);
2019
+ }
2020
+ else {
2021
+ console.error(` Message ${idx}: Tool response missing ID!`);
2022
+ }
2023
+ }
2024
+ });
2025
+ console.error(`[Mistral] Tool call IDs: ${Array.from(toolCallIds).join(", ")}`);
2026
+ console.error(`[Mistral] Tool response IDs: ${Array.from(toolResponseIds).join(", ")}`);
2027
+ // Find mismatches
2028
+ const unmatchedCalls = Array.from(toolCallIds).filter((id) => !toolResponseIds.has(id));
2029
+ const unmatchedResponses = Array.from(toolResponseIds).filter((id) => !toolCallIds.has(id));
2030
+ if (unmatchedCalls.length > 0) {
2031
+ console.error(`[Mistral] Tool calls without responses: ${unmatchedCalls.join(", ")}`);
2032
+ }
2033
+ if (unmatchedResponses.length > 0) {
2034
+ console.error(`[Mistral] Tool responses without calls: ${unmatchedResponses.join(", ")}`);
2035
+ }
2036
+ }
2037
+ throw new Error(`Mistral streaming failed to start: ${error.message || "Unknown error"}`);
2038
+ }
2039
+ let chunkCount = 0;
1624
2040
  for await (const chunk of stream) {
2041
+ chunkCount++;
2042
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
2043
+ console.log(`[Mistral] Raw chunk:`, JSON.stringify(chunk, null, 2));
2044
+ }
1625
2045
  const delta = chunk.data.choices[0]?.delta;
1626
2046
  if (delta?.content) {
1627
2047
  fullMessage += delta.content;
@@ -1637,34 +2057,53 @@ onEvent, onComplete) {
1637
2057
  token: delta.content,
1638
2058
  });
1639
2059
  }
1640
- // Handle tool calls
1641
- if (delta?.tool_calls) {
1642
- for (const toolCallDelta of delta.tool_calls) {
2060
+ // Handle tool calls (Mistral uses camelCase 'toolCalls' not 'tool_calls')
2061
+ if (delta?.toolCalls || delta?.tool_calls) {
2062
+ const toolCallsArray = delta.toolCalls || delta.tool_calls;
2063
+ for (const toolCallDelta of toolCallsArray) {
1643
2064
  const index = toolCallDelta.index || 0;
2065
+ // Mistral sends complete tool calls in one chunk
1644
2066
  if (!toolCalls[index]) {
1645
2067
  toolCalls[index] = {
1646
- id: toolCallDelta.id || `tool_${Date.now()}_${index}`,
1647
- name: "",
1648
- arguments: "",
2068
+ id: toolCallDelta.id ||
2069
+ toolCallDelta.function?.id ||
2070
+ `tool_${Date.now()}_${index}`,
2071
+ name: toolCallDelta.function?.name || "",
2072
+ arguments: toolCallDelta.function?.arguments || "",
1649
2073
  };
2074
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
2075
+ console.log(`[Mistral] Tool call received:`, toolCalls[index]);
2076
+ }
2077
+ // Emit start event
1650
2078
  onEvent({
1651
2079
  type: "tool_call_start",
1652
2080
  toolCall: {
1653
2081
  id: toolCalls[index].id,
1654
- name: toolCallDelta.function?.name || "",
2082
+ name: toolCalls[index].name,
1655
2083
  },
1656
2084
  });
2085
+ // If arguments are already complete (Mistral sends them all at once)
2086
+ if (toolCalls[index].arguments) {
2087
+ onEvent({
2088
+ type: "tool_call_delta",
2089
+ toolCallId: toolCalls[index].id,
2090
+ argumentDelta: toolCalls[index].arguments,
2091
+ });
2092
+ }
1657
2093
  }
1658
- if (toolCallDelta.function?.name) {
1659
- toolCalls[index].name = toolCallDelta.function.name;
1660
- }
1661
- if (toolCallDelta.function?.arguments) {
1662
- toolCalls[index].arguments += toolCallDelta.function.arguments;
1663
- onEvent({
1664
- type: "tool_call_delta",
1665
- toolCallId: toolCalls[index].id,
1666
- argumentDelta: toolCallDelta.function.arguments,
1667
- });
2094
+ else {
2095
+ // Update existing tool call (though Mistral typically sends complete calls)
2096
+ if (toolCallDelta.function?.name) {
2097
+ toolCalls[index].name = toolCallDelta.function.name;
2098
+ }
2099
+ if (toolCallDelta.function?.arguments) {
2100
+ toolCalls[index].arguments += toolCallDelta.function.arguments;
2101
+ onEvent({
2102
+ type: "tool_call_delta",
2103
+ toolCallId: toolCalls[index].id,
2104
+ argumentDelta: toolCallDelta.function.arguments,
2105
+ });
2106
+ }
1668
2107
  }
1669
2108
  }
1670
2109
  }
@@ -1677,21 +2116,39 @@ onEvent, onComplete) {
1677
2116
  toolCall,
1678
2117
  });
1679
2118
  }
2119
+ else {
2120
+ console.warn(`[Mistral] Skipping tool call with invalid JSON: ${toolCall.name}`, toolCall.arguments);
2121
+ }
1680
2122
  }
1681
2123
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1682
- console.log(`✅ [Mistral] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
2124
+ console.log(`✅ [Mistral] Complete. Chunks: ${chunkCount} | Tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
1683
2125
  }
1684
2126
  onComplete(fullMessage, toolCalls);
1685
2127
  }
1686
2128
  catch (error) {
1687
- throw error;
2129
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
2130
+ console.error(`❌ [Mistral] Streaming error:`, error.message || error, error.stack);
2131
+ }
2132
+ // Check for common Mistral errors
2133
+ if (error.message?.includes("401") ||
2134
+ error.message?.includes("Unauthorized")) {
2135
+ throw new Error("Mistral API authentication failed. Please check your MISTRAL_API_KEY.");
2136
+ }
2137
+ if (error.message?.includes("429") ||
2138
+ error.message?.includes("rate limit")) {
2139
+ const rateLimitError = new Error("Mistral API rate limit exceeded. Please try again later.");
2140
+ rateLimitError.statusCode = 429;
2141
+ throw rateLimitError;
2142
+ }
2143
+ // Re-throw with more context
2144
+ throw new Error(`Mistral streaming failed: ${error.message || "Unknown error"}`);
1688
2145
  }
1689
2146
  }
1690
2147
  /**
1691
2148
  * Stream with Bedrock SDK (for Claude models)
1692
2149
  */
1693
2150
  export async function streamWithBedrock(specification, messages, systemPrompt, tools, bedrockClient, // BedrockRuntimeClient instance
1694
- onEvent, onComplete) {
2151
+ onEvent, onComplete, abortSignal) {
1695
2152
  let fullMessage = "";
1696
2153
  let toolCalls = [];
1697
2154
  // Map contentBlockIndex to tool calls for proper correlation
@@ -1700,6 +2157,12 @@ onEvent, onComplete) {
1700
2157
  const startTime = Date.now();
1701
2158
  let firstTokenTime = 0;
1702
2159
  let tokenCount = 0;
2160
+ // Reasoning detection state
2161
+ let isInThinkingTag = false;
2162
+ let reasoningContent = "";
2163
+ let currentContent = "";
2164
+ const THINKING_START = "<thinking>";
2165
+ const THINKING_END = "</thinking>";
1703
2166
  try {
1704
2167
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
1705
2168
  console.log(`🔍 [Bedrock] Specification object:`, JSON.stringify(specification, null, 2));
@@ -1767,7 +2230,9 @@ onEvent, onComplete) {
1767
2230
  console.log(`🔍 [Bedrock] Converse request:`, JSON.stringify(request, null, 2));
1768
2231
  }
1769
2232
  const command = new ConverseStreamCommand(request);
1770
- const response = await bedrockClient.send(command);
2233
+ const response = await bedrockClient.send(command, {
2234
+ ...(abortSignal && { abortSignal }),
2235
+ });
1771
2236
  if (response.stream) {
1772
2237
  for await (const event of response.stream) {
1773
2238
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1779,7 +2244,6 @@ onEvent, onComplete) {
1779
2244
  const contentIndex = event.contentBlockDelta.contentBlockIndex;
1780
2245
  if (delta?.text) {
1781
2246
  const text = delta.text;
1782
- fullMessage += text;
1783
2247
  tokenCount++;
1784
2248
  if (firstTokenTime === 0) {
1785
2249
  firstTokenTime = Date.now() - startTime;
@@ -1787,10 +2251,69 @@ onEvent, onComplete) {
1787
2251
  console.log(`⚡ [Bedrock] Time to First Token: ${firstTokenTime}ms`);
1788
2252
  }
1789
2253
  }
1790
- onEvent({
1791
- type: "token",
1792
- token: text,
1793
- });
2254
+ // Accumulate content for thinking tag detection
2255
+ currentContent += text;
2256
+ // Check for thinking tags
2257
+ if (!isInThinkingTag && currentContent.includes(THINKING_START)) {
2258
+ const startIdx = currentContent.indexOf(THINKING_START);
2259
+ // Emit any content before the thinking tag
2260
+ const beforeThinking = currentContent.substring(0, startIdx);
2261
+ if (beforeThinking) {
2262
+ fullMessage += beforeThinking;
2263
+ onEvent({ type: "token", token: beforeThinking });
2264
+ }
2265
+ // Start reasoning mode
2266
+ isInThinkingTag = true;
2267
+ onEvent({ type: "reasoning_start", format: "thinking_tag" });
2268
+ // Process any content after the tag
2269
+ currentContent = currentContent.substring(startIdx + THINKING_START.length);
2270
+ reasoningContent = "";
2271
+ }
2272
+ if (isInThinkingTag) {
2273
+ // Check for end of thinking
2274
+ const endIdx = currentContent.indexOf(THINKING_END);
2275
+ if (endIdx !== -1) {
2276
+ // Add content up to the end tag
2277
+ reasoningContent += currentContent.substring(0, endIdx);
2278
+ // Emit final reasoning update
2279
+ onEvent({
2280
+ type: "reasoning_delta",
2281
+ content: currentContent.substring(0, endIdx),
2282
+ format: "thinking_tag",
2283
+ });
2284
+ onEvent({
2285
+ type: "reasoning_end",
2286
+ fullContent: reasoningContent,
2287
+ });
2288
+ // Exit reasoning mode
2289
+ isInThinkingTag = false;
2290
+ // Continue with remaining content
2291
+ currentContent = currentContent.substring(endIdx + THINKING_END.length);
2292
+ // Process any remaining text as normal content
2293
+ if (currentContent) {
2294
+ fullMessage += currentContent;
2295
+ onEvent({ type: "token", token: currentContent });
2296
+ currentContent = "";
2297
+ }
2298
+ }
2299
+ else {
2300
+ // Still in thinking mode, accumulate reasoning
2301
+ reasoningContent += currentContent;
2302
+ onEvent({
2303
+ type: "reasoning_delta",
2304
+ content: currentContent,
2305
+ format: "thinking_tag",
2306
+ });
2307
+ currentContent = "";
2308
+ }
2309
+ }
2310
+ else {
2311
+ // Normal content mode
2312
+ fullMessage += currentContent;
2313
+ onEvent({ type: "token", token: currentContent });
2314
+ currentContent = "";
2315
+ }
2316
+ // Always emit the current full message (excluding reasoning)
1794
2317
  onEvent({
1795
2318
  type: "message",
1796
2319
  message: fullMessage,