npm - graphlit-client - Versions diffs - 1.0.20250625001 → 1.0.20250627002 - Mend

graphlit-client 1.0.20250625001 → 1.0.20250627002

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +235 -5
package/dist/client.d.ts +24 -1
package/dist/client.js +280 -55
package/dist/generated/graphql-documents.d.ts +19 -0
package/dist/generated/graphql-documents.js +1161 -0
package/dist/generated/graphql-types.d.ts +1940 -85
package/dist/generated/graphql-types.js +51 -0
package/dist/streaming/llm-formatters.js +68 -5
package/dist/streaming/providers.d.ts +18 -13
package/dist/streaming/providers.js +690 -167
package/dist/streaming/ui-event-adapter.d.ts +7 -0
package/dist/streaming/ui-event-adapter.js +55 -0
package/dist/types/internal.d.ts +11 -0
package/dist/types/ui-events.d.ts +9 -0
package/package.json +1 -1

package/dist/streaming/providers.js CHANGED Viewed

@@ -1,3 +1,4 @@
+import * as Types from "../generated/graphql-types.js";
 import { getModelName } from "../model-mapping.js";
 /**
  * Helper to check if a string is valid JSON
@@ -75,7 +76,7 @@ function cleanSchemaForGoogle(schema) {
  * Stream with OpenAI SDK
  */
 export async function streamWithOpenAI(specification, messages, tools, openaiClient, // OpenAI client instance
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
     // Performance metrics
@@ -131,7 +132,10 @@ onEvent, onComplete) {
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`⏱️ [OpenAI] Starting LLM call at: ${new Date().toISOString()}`);
         }
-        const stream = await openaiClient.chat.completions.create(streamConfig);
+        const stream = await openaiClient.chat.completions.create({
+            ...streamConfig,
+            ...(abortSignal && { signal: abortSignal }),
+        });
         for await (const chunk of stream) {
             const delta = chunk.choices[0]?.delta;
             // Debug log chunk details
@@ -379,11 +383,8 @@ onEvent, onComplete) {
         throw error;
     }
 }
-/**
- * Stream with Anthropic SDK
- */
-export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Anthropic client instance
-onEvent, onComplete) {
+export async function streamWithAnthropic(specification, messages, systemPrompt, tools, anthropicClient, // Properly typed Anthropic client
+onEvent, onComplete, abortSignal, thinkingConfig) {
     let fullMessage = "";
     let toolCalls = [];
     // Performance metrics
@@ -413,14 +414,29 @@ onEvent, onComplete) {
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`🤖 [Anthropic] Model Config: Service=Anthropic | Model=${modelName} | Temperature=${specification.anthropic?.temperature} | MaxTokens=${specification.anthropic?.completionTokenLimit || 8192} | SystemPrompt=${systemPrompt ? "Yes" : "No"} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
         }
+        // Use proper Anthropic SDK types for the config
         const streamConfig = {
             model: modelName,
             messages,
             stream: true,
-            temperature: specification.anthropic?.temperature,
-            //top_p: specification.anthropic?.probability,
             max_tokens: specification.anthropic?.completionTokenLimit || 8192, // required
         };
+        // Handle temperature based on thinking configuration
+        if (thinkingConfig) {
+            // When thinking is enabled, temperature must be 1
+            streamConfig.temperature = 1;
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`🧠 [Anthropic] Setting temperature to 1 (required for extended thinking)`);
+            }
+        }
+        else {
+            // Only add temperature if it's defined and valid for non-thinking requests
+            if (specification.anthropic?.temperature !== undefined &&
+                specification.anthropic?.temperature !== null &&
+                typeof specification.anthropic?.temperature === "number") {
+                streamConfig.temperature = specification.anthropic.temperature;
+            }
+        }
         if (systemPrompt) {
             streamConfig.system = systemPrompt;
         }
@@ -432,11 +448,31 @@ onEvent, onComplete) {
                 input_schema: tool.schema ? JSON.parse(tool.schema) : {},
             }));
         }
+        // Add thinking config if provided
+        if (thinkingConfig) {
+            streamConfig.thinking = thinkingConfig;
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`🧠 [Anthropic] Extended thinking enabled | Budget: ${thinkingConfig.budget_tokens} tokens`);
+            }
+            // Adjust max_tokens to account for thinking budget
+            const totalTokens = streamConfig.max_tokens + thinkingConfig.budget_tokens;
+            if (totalTokens > 200000) {
+                // Claude's context window limit
+                console.warn(`⚠️ [Anthropic] Total tokens (${totalTokens}) exceeds context window, adjusting completion tokens...`);
+                streamConfig.max_tokens = Math.max(1000, 200000 - thinkingConfig.budget_tokens);
+            }
+        }
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`⏱️ [Anthropic] Starting LLM call at: ${new Date().toISOString()}`);
         }
-        const stream = await anthropicClient.messages.create(streamConfig);
+        const stream = await anthropicClient.messages.create(streamConfig, abortSignal ? { signal: abortSignal } : undefined);
         let activeContentBlock = false;
+        let currentContentBlockIndex;
+        let currentContentBlockType;
+        let thinkingContent = "";
+        let thinkingSignature = "";
+        let completeThinkingContent = ""; // Accumulate all thinking content for conversation history
+        let completeThinkingSignature = ""; // Accumulate signature for conversation history
         for await (const chunk of stream) {
             // Debug log all chunk types
             if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -444,7 +480,21 @@ onEvent, onComplete) {
             }
             if (chunk.type === "content_block_start") {
                 activeContentBlock = true;
-                if (chunk.content_block.type === "tool_use") {
+                currentContentBlockIndex = chunk.index;
+                currentContentBlockType = chunk.content_block.type;
+                if (chunk.content_block.type === "thinking") {
+                    // Start of thinking block (native extended thinking)
+                    thinkingContent = "";
+                    thinkingSignature = "";
+                    onEvent({
+                        type: "reasoning_start",
+                        format: "thinking_tag",
+                    });
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log("[Anthropic] Extended thinking block started");
+                    }
+                }
+                else if (chunk.content_block.type === "tool_use") {
                     const toolCall = {
                         id: chunk.content_block.id,
                         name: chunk.content_block.name,
@@ -477,7 +527,33 @@ onEvent, onComplete) {
                 }
             }
             else if (chunk.type === "content_block_delta") {
-                if (chunk.delta.type === "text_delta") {
+                // Handle thinking blocks with native extended thinking
+                if (chunk.delta.type === "thinking_delta" &&
+                    "thinking" in chunk.delta) {
+                    // Accumulate thinking content
+                    thinkingContent += chunk.delta.thinking;
+                    // Track first token time
+                    if (firstTokenTime === 0) {
+                        firstTokenTime = Date.now() - startTime;
+                    }
+                    onEvent({
+                        type: "reasoning_delta",
+                        content: chunk.delta.thinking,
+                        format: "thinking_tag",
+                    });
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`[Anthropic] Thinking delta: "${chunk.delta.thinking}"`);
+                    }
+                }
+                else if (chunk.delta.type === "signature_delta" &&
+                    "signature" in chunk.delta) {
+                    // Handle signature for thinking blocks
+                    thinkingSignature += chunk.delta.signature;
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`[Anthropic] Signature delta: "${chunk.delta.signature}"`);
+                    }
+                }
+                else if (chunk.delta.type === "text_delta" && "text" in chunk.delta) {
                     fullMessage += chunk.delta.text;
                     tokenCount++;
                     const currentTime = Date.now();
@@ -531,9 +607,39 @@ onEvent, onComplete) {
             }
             else if (chunk.type === "content_block_stop") {
                 activeContentBlock = false;
+                // Check if we're stopping a thinking block
+                if (currentContentBlockType === "thinking" &&
+                    chunk.index === currentContentBlockIndex) {
+                    // Emit the complete thinking block with signature
+                    onEvent({
+                        type: "reasoning_end",
+                        fullContent: thinkingContent,
+                        signature: thinkingSignature || undefined,
+                    });
+                    // Accumulate thinking content and signature for conversation history preservation
+                    if (thinkingContent.trim()) {
+                        completeThinkingContent += thinkingContent;
+                    }
+                    if (thinkingSignature.trim()) {
+                        completeThinkingSignature = thinkingSignature; // Use the last signature
+                    }
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`[Anthropic] Thinking block completed:`, {
+                            contentLength: thinkingContent.length,
+                            hasSignature: !!thinkingSignature,
+                            signature: thinkingSignature,
+                            totalThinkingLength: completeThinkingContent.length,
+                        });
+                    }
+                    // Reset current thinking state (but keep completeThinkingContent)
+                    thinkingContent = "";
+                    thinkingSignature = "";
+                }
+                currentContentBlockType = undefined;
+                currentContentBlockIndex = undefined;
                 // Tool call complete
                 const currentTool = toolCalls[toolCalls.length - 1];
-                if (currentTool) {
+                if (currentTool && chunk.content_block?.type === "tool_use") {
                     const currentTime = Date.now();
                     // Update tool metrics
                     const toolIndex = toolCalls.length - 1;
@@ -682,7 +788,19 @@ onEvent, onComplete) {
             }
             console.log(`✅ [Anthropic] Final message (${fullMessage.length} chars): "${fullMessage}"`);
         }
-        onComplete(fullMessage, validToolCalls);
+        // Include thinking content in the final message for conversation history preservation
+        let finalMessage = fullMessage;
+        if (completeThinkingContent.trim()) {
+            // Wrap thinking content with signature in special tags that formatMessagesForAnthropic can parse
+            const thinkingBlock = completeThinkingSignature.trim()
+                ? `<thinking signature="${completeThinkingSignature}">${completeThinkingContent}</thinking>`
+                : `<thinking>${completeThinkingContent}</thinking>`;
+            finalMessage = `${thinkingBlock}${fullMessage}`;
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`🧠 [Anthropic] Including thinking content (${completeThinkingContent.length} chars) and signature (${completeThinkingSignature.length} chars) in conversation history`);
+            }
+        }
+        onComplete(finalMessage, validToolCalls);
     }
     catch (error) {
         // Handle Anthropic-specific errors
@@ -717,7 +835,7 @@ onEvent, onComplete) {
  * Stream with Google SDK
  */
 export async function streamWithGoogle(specification, messages, systemPrompt, tools, googleClient, // Google GenerativeAI client instance
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
     // Performance metrics
@@ -1070,19 +1188,35 @@ onEvent, onComplete) {
  * Stream with Groq SDK (OpenAI-compatible)
  */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     try {
         const modelName = getModelName(specification);
         // Filter or simplify tools for Groq models that have issues
         let groqTools = tools;
         if (tools && tools.length > 0) {
-            // LLaMA 3.3 70B seems to have tool calling issues - disable tools for this model
+            // Some models have tool calling issues - provide fallback prompt
+            const problemModels = [
+                "llama-3.3",
+                "LLAMA_3_3",
+                "llama3-groq-70b",
+                "llama3-groq-8b",
+            ];
             if (modelName &&
-                (modelName.includes("llama-3.3") || modelName.includes("LLAMA_3_3"))) {
+                problemModels.some((model) => modelName.toLowerCase().includes(model.toLowerCase()))) {
                 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
-                    console.log(`⚠️ [Groq] Disabling tools for ${modelName} due to known compatibility issues`);
+                    console.log(`⚠️ [Groq] Model ${modelName} has limited tool support - using simplified schemas`);
                 }
-                groqTools = undefined;
+                // Don't disable tools entirely, but simplify them more aggressively
+                groqTools = tools.map((tool) => ({
+                    ...tool,
+                    schema: tool.schema
+                        ? JSON.stringify({
+                            type: "object",
+                            properties: JSON.parse(tool.schema).properties || {},
+                            required: JSON.parse(tool.schema).required || [],
+                        })
+                        : tool.schema,
+                }));
             }
             else {
                 // For other models, simplify complex schemas
@@ -1095,7 +1229,7 @@ onEvent, onComplete) {
             }
         }
         // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-        return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete);
+        return await streamWithOpenAI(specification, messages, groqTools, groqClient, onEvent, onComplete, abortSignal);
     }
     catch (error) {
         // Handle Groq-specific errors
@@ -1126,10 +1260,42 @@ onEvent, onComplete) {
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     try {
+        const modelName = getModelName(specification);
+        // Cerebras has very limited tool support
+        let cerebrasTools = tools;
+        let filteredMessages = messages;
+        if (modelName) {
+            const isQwen = modelName.toLowerCase().includes("qwen-3-32b");
+            if (tools && tools.length > 0) {
+                if (!isQwen) {
+                    // Only qwen-3-32b supports tools
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`⚠️ [Cerebras] Disabling tools for ${modelName} - only qwen-3-32b supports tools`);
+                    }
+                    cerebrasTools = undefined;
+                }
+            }
+            // For non-qwen models, we need to filter out any assistant messages with tool_calls
+            if (!isQwen) {
+                filteredMessages = messages.map((msg) => {
+                    if (msg.role === "assistant" &&
+                        msg.tool_calls &&
+                        msg.tool_calls.length > 0) {
+                        // Remove tool_calls from assistant messages for non-qwen models
+                        const { tool_calls, ...msgWithoutTools } = msg;
+                        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                            console.log(`⚠️ [Cerebras] Removing tool_calls from assistant message for ${modelName}`);
+                        }
+                        return msgWithoutTools;
+                    }
+                    return msg;
+                });
+            }
+        }
         // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+        return await streamWithOpenAI(specification, filteredMessages, cerebrasTools, cerebrasClient, onEvent, onComplete, abortSignal);
     }
     catch (error) {
         // Handle Cerebras-specific 429 errors
@@ -1149,9 +1315,22 @@ onEvent, onComplete) {
  * Stream with Deepseek SDK (OpenAI-compatible)
  */
 export async function streamWithDeepseek(specification, messages, tools, deepseekClient, // OpenAI client instance configured for Deepseek
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    // Reasoning detection state
+    let reasoningLines = [];
+    let currentLine = "";
+    const REASONING_PATTERNS = [
+        /^🤔\s*Reasoning:/i,
+        /^\*\*Step\s+\d+:/i,
+        /^\*\*Reasoning:/i,
+        /^\*\*Analysis:/i,
+        /^\*\*Thought\s+\d+:/i,
+        /^\*\*Consideration:/i,
+    ];
+    let isInReasoning = false;
+    let hasEmittedReasoningStart = false;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -1219,7 +1398,10 @@ onEvent, onComplete) {
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`⏱️ [Deepseek] Starting LLM call at: ${new Date().toISOString()}`);
         }
-        const stream = await deepseekClient.chat.completions.create(streamConfig);
+        const stream = await deepseekClient.chat.completions.create({
+            ...streamConfig,
+            ...(abortSignal && { signal: abortSignal }),
+        });
         for await (const chunk of stream) {
             const delta = chunk.choices[0]?.delta;
             if (!delta)
@@ -1238,15 +1420,78 @@ onEvent, onComplete) {
             // Handle message content
             if (delta.content) {
                 tokenCount++;
-                fullMessage += delta.content;
                 // Track first meaningful content
                 if (firstMeaningfulContentTime === 0 && fullMessage.trim().length > 0) {
                     firstMeaningfulContentTime = currentTime - startTime;
                 }
-                onEvent({
-                    type: "message",
-                    message: fullMessage,
-                });
+                // Process content for reasoning detection
+                const content = delta.content;
+                // Build current line for pattern matching
+                for (const char of content) {
+                    if (char === "\n") {
+                        // Check if this line starts a reasoning section
+                        const trimmedLine = currentLine.trim();
+                        const isReasoningLine = REASONING_PATTERNS.some((pattern) => pattern.test(trimmedLine));
+                        if (isReasoningLine && !isInReasoning) {
+                            // Start reasoning mode
+                            isInReasoning = true;
+                            if (!hasEmittedReasoningStart) {
+                                onEvent({ type: "reasoning_start", format: "markdown" });
+                                hasEmittedReasoningStart = true;
+                            }
+                            reasoningLines.push(currentLine);
+                            onEvent({
+                                type: "reasoning_delta",
+                                content: currentLine + "\n",
+                                format: "markdown",
+                            });
+                        }
+                        else if (isInReasoning) {
+                            // Continue reasoning if line is indented or continues the pattern
+                            if (currentLine.startsWith("  ") ||
+                                currentLine.startsWith("\t") ||
+                                currentLine.trim().startsWith("**") ||
+                                currentLine.trim() === "") {
+                                reasoningLines.push(currentLine);
+                                onEvent({
+                                    type: "reasoning_delta",
+                                    content: currentLine + "\n",
+                                    format: "markdown",
+                                });
+                            }
+                            else {
+                                // End reasoning mode
+                                isInReasoning = false;
+                                onEvent({
+                                    type: "reasoning_end",
+                                    fullContent: reasoningLines.join("\n"),
+                                });
+                                // This line is normal content
+                                fullMessage += currentLine + "\n";
+                                onEvent({ type: "token", token: currentLine + "\n" });
+                            }
+                        }
+                        else {
+                            // Normal content
+                            fullMessage += currentLine + "\n";
+                            onEvent({ type: "token", token: currentLine + "\n" });
+                        }
+                        currentLine = "";
+                    }
+                    else {
+                        currentLine += char;
+                    }
+                }
+                // Handle partial line
+                if (currentLine && !isInReasoning) {
+                    // For partial lines, emit as normal content
+                    fullMessage += currentLine;
+                    onEvent({ type: "token", token: currentLine });
+                    currentLine = "";
+                }
+                if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                    console.log(`[Deepseek] Token #${tokenCount}: "${delta.content}" | Accumulated: ${fullMessage.length} chars`);
+                }
                 // Performance metrics tracking (internal only)
                 if (tokenCount % 10 === 0) {
                     const totalTokens = tokenCount + toolArgumentTokens;
@@ -1299,6 +1544,25 @@ onEvent, onComplete) {
                 }
             }
         }
+        // Handle any remaining content
+        if (currentLine) {
+            if (isInReasoning) {
+                reasoningLines.push(currentLine);
+                onEvent({
+                    type: "reasoning_delta",
+                    content: currentLine,
+                    format: "markdown",
+                });
+                onEvent({
+                    type: "reasoning_end",
+                    fullContent: reasoningLines.join("\n"),
+                });
+            }
+            else {
+                fullMessage += currentLine;
+                onEvent({ type: "token", token: currentLine });
+            }
+        }
         // Process completed tool calls
         const validToolCalls = toolCalls.filter((tc, idx) => {
             if (!isValidJSON(tc.arguments)) {
@@ -1353,7 +1617,7 @@ onEvent, onComplete) {
  * Stream with Cohere SDK
  */
 export async function streamWithCohere(specification, messages, tools, cohereClient, // CohereClient instance
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
     // Performance metrics
@@ -1372,107 +1636,89 @@ onEvent, onComplete) {
             console.log(`🔍 [Cohere] Messages array length: ${messages.length}`);
             console.log(`🔍 [Cohere] All messages:`, JSON.stringify(messages, null, 2));
         }
+        // V2 API validation
         if (messages.length === 0) {
             throw new Error("No messages found for Cohere streaming");
         }
-        // Cohere v7 expects a single message and optional chatHistory
-        // Extract system messages for preamble and filter them out of history
-        const systemMessages = messages.filter((msg) => msg.role === "SYSTEM");
-        const nonSystemMessages = messages.filter((msg) => msg.role !== "SYSTEM");
-        // Extract the last non-system message as the current message
-        const lastMessage = nonSystemMessages[nonSystemMessages.length - 1];
-        const chatHistory = nonSystemMessages.slice(0, -1);
-        if (!lastMessage || !lastMessage.message) {
-            throw new Error("Last message must have message property for Cohere streaming");
-        }
-        // Build properly typed request using Cohere SDK types
+        const v2Messages = [];
+        // Map our GraphQL role types to Cohere v2 role strings
+        messages.forEach((msg) => {
+            switch (msg.role) {
+                case Types.ConversationRoleTypes.System:
+                    v2Messages.push({
+                        role: "system",
+                        content: msg.message || "",
+                    });
+                    break;
+                case Types.ConversationRoleTypes.User:
+                    v2Messages.push({
+                        role: "user",
+                        content: msg.message || "",
+                    });
+                    break;
+                case Types.ConversationRoleTypes.Assistant:
+                    const assistantMsg = {
+                        role: "assistant",
+                        content: msg.message || "",
+                    };
+                    // V2 uses camelCase toolCalls
+                    if (msg.toolCalls && msg.toolCalls.length > 0) {
+                        // Convert our internal tool call format to Cohere V2 format
+                        assistantMsg.toolCalls = msg.toolCalls
+                            .filter((tc) => tc !== null)
+                            .map((tc) => ({
+                            id: tc.id,
+                            type: "function",
+                            function: {
+                                name: tc.name,
+                                arguments: tc.arguments,
+                            },
+                        }));
+                    }
+                    v2Messages.push(assistantMsg);
+                    break;
+                case Types.ConversationRoleTypes.Tool:
+                    // Tool messages need the tool call ID
+                    const toolCallId = msg.toolCallId || "";
+                    if (!toolCallId && process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.warn(`[Cohere] Tool message missing toolCallId:`, {
+                            message: msg.message?.substring(0, 50),
+                        });
+                    }
+                    v2Messages.push({
+                        role: "tool",
+                        content: msg.message || "",
+                        toolCallId: toolCallId,
+                    });
+                    break;
+                default:
+                    console.warn(`[Cohere] Unknown message role: ${msg.role}, treating as user`);
+                    v2Messages.push({
+                        role: "user",
+                        content: msg.message || "",
+                    });
+            }
+        });
         const streamConfig = {
             model: modelName,
-            message: lastMessage.message, // Current message (singular)
+            messages: v2Messages,
+            stream: true,
         };
-        // Add system message as preamble if present
-        if (systemMessages.length > 0) {
-            // Combine all system messages into preamble
-            streamConfig.preamble = systemMessages
-                .map((msg) => msg.message)
-                .join("\n\n");
-        }
-        // Add chat history if there are previous messages
-        if (chatHistory.length > 0) {
-            // Build properly typed chat history using Cohere SDK Message types
-            // Note: SYSTEM messages are already filtered out and handled as preamble
-            const cohereHistory = chatHistory.map((msg) => {
-                switch (msg.role) {
-                    case "USER":
-                        return {
-                            role: "USER",
-                            message: msg.message,
-                        };
-                    case "CHATBOT":
-                        const chatbotMsg = {
-                            role: "CHATBOT",
-                            message: msg.message,
-                        };
-                        // Add tool calls if present
-                        if (msg.tool_calls && msg.tool_calls.length > 0) {
-                            chatbotMsg.toolCalls = msg.tool_calls.map((tc) => ({
-                                name: tc.name,
-                                parameters: tc.parameters || {},
-                            }));
-                        }
-                        return chatbotMsg;
-                    case "TOOL":
-                        return {
-                            role: "TOOL",
-                            toolResults: msg.tool_results || [],
-                        };
-                    default:
-                        // Fallback - treat as USER
-                        return {
-                            role: "USER",
-                            message: msg.message,
-                        };
-                }
-            });
-            streamConfig.chatHistory = cohereHistory;
-        }
         // Only add temperature if it's defined
         if (specification.cohere?.temperature !== undefined &&
             specification.cohere.temperature !== null) {
             streamConfig.temperature = specification.cohere.temperature;
         }
-        // Add tools if provided
+        // Add tools if provided - V2 format is different
         if (tools && tools.length > 0) {
-            const cohereTools = tools.map((tool) => {
-                if (!tool.schema) {
-                    return {
-                        name: tool.name || "",
-                        description: tool.description || "",
-                        parameterDefinitions: {},
-                    };
-                }
-                // Parse the JSON schema
-                const schema = JSON.parse(tool.schema);
-                // Convert JSON Schema to Cohere's expected format
-                const parameterDefinitions = {};
-                if (schema.properties) {
-                    for (const [key, value] of Object.entries(schema.properties)) {
-                        const prop = value;
-                        const paramDef = {
-                            type: prop.type || "str",
-                            description: prop.description || "",
-                            required: schema.required?.includes(key) || false,
-                        };
-                        parameterDefinitions[key] = paramDef;
-                    }
-                }
-                return {
+            streamConfig.tools = tools.map((tool) => ({
+                type: "function",
+                function: {
                     name: tool.name || "",
                     description: tool.description || "",
-                    parameterDefinitions, // Use camelCase as expected by Cohere SDK
-                };
-            });
-            streamConfig.tools = cohereTools;
+                    parameters: tool.schema ? JSON.parse(tool.schema) : {},
+                },
+            }));
         }
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
@@ -1488,7 +1734,10 @@ onEvent, onComplete) {
                 process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
                 console.log(`🔍 [Cohere] Full streamConfig for ${modelName}:`, JSON.stringify(streamConfig, null, 2));
             }
-            stream = await cohereClient.chatStream(streamConfig);
+            stream = await cohereClient.chatStream({
+                ...streamConfig,
+                ...(abortSignal && { signal: abortSignal }),
+            });
         }
         catch (streamError) {
             // Enhanced error logging
@@ -1523,9 +1772,17 @@ onEvent, onComplete) {
             }
             throw streamError;
         }
+        // Track current tool call being built
+        let currentToolCallIndex = -1;
+        let currentToolCall = null;
         for await (const chunk of stream) {
-            if (chunk.eventType === "text-generation") {
-                const text = chunk.text;
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`[Cohere] Event type: ${chunk.type}`);
+            }
+            // Handle v2 API event types
+            if (chunk.type === "content-delta") {
+                // Content streaming in response generation step
+                const text = chunk.delta?.message?.content?.text;
                 if (text) {
                     fullMessage += text;
                     tokenCount++;
@@ -1539,34 +1796,92 @@ onEvent, onComplete) {
                         type: "token",
                         token: text,
                     });
+                    // Also emit message update
+                    onEvent({
+                        type: "message",
+                        message: fullMessage,
+                    });
                 }
             }
-            else if (chunk.eventType === "tool-calls-generation") {
-                // Handle tool calls
-                if (chunk.toolCalls) {
-                    for (const toolCall of chunk.toolCalls) {
-                        const id = `tool_${Date.now()}_${toolCalls.length}`;
-                        const formattedToolCall = {
-                            id,
-                            name: toolCall.name,
-                            arguments: JSON.stringify(toolCall.parameters),
-                        };
-                        toolCalls.push(formattedToolCall);
-                        onEvent({
-                            type: "tool_call_start",
-                            toolCall: { id, name: toolCall.name },
-                        });
+            else if (chunk.type === "tool-call-start") {
+                // Start of a tool call
+                currentToolCallIndex = chunk.index || 0;
+                const toolCallData = chunk.delta?.message?.toolCalls; // Note: toolCalls not tool_calls
+                if (toolCallData) {
+                    currentToolCall = {
+                        id: toolCallData.id ||
+                            `cohere_tool_${Date.now()}_${currentToolCallIndex}`,
+                        name: toolCallData.function?.name || "",
+                        arguments: "",
+                    };
+                    onEvent({
+                        type: "tool_call_start",
+                        toolCall: {
+                            id: currentToolCall.id,
+                            name: currentToolCall.name,
+                        },
+                    });
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`[Cohere] Tool call started: ${currentToolCall.name}`);
+                    }
+                }
+            }
+            else if (chunk.type === "tool-call-delta") {
+                // Tool call argument streaming
+                if (currentToolCall && chunk.index === currentToolCallIndex) {
+                    const argDelta = chunk.delta?.message?.toolCalls?.function?.arguments;
+                    if (argDelta) {
+                        currentToolCall.arguments += argDelta;
                         onEvent({
-                            type: "tool_call_parsed",
-                            toolCall: formattedToolCall,
+                            type: "tool_call_delta",
+                            toolCallId: currentToolCall.id,
+                            argumentDelta: argDelta,
                         });
                     }
                 }
             }
+            else if (chunk.type === "tool-call-end") {
+                // Tool call complete
+                if (currentToolCall && chunk.index === currentToolCallIndex) {
+                    toolCalls.push(currentToolCall);
+                    onEvent({
+                        type: "tool_call_parsed",
+                        toolCall: currentToolCall,
+                    });
+                    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                        console.log(`[Cohere] Tool call completed: ${currentToolCall.name}`);
+                    }
+                    currentToolCall = null;
+                    currentToolCallIndex = -1;
+                }
+            }
+            else if (chunk.type === "tool-plan-delta") {
+                // Handle tool plan delta - Cohere might send this before tool calls
+                if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                    console.log(`[Cohere] Tool plan delta received`, chunk);
+                }
+            }
+            else if (chunk.type === "message-start") {
+                // Handle message start event
+                if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                    console.log(`[Cohere] Message start event received`, chunk);
+                }
+            }
+            else if (chunk.type === "message-end") {
+                // Handle message end event
+                if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                    console.log(`[Cohere] Message end event received`, chunk);
+                }
+            }
         }
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
-            console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
+            console.log(`✅ [Cohere] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
         }
+        // Emit final complete event
+        onEvent({
+            type: "complete",
+            tokens: tokenCount,
+        });
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
@@ -1589,7 +1904,7 @@ onEvent, onComplete) {
  * Stream with Mistral SDK
  */
 export async function streamWithMistral(specification, messages, tools, mistralClient, // Mistral client instance
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
     // Performance metrics
@@ -1603,6 +1918,19 @@ onEvent, onComplete) {
         }
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`🤖 [Mistral] Model Config: Service=Mistral | Model=${modelName} | Temperature=${specification.mistral?.temperature} | Tools=${tools?.length || 0} | Spec="${specification.name}"`);
+            console.log(`🔍 [Mistral] Messages being sent (${messages.length} total):`);
+            messages.forEach((msg, idx) => {
+                const msgWithTools = msg;
+                console.log(`  Message ${idx}: role=${msg.role}, hasContent=${!!msg.content}, hasToolCalls=${!!msgWithTools.tool_calls}, tool_call_id=${msgWithTools.tool_call_id}`);
+                if (msgWithTools.tool_calls) {
+                    console.log(`    Tool calls: ${JSON.stringify(msgWithTools.tool_calls)}`);
+                }
+                if (msgWithTools.tool_call_id) {
+                    console.log(`    Tool call ID: ${msgWithTools.tool_call_id}`);
+                }
+                // Log full message for debugging
+                console.log(`    Full message: ${JSON.stringify(msg)}`);
+            });
         }
         const streamConfig = {
             model: modelName,
@@ -1620,8 +1948,100 @@ onEvent, onComplete) {
                 },
             }));
         }
-        const stream = await mistralClient.chat.stream(streamConfig);
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`[Mistral] Stream config:`, JSON.stringify({
+                ...streamConfig,
+                messages: streamConfig.messages.map((m) => ({
+                    role: m.role,
+                    contentLength: typeof m.content === "string"
+                        ? m.content.length
+                        : m.content?.length || 0,
+                    hasToolCalls: !!m.tool_calls,
+                    toolCallsCount: m.tool_calls?.length || 0,
+                    toolCallId: m.tool_call_id,
+                })),
+            }, null, 2));
+            // Log full messages for debugging tool issues
+            if (messages.some((m) => m.role === "tool" || m.tool_calls)) {
+                console.log(`[Mistral] Full messages for tool debugging:`, JSON.stringify(messages, null, 2));
+            }
+        }
+        let stream;
+        try {
+            // Log the full config for debugging tool issues
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`[Mistral] About to call stream with:`, {
+                    model: streamConfig.model,
+                    messageCount: streamConfig.messages.length,
+                    hasTools: !!(streamConfig.tools && streamConfig.tools.length > 0),
+                    toolCount: streamConfig.tools?.length || 0,
+                });
+                // Log the EXACT payload being sent to Mistral API
+                console.log(`[Mistral] EXACT API payload:`, JSON.stringify(streamConfig, null, 2));
+                // Check for tool call/result mismatches
+                const toolCallMessages = streamConfig.messages.filter((m) => m.tool_calls?.length > 0);
+                const toolResultMessages = streamConfig.messages.filter((m) => m.role === "tool");
+                if (toolCallMessages.length > 0 || toolResultMessages.length > 0) {
+                    console.log(`[Mistral] Tool message analysis:`, {
+                        toolCallMessages: toolCallMessages.length,
+                        toolResultMessages: toolResultMessages.length,
+                        toolCallsTotal: toolCallMessages.reduce((sum, m) => sum + (m.tool_calls?.length || 0), 0),
+                    });
+                }
+            }
+            stream = await mistralClient.chat.stream({
+                ...streamConfig,
+                ...(abortSignal && { signal: abortSignal }),
+            });
+        }
+        catch (error) {
+            console.error(`[Mistral] Failed to create stream:`, error);
+            // Better error handling for tool mismatch
+            if (error.message?.includes("Not the same number of function calls and responses")) {
+                console.error(`[Mistral] Tool call/response mismatch detected. This usually happens when there are unmatched tool calls in the conversation history.`);
+                // Analyze the messages to find the mismatch
+                const toolCallIds = new Set();
+                const toolResponseIds = new Set();
+                messages.forEach((msg, idx) => {
+                    const msgWithTools = msg;
+                    if (msg.role === "assistant" && msgWithTools.tool_calls) {
+                        msgWithTools.tool_calls.forEach((tc) => {
+                            toolCallIds.add(tc.id);
+                            console.error(`  Message ${idx}: Assistant has tool call with id: ${tc.id}`);
+                        });
+                    }
+                    if (msg.role === "tool") {
+                        // Check both camelCase and snake_case versions
+                        const toolId = msgWithTools.tool_call_id;
+                        if (toolId) {
+                            toolResponseIds.add(toolId);
+                            console.error(`  Message ${idx}: Tool response for id: ${toolId}`);
+                        }
+                        else {
+                            console.error(`  Message ${idx}: Tool response missing ID!`);
+                        }
+                    }
+                });
+                console.error(`[Mistral] Tool call IDs: ${Array.from(toolCallIds).join(", ")}`);
+                console.error(`[Mistral] Tool response IDs: ${Array.from(toolResponseIds).join(", ")}`);
+                // Find mismatches
+                const unmatchedCalls = Array.from(toolCallIds).filter((id) => !toolResponseIds.has(id));
+                const unmatchedResponses = Array.from(toolResponseIds).filter((id) => !toolCallIds.has(id));
+                if (unmatchedCalls.length > 0) {
+                    console.error(`[Mistral] Tool calls without responses: ${unmatchedCalls.join(", ")}`);
+                }
+                if (unmatchedResponses.length > 0) {
+                    console.error(`[Mistral] Tool responses without calls: ${unmatchedResponses.join(", ")}`);
+                }
+            }
+            throw new Error(`Mistral streaming failed to start: ${error.message || "Unknown error"}`);
+        }
+        let chunkCount = 0;
         for await (const chunk of stream) {
+            chunkCount++;
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`[Mistral] Raw chunk:`, JSON.stringify(chunk, null, 2));
+            }
             const delta = chunk.data.choices[0]?.delta;
             if (delta?.content) {
                 fullMessage += delta.content;
@@ -1637,34 +2057,53 @@ onEvent, onComplete) {
                     token: delta.content,
                 });
             }
-            // Handle tool calls
-            if (delta?.tool_calls) {
-                for (const toolCallDelta of delta.tool_calls) {
+            // Handle tool calls (Mistral uses camelCase 'toolCalls' not 'tool_calls')
+            if (delta?.toolCalls || delta?.tool_calls) {
+                const toolCallsArray = delta.toolCalls || delta.tool_calls;
+                for (const toolCallDelta of toolCallsArray) {
                     const index = toolCallDelta.index || 0;
+                    // Mistral sends complete tool calls in one chunk
                     if (!toolCalls[index]) {
                         toolCalls[index] = {
-                            id: toolCallDelta.id || `tool_${Date.now()}_${index}`,
-                            name: "",
-                            arguments: "",
+                            id: toolCallDelta.id ||
+                                toolCallDelta.function?.id ||
+                                `tool_${Date.now()}_${index}`,
+                            name: toolCallDelta.function?.name || "",
+                            arguments: toolCallDelta.function?.arguments || "",
                         };
+                        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                            console.log(`[Mistral] Tool call received:`, toolCalls[index]);
+                        }
+                        // Emit start event
                         onEvent({
                             type: "tool_call_start",
                             toolCall: {
                                 id: toolCalls[index].id,
-                                name: toolCallDelta.function?.name || "",
+                                name: toolCalls[index].name,
                             },
                         });
+                        // If arguments are already complete (Mistral sends them all at once)
+                        if (toolCalls[index].arguments) {
+                            onEvent({
+                                type: "tool_call_delta",
+                                toolCallId: toolCalls[index].id,
+                                argumentDelta: toolCalls[index].arguments,
+                            });
+                        }
                     }
-                    if (toolCallDelta.function?.name) {
-                        toolCalls[index].name = toolCallDelta.function.name;
-                    }
-                    if (toolCallDelta.function?.arguments) {
-                        toolCalls[index].arguments += toolCallDelta.function.arguments;
-                        onEvent({
-                            type: "tool_call_delta",
-                            toolCallId: toolCalls[index].id,
-                            argumentDelta: toolCallDelta.function.arguments,
-                        });
+                    else {
+                        // Update existing tool call (though Mistral typically sends complete calls)
+                        if (toolCallDelta.function?.name) {
+                            toolCalls[index].name = toolCallDelta.function.name;
+                        }
+                        if (toolCallDelta.function?.arguments) {
+                            toolCalls[index].arguments += toolCallDelta.function.arguments;
+                            onEvent({
+                                type: "tool_call_delta",
+                                toolCallId: toolCalls[index].id,
+                                argumentDelta: toolCallDelta.function.arguments,
+                            });
+                        }
                     }
                 }
             }
@@ -1677,21 +2116,39 @@ onEvent, onComplete) {
                     toolCall,
                 });
             }
+            else {
+                console.warn(`[Mistral] Skipping tool call with invalid JSON: ${toolCall.name}`, toolCall.arguments);
+            }
         }
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
-            console.log(`✅ [Mistral] Complete. Total tokens: ${tokenCount} | Message length: ${fullMessage.length}`);
+            console.log(`✅ [Mistral] Complete. Chunks: ${chunkCount} | Tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
         }
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
-        throw error;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.error(`❌ [Mistral] Streaming error:`, error.message || error, error.stack);
+        }
+        // Check for common Mistral errors
+        if (error.message?.includes("401") ||
+            error.message?.includes("Unauthorized")) {
+            throw new Error("Mistral API authentication failed. Please check your MISTRAL_API_KEY.");
+        }
+        if (error.message?.includes("429") ||
+            error.message?.includes("rate limit")) {
+            const rateLimitError = new Error("Mistral API rate limit exceeded. Please try again later.");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        // Re-throw with more context
+        throw new Error(`Mistral streaming failed: ${error.message || "Unknown error"}`);
     }
 }
 /**
  * Stream with Bedrock SDK (for Claude models)
  */
 export async function streamWithBedrock(specification, messages, systemPrompt, tools, bedrockClient, // BedrockRuntimeClient instance
-onEvent, onComplete) {
+onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
     // Map contentBlockIndex to tool calls for proper correlation
@@ -1700,6 +2157,12 @@ onEvent, onComplete) {
     const startTime = Date.now();
     let firstTokenTime = 0;
     let tokenCount = 0;
+    // Reasoning detection state
+    let isInThinkingTag = false;
+    let reasoningContent = "";
+    let currentContent = "";
+    const THINKING_START = "<thinking>";
+    const THINKING_END = "</thinking>";
     try {
         if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
             console.log(`🔍 [Bedrock] Specification object:`, JSON.stringify(specification, null, 2));
@@ -1767,7 +2230,9 @@ onEvent, onComplete) {
             console.log(`🔍 [Bedrock] Converse request:`, JSON.stringify(request, null, 2));
         }
         const command = new ConverseStreamCommand(request);
-        const response = await bedrockClient.send(command);
+        const response = await bedrockClient.send(command, {
+            ...(abortSignal && { abortSignal }),
+        });
         if (response.stream) {
             for await (const event of response.stream) {
                 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1779,7 +2244,6 @@ onEvent, onComplete) {
                     const contentIndex = event.contentBlockDelta.contentBlockIndex;
                     if (delta?.text) {
                         const text = delta.text;
-                        fullMessage += text;
                         tokenCount++;
                         if (firstTokenTime === 0) {
                             firstTokenTime = Date.now() - startTime;
@@ -1787,10 +2251,69 @@ onEvent, onComplete) {
                                 console.log(`⚡ [Bedrock] Time to First Token: ${firstTokenTime}ms`);
                             }
                         }
-                        onEvent({
-                            type: "token",
-                            token: text,
-                        });
+                        // Accumulate content for thinking tag detection
+                        currentContent += text;
+                        // Check for thinking tags
+                        if (!isInThinkingTag && currentContent.includes(THINKING_START)) {
+                            const startIdx = currentContent.indexOf(THINKING_START);
+                            // Emit any content before the thinking tag
+                            const beforeThinking = currentContent.substring(0, startIdx);
+                            if (beforeThinking) {
+                                fullMessage += beforeThinking;
+                                onEvent({ type: "token", token: beforeThinking });
+                            }
+                            // Start reasoning mode
+                            isInThinkingTag = true;
+                            onEvent({ type: "reasoning_start", format: "thinking_tag" });
+                            // Process any content after the tag
+                            currentContent = currentContent.substring(startIdx + THINKING_START.length);
+                            reasoningContent = "";
+                        }
+                        if (isInThinkingTag) {
+                            // Check for end of thinking
+                            const endIdx = currentContent.indexOf(THINKING_END);
+                            if (endIdx !== -1) {
+                                // Add content up to the end tag
+                                reasoningContent += currentContent.substring(0, endIdx);
+                                // Emit final reasoning update
+                                onEvent({
+                                    type: "reasoning_delta",
+                                    content: currentContent.substring(0, endIdx),
+                                    format: "thinking_tag",
+                                });
+                                onEvent({
+                                    type: "reasoning_end",
+                                    fullContent: reasoningContent,
+                                });
+                                // Exit reasoning mode
+                                isInThinkingTag = false;
+                                // Continue with remaining content
+                                currentContent = currentContent.substring(endIdx + THINKING_END.length);
+                                // Process any remaining text as normal content
+                                if (currentContent) {
+                                    fullMessage += currentContent;
+                                    onEvent({ type: "token", token: currentContent });
+                                    currentContent = "";
+                                }
+                            }
+                            else {
+                                // Still in thinking mode, accumulate reasoning
+                                reasoningContent += currentContent;
+                                onEvent({
+                                    type: "reasoning_delta",
+                                    content: currentContent,
+                                    format: "thinking_tag",
+                                });
+                                currentContent = "";
+                            }
+                        }
+                        else {
+                            // Normal content mode
+                            fullMessage += currentContent;
+                            onEvent({ type: "token", token: currentContent });
+                            currentContent = "";
+                        }
+                        // Always emit the current full message (excluding reasoning)
                         onEvent({
                             type: "message",
                             message: fullMessage,