graphlit-client 1.0.20250615003 → 1.0.20250615005
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -54
- package/dist/client.js +46 -7
- package/dist/streaming/llm-formatters.d.ts +6 -4
- package/dist/streaming/llm-formatters.js +21 -16
- package/dist/streaming/providers.js +225 -51
- package/dist/streaming/ui-event-adapter.d.ts +2 -0
- package/dist/streaming/ui-event-adapter.js +23 -2
- package/dist/types/agent.d.ts +7 -0
- package/dist/types/internal.d.ts +8 -0
- package/dist/types/ui-events.d.ts +20 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -75,7 +75,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId (optional)
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -134,7 +134,7 @@ GOOGLE_API_KEY=your_key
 
 # Additional streaming providers
 GROQ_API_KEY=your_key # For Groq models (Llama, Mixtral)
-CEREBRAS_API_KEY=your_key # For Cerebras models
+CEREBRAS_API_KEY=your_key # For Cerebras models
 COHERE_API_KEY=your_key # For Cohere Command models
 MISTRAL_API_KEY=your_key # For Mistral models
 DEEPSEEK_API_KEY=your_key # For Deepseek models
@@ -152,8 +152,9 @@ The SDK now includes automatic retry logic for network errors and transient fail
 
 ### Default Retry Configuration
 
 By default, the client will automatically retry on these status codes:
+
 - `429` - Too Many Requests
-- `502` - Bad Gateway
+- `502` - Bad Gateway
 - `503` - Service Unavailable
 - `504` - Gateway Timeout
 
@@ -168,19 +169,19 @@ Configure retry behavior to match your needs:
 
 ```typescript
 const client = new Graphlit({
   organizationId: "your_org_id",
-  environmentId: "your_env_id",
+  environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 10,
-    initialDelay: 500,
-    maxDelay: 60000,
-    jitter: true,
+    maxAttempts: 10, // Maximum retry attempts (default: 5)
+    initialDelay: 500, // Initial delay in ms (default: 300)
+    maxDelay: 60000, // Maximum delay in ms (default: 30000)
+    jitter: true, // Add randomness to delays (default: true)
     retryableStatusCodes: [429, 500, 502, 503, 504], // Custom status codes
     onRetry: (attempt, error, operation) => {
      console.log(`Retry attempt ${attempt} for ${operation.operationName}`);
      console.log(`Error: ${error.message}`);
-    }
-  }
+    },
+  },
 });
 ```
 
@@ -196,7 +197,7 @@ const client = new Graphlit();
 client.setRetryConfig({
   maxAttempts: 20,
   initialDelay: 100,
-  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524]
+  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524],
 });
 ```
 
@@ -210,8 +211,8 @@ const client = new Graphlit({
   environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 1
-  }
+    maxAttempts: 1, // No retries
+  },
 });
 ```
 
@@ -221,17 +222,17 @@ The Graphlit SDK supports real-time streaming responses from 9 different LLM pro
 
 ### Supported Providers
 
-| Provider
-|
-| **OpenAI**
-| **Anthropic**
-| **Google**
-| **Groq**
-| **Cerebras**
-| **Cohere**
-| **Mistral**
-| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4
-| **Deepseek**
+| Provider        | Models                                        | SDK Required                      | API Key             |
+| --------------- | --------------------------------------------- | --------------------------------- | ------------------- |
+| **OpenAI**      | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4            | `openai`                          | `OPENAI_API_KEY`    |
+| **Anthropic**   | Claude 3, Claude 3.5, Claude 3.7, Claude 4    | `@anthropic-ai/sdk`               | `ANTHROPIC_API_KEY` |
+| **Google**      | Gemini 1.5, Gemini 2.0, Gemini 2.5            | `@google/generative-ai`           | `GOOGLE_API_KEY`    |
+| **Groq**        | Llama 4, Llama 3.3, Mixtral, Deepseek R1      | `groq-sdk`                        | `GROQ_API_KEY`      |
+| **Cerebras**    | Llama 3.3, Llama 3.1                          | `openai`                          | `CEREBRAS_API_KEY`  |
+| **Cohere**      | Command R+, Command R, Command R7B, Command A | `cohere-ai`                       | `COHERE_API_KEY`    |
+| **Mistral**     | Mistral Large, Medium, Small, Nemo, Pixtral   | `@mistralai/mistralai`            | `MISTRAL_API_KEY`   |
+| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4         | `@aws-sdk/client-bedrock-runtime` | AWS credentials     |
+| **Deepseek**    | Deepseek Chat, Deepseek Reasoner              | `openai`                          | `DEEPSEEK_API_KEY`  |
 
 ### Setting Up Streaming
 
@@ -250,7 +251,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -263,9 +266,9 @@ const spec = await client.createSpecification({
   name: "Multi-Provider Assistant",
   type: Types.SpecificationTypes.Completion,
   serviceType: Types.ModelServiceTypes.Cohere, // or any supported provider
-  cohere: {
+  cohere: {
     model: Types.CohereModels.CommandRPlus,
-    temperature: 0.7
+    temperature: 0.7,
   },
 });
 ```
@@ -310,7 +313,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -338,7 +341,7 @@ const content = await client.ingestUri(
   "https://arxiv.org/pdf/1706.03762.pdf", // Attention Is All You Need paper
   "AI Research Paper", // name
   undefined, // id
-  true // isSynchronous - waits for processing
+  true, // isSynchronous - waits for processing
 );
 
 console.log(`✅ Uploaded: ${content.ingestUri.id}`);
@@ -360,7 +363,7 @@ await client.streamAgent(
     }
   },
   conversation.createConversation.id, // conversationId with content filter
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -374,7 +377,7 @@ const webpage = await client.ingestUri(
   "https://en.wikipedia.org/wiki/Artificial_intelligence", // uri
   "AI Wikipedia Page", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -389,7 +392,7 @@ const conversation = await client.createConversation({
 const response = await client.promptAgent(
   "Summarize the key points about AI from this Wikipedia page",
   conversation.createConversation.id, // conversationId with filter
-  { id: spec.createSpecification.id } // specification (create one as shown above)
+  { id: spec.createSpecification.id }, // specification (create one as shown above)
 );
 
 console.log(response.message);
@@ -412,7 +415,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -425,24 +430,24 @@ const providers = [
   {
     name: "OpenAI GPT-4o",
     serviceType: Types.ModelServiceTypes.OpenAi,
-    openAI: { model: Types.OpenAiModels.Gpt4O_128K }
+    openAI: { model: Types.OpenAiModels.Gpt4O_128K },
   },
   {
     name: "Cohere Command R+",
     serviceType: Types.ModelServiceTypes.Cohere,
-    cohere: { model: Types.CohereModels.CommandRPlus }
+    cohere: { model: Types.CohereModels.CommandRPlus },
   },
   {
     name: "Groq Llama",
     serviceType: Types.ModelServiceTypes.Groq,
-    groq: { model: Types.GroqModels.Llama_3_3_70B }
-  }
+    groq: { model: Types.GroqModels.Llama_3_3_70B },
+  },
 ];
 
 // Compare responses
 for (const provider of providers) {
   console.log(`\n🤖 ${provider.name}:`);
-
+
   const spec = await client.createSpecification({
     ...provider,
     type: Types.SpecificationTypes.Completion,
@@ -456,7 +461,7 @@ for (const provider of providers) {
     }
   },
   undefined,
-  { id: spec.createSpecification.id }
+  { id: spec.createSpecification.id },
 );
 }
 ```
@@ -518,7 +523,7 @@ await client.streamAgent(
   undefined, // conversationId
   { id: spec.createSpecification.id }, // specification
   [weatherTool], // tools
-  toolHandlers // handlers
+  toolHandlers, // handlers
 );
 ```
 
@@ -563,7 +568,7 @@ class KnowledgeAssistant {
       url, // uri
       url.split("/").pop() || "Document", // name
      undefined, // id
-      true // isSynchronous - wait for processing
+      true, // isSynchronous - wait for processing
    );
    this.contentIds.push(content.ingestUri.id);
  }
@@ -593,7 +598,7 @@ class KnowledgeAssistant {
        }
      },
      this.conversationId, // Maintains conversation context
-      { id: this.specificationId! } // specification
+      { id: this.specificationId! }, // specification
    );
  }
 }
@@ -623,7 +628,7 @@ const document = await client.ingestUri(
   "https://example.com/document.pdf", // uri
   "Document #12345", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -634,7 +639,7 @@ const extraction = await client.extractContents(
   "Extract the key information from this document",
   undefined, // tools
   undefined, // specification
-  { contents: [{ id: document.ingestUri.id }] } // filter
+  { contents: [{ id: document.ingestUri.id }] }, // filter
 );
 
 console.log("Extracted data:", extraction.extractContents);
@@ -653,7 +658,7 @@ for (const url of documentUrls) {
     url, // uri
     url.split("/").pop() || "Document", // name
     undefined, // id
-    true // isSynchronous
+    true, // isSynchronous
   );
   ids.push(content.ingestUri.id);
 }
@@ -666,7 +671,7 @@ const summary = await client.summarizeContents(
       prompt: "Create an executive summary of these documents",
     },
   ], // summarizations
-  { contents: ids.map((id) => ({ id })) } // filter
+  { contents: ids.map((id) => ({ id })) }, // filter
 );
 
 console.log("Summary:", summary.summarizeContents);
@@ -680,13 +685,13 @@ const content = await client.ingestUri(
   "https://example.com/large-document.pdf", // uri
   undefined, // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 console.log("✅ Content ready!");
 
 // Option 2: Asynchronous processing (for large files)
 const content = await client.ingestUri(
-  "https://example.com/very-large-video.mp4" // uri
+  "https://example.com/very-large-video.mp4", // uri
   // isSynchronous defaults to false
 );
 
@@ -724,7 +729,7 @@ const result = await client.promptAgent(
   {
     // Only allow retrieval from specific content
     contents: [{ id: "content-id-1" }, { id: "content-id-2" }],
-  }
+  },
 );
 
 // Example 2: Streaming with content filter
@@ -745,7 +750,7 @@ await client.streamAgent(
   {
     // Filter by collection
    collections: [{ id: "technical-docs-collection" }],
-  }
+  },
 );
 ```
 
@@ -775,7 +780,7 @@ await client.streamAgent(
   {
     // Force this content into context
    contents: [{ id: fileContent.content.id }],
-  }
+  },
 );
 ```
 
@@ -801,7 +806,7 @@ await client.promptAgent(
   {
     // Always include the specific code file
    contents: [{ id: "implementation-file-id" }],
-  }
+  },
 );
 ```
 
@@ -846,7 +851,7 @@ await client.updateProject({
 
 // Now all content will be automatically summarized
 const content = await client.ingestUri(
-  "https://example.com/report.pdf" // uri
+  "https://example.com/report.pdf", // uri
 );
 ```
 
@@ -879,7 +884,7 @@ await client.streamAgent(
     }
   },
   undefined,
-  { id: conversationSpec.createSpecification.id }
+  { id: conversationSpec.createSpecification.id },
 );
 ```
 
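Aside from formatting, the README's retry surface is unchanged, so existing `setRetryConfig` usage keeps working. As a quick illustration, here is a minimal sketch wiring the documented `onRetry` hook into simple per-operation telemetry (the counter and log format are illustrative, not part of the SDK):

```typescript
import { Graphlit } from "graphlit-client";

const client = new Graphlit(); // reads GRAPHLIT_* credentials from the environment

// Illustrative: count retries per GraphQL operation to spot flaky endpoints.
const retryCounts = new Map<string, number>();

client.setRetryConfig({
  maxAttempts: 5, // matches the documented default
  initialDelay: 300,
  jitter: true, // randomized delays avoid synchronized retry storms
  onRetry: (attempt, error, operation) => {
    const name = operation.operationName ?? "unknown";
    retryCounts.set(name, (retryCounts.get(name) ?? 0) + 1);
    console.warn(`Retry #${attempt} for ${name}: ${error.message}`);
  },
});
```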
package/dist/client.js
CHANGED
@@ -133,7 +133,8 @@ class Graphlit {
     constructor(organizationIdOrOptions, environmentId, jwtSecret, ownerId, userId, apiUri) {
         // Handle both old constructor signature and new options object
         let options;
-        if (typeof organizationIdOrOptions === "object" && organizationIdOrOptions !== null) {
+        if (typeof organizationIdOrOptions === "object" &&
+            organizationIdOrOptions !== null) {
             // New constructor with options object
             options = organizationIdOrOptions;
         }
@@ -145,7 +146,7 @@ class Graphlit {
                 jwtSecret,
                 ownerId,
                 userId,
-                apiUri
+                apiUri,
             };
         }
         this.apiUri =
@@ -158,7 +159,8 @@ class Graphlit {
         dotenv.config();
         this.organizationId =
             options.organizationId || process.env.GRAPHLIT_ORGANIZATION_ID;
-        this.environmentId = options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
+        this.environmentId =
+            options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
         this.jwtSecret = options.jwtSecret || process.env.GRAPHLIT_JWT_SECRET;
         // optional: for multi-tenant support
         this.ownerId = options.ownerId || process.env.GRAPHLIT_OWNER_ID;
@@ -179,7 +181,7 @@ class Graphlit {
             maxDelay: 30000,
             retryableStatusCodes: [429, 502, 503, 504],
             jitter: true,
-            ...options.retryConfig
+            ...options.retryConfig,
         };
         if (!this.organizationId) {
             throw new Error("Graphlit organization identifier is required.");
@@ -223,7 +225,9 @@ class Graphlit {
         if (statusCode && this.retryConfig.retryableStatusCodes) {
             const shouldRetry = this.retryConfig.retryableStatusCodes.includes(statusCode);
             // Call onRetry callback if provided
-            if (shouldRetry && this.retryConfig.onRetry && _operation.getContext().retryCount !== undefined) {
+            if (shouldRetry &&
+                this.retryConfig.onRetry &&
+                _operation.getContext().retryCount !== undefined) {
                 const attempt = _operation.getContext().retryCount + 1;
                 this.retryConfig.onRetry(attempt, error, _operation);
             }
@@ -332,7 +336,7 @@ class Graphlit {
     setRetryConfig(retryConfig) {
         this.retryConfig = {
             ...this.retryConfig,
-            ...retryConfig
+            ...retryConfig,
         };
         // Refresh client to apply new retry configuration
         this.refreshClient();
@@ -1693,12 +1697,28 @@ class Graphlit {
         }
         // 2. Initial prompt
         const promptResponse = await this.promptConversation(prompt, actualConversationId, specification, mimeType, data, tools, false, // requireTool
-
+        true, // includeDetails - needed for context window tracking
         correlationId);
         let currentMessage = promptResponse.promptConversation?.message;
         if (!currentMessage) {
             throw new Error("No message in prompt response");
         }
+        // Calculate and return context window usage in result
+        const details = promptResponse.promptConversation?.details;
+        let contextWindowUsage;
+        if (details?.tokenLimit && details?.messages) {
+            // Sum up all message tokens
+            const usedTokens = details.messages.reduce((sum, msg) => sum + (msg?.tokens || 0), 0);
+            contextWindowUsage = {
+                usedTokens,
+                maxTokens: details.tokenLimit,
+                percentage: Math.round((usedTokens / details.tokenLimit) * 100),
+                remainingTokens: Math.max(0, details.tokenLimit - usedTokens),
+            };
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
+            }
+        }
         // 3. Tool calling loop
         const allToolCalls = [];
         let rounds = 0;
@@ -1755,6 +1775,7 @@ class Graphlit {
             toolResults: allToolCalls,
             metrics,
             usage,
+            contextWindow: contextWindowUsage,
         };
     }
     catch (error) {
@@ -1952,6 +1973,24 @@ class Graphlit {
                 });
             }
         }
+        // Emit context window usage event
+        const details = formatResponse.formatConversation?.details;
+        if (details?.tokenLimit && details?.messages) {
+            // Sum up all message tokens
+            const usedTokens = details.messages.reduce((sum, msg) => sum + (msg?.tokens || 0), 0);
+            uiAdapter.handleEvent({
+                type: "context_window",
+                usage: {
+                    usedTokens,
+                    maxTokens: details.tokenLimit,
+                    percentage: Math.round((usedTokens / details.tokenLimit) * 100),
+                    remainingTokens: Math.max(0, details.tokenLimit - usedTokens),
+                },
+            });
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
+            }
+        }
        // Build message array with conversation history
        const messages = [];
        // Add system prompt if specified
package/dist/streaming/llm-formatters.d.ts
CHANGED
@@ -79,10 +79,13 @@ export declare function formatMessagesForAnthropic(messages: ConversationMessage
 export declare function formatMessagesForGoogle(messages: ConversationMessage[]): GoogleMessage[];
 /**
  * Cohere message format
+ * Note: For Cohere v7 SDK, messages are handled differently:
+ * - Current message is passed as 'message' parameter
+ * - Previous messages are passed as 'chatHistory' array
  */
 export interface CohereMessage {
-    role: "
-
+    role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
+    message: string;
     tool_calls?: Array<{
         id: string;
         name: string;
@@ -90,12 +93,11 @@ export interface CohereMessage {
     }>;
     tool_results?: Array<{
         call: {
-            id: string;
             name: string;
             parameters: Record<string, any>;
         };
         outputs: Array<{
-            text: string;
+            output: string;
         }>;
     }>;
 }
package/dist/streaming/llm-formatters.js
CHANGED
@@ -284,14 +284,14 @@ export function formatMessagesForCohere(messages) {
         switch (message.role) {
             case ConversationRoleTypes.System:
                 formattedMessages.push({
-                    role: "
-
+                    role: "SYSTEM",
+                    message: trimmedMessage,
                 });
                 break;
             case ConversationRoleTypes.Assistant:
                 const assistantMessage = {
-                    role: "
-
+                    role: "CHATBOT",
+                    message: trimmedMessage,
                 };
                 // Add tool calls if present
                 if (message.toolCalls && message.toolCalls.length > 0) {
@@ -300,32 +300,37 @@ export function formatMessagesForCohere(messages) {
                     .map((toolCall) => ({
                     id: toolCall.id,
                     name: toolCall.name,
-                    parameters: toolCall.arguments ? JSON.parse(toolCall.arguments) : {},
+                    parameters: toolCall.arguments
+                        ? JSON.parse(toolCall.arguments)
+                        : {},
                 }));
                 }
                 formattedMessages.push(assistantMessage);
                 break;
             case ConversationRoleTypes.Tool:
-                // Cohere expects tool results as
+                // Cohere expects tool results as TOOL messages
                 formattedMessages.push({
-                    role: "
-
-                    tool_results: [
+                    role: "TOOL",
+                    message: trimmedMessage,
+                    tool_results: [
+                        {
                             call: {
-                                id: message.toolCallId || "",
                                 name: "", // Would need to be tracked from the tool call
                                 parameters: {},
                             },
-                            outputs: [
-
-
-
+                            outputs: [
+                                {
+                                    output: trimmedMessage, // Changed from 'text' to 'output'
+                                },
+                            ],
+                        },
+                    ],
                 });
                 break;
             default: // User messages
                 formattedMessages.push({
-                    role: "
-
+                    role: "USER",
+                    message: trimmedMessage,
                 });
                 break;
         }
package/dist/streaming/providers.js
CHANGED
@@ -321,6 +321,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
+        // Handle OpenAI-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.code === "rate_limit_exceeded") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Rate limit hit`);
+            }
+            const rateLimitError = new Error("OpenAI rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        // Check for network errors
+        if (errorMessage.includes("fetch failed") ||
+            error.code === "ECONNRESET" ||
+            error.code === "ETIMEDOUT") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
+            }
+            const networkError = new Error(`OpenAI network error: ${errorMessage}`);
+            networkError.statusCode = 503; // Service unavailable
+            throw networkError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -631,6 +655,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, validToolCalls);
     }
     catch (error) {
+        // Handle Anthropic-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for overloaded errors
+        if (error.type === "overloaded_error" ||
+            errorMessage.includes("Overloaded")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Service overloaded`);
+            }
+            // Treat overloaded as a rate limit error for retry logic
+            const overloadError = new Error("Anthropic service overloaded");
+            overloadError.statusCode = 503; // Service unavailable
+            throw overloadError;
+        }
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.type === "rate_limit_error") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Rate limit hit`);
+            }
+            const rateLimitError = new Error("Anthropic rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -993,16 +1041,57 @@ onEvent, onComplete) {
  */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
 onEvent, onComplete) {
-
-
+    try {
+        // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Groq-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for tool calling errors
+        if (error.status === 400 &&
+            errorMessage.includes("Failed to call a function")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Tool calling error: ${errorMessage}`);
+            }
+            // Groq may have limitations with certain tool schemas
+            // Re-throw with a more descriptive error
+            throw new Error(`Groq tool calling error: ${errorMessage}. The model may not support the provided tool schema format.`);
+        }
+        // Handle rate limits
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Rate limit hit (429)`);
+            }
+            const rateLimitError = new Error("Groq rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
 onEvent, onComplete) {
-
-
+    try {
+        // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Cerebras-specific 429 errors
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Cerebras] Rate limit hit (429)`);
+            }
+            // Re-throw with proper status code for retry logic
+            const rateLimitError = new Error("Cerebras rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
@@ -1037,7 +1126,7 @@ onEvent, onComplete) {
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
         hasDeepseekModel: !!specification.deepseek?.model,
-        deepseekModelValue: specification.deepseek?.model
+        deepseekModelValue: specification.deepseek?.model,
     });
 }
 const modelName = getModelName(specification);
@@ -1046,7 +1135,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
-        hasCustomModelName: !!specification.deepseek?.modelName
+        hasCustomModelName: !!specification.deepseek?.modelName,
     });
     throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
 }
@@ -1109,9 +1198,12 @@ onEvent, onComplete) {
 // Performance metrics tracking (internal only)
 if (tokenCount % 10 === 0) {
     const totalTokens = tokenCount + toolArgumentTokens;
-    const tokensPerSecond = totalTokens > 0 ? totalTokens / ((currentTime - startTime) / 1000) : 0;
+    const tokensPerSecond = totalTokens > 0
+        ? totalTokens / ((currentTime - startTime) / 1000)
+        : 0;
     const avgInterTokenDelay = interTokenDelays.length > 0
-        ? interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length
+        ? interTokenDelays.reduce((a, b) => a + b, 0) /
+            interTokenDelays.length
         : 0;
 }
 }
@@ -1231,82 +1323,144 @@ onEvent, onComplete) {
 if (messages.length === 0) {
     throw new Error("No messages found for Cohere streaming");
 }
+// Cohere v7 expects a single message and optional chatHistory
+// Extract the last message as the current message
+const lastMessage = messages[messages.length - 1];
+const chatHistory = messages.slice(0, -1);
+if (!lastMessage || !lastMessage.message) {
+    throw new Error("Last message must have message property for Cohere streaming");
+}
+// Build properly typed request using Cohere SDK types
 const streamConfig = {
     model: modelName,
-
+    message: lastMessage.message, // Current message (singular)
 };
+// Add chat history if there are previous messages
+if (chatHistory.length > 0) {
+    // Build properly typed chat history using Cohere SDK Message types
+    const cohereHistory = chatHistory.map((msg) => {
+        switch (msg.role) {
+            case "USER":
+                return {
+                    role: "USER",
+                    message: msg.message,
+                };
+            case "CHATBOT":
+                const chatbotMsg = {
+                    role: "CHATBOT",
+                    message: msg.message,
+                };
+                // Add tool calls if present
+                if (msg.tool_calls && msg.tool_calls.length > 0) {
+                    chatbotMsg.toolCalls = msg.tool_calls.map((tc) => ({
+                        name: tc.name,
+                        parameters: tc.parameters || {},
+                    }));
+                }
+                return chatbotMsg;
+            case "SYSTEM":
+                return {
+                    role: "SYSTEM",
+                    message: msg.message,
+                };
+            case "TOOL":
+                return {
+                    role: "TOOL",
+                    toolResults: msg.tool_results || [],
+                };
+            default:
+                // Fallback - treat as USER
+                return {
+                    role: "USER",
+                    message: msg.message,
+                };
+        }
+    });
+    streamConfig.chatHistory = cohereHistory;
+}
 // Only add temperature if it's defined
-if (specification.cohere?.temperature !== undefined) {
+if (specification.cohere?.temperature !== undefined &&
+    specification.cohere.temperature !== null) {
     streamConfig.temperature = specification.cohere.temperature;
 }
 // Add tools if provided
 if (tools && tools.length > 0) {
-    streamConfig.tools = tools.map((tool) => {
+    const cohereTools = tools.map((tool) => {
         if (!tool.schema) {
             return {
-                name: tool.name,
-                description: tool.description,
-                parameter_definitions: {},
+                name: tool.name || "",
+                description: tool.description || "",
+                parameterDefinitions: {},
             };
         }
         // Parse the JSON schema
         const schema = JSON.parse(tool.schema);
         // Convert JSON Schema to Cohere's expected format
-        const parameter_definitions = {};
+        const parameterDefinitions = {};
         if (schema.properties) {
             for (const [key, value] of Object.entries(schema.properties)) {
                 const prop = value;
                 const paramDef = {
-                    type: prop.type || "
+                    type: prop.type || "str",
                     description: prop.description || "",
                     required: schema.required?.includes(key) || false,
                 };
-
-                if (prop.enum) {
-                    paramDef.options = prop.enum;
-                }
-                if (prop.default !== undefined) {
-                    paramDef.default = prop.default;
-                }
-                if (prop.items) {
-                    paramDef.items = prop.items;
-                }
-                parameter_definitions[key] = paramDef;
+                parameterDefinitions[key] = paramDef;
             }
         }
         return {
-            name: tool.name,
-            description: tool.description,
-            parameter_definitions,
+            name: tool.name || "",
+            description: tool.description || "",
+            parameterDefinitions, // Use camelCase as expected by Cohere SDK
        };
    });
+    streamConfig.tools = cohereTools;
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere] Has
-    if (cohereClient.chat) {
-        console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
-    }
+    console.log(`🔍 [Cohere] Current message: "${streamConfig.message}"`);
+    console.log(`🔍 [Cohere] Chat history length: ${streamConfig.chatHistory?.length || 0}`);
+    console.log(`🔍 [Cohere] Has tools: ${!!streamConfig.tools}`);
     console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
 }
 let stream;
 try {
+    // Always log the full config when debugging Command A errors
+    if (modelName.includes("command-a") ||
+        process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`🔍 [Cohere] Full streamConfig for ${modelName}:`, JSON.stringify(streamConfig, null, 2));
+    }
     stream = await cohereClient.chatStream(streamConfig);
 }
 catch (streamError) {
-
-
-
-
-
-
-
-
+    // Enhanced error logging
+    console.error(`❌ [Cohere] Stream creation failed for model ${modelName}`);
+    console.error(`❌ [Cohere] Error type: ${streamError.constructor.name}`);
+    console.error(`❌ [Cohere] Status code: ${streamError.statusCode || streamError.status || "unknown"}`);
+    console.error(`❌ [Cohere] Error message: ${streamError.message}`);
+    // Try to read the body if it's a ReadableStream
+    if (streamError.body &&
+        typeof streamError.body.getReader === "function") {
+        try {
+            const reader = streamError.body.getReader();
+            let fullBody = "";
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                fullBody += new TextDecoder().decode(value);
+            }
+            console.error(`❌ [Cohere] Raw error body:`, fullBody);
+            try {
+                const parsed = JSON.parse(fullBody);
+                console.error(`❌ [Cohere] Parsed error details:`, JSON.stringify(parsed, null, 2));
+            }
+            catch (e) {
+                console.error(`❌ [Cohere] Could not parse error body as JSON`);
+            }
        }
-
-        console.error(`❌ [Cohere]
+        catch (e) {
+            console.error(`❌ [Cohere] Could not read error body:`, e);
        }
    }
    throw streamError;
@@ -1498,7 +1652,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         bedrock: specification.bedrock,
-        hasCustomModelName: !!specification.bedrock?.modelName
+        hasCustomModelName: !!specification.bedrock?.modelName,
     });
     throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
 }
@@ -1511,9 +1665,13 @@ onEvent, onComplete) {
 // The AWS SDK expects content as an array of content blocks
 const converseMessages = messages.map((msg) => ({
     role: msg.role,
-    content: [
-
-
+    content: [
+        {
+            text: typeof msg.content === "string"
+                ? msg.content
+                : msg.content.toString(),
+        },
+    ],
 }));
 // Prepare the request using Converse API format
 // Using 'any' type because:
@@ -1649,9 +1807,25 @@ onEvent, onComplete) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.error(`❌ [Bedrock] Stream error:`, error);
 }
+// Handle specific Bedrock errors
+const errorMessage = error.message || error.toString();
+const errorName = error.name || "";
+// Check for throttling errors
+if (errorName === "ThrottlingException" ||
+    errorMessage.includes("Too many tokens") ||
+    errorMessage.includes("Too many requests")) {
+    onEvent({
+        type: "error",
+        error: `Bedrock rate limit: ${errorMessage}`,
+    });
+    // Re-throw with a specific error type that the retry logic can handle
+    const rateLimitError = new Error(errorMessage);
+    rateLimitError.statusCode = 429; // Treat as rate limit
+    throw rateLimitError;
+}
 onEvent({
     type: "error",
-    error: `Bedrock streaming error: ${
+    error: `Bedrock streaming error: ${errorMessage}`,
 });
 throw error;
 }
package/dist/streaming/ui-event-adapter.d.ts
CHANGED
@@ -24,6 +24,7 @@ export declare class UIEventAdapter {
     private chunkBuffer?;
     private smoothingDelay;
     private chunkQueue;
+    private contextWindowUsage?;
     constructor(onEvent: (event: AgentStreamEvent) => void, conversationId: string, options?: {
         smoothingEnabled?: boolean;
         chunkingStrategy?: ChunkingStrategy;
@@ -49,6 +50,7 @@ export declare class UIEventAdapter {
     private emitNextChunk;
     private emitMessageUpdate;
     private emitUIEvent;
+    private handleContextWindow;
     /**
      * Clean up any pending timers
      */
package/dist/streaming/ui-event-adapter.js
CHANGED
@@ -23,6 +23,7 @@ export class UIEventAdapter {
     chunkBuffer;
     smoothingDelay = 30;
     chunkQueue = []; // Queue of chunks waiting to be emitted
+    contextWindowUsage;
     constructor(onEvent, conversationId, options = {}) {
         this.onEvent = onEvent;
         this.conversationId = conversationId;
@@ -66,6 +67,9 @@ export class UIEventAdapter {
             case "error":
                 this.handleError(event.error);
                 break;
+            case "context_window":
+                this.handleContextWindow(event.usage);
+                break;
         }
     }
     handleStart(conversationId) {
@@ -323,11 +327,16 @@ export class UIEventAdapter {
                 finalMetrics.streamingThroughput = Math.round((this.currentMessage.length / streamingTime) * 1000);
             }
         }
-        this.emitUIEvent({
+        // Include context window usage if available
+        const event = {
             type: "conversation_completed",
             message: finalMessage,
             metrics: finalMetrics,
-        });
+        };
+        if (this.contextWindowUsage) {
+            event.contextWindow = this.contextWindowUsage;
+        }
+        this.emitUIEvent(event);
     }
     handleError(error) {
         this.isStreaming = false;
@@ -468,6 +477,18 @@ export class UIEventAdapter {
     emitUIEvent(event) {
         this.onEvent(event);
     }
+    handleContextWindow(usage) {
+        // Store for later inclusion in completion event
+        this.contextWindowUsage = usage;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`📊 [UIEventAdapter] Context window: ${usage.usedTokens}/${usage.maxTokens} (${usage.percentage}%)`);
+        }
+        this.emitUIEvent({
+            type: "context_window",
+            usage,
+            timestamp: new Date(),
+        });
+    }
     /**
      * Clean up any pending timers
      */
package/dist/types/agent.d.ts
CHANGED
@@ -13,6 +13,12 @@ export interface AgentMetrics {
     toolExecutions?: number;
     rounds?: number;
 }
+export interface ContextWindowUsage {
+    usedTokens: number;
+    maxTokens: number;
+    percentage: number;
+    remainingTokens: number;
+}
 export interface AgentResult {
     message: string;
     conversationId: string;
@@ -21,6 +27,7 @@ export interface AgentResult {
     toolResults?: ToolCallResult[];
     metrics?: AgentMetrics;
     usage?: UsageInfo;
+    contextWindow?: ContextWindowUsage;
     error?: AgentError;
 }
 export interface StreamAgentOptions {
package/dist/types/ui-events.d.ts
CHANGED
@@ -3,6 +3,19 @@ import { ConversationMessage, ConversationToolCall } from "../generated/graphql-
  * Tool execution status for streaming
  */
 export type ToolExecutionStatus = "preparing" | "executing" | "ready" | "completed" | "failed";
+/**
+ * Context window usage event - emitted at start of agent interaction
+ */
+export type ContextWindowEvent = {
+    type: "context_window";
+    usage: {
+        usedTokens: number;
+        maxTokens: number;
+        percentage: number;
+        remainingTokens: number;
+    };
+    timestamp: Date;
+};
 /**
  * Simplified UI-focused streaming events using GraphQL types
  */
@@ -11,7 +24,7 @@ export type AgentStreamEvent = {
     conversationId: string;
     timestamp: Date;
     model?: string;
-} | {
+} | ContextWindowEvent | {
     type: "message_update";
     message: Partial<ConversationMessage> & {
         message: string;
@@ -43,6 +56,12 @@ export type AgentStreamEvent = {
         avgTokenDelay?: number;
         streamingThroughput?: number;
     };
+    contextWindow?: {
+        usedTokens: number;
+        maxTokens: number;
+        percentage: number;
+        remainingTokens: number;
+    };
 } | {
     type: "error";
     error: {