graphlit-client 1.0.20250615003 → 1.0.20250615004
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -54
- package/dist/client.js +10 -6
- package/dist/streaming/llm-formatters.d.ts +6 -4
- package/dist/streaming/llm-formatters.js +21 -16
- package/dist/streaming/providers.js +141 -20
- package/package.json +1 -1
package/README.md
CHANGED

@@ -75,7 +75,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId (optional)
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -134,7 +134,7 @@ GOOGLE_API_KEY=your_key
 
 # Additional streaming providers
 GROQ_API_KEY=your_key # For Groq models (Llama, Mixtral)
-CEREBRAS_API_KEY=your_key  # For Cerebras models
+CEREBRAS_API_KEY=your_key # For Cerebras models
 COHERE_API_KEY=your_key # For Cohere Command models
 MISTRAL_API_KEY=your_key # For Mistral models
 DEEPSEEK_API_KEY=your_key # For Deepseek models
@@ -152,8 +152,9 @@ The SDK now includes automatic retry logic for network errors and transient failures.
 ### Default Retry Configuration
 
 By default, the client will automatically retry on these status codes:
+
 - `429` - Too Many Requests
-- `502` - Bad Gateway 
+- `502` - Bad Gateway
 - `503` - Service Unavailable
 - `504` - Gateway Timeout
 
@@ -168,19 +169,19 @@ Configure retry behavior to match your needs:
 ```typescript
 const client = new Graphlit({
   organizationId: "your_org_id",
-  environmentId: "your_env_id", 
+  environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 10,
-    initialDelay: 500,
-    maxDelay: 60000,
-    jitter: true,
+    maxAttempts: 10, // Maximum retry attempts (default: 5)
+    initialDelay: 500, // Initial delay in ms (default: 300)
+    maxDelay: 60000, // Maximum delay in ms (default: 30000)
+    jitter: true, // Add randomness to delays (default: true)
     retryableStatusCodes: [429, 500, 502, 503, 504], // Custom status codes
     onRetry: (attempt, error, operation) => {
       console.log(`Retry attempt ${attempt} for ${operation.operationName}`);
       console.log(`Error: ${error.message}`);
-    }
-  }
+    },
+  },
 });
 ```
 
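For context, the annotated defaults (5 attempts, 300 ms initial delay, 30 s cap, jitter enabled) describe a conventional exponential backoff schedule. The sketch below illustrates the delay curve these options imply; it is an illustration of the documented parameters, not the SDK's actual internals.

```typescript
// Illustrative only: the delay schedule implied by retryConfig,
// assuming doubling per attempt, a cap at maxDelay, and full jitter.
interface BackoffOptions {
  initialDelay: number; // ms
  maxDelay: number; // ms
  jitter: boolean;
}

function retryDelay(attempt: number, opts: BackoffOptions): number {
  // attempt 1 -> initialDelay, attempt 2 -> 2x, attempt 3 -> 4x, ...
  const base = Math.min(opts.initialDelay * 2 ** (attempt - 1), opts.maxDelay);
  // Jitter spreads clients across [0, base] so retries don't synchronize.
  return opts.jitter ? Math.random() * base : base;
}

// With the config above: attempt 3 waits up to 2000 ms, always capped at 60 s.
console.log(retryDelay(3, { initialDelay: 500, maxDelay: 60000, jitter: true }));
```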
@@ -196,7 +197,7 @@ const client = new Graphlit();
 client.setRetryConfig({
   maxAttempts: 20,
   initialDelay: 100,
-  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524]
+  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524],
 });
 ```
 
@@ -210,8 +211,8 @@ const client = new Graphlit({
   environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 1
-  }
+    maxAttempts: 1, // No retries
+  },
 });
 ```
 
@@ -221,17 +222,17 @@ The Graphlit SDK supports real-time streaming responses from 9 different LLM providers
 
 ### Supported Providers
 
-| Provider
-
-| **OpenAI**
-| **Anthropic**
-| **Google**
-| **Groq**
-| **Cerebras**
-| **Cohere**
-| **Mistral**
-| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4
-| **Deepseek**
+| Provider        | Models                                        | SDK Required                      | API Key             |
+| --------------- | --------------------------------------------- | --------------------------------- | ------------------- |
+| **OpenAI**      | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4            | `openai`                          | `OPENAI_API_KEY`    |
+| **Anthropic**   | Claude 3, Claude 3.5, Claude 3.7, Claude 4    | `@anthropic-ai/sdk`               | `ANTHROPIC_API_KEY` |
+| **Google**      | Gemini 1.5, Gemini 2.0, Gemini 2.5            | `@google/generative-ai`           | `GOOGLE_API_KEY`    |
+| **Groq**        | Llama 4, Llama 3.3, Mixtral, Deepseek R1      | `groq-sdk`                        | `GROQ_API_KEY`      |
+| **Cerebras**    | Llama 3.3, Llama 3.1                          | `openai`                          | `CEREBRAS_API_KEY`  |
+| **Cohere**      | Command R+, Command R, Command R7B, Command A | `cohere-ai`                       | `COHERE_API_KEY`    |
+| **Mistral**     | Mistral Large, Medium, Small, Nemo, Pixtral   | `@mistralai/mistralai`            | `MISTRAL_API_KEY`   |
+| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4         | `@aws-sdk/client-bedrock-runtime` | AWS credentials     |
+| **Deepseek**    | Deepseek Chat, Deepseek Reasoner              | `openai`                          | `DEEPSEEK_API_KEY`  |
 
 ### Setting Up Streaming
 
@@ -250,7 +251,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -263,9 +266,9 @@ const spec = await client.createSpecification({
   name: "Multi-Provider Assistant",
   type: Types.SpecificationTypes.Completion,
   serviceType: Types.ModelServiceTypes.Cohere, // or any supported provider
-  cohere: { 
+  cohere: {
     model: Types.CohereModels.CommandRPlus,
-    temperature: 0.7
+    temperature: 0.7,
   },
 });
 ```
@@ -310,7 +313,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -338,7 +341,7 @@ const content = await client.ingestUri(
   "https://arxiv.org/pdf/1706.03762.pdf", // Attention Is All You Need paper
   "AI Research Paper", // name
   undefined, // id
-  true // isSynchronous - waits for processing
+  true, // isSynchronous - waits for processing
 );
 
 console.log(`✅ Uploaded: ${content.ingestUri.id}`);
@@ -360,7 +363,7 @@ await client.streamAgent(
     }
   },
   conversation.createConversation.id, // conversationId with content filter
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -374,7 +377,7 @@ const webpage = await client.ingestUri(
   "https://en.wikipedia.org/wiki/Artificial_intelligence", // uri
   "AI Wikipedia Page", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -389,7 +392,7 @@ const conversation = await client.createConversation({
 const response = await client.promptAgent(
   "Summarize the key points about AI from this Wikipedia page",
   conversation.createConversation.id, // conversationId with filter
-  { id: spec.createSpecification.id } // specification (create one as shown above)
+  { id: spec.createSpecification.id }, // specification (create one as shown above)
 );
 
 console.log(response.message);
@@ -412,7 +415,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -425,24 +430,24 @@ const providers = [
   {
     name: "OpenAI GPT-4o",
     serviceType: Types.ModelServiceTypes.OpenAi,
-    openAI: { model: Types.OpenAiModels.Gpt4O_128K }
+    openAI: { model: Types.OpenAiModels.Gpt4O_128K },
   },
   {
     name: "Cohere Command R+",
     serviceType: Types.ModelServiceTypes.Cohere,
-    cohere: { model: Types.CohereModels.CommandRPlus }
+    cohere: { model: Types.CohereModels.CommandRPlus },
   },
   {
     name: "Groq Llama",
     serviceType: Types.ModelServiceTypes.Groq,
-    groq: { model: Types.GroqModels.Llama_3_3_70B }
-  }
+    groq: { model: Types.GroqModels.Llama_3_3_70B },
+  },
 ];
 
 // Compare responses
 for (const provider of providers) {
   console.log(`\n🤖 ${provider.name}:`);
- 
+
   const spec = await client.createSpecification({
     ...provider,
     type: Types.SpecificationTypes.Completion,
@@ -456,7 +461,7 @@ for (const provider of providers) {
     }
   },
   undefined,
-  { id: spec.createSpecification.id }
+  { id: spec.createSpecification.id },
   );
 }
 ```
@@ -518,7 +523,7 @@ await client.streamAgent(
   undefined, // conversationId
   { id: spec.createSpecification.id }, // specification
   [weatherTool], // tools
-  toolHandlers // handlers
+  toolHandlers, // handlers
 );
 ```
 
@@ -563,7 +568,7 @@ class KnowledgeAssistant {
       url, // uri
       url.split("/").pop() || "Document", // name
      undefined, // id
-      true // isSynchronous - wait for processing
+      true, // isSynchronous - wait for processing
     );
     this.contentIds.push(content.ingestUri.id);
   }
@@ -593,7 +598,7 @@ class KnowledgeAssistant {
      }
     },
     this.conversationId, // Maintains conversation context
-    { id: this.specificationId! } // specification
+    { id: this.specificationId! }, // specification
   );
 }
 }
@@ -623,7 +628,7 @@ const document = await client.ingestUri(
   "https://example.com/document.pdf", // uri
   "Document #12345", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -634,7 +639,7 @@ const extraction = await client.extractContents(
   "Extract the key information from this document",
   undefined, // tools
   undefined, // specification
-  { contents: [{ id: document.ingestUri.id }] } // filter
+  { contents: [{ id: document.ingestUri.id }] }, // filter
 );
 
 console.log("Extracted data:", extraction.extractContents);
@@ -653,7 +658,7 @@ for (const url of documentUrls) {
     url, // uri
     url.split("/").pop() || "Document", // name
     undefined, // id
-    true // isSynchronous
+    true, // isSynchronous
   );
   ids.push(content.ingestUri.id);
 }
@@ -666,7 +671,7 @@ const summary = await client.summarizeContents(
       prompt: "Create an executive summary of these documents",
     },
   ], // summarizations
-  { contents: ids.map((id) => ({ id })) } // filter
+  { contents: ids.map((id) => ({ id })) }, // filter
 );
 
 console.log("Summary:", summary.summarizeContents);
@@ -680,13 +685,13 @@ const content = await client.ingestUri(
   "https://example.com/large-document.pdf", // uri
   undefined, // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 console.log("✅ Content ready!");
 
 // Option 2: Asynchronous processing (for large files)
 const content = await client.ingestUri(
-  "https://example.com/very-large-video.mp4" // uri
+  "https://example.com/very-large-video.mp4", // uri
   // isSynchronous defaults to false
 );
 
@@ -724,7 +729,7 @@ const result = await client.promptAgent(
   {
     // Only allow retrieval from specific content
    contents: [{ id: "content-id-1" }, { id: "content-id-2" }],
-  }
+  },
 );
 
 // Example 2: Streaming with content filter
@@ -745,7 +750,7 @@ await client.streamAgent(
   {
     // Filter by collection
     collections: [{ id: "technical-docs-collection" }],
-  }
+  },
 );
 ```
 
@@ -775,7 +780,7 @@ await client.streamAgent(
   {
     // Force this content into context
     contents: [{ id: fileContent.content.id }],
-  }
+  },
 );
 ```
 
@@ -801,7 +806,7 @@ await client.promptAgent(
   {
     // Always include the specific code file
     contents: [{ id: "implementation-file-id" }],
-  }
+  },
 );
 ```
 
@@ -846,7 +851,7 @@ await client.updateProject({
 
 // Now all content will be automatically summarized
 const content = await client.ingestUri(
-  "https://example.com/report.pdf" // uri
+  "https://example.com/report.pdf", // uri
 );
 ```
 
@@ -879,7 +884,7 @@ await client.streamAgent(
     }
   },
   undefined,
-  { id: conversationSpec.createSpecification.id }
+  { id: conversationSpec.createSpecification.id },
 );
 ```
 
package/dist/client.js
CHANGED

@@ -133,7 +133,8 @@ class Graphlit {
     constructor(organizationIdOrOptions, environmentId, jwtSecret, ownerId, userId, apiUri) {
         // Handle both old constructor signature and new options object
         let options;
-        if (typeof organizationIdOrOptions === "object" && organizationIdOrOptions !== null) {
+        if (typeof organizationIdOrOptions === "object" &&
+            organizationIdOrOptions !== null) {
             // New constructor with options object
             options = organizationIdOrOptions;
         }
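The `typeof` guard above is what lets the constructor accept either the new options object or the legacy positional arguments. Both call styles below resolve to the same configuration:

```typescript
import { Graphlit } from "graphlit-client";

// New style: a single options object (matches the typeof check above)
const clientA = new Graphlit({
  organizationId: "your_org_id",
  environmentId: "your_env_id",
  jwtSecret: "your_secret",
});

// Legacy style: positional arguments, still supported
const clientB = new Graphlit("your_org_id", "your_env_id", "your_secret");
```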
@@ -145,7 +146,7 @@ class Graphlit {
             jwtSecret,
             ownerId,
             userId,
-            apiUri
+            apiUri,
         };
     }
     this.apiUri =
@@ -158,7 +159,8 @@ class Graphlit {
         dotenv.config();
         this.organizationId =
             options.organizationId || process.env.GRAPHLIT_ORGANIZATION_ID;
-        this.environmentId = options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
+        this.environmentId =
+            options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
         this.jwtSecret = options.jwtSecret || process.env.GRAPHLIT_JWT_SECRET;
         // optional: for multi-tenant support
         this.ownerId = options.ownerId || process.env.GRAPHLIT_OWNER_ID;
@@ -179,7 +181,7 @@ class Graphlit {
             maxDelay: 30000,
             retryableStatusCodes: [429, 502, 503, 504],
             jitter: true,
-            ...options.retryConfig
+            ...options.retryConfig,
         };
         if (!this.organizationId) {
             throw new Error("Graphlit organization identifier is required.");
@@ -223,7 +225,9 @@ class Graphlit {
         if (statusCode && this.retryConfig.retryableStatusCodes) {
             const shouldRetry = this.retryConfig.retryableStatusCodes.includes(statusCode);
             // Call onRetry callback if provided
-            if (shouldRetry && this.retryConfig.onRetry && _operation.getContext().retryCount !== undefined) {
+            if (shouldRetry &&
+                this.retryConfig.onRetry &&
+                _operation.getContext().retryCount !== undefined) {
                 const attempt = _operation.getContext().retryCount + 1;
                 this.retryConfig.onRetry(attempt, error, _operation);
             }
@@ -332,7 +336,7 @@ class Graphlit {
     setRetryConfig(retryConfig) {
         this.retryConfig = {
             ...this.retryConfig,
-            ...retryConfig
+            ...retryConfig,
         };
         // Refresh client to apply new retry configuration
         this.refreshClient();
package/dist/streaming/llm-formatters.d.ts
CHANGED

@@ -79,10 +79,13 @@ export declare function formatMessagesForAnthropic(messages: ConversationMessage
 export declare function formatMessagesForGoogle(messages: ConversationMessage[]): GoogleMessage[];
 /**
  * Cohere message format
+ * Note: For Cohere v7 SDK, messages are handled differently:
+ * - Current message is passed as 'message' parameter
+ * - Previous messages are passed as 'chatHistory' array
  */
 export interface CohereMessage {
-    role: "
-
+    role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
+    message: string;
     tool_calls?: Array<{
         id: string;
         name: string;
@@ -90,12 +93,11 @@ export interface CohereMessage {
     }>;
     tool_results?: Array<{
         call: {
-            id: string;
             name: string;
             parameters: Record<string, any>;
         };
         outputs: Array<{
-            text: string;
+            output: string;
         }>;
     }>;
 }
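To make the v7 note concrete, here is a minimal sketch (a hypothetical helper, not exported by this package) of how a formatted `CohereMessage[]` maps onto the v7 chat parameters:

```typescript
// Hypothetical helper: the last formatted message becomes `message`,
// everything before it becomes `chatHistory`, per the note above.
interface CohereMessage {
  role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
  message: string;
}

function toCohereChatParams(messages: CohereMessage[]) {
  const last = messages[messages.length - 1];
  if (!last) throw new Error("No messages to send");
  return {
    message: last.message, // the current turn
    chatHistory: messages.slice(0, -1), // all prior turns
  };
}
```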
package/dist/streaming/llm-formatters.js
CHANGED

@@ -284,14 +284,14 @@ export function formatMessagesForCohere(messages) {
     switch (message.role) {
         case ConversationRoleTypes.System:
             formattedMessages.push({
-                role: "
-
+                role: "SYSTEM",
+                message: trimmedMessage,
             });
             break;
         case ConversationRoleTypes.Assistant:
             const assistantMessage = {
-                role: "
-
+                role: "CHATBOT",
+                message: trimmedMessage,
             };
             // Add tool calls if present
             if (message.toolCalls && message.toolCalls.length > 0) {
@@ -300,32 +300,37 @@ export function formatMessagesForCohere(messages) {
                 .map((toolCall) => ({
                 id: toolCall.id,
                 name: toolCall.name,
-                parameters: toolCall.arguments ? JSON.parse(toolCall.arguments) : {},
+                parameters: toolCall.arguments
+                    ? JSON.parse(toolCall.arguments)
+                    : {},
             }));
             }
             formattedMessages.push(assistantMessage);
             break;
         case ConversationRoleTypes.Tool:
-            // Cohere expects tool results as
+            // Cohere expects tool results as TOOL messages
             formattedMessages.push({
-                role: "
-
-                tool_results: [
+                role: "TOOL",
+                message: trimmedMessage,
+                tool_results: [
+                    {
                         call: {
-                            id: message.toolCallId || "",
                             name: "", // Would need to be tracked from the tool call
                             parameters: {},
                         },
-                outputs: [
-
-
-
+                        outputs: [
+                            {
+                                output: trimmedMessage, // Changed from 'text' to 'output'
+                            },
+                        ],
+                    },
+                ],
             });
             break;
         default: // User messages
             formattedMessages.push({
-                role: "
-
+                role: "USER",
+                message: trimmedMessage,
             });
             break;
     }
package/dist/streaming/providers.js
CHANGED

@@ -321,6 +321,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
+        // Handle OpenAI-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.code === "rate_limit_exceeded") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Rate limit hit`);
+            }
+            const rateLimitError = new Error("OpenAI rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        // Check for network errors
+        if (errorMessage.includes("fetch failed") ||
+            error.code === "ECONNRESET" ||
+            error.code === "ETIMEDOUT") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
+            }
+            const networkError = new Error(`OpenAI network error: ${errorMessage}`);
+            networkError.statusCode = 503; // Service unavailable
+            throw networkError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
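The catch block above normalizes OpenAI-specific failures onto a numeric `statusCode` so the client's generic retry loop (see `retryableStatusCodes` in client.js) can classify them without knowing the provider. The same pattern in isolation, with a hypothetical helper name:

```typescript
// Hypothetical standalone version of the normalization used above:
// provider-specific errors are re-thrown as Errors carrying a statusCode
// that the retry layer already knows how to match.
type RetryableError = Error & { statusCode?: number };

function normalizeProviderError(error: any, provider: string): RetryableError {
  const normalized: RetryableError = new Error(
    `${provider} error: ${error?.message ?? String(error)}`,
  );
  if (error?.status === 429 || error?.code === "rate_limit_exceeded") {
    normalized.statusCode = 429; // rate limit: retry with backoff
  } else if (error?.code === "ECONNRESET" || error?.code === "ETIMEDOUT") {
    normalized.statusCode = 503; // transient network failure
  }
  return normalized;
}
```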
@@ -631,6 +655,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, validToolCalls);
     }
     catch (error) {
+        // Handle Anthropic-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for overloaded errors
+        if (error.type === "overloaded_error" ||
+            errorMessage.includes("Overloaded")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Service overloaded`);
+            }
+            // Treat overloaded as a rate limit error for retry logic
+            const overloadError = new Error("Anthropic service overloaded");
+            overloadError.statusCode = 503; // Service unavailable
+            throw overloadError;
+        }
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.type === "rate_limit_error") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Rate limit hit`);
+            }
+            const rateLimitError = new Error("Anthropic rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -993,16 +1041,57 @@ onEvent, onComplete) {
  */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
 onEvent, onComplete) {
-    // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    try {
+        // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Groq-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for tool calling errors
+        if (error.status === 400 &&
+            errorMessage.includes("Failed to call a function")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Tool calling error: ${errorMessage}`);
+            }
+            // Groq may have limitations with certain tool schemas
+            // Re-throw with a more descriptive error
+            throw new Error(`Groq tool calling error: ${errorMessage}. The model may not support the provided tool schema format.`);
+        }
+        // Handle rate limits
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Rate limit hit (429)`);
+            }
+            const rateLimitError = new Error("Groq rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
 onEvent, onComplete) {
-    // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    try {
+        // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Cerebras-specific 429 errors
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Cerebras] Rate limit hit (429)`);
+            }
+            // Re-throw with proper status code for retry logic
+            const rateLimitError = new Error("Cerebras rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
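Groq, Cerebras, and Deepseek all speak the OpenAI wire protocol, which is why both wrappers above delegate to `streamWithOpenAI`. For the providers driven through the `openai` package, wiring up a client is just a matter of pointing it at the right endpoint; the base URLs below are assumptions to verify against each provider's documentation:

```typescript
import OpenAI from "openai";

// Assumed endpoints; confirm against provider docs before relying on them.
const cerebrasClient = new OpenAI({
  apiKey: process.env.CEREBRAS_API_KEY,
  baseURL: "https://api.cerebras.ai/v1",
});

const deepseekClient = new OpenAI({
  apiKey: process.env.DEEPSEEK_API_KEY,
  baseURL: "https://api.deepseek.com",
});
```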
@@ -1037,7 +1126,7 @@ onEvent, onComplete) {
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
         hasDeepseekModel: !!specification.deepseek?.model,
-        deepseekModelValue: specification.deepseek?.model
+        deepseekModelValue: specification.deepseek?.model,
     });
 }
 const modelName = getModelName(specification);
@@ -1046,7 +1135,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
-        hasCustomModelName: !!specification.deepseek?.modelName
+        hasCustomModelName: !!specification.deepseek?.modelName,
     });
     throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
 }
@@ -1109,9 +1198,12 @@ onEvent, onComplete) {
 // Performance metrics tracking (internal only)
 if (tokenCount % 10 === 0) {
     const totalTokens = tokenCount + toolArgumentTokens;
-    const tokensPerSecond = totalTokens > 0 ? totalTokens / ((currentTime - startTime) / 1000) : 0;
+    const tokensPerSecond = totalTokens > 0
+        ? totalTokens / ((currentTime - startTime) / 1000)
+        : 0;
     const avgInterTokenDelay = interTokenDelays.length > 0
-        ? interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length
+        ? interTokenDelays.reduce((a, b) => a + b, 0) /
+            interTokenDelays.length
         : 0;
 }
 }
@@ -1231,10 +1323,22 @@ onEvent, onComplete) {
 if (messages.length === 0) {
     throw new Error("No messages found for Cohere streaming");
 }
+// Cohere v7 expects a single message and optional chatHistory
+// Extract the last message as the current message
+const lastMessage = messages[messages.length - 1];
+const chatHistory = messages.slice(0, -1);
+if (!lastMessage || !lastMessage.message) {
+    throw new Error("Last message must have message property for Cohere streaming");
+}
 const streamConfig = {
     model: modelName,
-
+    message: lastMessage.message, // Current message (singular)
 };
+// Add chat history if there are previous messages
+if (chatHistory.length > 0) {
+    // Messages already have 'message' property from formatter
+    streamConfig.chatHistory = chatHistory;
+}
 // Only add temperature if it's defined
 if (specification.cohere?.temperature !== undefined) {
     streamConfig.temperature = specification.cohere.temperature;
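For reference, a standalone sketch of the v7 call shape this config is built for, assuming the cohere-ai v7 `CohereClient`; the model name and messages are placeholders:

```typescript
import { CohereClient } from "cohere-ai";

const cohere = new CohereClient({ token: process.env.COHERE_API_KEY! });

// `message` carries the current turn; `chatHistory` carries prior turns.
const stream = await cohere.chatStream({
  model: "command-r-plus", // placeholder
  message: "What did we decide about retries?",
  chatHistory: [
    { role: "USER", message: "Let's talk about retry policy." },
    { role: "CHATBOT", message: "Sure - what would you like to know?" },
  ],
});

for await (const event of stream) {
  if (event.eventType === "text-generation") process.stdout.write(event.text);
}
```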
@@ -1283,12 +1387,9 @@ onEvent, onComplete) {
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere] Has
-    if (cohereClient.chat) {
-        console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
-    }
+    console.log(`🔍 [Cohere] Current message: "${streamConfig.message}"`);
+    console.log(`🔍 [Cohere] Chat history length: ${streamConfig.chatHistory?.length || 0}`);
+    console.log(`🔍 [Cohere] Has tools: ${!!streamConfig.tools}`);
     console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
 }
 let stream;
@@ -1498,7 +1599,7 @@ onEvent, onComplete) {
     name: specification.name,
     serviceType: specification.serviceType,
     bedrock: specification.bedrock,
-    hasCustomModelName: !!specification.bedrock?.modelName
+    hasCustomModelName: !!specification.bedrock?.modelName,
 });
 throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
 }
@@ -1511,9 +1612,13 @@ onEvent, onComplete) {
 // The AWS SDK expects content as an array of content blocks
 const converseMessages = messages.map((msg) => ({
     role: msg.role,
-    content: [
-
-
+    content: [
+        {
+            text: typeof msg.content === "string"
+                ? msg.content
+                : msg.content.toString(),
+        },
+    ],
 }));
 // Prepare the request using Converse API format
 // Using 'any' type because:
@@ -1649,9 +1754,25 @@ onEvent, onComplete) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.error(`❌ [Bedrock] Stream error:`, error);
 }
+// Handle specific Bedrock errors
+const errorMessage = error.message || error.toString();
+const errorName = error.name || "";
+// Check for throttling errors
+if (errorName === "ThrottlingException" ||
+    errorMessage.includes("Too many tokens") ||
+    errorMessage.includes("Too many requests")) {
+    onEvent({
+        type: "error",
+        error: `Bedrock rate limit: ${errorMessage}`,
+    });
+    // Re-throw with a specific error type that the retry logic can handle
+    const rateLimitError = new Error(errorMessage);
+    rateLimitError.statusCode = 429; // Treat as rate limit
+    throw rateLimitError;
+}
 onEvent({
     type: "error",
-    error: `Bedrock streaming error: ${
+    error: `Bedrock streaming error: ${errorMessage}`,
 });
 throw error;
 }
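The content-block mapping and throttling handling above target Bedrock's Converse API. As a closing reference, a standalone sketch of that API shape from `@aws-sdk/client-bedrock-runtime`; the region and model ID are placeholders:

```typescript
import {
  BedrockRuntimeClient,
  ConverseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";

const bedrock = new BedrockRuntimeClient({ region: "us-east-1" }); // placeholder region

const response = await bedrock.send(
  new ConverseStreamCommand({
    modelId: "amazon.nova-pro-v1:0", // placeholder model ID
    // Content is an array of text blocks, matching the mapping above.
    messages: [{ role: "user", content: [{ text: "Hello, Bedrock!" }] }],
  }),
);

// A ThrottlingException raised here is what the catch block above maps to 429.
for await (const event of response.stream ?? []) {
  const text = event.contentBlockDelta?.delta?.text;
  if (text) process.stdout.write(text);
}
```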