graphlit-client 1.0.20250615003 → 1.0.20250615004

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -75,7 +75,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId (optional)
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -134,7 +134,7 @@ GOOGLE_API_KEY=your_key
 
 # Additional streaming providers
 GROQ_API_KEY=your_key # For Groq models (Llama, Mixtral)
-CEREBRAS_API_KEY=your_key # For Cerebras models
+CEREBRAS_API_KEY=your_key # For Cerebras models
 COHERE_API_KEY=your_key # For Cohere Command models
 MISTRAL_API_KEY=your_key # For Mistral models
 DEEPSEEK_API_KEY=your_key # For Deepseek models
@@ -152,8 +152,9 @@ The SDK now includes automatic retry logic for network errors and transient fail
 ### Default Retry Configuration
 
 By default, the client will automatically retry on these status codes:
+
 - `429` - Too Many Requests
-- `502` - Bad Gateway
+- `502` - Bad Gateway
 - `503` - Service Unavailable
 - `504` - Gateway Timeout
 
@@ -168,19 +169,19 @@ Configure retry behavior to match your needs:
 ```typescript
 const client = new Graphlit({
   organizationId: "your_org_id",
-  environmentId: "your_env_id",
+  environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 10, // Maximum retry attempts (default: 5)
-    initialDelay: 500, // Initial delay in ms (default: 300)
-    maxDelay: 60000, // Maximum delay in ms (default: 30000)
-    jitter: true, // Add randomness to delays (default: true)
+    maxAttempts: 10, // Maximum retry attempts (default: 5)
+    initialDelay: 500, // Initial delay in ms (default: 300)
+    maxDelay: 60000, // Maximum delay in ms (default: 30000)
+    jitter: true, // Add randomness to delays (default: true)
     retryableStatusCodes: [429, 500, 502, 503, 504], // Custom status codes
     onRetry: (attempt, error, operation) => {
       console.log(`Retry attempt ${attempt} for ${operation.operationName}`);
       console.log(`Error: ${error.message}`);
-    }
-  }
+    },
+  },
 });
 ```
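
The defaults documented above (300 ms initial delay, 30 s cap, jitter enabled) imply an exponential backoff curve, but the diff does not show the exact formula the client uses. A minimal sketch, assuming standard exponential backoff with full jitter:

```typescript
// Sketch only: assumes doubling backoff capped at maxDelay, with full jitter.
// Defaults mirror the README (initialDelay 300 ms, maxDelay 30000 ms).
function retryDelay(
  attempt: number, // 1-based retry attempt
  initialDelay = 300,
  maxDelay = 30000,
  jitter = true,
): number {
  const exponential = Math.min(maxDelay, initialDelay * 2 ** (attempt - 1));
  return jitter ? Math.random() * exponential : exponential;
}
```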
 
@@ -196,7 +197,7 @@ const client = new Graphlit();
 client.setRetryConfig({
   maxAttempts: 20,
   initialDelay: 100,
-  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524]
+  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524],
 });
 ```
 
@@ -210,8 +211,8 @@ const client = new Graphlit({
   environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 1 // No retries
-  }
+    maxAttempts: 1, // No retries
+  },
 });
 ```
 
@@ -221,17 +222,17 @@ The Graphlit SDK supports real-time streaming responses from 9 different LLM pro
 ### Supported Providers
 
-| Provider | Models | SDK Required | API Key |
-|----------|--------|--------------|---------|
-| **OpenAI** | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4 | `openai` | `OPENAI_API_KEY` |
-| **Anthropic** | Claude 3, Claude 3.5, Claude 3.7, Claude 4 | `@anthropic-ai/sdk` | `ANTHROPIC_API_KEY` |
-| **Google** | Gemini 1.5, Gemini 2.0, Gemini 2.5 | `@google/generative-ai` | `GOOGLE_API_KEY` |
-| **Groq** | Llama 4, Llama 3.3, Mixtral, Deepseek R1 | `groq-sdk` | `GROQ_API_KEY` |
-| **Cerebras** | Llama 3.3, Llama 3.1 | `openai` | `CEREBRAS_API_KEY` |
-| **Cohere** | Command R+, Command R, Command R7B, Command A | `cohere-ai` | `COHERE_API_KEY` |
-| **Mistral** | Mistral Large, Medium, Small, Nemo, Pixtral | `@mistralai/mistralai` | `MISTRAL_API_KEY` |
-| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4 | `@aws-sdk/client-bedrock-runtime` | AWS credentials |
-| **Deepseek** | Deepseek Chat, Deepseek Reasoner | `openai` | `DEEPSEEK_API_KEY` |
+| Provider        | Models                                        | SDK Required                      | API Key             |
+| --------------- | --------------------------------------------- | --------------------------------- | ------------------- |
+| **OpenAI**      | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4            | `openai`                          | `OPENAI_API_KEY`    |
+| **Anthropic**   | Claude 3, Claude 3.5, Claude 3.7, Claude 4    | `@anthropic-ai/sdk`               | `ANTHROPIC_API_KEY` |
+| **Google**      | Gemini 1.5, Gemini 2.0, Gemini 2.5            | `@google/generative-ai`           | `GOOGLE_API_KEY`    |
+| **Groq**        | Llama 4, Llama 3.3, Mixtral, Deepseek R1      | `groq-sdk`                        | `GROQ_API_KEY`      |
+| **Cerebras**    | Llama 3.3, Llama 3.1                          | `openai`                          | `CEREBRAS_API_KEY`  |
+| **Cohere**      | Command R+, Command R, Command R7B, Command A | `cohere-ai`                       | `COHERE_API_KEY`    |
+| **Mistral**     | Mistral Large, Medium, Small, Nemo, Pixtral   | `@mistralai/mistralai`            | `MISTRAL_API_KEY`   |
+| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4         | `@aws-sdk/client-bedrock-runtime` | AWS credentials     |
+| **Deepseek**    | Deepseek Chat, Deepseek Reasoner              | `openai`                          | `DEEPSEEK_API_KEY`  |
 
 ### Setting Up Streaming
 
@@ -250,7 +251,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -263,9 +266,9 @@ const spec = await client.createSpecification({
   name: "Multi-Provider Assistant",
   type: Types.SpecificationTypes.Completion,
   serviceType: Types.ModelServiceTypes.Cohere, // or any supported provider
-  cohere: {
+  cohere: {
     model: Types.CohereModels.CommandRPlus,
-    temperature: 0.7
+    temperature: 0.7,
   },
 });
 ```
@@ -310,7 +313,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -338,7 +341,7 @@ const content = await client.ingestUri(
   "https://arxiv.org/pdf/1706.03762.pdf", // Attention Is All You Need paper
   "AI Research Paper", // name
   undefined, // id
-  true // isSynchronous - waits for processing
+  true, // isSynchronous - waits for processing
 );
 
 console.log(`✅ Uploaded: ${content.ingestUri.id}`);
@@ -360,7 +363,7 @@ await client.streamAgent(
     }
   },
   conversation.createConversation.id, // conversationId with content filter
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -374,7 +377,7 @@ const webpage = await client.ingestUri(
   "https://en.wikipedia.org/wiki/Artificial_intelligence", // uri
   "AI Wikipedia Page", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -389,7 +392,7 @@ const conversation = await client.createConversation({
 const response = await client.promptAgent(
   "Summarize the key points about AI from this Wikipedia page",
   conversation.createConversation.id, // conversationId with filter
-  { id: spec.createSpecification.id } // specification (create one as shown above)
+  { id: spec.createSpecification.id }, // specification (create one as shown above)
 );
 
 console.log(response.message);
@@ -412,7 +415,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -425,24 +430,24 @@ const providers = [
   {
     name: "OpenAI GPT-4o",
     serviceType: Types.ModelServiceTypes.OpenAi,
-    openAI: { model: Types.OpenAiModels.Gpt4O_128K }
+    openAI: { model: Types.OpenAiModels.Gpt4O_128K },
   },
   {
     name: "Cohere Command R+",
     serviceType: Types.ModelServiceTypes.Cohere,
-    cohere: { model: Types.CohereModels.CommandRPlus }
+    cohere: { model: Types.CohereModels.CommandRPlus },
   },
   {
     name: "Groq Llama",
     serviceType: Types.ModelServiceTypes.Groq,
-    groq: { model: Types.GroqModels.Llama_3_3_70B }
-  }
+    groq: { model: Types.GroqModels.Llama_3_3_70B },
+  },
 ];
 
 // Compare responses
 for (const provider of providers) {
   console.log(`\n🤖 ${provider.name}:`);
-
+
   const spec = await client.createSpecification({
     ...provider,
     type: Types.SpecificationTypes.Completion,
@@ -456,7 +461,7 @@ for (const provider of providers) {
       }
     },
     undefined,
-    { id: spec.createSpecification.id }
+    { id: spec.createSpecification.id },
   );
 }
 ```
@@ -518,7 +523,7 @@ await client.streamAgent(
   undefined, // conversationId
   { id: spec.createSpecification.id }, // specification
   [weatherTool], // tools
-  toolHandlers // handlers
+  toolHandlers, // handlers
 );
 ```
 
@@ -563,7 +568,7 @@ class KnowledgeAssistant {
         url, // uri
         url.split("/").pop() || "Document", // name
         undefined, // id
-        true // isSynchronous - wait for processing
+        true, // isSynchronous - wait for processing
       );
       this.contentIds.push(content.ingestUri.id);
     }
@@ -593,7 +598,7 @@ class KnowledgeAssistant {
        }
      },
      this.conversationId, // Maintains conversation context
-      { id: this.specificationId! } // specification
+      { id: this.specificationId! }, // specification
    );
  }
 }
@@ -623,7 +628,7 @@ const document = await client.ingestUri(
   "https://example.com/document.pdf", // uri
   "Document #12345", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -634,7 +639,7 @@ const extraction = await client.extractContents(
   "Extract the key information from this document",
   undefined, // tools
   undefined, // specification
-  { contents: [{ id: document.ingestUri.id }] } // filter
+  { contents: [{ id: document.ingestUri.id }] }, // filter
 );
 
 console.log("Extracted data:", extraction.extractContents);
@@ -653,7 +658,7 @@ for (const url of documentUrls) {
     url, // uri
     url.split("/").pop() || "Document", // name
     undefined, // id
-    true // isSynchronous
+    true, // isSynchronous
   );
   ids.push(content.ingestUri.id);
 }
@@ -666,7 +671,7 @@ const summary = await client.summarizeContents(
       prompt: "Create an executive summary of these documents",
     },
   ], // summarizations
-  { contents: ids.map((id) => ({ id })) } // filter
+  { contents: ids.map((id) => ({ id })) }, // filter
 );
 
 console.log("Summary:", summary.summarizeContents);
@@ -680,13 +685,13 @@ const content = await client.ingestUri(
   "https://example.com/large-document.pdf", // uri
   undefined, // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 console.log("✅ Content ready!");
 
 // Option 2: Asynchronous processing (for large files)
 const content = await client.ingestUri(
-  "https://example.com/very-large-video.mp4" // uri
+  "https://example.com/very-large-video.mp4", // uri
   // isSynchronous defaults to false
 );
 
@@ -724,7 +729,7 @@ const result = await client.promptAgent(
   {
     // Only allow retrieval from specific content
     contents: [{ id: "content-id-1" }, { id: "content-id-2" }],
-  }
+  },
 );
 
 // Example 2: Streaming with content filter
@@ -745,7 +750,7 @@ await client.streamAgent(
   {
     // Filter by collection
     collections: [{ id: "technical-docs-collection" }],
-  }
+  },
 );
 ```
 
@@ -775,7 +780,7 @@ await client.streamAgent(
   {
     // Force this content into context
     contents: [{ id: fileContent.content.id }],
-  }
+  },
 );
 ```
 
@@ -801,7 +806,7 @@ await client.promptAgent(
   {
     // Always include the specific code file
     contents: [{ id: "implementation-file-id" }],
-  }
+  },
 );
 ```
 
@@ -846,7 +851,7 @@ await client.updateProject({
 
 // Now all content will be automatically summarized
 const content = await client.ingestUri(
-  "https://example.com/report.pdf" // uri
+  "https://example.com/report.pdf", // uri
 );
 ```
 
@@ -879,7 +884,7 @@ await client.streamAgent(
     }
   },
   undefined,
-  { id: conversationSpec.createSpecification.id }
+  { id: conversationSpec.createSpecification.id },
 );
 ```
 
package/dist/client.js CHANGED
@@ -133,7 +133,8 @@ class Graphlit {
 constructor(organizationIdOrOptions, environmentId, jwtSecret, ownerId, userId, apiUri) {
     // Handle both old constructor signature and new options object
     let options;
-    if (typeof organizationIdOrOptions === 'object' && organizationIdOrOptions !== null) {
+    if (typeof organizationIdOrOptions === "object" &&
+        organizationIdOrOptions !== null) {
        // New constructor with options object
        options = organizationIdOrOptions;
    }
@@ -145,7 +146,7 @@ class Graphlit {
        jwtSecret,
        ownerId,
        userId,
-        apiUri
+        apiUri,
    };
 }
 this.apiUri =
@@ -158,7 +159,8 @@ class Graphlit {
 dotenv.config();
 this.organizationId =
    options.organizationId || process.env.GRAPHLIT_ORGANIZATION_ID;
-this.environmentId = options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
+this.environmentId =
+    options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
 this.jwtSecret = options.jwtSecret || process.env.GRAPHLIT_JWT_SECRET;
 // optional: for multi-tenant support
 this.ownerId = options.ownerId || process.env.GRAPHLIT_OWNER_ID;
@@ -179,7 +181,7 @@ class Graphlit {
    maxDelay: 30000,
    retryableStatusCodes: [429, 502, 503, 504],
    jitter: true,
-    ...options.retryConfig
+    ...options.retryConfig,
 };
 if (!this.organizationId) {
    throw new Error("Graphlit organization identifier is required.");
@@ -223,7 +225,9 @@ class Graphlit {
 if (statusCode && this.retryConfig.retryableStatusCodes) {
    const shouldRetry = this.retryConfig.retryableStatusCodes.includes(statusCode);
    // Call onRetry callback if provided
-    if (shouldRetry && this.retryConfig.onRetry && _operation.getContext().retryCount !== undefined) {
+    if (shouldRetry &&
+        this.retryConfig.onRetry &&
+        _operation.getContext().retryCount !== undefined) {
        const attempt = _operation.getContext().retryCount + 1;
        this.retryConfig.onRetry(attempt, error, _operation);
    }
@@ -332,7 +336,7 @@ class Graphlit {
 setRetryConfig(retryConfig) {
    this.retryConfig = {
        ...this.retryConfig,
-        ...retryConfig
+        ...retryConfig,
    };
    // Refresh client to apply new retry configuration
    this.refreshClient();
@@ -79,10 +79,13 @@ export declare function formatMessagesForAnthropic(messages: ConversationMessage
 export declare function formatMessagesForGoogle(messages: ConversationMessage[]): GoogleMessage[];
 /**
  * Cohere message format
+ * Note: For Cohere v7 SDK, messages are handled differently:
+ * - Current message is passed as 'message' parameter
+ * - Previous messages are passed as 'chatHistory' array
  */
 export interface CohereMessage {
-    role: "user" | "assistant" | "system" | "tool";
-    content: string;
+    role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
+    message: string;
    tool_calls?: Array<{
        id: string;
        name: string;
@@ -90,12 +93,11 @@ export interface CohereMessage {
    }>;
    tool_results?: Array<{
        call: {
-            id: string;
            name: string;
            parameters: Record<string, any>;
        };
        outputs: Array<{
-            text: string;
+            output: string;
        }>;
    }>;
 }
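
The interface change above tracks Cohere's v7 chat API: role names are upper-cased (`USER`, `CHATBOT`, `SYSTEM`, `TOOL`), the text field moves from `content` to `message`, tool result outputs carry `output` instead of `text`, and the `id` field is dropped from `call`. A hedged sketch of a conversation under the new shape (the values are hypothetical; the field names come from the interface above):

```typescript
// Hypothetical conversation expressed in the updated CohereMessage shape.
const history: CohereMessage[] = [
  { role: "USER", message: "What's the weather in Paris?" },
  {
    role: "CHATBOT",
    message: "Let me check.",
    tool_calls: [
      { id: "call_1", name: "get_weather", parameters: { city: "Paris" } },
    ],
  },
  {
    role: "TOOL",
    message: "18°C and sunny",
    tool_results: [
      {
        call: { name: "get_weather", parameters: { city: "Paris" } },
        outputs: [{ output: "18°C and sunny" }], // 'output', not 'text', in v7
      },
    ],
  },
];
```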
@@ -284,14 +284,14 @@ export function formatMessagesForCohere(messages) {
 switch (message.role) {
    case ConversationRoleTypes.System:
        formattedMessages.push({
-            role: "system",
-            content: trimmedMessage,
+            role: "SYSTEM",
+            message: trimmedMessage,
        });
        break;
    case ConversationRoleTypes.Assistant:
        const assistantMessage = {
-            role: "assistant",
-            content: trimmedMessage,
+            role: "CHATBOT",
+            message: trimmedMessage,
        };
        // Add tool calls if present
        if (message.toolCalls && message.toolCalls.length > 0) {
@@ -300,32 +300,37 @@ export function formatMessagesForCohere(messages) {
            .map((toolCall) => ({
                id: toolCall.id,
                name: toolCall.name,
-                parameters: toolCall.arguments ? JSON.parse(toolCall.arguments) : {},
+                parameters: toolCall.arguments
+                    ? JSON.parse(toolCall.arguments)
+                    : {},
            }));
        }
        formattedMessages.push(assistantMessage);
        break;
    case ConversationRoleTypes.Tool:
-        // Cohere expects tool results as tool messages
+        // Cohere expects tool results as TOOL messages
        formattedMessages.push({
-            role: "tool",
-            content: trimmedMessage,
-            tool_results: [{
+            role: "TOOL",
+            message: trimmedMessage,
+            tool_results: [
+                {
                    call: {
-                        id: message.toolCallId || "",
                        name: "", // Would need to be tracked from the tool call
                        parameters: {},
                    },
-                    outputs: [{
-                        text: trimmedMessage,
-                    }],
-                }],
+                    outputs: [
+                        {
+                            output: trimmedMessage, // Changed from 'text' to 'output'
+                        },
+                    ],
+                },
+            ],
        });
        break;
    default: // User messages
        formattedMessages.push({
-            role: "user",
-            content: trimmedMessage,
+            role: "USER",
+            message: trimmedMessage,
        });
        break;
 }
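
The switch above renames every role and moves the text from `content` to `message`; the mapping is easier to see in isolation. A minimal sketch, assuming a simplified internal role type (the real code switches on `ConversationRoleTypes`):

```typescript
// Simplified restatement of the v6 -> v7 mapping applied by the formatter.
type InternalRole = "system" | "assistant" | "tool" | "user";

const COHERE_V7_ROLE = {
  system: "SYSTEM", // was { role: "system", content: ... }
  assistant: "CHATBOT", // was { role: "assistant", content: ... }
  tool: "TOOL", // was { role: "tool", content: ... }
  user: "USER", // was { role: "user", content: ... }
} as const;

function toCohereV7(role: InternalRole, text: string) {
  // v7 expects the text under 'message' rather than 'content'
  return { role: COHERE_V7_ROLE[role], message: text };
}
```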
@@ -321,6 +321,30 @@ onEvent, onComplete) {
    onComplete(fullMessage, toolCalls);
 }
 catch (error) {
+    // Handle OpenAI-specific errors
+    const errorMessage = error.message || error.toString();
+    // Check for rate limit errors
+    if (error.status === 429 ||
+        error.statusCode === 429 ||
+        error.code === "rate_limit_exceeded") {
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`⚠️ [OpenAI] Rate limit hit`);
+        }
+        const rateLimitError = new Error("OpenAI rate limit exceeded");
+        rateLimitError.statusCode = 429;
+        throw rateLimitError;
+    }
+    // Check for network errors
+    if (errorMessage.includes("fetch failed") ||
+        error.code === "ECONNRESET" ||
+        error.code === "ETIMEDOUT") {
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
+        }
+        const networkError = new Error(`OpenAI network error: ${errorMessage}`);
+        networkError.statusCode = 503; // Service unavailable
+        throw networkError;
+    }
    // Don't emit error event here - let the client handle it to avoid duplicates
    throw error;
 }
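
Both new branches follow one pattern: classify the provider error, then rethrow a plain `Error` with a `statusCode` attached so the retry layer's `retryableStatusCodes` check can match it. A sketch of that pattern (the `StatusError` class and `normalizeProviderError` helper are illustrative names, not SDK exports):

```typescript
// Illustrative restatement of the rethrow pattern used in the catch blocks above.
class StatusError extends Error {
  constructor(
    message: string,
    public statusCode: number, // matched against retryableStatusCodes
  ) {
    super(message);
  }
}

function normalizeProviderError(error: any, provider: string): never {
  const msg = error?.message ?? String(error);
  if (error?.status === 429 || error?.statusCode === 429) {
    throw new StatusError(`${provider} rate limit exceeded`, 429);
  }
  if (msg.includes("fetch failed") || error?.code === "ECONNRESET") {
    throw new StatusError(`${provider} network error: ${msg}`, 503);
  }
  throw error; // anything unrecognized propagates untouched
}
```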
@@ -631,6 +655,30 @@ onEvent, onComplete) {
    onComplete(fullMessage, validToolCalls);
 }
 catch (error) {
+    // Handle Anthropic-specific errors
+    const errorMessage = error.message || error.toString();
+    // Check for overloaded errors
+    if (error.type === "overloaded_error" ||
+        errorMessage.includes("Overloaded")) {
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`⚠️ [Anthropic] Service overloaded`);
+        }
+        // Treat overloaded as a rate limit error for retry logic
+        const overloadError = new Error("Anthropic service overloaded");
+        overloadError.statusCode = 503; // Service unavailable
+        throw overloadError;
+    }
+    // Check for rate limit errors
+    if (error.status === 429 ||
+        error.statusCode === 429 ||
+        error.type === "rate_limit_error") {
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`⚠️ [Anthropic] Rate limit hit`);
+        }
+        const rateLimitError = new Error("Anthropic rate limit exceeded");
+        rateLimitError.statusCode = 429;
+        throw rateLimitError;
+    }
    // Don't emit error event here - let the client handle it to avoid duplicates
    throw error;
 }
@@ -993,16 +1041,57 @@ onEvent, onComplete) {
 */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
 onEvent, onComplete) {
-    // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    try {
+        // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Groq-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for tool calling errors
+        if (error.status === 400 &&
+            errorMessage.includes("Failed to call a function")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Tool calling error: ${errorMessage}`);
+            }
+            // Groq may have limitations with certain tool schemas
+            // Re-throw with a more descriptive error
+            throw new Error(`Groq tool calling error: ${errorMessage}. The model may not support the provided tool schema format.`);
+        }
+        // Handle rate limits
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Rate limit hit (429)`);
+            }
+            const rateLimitError = new Error("Groq rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
 onEvent, onComplete) {
-    // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    try {
+        // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Cerebras-specific 429 errors
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Cerebras] Rate limit hit (429)`);
+            }
+            // Re-throw with proper status code for retry logic
+            const rateLimitError = new Error("Cerebras rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
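
Beyond the added error handling, note the switch from `return streamWithOpenAI(...)` to `return await streamWithOpenAI(...)`. Without the `await`, the returned promise rejects after the wrapper's try/catch has already been popped, so the new catch blocks would never run. A self-contained illustration:

```typescript
async function inner(): Promise<void> {
  throw new Error("boom");
}

async function withoutAwait(): Promise<void> {
  try {
    return inner(); // rejection escapes; the catch below is skipped
  } catch {
    console.log("never reached");
  }
}

async function withAwait(): Promise<void> {
  try {
    return await inner(); // rejection surfaces here and is caught
  } catch {
    console.log("handled: boom");
  }
}
```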
@@ -1037,7 +1126,7 @@ onEvent, onComplete) {
    serviceType: specification.serviceType,
    deepseek: specification.deepseek,
    hasDeepseekModel: !!specification.deepseek?.model,
-    deepseekModelValue: specification.deepseek?.model
+    deepseekModelValue: specification.deepseek?.model,
 });
 }
 const modelName = getModelName(specification);
@@ -1046,7 +1135,7 @@ onEvent, onComplete) {
    name: specification.name,
    serviceType: specification.serviceType,
    deepseek: specification.deepseek,
-    hasCustomModelName: !!specification.deepseek?.modelName
+    hasCustomModelName: !!specification.deepseek?.modelName,
 });
 throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
 }
@@ -1109,9 +1198,12 @@ onEvent, onComplete) {
 // Performance metrics tracking (internal only)
 if (tokenCount % 10 === 0) {
    const totalTokens = tokenCount + toolArgumentTokens;
-    const tokensPerSecond = totalTokens > 0 ? totalTokens / ((currentTime - startTime) / 1000) : 0;
+    const tokensPerSecond = totalTokens > 0
+        ? totalTokens / ((currentTime - startTime) / 1000)
+        : 0;
    const avgInterTokenDelay = interTokenDelays.length > 0
-        ? interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length
+        ? interTokenDelays.reduce((a, b) => a + b, 0) /
+            interTokenDelays.length
        : 0;
 }
 }
@@ -1231,10 +1323,22 @@ onEvent, onComplete) {
 if (messages.length === 0) {
    throw new Error("No messages found for Cohere streaming");
 }
+// Cohere v7 expects a single message and optional chatHistory
+// Extract the last message as the current message
+const lastMessage = messages[messages.length - 1];
+const chatHistory = messages.slice(0, -1);
+if (!lastMessage || !lastMessage.message) {
+    throw new Error("Last message must have message property for Cohere streaming");
+}
 const streamConfig = {
    model: modelName,
-    messages: messages, // All messages in chronological order
+    message: lastMessage.message, // Current message (singular)
 };
+// Add chat history if there are previous messages
+if (chatHistory.length > 0) {
+    // Messages already have 'message' property from formatter
+    streamConfig.chatHistory = chatHistory;
+}
 // Only add temperature if it's defined
 if (specification.cohere?.temperature !== undefined) {
    streamConfig.temperature = specification.cohere.temperature;
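
This is the behavioral core of the Cohere change: the v7 chat endpoint takes the current turn as a single `message` string plus an optional `chatHistory` array, rather than one `messages` list. Sketched with hypothetical values:

```typescript
// Hypothetical three-turn conversation, split the way the code above splits it.
const messages = [
  { role: "USER", message: "Hi" },
  { role: "CHATBOT", message: "Hello! How can I help?" },
  { role: "USER", message: "Summarize my last upload." },
];

// Last message becomes 'message'; everything before it becomes 'chatHistory'.
const streamConfig = {
  model: "command-r-plus", // hypothetical model name
  message: messages[messages.length - 1].message,
  chatHistory: messages.slice(0, -1),
};
```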
@@ -1283,12 +1387,9 @@ onEvent, onComplete) {
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
    console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
-    console.log(`🔍 [Cohere] Cohere client methods available:`, Object.getOwnPropertyNames(cohereClient));
-    console.log(`🔍 [Cohere] Has chatStream method:`, typeof cohereClient.chatStream === 'function');
-    console.log(`🔍 [Cohere] Has chat property:`, !!cohereClient.chat);
-    if (cohereClient.chat) {
-        console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
-    }
+    console.log(`🔍 [Cohere] Current message: "${streamConfig.message}"`);
+    console.log(`🔍 [Cohere] Chat history length: ${streamConfig.chatHistory?.length || 0}`);
+    console.log(`🔍 [Cohere] Has tools: ${!!streamConfig.tools}`);
    console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
 }
 let stream;
@@ -1498,7 +1599,7 @@ onEvent, onComplete) {
    name: specification.name,
    serviceType: specification.serviceType,
    bedrock: specification.bedrock,
-    hasCustomModelName: !!specification.bedrock?.modelName
+    hasCustomModelName: !!specification.bedrock?.modelName,
 });
 throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
 }
@@ -1511,9 +1612,13 @@ onEvent, onComplete) {
 // The AWS SDK expects content as an array of content blocks
 const converseMessages = messages.map((msg) => ({
    role: msg.role,
-    content: [{
-        text: typeof msg.content === 'string' ? msg.content : msg.content.toString()
-    }]
+    content: [
+        {
+            text: typeof msg.content === "string"
+                ? msg.content
+                : msg.content.toString(),
+        },
+    ],
 }));
 // Prepare the request using Converse API format
 // Using 'any' type because:
@@ -1649,9 +1754,25 @@ onEvent, onComplete) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
    console.error(`❌ [Bedrock] Stream error:`, error);
 }
+// Handle specific Bedrock errors
+const errorMessage = error.message || error.toString();
+const errorName = error.name || "";
+// Check for throttling errors
+if (errorName === "ThrottlingException" ||
+    errorMessage.includes("Too many tokens") ||
+    errorMessage.includes("Too many requests")) {
+    onEvent({
+        type: "error",
+        error: `Bedrock rate limit: ${errorMessage}`,
+    });
+    // Re-throw with a specific error type that the retry logic can handle
+    const rateLimitError = new Error(errorMessage);
+    rateLimitError.statusCode = 429; // Treat as rate limit
+    throw rateLimitError;
+}
 onEvent({
    type: "error",
-    error: `Bedrock streaming error: ${error}`,
+    error: `Bedrock streaming error: ${errorMessage}`,
 });
 throw error;
 }
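
With throttling normalized to `statusCode` 429, callers no longer need to recognize Bedrock's `ThrottlingException` by name; the SDK's retry layer (or a manual backoff) can key off the status code alone. A sketch of what that enables (`streamFromBedrock` is a hypothetical stand-in for the internal Bedrock streaming call):

```typescript
// Sketch: retry once on the normalized 429, under the assumptions stated above.
async function streamWithRetry(streamFromBedrock: () => Promise<void>) {
  try {
    await streamFromBedrock();
  } catch (err: any) {
    if (err.statusCode === 429) {
      // ThrottlingException and "Too many tokens/requests" now surface as 429,
      // which is in the default retryableStatusCodes list.
      await new Promise((resolve) => setTimeout(resolve, 1000));
      await streamFromBedrock();
    } else {
      throw err;
    }
  }
}
```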
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "graphlit-client",
-  "version": "1.0.20250615003",
+  "version": "1.0.20250615004",
   "description": "Graphlit API Client for TypeScript",
   "type": "module",
   "main": "./dist/client.js",