graphlit-client 1.0.20250615003 → 1.0.20250615004
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -54
- package/dist/client.js +10 -6
- package/dist/streaming/llm-formatters.d.ts +6 -4
- package/dist/streaming/llm-formatters.js +21 -16
- package/dist/streaming/providers.js +141 -20
- package/package.json +1 -1
package/README.md
CHANGED

@@ -75,7 +75,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId (optional)
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -134,7 +134,7 @@ GOOGLE_API_KEY=your_key
 
 # Additional streaming providers
 GROQ_API_KEY=your_key # For Groq models (Llama, Mixtral)
-CEREBRAS_API_KEY=your_key  # For Cerebras models
+CEREBRAS_API_KEY=your_key # For Cerebras models
 COHERE_API_KEY=your_key # For Cohere Command models
 MISTRAL_API_KEY=your_key # For Mistral models
 DEEPSEEK_API_KEY=your_key # For Deepseek models
@@ -152,8 +152,9 @@ The SDK now includes automatic retry logic for network errors and transient failures.
 ### Default Retry Configuration
 
 By default, the client will automatically retry on these status codes:
+
 - `429` - Too Many Requests
-- `502` - Bad Gateway 
+- `502` - Bad Gateway
 - `503` - Service Unavailable
 - `504` - Gateway Timeout
 
@@ -168,19 +169,19 @@ Configure retry behavior to match your needs:
 ```typescript
 const client = new Graphlit({
   organizationId: "your_org_id",
-  environmentId: "your_env_id", 
+  environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 10,
-    initialDelay: 500,
-    maxDelay: 60000,
-    jitter: true,
+    maxAttempts: 10, // Maximum retry attempts (default: 5)
+    initialDelay: 500, // Initial delay in ms (default: 300)
+    maxDelay: 60000, // Maximum delay in ms (default: 30000)
+    jitter: true, // Add randomness to delays (default: true)
     retryableStatusCodes: [429, 500, 502, 503, 504], // Custom status codes
     onRetry: (attempt, error, operation) => {
       console.log(`Retry attempt ${attempt} for ${operation.operationName}`);
       console.log(`Error: ${error.message}`);
-    }
-  }
+    },
+  },
 });
 ```
 
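For context, the annotated defaults (5 attempts, 300 ms initial delay, 30 s cap, jitter enabled) describe a conventional exponential backoff schedule. The sketch below illustrates the delay curve these options imply; it is an illustration of the documented parameters, not the SDK's actual internals.

```typescript
// Illustrative only: the delay schedule implied by retryConfig,
// assuming doubling per attempt, a cap at maxDelay, and full jitter.
interface BackoffOptions {
  initialDelay: number; // ms
  maxDelay: number; // ms
  jitter: boolean;
}

function retryDelay(attempt: number, opts: BackoffOptions): number {
  // attempt 1 -> initialDelay, attempt 2 -> 2x, attempt 3 -> 4x, ...
  const base = Math.min(opts.initialDelay * 2 ** (attempt - 1), opts.maxDelay);
  // Jitter spreads clients across [0, base] so retries don't synchronize.
  return opts.jitter ? Math.random() * base : base;
}

// With the config above: attempt 3 waits up to 2000 ms, always capped at 60 s.
console.log(retryDelay(3, { initialDelay: 500, maxDelay: 60000, jitter: true }));
```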
@@ -196,7 +197,7 @@ const client = new Graphlit();
 client.setRetryConfig({
   maxAttempts: 20,
   initialDelay: 100,
-  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524]
+  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524],
 });
 ```
 
@@ -210,8 +211,8 @@ const client = new Graphlit({
   environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 1
-  }
+    maxAttempts: 1, // No retries
+  },
 });
 ```
 
@@ -221,17 +222,17 @@ The Graphlit SDK supports real-time streaming responses from 9 different LLM providers
 
 ### Supported Providers
 
-| Provider
-
-| **OpenAI**
-| **Anthropic**
-| **Google**
-| **Groq**
-| **Cerebras**
-| **Cohere**
-| **Mistral**
-| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4
-| **Deepseek**
+| Provider        | Models                                        | SDK Required                      | API Key             |
+| --------------- | --------------------------------------------- | --------------------------------- | ------------------- |
+| **OpenAI**      | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4            | `openai`                          | `OPENAI_API_KEY`    |
+| **Anthropic**   | Claude 3, Claude 3.5, Claude 3.7, Claude 4    | `@anthropic-ai/sdk`               | `ANTHROPIC_API_KEY` |
+| **Google**      | Gemini 1.5, Gemini 2.0, Gemini 2.5            | `@google/generative-ai`           | `GOOGLE_API_KEY`    |
+| **Groq**        | Llama 4, Llama 3.3, Mixtral, Deepseek R1      | `groq-sdk`                        | `GROQ_API_KEY`      |
+| **Cerebras**    | Llama 3.3, Llama 3.1                          | `openai`                          | `CEREBRAS_API_KEY`  |
+| **Cohere**      | Command R+, Command R, Command R7B, Command A | `cohere-ai`                       | `COHERE_API_KEY`    |
+| **Mistral**     | Mistral Large, Medium, Small, Nemo, Pixtral   | `@mistralai/mistralai`            | `MISTRAL_API_KEY`   |
+| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4         | `@aws-sdk/client-bedrock-runtime` | AWS credentials     |
+| **Deepseek**    | Deepseek Chat, Deepseek Reasoner              | `openai`                          | `DEEPSEEK_API_KEY`  |
 
 ### Setting Up Streaming
 
@@ -250,7 +251,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -263,9 +266,9 @@ const spec = await client.createSpecification({
   name: "Multi-Provider Assistant",
   type: Types.SpecificationTypes.Completion,
   serviceType: Types.ModelServiceTypes.Cohere, // or any supported provider
-  cohere: { 
+  cohere: {
     model: Types.CohereModels.CommandRPlus,
-    temperature: 0.7
+    temperature: 0.7,
   },
 });
 ```
@@ -310,7 +313,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -338,7 +341,7 @@ const content = await client.ingestUri(
   "https://arxiv.org/pdf/1706.03762.pdf", // Attention Is All You Need paper
   "AI Research Paper", // name
   undefined, // id
-  true // isSynchronous - waits for processing
+  true, // isSynchronous - waits for processing
 );
 
 console.log(`✅ Uploaded: ${content.ingestUri.id}`);
@@ -360,7 +363,7 @@ await client.streamAgent(
     }
   },
   conversation.createConversation.id, // conversationId with content filter
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -374,7 +377,7 @@ const webpage = await client.ingestUri(
   "https://en.wikipedia.org/wiki/Artificial_intelligence", // uri
   "AI Wikipedia Page", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -389,7 +392,7 @@ const conversation = await client.createConversation({
 const response = await client.promptAgent(
   "Summarize the key points about AI from this Wikipedia page",
   conversation.createConversation.id, // conversationId with filter
-  { id: spec.createSpecification.id } // specification (create one as shown above)
+  { id: spec.createSpecification.id }, // specification (create one as shown above)
 );
 
 console.log(response.message);
@@ -412,7 +415,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -425,24 +430,24 @@ const providers = [
   {
     name: "OpenAI GPT-4o",
     serviceType: Types.ModelServiceTypes.OpenAi,
-    openAI: { model: Types.OpenAiModels.Gpt4O_128K }
+    openAI: { model: Types.OpenAiModels.Gpt4O_128K },
   },
   {
     name: "Cohere Command R+",
     serviceType: Types.ModelServiceTypes.Cohere,
-    cohere: { model: Types.CohereModels.CommandRPlus }
+    cohere: { model: Types.CohereModels.CommandRPlus },
   },
   {
     name: "Groq Llama",
     serviceType: Types.ModelServiceTypes.Groq,
-    groq: { model: Types.GroqModels.Llama_3_3_70B }
-  }
+    groq: { model: Types.GroqModels.Llama_3_3_70B },
+  },
 ];
 
 // Compare responses
 for (const provider of providers) {
   console.log(`\n🤖 ${provider.name}:`);
- 
+
   const spec = await client.createSpecification({
     ...provider,
     type: Types.SpecificationTypes.Completion,
@@ -456,7 +461,7 @@ for (const provider of providers) {
     }
   },
   undefined,
-  { id: spec.createSpecification.id }
+  { id: spec.createSpecification.id },
   );
 }
 ```
@@ -518,7 +523,7 @@ await client.streamAgent(
   undefined, // conversationId
   { id: spec.createSpecification.id }, // specification
   [weatherTool], // tools
-  toolHandlers // handlers
+  toolHandlers, // handlers
 );
 ```
 
@@ -563,7 +568,7 @@ class KnowledgeAssistant {
       url, // uri
       url.split("/").pop() || "Document", // name
      undefined, // id
-      true // isSynchronous - wait for processing
+      true, // isSynchronous - wait for processing
     );
     this.contentIds.push(content.ingestUri.id);
   }
@@ -593,7 +598,7 @@ class KnowledgeAssistant {
      }
     },
     this.conversationId, // Maintains conversation context
-    { id: this.specificationId! } // specification
+    { id: this.specificationId! }, // specification
   );
 }
 }
@@ -623,7 +628,7 @@ const document = await client.ingestUri(
   "https://example.com/document.pdf", // uri
   "Document #12345", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -634,7 +639,7 @@ const extraction = await client.extractContents(
   "Extract the key information from this document",
   undefined, // tools
   undefined, // specification
-  { contents: [{ id: document.ingestUri.id }] } // filter
+  { contents: [{ id: document.ingestUri.id }] }, // filter
 );
 
 console.log("Extracted data:", extraction.extractContents);
@@ -653,7 +658,7 @@ for (const url of documentUrls) {
     url, // uri
     url.split("/").pop() || "Document", // name
     undefined, // id
-    true // isSynchronous
+    true, // isSynchronous
   );
   ids.push(content.ingestUri.id);
 }
@@ -666,7 +671,7 @@ const summary = await client.summarizeContents(
       prompt: "Create an executive summary of these documents",
     },
   ], // summarizations
-  { contents: ids.map((id) => ({ id })) } // filter
+  { contents: ids.map((id) => ({ id })) }, // filter
 );
 
 console.log("Summary:", summary.summarizeContents);
@@ -680,13 +685,13 @@ const content = await client.ingestUri(
   "https://example.com/large-document.pdf", // uri
   undefined, // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 console.log("✅ Content ready!");
 
 // Option 2: Asynchronous processing (for large files)
 const content = await client.ingestUri(
-  "https://example.com/very-large-video.mp4" // uri
+  "https://example.com/very-large-video.mp4", // uri
   // isSynchronous defaults to false
 );
 
@@ -724,7 +729,7 @@ const result = await client.promptAgent(
   {
     // Only allow retrieval from specific content
    contents: [{ id: "content-id-1" }, { id: "content-id-2" }],
-  }
+  },
 );
 
 // Example 2: Streaming with content filter
@@ -745,7 +750,7 @@ await client.streamAgent(
   {
     // Filter by collection
     collections: [{ id: "technical-docs-collection" }],
-  }
+  },
 );
 ```
 
@@ -775,7 +780,7 @@ await client.streamAgent(
   {
     // Force this content into context
     contents: [{ id: fileContent.content.id }],
-  }
+  },
 );
 ```
 
@@ -801,7 +806,7 @@ await client.promptAgent(
   {
     // Always include the specific code file
     contents: [{ id: "implementation-file-id" }],
-  }
+  },
 );
 ```
 
@@ -846,7 +851,7 @@ await client.updateProject({
 
 // Now all content will be automatically summarized
 const content = await client.ingestUri(
-  "https://example.com/report.pdf" // uri
+  "https://example.com/report.pdf", // uri
 );
 ```
 
@@ -879,7 +884,7 @@ await client.streamAgent(
     }
   },
   undefined,
-  { id: conversationSpec.createSpecification.id }
+  { id: conversationSpec.createSpecification.id },
 );
 ```
 
package/dist/client.js
CHANGED

@@ -133,7 +133,8 @@ class Graphlit {
     constructor(organizationIdOrOptions, environmentId, jwtSecret, ownerId, userId, apiUri) {
         // Handle both old constructor signature and new options object
         let options;
-        if (typeof organizationIdOrOptions === "object" && organizationIdOrOptions !== null) {
+        if (typeof organizationIdOrOptions === "object" &&
+            organizationIdOrOptions !== null) {
             // New constructor with options object
             options = organizationIdOrOptions;
         }
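The `typeof` guard above is what lets the constructor accept either the new options object or the legacy positional arguments. Both call styles below resolve to the same configuration:

```typescript
import { Graphlit } from "graphlit-client";

// New style: a single options object (matches the typeof check above)
const clientA = new Graphlit({
  organizationId: "your_org_id",
  environmentId: "your_env_id",
  jwtSecret: "your_secret",
});

// Legacy style: positional arguments, still supported
const clientB = new Graphlit("your_org_id", "your_env_id", "your_secret");
```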
@@ -145,7 +146,7 @@ class Graphlit {
             jwtSecret,
             ownerId,
             userId,
-            apiUri
+            apiUri,
         };
     }
     this.apiUri =
@@ -158,7 +159,8 @@ class Graphlit {
         dotenv.config();
         this.organizationId =
             options.organizationId || process.env.GRAPHLIT_ORGANIZATION_ID;
-        this.environmentId = options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
+        this.environmentId =
+            options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
         this.jwtSecret = options.jwtSecret || process.env.GRAPHLIT_JWT_SECRET;
         // optional: for multi-tenant support
         this.ownerId = options.ownerId || process.env.GRAPHLIT_OWNER_ID;
@@ -179,7 +181,7 @@ class Graphlit {
             maxDelay: 30000,
             retryableStatusCodes: [429, 502, 503, 504],
             jitter: true,
-            ...options.retryConfig
+            ...options.retryConfig,
         };
         if (!this.organizationId) {
             throw new Error("Graphlit organization identifier is required.");
@@ -223,7 +225,9 @@ class Graphlit {
         if (statusCode && this.retryConfig.retryableStatusCodes) {
             const shouldRetry = this.retryConfig.retryableStatusCodes.includes(statusCode);
             // Call onRetry callback if provided
-            if (shouldRetry && this.retryConfig.onRetry && _operation.getContext().retryCount !== undefined) {
+            if (shouldRetry &&
+                this.retryConfig.onRetry &&
+                _operation.getContext().retryCount !== undefined) {
                 const attempt = _operation.getContext().retryCount + 1;
                 this.retryConfig.onRetry(attempt, error, _operation);
             }
@@ -332,7 +336,7 @@ class Graphlit {
     setRetryConfig(retryConfig) {
         this.retryConfig = {
             ...this.retryConfig,
-            ...retryConfig
+            ...retryConfig,
         };
         // Refresh client to apply new retry configuration
         this.refreshClient();
package/dist/streaming/llm-formatters.d.ts
CHANGED

@@ -79,10 +79,13 @@ export declare function formatMessagesForAnthropic(messages: ConversationMessage
 export declare function formatMessagesForGoogle(messages: ConversationMessage[]): GoogleMessage[];
 /**
  * Cohere message format
+ * Note: For Cohere v7 SDK, messages are handled differently:
+ * - Current message is passed as 'message' parameter
+ * - Previous messages are passed as 'chatHistory' array
  */
 export interface CohereMessage {
-    role: "
-
+    role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
+    message: string;
     tool_calls?: Array<{
         id: string;
         name: string;
@@ -90,12 +93,11 @@ export interface CohereMessage {
     }>;
     tool_results?: Array<{
         call: {
-            id: string;
             name: string;
             parameters: Record<string, any>;
         };
         outputs: Array<{
-            text: string;
+            output: string;
         }>;
     }>;
 }
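To make the v7 note concrete, here is a minimal sketch (a hypothetical helper, not exported by this package) of how a formatted `CohereMessage[]` maps onto the v7 chat parameters:

```typescript
// Hypothetical helper: the last formatted message becomes `message`,
// everything before it becomes `chatHistory`, per the note above.
interface CohereMessage {
  role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
  message: string;
}

function toCohereChatParams(messages: CohereMessage[]) {
  const last = messages[messages.length - 1];
  if (!last) throw new Error("No messages to send");
  return {
    message: last.message, // the current turn
    chatHistory: messages.slice(0, -1), // all prior turns
  };
}
```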
package/dist/streaming/llm-formatters.js
CHANGED

@@ -284,14 +284,14 @@ export function formatMessagesForCohere(messages) {
     switch (message.role) {
         case ConversationRoleTypes.System:
             formattedMessages.push({
-                role: "
-
+                role: "SYSTEM",
+                message: trimmedMessage,
             });
             break;
         case ConversationRoleTypes.Assistant:
             const assistantMessage = {
-                role: "
-
+                role: "CHATBOT",
+                message: trimmedMessage,
             };
             // Add tool calls if present
             if (message.toolCalls && message.toolCalls.length > 0) {
@@ -300,32 +300,37 @@ export function formatMessagesForCohere(messages) {
                 .map((toolCall) => ({
                 id: toolCall.id,
                 name: toolCall.name,
-                parameters: toolCall.arguments ? JSON.parse(toolCall.arguments) : {},
+                parameters: toolCall.arguments
+                    ? JSON.parse(toolCall.arguments)
+                    : {},
             }));
             }
             formattedMessages.push(assistantMessage);
             break;
         case ConversationRoleTypes.Tool:
-            // Cohere expects tool results as
+            // Cohere expects tool results as TOOL messages
             formattedMessages.push({
-                role: "
-
-                tool_results: [
+                role: "TOOL",
+                message: trimmedMessage,
+                tool_results: [
+                    {
                         call: {
-                            id: message.toolCallId || "",
                             name: "", // Would need to be tracked from the tool call
                             parameters: {},
                         },
-                outputs: [
-
-
-
+                        outputs: [
+                            {
+                                output: trimmedMessage, // Changed from 'text' to 'output'
+                            },
+                        ],
+                    },
+                ],
             });
             break;
         default: // User messages
             formattedMessages.push({
-                role: "
-
+                role: "USER",
+                message: trimmedMessage,
             });
             break;
     }
package/dist/streaming/providers.js
CHANGED

@@ -321,6 +321,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
+        // Handle OpenAI-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.code === "rate_limit_exceeded") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Rate limit hit`);
+            }
+            const rateLimitError = new Error("OpenAI rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        // Check for network errors
+        if (errorMessage.includes("fetch failed") ||
+            error.code === "ECONNRESET" ||
+            error.code === "ETIMEDOUT") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
+            }
+            const networkError = new Error(`OpenAI network error: ${errorMessage}`);
+            networkError.statusCode = 503; // Service unavailable
+            throw networkError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
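The catch block above normalizes OpenAI-specific failures onto a numeric `statusCode` so the client's generic retry loop (see `retryableStatusCodes` in client.js) can classify them without knowing the provider. The same pattern in isolation, with a hypothetical helper name:

```typescript
// Hypothetical standalone version of the normalization used above:
// provider-specific errors are re-thrown as Errors carrying a statusCode
// that the retry layer already knows how to match.
type RetryableError = Error & { statusCode?: number };

function normalizeProviderError(error: any, provider: string): RetryableError {
  const normalized: RetryableError = new Error(
    `${provider} error: ${error?.message ?? String(error)}`,
  );
  if (error?.status === 429 || error?.code === "rate_limit_exceeded") {
    normalized.statusCode = 429; // rate limit: retry with backoff
  } else if (error?.code === "ECONNRESET" || error?.code === "ETIMEDOUT") {
    normalized.statusCode = 503; // transient network failure
  }
  return normalized;
}
```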
@@ -631,6 +655,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, validToolCalls);
     }
     catch (error) {
+        // Handle Anthropic-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for overloaded errors
+        if (error.type === "overloaded_error" ||
+            errorMessage.includes("Overloaded")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Service overloaded`);
+            }
+            // Treat overloaded as a rate limit error for retry logic
+            const overloadError = new Error("Anthropic service overloaded");
+            overloadError.statusCode = 503; // Service unavailable
+            throw overloadError;
+        }
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.type === "rate_limit_error") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Rate limit hit`);
+            }
+            const rateLimitError = new Error("Anthropic rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -993,16 +1041,57 @@ onEvent, onComplete) {
  */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
 onEvent, onComplete) {
-    // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    try {
+        // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Groq-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for tool calling errors
+        if (error.status === 400 &&
+            errorMessage.includes("Failed to call a function")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Tool calling error: ${errorMessage}`);
+            }
+            // Groq may have limitations with certain tool schemas
+            // Re-throw with a more descriptive error
+            throw new Error(`Groq tool calling error: ${errorMessage}. The model may not support the provided tool schema format.`);
+        }
+        // Handle rate limits
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Rate limit hit (429)`);
+            }
+            const rateLimitError = new Error("Groq rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
 onEvent, onComplete) {
-    // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
-    return streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    try {
+        // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Cerebras-specific 429 errors
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Cerebras] Rate limit hit (429)`);
+            }
+            // Re-throw with proper status code for retry logic
+            const rateLimitError = new Error("Cerebras rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
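Groq, Cerebras, and Deepseek all speak the OpenAI wire protocol, which is why both wrappers above delegate to `streamWithOpenAI`. For the providers driven through the `openai` package, wiring up a client is just a matter of pointing it at the right endpoint; the base URLs below are assumptions to verify against each provider's documentation:

```typescript
import OpenAI from "openai";

// Assumed endpoints; confirm against provider docs before relying on them.
const cerebrasClient = new OpenAI({
  apiKey: process.env.CEREBRAS_API_KEY,
  baseURL: "https://api.cerebras.ai/v1",
});

const deepseekClient = new OpenAI({
  apiKey: process.env.DEEPSEEK_API_KEY,
  baseURL: "https://api.deepseek.com",
});
```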
@@ -1037,7 +1126,7 @@ onEvent, onComplete) {
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
         hasDeepseekModel: !!specification.deepseek?.model,
-        deepseekModelValue: specification.deepseek?.model
+        deepseekModelValue: specification.deepseek?.model,
     });
 }
 const modelName = getModelName(specification);
@@ -1046,7 +1135,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
-        hasCustomModelName: !!specification.deepseek?.modelName
+        hasCustomModelName: !!specification.deepseek?.modelName,
     });
     throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
 }
@@ -1109,9 +1198,12 @@ onEvent, onComplete) {
 // Performance metrics tracking (internal only)
 if (tokenCount % 10 === 0) {
     const totalTokens = tokenCount + toolArgumentTokens;
-    const tokensPerSecond = totalTokens > 0 ? totalTokens / ((currentTime - startTime) / 1000) : 0;
+    const tokensPerSecond = totalTokens > 0
+        ? totalTokens / ((currentTime - startTime) / 1000)
+        : 0;
     const avgInterTokenDelay = interTokenDelays.length > 0
-        ? interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length
+        ? interTokenDelays.reduce((a, b) => a + b, 0) /
+            interTokenDelays.length
         : 0;
 }
 }
@@ -1231,10 +1323,22 @@ onEvent, onComplete) {
 if (messages.length === 0) {
     throw new Error("No messages found for Cohere streaming");
 }
+// Cohere v7 expects a single message and optional chatHistory
+// Extract the last message as the current message
+const lastMessage = messages[messages.length - 1];
+const chatHistory = messages.slice(0, -1);
+if (!lastMessage || !lastMessage.message) {
+    throw new Error("Last message must have message property for Cohere streaming");
+}
 const streamConfig = {
     model: modelName,
-
+    message: lastMessage.message, // Current message (singular)
 };
+// Add chat history if there are previous messages
+if (chatHistory.length > 0) {
+    // Messages already have 'message' property from formatter
+    streamConfig.chatHistory = chatHistory;
+}
 // Only add temperature if it's defined
 if (specification.cohere?.temperature !== undefined) {
     streamConfig.temperature = specification.cohere.temperature;
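For reference, a standalone sketch of the v7 call shape this config is built for, assuming the cohere-ai v7 `CohereClient`; the model name and messages are placeholders:

```typescript
import { CohereClient } from "cohere-ai";

const cohere = new CohereClient({ token: process.env.COHERE_API_KEY! });

// `message` carries the current turn; `chatHistory` carries prior turns.
const stream = await cohere.chatStream({
  model: "command-r-plus", // placeholder
  message: "What did we decide about retries?",
  chatHistory: [
    { role: "USER", message: "Let's talk about retry policy." },
    { role: "CHATBOT", message: "Sure - what would you like to know?" },
  ],
});

for await (const event of stream) {
  if (event.eventType === "text-generation") process.stdout.write(event.text);
}
```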
@@ -1283,12 +1387,9 @@ onEvent, onComplete) {
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere] Has
-    if (cohereClient.chat) {
-        console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
-    }
+    console.log(`🔍 [Cohere] Current message: "${streamConfig.message}"`);
+    console.log(`🔍 [Cohere] Chat history length: ${streamConfig.chatHistory?.length || 0}`);
+    console.log(`🔍 [Cohere] Has tools: ${!!streamConfig.tools}`);
     console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
 }
 let stream;
@@ -1498,7 +1599,7 @@ onEvent, onComplete) {
     name: specification.name,
     serviceType: specification.serviceType,
     bedrock: specification.bedrock,
-    hasCustomModelName: !!specification.bedrock?.modelName
+    hasCustomModelName: !!specification.bedrock?.modelName,
 });
 throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
 }
@@ -1511,9 +1612,13 @@ onEvent, onComplete) {
 // The AWS SDK expects content as an array of content blocks
 const converseMessages = messages.map((msg) => ({
     role: msg.role,
-    content: [
-
-
+    content: [
+        {
+            text: typeof msg.content === "string"
+                ? msg.content
+                : msg.content.toString(),
+        },
+    ],
 }));
 // Prepare the request using Converse API format
 // Using 'any' type because:
@@ -1649,9 +1754,25 @@ onEvent, onComplete) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.error(`❌ [Bedrock] Stream error:`, error);
 }
+// Handle specific Bedrock errors
+const errorMessage = error.message || error.toString();
+const errorName = error.name || "";
+// Check for throttling errors
+if (errorName === "ThrottlingException" ||
+    errorMessage.includes("Too many tokens") ||
+    errorMessage.includes("Too many requests")) {
+    onEvent({
+        type: "error",
+        error: `Bedrock rate limit: ${errorMessage}`,
+    });
+    // Re-throw with a specific error type that the retry logic can handle
+    const rateLimitError = new Error(errorMessage);
+    rateLimitError.statusCode = 429; // Treat as rate limit
+    throw rateLimitError;
+}
 onEvent({
     type: "error",
-    error: `Bedrock streaming error: ${
+    error: `Bedrock streaming error: ${errorMessage}`,
 });
 throw error;
 }
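The content-block mapping and throttling handling above target Bedrock's Converse API. As a closing reference, a standalone sketch of that API shape from `@aws-sdk/client-bedrock-runtime`; the region and model ID are placeholders:

```typescript
import {
  BedrockRuntimeClient,
  ConverseStreamCommand,
} from "@aws-sdk/client-bedrock-runtime";

const bedrock = new BedrockRuntimeClient({ region: "us-east-1" }); // placeholder region

const response = await bedrock.send(
  new ConverseStreamCommand({
    modelId: "amazon.nova-pro-v1:0", // placeholder model ID
    // Content is an array of text blocks, matching the mapping above.
    messages: [{ role: "user", content: [{ text: "Hello, Bedrock!" }] }],
  }),
);

// A ThrottlingException raised here is what the catch block above maps to 429.
for await (const event of response.stream ?? []) {
  const text = event.contentBlockDelta?.delta?.text;
  if (text) process.stdout.write(text);
}
```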