graphlit-client 1.0.20250615003 → 1.0.20250615005
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -54
- package/dist/client.js +46 -7
- package/dist/streaming/llm-formatters.d.ts +6 -4
- package/dist/streaming/llm-formatters.js +21 -16
- package/dist/streaming/providers.js +225 -51
- package/dist/streaming/ui-event-adapter.d.ts +2 -0
- package/dist/streaming/ui-event-adapter.js +23 -2
- package/dist/types/agent.d.ts +7 -0
- package/dist/types/internal.d.ts +8 -0
- package/dist/types/ui-events.d.ts +20 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -75,7 +75,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId (optional)
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -134,7 +134,7 @@ GOOGLE_API_KEY=your_key
 
 # Additional streaming providers
 GROQ_API_KEY=your_key # For Groq models (Llama, Mixtral)
-CEREBRAS_API_KEY=your_key # For Cerebras models
+CEREBRAS_API_KEY=your_key # For Cerebras models
 COHERE_API_KEY=your_key # For Cohere Command models
 MISTRAL_API_KEY=your_key # For Mistral models
 DEEPSEEK_API_KEY=your_key # For Deepseek models
@@ -152,8 +152,9 @@ The SDK now includes automatic retry logic for network errors and transient fail
 
 ### Default Retry Configuration
 
 By default, the client will automatically retry on these status codes:
+
 - `429` - Too Many Requests
-- `502` - Bad Gateway
+- `502` - Bad Gateway
 - `503` - Service Unavailable
 - `504` - Gateway Timeout
 
@@ -168,19 +169,19 @@ Configure retry behavior to match your needs:
 
 ```typescript
 const client = new Graphlit({
   organizationId: "your_org_id",
-  environmentId: "your_env_id",
+  environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 10,
-    initialDelay: 500,
-    maxDelay: 60000,
-    jitter: true,
+    maxAttempts: 10, // Maximum retry attempts (default: 5)
+    initialDelay: 500, // Initial delay in ms (default: 300)
+    maxDelay: 60000, // Maximum delay in ms (default: 30000)
+    jitter: true, // Add randomness to delays (default: true)
     retryableStatusCodes: [429, 500, 502, 503, 504], // Custom status codes
     onRetry: (attempt, error, operation) => {
      console.log(`Retry attempt ${attempt} for ${operation.operationName}`);
      console.log(`Error: ${error.message}`);
-    }
-  }
+    },
+  },
 });
 ```
 
@@ -196,7 +197,7 @@ const client = new Graphlit();
 client.setRetryConfig({
   maxAttempts: 20,
   initialDelay: 100,
-  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524]
+  retryableStatusCodes: [429, 500, 502, 503, 504, 521, 522, 524],
 });
 ```
 
@@ -210,8 +211,8 @@ const client = new Graphlit({
   environmentId: "your_env_id",
   jwtSecret: "your_secret",
   retryConfig: {
-    maxAttempts: 1
-  }
+    maxAttempts: 1, // No retries
+  },
 });
 ```
 
@@ -221,17 +222,17 @@ The Graphlit SDK supports real-time streaming responses from 9 different LLM pro
 
 ### Supported Providers
 
-| Provider
-|
-| **OpenAI**
-| **Anthropic**
-| **Google**
-| **Groq**
-| **Cerebras**
-| **Cohere**
-| **Mistral**
-| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4
-| **Deepseek**
+| Provider        | Models                                        | SDK Required                      | API Key             |
+| --------------- | --------------------------------------------- | --------------------------------- | ------------------- |
+| **OpenAI**      | GPT-4, GPT-4o, GPT-4.1, O1, O3, O4            | `openai`                          | `OPENAI_API_KEY`    |
+| **Anthropic**   | Claude 3, Claude 3.5, Claude 3.7, Claude 4    | `@anthropic-ai/sdk`               | `ANTHROPIC_API_KEY` |
+| **Google**      | Gemini 1.5, Gemini 2.0, Gemini 2.5            | `@google/generative-ai`           | `GOOGLE_API_KEY`    |
+| **Groq**        | Llama 4, Llama 3.3, Mixtral, Deepseek R1      | `groq-sdk`                        | `GROQ_API_KEY`      |
+| **Cerebras**    | Llama 3.3, Llama 3.1                          | `openai`                          | `CEREBRAS_API_KEY`  |
+| **Cohere**      | Command R+, Command R, Command R7B, Command A | `cohere-ai`                       | `COHERE_API_KEY`    |
+| **Mistral**     | Mistral Large, Medium, Small, Nemo, Pixtral   | `@mistralai/mistralai`            | `MISTRAL_API_KEY`   |
+| **AWS Bedrock** | Nova Premier/Pro, Claude 3.7, Llama 4         | `@aws-sdk/client-bedrock-runtime` | AWS credentials     |
+| **Deepseek**    | Deepseek Chat, Deepseek Reasoner              | `openai`                          | `DEEPSEEK_API_KEY`  |
 
 ### Setting Up Streaming
 
@@ -250,7 +251,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -263,9 +266,9 @@ const spec = await client.createSpecification({
   name: "Multi-Provider Assistant",
   type: Types.SpecificationTypes.Completion,
   serviceType: Types.ModelServiceTypes.Cohere, // or any supported provider
-  cohere: {
+  cohere: {
     model: Types.CohereModels.CommandRPlus,
-    temperature: 0.7
+    temperature: 0.7,
   },
 });
 ```
@@ -310,7 +313,7 @@ await client.streamAgent(
     }
   },
   undefined, // conversationId
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -338,7 +341,7 @@ const content = await client.ingestUri(
   "https://arxiv.org/pdf/1706.03762.pdf", // Attention Is All You Need paper
   "AI Research Paper", // name
   undefined, // id
-  true // isSynchronous - waits for processing
+  true, // isSynchronous - waits for processing
 );
 
 console.log(`✅ Uploaded: ${content.ingestUri.id}`);
@@ -360,7 +363,7 @@ await client.streamAgent(
     }
   },
   conversation.createConversation.id, // conversationId with content filter
-  { id: spec.createSpecification.id } // specification
+  { id: spec.createSpecification.id }, // specification
 );
 ```
 
@@ -374,7 +377,7 @@ const webpage = await client.ingestUri(
   "https://en.wikipedia.org/wiki/Artificial_intelligence", // uri
   "AI Wikipedia Page", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -389,7 +392,7 @@ const conversation = await client.createConversation({
 const response = await client.promptAgent(
   "Summarize the key points about AI from this Wikipedia page",
   conversation.createConversation.id, // conversationId with filter
-  { id: spec.createSpecification.id } // specification (create one as shown above)
+  { id: spec.createSpecification.id }, // specification (create one as shown above)
 );
 
 console.log(response.message);
@@ -412,7 +415,9 @@ if (process.env.OPENAI_API_KEY) {
 
 if (process.env.COHERE_API_KEY) {
   const { CohereClient } = await import("cohere-ai");
-  client.setCohereClient(new CohereClient({ token: process.env.COHERE_API_KEY }));
+  client.setCohereClient(
+    new CohereClient({ token: process.env.COHERE_API_KEY }),
+  );
 }
 
 if (process.env.GROQ_API_KEY) {
@@ -425,24 +430,24 @@ const providers = [
   {
     name: "OpenAI GPT-4o",
     serviceType: Types.ModelServiceTypes.OpenAi,
-    openAI: { model: Types.OpenAiModels.Gpt4O_128K }
+    openAI: { model: Types.OpenAiModels.Gpt4O_128K },
   },
   {
     name: "Cohere Command R+",
     serviceType: Types.ModelServiceTypes.Cohere,
-    cohere: { model: Types.CohereModels.CommandRPlus }
+    cohere: { model: Types.CohereModels.CommandRPlus },
   },
   {
     name: "Groq Llama",
     serviceType: Types.ModelServiceTypes.Groq,
-    groq: { model: Types.GroqModels.Llama_3_3_70B }
-  }
+    groq: { model: Types.GroqModels.Llama_3_3_70B },
+  },
 ];
 
 // Compare responses
 for (const provider of providers) {
   console.log(`\n🤖 ${provider.name}:`);
-
+
   const spec = await client.createSpecification({
     ...provider,
     type: Types.SpecificationTypes.Completion,
@@ -456,7 +461,7 @@ for (const provider of providers) {
     }
   },
   undefined,
-  { id: spec.createSpecification.id }
+  { id: spec.createSpecification.id },
 );
 }
 ```
@@ -518,7 +523,7 @@ await client.streamAgent(
   undefined, // conversationId
   { id: spec.createSpecification.id }, // specification
   [weatherTool], // tools
-  toolHandlers // handlers
+  toolHandlers, // handlers
 );
 ```
 
@@ -563,7 +568,7 @@ class KnowledgeAssistant {
       url, // uri
       url.split("/").pop() || "Document", // name
      undefined, // id
-      true // isSynchronous - wait for processing
+      true, // isSynchronous - wait for processing
    );
    this.contentIds.push(content.ingestUri.id);
  }
@@ -593,7 +598,7 @@ class KnowledgeAssistant {
        }
      },
      this.conversationId, // Maintains conversation context
-      { id: this.specificationId! } // specification
+      { id: this.specificationId! }, // specification
    );
  }
 }
@@ -623,7 +628,7 @@ const document = await client.ingestUri(
   "https://example.com/document.pdf", // uri
   "Document #12345", // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 
 // Wait for content to be indexed
@@ -634,7 +639,7 @@ const extraction = await client.extractContents(
   "Extract the key information from this document",
   undefined, // tools
   undefined, // specification
-  { contents: [{ id: document.ingestUri.id }] } // filter
+  { contents: [{ id: document.ingestUri.id }] }, // filter
 );
 
 console.log("Extracted data:", extraction.extractContents);
@@ -653,7 +658,7 @@ for (const url of documentUrls) {
     url, // uri
     url.split("/").pop() || "Document", // name
     undefined, // id
-    true // isSynchronous
+    true, // isSynchronous
   );
   ids.push(content.ingestUri.id);
 }
@@ -666,7 +671,7 @@ const summary = await client.summarizeContents(
       prompt: "Create an executive summary of these documents",
     },
   ], // summarizations
-  { contents: ids.map((id) => ({ id })) } // filter
+  { contents: ids.map((id) => ({ id })) }, // filter
 );
 
 console.log("Summary:", summary.summarizeContents);
@@ -680,13 +685,13 @@ const content = await client.ingestUri(
   "https://example.com/large-document.pdf", // uri
   undefined, // name
   undefined, // id
-  true // isSynchronous
+  true, // isSynchronous
 );
 console.log("✅ Content ready!");
 
 // Option 2: Asynchronous processing (for large files)
 const content = await client.ingestUri(
-  "https://example.com/very-large-video.mp4" // uri
+  "https://example.com/very-large-video.mp4", // uri
   // isSynchronous defaults to false
 );
 
@@ -724,7 +729,7 @@ const result = await client.promptAgent(
   {
     // Only allow retrieval from specific content
     contents: [{ id: "content-id-1" }, { id: "content-id-2" }],
-  }
+  },
 );
 
 // Example 2: Streaming with content filter
@@ -745,7 +750,7 @@ await client.streamAgent(
   {
     // Filter by collection
    collections: [{ id: "technical-docs-collection" }],
-  }
+  },
 );
 ```
 
@@ -775,7 +780,7 @@ await client.streamAgent(
   {
     // Force this content into context
    contents: [{ id: fileContent.content.id }],
-  }
+  },
 );
 ```
 
@@ -801,7 +806,7 @@ await client.promptAgent(
   {
     // Always include the specific code file
    contents: [{ id: "implementation-file-id" }],
-  }
+  },
 );
 ```
 
@@ -846,7 +851,7 @@ await client.updateProject({
 
 // Now all content will be automatically summarized
 const content = await client.ingestUri(
-  "https://example.com/report.pdf" // uri
+  "https://example.com/report.pdf", // uri
 );
 ```
 
@@ -879,7 +884,7 @@ await client.streamAgent(
     }
   },
   undefined,
-  { id: conversationSpec.createSpecification.id }
+  { id: conversationSpec.createSpecification.id },
 );
 ```
 
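Aside from formatting, the README's retry surface is unchanged, so existing `setRetryConfig` usage keeps working. As a quick illustration, here is a minimal sketch wiring the documented `onRetry` hook into simple per-operation telemetry (the counter and log format are illustrative, not part of the SDK):

```typescript
import { Graphlit } from "graphlit-client";

const client = new Graphlit(); // reads GRAPHLIT_* credentials from the environment

// Illustrative: count retries per GraphQL operation to spot flaky endpoints.
const retryCounts = new Map<string, number>();

client.setRetryConfig({
  maxAttempts: 5, // matches the documented default
  initialDelay: 300,
  jitter: true, // randomized delays avoid synchronized retry storms
  onRetry: (attempt, error, operation) => {
    const name = operation.operationName ?? "unknown";
    retryCounts.set(name, (retryCounts.get(name) ?? 0) + 1);
    console.warn(`Retry #${attempt} for ${name}: ${error.message}`);
  },
});
```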
package/dist/client.js
CHANGED
@@ -133,7 +133,8 @@ class Graphlit {
     constructor(organizationIdOrOptions, environmentId, jwtSecret, ownerId, userId, apiUri) {
         // Handle both old constructor signature and new options object
         let options;
-        if (typeof organizationIdOrOptions === "object" && organizationIdOrOptions !== null) {
+        if (typeof organizationIdOrOptions === "object" &&
+            organizationIdOrOptions !== null) {
             // New constructor with options object
             options = organizationIdOrOptions;
         }
@@ -145,7 +146,7 @@ class Graphlit {
                 jwtSecret,
                 ownerId,
                 userId,
-                apiUri
+                apiUri,
             };
         }
         this.apiUri =
@@ -158,7 +159,8 @@ class Graphlit {
         dotenv.config();
         this.organizationId =
             options.organizationId || process.env.GRAPHLIT_ORGANIZATION_ID;
-        this.environmentId = options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
+        this.environmentId =
+            options.environmentId || process.env.GRAPHLIT_ENVIRONMENT_ID;
         this.jwtSecret = options.jwtSecret || process.env.GRAPHLIT_JWT_SECRET;
         // optional: for multi-tenant support
         this.ownerId = options.ownerId || process.env.GRAPHLIT_OWNER_ID;
@@ -179,7 +181,7 @@ class Graphlit {
             maxDelay: 30000,
             retryableStatusCodes: [429, 502, 503, 504],
             jitter: true,
-            ...options.retryConfig
+            ...options.retryConfig,
         };
         if (!this.organizationId) {
             throw new Error("Graphlit organization identifier is required.");
@@ -223,7 +225,9 @@ class Graphlit {
         if (statusCode && this.retryConfig.retryableStatusCodes) {
             const shouldRetry = this.retryConfig.retryableStatusCodes.includes(statusCode);
             // Call onRetry callback if provided
-            if (shouldRetry && this.retryConfig.onRetry && _operation.getContext().retryCount !== undefined) {
+            if (shouldRetry &&
+                this.retryConfig.onRetry &&
+                _operation.getContext().retryCount !== undefined) {
                 const attempt = _operation.getContext().retryCount + 1;
                 this.retryConfig.onRetry(attempt, error, _operation);
             }
@@ -332,7 +336,7 @@ class Graphlit {
     setRetryConfig(retryConfig) {
         this.retryConfig = {
             ...this.retryConfig,
-            ...retryConfig
+            ...retryConfig,
         };
         // Refresh client to apply new retry configuration
         this.refreshClient();
@@ -1693,12 +1697,28 @@ class Graphlit {
         }
         // 2. Initial prompt
         const promptResponse = await this.promptConversation(prompt, actualConversationId, specification, mimeType, data, tools, false, // requireTool
-
+        true, // includeDetails - needed for context window tracking
         correlationId);
         let currentMessage = promptResponse.promptConversation?.message;
         if (!currentMessage) {
             throw new Error("No message in prompt response");
         }
+        // Calculate and return context window usage in result
+        const details = promptResponse.promptConversation?.details;
+        let contextWindowUsage;
+        if (details?.tokenLimit && details?.messages) {
+            // Sum up all message tokens
+            const usedTokens = details.messages.reduce((sum, msg) => sum + (msg?.tokens || 0), 0);
+            contextWindowUsage = {
+                usedTokens,
+                maxTokens: details.tokenLimit,
+                percentage: Math.round((usedTokens / details.tokenLimit) * 100),
+                remainingTokens: Math.max(0, details.tokenLimit - usedTokens),
+            };
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
+            }
+        }
         // 3. Tool calling loop
         const allToolCalls = [];
         let rounds = 0;
@@ -1755,6 +1775,7 @@ class Graphlit {
             toolResults: allToolCalls,
             metrics,
             usage,
+            contextWindow: contextWindowUsage,
         };
     }
     catch (error) {
@@ -1952,6 +1973,24 @@ class Graphlit {
                 });
             }
         }
+        // Emit context window usage event
+        const details = formatResponse.formatConversation?.details;
+        if (details?.tokenLimit && details?.messages) {
+            // Sum up all message tokens
+            const usedTokens = details.messages.reduce((sum, msg) => sum + (msg?.tokens || 0), 0);
+            uiAdapter.handleEvent({
+                type: "context_window",
+                usage: {
+                    usedTokens,
+                    maxTokens: details.tokenLimit,
+                    percentage: Math.round((usedTokens / details.tokenLimit) * 100),
+                    remainingTokens: Math.max(0, details.tokenLimit - usedTokens),
+                },
+            });
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`📊 [Context Window] Using ${usedTokens.toLocaleString()}/${details.tokenLimit.toLocaleString()} tokens (${Math.round((usedTokens / details.tokenLimit) * 100)}%)`);
+            }
+        }
        // Build message array with conversation history
        const messages = [];
        // Add system prompt if specified
package/dist/streaming/llm-formatters.d.ts
CHANGED
@@ -79,10 +79,13 @@ export declare function formatMessagesForAnthropic(messages: ConversationMessage
 export declare function formatMessagesForGoogle(messages: ConversationMessage[]): GoogleMessage[];
 /**
  * Cohere message format
+ * Note: For Cohere v7 SDK, messages are handled differently:
+ * - Current message is passed as 'message' parameter
+ * - Previous messages are passed as 'chatHistory' array
  */
 export interface CohereMessage {
-    role: "
-
+    role: "USER" | "CHATBOT" | "SYSTEM" | "TOOL";
+    message: string;
     tool_calls?: Array<{
         id: string;
         name: string;
@@ -90,12 +93,11 @@ export interface CohereMessage {
     }>;
     tool_results?: Array<{
         call: {
-            id: string;
             name: string;
             parameters: Record<string, any>;
         };
         outputs: Array<{
-            text: string;
+            output: string;
         }>;
     }>;
 }
package/dist/streaming/llm-formatters.js
CHANGED
@@ -284,14 +284,14 @@ export function formatMessagesForCohere(messages) {
         switch (message.role) {
             case ConversationRoleTypes.System:
                 formattedMessages.push({
-                    role: "
-
+                    role: "SYSTEM",
+                    message: trimmedMessage,
                 });
                 break;
             case ConversationRoleTypes.Assistant:
                 const assistantMessage = {
-                    role: "
-
+                    role: "CHATBOT",
+                    message: trimmedMessage,
                 };
                 // Add tool calls if present
                 if (message.toolCalls && message.toolCalls.length > 0) {
@@ -300,32 +300,37 @@ export function formatMessagesForCohere(messages) {
                     .map((toolCall) => ({
                     id: toolCall.id,
                     name: toolCall.name,
-                    parameters: toolCall.arguments ? JSON.parse(toolCall.arguments) : {},
+                    parameters: toolCall.arguments
+                        ? JSON.parse(toolCall.arguments)
+                        : {},
                 }));
                 }
                 formattedMessages.push(assistantMessage);
                 break;
             case ConversationRoleTypes.Tool:
-                // Cohere expects tool results as
+                // Cohere expects tool results as TOOL messages
                 formattedMessages.push({
-                    role: "
-
-                    tool_results: [
+                    role: "TOOL",
+                    message: trimmedMessage,
+                    tool_results: [
+                        {
                             call: {
-                                id: message.toolCallId || "",
                                 name: "", // Would need to be tracked from the tool call
                                 parameters: {},
                             },
-                            outputs: [
-
-
-
+                            outputs: [
+                                {
+                                    output: trimmedMessage, // Changed from 'text' to 'output'
+                                },
+                            ],
+                        },
+                    ],
                 });
                 break;
             default: // User messages
                 formattedMessages.push({
-                    role: "
-
+                    role: "USER",
+                    message: trimmedMessage,
                 });
                 break;
         }
package/dist/streaming/providers.js
CHANGED
@@ -321,6 +321,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, toolCalls);
     }
     catch (error) {
+        // Handle OpenAI-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.code === "rate_limit_exceeded") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Rate limit hit`);
+            }
+            const rateLimitError = new Error("OpenAI rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        // Check for network errors
+        if (errorMessage.includes("fetch failed") ||
+            error.code === "ECONNRESET" ||
+            error.code === "ETIMEDOUT") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [OpenAI] Network error: ${errorMessage}`);
+            }
+            const networkError = new Error(`OpenAI network error: ${errorMessage}`);
+            networkError.statusCode = 503; // Service unavailable
+            throw networkError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -631,6 +655,30 @@ onEvent, onComplete) {
         onComplete(fullMessage, validToolCalls);
     }
     catch (error) {
+        // Handle Anthropic-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for overloaded errors
+        if (error.type === "overloaded_error" ||
+            errorMessage.includes("Overloaded")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Service overloaded`);
+            }
+            // Treat overloaded as a rate limit error for retry logic
+            const overloadError = new Error("Anthropic service overloaded");
+            overloadError.statusCode = 503; // Service unavailable
+            throw overloadError;
+        }
+        // Check for rate limit errors
+        if (error.status === 429 ||
+            error.statusCode === 429 ||
+            error.type === "rate_limit_error") {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Anthropic] Rate limit hit`);
+            }
+            const rateLimitError = new Error("Anthropic rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
         // Don't emit error event here - let the client handle it to avoid duplicates
         throw error;
     }
@@ -993,16 +1041,57 @@ onEvent, onComplete) {
  */
 export async function streamWithGroq(specification, messages, tools, groqClient, // Groq client instance (OpenAI-compatible)
 onEvent, onComplete) {
-
-
+    try {
+        // Groq uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, groqClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Groq-specific errors
+        const errorMessage = error.message || error.toString();
+        // Check for tool calling errors
+        if (error.status === 400 &&
+            errorMessage.includes("Failed to call a function")) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Tool calling error: ${errorMessage}`);
+            }
+            // Groq may have limitations with certain tool schemas
+            // Re-throw with a more descriptive error
+            throw new Error(`Groq tool calling error: ${errorMessage}. The model may not support the provided tool schema format.`);
+        }
+        // Handle rate limits
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Groq] Rate limit hit (429)`);
+            }
+            const rateLimitError = new Error("Groq rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export async function streamWithCerebras(specification, messages, tools, cerebrasClient, // OpenAI client instance configured for Cerebras
 onEvent, onComplete) {
-
-
+    try {
+        // Cerebras uses the same API as OpenAI, so we can reuse the OpenAI streaming logic
+        return await streamWithOpenAI(specification, messages, tools, cerebrasClient, onEvent, onComplete);
+    }
+    catch (error) {
+        // Handle Cerebras-specific 429 errors
+        if (error.status === 429 || error.statusCode === 429) {
+            if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+                console.log(`⚠️ [Cerebras] Rate limit hit (429)`);
+            }
+            // Re-throw with proper status code for retry logic
+            const rateLimitError = new Error("Cerebras rate limit exceeded");
+            rateLimitError.statusCode = 429;
+            throw rateLimitError;
+        }
+        throw error;
+    }
 }
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
@@ -1037,7 +1126,7 @@ onEvent, onComplete) {
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
         hasDeepseekModel: !!specification.deepseek?.model,
-        deepseekModelValue: specification.deepseek?.model
+        deepseekModelValue: specification.deepseek?.model,
     });
 }
 const modelName = getModelName(specification);
@@ -1046,7 +1135,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         deepseek: specification.deepseek,
-        hasCustomModelName: !!specification.deepseek?.modelName
+        hasCustomModelName: !!specification.deepseek?.modelName,
     });
     throw new Error(`No model name found for specification: ${specification.name} (service: ${specification.serviceType})`);
 }
@@ -1109,9 +1198,12 @@ onEvent, onComplete) {
 // Performance metrics tracking (internal only)
 if (tokenCount % 10 === 0) {
     const totalTokens = tokenCount + toolArgumentTokens;
-    const tokensPerSecond = totalTokens > 0 ? totalTokens / ((currentTime - startTime) / 1000) : 0;
+    const tokensPerSecond = totalTokens > 0
+        ? totalTokens / ((currentTime - startTime) / 1000)
+        : 0;
     const avgInterTokenDelay = interTokenDelays.length > 0
-        ? interTokenDelays.reduce((a, b) => a + b, 0) / interTokenDelays.length
+        ? interTokenDelays.reduce((a, b) => a + b, 0) /
+            interTokenDelays.length
         : 0;
 }
 }
@@ -1231,82 +1323,144 @@ onEvent, onComplete) {
 if (messages.length === 0) {
     throw new Error("No messages found for Cohere streaming");
 }
+// Cohere v7 expects a single message and optional chatHistory
+// Extract the last message as the current message
+const lastMessage = messages[messages.length - 1];
+const chatHistory = messages.slice(0, -1);
+if (!lastMessage || !lastMessage.message) {
+    throw new Error("Last message must have message property for Cohere streaming");
+}
+// Build properly typed request using Cohere SDK types
 const streamConfig = {
     model: modelName,
-
+    message: lastMessage.message, // Current message (singular)
 };
+// Add chat history if there are previous messages
+if (chatHistory.length > 0) {
+    // Build properly typed chat history using Cohere SDK Message types
+    const cohereHistory = chatHistory.map((msg) => {
+        switch (msg.role) {
+            case "USER":
+                return {
+                    role: "USER",
+                    message: msg.message,
+                };
+            case "CHATBOT":
+                const chatbotMsg = {
+                    role: "CHATBOT",
+                    message: msg.message,
+                };
+                // Add tool calls if present
+                if (msg.tool_calls && msg.tool_calls.length > 0) {
+                    chatbotMsg.toolCalls = msg.tool_calls.map((tc) => ({
+                        name: tc.name,
+                        parameters: tc.parameters || {},
+                    }));
+                }
+                return chatbotMsg;
+            case "SYSTEM":
+                return {
+                    role: "SYSTEM",
+                    message: msg.message,
+                };
+            case "TOOL":
+                return {
+                    role: "TOOL",
+                    toolResults: msg.tool_results || [],
+                };
+            default:
+                // Fallback - treat as USER
+                return {
+                    role: "USER",
+                    message: msg.message,
+                };
+        }
+    });
+    streamConfig.chatHistory = cohereHistory;
+}
 // Only add temperature if it's defined
-if (specification.cohere?.temperature !== undefined) {
+if (specification.cohere?.temperature !== undefined &&
+    specification.cohere.temperature !== null) {
     streamConfig.temperature = specification.cohere.temperature;
 }
 // Add tools if provided
 if (tools && tools.length > 0) {
-    streamConfig.tools = tools.map((tool) => {
+    const cohereTools = tools.map((tool) => {
         if (!tool.schema) {
             return {
-                name: tool.name,
-                description: tool.description,
-                parameter_definitions: {},
+                name: tool.name || "",
+                description: tool.description || "",
+                parameterDefinitions: {},
             };
         }
         // Parse the JSON schema
         const schema = JSON.parse(tool.schema);
         // Convert JSON Schema to Cohere's expected format
-        const parameter_definitions = {};
+        const parameterDefinitions = {};
         if (schema.properties) {
             for (const [key, value] of Object.entries(schema.properties)) {
                 const prop = value;
                 const paramDef = {
-                    type: prop.type || "
+                    type: prop.type || "str",
                     description: prop.description || "",
                     required: schema.required?.includes(key) || false,
                 };
-
-                if (prop.enum) {
-                    paramDef.options = prop.enum;
-                }
-                if (prop.default !== undefined) {
-                    paramDef.default = prop.default;
-                }
-                if (prop.items) {
-                    paramDef.items = prop.items;
-                }
-                parameter_definitions[key] = paramDef;
+                parameterDefinitions[key] = paramDef;
             }
         }
         return {
-            name: tool.name,
-            description: tool.description,
-            parameter_definitions,
+            name: tool.name || "",
+            description: tool.description || "",
+            parameterDefinitions, // Use camelCase as expected by Cohere SDK
        };
    });
+    streamConfig.tools = cohereTools;
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`🔍 [Cohere] Final stream config:`, JSON.stringify(streamConfig, null, 2));
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere]
-    console.log(`🔍 [Cohere] Has
-    if (cohereClient.chat) {
-        console.log(`🔍 [Cohere] Chat methods:`, Object.getOwnPropertyNames(cohereClient.chat));
-    }
+    console.log(`🔍 [Cohere] Current message: "${streamConfig.message}"`);
+    console.log(`🔍 [Cohere] Chat history length: ${streamConfig.chatHistory?.length || 0}`);
+    console.log(`🔍 [Cohere] Has tools: ${!!streamConfig.tools}`);
     console.log(`⏱️ [Cohere] Starting stream request at: ${new Date().toISOString()}`);
 }
 let stream;
 try {
+    // Always log the full config when debugging Command A errors
+    if (modelName.includes("command-a") ||
+        process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`🔍 [Cohere] Full streamConfig for ${modelName}:`, JSON.stringify(streamConfig, null, 2));
+    }
     stream = await cohereClient.chatStream(streamConfig);
 }
 catch (streamError) {
-
-
-
-
-
-
-
-
+    // Enhanced error logging
+    console.error(`❌ [Cohere] Stream creation failed for model ${modelName}`);
+    console.error(`❌ [Cohere] Error type: ${streamError.constructor.name}`);
+    console.error(`❌ [Cohere] Status code: ${streamError.statusCode || streamError.status || "unknown"}`);
+    console.error(`❌ [Cohere] Error message: ${streamError.message}`);
+    // Try to read the body if it's a ReadableStream
+    if (streamError.body &&
+        typeof streamError.body.getReader === "function") {
+        try {
+            const reader = streamError.body.getReader();
+            let fullBody = "";
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                fullBody += new TextDecoder().decode(value);
+            }
+            console.error(`❌ [Cohere] Raw error body:`, fullBody);
+            try {
+                const parsed = JSON.parse(fullBody);
+                console.error(`❌ [Cohere] Parsed error details:`, JSON.stringify(parsed, null, 2));
+            }
+            catch (e) {
+                console.error(`❌ [Cohere] Could not parse error body as JSON`);
+            }
        }
-
-        console.error(`❌ [Cohere]
+        catch (e) {
+            console.error(`❌ [Cohere] Could not read error body:`, e);
        }
    }
    throw streamError;
@@ -1498,7 +1652,7 @@ onEvent, onComplete) {
         name: specification.name,
         serviceType: specification.serviceType,
         bedrock: specification.bedrock,
-        hasCustomModelName: !!specification.bedrock?.modelName
+        hasCustomModelName: !!specification.bedrock?.modelName,
     });
     throw new Error(`No model name found for Bedrock specification: ${specification.name} (service: ${specification.serviceType}, bedrock.model: ${specification.bedrock?.model})`);
 }
@@ -1511,9 +1665,13 @@ onEvent, onComplete) {
 // The AWS SDK expects content as an array of content blocks
 const converseMessages = messages.map((msg) => ({
     role: msg.role,
-    content: [
-
-
+    content: [
+        {
+            text: typeof msg.content === "string"
+                ? msg.content
+                : msg.content.toString(),
+        },
+    ],
 }));
 // Prepare the request using Converse API format
 // Using 'any' type because:
@@ -1649,9 +1807,25 @@ onEvent, onComplete) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.error(`❌ [Bedrock] Stream error:`, error);
 }
+// Handle specific Bedrock errors
+const errorMessage = error.message || error.toString();
+const errorName = error.name || "";
+// Check for throttling errors
+if (errorName === "ThrottlingException" ||
+    errorMessage.includes("Too many tokens") ||
+    errorMessage.includes("Too many requests")) {
+    onEvent({
+        type: "error",
+        error: `Bedrock rate limit: ${errorMessage}`,
+    });
+    // Re-throw with a specific error type that the retry logic can handle
+    const rateLimitError = new Error(errorMessage);
+    rateLimitError.statusCode = 429; // Treat as rate limit
+    throw rateLimitError;
+}
 onEvent({
     type: "error",
-    error: `Bedrock streaming error: ${
+    error: `Bedrock streaming error: ${errorMessage}`,
 });
 throw error;
 }
package/dist/streaming/ui-event-adapter.d.ts
CHANGED
@@ -24,6 +24,7 @@ export declare class UIEventAdapter {
     private chunkBuffer?;
     private smoothingDelay;
     private chunkQueue;
+    private contextWindowUsage?;
     constructor(onEvent: (event: AgentStreamEvent) => void, conversationId: string, options?: {
         smoothingEnabled?: boolean;
         chunkingStrategy?: ChunkingStrategy;
@@ -49,6 +50,7 @@ export declare class UIEventAdapter {
     private emitNextChunk;
     private emitMessageUpdate;
     private emitUIEvent;
+    private handleContextWindow;
     /**
      * Clean up any pending timers
      */
package/dist/streaming/ui-event-adapter.js
CHANGED
@@ -23,6 +23,7 @@ export class UIEventAdapter {
     chunkBuffer;
     smoothingDelay = 30;
     chunkQueue = []; // Queue of chunks waiting to be emitted
+    contextWindowUsage;
     constructor(onEvent, conversationId, options = {}) {
         this.onEvent = onEvent;
         this.conversationId = conversationId;
@@ -66,6 +67,9 @@ export class UIEventAdapter {
             case "error":
                 this.handleError(event.error);
                 break;
+            case "context_window":
+                this.handleContextWindow(event.usage);
+                break;
         }
     }
     handleStart(conversationId) {
@@ -323,11 +327,16 @@ export class UIEventAdapter {
                 finalMetrics.streamingThroughput = Math.round((this.currentMessage.length / streamingTime) * 1000);
             }
         }
-        this.emitUIEvent({
+        // Include context window usage if available
+        const event = {
             type: "conversation_completed",
             message: finalMessage,
             metrics: finalMetrics,
-        });
+        };
+        if (this.contextWindowUsage) {
+            event.contextWindow = this.contextWindowUsage;
+        }
+        this.emitUIEvent(event);
     }
     handleError(error) {
         this.isStreaming = false;
@@ -468,6 +477,18 @@ export class UIEventAdapter {
     emitUIEvent(event) {
         this.onEvent(event);
     }
+    handleContextWindow(usage) {
+        // Store for later inclusion in completion event
+        this.contextWindowUsage = usage;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`📊 [UIEventAdapter] Context window: ${usage.usedTokens}/${usage.maxTokens} (${usage.percentage}%)`);
+        }
+        this.emitUIEvent({
+            type: "context_window",
+            usage,
+            timestamp: new Date(),
+        });
+    }
     /**
      * Clean up any pending timers
      */
package/dist/types/agent.d.ts
CHANGED
@@ -13,6 +13,12 @@ export interface AgentMetrics {
     toolExecutions?: number;
     rounds?: number;
 }
+export interface ContextWindowUsage {
+    usedTokens: number;
+    maxTokens: number;
+    percentage: number;
+    remainingTokens: number;
+}
 export interface AgentResult {
     message: string;
     conversationId: string;
@@ -21,6 +27,7 @@ export interface AgentResult {
     toolResults?: ToolCallResult[];
     metrics?: AgentMetrics;
     usage?: UsageInfo;
+    contextWindow?: ContextWindowUsage;
     error?: AgentError;
 }
 export interface StreamAgentOptions {
package/dist/types/ui-events.d.ts
CHANGED
@@ -3,6 +3,19 @@ import { ConversationMessage, ConversationToolCall } from "../generated/graphql-
  * Tool execution status for streaming
  */
 export type ToolExecutionStatus = "preparing" | "executing" | "ready" | "completed" | "failed";
+/**
+ * Context window usage event - emitted at start of agent interaction
+ */
+export type ContextWindowEvent = {
+    type: "context_window";
+    usage: {
+        usedTokens: number;
+        maxTokens: number;
+        percentage: number;
+        remainingTokens: number;
+    };
+    timestamp: Date;
+};
 /**
  * Simplified UI-focused streaming events using GraphQL types
  */
@@ -11,7 +24,7 @@ export type AgentStreamEvent = {
     conversationId: string;
     timestamp: Date;
     model?: string;
-} | {
+} | ContextWindowEvent | {
     type: "message_update";
     message: Partial<ConversationMessage> & {
         message: string;
@@ -43,6 +56,12 @@ export type AgentStreamEvent = {
         avgTokenDelay?: number;
         streamingThroughput?: number;
     };
+    contextWindow?: {
+        usedTokens: number;
+        maxTokens: number;
+        percentage: number;
+        remainingTokens: number;
+    };
 } | {
     type: "error";
     error: {