graphlit-client 1.0.20250703001 → 1.0.20250705001
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client.js +36 -9
- package/dist/model-mapping.js +2 -0
- package/dist/streaming/providers.d.ts +9 -9
- package/dist/streaming/providers.js +102 -8
- package/dist/streaming/ui-event-adapter.d.ts +5 -0
- package/dist/streaming/ui-event-adapter.js +21 -0
- package/dist/types/internal.d.ts +7 -0
- package/dist/types/token-usage.d.ts +30 -0
- package/dist/types/token-usage.js +1 -0
- package/dist/types/ui-events.d.ts +7 -0
- package/package.json +1 -1
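Taken together, these changes thread native token-usage reporting from each provider's streaming API through the streamWith* completion callbacks into UIEventAdapter.setUsageData(), and expose it on the emitted AgentStreamEvent as an optional usage block. A minimal consumer-side sketch follows; the import path and the assumption that usage arrives alongside the final event are mine, while the usage shape itself comes from the ui-events.d.ts change in this diff.

    import type { AgentStreamEvent } from "graphlit-client/dist/types/ui-events.js"; // path assumed

    // Hypothetical handler passed wherever the SDK accepts an
    // (event: AgentStreamEvent) => void callback (e.g. the UIEventAdapter constructor).
    function handleEvent(event: AgentStreamEvent): void {
      // The usage block is optional and only populated once the provider has
      // reported native token counts (typically with the completion event).
      if ("usage" in event && event.usage) {
        const { promptTokens, completionTokens, totalTokens, model, provider } = event.usage;
        console.log(
          `[usage] ${provider ?? "unknown"}/${model ?? "unknown"}: ` +
            `${promptTokens} prompt + ${completionTokens} completion = ${totalTokens} tokens`,
        );
      }
    }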
package/dist/client.js
CHANGED
@@ -2294,9 +2294,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [OpenAI] Sending ${openaiMessages.length} messages to LLM: ${JSON.stringify(openaiMessages)}`);
 }
-await this.streamWithOpenAI(specification, openaiMessages, tools, uiAdapter, (message, calls) => {
+await this.streamWithOpenAI(specification, openaiMessages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] OpenAI native streaming completed (Round ${currentRound})`);
@@ -2311,9 +2314,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Anthropic] Sending ${anthropicMessages.length} messages to LLM (system: ${system ? "yes" : "no"}): ${JSON.stringify(anthropicMessages)}`);
 }
-await this.streamWithAnthropic(specification, anthropicMessages, system, tools, uiAdapter, (message, calls) => {
+await this.streamWithAnthropic(specification, anthropicMessages, system, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Anthropic native streaming completed (Round ${currentRound})`);
@@ -2330,9 +2336,12 @@ class Graphlit {
 }
 // Google doesn't use system prompts separately, they're incorporated into messages
 await this.streamWithGoogle(specification, googleMessages, undefined, // systemPrompt - Google handles this differently
-tools, uiAdapter, (message, calls) => {
+tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Google native streaming completed (Round ${currentRound})`);
@@ -2347,9 +2356,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Groq] Sending ${groqMessages.length} messages to LLM: ${JSON.stringify(groqMessages)}`);
 }
-await this.streamWithGroq(specification, groqMessages, tools, uiAdapter, (message, calls) => {
+await this.streamWithGroq(specification, groqMessages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Groq native streaming completed (Round ${currentRound})`);
@@ -2364,9 +2376,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Cerebras] Sending ${cerebrasMessages.length} messages to LLM: ${JSON.stringify(cerebrasMessages)}`);
 }
-await this.streamWithCerebras(specification, cerebrasMessages, tools, uiAdapter, (message, calls) => {
+await this.streamWithCerebras(specification, cerebrasMessages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Cerebras native streaming completed (Round ${currentRound})`);
@@ -2381,9 +2396,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Cohere] Sending ${messages.length} messages to LLM`);
 }
-await this.streamWithCohere(specification, messages, tools, uiAdapter, (message, calls) => {
+await this.streamWithCohere(specification, messages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Cohere native streaming completed (Round ${currentRound})`);
@@ -2411,9 +2429,12 @@ class Graphlit {
     console.log(`š [Mistral] IMPORTANT: We have tool responses, should we still pass tools?`);
     }
 }
-await this.streamWithMistral(specification, mistralMessages, tools, uiAdapter, (message, calls) => {
+await this.streamWithMistral(specification, mistralMessages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Mistral native streaming completed (Round ${currentRound})`);
@@ -2428,9 +2449,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Bedrock] Sending ${bedrockMessages.length} messages to LLM (system: ${system ? "yes" : "no"}): ${JSON.stringify(bedrockMessages)}`);
 }
-await this.streamWithBedrock(specification, bedrockMessages, system, tools, uiAdapter, (message, calls) => {
+await this.streamWithBedrock(specification, bedrockMessages, system, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Bedrock native streaming completed (Round ${currentRound})`);
@@ -2445,9 +2469,12 @@ class Graphlit {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
     console.log(`š [Deepseek] Sending ${deepseekMessages.length} messages to LLM: ${JSON.stringify(deepseekMessages)}`);
 }
-await this.streamWithDeepseek(specification, deepseekMessages, tools, uiAdapter, (message, calls) => {
+await this.streamWithDeepseek(specification, deepseekMessages, tools, uiAdapter, (message, calls, usage) => {
     roundMessage = message;
     toolCalls = calls;
+    if (usage) {
+        uiAdapter.setUsageData(usage);
+    }
 }, abortSignal);
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`\nš [Streaming] Deepseek native streaming completed (Round ${currentRound})`);
package/dist/model-mapping.js
CHANGED
@@ -80,6 +80,8 @@ const GOOGLE_MODEL_MAP = {
     // Gemini 2.5 models
     [Types.GoogleModels.Gemini_2_5FlashPreview]: "gemini-2.5-flash-preview-05-20",
     [Types.GoogleModels.Gemini_2_5ProPreview]: "gemini-2.5-pro-preview-06-05",
+    [Types.GoogleModels.Gemini_2_5Flash]: "gemini-2.5-flash",
+    [Types.GoogleModels.Gemini_2_5Pro]: "gemini-2.5-pro",
 };
 // Groq model mappings
 const GROQ_MODEL_MAP = {
package/dist/streaming/providers.d.ts
CHANGED
@@ -5,13 +5,13 @@ import { StreamEvent } from "../types/internal.js";
  * Stream with OpenAI SDK
  */
 export declare function streamWithOpenAI(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, openaiClient: any, // OpenAI client instance
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Anthropic SDK
  */
 type AnthropicClient = import("@anthropic-ai/sdk").default;
 export declare function streamWithAnthropic(specification: Specification, messages: AnthropicMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, anthropicClient: AnthropicClient, // Properly typed Anthropic client
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal, thinkingConfig?: {
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal, thinkingConfig?: {
     type: "enabled";
     budget_tokens: number;
 }): Promise<void>;
@@ -19,35 +19,35 @@ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls:
  * Stream with Google SDK
  */
 export declare function streamWithGoogle(specification: Specification, messages: GoogleMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, googleClient: any, // Google GenerativeAI client instance
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Groq SDK (OpenAI-compatible)
  */
 export declare function streamWithGroq(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, groqClient: any, // Groq client instance (OpenAI-compatible)
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
 export declare function streamWithCerebras(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, cerebrasClient: any, // OpenAI client instance configured for Cerebras
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Deepseek SDK (OpenAI-compatible)
  */
 export declare function streamWithDeepseek(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, deepseekClient: any, // OpenAI client instance configured for Deepseek
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Cohere SDK
  */
 export declare function streamWithCohere(specification: Specification, messages: ConversationMessage[], tools: ToolDefinitionInput[] | undefined, cohereClient: any, // CohereClient instance
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Mistral SDK
  */
 export declare function streamWithMistral(specification: Specification, messages: MistralMessage[], tools: ToolDefinitionInput[] | undefined, mistralClient: any, // Mistral client instance
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 /**
  * Stream with Bedrock SDK (for Claude models)
  */
 export declare function streamWithBedrock(specification: Specification, messages: BedrockMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, bedrockClient: any, // BedrockRuntimeClient instance
-onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
 export {};
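Note that the new usage parameter on every onComplete callback is optional and typed any, so existing two-argument callbacks keep compiling; each provider reports a different raw shape, and normalization happens later in the UI event adapter (see ui-event-adapter.js below). A hedged sketch of a callback that opts in, with the tool-call type left abstract:

    // Sketch only: opting in to the new third argument of onComplete.
    // The declarations type toolCalls as ConversationToolCall[]; unknown[] is used
    // here to keep the example self-contained.
    const onComplete = (
      message: string,
      toolCalls: unknown[],
      usage?: any, // e.g. { prompt_tokens, completion_tokens, total_tokens } from OpenAI-compatible SDKs
    ): void => {
      if (usage) {
        console.log(`completed with ${usage.total_tokens ?? "?"} total tokens`);
      }
      console.log(`final message (${message.length} chars), ${toolCalls.length} tool calls`);
    };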
package/dist/streaming/providers.js
CHANGED
@@ -79,6 +79,7 @@ export async function streamWithOpenAI(specification, messages, tools, openaiCli
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -110,6 +111,7 @@ onEvent, onComplete, abortSignal) {
     model: modelName,
     messages,
     stream: true,
+    stream_options: { include_usage: true },
     temperature: specification.openAI?.temperature,
     //top_p: specification.openAI?.probability,
 };
@@ -137,6 +139,13 @@ onEvent, onComplete, abortSignal) {
 });
 for await (const chunk of stream) {
     const delta = chunk.choices[0]?.delta;
+    // Capture usage data from final chunk
+    if (chunk.usage || chunk.x_groq?.usage) {
+        usageData = chunk.usage || chunk.x_groq?.usage;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`[OpenAI] Usage data captured:`, usageData);
+        }
+    }
     // Debug log chunk details
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
         console.log(`[OpenAI] Chunk:`, JSON.stringify(chunk, null, 2));
@@ -351,7 +360,8 @@ onEvent, onComplete, abortSignal) {
     }
     console.log(`ā [OpenAI] Final message (${fullMessage.length} chars): "${fullMessage}"`);
 }
-
+// Pass usage data if available
+onComplete(fullMessage, toolCalls, usageData);
 }
 catch (error) {
     // Handle OpenAI-specific errors
@@ -386,6 +396,7 @@ export async function streamWithAnthropic(specification, messages, systemPrompt,
 onEvent, onComplete, abortSignal, thinkingConfig) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -477,6 +488,33 @@ onEvent, onComplete, abortSignal, thinkingConfig) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`[Anthropic] Received chunk type: ${chunk.type}`);
 }
+// Capture usage data from various message events
+// Prioritize message_start.message usage data as it's more complete
+if (chunk.type === "message_start" && chunk.message?.usage) {
+    usageData = chunk.message.usage;
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Anthropic] Usage data captured from message_start.message:`, usageData);
+    }
+}
+else if (chunk.type === "message_delta" && chunk.usage && !usageData?.input_tokens) {
+    // Only use message_delta if we don't have input_tokens yet
+    usageData = chunk.usage;
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Anthropic] Usage data captured from ${chunk.type}:`, usageData);
+    }
+}
+else if ((chunk.type === "message_delta" || chunk.type === "message_start") && chunk.usage) {
+    // Merge usage data if we have partial data
+    if (usageData) {
+        usageData = { ...usageData, ...chunk.usage };
+    }
+    else {
+        usageData = chunk.usage;
+    }
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Anthropic] Usage data merged from ${chunk.type}:`, usageData);
+    }
+}
 if (chunk.type === "content_block_start") {
     activeContentBlock = true;
     currentContentBlockIndex = chunk.index;
@@ -799,7 +837,7 @@ onEvent, onComplete, abortSignal, thinkingConfig) {
     console.log(`š§ [Anthropic] Including thinking content (${completeThinkingContent.length} chars) and signature (${completeThinkingSignature.length} chars) in conversation history`);
     }
 }
-onComplete(finalMessage, validToolCalls);
+onComplete(finalMessage, validToolCalls, usageData);
 }
 catch (error) {
     // Handle Anthropic-specific errors
@@ -837,6 +875,7 @@ export async function streamWithGoogle(specification, messages, systemPrompt, to
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -1176,7 +1215,24 @@ onEvent, onComplete, abortSignal) {
     }
     console.log(`ā [Google] Final message (${fullMessage.length} chars): "${fullMessage}"`);
 }
-
+// Try to capture usage data from final response
+try {
+    const response = await result.response;
+    if (response.usageMetadata) {
+        usageData = {
+            prompt_tokens: response.usageMetadata.promptTokenCount,
+            completion_tokens: response.usageMetadata.candidatesTokenCount,
+            total_tokens: response.usageMetadata.totalTokenCount,
+        };
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`[Google] Usage data captured:`, usageData);
+        }
+    }
+}
+catch (e) {
+    // Ignore errors capturing usage data
+}
+onComplete(fullMessage, toolCalls, usageData);
 }
 catch (error) {
     // Don't emit error event here - let the client handle it to avoid duplicates
@@ -1317,6 +1373,7 @@ export async function streamWithDeepseek(specification, messages, tools, deepsee
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Reasoning detection state
     let reasoningLines = [];
     let currentLine = "";
@@ -1405,6 +1462,13 @@ onEvent, onComplete, abortSignal) {
 const delta = chunk.choices[0]?.delta;
 if (!delta)
     continue;
+// Check for usage data in the chunk (OpenAI-compatible format)
+if (chunk.usage) {
+    usageData = chunk.usage;
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Deepseek] Usage data captured:`, usageData);
+    }
+}
 const currentTime = Date.now();
 // Track first token time
 if (firstTokenTime === 0) {
@@ -1599,7 +1663,7 @@ onEvent, onComplete, abortSignal) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`ā [Deepseek] Stream completed: ${fullMessage.length} chars, ${validToolCalls.length} tools`);
 }
-onComplete(fullMessage, validToolCalls);
+onComplete(fullMessage, validToolCalls, usageData);
 }
 catch (error) {
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1619,6 +1683,7 @@ export async function streamWithCohere(specification, messages, tools, cohereCli
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -1867,10 +1932,17 @@ onEvent, onComplete, abortSignal) {
     }
 }
 else if (chunk.type === "message-end") {
-    // Handle message end event
+    // Handle message end event and capture usage data
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
         console.log(`[Cohere] Message end event received`, chunk);
     }
+    // Capture usage data from message-end event
+    if (chunk.delta?.usage || chunk.usage) {
+        usageData = chunk.delta?.usage || chunk.usage;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`[Cohere] Usage data captured:`, usageData);
+        }
+    }
 }
 }
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1881,7 +1953,7 @@ onEvent, onComplete, abortSignal) {
     type: "complete",
     tokens: tokenCount,
 });
-onComplete(fullMessage, toolCalls);
+onComplete(fullMessage, toolCalls, usageData);
 }
 catch (error) {
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1906,6 +1978,7 @@ export async function streamWithMistral(specification, messages, tools, mistralC
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Performance metrics
     const startTime = Date.now();
     let firstTokenTime = 0;
@@ -2042,6 +2115,13 @@ onEvent, onComplete, abortSignal) {
     console.log(`[Mistral] Raw chunk:`, JSON.stringify(chunk, null, 2));
 }
 const delta = chunk.data.choices[0]?.delta;
+// Check for usage data in the chunk
+if (chunk.data.usage) {
+    usageData = chunk.data.usage;
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Mistral] Usage data captured:`, usageData);
+    }
+}
 if (delta?.content) {
     fullMessage += delta.content;
     tokenCount++;
@@ -2122,7 +2202,9 @@ onEvent, onComplete, abortSignal) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`ā [Mistral] Complete. Chunks: ${chunkCount} | Tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
 }
-
+// Check if we captured usage data during streaming
+// Note: Mistral SDK may provide usage data differently than other providers
+onComplete(fullMessage, toolCalls, usageData);
 }
 catch (error) {
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -2150,6 +2232,7 @@ export async function streamWithBedrock(specification, messages, systemPrompt, t
 onEvent, onComplete, abortSignal) {
     let fullMessage = "";
     let toolCalls = [];
+    let usageData = null;
     // Map contentBlockIndex to tool calls for proper correlation
     const toolCallsByIndex = new Map();
     // Performance metrics
@@ -2393,6 +2476,17 @@ onEvent, onComplete, abortSignal) {
 if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
     console.log(`š [Bedrock] Metadata:`, event.metadata);
 }
+// Capture usage data from metadata
+if (event.metadata.usage) {
+    usageData = {
+        prompt_tokens: event.metadata.usage.inputTokens,
+        completion_tokens: event.metadata.usage.outputTokens,
+        total_tokens: event.metadata.usage.totalTokens,
+    };
+    if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+        console.log(`[Bedrock] Usage data captured:`, usageData);
+    }
+}
 }
 }
 }
@@ -2403,7 +2497,7 @@ onEvent, onComplete, abortSignal) {
     type: "complete",
     tokens: tokenCount,
 });
-onComplete(fullMessage, toolCalls);
+onComplete(fullMessage, toolCalls, usageData);
 }
 catch (error) {
     if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
package/dist/streaming/ui-event-adapter.d.ts
CHANGED
@@ -30,6 +30,7 @@ export declare class UIEventAdapter {
     private reasoningFormat?;
     private reasoningSignature?;
     private isInReasoning;
+    private usageData?;
     constructor(onEvent: (event: AgentStreamEvent) => void, conversationId: string, options?: {
         smoothingEnabled?: boolean;
         chunkingStrategy?: ChunkingStrategy;
@@ -75,4 +76,8 @@ export declare class UIEventAdapter {
     * Get the throughput in tokens per second
     */
    getThroughput(): number | undefined;
+    /**
+     * Set usage data from native provider
+     */
+    setUsageData(usage: any): void;
 }
package/dist/streaming/ui-event-adapter.js
CHANGED
@@ -29,6 +29,7 @@ export class UIEventAdapter {
     reasoningFormat;
     reasoningSignature;
     isInReasoning = false;
+    usageData;
     constructor(onEvent, conversationId, options = {}) {
         this.onEvent = onEvent;
         this.conversationId = conversationId;
@@ -352,6 +353,17 @@ export class UIEventAdapter {
     if (this.contextWindowUsage) {
         event.contextWindow = this.contextWindowUsage;
     }
+    // Add native provider usage data if available
+    if (this.usageData) {
+        event.usage = {
+            promptTokens: this.usageData.prompt_tokens || this.usageData.promptTokens || this.usageData.input_tokens || 0,
+            completionTokens: this.usageData.completion_tokens || this.usageData.completionTokens || this.usageData.output_tokens || 0,
+            totalTokens: this.usageData.total_tokens || this.usageData.totalTokens ||
+                ((this.usageData.input_tokens || 0) + (this.usageData.output_tokens || 0)) || 0,
+            model: this.model,
+            provider: this.modelService,
+        };
+    }
     this.emitUIEvent(event);
 }
 handleError(error) {
@@ -575,4 +587,13 @@ export class UIEventAdapter {
     getThroughput() {
         return this.finalMetrics?.streamingThroughput;
     }
+    /**
+     * Set usage data from native provider
+     */
+    setUsageData(usage) {
+        this.usageData = usage;
+        if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+            console.log(`š [UIEventAdapter] Usage data set:`, usage);
+        }
+    }
 }
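The adapter is where the provider-specific field names converge: OpenAI-compatible SDKs report snake_case prompt_tokens/completion_tokens, Anthropic reports input_tokens/output_tokens, and the Google and Bedrock paths are already mapped to snake_case before reaching the adapter. The same fallback chain, restated as a standalone pure function purely for illustration (not part of the package API):

    interface NormalizedUsage {
      promptTokens: number;
      completionTokens: number;
      totalTokens: number;
    }

    // Mirrors the fallback logic added to UIEventAdapter above.
    function normalizeUsage(raw: any): NormalizedUsage {
      const promptTokens = raw.prompt_tokens || raw.promptTokens || raw.input_tokens || 0;
      const completionTokens = raw.completion_tokens || raw.completionTokens || raw.output_tokens || 0;
      const totalTokens =
        raw.total_tokens ||
        raw.totalTokens ||
        (raw.input_tokens || 0) + (raw.output_tokens || 0) ||
        0;
      return { promptTokens, completionTokens, totalTokens };
    }

    normalizeUsage({ prompt_tokens: 12, completion_tokens: 34, total_tokens: 46 }); // OpenAI-style
    normalizeUsage({ input_tokens: 12, output_tokens: 34 }); // Anthropic-style; total computed as 46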
package/dist/types/internal.d.ts
CHANGED
@@ -46,6 +46,13 @@ export type StreamEvent = {
     messageId?: string;
     conversationId?: string;
     tokens?: number;
+    usage?: {
+        promptTokens: number;
+        completionTokens: number;
+        totalTokens: number;
+        model?: string;
+        provider?: string;
+    };
 } | {
     type: "error";
     error: string;
package/dist/types/token-usage.d.ts
ADDED
@@ -0,0 +1,30 @@
+/**
+ * Token usage information from streaming providers
+ */
+export interface TokenUsage {
+    /** Number of tokens in the prompt/input */
+    promptTokens: number;
+    /** Number of tokens in the completion/output */
+    completionTokens: number;
+    /** Total tokens (prompt + completion) */
+    totalTokens: number;
+    /** Provider-specific model identifier */
+    model?: string;
+    /** Provider name (OpenAI, Anthropic, etc.) */
+    provider?: string;
+    /** Additional provider-specific usage data */
+    metadata?: Record<string, any>;
+}
+/**
+ * Extended token usage with timing information
+ */
+export interface ExtendedTokenUsage extends TokenUsage {
+    /** Time to generate the completion (ms) */
+    completionTime?: number;
+    /** Time to process the prompt (ms) */
+    promptTime?: number;
+    /** Queue time before processing (ms) */
+    queueTime?: number;
+    /** Tokens per second throughput */
+    tokensPerSecond?: number;
+}
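These interfaces are new in this version, and nothing else in this diff imports them (the event payloads above use inline usage types); ExtendedTokenUsage simply layers optional timing fields on top of TokenUsage. A purely illustrative helper, with the import path assumed, showing how a caller could derive tokensPerSecond from a measured completion time:

    import type { TokenUsage, ExtendedTokenUsage } from "graphlit-client/dist/types/token-usage.js"; // path assumed

    // Illustrative only (not part of the package): attach throughput to a usage record.
    function withThroughput(usage: TokenUsage, completionTimeMs: number): ExtendedTokenUsage {
      const tokensPerSecond =
        completionTimeMs > 0 ? (usage.completionTokens / completionTimeMs) * 1000 : 0;
      return { ...usage, completionTime: completionTimeMs, tokensPerSecond };
    }

    // 150 completion tokens streamed over 3 seconds -> 50 tokens/second
    const extended = withThroughput(
      { promptTokens: 420, completionTokens: 150, totalTokens: 570 },
      3000,
    );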
package/dist/types/token-usage.js
ADDED
@@ -0,0 +1 @@
+export {};
package/dist/types/ui-events.d.ts
CHANGED
@@ -71,6 +71,13 @@ export type AgentStreamEvent = {
         percentage: number;
         remainingTokens: number;
     };
+    usage?: {
+        promptTokens: number;
+        completionTokens: number;
+        totalTokens: number;
+        model?: string;
+        provider?: string;
+    };
 } | {
     type: "error";
     error: {