graphlit-client 1.0.20250704001 → 1.0.20250705001

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/client.js CHANGED
@@ -2294,9 +2294,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [OpenAI] Sending ${openaiMessages.length} messages to LLM: ${JSON.stringify(openaiMessages)}`);
  }
- await this.streamWithOpenAI(specification, openaiMessages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithOpenAI(specification, openaiMessages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] OpenAI native streaming completed (Round ${currentRound})`);
@@ -2311,9 +2314,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Anthropic] Sending ${anthropicMessages.length} messages to LLM (system: ${system ? "yes" : "no"}): ${JSON.stringify(anthropicMessages)}`);
  }
- await this.streamWithAnthropic(specification, anthropicMessages, system, tools, uiAdapter, (message, calls) => {
+ await this.streamWithAnthropic(specification, anthropicMessages, system, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Anthropic native streaming completed (Round ${currentRound})`);
@@ -2330,9 +2336,12 @@ class Graphlit {
  }
  // Google doesn't use system prompts separately, they're incorporated into messages
  await this.streamWithGoogle(specification, googleMessages, undefined, // systemPrompt - Google handles this differently
- tools, uiAdapter, (message, calls) => {
+ tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Google native streaming completed (Round ${currentRound})`);
@@ -2347,9 +2356,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Groq] Sending ${groqMessages.length} messages to LLM: ${JSON.stringify(groqMessages)}`);
  }
- await this.streamWithGroq(specification, groqMessages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithGroq(specification, groqMessages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Groq native streaming completed (Round ${currentRound})`);
@@ -2364,9 +2376,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Cerebras] Sending ${cerebrasMessages.length} messages to LLM: ${JSON.stringify(cerebrasMessages)}`);
  }
- await this.streamWithCerebras(specification, cerebrasMessages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithCerebras(specification, cerebrasMessages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Cerebras native streaming completed (Round ${currentRound})`);
@@ -2381,9 +2396,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Cohere] Sending ${messages.length} messages to LLM`);
  }
- await this.streamWithCohere(specification, messages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithCohere(specification, messages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Cohere native streaming completed (Round ${currentRound})`);
@@ -2411,9 +2429,12 @@ class Graphlit {
  console.log(`šŸ” [Mistral] IMPORTANT: We have tool responses, should we still pass tools?`);
  }
  }
- await this.streamWithMistral(specification, mistralMessages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithMistral(specification, mistralMessages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Mistral native streaming completed (Round ${currentRound})`);
@@ -2428,9 +2449,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Bedrock] Sending ${bedrockMessages.length} messages to LLM (system: ${system ? "yes" : "no"}): ${JSON.stringify(bedrockMessages)}`);
  }
- await this.streamWithBedrock(specification, bedrockMessages, system, tools, uiAdapter, (message, calls) => {
+ await this.streamWithBedrock(specification, bedrockMessages, system, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Bedrock native streaming completed (Round ${currentRound})`);
@@ -2445,9 +2469,12 @@ class Graphlit {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING_MESSAGES) {
  console.log(`šŸ” [Deepseek] Sending ${deepseekMessages.length} messages to LLM: ${JSON.stringify(deepseekMessages)}`);
  }
- await this.streamWithDeepseek(specification, deepseekMessages, tools, uiAdapter, (message, calls) => {
+ await this.streamWithDeepseek(specification, deepseekMessages, tools, uiAdapter, (message, calls, usage) => {
  roundMessage = message;
  toolCalls = calls;
+ if (usage) {
+ uiAdapter.setUsageData(usage);
+ }
  }, abortSignal);
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`\nšŸ [Streaming] Deepseek native streaming completed (Round ${currentRound})`);
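Every provider branch above makes the same change: the round-completion callback now accepts an optional third usage argument and forwards it to the UI adapter when present. A minimal TypeScript sketch of that pattern (UsageLike and makeRoundCallback are illustrative names, not SDK exports):

// Illustrative sketch only; mirrors the callback change applied in every provider branch above.
type UsageLike = { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };

function makeRoundCallback(uiAdapter: { setUsageData(usage: UsageLike): void }) {
  let roundMessage = "";
  let toolCalls: unknown[] = [];
  return (message: string, calls: unknown[], usage?: UsageLike) => {
    roundMessage = message;
    toolCalls = calls;
    if (usage) {
      // New in this release: native provider usage is handed to the UI adapter
      uiAdapter.setUsageData(usage);
    }
    return { roundMessage, toolCalls };
  };
}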
@@ -5,13 +5,13 @@ import { StreamEvent } from "../types/internal.js";
  * Stream with OpenAI SDK
  */
  export declare function streamWithOpenAI(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, openaiClient: any, // OpenAI client instance
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Anthropic SDK
  */
  type AnthropicClient = import("@anthropic-ai/sdk").default;
  export declare function streamWithAnthropic(specification: Specification, messages: AnthropicMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, anthropicClient: AnthropicClient, // Properly typed Anthropic client
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal, thinkingConfig?: {
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal, thinkingConfig?: {
  type: "enabled";
  budget_tokens: number;
  }): Promise<void>;
@@ -19,35 +19,35 @@ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls:
  * Stream with Google SDK
  */
  export declare function streamWithGoogle(specification: Specification, messages: GoogleMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, googleClient: any, // Google GenerativeAI client instance
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Groq SDK (OpenAI-compatible)
  */
  export declare function streamWithGroq(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, groqClient: any, // Groq client instance (OpenAI-compatible)
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Cerebras SDK (OpenAI-compatible)
  */
  export declare function streamWithCerebras(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, cerebrasClient: any, // OpenAI client instance configured for Cerebras
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Deepseek SDK (OpenAI-compatible)
  */
  export declare function streamWithDeepseek(specification: Specification, messages: OpenAIMessage[], tools: ToolDefinitionInput[] | undefined, deepseekClient: any, // OpenAI client instance configured for Deepseek
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Cohere SDK
  */
  export declare function streamWithCohere(specification: Specification, messages: ConversationMessage[], tools: ToolDefinitionInput[] | undefined, cohereClient: any, // CohereClient instance
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Mistral SDK
  */
  export declare function streamWithMistral(specification: Specification, messages: MistralMessage[], tools: ToolDefinitionInput[] | undefined, mistralClient: any, // Mistral client instance
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  /**
  * Stream with Bedrock SDK (for Claude models)
  */
  export declare function streamWithBedrock(specification: Specification, messages: BedrockMessage[], systemPrompt: string | undefined, tools: ToolDefinitionInput[] | undefined, bedrockClient: any, // BedrockRuntimeClient instance
- onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[]) => void, abortSignal?: AbortSignal): Promise<void>;
+ onEvent: (event: StreamEvent) => void, onComplete: (message: string, toolCalls: ConversationToolCall[], usage?: any) => void, abortSignal?: AbortSignal): Promise<void>;
  export {};
@@ -79,6 +79,7 @@ export async function streamWithOpenAI(specification, messages, tools, openaiCli
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
@@ -110,6 +111,7 @@ onEvent, onComplete, abortSignal) {
  model: modelName,
  messages,
  stream: true,
+ stream_options: { include_usage: true },
  temperature: specification.openAI?.temperature,
  //top_p: specification.openAI?.probability,
  };
@@ -137,6 +139,13 @@ onEvent, onComplete, abortSignal) {
  });
  for await (const chunk of stream) {
  const delta = chunk.choices[0]?.delta;
+ // Capture usage data from final chunk
+ if (chunk.usage || chunk.x_groq?.usage) {
+ usageData = chunk.usage || chunk.x_groq?.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[OpenAI] Usage data captured:`, usageData);
+ }
+ }
  // Debug log chunk details
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`[OpenAI] Chunk:`, JSON.stringify(chunk, null, 2));
@@ -351,7 +360,8 @@ onEvent, onComplete, abortSignal) {
  }
  console.log(`āœ… [OpenAI] Final message (${fullMessage.length} chars): "${fullMessage}"`);
  }
- onComplete(fullMessage, toolCalls);
+ // Pass usage data if available
+ onComplete(fullMessage, toolCalls, usageData);
  }
  catch (error) {
  // Handle OpenAI-specific errors
@@ -386,6 +396,7 @@ export async function streamWithAnthropic(specification, messages, systemPrompt,
  onEvent, onComplete, abortSignal, thinkingConfig) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
@@ -477,6 +488,33 @@ onEvent, onComplete, abortSignal, thinkingConfig) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`[Anthropic] Received chunk type: ${chunk.type}`);
  }
+ // Capture usage data from various message events
+ // Prioritize message_start.message usage data as it's more complete
+ if (chunk.type === "message_start" && chunk.message?.usage) {
+ usageData = chunk.message.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Anthropic] Usage data captured from message_start.message:`, usageData);
+ }
+ }
+ else if (chunk.type === "message_delta" && chunk.usage && !usageData?.input_tokens) {
+ // Only use message_delta if we don't have input_tokens yet
+ usageData = chunk.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Anthropic] Usage data captured from ${chunk.type}:`, usageData);
+ }
+ }
+ else if ((chunk.type === "message_delta" || chunk.type === "message_start") && chunk.usage) {
+ // Merge usage data if we have partial data
+ if (usageData) {
+ usageData = { ...usageData, ...chunk.usage };
+ }
+ else {
+ usageData = chunk.usage;
+ }
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Anthropic] Usage data merged from ${chunk.type}:`, usageData);
+ }
+ }
  if (chunk.type === "content_block_start") {
  activeContentBlock = true;
  currentContentBlockIndex = chunk.index;
@@ -799,7 +837,7 @@ onEvent, onComplete, abortSignal, thinkingConfig) {
  console.log(`🧠 [Anthropic] Including thinking content (${completeThinkingContent.length} chars) and signature (${completeThinkingSignature.length} chars) in conversation history`);
  }
  }
- onComplete(finalMessage, validToolCalls);
+ onComplete(finalMessage, validToolCalls, usageData);
  }
  catch (error) {
  // Handle Anthropic-specific errors
@@ -837,6 +875,7 @@ export async function streamWithGoogle(specification, messages, systemPrompt, to
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
@@ -1176,7 +1215,24 @@ onEvent, onComplete, abortSignal) {
  }
  console.log(`āœ… [Google] Final message (${fullMessage.length} chars): "${fullMessage}"`);
  }
- onComplete(fullMessage, toolCalls);
+ // Try to capture usage data from final response
+ try {
+ const response = await result.response;
+ if (response.usageMetadata) {
+ usageData = {
+ prompt_tokens: response.usageMetadata.promptTokenCount,
+ completion_tokens: response.usageMetadata.candidatesTokenCount,
+ total_tokens: response.usageMetadata.totalTokenCount,
+ };
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Google] Usage data captured:`, usageData);
+ }
+ }
+ }
+ catch (e) {
+ // Ignore errors capturing usage data
+ }
+ onComplete(fullMessage, toolCalls, usageData);
  }
  catch (error) {
  // Don't emit error event here - let the client handle it to avoid duplicates
@@ -1317,6 +1373,7 @@ export async function streamWithDeepseek(specification, messages, tools, deepsee
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Reasoning detection state
  let reasoningLines = [];
  let currentLine = "";
@@ -1405,6 +1462,13 @@ onEvent, onComplete, abortSignal) {
  const delta = chunk.choices[0]?.delta;
  if (!delta)
  continue;
+ // Check for usage data in the chunk (OpenAI-compatible format)
+ if (chunk.usage) {
+ usageData = chunk.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Deepseek] Usage data captured:`, usageData);
+ }
+ }
  const currentTime = Date.now();
  // Track first token time
  if (firstTokenTime === 0) {
@@ -1599,7 +1663,7 @@ onEvent, onComplete, abortSignal) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`āœ… [Deepseek] Stream completed: ${fullMessage.length} chars, ${validToolCalls.length} tools`);
  }
- onComplete(fullMessage, validToolCalls);
+ onComplete(fullMessage, validToolCalls, usageData);
  }
  catch (error) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1619,6 +1683,7 @@ export async function streamWithCohere(specification, messages, tools, cohereCli
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
@@ -1867,10 +1932,17 @@ onEvent, onComplete, abortSignal) {
  }
  }
  else if (chunk.type === "message-end") {
- // Handle message end event
+ // Handle message end event and capture usage data
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`[Cohere] Message end event received`, chunk);
  }
+ // Capture usage data from message-end event
+ if (chunk.delta?.usage || chunk.usage) {
+ usageData = chunk.delta?.usage || chunk.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Cohere] Usage data captured:`, usageData);
+ }
+ }
  }
  }
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1881,7 +1953,7 @@ onEvent, onComplete, abortSignal) {
  type: "complete",
  tokens: tokenCount,
  });
- onComplete(fullMessage, toolCalls);
+ onComplete(fullMessage, toolCalls, usageData);
  }
  catch (error) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -1906,6 +1978,7 @@ export async function streamWithMistral(specification, messages, tools, mistralC
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Performance metrics
  const startTime = Date.now();
  let firstTokenTime = 0;
@@ -2042,6 +2115,13 @@ onEvent, onComplete, abortSignal) {
  console.log(`[Mistral] Raw chunk:`, JSON.stringify(chunk, null, 2));
  }
  const delta = chunk.data.choices[0]?.delta;
+ // Check for usage data in the chunk
+ if (chunk.data.usage) {
+ usageData = chunk.data.usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Mistral] Usage data captured:`, usageData);
+ }
+ }
  if (delta?.content) {
  fullMessage += delta.content;
  tokenCount++;
@@ -2122,7 +2202,9 @@ onEvent, onComplete, abortSignal) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`āœ… [Mistral] Complete. Chunks: ${chunkCount} | Tokens: ${tokenCount} | Message length: ${fullMessage.length} | Tool calls: ${toolCalls.length}`);
  }
- onComplete(fullMessage, toolCalls);
+ // Check if we captured usage data during streaming
+ // Note: Mistral SDK may provide usage data differently than other providers
+ onComplete(fullMessage, toolCalls, usageData);
  }
  catch (error) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
@@ -2150,6 +2232,7 @@ export async function streamWithBedrock(specification, messages, systemPrompt, t
  onEvent, onComplete, abortSignal) {
  let fullMessage = "";
  let toolCalls = [];
+ let usageData = null;
  // Map contentBlockIndex to tool calls for proper correlation
  const toolCallsByIndex = new Map();
  // Performance metrics
@@ -2393,6 +2476,17 @@ onEvent, onComplete, abortSignal) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
  console.log(`šŸ“Š [Bedrock] Metadata:`, event.metadata);
  }
+ // Capture usage data from metadata
+ if (event.metadata.usage) {
+ usageData = {
+ prompt_tokens: event.metadata.usage.inputTokens,
+ completion_tokens: event.metadata.usage.outputTokens,
+ total_tokens: event.metadata.usage.totalTokens,
+ };
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`[Bedrock] Usage data captured:`, usageData);
+ }
+ }
  }
  }
  }
@@ -2403,7 +2497,7 @@ onEvent, onComplete, abortSignal) {
  type: "complete",
  tokens: tokenCount,
  });
- onComplete(fullMessage, toolCalls);
+ onComplete(fullMessage, toolCalls, usageData);
  }
  catch (error) {
  if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
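For the OpenAI-compatible providers, the new stream_options: { include_usage: true } request flag makes the API append a final chunk whose choices array is empty and whose usage field carries the token counts, which is why the capture code above checks every chunk. A hedged, standalone sketch of consuming that final chunk (the client wiring and model name are illustrative, not taken from this package):

import OpenAI from "openai";

// Sketch: accumulate streamed text and pick up the usage-only final chunk.
async function streamAndReportUsage(client: OpenAI, prompt: string) {
  const stream = await client.chat.completions.create({
    model: "gpt-4o-mini", // illustrative model name
    messages: [{ role: "user", content: prompt }],
    stream: true,
    stream_options: { include_usage: true },
  });
  let text = "";
  let usage: { prompt_tokens: number; completion_tokens: number; total_tokens: number } | undefined;
  for await (const chunk of stream) {
    text += chunk.choices[0]?.delta?.content ?? "";
    if (chunk.usage) {
      usage = chunk.usage; // only the final chunk carries usage
    }
  }
  return { text, usage };
}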
@@ -30,6 +30,7 @@ export declare class UIEventAdapter {
  private reasoningFormat?;
  private reasoningSignature?;
  private isInReasoning;
+ private usageData?;
  constructor(onEvent: (event: AgentStreamEvent) => void, conversationId: string, options?: {
  smoothingEnabled?: boolean;
  chunkingStrategy?: ChunkingStrategy;
@@ -75,4 +76,8 @@ export declare class UIEventAdapter {
  * Get the throughput in tokens per second
  */
  getThroughput(): number | undefined;
+ /**
+ * Set usage data from native provider
+ */
+ setUsageData(usage: any): void;
  }
@@ -29,6 +29,7 @@ export class UIEventAdapter {
  reasoningFormat;
  reasoningSignature;
  isInReasoning = false;
+ usageData;
  constructor(onEvent, conversationId, options = {}) {
  this.onEvent = onEvent;
  this.conversationId = conversationId;
@@ -352,6 +353,17 @@ export class UIEventAdapter {
  if (this.contextWindowUsage) {
  event.contextWindow = this.contextWindowUsage;
  }
+ // Add native provider usage data if available
+ if (this.usageData) {
+ event.usage = {
+ promptTokens: this.usageData.prompt_tokens || this.usageData.promptTokens || this.usageData.input_tokens || 0,
+ completionTokens: this.usageData.completion_tokens || this.usageData.completionTokens || this.usageData.output_tokens || 0,
+ totalTokens: this.usageData.total_tokens || this.usageData.totalTokens ||
+ ((this.usageData.input_tokens || 0) + (this.usageData.output_tokens || 0)) || 0,
+ model: this.model,
+ provider: this.modelService,
+ };
+ }
  this.emitUIEvent(event);
  }
  handleError(error) {
@@ -575,4 +587,13 @@ export class UIEventAdapter {
  getThroughput() {
  return this.finalMetrics?.streamingThroughput;
  }
+ /**
+ * Set usage data from native provider
+ */
+ setUsageData(usage) {
+ this.usageData = usage;
+ if (process.env.DEBUG_GRAPHLIT_SDK_STREAMING) {
+ console.log(`šŸ“Š [UIEventAdapter] Usage data set:`, usage);
+ }
+ }
  }
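Before emitting the final event, the adapter normalizes whatever raw shape the provider reported (snake_case prompt_tokens/completion_tokens for OpenAI-style APIs, input_tokens/output_tokens for Anthropic, values already mapped to snake_case for Bedrock and Google) into a single camelCase usage object. A standalone sketch of that normalization, with RawUsage as an illustrative type rather than an SDK export:

type RawUsage = Record<string, number | undefined>;

// Sketch of the field-name normalization performed in the adapter above.
function normalizeUsage(raw: RawUsage) {
  const promptTokens = raw.prompt_tokens ?? raw.promptTokens ?? raw.input_tokens ?? 0;
  const completionTokens = raw.completion_tokens ?? raw.completionTokens ?? raw.output_tokens ?? 0;
  const totalTokens = raw.total_tokens ?? raw.totalTokens ?? promptTokens + completionTokens;
  return { promptTokens, completionTokens, totalTokens };
}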
@@ -46,6 +46,13 @@ export type StreamEvent = {
  messageId?: string;
  conversationId?: string;
  tokens?: number;
+ usage?: {
+ promptTokens: number;
+ completionTokens: number;
+ totalTokens: number;
+ model?: string;
+ provider?: string;
+ };
  } | {
  type: "error";
  error: string;
@@ -0,0 +1,30 @@
+ /**
+ * Token usage information from streaming providers
+ */
+ export interface TokenUsage {
+ /** Number of tokens in the prompt/input */
+ promptTokens: number;
+ /** Number of tokens in the completion/output */
+ completionTokens: number;
+ /** Total tokens (prompt + completion) */
+ totalTokens: number;
+ /** Provider-specific model identifier */
+ model?: string;
+ /** Provider name (OpenAI, Anthropic, etc.) */
+ provider?: string;
+ /** Additional provider-specific usage data */
+ metadata?: Record<string, any>;
+ }
+ /**
+ * Extended token usage with timing information
+ */
+ export interface ExtendedTokenUsage extends TokenUsage {
+ /** Time to generate the completion (ms) */
+ completionTime?: number;
+ /** Time to process the prompt (ms) */
+ promptTime?: number;
+ /** Queue time before processing (ms) */
+ queueTime?: number;
+ /** Tokens per second throughput */
+ tokensPerSecond?: number;
+ }
@@ -0,0 +1 @@
+ export {};
@@ -71,6 +71,13 @@ export type AgentStreamEvent = {
  percentage: number;
  remainingTokens: number;
  };
+ usage?: {
+ promptTokens: number;
+ completionTokens: number;
+ totalTokens: number;
+ model?: string;
+ provider?: string;
+ };
  } | {
  type: "error";
  error: {
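With the new optional usage member on the completion variant, consumers can read normalized token counts straight off the final streaming event. A hedged sketch (the concrete event type string is not shown in this diff, so the check below is purely structural):

// Sketch: log usage if the final event carries it; field names match the type added above.
function logUsage(event: {
  usage?: { promptTokens: number; completionTokens: number; totalTokens: number; model?: string; provider?: string };
}) {
  if (!event.usage) return;
  const { promptTokens, completionTokens, totalTokens, model, provider } = event.usage;
  console.log(`${provider ?? "?"}/${model ?? "?"}: ${promptTokens} + ${completionTokens} = ${totalTokens} tokens`);
}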
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "graphlit-client",
- "version": "1.0.20250704001",
+ "version": "1.0.20250705001",
  "description": "Graphlit API Client for TypeScript",
  "type": "module",
  "main": "./dist/client.js",