@inference-gateway/sdk 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.6.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.1...v0.6.0) (2025-04-28)
6
+
7
+ ### ✨ Features
8
+
9
+ * Add usage metrics handling to streaming chat completions and update tests ([#10](https://github.com/inference-gateway/typescript-sdk/issues/10)) ([576ff71](https://github.com/inference-gateway/typescript-sdk/commit/576ff711140c9e357bea4ba572e92027297c428b))
10
+
5
11
  ## [0.5.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.0...v0.5.1) (2025-04-27)
6
12
 
7
13
  ### 🐛 Bug Fixes
package/README.md CHANGED
@@ -127,6 +127,7 @@ try {
127
127
  onOpen: () => console.log('Stream opened'),
128
128
  onContent: (content) => process.stdout.write(content),
129
129
  onChunk: (chunk) => console.log('Received chunk:', chunk.id),
130
+ onUsageMetrics: (metrics) => console.log('Usage metrics:', metrics),
130
131
  onFinish: () => console.log('\nStream completed'),
131
132
  onError: (error) => console.error('Stream error:', error),
132
133
  },
@@ -1,10 +1,11 @@
1
- import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
1
+ import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
2
2
  interface ChatCompletionStreamCallbacks {
3
3
  onOpen?: () => void;
4
4
  onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
5
5
  onReasoning?: (reasoningContent: string) => void;
6
6
  onContent?: (content: string) => void;
7
7
  onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
8
+ onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
8
9
  onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
9
10
  onError?: (error: SchemaError) => void;
10
11
  }
@@ -162,6 +162,9 @@ class InferenceGatewayClient {
162
162
  try {
163
163
  const chunk = JSON.parse(data);
164
164
  callbacks.onChunk?.(chunk);
165
+ if (chunk.usage && callbacks.onUsageMetrics) {
166
+ callbacks.onUsageMetrics(chunk.usage);
167
+ }
165
168
  const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
166
169
  if (reasoning_content !== undefined) {
167
170
  callbacks.onReasoning?.(reasoning_content);
@@ -330,6 +330,58 @@ describe('InferenceGatewayClient', () => {
330
330
  await expect(client.streamChatCompletion(mockRequest, callbacks)).rejects.toThrow('Bad Request');
331
331
  expect(callbacks.onError).toHaveBeenCalledTimes(1);
332
332
  });
333
+ it('should handle streaming chat completions with usage metrics', async () => {
334
+ const mockRequest = {
335
+ model: 'gpt-4o',
336
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
337
+ stream: true,
338
+ stream_options: {
339
+ include_usage: true,
340
+ },
341
+ };
342
+ const mockStream = new web_1.TransformStream();
343
+ const writer = mockStream.writable.getWriter();
344
+ const encoder = new node_util_1.TextEncoder();
345
+ mockFetch.mockResolvedValueOnce({
346
+ ok: true,
347
+ body: mockStream.readable,
348
+ });
349
+ const callbacks = {
350
+ onOpen: jest.fn(),
351
+ onChunk: jest.fn(),
352
+ onContent: jest.fn(),
353
+ onUsageMetrics: jest.fn(),
354
+ onFinish: jest.fn(),
355
+ onError: jest.fn(),
356
+ };
357
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
358
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
359
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
360
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
361
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
362
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n' +
363
+ 'data: [DONE]\n\n'));
364
+ await writer.close();
365
+ await streamPromise;
366
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
367
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
368
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
369
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
370
+ expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
371
+ expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
372
+ prompt_tokens: 10,
373
+ completion_tokens: 8,
374
+ total_tokens: 18,
375
+ });
376
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
377
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
378
+ method: 'POST',
379
+ body: JSON.stringify({
380
+ ...mockRequest,
381
+ stream: true,
382
+ }),
383
+ }));
384
+ });
333
385
  });
334
386
  describe('proxy', () => {
335
387
  it('should proxy requests to a specific provider', async () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inference-gateway/sdk",
3
- "version": "0.5.1",
3
+ "version": "0.6.0",
4
4
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",