@inference-gateway/sdk 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +1 -0
- package/dist/src/client.d.ts +2 -1
- package/dist/src/client.js +3 -0
- package/dist/tests/client.test.js +52 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.6.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.1...v0.6.0) (2025-04-28)
|
|
6
|
+
|
|
7
|
+
### ✨ Features
|
|
8
|
+
|
|
9
|
+
* Add usage metrics handling to streaming chat completions and update tests ([#10](https://github.com/inference-gateway/typescript-sdk/issues/10)) ([576ff71](https://github.com/inference-gateway/typescript-sdk/commit/576ff711140c9e357bea4ba572e92027297c428b))
|
|
10
|
+
|
|
5
11
|
## [0.5.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.0...v0.5.1) (2025-04-27)
|
|
6
12
|
|
|
7
13
|
### 🐛 Bug Fixes
|
package/README.md
CHANGED
|
@@ -127,6 +127,7 @@ try {
|
|
|
127
127
|
onOpen: () => console.log('Stream opened'),
|
|
128
128
|
onContent: (content) => process.stdout.write(content),
|
|
129
129
|
onChunk: (chunk) => console.log('Received chunk:', chunk.id),
|
|
130
|
+
onUsageMetrics: (metrics) => console.log('Usage metrics:', metrics),
|
|
130
131
|
onFinish: () => console.log('\nStream completed'),
|
|
131
132
|
onError: (error) => console.error('Stream error:', error),
|
|
132
133
|
},
|
package/dist/src/client.d.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
|
|
1
|
+
import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
|
|
2
2
|
interface ChatCompletionStreamCallbacks {
|
|
3
3
|
onOpen?: () => void;
|
|
4
4
|
onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
|
|
5
5
|
onReasoning?: (reasoningContent: string) => void;
|
|
6
6
|
onContent?: (content: string) => void;
|
|
7
7
|
onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
|
|
8
|
+
onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
|
|
8
9
|
onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
|
|
9
10
|
onError?: (error: SchemaError) => void;
|
|
10
11
|
}
|
package/dist/src/client.js
CHANGED
|
@@ -162,6 +162,9 @@ class InferenceGatewayClient {
|
|
|
162
162
|
try {
|
|
163
163
|
const chunk = JSON.parse(data);
|
|
164
164
|
callbacks.onChunk?.(chunk);
|
|
165
|
+
if (chunk.usage && callbacks.onUsageMetrics) {
|
|
166
|
+
callbacks.onUsageMetrics(chunk.usage);
|
|
167
|
+
}
|
|
165
168
|
const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
|
|
166
169
|
if (reasoning_content !== undefined) {
|
|
167
170
|
callbacks.onReasoning?.(reasoning_content);
|
package/dist/tests/client.test.js
CHANGED
|
@@ -330,6 +330,58 @@ describe('InferenceGatewayClient', () => {
|
|
|
330
330
|
await expect(client.streamChatCompletion(mockRequest, callbacks)).rejects.toThrow('Bad Request');
|
|
331
331
|
expect(callbacks.onError).toHaveBeenCalledTimes(1);
|
|
332
332
|
});
|
|
333
|
+
it('should handle streaming chat completions with usage metrics', async () => {
|
|
334
|
+
const mockRequest = {
|
|
335
|
+
model: 'gpt-4o',
|
|
336
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
337
|
+
stream: true,
|
|
338
|
+
stream_options: {
|
|
339
|
+
include_usage: true,
|
|
340
|
+
},
|
|
341
|
+
};
|
|
342
|
+
const mockStream = new web_1.TransformStream();
|
|
343
|
+
const writer = mockStream.writable.getWriter();
|
|
344
|
+
const encoder = new node_util_1.TextEncoder();
|
|
345
|
+
mockFetch.mockResolvedValueOnce({
|
|
346
|
+
ok: true,
|
|
347
|
+
body: mockStream.readable,
|
|
348
|
+
});
|
|
349
|
+
const callbacks = {
|
|
350
|
+
onOpen: jest.fn(),
|
|
351
|
+
onChunk: jest.fn(),
|
|
352
|
+
onContent: jest.fn(),
|
|
353
|
+
onUsageMetrics: jest.fn(),
|
|
354
|
+
onFinish: jest.fn(),
|
|
355
|
+
onError: jest.fn(),
|
|
356
|
+
};
|
|
357
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
358
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
359
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
360
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
361
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
|
|
362
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n' +
|
|
363
|
+
'data: [DONE]\n\n'));
|
|
364
|
+
await writer.close();
|
|
365
|
+
await streamPromise;
|
|
366
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
367
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
|
|
368
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
369
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
370
|
+
expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
|
|
371
|
+
expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
|
|
372
|
+
prompt_tokens: 10,
|
|
373
|
+
completion_tokens: 8,
|
|
374
|
+
total_tokens: 18,
|
|
375
|
+
});
|
|
376
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
377
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
378
|
+
method: 'POST',
|
|
379
|
+
body: JSON.stringify({
|
|
380
|
+
...mockRequest,
|
|
381
|
+
stream: true,
|
|
382
|
+
}),
|
|
383
|
+
}));
|
|
384
|
+
});
|
|
333
385
|
});
|
|
334
386
|
describe('proxy', () => {
|
|
335
387
|
it('should proxy requests to a specific provider', async () => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inference-gateway/sdk",
|
|
3
|
-
"version": "0.5.1",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"types": "dist/src/index.d.ts",
|