@inference-gateway/sdk 0.5.1 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +1 -0
- package/dist/src/client.d.ts +9 -3
- package/dist/src/client.js +12 -1
- package/dist/tests/client.test.js +69 -8
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED

@@ -2,6 +2,18 @@
 
 All notable changes to this project will be documented in this file.
 
+## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
+
+### ♻️ Improvements
+
+* Remove redundant request option ([#11](https://github.com/inference-gateway/typescript-sdk/issues/11)) ([82e34e2](https://github.com/inference-gateway/typescript-sdk/commit/82e34e2ee9782fd224945bff1bd4daf2859a4f79))
+
+## [0.6.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.1...v0.6.0) (2025-04-28)
+
+### ✨ Features
+
+* Add usage metrics handling to streaming chat completions and update tests ([#10](https://github.com/inference-gateway/typescript-sdk/issues/10)) ([576ff71](https://github.com/inference-gateway/typescript-sdk/commit/576ff711140c9e357bea4ba572e92027297c428b))
+
 ## [0.5.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.0...v0.5.1) (2025-04-27)
 
 ### 🐛 Bug Fixes
package/README.md
CHANGED

@@ -127,6 +127,7 @@ try {
       onOpen: () => console.log('Stream opened'),
       onContent: (content) => process.stdout.write(content),
       onChunk: (chunk) => console.log('Received chunk:', chunk.id),
+      onUsageMetrics: (metrics) => console.log('Usage metrics:', metrics),
       onFinish: () => console.log('\nStream completed'),
       onError: (error) => console.error('Stream error:', error),
     },
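For orientation, here is how the new callback slots into a complete streaming call. This is a minimal sketch, not text from the package: the constructor options are an assumption based on the test fixtures' http://localhost:8080/v1 endpoint, and the string role 'user' stands in for the SDK's generated MessageRole enum used elsewhere in this diff.

import { InferenceGatewayClient } from '@inference-gateway/sdk';

// Assumed construction; see the README's setup section for the actual options.
const client = new InferenceGatewayClient({ baseURL: 'http://localhost:8080/v1' });

try {
  await client.streamChatCompletion(
    {
      model: 'gpt-4o',
      messages: [{ role: 'user', content: 'Hello' }], // or the SDK's MessageRole.user
      // `stream` and `stream_options` are set internally as of 0.6.x.
    },
    {
      onContent: (content) => process.stdout.write(content),
      // New in 0.6.0: token usage arrives once, in the final usage chunk.
      onUsageMetrics: (usage) =>
        console.log(`tokens: ${usage.prompt_tokens} prompt / ${usage.completion_tokens} completion`),
      onFinish: () => console.log('\nStream completed'),
      onError: (error) => console.error('Stream error:', error),
    }
  );
} catch (error) {
  console.error('Request failed:', error);
}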
package/dist/src/client.d.ts
CHANGED

@@ -1,10 +1,11 @@
-import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
+import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
 interface ChatCompletionStreamCallbacks {
     onOpen?: () => void;
     onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
     onReasoning?: (reasoningContent: string) => void;
     onContent?: (content: string) => void;
     onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
+    onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
     onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
     onError?: (error: SchemaError) => void;
 }

@@ -39,11 +40,16 @@ export declare class InferenceGatewayClient {
     /**
      * Creates a chat completion.
      */
-    createChatCompletion(request: SchemaCreateChatCompletionRequest, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
+    createChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream'>, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
     /**
      * Creates a streaming chat completion.
+     * This method always sets stream=true internally, so there's no need to specify it in the request.
+     *
+     * @param request - Chat completion request (must include at least model and messages)
+     * @param callbacks - Callbacks for handling streaming events
+     * @param provider - Optional provider to use for this request
      */
-    streamChatCompletion(request: SchemaCreateChatCompletionRequest, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
+    streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
     /**
      * Proxy a request to a specific provider.
      */
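The practical effect of the Omit<...> signatures above: callers can no longer set `stream` themselves on either method, and `stream_options` is likewise reserved on the streaming call (the client injects both, as the client.js changes below show). A rough compile-time illustration, assuming a constructed client as in the README sketch earlier:

// Accepted by the 0.5.x types, rejected by 0.6.x excess-property checking:
await client.streamChatCompletion(
  {
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Hello' }],
    // stream: true,                            // error: removed via Omit<..., 'stream' | 'stream_options'>
    // stream_options: { include_usage: true }, // error: the client now sends this for you
  },
  { onContent: (content) => process.stdout.write(content) }
);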
package/dist/src/client.js
CHANGED

@@ -87,11 +87,16 @@ class InferenceGatewayClient {
         }
         return this.request('/chat/completions', {
             method: 'POST',
-            body: JSON.stringify(request),
+            body: JSON.stringify({ ...request, stream: false }),
         }, query);
     }
     /**
      * Creates a streaming chat completion.
+     * This method always sets stream=true internally, so there's no need to specify it in the request.
+     *
+     * @param request - Chat completion request (must include at least model and messages)
+     * @param callbacks - Callbacks for handling streaming events
+     * @param provider - Optional provider to use for this request
      */
     async streamChatCompletion(request, callbacks, provider) {
         const query = {};

@@ -120,6 +125,9 @@ class InferenceGatewayClient {
             body: JSON.stringify({
                 ...request,
                 stream: true,
+                stream_options: {
+                    include_usage: true,
+                },
             }),
             signal: controller.signal,
         });

@@ -162,6 +170,9 @@ class InferenceGatewayClient {
                 try {
                     const chunk = JSON.parse(data);
                     callbacks.onChunk?.(chunk);
+                    if (chunk.usage && callbacks.onUsageMetrics) {
+                        callbacks.onUsageMetrics(chunk.usage);
+                    }
                     const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
                     if (reasoning_content !== undefined) {
                         callbacks.onReasoning?.(reasoning_content);
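The dispatch added in the last hunk keys off a top-level usage field on the parsed SSE chunk; with include_usage enabled, providers send it in a final chunk whose choices array is empty. A standalone sketch of that guard, reusing the exact fixture shape from the tests below:

// Final usage chunk, as in the test fixtures: empty choices plus token totals.
const data =
  '{"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,' +
  '"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}';

const callbacks = {
  onUsageMetrics: (usage) => console.log('Usage metrics:', usage),
};

const chunk = JSON.parse(data);
if (chunk.usage && callbacks.onUsageMetrics) {
  // Same guard as client.js: fire only when the chunk carries usage
  // and the caller registered the new callback.
  callbacks.onUsageMetrics(chunk.usage);
}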
package/dist/tests/client.test.js
CHANGED

@@ -89,7 +89,6 @@ describe('InferenceGatewayClient', () => {
                     { role: generated_1.MessageRole.system, content: 'You are a helpful assistant' },
                     { role: generated_1.MessageRole.user, content: 'Hello' },
                 ],
-                stream: false,
             };
             const mockResponse = {
                 id: 'chatcmpl-123',

@@ -120,14 +119,13 @@ describe('InferenceGatewayClient', () => {
             expect(result).toEqual(mockResponse);
             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
                 method: 'POST',
-                body: JSON.stringify(mockRequest),
+                body: JSON.stringify({ ...mockRequest, stream: false }),
             }));
         });
         it('should create a chat completion with a specific provider', async () => {
             const mockRequest = {
                 model: 'claude-3-opus-20240229',
                 messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
-                stream: false,
             };
             const mockResponse = {
                 id: 'chatcmpl-456',

@@ -158,7 +156,7 @@ describe('InferenceGatewayClient', () => {
             expect(result).toEqual(mockResponse);
             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions?provider=anthropic', expect.objectContaining({
                 method: 'POST',
-                body: JSON.stringify(mockRequest),
+                body: JSON.stringify({ ...mockRequest, stream: false }),
             }));
         });
     });

@@ -167,7 +165,6 @@ describe('InferenceGatewayClient', () => {
             const mockRequest = {
                 model: 'gpt-4o',
                 messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
-                stream: true,
             };
             const mockStream = new web_1.TransformStream();
             const writer = mockStream.writable.getWriter();

@@ -201,6 +198,9 @@ describe('InferenceGatewayClient', () => {
                 body: JSON.stringify({
                     ...mockRequest,
                     stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
                 }),
             }));
         });

@@ -208,7 +208,6 @@ describe('InferenceGatewayClient', () => {
             const mockRequest = {
                 model: 'gpt-4o',
                 messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
-                stream: true,
             };
             const mockStream = new web_1.TransformStream();
             const writer = mockStream.writable.getWriter();

@@ -253,6 +252,9 @@ describe('InferenceGatewayClient', () => {
                 body: JSON.stringify({
                     ...mockRequest,
                     stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
                 }),
             }));
         });

@@ -274,7 +276,6 @@ describe('InferenceGatewayClient', () => {
                         },
                     },
                 ],
-                stream: true,
             };
             const mockStream = new web_1.TransformStream();
             const writer = mockStream.writable.getWriter();

@@ -312,12 +313,21 @@ describe('InferenceGatewayClient', () => {
                 },
             });
             expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
+                }),
+            }));
         });
         it('should handle errors in streaming chat completions', async () => {
             const mockRequest = {
                 model: 'gpt-4o',
                 messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
-                stream: true,
             };
             mockFetch.mockResolvedValueOnce({
                 ok: false,

@@ -330,6 +340,57 @@ describe('InferenceGatewayClient', () => {
             await expect(client.streamChatCompletion(mockRequest, callbacks)).rejects.toThrow('Bad Request');
             expect(callbacks.onError).toHaveBeenCalledTimes(1);
         });
+        it('should handle streaming chat completions with usage metrics', async () => {
+            const mockRequest = {
+                model: 'gpt-4o',
+                messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onContent: jest.fn(),
+                onUsageMetrics: jest.fn(),
+                onFinish: jest.fn(),
+                onError: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
+            expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+            expect(callbacks.onContent).toHaveBeenCalledWith('!');
+            expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
+            expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
+                prompt_tokens: 10,
+                completion_tokens: 8,
+                total_tokens: 18,
+            });
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
+                }),
+            }));
+        });
     });
     describe('proxy', () => {
         it('should proxy requests to a specific provider', async () => {
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@inference-gateway/sdk",
-  "version": "0.5.1",
+  "version": "0.6.1",
   "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
   "main": "dist/src/index.js",
   "types": "dist/src/index.d.ts",