@inference-gateway/sdk 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/src/client.d.ts +7 -2
- package/dist/src/client.js +13 -1
- package/dist/src/types/generated/index.d.ts +24 -15
- package/dist/tests/client.test.js +180 -12
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
|
|
6
|
+
|
|
7
|
+
### ♻️ Improvements
|
|
8
|
+
|
|
9
|
+
* Process also groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
|
|
10
|
+
|
|
11
|
+
## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
|
|
12
|
+
|
|
13
|
+
### ♻️ Improvements
|
|
14
|
+
|
|
15
|
+
* Remove redundant request option ([#11](https://github.com/inference-gateway/typescript-sdk/issues/11)) ([82e34e2](https://github.com/inference-gateway/typescript-sdk/commit/82e34e2ee9782fd224945bff1bd4daf2859a4f79))
|
|
16
|
+
|
|
5
17
|
## [0.6.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.1...v0.6.0) (2025-04-28)
|
|
6
18
|
|
|
7
19
|
### ✨ Features
|
package/dist/src/client.d.ts
CHANGED
|
@@ -40,11 +40,16 @@ export declare class InferenceGatewayClient {
|
|
|
40
40
|
/**
|
|
41
41
|
* Creates a chat completion.
|
|
42
42
|
*/
|
|
43
|
-
createChatCompletion(request: SchemaCreateChatCompletionRequest, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
|
|
43
|
+
createChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream'>, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
|
|
44
44
|
/**
|
|
45
45
|
* Creates a streaming chat completion.
|
|
46
|
+
* This method always sets stream=true internally, so there's no need to specify it in the request.
|
|
47
|
+
*
|
|
48
|
+
* @param request - Chat completion request (must include at least model and messages)
|
|
49
|
+
* @param callbacks - Callbacks for handling streaming events
|
|
50
|
+
* @param provider - Optional provider to use for this request
|
|
46
51
|
*/
|
|
47
|
-
streamChatCompletion(request: SchemaCreateChatCompletionRequest, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
|
|
52
|
+
streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
|
|
48
53
|
/**
|
|
49
54
|
* Proxy a request to a specific provider.
|
|
50
55
|
*/
|
package/dist/src/client.js
CHANGED
|
@@ -87,11 +87,16 @@ class InferenceGatewayClient {
|
|
|
87
87
|
}
|
|
88
88
|
return this.request('/chat/completions', {
|
|
89
89
|
method: 'POST',
|
|
90
|
-
body: JSON.stringify(request),
|
|
90
|
+
body: JSON.stringify({ ...request, stream: false }),
|
|
91
91
|
}, query);
|
|
92
92
|
}
|
|
93
93
|
/**
|
|
94
94
|
* Creates a streaming chat completion.
|
|
95
|
+
* This method always sets stream=true internally, so there's no need to specify it in the request.
|
|
96
|
+
*
|
|
97
|
+
* @param request - Chat completion request (must include at least model and messages)
|
|
98
|
+
* @param callbacks - Callbacks for handling streaming events
|
|
99
|
+
* @param provider - Optional provider to use for this request
|
|
95
100
|
*/
|
|
96
101
|
async streamChatCompletion(request, callbacks, provider) {
|
|
97
102
|
const query = {};
|
|
@@ -120,6 +125,9 @@ class InferenceGatewayClient {
|
|
|
120
125
|
body: JSON.stringify({
|
|
121
126
|
...request,
|
|
122
127
|
stream: true,
|
|
128
|
+
stream_options: {
|
|
129
|
+
include_usage: true,
|
|
130
|
+
},
|
|
123
131
|
}),
|
|
124
132
|
signal: controller.signal,
|
|
125
133
|
});
|
|
@@ -169,6 +177,10 @@ class InferenceGatewayClient {
|
|
|
169
177
|
if (reasoning_content !== undefined) {
|
|
170
178
|
callbacks.onReasoning?.(reasoning_content);
|
|
171
179
|
}
|
|
180
|
+
const reasoning = chunk.choices[0]?.delta?.reasoning;
|
|
181
|
+
if (reasoning !== undefined) {
|
|
182
|
+
callbacks.onReasoning?.(reasoning);
|
|
183
|
+
}
|
|
172
184
|
const content = chunk.choices[0]?.delta?.content;
|
|
173
185
|
if (content) {
|
|
174
186
|
callbacks.onContent?.(content);
|
|
package/dist/src/types/generated/index.d.ts
CHANGED
|
@@ -180,8 +180,8 @@ export interface components {
|
|
|
180
180
|
retry?: number;
|
|
181
181
|
};
|
|
182
182
|
Endpoints: {
|
|
183
|
-
models
|
|
184
|
-
chat
|
|
183
|
+
models: string;
|
|
184
|
+
chat: string;
|
|
185
185
|
};
|
|
186
186
|
Error: {
|
|
187
187
|
error?: string;
|
|
@@ -197,17 +197,19 @@ export interface components {
|
|
|
197
197
|
content: string;
|
|
198
198
|
tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
|
|
199
199
|
tool_call_id?: string;
|
|
200
|
-
reasoning
|
|
200
|
+
/** @description The reasoning content of the chunk message. */
|
|
201
201
|
reasoning_content?: string;
|
|
202
|
+
/** @description The reasoning of the chunk message. Same as reasoning_content. */
|
|
203
|
+
reasoning?: string;
|
|
202
204
|
};
|
|
203
205
|
/** @description Common model information */
|
|
204
206
|
Model: {
|
|
205
|
-
id
|
|
206
|
-
object
|
|
207
|
+
id: string;
|
|
208
|
+
object: string;
|
|
207
209
|
/** Format: int64 */
|
|
208
|
-
created
|
|
209
|
-
owned_by
|
|
210
|
-
served_by
|
|
210
|
+
created: number;
|
|
211
|
+
owned_by: string;
|
|
212
|
+
served_by: components['schemas']['Provider'];
|
|
211
213
|
};
|
|
212
214
|
/** @description Response structure for listing models */
|
|
213
215
|
ListModelsResponse: {
|
|
@@ -266,11 +268,8 @@ export interface components {
|
|
|
266
268
|
/** @description Options for streaming response. Only set this when you set `stream: true`.
|
|
267
269
|
* */
|
|
268
270
|
ChatCompletionStreamOptions: {
|
|
269
|
-
/**
|
|
270
|
-
*
|
|
271
|
-
*
|
|
272
|
-
* @default true
|
|
273
|
-
*/
|
|
271
|
+
/** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
|
|
272
|
+
* */
|
|
274
273
|
include_usage: boolean;
|
|
275
274
|
};
|
|
276
275
|
CreateChatCompletionRequest: {
|
|
@@ -292,6 +291,10 @@ export interface components {
|
|
|
292
291
|
/** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
|
|
293
292
|
* */
|
|
294
293
|
tools?: components['schemas']['ChatCompletionTool'][];
|
|
294
|
+
/** @description The format of the reasoning content. Can be `raw` or `parsed`.
|
|
295
|
+
* When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under `reasoning` or `reasoning_content` attribute.
|
|
296
|
+
* */
|
|
297
|
+
reasoning_format?: string;
|
|
295
298
|
};
|
|
296
299
|
/** @description The function that the model called. */
|
|
297
300
|
ChatCompletionMessageToolCallFunction: {
|
|
@@ -350,11 +353,13 @@ export interface components {
|
|
|
350
353
|
/** @description A chat completion delta generated by streamed model responses. */
|
|
351
354
|
ChatCompletionStreamResponseDelta: {
|
|
352
355
|
/** @description The contents of the chunk message. */
|
|
353
|
-
content
|
|
356
|
+
content: string;
|
|
354
357
|
/** @description The reasoning content of the chunk message. */
|
|
355
358
|
reasoning_content?: string;
|
|
359
|
+
/** @description The reasoning of the chunk message. Same as reasoning_content. */
|
|
360
|
+
reasoning?: string;
|
|
356
361
|
tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
|
|
357
|
-
role
|
|
362
|
+
role: components['schemas']['MessageRole'];
|
|
358
363
|
/** @description The refusal message generated by the model. */
|
|
359
364
|
refusal?: string;
|
|
360
365
|
};
|
|
@@ -418,6 +423,10 @@ export interface components {
|
|
|
418
423
|
/** @description The object type, which is always `chat.completion.chunk`. */
|
|
419
424
|
object: string;
|
|
420
425
|
usage?: components['schemas']['CompletionUsage'];
|
|
426
|
+
/** @description The format of the reasoning content. Can be `raw` or `parsed`.
|
|
427
|
+
* When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under reasoning_content.
|
|
428
|
+
* */
|
|
429
|
+
reasoning_format?: string;
|
|
421
430
|
};
|
|
422
431
|
Config: unknown;
|
|
423
432
|
};
|
|
package/dist/tests/client.test.js
CHANGED
|
@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
|
|
|
57
57
|
object: 'model',
|
|
58
58
|
created: 1686935002,
|
|
59
59
|
owned_by: 'openai',
|
|
60
|
+
served_by: generated_1.Provider.openai,
|
|
60
61
|
},
|
|
61
62
|
],
|
|
62
63
|
};
|
|
@@ -89,7 +90,6 @@ describe('InferenceGatewayClient', () => {
|
|
|
89
90
|
{ role: generated_1.MessageRole.system, content: 'You are a helpful assistant' },
|
|
90
91
|
{ role: generated_1.MessageRole.user, content: 'Hello' },
|
|
91
92
|
],
|
|
92
|
-
stream: false,
|
|
93
93
|
};
|
|
94
94
|
const mockResponse = {
|
|
95
95
|
id: 'chatcmpl-123',
|
|
@@ -120,14 +120,13 @@ describe('InferenceGatewayClient', () => {
|
|
|
120
120
|
expect(result).toEqual(mockResponse);
|
|
121
121
|
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
122
122
|
method: 'POST',
|
|
123
|
-
body: JSON.stringify(mockRequest),
|
|
123
|
+
body: JSON.stringify({ ...mockRequest, stream: false }),
|
|
124
124
|
}));
|
|
125
125
|
});
|
|
126
126
|
it('should create a chat completion with a specific provider', async () => {
|
|
127
127
|
const mockRequest = {
|
|
128
128
|
model: 'claude-3-opus-20240229',
|
|
129
129
|
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
130
|
-
stream: false,
|
|
131
130
|
};
|
|
132
131
|
const mockResponse = {
|
|
133
132
|
id: 'chatcmpl-456',
|
|
@@ -158,7 +157,7 @@ describe('InferenceGatewayClient', () => {
|
|
|
158
157
|
expect(result).toEqual(mockResponse);
|
|
159
158
|
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions?provider=anthropic', expect.objectContaining({
|
|
160
159
|
method: 'POST',
|
|
161
|
-
body: JSON.stringify(mockRequest),
|
|
160
|
+
body: JSON.stringify({ ...mockRequest, stream: false }),
|
|
162
161
|
}));
|
|
163
162
|
});
|
|
164
163
|
});
|
|
@@ -167,7 +166,6 @@ describe('InferenceGatewayClient', () => {
|
|
|
167
166
|
const mockRequest = {
|
|
168
167
|
model: 'gpt-4o',
|
|
169
168
|
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
170
|
-
stream: true,
|
|
171
169
|
};
|
|
172
170
|
const mockStream = new web_1.TransformStream();
|
|
173
171
|
const writer = mockStream.writable.getWriter();
|
|
@@ -201,6 +199,9 @@ describe('InferenceGatewayClient', () => {
|
|
|
201
199
|
body: JSON.stringify({
|
|
202
200
|
...mockRequest,
|
|
203
201
|
stream: true,
|
|
202
|
+
stream_options: {
|
|
203
|
+
include_usage: true,
|
|
204
|
+
},
|
|
204
205
|
}),
|
|
205
206
|
}));
|
|
206
207
|
});
|
|
@@ -208,7 +209,6 @@ describe('InferenceGatewayClient', () => {
|
|
|
208
209
|
const mockRequest = {
|
|
209
210
|
model: 'gpt-4o',
|
|
210
211
|
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
211
|
-
stream: true,
|
|
212
212
|
};
|
|
213
213
|
const mockStream = new web_1.TransformStream();
|
|
214
214
|
const writer = mockStream.writable.getWriter();
|
|
@@ -253,6 +253,9 @@ describe('InferenceGatewayClient', () => {
|
|
|
253
253
|
body: JSON.stringify({
|
|
254
254
|
...mockRequest,
|
|
255
255
|
stream: true,
|
|
256
|
+
stream_options: {
|
|
257
|
+
include_usage: true,
|
|
258
|
+
},
|
|
256
259
|
}),
|
|
257
260
|
}));
|
|
258
261
|
});
|
|
@@ -274,7 +277,6 @@ describe('InferenceGatewayClient', () => {
|
|
|
274
277
|
},
|
|
275
278
|
},
|
|
276
279
|
],
|
|
277
|
-
stream: true,
|
|
278
280
|
};
|
|
279
281
|
const mockStream = new web_1.TransformStream();
|
|
280
282
|
const writer = mockStream.writable.getWriter();
|
|
@@ -312,12 +314,21 @@ describe('InferenceGatewayClient', () => {
|
|
|
312
314
|
},
|
|
313
315
|
});
|
|
314
316
|
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
317
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
318
|
+
method: 'POST',
|
|
319
|
+
body: JSON.stringify({
|
|
320
|
+
...mockRequest,
|
|
321
|
+
stream: true,
|
|
322
|
+
stream_options: {
|
|
323
|
+
include_usage: true,
|
|
324
|
+
},
|
|
325
|
+
}),
|
|
326
|
+
}));
|
|
315
327
|
});
|
|
316
328
|
it('should handle errors in streaming chat completions', async () => {
|
|
317
329
|
const mockRequest = {
|
|
318
330
|
model: 'gpt-4o',
|
|
319
331
|
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
320
|
-
stream: true,
|
|
321
332
|
};
|
|
322
333
|
mockFetch.mockResolvedValueOnce({
|
|
323
334
|
ok: false,
|
|
@@ -334,10 +345,6 @@ describe('InferenceGatewayClient', () => {
|
|
|
334
345
|
const mockRequest = {
|
|
335
346
|
model: 'gpt-4o',
|
|
336
347
|
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
337
|
-
stream: true,
|
|
338
|
-
stream_options: {
|
|
339
|
-
include_usage: true,
|
|
340
|
-
},
|
|
341
348
|
};
|
|
342
349
|
const mockStream = new web_1.TransformStream();
|
|
343
350
|
const writer = mockStream.writable.getWriter();
|
|
@@ -379,6 +386,167 @@ describe('InferenceGatewayClient', () => {
|
|
|
379
386
|
body: JSON.stringify({
|
|
380
387
|
...mockRequest,
|
|
381
388
|
stream: true,
|
|
389
|
+
stream_options: {
|
|
390
|
+
include_usage: true,
|
|
391
|
+
},
|
|
392
|
+
}),
|
|
393
|
+
}));
|
|
394
|
+
});
|
|
395
|
+
it('should handle streaming chat completions with reasoning field', async () => {
|
|
396
|
+
const mockRequest = {
|
|
397
|
+
model: 'groq/deepseek-distilled-llama-3.1-70b',
|
|
398
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
399
|
+
};
|
|
400
|
+
const mockStream = new web_1.TransformStream();
|
|
401
|
+
const writer = mockStream.writable.getWriter();
|
|
402
|
+
const encoder = new node_util_1.TextEncoder();
|
|
403
|
+
mockFetch.mockResolvedValueOnce({
|
|
404
|
+
ok: true,
|
|
405
|
+
body: mockStream.readable,
|
|
406
|
+
});
|
|
407
|
+
const callbacks = {
|
|
408
|
+
onOpen: jest.fn(),
|
|
409
|
+
onChunk: jest.fn(),
|
|
410
|
+
onReasoning: jest.fn(),
|
|
411
|
+
onContent: jest.fn(),
|
|
412
|
+
onFinish: jest.fn(),
|
|
413
|
+
};
|
|
414
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
415
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
416
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
|
|
417
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
|
|
418
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
|
|
419
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
|
|
420
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
421
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
422
|
+
'data: [DONE]\n\n'));
|
|
423
|
+
await writer.close();
|
|
424
|
+
await streamPromise;
|
|
425
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
426
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
|
|
427
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
|
|
428
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
|
|
429
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
|
|
430
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
|
|
431
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
|
|
432
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
433
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
434
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
435
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
436
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
437
|
+
method: 'POST',
|
|
438
|
+
body: JSON.stringify({
|
|
439
|
+
...mockRequest,
|
|
440
|
+
stream: true,
|
|
441
|
+
stream_options: {
|
|
442
|
+
include_usage: true,
|
|
443
|
+
},
|
|
444
|
+
}),
|
|
445
|
+
}));
|
|
446
|
+
});
|
|
447
|
+
it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
|
|
448
|
+
const mockRequest = {
|
|
449
|
+
model: 'deepseek/deepseek-reasoner',
|
|
450
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
451
|
+
};
|
|
452
|
+
const mockStream = new web_1.TransformStream();
|
|
453
|
+
const writer = mockStream.writable.getWriter();
|
|
454
|
+
const encoder = new node_util_1.TextEncoder();
|
|
455
|
+
mockFetch.mockResolvedValueOnce({
|
|
456
|
+
ok: true,
|
|
457
|
+
body: mockStream.readable,
|
|
458
|
+
});
|
|
459
|
+
const callbacks = {
|
|
460
|
+
onOpen: jest.fn(),
|
|
461
|
+
onChunk: jest.fn(),
|
|
462
|
+
onReasoning: jest.fn(),
|
|
463
|
+
onContent: jest.fn(),
|
|
464
|
+
onFinish: jest.fn(),
|
|
465
|
+
};
|
|
466
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
467
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
468
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
|
|
469
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
|
|
470
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
|
|
471
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
|
|
472
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
|
|
473
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
474
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
475
|
+
'data: [DONE]\n\n'));
|
|
476
|
+
await writer.close();
|
|
477
|
+
await streamPromise;
|
|
478
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
479
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
|
|
480
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
|
|
481
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
|
|
482
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
|
|
483
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
|
|
484
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
|
|
485
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
|
|
486
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
487
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
488
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
489
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
490
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
491
|
+
method: 'POST',
|
|
492
|
+
body: JSON.stringify({
|
|
493
|
+
...mockRequest,
|
|
494
|
+
stream: true,
|
|
495
|
+
stream_options: {
|
|
496
|
+
include_usage: true,
|
|
497
|
+
},
|
|
498
|
+
}),
|
|
499
|
+
}));
|
|
500
|
+
});
|
|
501
|
+
it('should handle streaming chat completions with reasoning field (Groq)', async () => {
|
|
502
|
+
const mockRequest = {
|
|
503
|
+
model: 'llama-3.1-70b-versatile',
|
|
504
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
505
|
+
};
|
|
506
|
+
const mockStream = new web_1.TransformStream();
|
|
507
|
+
const writer = mockStream.writable.getWriter();
|
|
508
|
+
const encoder = new node_util_1.TextEncoder();
|
|
509
|
+
mockFetch.mockResolvedValueOnce({
|
|
510
|
+
ok: true,
|
|
511
|
+
body: mockStream.readable,
|
|
512
|
+
});
|
|
513
|
+
const callbacks = {
|
|
514
|
+
onOpen: jest.fn(),
|
|
515
|
+
onChunk: jest.fn(),
|
|
516
|
+
onReasoning: jest.fn(),
|
|
517
|
+
onContent: jest.fn(),
|
|
518
|
+
onFinish: jest.fn(),
|
|
519
|
+
};
|
|
520
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
521
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
522
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
|
|
523
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
|
|
524
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
|
|
525
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
|
|
526
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
527
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
528
|
+
'data: [DONE]\n\n'));
|
|
529
|
+
await writer.close();
|
|
530
|
+
await streamPromise;
|
|
531
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
532
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
|
|
533
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
|
|
534
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
|
|
535
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
|
|
536
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
|
|
537
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
|
|
538
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
539
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
540
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
541
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
542
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
543
|
+
method: 'POST',
|
|
544
|
+
body: JSON.stringify({
|
|
545
|
+
...mockRequest,
|
|
546
|
+
stream: true,
|
|
547
|
+
stream_options: {
|
|
548
|
+
include_usage: true,
|
|
549
|
+
},
|
|
382
550
|
}),
|
|
383
551
|
}));
|
|
384
552
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inference-gateway/sdk",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.2",
|
|
4
4
|
"description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"types": "dist/src/index.d.ts",
|