@inference-gateway/sdk 0.6.1 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/src/client.js +4 -0
- package/dist/src/types/generated/index.d.ts +24 -15
- package/dist/tests/client.test.js +159 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED

@@ -2,6 +2,12 @@

 All notable changes to this project will be documented in this file.

+## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
+
+### ♻️ Improvements
+
+* Process also groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
+
 ## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)

 ### ♻️ Improvements
package/dist/src/client.js
CHANGED

@@ -177,6 +177,10 @@ class InferenceGatewayClient {
                     if (reasoning_content !== undefined) {
                         callbacks.onReasoning?.(reasoning_content);
                     }
+                    const reasoning = chunk.choices[0]?.delta?.reasoning;
+                    if (reasoning !== undefined) {
+                        callbacks.onReasoning?.(reasoning);
+                    }
                     const content = chunk.choices[0]?.delta?.content;
                     if (content) {
                         callbacks.onContent?.(content);
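The new branch mirrors the existing `reasoning_content` handling: whichever delta field the provider populates is forwarded to the same `onReasoning` callback. A minimal consumer-side sketch of how that surfaces; the client construction options, exported names, and model name are assumptions for illustration, since only `streamChatCompletion` and its callbacks appear in this diff:

```typescript
import { InferenceGatewayClient, MessageRole } from '@inference-gateway/sdk';

// Hypothetical gateway URL and constructor options; adjust to your deployment.
const client = new InferenceGatewayClient({ baseURL: 'http://localhost:8080/v1' });

await client.streamChatCompletion(
  {
    model: 'groq/deepseek-distilled-llama-3.1-70b', // placeholder reasoning-capable model
    messages: [{ role: MessageRole.user, content: 'Hello' }],
  },
  {
    // Fired for both `reasoning` (Groq-style) and `reasoning_content` (DeepSeek-style) deltas.
    onReasoning: (reasoning) => process.stdout.write(reasoning),
    onContent: (content) => process.stdout.write(content),
    onFinish: () => process.stdout.write('\n'),
  }
);
```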
package/dist/src/types/generated/index.d.ts
CHANGED

@@ -180,8 +180,8 @@ export interface components {
             retry?: number;
         };
         Endpoints: {
-            models
-            chat
+            models: string;
+            chat: string;
         };
         Error: {
             error?: string;
@@ -197,17 +197,19 @@ export interface components {
             content: string;
             tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
             tool_call_id?: string;
-            reasoning
+            /** @description The reasoning content of the chunk message. */
             reasoning_content?: string;
+            /** @description The reasoning of the chunk message. Same as reasoning_content. */
+            reasoning?: string;
         };
         /** @description Common model information */
         Model: {
-            id
-            object
+            id: string;
+            object: string;
             /** Format: int64 */
-            created
-            owned_by
-            served_by
+            created: number;
+            owned_by: string;
+            served_by: components['schemas']['Provider'];
         };
         /** @description Response structure for listing models */
         ListModelsResponse: {
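Since the schema documents `reasoning` as carrying the same data as `reasoning_content`, downstream code can coalesce the two fields. A small hedged sketch; the `ReasoningBearing` shape below is a local illustration, not an exported type:

```typescript
// Structural type matching just the two fields added in this hunk; not part of the package.
interface ReasoningBearing {
  reasoning?: string;
  reasoning_content?: string;
}

// The two fields are documented as equivalent, so prefer whichever one the provider set.
function extractReasoning(message: ReasoningBearing): string | undefined {
  return message.reasoning ?? message.reasoning_content;
}
```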
@@ -266,11 +268,8 @@ export interface components {
         /** @description Options for streaming response. Only set this when you set `stream: true`.
          * */
         ChatCompletionStreamOptions: {
-            /**
-             *
-             *
-             * @default true
-             */
+            /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+             * */
             include_usage: boolean;
         };
         CreateChatCompletionRequest: {
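The rewritten description spells out the stream shape under `include_usage`: every chunk carries a `usage` key (null until the end), and one final chunk with an empty `choices` array reports the totals before `data: [DONE]`. A hedged sketch of those last events; all values and the token-count field names are illustrative, not taken from this diff:

```typescript
// Illustrative tail of a stream requested with stream_options: { include_usage: true }.
const lastContentChunk = {
  object: 'chat.completion.chunk',
  choices: [{ index: 0, delta: { content: '!' }, finish_reason: 'stop' }],
  usage: null, // non-final chunks include usage, but with a null value
};

const usageChunk = {
  object: 'chat.completion.chunk',
  choices: [], // always an empty array on the usage chunk
  usage: { prompt_tokens: 9, completion_tokens: 12, total_tokens: 21 }, // assumed field names
};
// ...followed on the wire by: data: [DONE]
```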
@@ -292,6 +291,10 @@ export interface components {
             /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
              * */
             tools?: components['schemas']['ChatCompletionTool'][];
+            /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+             * When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under `reasoning` or `reasoning_content` attribute.
+             * */
+            reasoning_format?: string;
         };
         /** @description The function that the model called. */
         ChatCompletionMessageToolCallFunction: {
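The new `reasoning_format` request field chooses between raw `<think />` output and parsed `reasoning`/`reasoning_content` attributes. A hedged request sketch; the model name is a placeholder borrowed from the tests further down:

```typescript
// Sketch of a chat completion request using the new optional field.
const request = {
  model: 'groq/deepseek-distilled-llama-3.1-70b', // placeholder reasoning model
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  // 'parsed' surfaces reasoning under `reasoning` / `reasoning_content`;
  // 'raw' lets the model emit inline <think /> tags instead.
  reasoning_format: 'parsed',
};
```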
@@ -350,11 +353,13 @@ export interface components {
         /** @description A chat completion delta generated by streamed model responses. */
         ChatCompletionStreamResponseDelta: {
             /** @description The contents of the chunk message. */
-            content
+            content: string;
             /** @description The reasoning content of the chunk message. */
             reasoning_content?: string;
+            /** @description The reasoning of the chunk message. Same as reasoning_content. */
+            reasoning?: string;
             tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
-            role
+            role: components['schemas']['MessageRole'];
             /** @description The refusal message generated by the model. */
             refusal?: string;
         };
@@ -418,6 +423,10 @@ export interface components {
             /** @description The object type, which is always `chat.completion.chunk`. */
             object: string;
             usage?: components['schemas']['CompletionUsage'];
+            /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+             * When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under reasoning_content.
+             * */
+            reasoning_format?: string;
         };
         Config: unknown;
     };
package/dist/tests/client.test.js
CHANGED

@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
                     object: 'model',
                     created: 1686935002,
                     owned_by: 'openai',
+                    served_by: generated_1.Provider.openai,
                 },
             ],
         };
@@ -391,6 +392,164 @@ describe('InferenceGatewayClient', () => {
             }),
         }));
     });
+    it('should handle streaming chat completions with reasoning field', async () => {
+        const mockRequest = {
+            model: 'groq/deepseek-distilled-llama-3.1-70b',
+            messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+        };
+        const mockStream = new web_1.TransformStream();
+        const writer = mockStream.writable.getWriter();
+        const encoder = new node_util_1.TextEncoder();
+        mockFetch.mockResolvedValueOnce({
+            ok: true,
+            body: mockStream.readable,
+        });
+        const callbacks = {
+            onOpen: jest.fn(),
+            onChunk: jest.fn(),
+            onReasoning: jest.fn(),
+            onContent: jest.fn(),
+            onFinish: jest.fn(),
+        };
+        const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+        await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'));
+        await writer.close();
+        await streamPromise;
+        expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+        expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+        expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+        expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+        expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+        expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+        expect(callbacks.onContent).toHaveBeenCalledWith('!');
+        expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+        expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+            method: 'POST',
+            body: JSON.stringify({
+                ...mockRequest,
+                stream: true,
+                stream_options: {
+                    include_usage: true,
+                },
+            }),
+        }));
+    });
+    it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
+        const mockRequest = {
+            model: 'deepseek/deepseek-reasoner',
+            messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+        };
+        const mockStream = new web_1.TransformStream();
+        const writer = mockStream.writable.getWriter();
+        const encoder = new node_util_1.TextEncoder();
+        mockFetch.mockResolvedValueOnce({
+            ok: true,
+            body: mockStream.readable,
+        });
+        const callbacks = {
+            onOpen: jest.fn(),
+            onChunk: jest.fn(),
+            onReasoning: jest.fn(),
+            onContent: jest.fn(),
+            onFinish: jest.fn(),
+        };
+        const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+        await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'));
+        await writer.close();
+        await streamPromise;
+        expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+        expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+        expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+        expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
+        expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+        expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+        expect(callbacks.onContent).toHaveBeenCalledWith('!');
+        expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+        expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+            method: 'POST',
+            body: JSON.stringify({
+                ...mockRequest,
+                stream: true,
+                stream_options: {
+                    include_usage: true,
+                },
+            }),
+        }));
+    });
+    it('should handle streaming chat completions with reasoning field (Groq)', async () => {
+        const mockRequest = {
+            model: 'llama-3.1-70b-versatile',
+            messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+        };
+        const mockStream = new web_1.TransformStream();
+        const writer = mockStream.writable.getWriter();
+        const encoder = new node_util_1.TextEncoder();
+        mockFetch.mockResolvedValueOnce({
+            ok: true,
+            body: mockStream.readable,
+        });
+        const callbacks = {
+            onOpen: jest.fn(),
+            onChunk: jest.fn(),
+            onReasoning: jest.fn(),
+            onContent: jest.fn(),
+            onFinish: jest.fn(),
+        };
+        const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+        await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: [DONE]\n\n'));
+        await writer.close();
+        await streamPromise;
+        expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+        expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+        expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+        expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+        expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+        expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+        expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+        expect(callbacks.onContent).toHaveBeenCalledWith('!');
+        expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+        expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+            method: 'POST',
+            body: JSON.stringify({
+                ...mockRequest,
+                stream: true,
+                stream_options: {
+                    include_usage: true,
+                },
+            }),
+        }));
+    });
 });
 describe('proxy', () => {
     it('should proxy requests to a specific provider', async () => {
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@inference-gateway/sdk",
-  "version": "0.6.1",
+  "version": "0.6.2",
   "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
   "main": "dist/src/index.js",
   "types": "dist/src/index.d.ts",