@inference-gateway/sdk 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
6
+
7
+ ### ♻️ Improvements
8
+
9
+ * Also process Groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
10
+
5
11
  ## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
6
12
 
7
13
  ### ♻️ Improvements
@@ -177,6 +177,10 @@ class InferenceGatewayClient {
177
177
  if (reasoning_content !== undefined) {
178
178
  callbacks.onReasoning?.(reasoning_content);
179
179
  }
180
+ const reasoning = chunk.choices[0]?.delta?.reasoning;
181
+ if (reasoning !== undefined) {
182
+ callbacks.onReasoning?.(reasoning);
183
+ }
180
184
  const content = chunk.choices[0]?.delta?.content;
181
185
  if (content) {
182
186
  callbacks.onContent?.(content);
@@ -180,8 +180,8 @@ export interface components {
180
180
  retry?: number;
181
181
  };
182
182
  Endpoints: {
183
- models?: string;
184
- chat?: string;
183
+ models: string;
184
+ chat: string;
185
185
  };
186
186
  Error: {
187
187
  error?: string;
@@ -197,17 +197,19 @@ export interface components {
197
197
  content: string;
198
198
  tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
199
199
  tool_call_id?: string;
200
- reasoning?: string;
200
+ /** @description The reasoning content of the chunk message. */
201
201
  reasoning_content?: string;
202
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
203
+ reasoning?: string;
202
204
  };
203
205
  /** @description Common model information */
204
206
  Model: {
205
- id?: string;
206
- object?: string;
207
+ id: string;
208
+ object: string;
207
209
  /** Format: int64 */
208
- created?: number;
209
- owned_by?: string;
210
- served_by?: components['schemas']['Provider'];
210
+ created: number;
211
+ owned_by: string;
212
+ served_by: components['schemas']['Provider'];
211
213
  };
212
214
  /** @description Response structure for listing models */
213
215
  ListModelsResponse: {
@@ -266,11 +268,8 @@ export interface components {
266
268
  /** @description Options for streaming response. Only set this when you set `stream: true`.
267
269
  * */
268
270
  ChatCompletionStreamOptions: {
269
- /**
270
- * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
271
- *
272
- * @default true
273
- */
271
+ /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
272
+ * */
274
273
  include_usage: boolean;
275
274
  };
276
275
  CreateChatCompletionRequest: {
@@ -292,6 +291,10 @@ export interface components {
292
291
  /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
293
292
  * */
294
293
  tools?: components['schemas']['ChatCompletionTool'][];
294
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
295
+ * When specified as raw, some reasoning models will output <think /> tags. When specified as parsed, the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
296
+ * */
297
+ reasoning_format?: string;
295
298
  };
296
299
  /** @description The function that the model called. */
297
300
  ChatCompletionMessageToolCallFunction: {
@@ -350,11 +353,13 @@ export interface components {
350
353
  /** @description A chat completion delta generated by streamed model responses. */
351
354
  ChatCompletionStreamResponseDelta: {
352
355
  /** @description The contents of the chunk message. */
353
- content?: string;
356
+ content: string;
354
357
  /** @description The reasoning content of the chunk message. */
355
358
  reasoning_content?: string;
359
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
360
+ reasoning?: string;
356
361
  tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
357
- role?: components['schemas']['MessageRole'];
362
+ role: components['schemas']['MessageRole'];
358
363
  /** @description The refusal message generated by the model. */
359
364
  refusal?: string;
360
365
  };
@@ -418,6 +423,10 @@ export interface components {
418
423
  /** @description The object type, which is always `chat.completion.chunk`. */
419
424
  object: string;
420
425
  usage?: components['schemas']['CompletionUsage'];
426
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
427
+ * When specified as raw, some reasoning models will output <think /> tags. When specified as parsed, the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
428
+ * */
429
+ reasoning_format?: string;
421
430
  };
422
431
  Config: unknown;
423
432
  };
@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
57
57
  object: 'model',
58
58
  created: 1686935002,
59
59
  owned_by: 'openai',
60
+ served_by: generated_1.Provider.openai,
60
61
  },
61
62
  ],
62
63
  };
@@ -391,6 +392,164 @@ describe('InferenceGatewayClient', () => {
391
392
  }),
392
393
  }));
393
394
  });
395
+ it('should handle streaming chat completions with reasoning field', async () => {
396
+ const mockRequest = {
397
+ model: 'groq/deepseek-distilled-llama-3.1-70b',
398
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
399
+ };
400
+ const mockStream = new web_1.TransformStream();
401
+ const writer = mockStream.writable.getWriter();
402
+ const encoder = new node_util_1.TextEncoder();
403
+ mockFetch.mockResolvedValueOnce({
404
+ ok: true,
405
+ body: mockStream.readable,
406
+ });
407
+ const callbacks = {
408
+ onOpen: jest.fn(),
409
+ onChunk: jest.fn(),
410
+ onReasoning: jest.fn(),
411
+ onContent: jest.fn(),
412
+ onFinish: jest.fn(),
413
+ };
414
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
415
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
416
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
417
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
418
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
419
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
420
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
421
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
422
+ 'data: [DONE]\n\n'));
423
+ await writer.close();
424
+ await streamPromise;
425
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
426
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
427
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
428
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
429
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
430
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
431
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
432
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
433
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
434
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
435
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
436
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
437
+ method: 'POST',
438
+ body: JSON.stringify({
439
+ ...mockRequest,
440
+ stream: true,
441
+ stream_options: {
442
+ include_usage: true,
443
+ },
444
+ }),
445
+ }));
446
+ });
447
+ it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
448
+ const mockRequest = {
449
+ model: 'deepseek/deepseek-reasoner',
450
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
451
+ };
452
+ const mockStream = new web_1.TransformStream();
453
+ const writer = mockStream.writable.getWriter();
454
+ const encoder = new node_util_1.TextEncoder();
455
+ mockFetch.mockResolvedValueOnce({
456
+ ok: true,
457
+ body: mockStream.readable,
458
+ });
459
+ const callbacks = {
460
+ onOpen: jest.fn(),
461
+ onChunk: jest.fn(),
462
+ onReasoning: jest.fn(),
463
+ onContent: jest.fn(),
464
+ onFinish: jest.fn(),
465
+ };
466
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
467
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
468
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
469
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
470
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
471
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
472
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
473
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
474
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
475
+ 'data: [DONE]\n\n'));
476
+ await writer.close();
477
+ await streamPromise;
478
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
479
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
480
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
481
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
482
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
483
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
484
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
485
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
486
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
487
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
488
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
489
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
490
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
491
+ method: 'POST',
492
+ body: JSON.stringify({
493
+ ...mockRequest,
494
+ stream: true,
495
+ stream_options: {
496
+ include_usage: true,
497
+ },
498
+ }),
499
+ }));
500
+ });
501
+ it('should handle streaming chat completions with reasoning field (Groq)', async () => {
502
+ const mockRequest = {
503
+ model: 'llama-3.1-70b-versatile',
504
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
505
+ };
506
+ const mockStream = new web_1.TransformStream();
507
+ const writer = mockStream.writable.getWriter();
508
+ const encoder = new node_util_1.TextEncoder();
509
+ mockFetch.mockResolvedValueOnce({
510
+ ok: true,
511
+ body: mockStream.readable,
512
+ });
513
+ const callbacks = {
514
+ onOpen: jest.fn(),
515
+ onChunk: jest.fn(),
516
+ onReasoning: jest.fn(),
517
+ onContent: jest.fn(),
518
+ onFinish: jest.fn(),
519
+ };
520
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
521
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
522
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
523
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
524
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
525
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
526
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
527
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
528
+ 'data: [DONE]\n\n'));
529
+ await writer.close();
530
+ await streamPromise;
531
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
532
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
533
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
534
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
535
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
536
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
537
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
538
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
539
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
540
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
541
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
542
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
543
+ method: 'POST',
544
+ body: JSON.stringify({
545
+ ...mockRequest,
546
+ stream: true,
547
+ stream_options: {
548
+ include_usage: true,
549
+ },
550
+ }),
551
+ }));
552
+ });
394
553
  });
395
554
  describe('proxy', () => {
396
555
  it('should proxy requests to a specific provider', async () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inference-gateway/sdk",
3
- "version": "0.6.1",
3
+ "version": "0.6.2",
4
4
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",