npm - @inference-gateway/sdk - Versions diffs - 0.6.1 → 0.6.2 - Mend

@inference-gateway/sdk 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md +6 -0
package/dist/src/client.js +4 -0
package/dist/src/types/generated/index.d.ts +24 -15
package/dist/tests/client.test.js +159 -0
package/package.json +1 -1

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,12 @@
 All notable changes to this project will be documented in this file.
+## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
+### ♻️ Improvements
+* Process also groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
 ## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
 ### ♻️ Improvements

package/dist/src/client.js CHANGED Viewed

@@ -177,6 +177,10 @@ class InferenceGatewayClient {
                             if (reasoning_content !== undefined) {
                                 callbacks.onReasoning?.(reasoning_content);
                             }
+                            const reasoning = chunk.choices[0]?.delta?.reasoning;
+                            if (reasoning !== undefined) {
+                                callbacks.onReasoning?.(reasoning);
+                            }
                             const content = chunk.choices[0]?.delta?.content;
                             if (content) {
                                 callbacks.onContent?.(content);

package/dist/src/types/generated/index.d.ts CHANGED Viewed

@@ -180,8 +180,8 @@ export interface components {
             retry?: number;
         };
         Endpoints: {
-            models?: string;
-            chat?: string;
+            models: string;
+            chat: string;
         };
         Error: {
             error?: string;
@@ -197,17 +197,19 @@ export interface components {
             content: string;
             tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
             tool_call_id?: string;
-            reasoning?: string;
+            /** @description The reasoning content of the chunk message. */
             reasoning_content?: string;
+            /** @description The reasoning of the chunk message. Same as reasoning_content. */
+            reasoning?: string;
         };
         /** @description Common model information */
         Model: {
-            id?: string;
-            object?: string;
+            id: string;
+            object: string;
             /** Format: int64 */
-            created?: number;
-            owned_by?: string;
-            served_by?: components['schemas']['Provider'];
+            created: number;
+            owned_by: string;
+            served_by: components['schemas']['Provider'];
         };
         /** @description Response structure for listing models */
         ListModelsResponse: {
@@ -266,11 +268,8 @@ export interface components {
         /** @description Options for streaming response. Only set this when you set `stream: true`.
          *      */
         ChatCompletionStreamOptions: {
-            /**
-             * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
-             *
-             * @default true
-             */
+            /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+             *      */
             include_usage: boolean;
         };
         CreateChatCompletionRequest: {
@@ -292,6 +291,10 @@ export interface components {
             /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
              *      */
             tools?: components['schemas']['ChatCompletionTool'][];
+            /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+             *     When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under  `reasoning` or `reasoning_content` attribute.
+             *      */
+            reasoning_format?: string;
         };
         /** @description The function that the model called. */
         ChatCompletionMessageToolCallFunction: {
@@ -350,11 +353,13 @@ export interface components {
         /** @description A chat completion delta generated by streamed model responses. */
         ChatCompletionStreamResponseDelta: {
             /** @description The contents of the chunk message. */
-            content?: string;
+            content: string;
             /** @description The reasoning content of the chunk message. */
             reasoning_content?: string;
+            /** @description The reasoning of the chunk message. Same as reasoning_content. */
+            reasoning?: string;
             tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
-            role?: components['schemas']['MessageRole'];
+            role: components['schemas']['MessageRole'];
             /** @description The refusal message generated by the model. */
             refusal?: string;
         };
@@ -418,6 +423,10 @@ export interface components {
             /** @description The object type, which is always `chat.completion.chunk`. */
             object: string;
             usage?: components['schemas']['CompletionUsage'];
+            /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+             *     When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under reasoning_content.
+             *      */
+            reasoning_format?: string;
         };
         Config: unknown;
     };

package/dist/tests/client.test.js CHANGED Viewed

@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
                         object: 'model',
                         created: 1686935002,
                         owned_by: 'openai',
+                        served_by: generated_1.Provider.openai,
                     },
                 ],
             };
@@ -391,6 +392,164 @@ describe('InferenceGatewayClient', () => {
                 }),
             }));
         });
+        it('should handle streaming chat completions with reasoning field', async () => {
+            const mockRequest = {
+                model: 'groq/deepseek-distilled-llama-3.1-70b',
+                messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onReasoning: jest.fn(),
+                onContent: jest.fn(),
+                onFinish: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+            expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+            expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+            expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+            expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+            expect(callbacks.onContent).toHaveBeenCalledWith('!');
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
+                }),
+            }));
+        });
+        it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
+            const mockRequest = {
+                model: 'deepseek/deepseek-reasoner',
+                messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onReasoning: jest.fn(),
+                onContent: jest.fn(),
+                onFinish: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+            expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+            expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
+            expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+            expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+            expect(callbacks.onContent).toHaveBeenCalledWith('!');
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
+                }),
+            }));
+        });
+        it('should handle streaming chat completions with reasoning field (Groq)', async () => {
+            const mockRequest = {
+                model: 'llama-3.1-70b-versatile',
+                messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onReasoning: jest.fn(),
+                onContent: jest.fn(),
+                onFinish: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+            expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+            expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+            expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+            expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+            expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+            expect(callbacks.onContent).toHaveBeenCalledWith('!');
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                    stream_options: {
+                        include_usage: true,
+                    },
+                }),
+            }));
+        });
     });
     describe('proxy', () => {
         it('should proxy requests to a specific provider', async () => {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inference-gateway/sdk",
-  "version": "0.6.1",
+  "version": "0.6.2",
   "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
   "main": "dist/src/index.js",
   "types": "dist/src/index.d.ts",