npm - @inference-gateway/sdk - Versions diffs - 0.7.1 → 0.7.3 - Mend

@inference-gateway/sdk 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,27 @@
 All notable changes to this project will be documented in this file.
+## [0.7.3](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.2...v0.7.3) (2025-06-01)
+### ♻️ Improvements
+* Enhance stream processing with abort signal support and increase default timeout ([#18](https://github.com/inference-gateway/typescript-sdk/issues/18)) ([3778138](https://github.com/inference-gateway/typescript-sdk/commit/377813851b6635ca7aafe2a5c9888b720736c9f5))
+### 🔧 Miscellaneous
+* Update MCP example README and remove unused example file ([99b34e7](https://github.com/inference-gateway/typescript-sdk/commit/99b34e70edf0c8aada1d0e0d0874481ea8381a79))
+## [0.7.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.1...v0.7.2) (2025-05-30)
+### 📚 Documentation
+* Add more examples how to use this SDK ([#15](https://github.com/inference-gateway/typescript-sdk/issues/15)) ([d771356](https://github.com/inference-gateway/typescript-sdk/commit/d771356657279e63a1c4aaac6fe8370a277f08f6))
+### 🔧 Miscellaneous
+* Add Docker-in-Docker feature to development container ([177e9f3](https://github.com/inference-gateway/typescript-sdk/commit/177e9f341c7b0fa84d975c754986c75fe98887c9))
+* Remove MCP documentation references and update related instructions ([b33c08f](https://github.com/inference-gateway/typescript-sdk/commit/b33c08f2e1d1a9ae7e0c523f6f1733db86329d90))
 ## [0.7.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.0...v0.7.1) (2025-05-27)
 ### 🐛 Bug Fixes

package/README.md CHANGED Viewed

@@ -7,12 +7,14 @@ An SDK written in TypeScript for the [Inference Gateway](https://github.com/eden
   - [Usage](#usage)
     - [Creating a Client](#creating-a-client)
     - [Listing Models](#listing-models)
+    - [Listing MCP Tools](#listing-mcp-tools)
     - [Creating Chat Completions](#creating-chat-completions)
     - [Streaming Chat Completions](#streaming-chat-completions)
     - [Tool Calls](#tool-calls)
     - [Proxying Requests](#proxying-requests)
     - [Health Check](#health-check)
     - [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
+    - [Examples](#examples)
   - [Contributing](#contributing)
   - [License](#license)
@@ -51,7 +53,7 @@ try {
   console.log('All models:', models);
   // List models from a specific provider
-  const openaiModels = await client.listModels(Provider.OpenAI);
+  const openaiModels = await client.listModels(Provider.openai);
   console.log('OpenAI models:', openaiModels);
 } catch (error) {
   console.error('Error:', error);
@@ -235,7 +237,7 @@ To proxy requests directly to a provider:
 import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
 const client = new InferenceGatewayClient({
-  baseURL: 'http://localhost:8080/v1',
+  baseURL: 'http://localhost:8080',
 });
 try {
@@ -261,7 +263,7 @@ To check if the Inference Gateway is running:
 import { InferenceGatewayClient } from '@inference-gateway/sdk';
 const client = new InferenceGatewayClient({
-  baseURL: 'http://localhost:8080/v1',
+  baseURL: 'http://localhost:8080',
 });
 try {
@@ -292,6 +294,10 @@ const clientWithHeaders = client.withOptions({
 });
 ```
+### Examples
+For more examples, check the [examples directory](./examples).
 ## Contributing
 Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.

package/dist/src/client.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
-interface ChatCompletionStreamCallbacks {
+export interface ChatCompletionStreamCallbacks {
     onOpen?: () => void;
     onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
     onReasoning?: (reasoningContent: string) => void;
@@ -8,6 +8,7 @@ interface ChatCompletionStreamCallbacks {
     onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
     onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
     onError?: (error: SchemaError) => void;
+    onMCPTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
 }
 export interface ClientOptions {
     baseURL?: string;
@@ -53,8 +54,13 @@ export declare class InferenceGatewayClient {
      * @param request - Chat completion request (must include at least model and messages)
      * @param callbacks - Callbacks for handling streaming events
      * @param provider - Optional provider to use for this request
+     * @param abortSignal - Optional AbortSignal to cancel the request
      */
-    streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
+    streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider, abortSignal?: AbortSignal): Promise<void>;
+    /**
+     * Initiates a streaming request to the chat completions endpoint
+     */
+    private initiateStreamingRequest;
     /**
      * Proxy a request to a specific provider.
      */
@@ -64,4 +70,3 @@ export declare class InferenceGatewayClient {
      */
     healthCheck(): Promise<boolean>;
 }
-export {};

package/dist/src/client.js CHANGED Viewed

@@ -2,6 +2,205 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.InferenceGatewayClient = void 0;
 const generated_1 = require("./types/generated");
+/**
+ * Handles streaming response processing with enhanced support for MCP and tool calls
+ */
+class StreamProcessor {
+    callbacks;
+    clientProvidedTools;
+    incompleteToolCalls = new Map();
+    constructor(callbacks, clientProvidedTools) {
+        this.callbacks = callbacks;
+        this.clientProvidedTools = clientProvidedTools;
+    }
+    async processStream(body, abortSignal) {
+        const reader = body.getReader();
+        const decoder = new TextDecoder();
+        let buffer = '';
+        try {
+            while (true) {
+                if (abortSignal?.aborted) {
+                    throw new Error('Stream processing was aborted');
+                }
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                buffer += decoder.decode(value, { stream: true });
+                const lines = buffer.split('\n');
+                buffer = lines.pop() || '';
+                for (const line of lines) {
+                    if (line.startsWith('data: ')) {
+                        const data = line.slice(5).trim();
+                        await this.processSSEData(data);
+                    }
+                }
+            }
+        }
+        catch (error) {
+            if (abortSignal?.aborted || error.name === 'AbortError') {
+                console.log('Stream processing was cancelled');
+                return;
+            }
+            const apiError = {
+                error: error.message || 'Unknown error',
+            };
+            this.callbacks.onError?.(apiError);
+            throw error;
+        }
+        finally {
+            try {
+                reader.releaseLock();
+            }
+            catch {
+                // Reader might already be closed, ignore
+            }
+        }
+    }
+    async processSSEData(data) {
+        if (data === '[DONE]') {
+            this.finalizeIncompleteToolCalls();
+            this.callbacks.onFinish?.(null);
+            return;
+        }
+        try {
+            const chunk = JSON.parse(data);
+            // Handle mid-stream errors from the Inference Gateway
+            // When providers fail during streaming, the gateway embeds error info in the stream
+            if ('error' in chunk && chunk.error) {
+                const apiError = {
+                    error: typeof chunk.error === 'string'
+                        ? chunk.error
+                        : JSON.stringify(chunk.error),
+                };
+                this.callbacks.onError?.(apiError);
+                return;
+            }
+            const validChunk = chunk;
+            this.callbacks.onChunk?.(validChunk);
+            if (validChunk.usage && this.callbacks.onUsageMetrics) {
+                this.callbacks.onUsageMetrics(validChunk.usage);
+            }
+            const choice = validChunk.choices?.[0];
+            if (!choice)
+                return;
+            this.handleReasoningContent(choice);
+            const content = choice.delta?.content;
+            if (content) {
+                this.callbacks.onContent?.(content);
+            }
+            this.handleToolCalls(choice);
+            this.handleFinishReason(choice);
+        }
+        catch (parseError) {
+            let errorMessage = `Failed to parse SSE data: ${parseError.message}`;
+            const errorMatch = data.match(/"error":\s*"([^"]+)"/);
+            if (errorMatch) {
+                errorMessage = errorMatch[1];
+            }
+            else {
+                const nestedErrorMatch = data.match(/"message":\s*"([^"]+)"/);
+                if (nestedErrorMatch) {
+                    errorMessage = nestedErrorMatch[1];
+                }
+            }
+            const apiError = {
+                error: errorMessage,
+            };
+            this.callbacks.onError?.(apiError);
+        }
+    }
+    handleReasoningContent(choice) {
+        const reasoningContent = choice.delta?.reasoning_content;
+        if (reasoningContent !== undefined) {
+            this.callbacks.onReasoning?.(reasoningContent);
+        }
+        const reasoning = choice.delta?.reasoning;
+        if (reasoning !== undefined) {
+            this.callbacks.onReasoning?.(reasoning);
+        }
+    }
+    handleToolCalls(choice) {
+        const toolCalls = choice.delta?.tool_calls;
+        if (!toolCalls || toolCalls.length === 0)
+            return;
+        for (const toolCallChunk of toolCalls) {
+            const index = toolCallChunk.index;
+            if (!this.incompleteToolCalls.has(index)) {
+                this.incompleteToolCalls.set(index, {
+                    id: toolCallChunk.id || '',
+                    type: generated_1.ChatCompletionToolType.function,
+                    function: {
+                        name: toolCallChunk.function?.name || '',
+                        arguments: toolCallChunk.function?.arguments || '',
+                    },
+                });
+            }
+            else {
+                const existingToolCall = this.incompleteToolCalls.get(index);
+                if (toolCallChunk.id) {
+                    existingToolCall.id = toolCallChunk.id;
+                }
+                if (toolCallChunk.function?.name) {
+                    existingToolCall.function.name = toolCallChunk.function.name;
+                }
+                if (toolCallChunk.function?.arguments) {
+                    existingToolCall.function.arguments +=
+                        toolCallChunk.function.arguments;
+                }
+            }
+        }
+    }
+    handleFinishReason(choice) {
+        const finishReason = choice.finish_reason;
+        if (finishReason === 'tool_calls' && this.incompleteToolCalls.size > 0) {
+            this.finalizeIncompleteToolCalls();
+        }
+    }
+    finalizeIncompleteToolCalls() {
+        this.incompleteToolCalls.forEach((toolCall) => {
+            if (!toolCall.id || !toolCall.function.name) {
+                globalThis.console.warn('Incomplete tool call detected:', toolCall);
+                return;
+            }
+            const completedToolCall = {
+                id: toolCall.id,
+                type: toolCall.type,
+                function: {
+                    name: toolCall.function.name,
+                    arguments: toolCall.function.arguments,
+                },
+            };
+            if (this.isMCPTool(toolCall.function.name)) {
+                try {
+                    if (toolCall.function.arguments) {
+                        JSON.parse(toolCall.function.arguments);
+                    }
+                    this.callbacks.onMCPTool?.(completedToolCall);
+                }
+                catch (argError) {
+                    const isIncompleteJSON = toolCall.function.arguments &&
+                        !toolCall.function.arguments.trim().endsWith('}');
+                    if (isIncompleteJSON) {
+                        globalThis.console.warn(`Incomplete MCP tool arguments for ${toolCall.function.name} (stream was likely interrupted):`, toolCall.function.arguments);
+                    }
+                    else {
+                        globalThis.console.warn(`Invalid MCP tool arguments for ${toolCall.function.name}:`, argError);
+                    }
+                }
+            }
+            else {
+                this.callbacks.onTool?.(completedToolCall);
+            }
+        });
+        this.incompleteToolCalls.clear();
+    }
+    isMCPTool(toolName) {
+        if (!toolName || typeof toolName !== 'string') {
+            return false;
+        }
+        return !this.clientProvidedTools.has(toolName);
+    }
+}
 class InferenceGatewayClient {
     baseURL;
     apiKey;
@@ -14,7 +213,7 @@ class InferenceGatewayClient {
         this.apiKey = options.apiKey;
         this.defaultHeaders = options.defaultHeaders || {};
         this.defaultQuery = options.defaultQuery || {};
-        this.timeout = options.timeout || 30000;
+        this.timeout = options.timeout || 60000; // Increased default timeout to 60 seconds
         this.fetchFn = options.fetch || globalThis.fetch;
     }
     /**
@@ -106,8 +305,43 @@ class InferenceGatewayClient {
      * @param request - Chat completion request (must include at least model and messages)
      * @param callbacks - Callbacks for handling streaming events
      * @param provider - Optional provider to use for this request
+     * @param abortSignal - Optional AbortSignal to cancel the request
      */
-    async streamChatCompletion(request, callbacks, provider) {
+    async streamChatCompletion(request, callbacks, provider, abortSignal) {
+        try {
+            const response = await this.initiateStreamingRequest(request, provider, abortSignal);
+            if (!response.body) {
+                const error = {
+                    error: 'Response body is not readable',
+                };
+                callbacks.onError?.(error);
+                throw new Error('Response body is not readable');
+            }
+            callbacks.onOpen?.();
+            // Extract tool names from client-provided tools
+            const clientProvidedTools = new Set();
+            if (request.tools) {
+                for (const tool of request.tools) {
+                    if (tool.type === 'function' && tool.function?.name) {
+                        clientProvidedTools.add(tool.function.name);
+                    }
+                }
+            }
+            const streamProcessor = new StreamProcessor(callbacks, clientProvidedTools);
+            await streamProcessor.processStream(response.body, abortSignal);
+        }
+        catch (error) {
+            const apiError = {
+                error: error.message || 'Unknown error occurred',
+            };
+            callbacks.onError?.(apiError);
+            throw error;
+        }
+    }
+    /**
+     * Initiates a streaming request to the chat completions endpoint
+     */
+    async initiateStreamingRequest(request, provider, abortSignal) {
         const query = {};
         if (provider) {
             query.provider = provider;
@@ -126,6 +360,9 @@ class InferenceGatewayClient {
             headers.set('Authorization', `Bearer ${this.apiKey}`);
         }
         const controller = new AbortController();
+        const combinedSignal = abortSignal
+            ? AbortSignal.any([abortSignal, controller.signal])
+            : controller.signal;
         const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
         try {
             const response = await this.fetchFn(url, {
@@ -138,121 +375,20 @@ class InferenceGatewayClient {
                         include_usage: true,
                     },
                 }),
-                signal: controller.signal,
+                signal: combinedSignal,
             });
             if (!response.ok) {
-                const error = await response.json();
-                throw new Error(error.error || `HTTP error! status: ${response.status}`);
-            }
-            if (!response.body) {
-                throw new Error('Response body is not readable');
-            }
-            callbacks.onOpen?.();
-            const reader = response.body.getReader();
-            const decoder = new TextDecoder();
-            let buffer = '';
-            const incompleteToolCalls = new Map();
-            while (true) {
-                const { done, value } = await reader.read();
-                if (done)
-                    break;
-                buffer += decoder.decode(value, { stream: true });
-                const lines = buffer.split('\n');
-                buffer = lines.pop() || '';
-                for (const line of lines) {
-                    if (line.startsWith('data: ')) {
-                        const data = line.slice(5).trim();
-                        if (data === '[DONE]') {
-                            for (const [, toolCall] of incompleteToolCalls.entries()) {
-                                callbacks.onTool?.({
-                                    id: toolCall.id,
-                                    type: toolCall.type,
-                                    function: {
-                                        name: toolCall.function.name,
-                                        arguments: toolCall.function.arguments,
-                                    },
-                                });
-                            }
-                            callbacks.onFinish?.(null);
-                            return;
-                        }
-                        try {
-                            const chunk = JSON.parse(data);
-                            callbacks.onChunk?.(chunk);
-                            if (chunk.usage && callbacks.onUsageMetrics) {
-                                callbacks.onUsageMetrics(chunk.usage);
-                            }
-                            const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
-                            if (reasoning_content !== undefined) {
-                                callbacks.onReasoning?.(reasoning_content);
-                            }
-                            const reasoning = chunk.choices[0]?.delta?.reasoning;
-                            if (reasoning !== undefined) {
-                                callbacks.onReasoning?.(reasoning);
-                            }
-                            const content = chunk.choices[0]?.delta?.content;
-                            if (content) {
-                                callbacks.onContent?.(content);
-                            }
-                            const toolCalls = chunk.choices[0]?.delta?.tool_calls;
-                            if (toolCalls && toolCalls.length > 0) {
-                                for (const toolCallChunk of toolCalls) {
-                                    const index = toolCallChunk.index;
-                                    if (!incompleteToolCalls.has(index)) {
-                                        incompleteToolCalls.set(index, {
-                                            id: toolCallChunk.id || '',
-                                            type: generated_1.ChatCompletionToolType.function,
-                                            function: {
-                                                name: toolCallChunk.function?.name || '',
-                                                arguments: toolCallChunk.function?.arguments || '',
-                                            },
-                                        });
-                                    }
-                                    else {
-                                        const existingToolCall = incompleteToolCalls.get(index);
-                                        if (toolCallChunk.id) {
-                                            existingToolCall.id = toolCallChunk.id;
-                                        }
-                                        if (toolCallChunk.function?.name) {
-                                            existingToolCall.function.name =
-                                                toolCallChunk.function.name;
-                                        }
-                                        if (toolCallChunk.function?.arguments) {
-                                            existingToolCall.function.arguments +=
-                                                toolCallChunk.function.arguments;
-                                        }
-                                    }
-                                }
-                            }
-                            const finishReason = chunk.choices[0]?.finish_reason;
-                            if (finishReason === 'tool_calls' &&
-                                incompleteToolCalls.size > 0) {
-                                for (const [, toolCall] of incompleteToolCalls.entries()) {
-                                    callbacks.onTool?.({
-                                        id: toolCall.id,
-                                        type: toolCall.type,
-                                        function: {
-                                            name: toolCall.function.name,
-                                            arguments: toolCall.function.arguments,
-                                        },
-                                    });
-                                }
-                                incompleteToolCalls.clear();
-                            }
-                        }
-                        catch (e) {
-                            globalThis.console.error('Error parsing SSE data:', e);
-                        }
-                    }
+                let errorMessage = `HTTP error! status: ${response.status}`;
+                try {
+                    const error = await response.json();
+                    errorMessage = error.error || errorMessage;
+                }
+                catch {
+                    // Failed to parse error response as JSON, use status message
                 }
+                throw new Error(errorMessage);
             }
-        }
-        catch (error) {
-            const apiError = {
-                error: error.message || 'Unknown error',
-            };
-            callbacks.onError?.(apiError);
-            throw error;
+            return response;
         }
         finally {
             globalThis.clearTimeout(timeoutId);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inference-gateway/sdk",
-  "version": "0.7.1",
+  "version": "0.7.3",
   "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
   "main": "dist/src/index.js",
   "types": "dist/src/index.d.ts",