@inference-gateway/sdk 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,27 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.7.3](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.2...v0.7.3) (2025-06-01)
6
+
7
+ ### ♻️ Improvements
8
+
9
+ * Enhance stream processing with abort signal support and increase default timeout ([#18](https://github.com/inference-gateway/typescript-sdk/issues/18)) ([3778138](https://github.com/inference-gateway/typescript-sdk/commit/377813851b6635ca7aafe2a5c9888b720736c9f5))
10
+
11
+ ### 🔧 Miscellaneous
12
+
13
+ * Update MCP example README and remove unused example file ([99b34e7](https://github.com/inference-gateway/typescript-sdk/commit/99b34e70edf0c8aada1d0e0d0874481ea8381a79))
14
+
15
+ ## [0.7.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.1...v0.7.2) (2025-05-30)
16
+
17
+ ### 📚 Documentation
18
+
19
+ * Add more examples of how to use this SDK ([#15](https://github.com/inference-gateway/typescript-sdk/issues/15)) ([d771356](https://github.com/inference-gateway/typescript-sdk/commit/d771356657279e63a1c4aaac6fe8370a277f08f6))
20
+
21
+ ### 🔧 Miscellaneous
22
+
23
+ * Add Docker-in-Docker feature to development container ([177e9f3](https://github.com/inference-gateway/typescript-sdk/commit/177e9f341c7b0fa84d975c754986c75fe98887c9))
24
+ * Remove MCP documentation references and update related instructions ([b33c08f](https://github.com/inference-gateway/typescript-sdk/commit/b33c08f2e1d1a9ae7e0c523f6f1733db86329d90))
25
+
5
26
  ## [0.7.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.0...v0.7.1) (2025-05-27)
6
27
 
7
28
  ### 🐛 Bug Fixes
package/README.md CHANGED
@@ -7,12 +7,14 @@ An SDK written in TypeScript for the [Inference Gateway](https://github.com/eden
7
7
  - [Usage](#usage)
8
8
  - [Creating a Client](#creating-a-client)
9
9
  - [Listing Models](#listing-models)
10
+ - [Listing MCP Tools](#listing-mcp-tools)
10
11
  - [Creating Chat Completions](#creating-chat-completions)
11
12
  - [Streaming Chat Completions](#streaming-chat-completions)
12
13
  - [Tool Calls](#tool-calls)
13
14
  - [Proxying Requests](#proxying-requests)
14
15
  - [Health Check](#health-check)
15
16
  - [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
17
+ - [Examples](#examples)
16
18
  - [Contributing](#contributing)
17
19
  - [License](#license)
18
20
 
@@ -51,7 +53,7 @@ try {
51
53
  console.log('All models:', models);
52
54
 
53
55
  // List models from a specific provider
54
- const openaiModels = await client.listModels(Provider.OpenAI);
56
+ const openaiModels = await client.listModels(Provider.openai);
55
57
  console.log('OpenAI models:', openaiModels);
56
58
  } catch (error) {
57
59
  console.error('Error:', error);
@@ -235,7 +237,7 @@ To proxy requests directly to a provider:
235
237
  import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
236
238
 
237
239
  const client = new InferenceGatewayClient({
238
- baseURL: 'http://localhost:8080/v1',
240
+ baseURL: 'http://localhost:8080',
239
241
  });
240
242
 
241
243
  try {
@@ -261,7 +263,7 @@ To check if the Inference Gateway is running:
261
263
  import { InferenceGatewayClient } from '@inference-gateway/sdk';
262
264
 
263
265
  const client = new InferenceGatewayClient({
264
- baseURL: 'http://localhost:8080/v1',
266
+ baseURL: 'http://localhost:8080',
265
267
  });
266
268
 
267
269
  try {
@@ -292,6 +294,10 @@ const clientWithHeaders = client.withOptions({
292
294
  });
293
295
  ```
294
296
 
297
+ ### Examples
298
+
299
+ For more examples, check the [examples directory](./examples).
300
+
295
301
  ## Contributing
296
302
 
297
303
  Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
@@ -1,5 +1,5 @@
1
1
  import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
2
- interface ChatCompletionStreamCallbacks {
2
+ export interface ChatCompletionStreamCallbacks {
3
3
  onOpen?: () => void;
4
4
  onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
5
5
  onReasoning?: (reasoningContent: string) => void;
@@ -8,6 +8,7 @@ interface ChatCompletionStreamCallbacks {
8
8
  onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
9
9
  onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
10
10
  onError?: (error: SchemaError) => void;
11
+ onMCPTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
11
12
  }
12
13
  export interface ClientOptions {
13
14
  baseURL?: string;
@@ -53,8 +54,13 @@ export declare class InferenceGatewayClient {
53
54
  * @param request - Chat completion request (must include at least model and messages)
54
55
  * @param callbacks - Callbacks for handling streaming events
55
56
  * @param provider - Optional provider to use for this request
57
+ * @param abortSignal - Optional AbortSignal to cancel the request
56
58
  */
57
- streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
59
+ streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider, abortSignal?: AbortSignal): Promise<void>;
60
+ /**
61
+ * Initiates a streaming request to the chat completions endpoint
62
+ */
63
+ private initiateStreamingRequest;
58
64
  /**
59
65
  * Proxy a request to a specific provider.
60
66
  */
@@ -64,4 +70,3 @@ export declare class InferenceGatewayClient {
64
70
  */
65
71
  healthCheck(): Promise<boolean>;
66
72
  }
67
- export {};
@@ -2,6 +2,205 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.InferenceGatewayClient = void 0;
4
4
  const generated_1 = require("./types/generated");
5
+ /**
6
+ * Handles streaming response processing with enhanced support for MCP and tool calls
7
+ */
8
+ class StreamProcessor {
9
+ callbacks;
10
+ clientProvidedTools;
11
+ incompleteToolCalls = new Map();
12
+ constructor(callbacks, clientProvidedTools) {
13
+ this.callbacks = callbacks;
14
+ this.clientProvidedTools = clientProvidedTools;
15
+ }
16
+ async processStream(body, abortSignal) {
17
+ const reader = body.getReader();
18
+ const decoder = new TextDecoder();
19
+ let buffer = '';
20
+ try {
21
+ while (true) {
22
+ if (abortSignal?.aborted) {
23
+ throw new Error('Stream processing was aborted');
24
+ }
25
+ const { done, value } = await reader.read();
26
+ if (done)
27
+ break;
28
+ buffer += decoder.decode(value, { stream: true });
29
+ const lines = buffer.split('\n');
30
+ buffer = lines.pop() || '';
31
+ for (const line of lines) {
32
+ if (line.startsWith('data: ')) {
33
+ const data = line.slice(5).trim();
34
+ await this.processSSEData(data);
35
+ }
36
+ }
37
+ }
38
+ }
39
+ catch (error) {
40
+ if (abortSignal?.aborted || error.name === 'AbortError') {
41
+ console.log('Stream processing was cancelled');
42
+ return;
43
+ }
44
+ const apiError = {
45
+ error: error.message || 'Unknown error',
46
+ };
47
+ this.callbacks.onError?.(apiError);
48
+ throw error;
49
+ }
50
+ finally {
51
+ try {
52
+ reader.releaseLock();
53
+ }
54
+ catch {
55
+ // Reader might already be closed, ignore
56
+ }
57
+ }
58
+ }
59
+ async processSSEData(data) {
60
+ if (data === '[DONE]') {
61
+ this.finalizeIncompleteToolCalls();
62
+ this.callbacks.onFinish?.(null);
63
+ return;
64
+ }
65
+ try {
66
+ const chunk = JSON.parse(data);
67
+ // Handle mid-stream errors from the Inference Gateway
68
+ // When providers fail during streaming, the gateway embeds error info in the stream
69
+ if ('error' in chunk && chunk.error) {
70
+ const apiError = {
71
+ error: typeof chunk.error === 'string'
72
+ ? chunk.error
73
+ : JSON.stringify(chunk.error),
74
+ };
75
+ this.callbacks.onError?.(apiError);
76
+ return;
77
+ }
78
+ const validChunk = chunk;
79
+ this.callbacks.onChunk?.(validChunk);
80
+ if (validChunk.usage && this.callbacks.onUsageMetrics) {
81
+ this.callbacks.onUsageMetrics(validChunk.usage);
82
+ }
83
+ const choice = validChunk.choices?.[0];
84
+ if (!choice)
85
+ return;
86
+ this.handleReasoningContent(choice);
87
+ const content = choice.delta?.content;
88
+ if (content) {
89
+ this.callbacks.onContent?.(content);
90
+ }
91
+ this.handleToolCalls(choice);
92
+ this.handleFinishReason(choice);
93
+ }
94
+ catch (parseError) {
95
+ let errorMessage = `Failed to parse SSE data: ${parseError.message}`;
96
+ const errorMatch = data.match(/"error":\s*"([^"]+)"/);
97
+ if (errorMatch) {
98
+ errorMessage = errorMatch[1];
99
+ }
100
+ else {
101
+ const nestedErrorMatch = data.match(/"message":\s*"([^"]+)"/);
102
+ if (nestedErrorMatch) {
103
+ errorMessage = nestedErrorMatch[1];
104
+ }
105
+ }
106
+ const apiError = {
107
+ error: errorMessage,
108
+ };
109
+ this.callbacks.onError?.(apiError);
110
+ }
111
+ }
112
+ handleReasoningContent(choice) {
113
+ const reasoningContent = choice.delta?.reasoning_content;
114
+ if (reasoningContent !== undefined) {
115
+ this.callbacks.onReasoning?.(reasoningContent);
116
+ }
117
+ const reasoning = choice.delta?.reasoning;
118
+ if (reasoning !== undefined) {
119
+ this.callbacks.onReasoning?.(reasoning);
120
+ }
121
+ }
122
+ handleToolCalls(choice) {
123
+ const toolCalls = choice.delta?.tool_calls;
124
+ if (!toolCalls || toolCalls.length === 0)
125
+ return;
126
+ for (const toolCallChunk of toolCalls) {
127
+ const index = toolCallChunk.index;
128
+ if (!this.incompleteToolCalls.has(index)) {
129
+ this.incompleteToolCalls.set(index, {
130
+ id: toolCallChunk.id || '',
131
+ type: generated_1.ChatCompletionToolType.function,
132
+ function: {
133
+ name: toolCallChunk.function?.name || '',
134
+ arguments: toolCallChunk.function?.arguments || '',
135
+ },
136
+ });
137
+ }
138
+ else {
139
+ const existingToolCall = this.incompleteToolCalls.get(index);
140
+ if (toolCallChunk.id) {
141
+ existingToolCall.id = toolCallChunk.id;
142
+ }
143
+ if (toolCallChunk.function?.name) {
144
+ existingToolCall.function.name = toolCallChunk.function.name;
145
+ }
146
+ if (toolCallChunk.function?.arguments) {
147
+ existingToolCall.function.arguments +=
148
+ toolCallChunk.function.arguments;
149
+ }
150
+ }
151
+ }
152
+ }
153
+ handleFinishReason(choice) {
154
+ const finishReason = choice.finish_reason;
155
+ if (finishReason === 'tool_calls' && this.incompleteToolCalls.size > 0) {
156
+ this.finalizeIncompleteToolCalls();
157
+ }
158
+ }
159
+ finalizeIncompleteToolCalls() {
160
+ this.incompleteToolCalls.forEach((toolCall) => {
161
+ if (!toolCall.id || !toolCall.function.name) {
162
+ globalThis.console.warn('Incomplete tool call detected:', toolCall);
163
+ return;
164
+ }
165
+ const completedToolCall = {
166
+ id: toolCall.id,
167
+ type: toolCall.type,
168
+ function: {
169
+ name: toolCall.function.name,
170
+ arguments: toolCall.function.arguments,
171
+ },
172
+ };
173
+ if (this.isMCPTool(toolCall.function.name)) {
174
+ try {
175
+ if (toolCall.function.arguments) {
176
+ JSON.parse(toolCall.function.arguments);
177
+ }
178
+ this.callbacks.onMCPTool?.(completedToolCall);
179
+ }
180
+ catch (argError) {
181
+ const isIncompleteJSON = toolCall.function.arguments &&
182
+ !toolCall.function.arguments.trim().endsWith('}');
183
+ if (isIncompleteJSON) {
184
+ globalThis.console.warn(`Incomplete MCP tool arguments for ${toolCall.function.name} (stream was likely interrupted):`, toolCall.function.arguments);
185
+ }
186
+ else {
187
+ globalThis.console.warn(`Invalid MCP tool arguments for ${toolCall.function.name}:`, argError);
188
+ }
189
+ }
190
+ }
191
+ else {
192
+ this.callbacks.onTool?.(completedToolCall);
193
+ }
194
+ });
195
+ this.incompleteToolCalls.clear();
196
+ }
197
+ isMCPTool(toolName) {
198
+ if (!toolName || typeof toolName !== 'string') {
199
+ return false;
200
+ }
201
+ return !this.clientProvidedTools.has(toolName);
202
+ }
203
+ }
5
204
  class InferenceGatewayClient {
6
205
  baseURL;
7
206
  apiKey;
@@ -14,7 +213,7 @@ class InferenceGatewayClient {
14
213
  this.apiKey = options.apiKey;
15
214
  this.defaultHeaders = options.defaultHeaders || {};
16
215
  this.defaultQuery = options.defaultQuery || {};
17
- this.timeout = options.timeout || 30000;
216
+ this.timeout = options.timeout || 60000; // Increased default timeout to 60 seconds
18
217
  this.fetchFn = options.fetch || globalThis.fetch;
19
218
  }
20
219
  /**
@@ -106,8 +305,43 @@ class InferenceGatewayClient {
106
305
  * @param request - Chat completion request (must include at least model and messages)
107
306
  * @param callbacks - Callbacks for handling streaming events
108
307
  * @param provider - Optional provider to use for this request
308
+ * @param abortSignal - Optional AbortSignal to cancel the request
109
309
  */
110
- async streamChatCompletion(request, callbacks, provider) {
310
+ async streamChatCompletion(request, callbacks, provider, abortSignal) {
311
+ try {
312
+ const response = await this.initiateStreamingRequest(request, provider, abortSignal);
313
+ if (!response.body) {
314
+ const error = {
315
+ error: 'Response body is not readable',
316
+ };
317
+ callbacks.onError?.(error);
318
+ throw new Error('Response body is not readable');
319
+ }
320
+ callbacks.onOpen?.();
321
+ // Extract tool names from client-provided tools
322
+ const clientProvidedTools = new Set();
323
+ if (request.tools) {
324
+ for (const tool of request.tools) {
325
+ if (tool.type === 'function' && tool.function?.name) {
326
+ clientProvidedTools.add(tool.function.name);
327
+ }
328
+ }
329
+ }
330
+ const streamProcessor = new StreamProcessor(callbacks, clientProvidedTools);
331
+ await streamProcessor.processStream(response.body, abortSignal);
332
+ }
333
+ catch (error) {
334
+ const apiError = {
335
+ error: error.message || 'Unknown error occurred',
336
+ };
337
+ callbacks.onError?.(apiError);
338
+ throw error;
339
+ }
340
+ }
341
+ /**
342
+ * Initiates a streaming request to the chat completions endpoint
343
+ */
344
+ async initiateStreamingRequest(request, provider, abortSignal) {
111
345
  const query = {};
112
346
  if (provider) {
113
347
  query.provider = provider;
@@ -126,6 +360,9 @@ class InferenceGatewayClient {
126
360
  headers.set('Authorization', `Bearer ${this.apiKey}`);
127
361
  }
128
362
  const controller = new AbortController();
363
+ const combinedSignal = abortSignal
364
+ ? AbortSignal.any([abortSignal, controller.signal])
365
+ : controller.signal;
129
366
  const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
130
367
  try {
131
368
  const response = await this.fetchFn(url, {
@@ -138,121 +375,20 @@ class InferenceGatewayClient {
138
375
  include_usage: true,
139
376
  },
140
377
  }),
141
- signal: controller.signal,
378
+ signal: combinedSignal,
142
379
  });
143
380
  if (!response.ok) {
144
- const error = await response.json();
145
- throw new Error(error.error || `HTTP error! status: ${response.status}`);
146
- }
147
- if (!response.body) {
148
- throw new Error('Response body is not readable');
149
- }
150
- callbacks.onOpen?.();
151
- const reader = response.body.getReader();
152
- const decoder = new TextDecoder();
153
- let buffer = '';
154
- const incompleteToolCalls = new Map();
155
- while (true) {
156
- const { done, value } = await reader.read();
157
- if (done)
158
- break;
159
- buffer += decoder.decode(value, { stream: true });
160
- const lines = buffer.split('\n');
161
- buffer = lines.pop() || '';
162
- for (const line of lines) {
163
- if (line.startsWith('data: ')) {
164
- const data = line.slice(5).trim();
165
- if (data === '[DONE]') {
166
- for (const [, toolCall] of incompleteToolCalls.entries()) {
167
- callbacks.onTool?.({
168
- id: toolCall.id,
169
- type: toolCall.type,
170
- function: {
171
- name: toolCall.function.name,
172
- arguments: toolCall.function.arguments,
173
- },
174
- });
175
- }
176
- callbacks.onFinish?.(null);
177
- return;
178
- }
179
- try {
180
- const chunk = JSON.parse(data);
181
- callbacks.onChunk?.(chunk);
182
- if (chunk.usage && callbacks.onUsageMetrics) {
183
- callbacks.onUsageMetrics(chunk.usage);
184
- }
185
- const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
186
- if (reasoning_content !== undefined) {
187
- callbacks.onReasoning?.(reasoning_content);
188
- }
189
- const reasoning = chunk.choices[0]?.delta?.reasoning;
190
- if (reasoning !== undefined) {
191
- callbacks.onReasoning?.(reasoning);
192
- }
193
- const content = chunk.choices[0]?.delta?.content;
194
- if (content) {
195
- callbacks.onContent?.(content);
196
- }
197
- const toolCalls = chunk.choices[0]?.delta?.tool_calls;
198
- if (toolCalls && toolCalls.length > 0) {
199
- for (const toolCallChunk of toolCalls) {
200
- const index = toolCallChunk.index;
201
- if (!incompleteToolCalls.has(index)) {
202
- incompleteToolCalls.set(index, {
203
- id: toolCallChunk.id || '',
204
- type: generated_1.ChatCompletionToolType.function,
205
- function: {
206
- name: toolCallChunk.function?.name || '',
207
- arguments: toolCallChunk.function?.arguments || '',
208
- },
209
- });
210
- }
211
- else {
212
- const existingToolCall = incompleteToolCalls.get(index);
213
- if (toolCallChunk.id) {
214
- existingToolCall.id = toolCallChunk.id;
215
- }
216
- if (toolCallChunk.function?.name) {
217
- existingToolCall.function.name =
218
- toolCallChunk.function.name;
219
- }
220
- if (toolCallChunk.function?.arguments) {
221
- existingToolCall.function.arguments +=
222
- toolCallChunk.function.arguments;
223
- }
224
- }
225
- }
226
- }
227
- const finishReason = chunk.choices[0]?.finish_reason;
228
- if (finishReason === 'tool_calls' &&
229
- incompleteToolCalls.size > 0) {
230
- for (const [, toolCall] of incompleteToolCalls.entries()) {
231
- callbacks.onTool?.({
232
- id: toolCall.id,
233
- type: toolCall.type,
234
- function: {
235
- name: toolCall.function.name,
236
- arguments: toolCall.function.arguments,
237
- },
238
- });
239
- }
240
- incompleteToolCalls.clear();
241
- }
242
- }
243
- catch (e) {
244
- globalThis.console.error('Error parsing SSE data:', e);
245
- }
246
- }
381
+ let errorMessage = `HTTP error! status: ${response.status}`;
382
+ try {
383
+ const error = await response.json();
384
+ errorMessage = error.error || errorMessage;
385
+ }
386
+ catch {
387
+ // Failed to parse error response as JSON, use status message
247
388
  }
389
+ throw new Error(errorMessage);
248
390
  }
249
- }
250
- catch (error) {
251
- const apiError = {
252
- error: error.message || 'Unknown error',
253
- };
254
- callbacks.onError?.(apiError);
255
- throw error;
391
+ return response;
256
392
  }
257
393
  finally {
258
394
  globalThis.clearTimeout(timeoutId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inference-gateway/sdk",
3
- "version": "0.7.1",
3
+ "version": "0.7.3",
4
4
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",