@inference-gateway/sdk 0.7.1 → 0.7.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,17 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.7.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.1...v0.7.2) (2025-05-30)
6
+
7
+ ### 📚 Documentation
8
+
9
+ * Add more examples of how to use this SDK ([#15](https://github.com/inference-gateway/typescript-sdk/issues/15)) ([d771356](https://github.com/inference-gateway/typescript-sdk/commit/d771356657279e63a1c4aaac6fe8370a277f08f6))
10
+
11
+ ### 🔧 Miscellaneous
12
+
13
+ * Add Docker-in-Docker feature to development container ([177e9f3](https://github.com/inference-gateway/typescript-sdk/commit/177e9f341c7b0fa84d975c754986c75fe98887c9))
14
+ * Remove MCP documentation references and update related instructions ([b33c08f](https://github.com/inference-gateway/typescript-sdk/commit/b33c08f2e1d1a9ae7e0c523f6f1733db86329d90))
15
+
5
16
  ## [0.7.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.0...v0.7.1) (2025-05-27)
6
17
 
7
18
  ### 🐛 Bug Fixes
package/README.md CHANGED
@@ -7,12 +7,14 @@ An SDK written in TypeScript for the [Inference Gateway](https://github.com/eden
7
7
  - [Usage](#usage)
8
8
  - [Creating a Client](#creating-a-client)
9
9
  - [Listing Models](#listing-models)
10
+ - [Listing MCP Tools](#listing-mcp-tools)
10
11
  - [Creating Chat Completions](#creating-chat-completions)
11
12
  - [Streaming Chat Completions](#streaming-chat-completions)
12
13
  - [Tool Calls](#tool-calls)
13
14
  - [Proxying Requests](#proxying-requests)
14
15
  - [Health Check](#health-check)
15
16
  - [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
17
+ - [Examples](#examples)
16
18
  - [Contributing](#contributing)
17
19
  - [License](#license)
18
20
 
@@ -51,7 +53,7 @@ try {
51
53
  console.log('All models:', models);
52
54
 
53
55
  // List models from a specific provider
54
- const openaiModels = await client.listModels(Provider.OpenAI);
56
+ const openaiModels = await client.listModels(Provider.openai);
55
57
  console.log('OpenAI models:', openaiModels);
56
58
  } catch (error) {
57
59
  console.error('Error:', error);
@@ -235,7 +237,7 @@ To proxy requests directly to a provider:
235
237
  import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
236
238
 
237
239
  const client = new InferenceGatewayClient({
238
- baseURL: 'http://localhost:8080/v1',
240
+ baseURL: 'http://localhost:8080',
239
241
  });
240
242
 
241
243
  try {
@@ -261,7 +263,7 @@ To check if the Inference Gateway is running:
261
263
  import { InferenceGatewayClient } from '@inference-gateway/sdk';
262
264
 
263
265
  const client = new InferenceGatewayClient({
264
- baseURL: 'http://localhost:8080/v1',
266
+ baseURL: 'http://localhost:8080',
265
267
  });
266
268
 
267
269
  try {
@@ -292,6 +294,10 @@ const clientWithHeaders = client.withOptions({
292
294
  });
293
295
  ```
294
296
 
297
+ ### Examples
298
+
299
+ For more examples, check the [examples directory](./examples).
300
+
295
301
  ## Contributing
296
302
 
297
303
  Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
@@ -1,5 +1,5 @@
1
1
  import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
2
- interface ChatCompletionStreamCallbacks {
2
+ export interface ChatCompletionStreamCallbacks {
3
3
  onOpen?: () => void;
4
4
  onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
5
5
  onReasoning?: (reasoningContent: string) => void;
@@ -8,6 +8,7 @@ interface ChatCompletionStreamCallbacks {
8
8
  onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
9
9
  onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
10
10
  onError?: (error: SchemaError) => void;
11
+ onMCPTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
11
12
  }
12
13
  export interface ClientOptions {
13
14
  baseURL?: string;
@@ -55,6 +56,10 @@ export declare class InferenceGatewayClient {
55
56
  * @param provider - Optional provider to use for this request
56
57
  */
57
58
  streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
59
+ /**
60
+ * Initiates a streaming request to the chat completions endpoint
61
+ */
62
+ private initiateStreamingRequest;
58
63
  /**
59
64
  * Proxy a request to a specific provider.
60
65
  */
@@ -64,4 +69,3 @@ export declare class InferenceGatewayClient {
64
69
  */
65
70
  healthCheck(): Promise<boolean>;
66
71
  }
67
- export {};
@@ -2,6 +2,191 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.InferenceGatewayClient = void 0;
4
4
  const generated_1 = require("./types/generated");
5
+ /**
6
+ * Handles streaming response processing with enhanced support for MCP and tool calls
7
+ */
8
+ class StreamProcessor {
9
+ callbacks;
10
+ clientProvidedTools;
11
+ incompleteToolCalls = new Map();
12
+ constructor(callbacks, clientProvidedTools) {
13
+ this.callbacks = callbacks;
14
+ this.clientProvidedTools = clientProvidedTools;
15
+ }
16
+ async processStream(body) {
17
+ const reader = body.getReader();
18
+ const decoder = new TextDecoder();
19
+ let buffer = '';
20
+ try {
21
+ while (true) {
22
+ const { done, value } = await reader.read();
23
+ if (done)
24
+ break;
25
+ buffer += decoder.decode(value, { stream: true });
26
+ const lines = buffer.split('\n');
27
+ buffer = lines.pop() || '';
28
+ for (const line of lines) {
29
+ if (line.startsWith('data: ')) {
30
+ const data = line.slice(5).trim();
31
+ await this.processSSEData(data);
32
+ }
33
+ }
34
+ }
35
+ }
36
+ catch (error) {
37
+ const apiError = {
38
+ error: error.message || 'Unknown error',
39
+ };
40
+ this.callbacks.onError?.(apiError);
41
+ throw error;
42
+ }
43
+ finally {
44
+ try {
45
+ reader.releaseLock();
46
+ }
47
+ catch {
48
+ // Reader might already be closed, ignore
49
+ }
50
+ }
51
+ }
52
+ async processSSEData(data) {
53
+ if (data === '[DONE]') {
54
+ this.finalizeIncompleteToolCalls();
55
+ this.callbacks.onFinish?.(null);
56
+ return;
57
+ }
58
+ try {
59
+ const chunk = JSON.parse(data);
60
+ // Handle mid-stream errors from the Inference Gateway
61
+ // When providers fail during streaming, the gateway embeds error info in the stream
62
+ if ('error' in chunk && chunk.error) {
63
+ const apiError = {
64
+ error: typeof chunk.error === 'string'
65
+ ? chunk.error
66
+ : JSON.stringify(chunk.error),
67
+ };
68
+ this.callbacks.onError?.(apiError);
69
+ return;
70
+ }
71
+ const validChunk = chunk;
72
+ this.callbacks.onChunk?.(validChunk);
73
+ if (validChunk.usage && this.callbacks.onUsageMetrics) {
74
+ this.callbacks.onUsageMetrics(validChunk.usage);
75
+ }
76
+ const choice = validChunk.choices?.[0];
77
+ if (!choice)
78
+ return;
79
+ this.handleReasoningContent(choice);
80
+ const content = choice.delta?.content;
81
+ if (content) {
82
+ this.callbacks.onContent?.(content);
83
+ }
84
+ this.handleToolCalls(choice);
85
+ this.handleFinishReason(choice);
86
+ }
87
+ catch (parseError) {
88
+ let errorMessage = `Failed to parse SSE data: ${parseError.message}`;
89
+ const errorMatch = data.match(/"error":\s*"([^"]+)"/);
90
+ if (errorMatch) {
91
+ errorMessage = errorMatch[1];
92
+ }
93
+ else {
94
+ const nestedErrorMatch = data.match(/"message":\s*"([^"]+)"/);
95
+ if (nestedErrorMatch) {
96
+ errorMessage = nestedErrorMatch[1];
97
+ }
98
+ }
99
+ const apiError = {
100
+ error: errorMessage,
101
+ };
102
+ this.callbacks.onError?.(apiError);
103
+ }
104
+ }
105
+ handleReasoningContent(choice) {
106
+ const reasoningContent = choice.delta?.reasoning_content;
107
+ if (reasoningContent !== undefined) {
108
+ this.callbacks.onReasoning?.(reasoningContent);
109
+ }
110
+ const reasoning = choice.delta?.reasoning;
111
+ if (reasoning !== undefined) {
112
+ this.callbacks.onReasoning?.(reasoning);
113
+ }
114
+ }
115
+ handleToolCalls(choice) {
116
+ const toolCalls = choice.delta?.tool_calls;
117
+ if (!toolCalls || toolCalls.length === 0)
118
+ return;
119
+ for (const toolCallChunk of toolCalls) {
120
+ const index = toolCallChunk.index;
121
+ if (!this.incompleteToolCalls.has(index)) {
122
+ this.incompleteToolCalls.set(index, {
123
+ id: toolCallChunk.id || '',
124
+ type: generated_1.ChatCompletionToolType.function,
125
+ function: {
126
+ name: toolCallChunk.function?.name || '',
127
+ arguments: toolCallChunk.function?.arguments || '',
128
+ },
129
+ });
130
+ }
131
+ else {
132
+ const existingToolCall = this.incompleteToolCalls.get(index);
133
+ if (toolCallChunk.id) {
134
+ existingToolCall.id = toolCallChunk.id;
135
+ }
136
+ if (toolCallChunk.function?.name) {
137
+ existingToolCall.function.name = toolCallChunk.function.name;
138
+ }
139
+ if (toolCallChunk.function?.arguments) {
140
+ existingToolCall.function.arguments +=
141
+ toolCallChunk.function.arguments;
142
+ }
143
+ }
144
+ }
145
+ }
146
+ handleFinishReason(choice) {
147
+ const finishReason = choice.finish_reason;
148
+ if (finishReason === 'tool_calls' && this.incompleteToolCalls.size > 0) {
149
+ this.finalizeIncompleteToolCalls();
150
+ }
151
+ }
152
+ finalizeIncompleteToolCalls() {
153
+ for (const [, toolCall] of this.incompleteToolCalls.entries()) {
154
+ if (!toolCall.id || !toolCall.function.name) {
155
+ globalThis.console.warn('Incomplete tool call detected:', toolCall);
156
+ continue;
157
+ }
158
+ const completedToolCall = {
159
+ id: toolCall.id,
160
+ type: toolCall.type,
161
+ function: {
162
+ name: toolCall.function.name,
163
+ arguments: toolCall.function.arguments,
164
+ },
165
+ };
166
+ if (this.isMCPTool(toolCall.function.name)) {
167
+ try {
168
+ if (toolCall.function.arguments) {
169
+ JSON.parse(toolCall.function.arguments);
170
+ }
171
+ this.callbacks.onMCPTool?.(completedToolCall);
172
+ }
173
+ catch (argError) {
174
+ globalThis.console.warn(`Invalid MCP tool arguments for ${toolCall.function.name}:`, argError);
175
+ }
176
+ }
177
+ else {
178
+ this.callbacks.onTool?.(completedToolCall);
179
+ }
180
+ }
181
+ this.incompleteToolCalls.clear();
182
+ }
183
+ isMCPTool(toolName) {
184
+ if (!toolName || typeof toolName !== 'string') {
185
+ return false;
186
+ }
187
+ return !this.clientProvidedTools.has(toolName);
188
+ }
189
+ }
5
190
  class InferenceGatewayClient {
6
191
  baseURL;
7
192
  apiKey;
@@ -108,6 +293,40 @@ class InferenceGatewayClient {
108
293
  * @param provider - Optional provider to use for this request
109
294
  */
110
295
  async streamChatCompletion(request, callbacks, provider) {
296
+ try {
297
+ const response = await this.initiateStreamingRequest(request, provider);
298
+ if (!response.body) {
299
+ const error = {
300
+ error: 'Response body is not readable',
301
+ };
302
+ callbacks.onError?.(error);
303
+ throw new Error('Response body is not readable');
304
+ }
305
+ callbacks.onOpen?.();
306
+ // Extract tool names from client-provided tools
307
+ const clientProvidedTools = new Set();
308
+ if (request.tools) {
309
+ for (const tool of request.tools) {
310
+ if (tool.type === 'function' && tool.function?.name) {
311
+ clientProvidedTools.add(tool.function.name);
312
+ }
313
+ }
314
+ }
315
+ const streamProcessor = new StreamProcessor(callbacks, clientProvidedTools);
316
+ await streamProcessor.processStream(response.body);
317
+ }
318
+ catch (error) {
319
+ const apiError = {
320
+ error: error.message || 'Unknown error occurred',
321
+ };
322
+ callbacks.onError?.(apiError);
323
+ throw error;
324
+ }
325
+ }
326
+ /**
327
+ * Initiates a streaming request to the chat completions endpoint
328
+ */
329
+ async initiateStreamingRequest(request, provider) {
111
330
  const query = {};
112
331
  if (provider) {
113
332
  query.provider = provider;
@@ -141,118 +360,17 @@ class InferenceGatewayClient {
141
360
  signal: controller.signal,
142
361
  });
143
362
  if (!response.ok) {
144
- const error = await response.json();
145
- throw new Error(error.error || `HTTP error! status: ${response.status}`);
146
- }
147
- if (!response.body) {
148
- throw new Error('Response body is not readable');
149
- }
150
- callbacks.onOpen?.();
151
- const reader = response.body.getReader();
152
- const decoder = new TextDecoder();
153
- let buffer = '';
154
- const incompleteToolCalls = new Map();
155
- while (true) {
156
- const { done, value } = await reader.read();
157
- if (done)
158
- break;
159
- buffer += decoder.decode(value, { stream: true });
160
- const lines = buffer.split('\n');
161
- buffer = lines.pop() || '';
162
- for (const line of lines) {
163
- if (line.startsWith('data: ')) {
164
- const data = line.slice(5).trim();
165
- if (data === '[DONE]') {
166
- for (const [, toolCall] of incompleteToolCalls.entries()) {
167
- callbacks.onTool?.({
168
- id: toolCall.id,
169
- type: toolCall.type,
170
- function: {
171
- name: toolCall.function.name,
172
- arguments: toolCall.function.arguments,
173
- },
174
- });
175
- }
176
- callbacks.onFinish?.(null);
177
- return;
178
- }
179
- try {
180
- const chunk = JSON.parse(data);
181
- callbacks.onChunk?.(chunk);
182
- if (chunk.usage && callbacks.onUsageMetrics) {
183
- callbacks.onUsageMetrics(chunk.usage);
184
- }
185
- const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
186
- if (reasoning_content !== undefined) {
187
- callbacks.onReasoning?.(reasoning_content);
188
- }
189
- const reasoning = chunk.choices[0]?.delta?.reasoning;
190
- if (reasoning !== undefined) {
191
- callbacks.onReasoning?.(reasoning);
192
- }
193
- const content = chunk.choices[0]?.delta?.content;
194
- if (content) {
195
- callbacks.onContent?.(content);
196
- }
197
- const toolCalls = chunk.choices[0]?.delta?.tool_calls;
198
- if (toolCalls && toolCalls.length > 0) {
199
- for (const toolCallChunk of toolCalls) {
200
- const index = toolCallChunk.index;
201
- if (!incompleteToolCalls.has(index)) {
202
- incompleteToolCalls.set(index, {
203
- id: toolCallChunk.id || '',
204
- type: generated_1.ChatCompletionToolType.function,
205
- function: {
206
- name: toolCallChunk.function?.name || '',
207
- arguments: toolCallChunk.function?.arguments || '',
208
- },
209
- });
210
- }
211
- else {
212
- const existingToolCall = incompleteToolCalls.get(index);
213
- if (toolCallChunk.id) {
214
- existingToolCall.id = toolCallChunk.id;
215
- }
216
- if (toolCallChunk.function?.name) {
217
- existingToolCall.function.name =
218
- toolCallChunk.function.name;
219
- }
220
- if (toolCallChunk.function?.arguments) {
221
- existingToolCall.function.arguments +=
222
- toolCallChunk.function.arguments;
223
- }
224
- }
225
- }
226
- }
227
- const finishReason = chunk.choices[0]?.finish_reason;
228
- if (finishReason === 'tool_calls' &&
229
- incompleteToolCalls.size > 0) {
230
- for (const [, toolCall] of incompleteToolCalls.entries()) {
231
- callbacks.onTool?.({
232
- id: toolCall.id,
233
- type: toolCall.type,
234
- function: {
235
- name: toolCall.function.name,
236
- arguments: toolCall.function.arguments,
237
- },
238
- });
239
- }
240
- incompleteToolCalls.clear();
241
- }
242
- }
243
- catch (e) {
244
- globalThis.console.error('Error parsing SSE data:', e);
245
- }
246
- }
363
+ let errorMessage = `HTTP error! status: ${response.status}`;
364
+ try {
365
+ const error = await response.json();
366
+ errorMessage = error.error || errorMessage;
367
+ }
368
+ catch {
369
+ // Failed to parse error response as JSON, use status message
247
370
  }
371
+ throw new Error(errorMessage);
248
372
  }
249
- }
250
- catch (error) {
251
- const apiError = {
252
- error: error.message || 'Unknown error',
253
- };
254
- callbacks.onError?.(apiError);
255
- throw error;
373
+ return response;
256
374
  }
257
375
  finally {
258
376
  globalThis.clearTimeout(timeoutId);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inference-gateway/sdk",
3
- "version": "0.7.1",
3
+ "version": "0.7.2",
4
4
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",