@inference-gateway/sdk 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +9 -3
- package/dist/src/client.d.ts +8 -3
- package/dist/src/client.js +249 -113
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,27 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.7.3](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.2...v0.7.3) (2025-06-01)
|
|
6
|
+
|
|
7
|
+
### ♻️ Improvements
|
|
8
|
+
|
|
9
|
+
* Enhance stream processing with abort signal support and increase default timeout ([#18](https://github.com/inference-gateway/typescript-sdk/issues/18)) ([3778138](https://github.com/inference-gateway/typescript-sdk/commit/377813851b6635ca7aafe2a5c9888b720736c9f5))
|
|
10
|
+
|
|
11
|
+
### 🔧 Miscellaneous
|
|
12
|
+
|
|
13
|
+
* Update MCP example README and remove unused example file ([99b34e7](https://github.com/inference-gateway/typescript-sdk/commit/99b34e70edf0c8aada1d0e0d0874481ea8381a79))
|
|
14
|
+
|
|
15
|
+
## [0.7.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.1...v0.7.2) (2025-05-30)
|
|
16
|
+
|
|
17
|
+
### 📚 Documentation
|
|
18
|
+
|
|
19
|
+
* Add more examples how to use this SDK ([#15](https://github.com/inference-gateway/typescript-sdk/issues/15)) ([d771356](https://github.com/inference-gateway/typescript-sdk/commit/d771356657279e63a1c4aaac6fe8370a277f08f6))
|
|
20
|
+
|
|
21
|
+
### 🔧 Miscellaneous
|
|
22
|
+
|
|
23
|
+
* Add Docker-in-Docker feature to development container ([177e9f3](https://github.com/inference-gateway/typescript-sdk/commit/177e9f341c7b0fa84d975c754986c75fe98887c9))
|
|
24
|
+
* Remove MCP documentation references and update related instructions ([b33c08f](https://github.com/inference-gateway/typescript-sdk/commit/b33c08f2e1d1a9ae7e0c523f6f1733db86329d90))
|
|
25
|
+
|
|
5
26
|
## [0.7.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.7.0...v0.7.1) (2025-05-27)
|
|
6
27
|
|
|
7
28
|
### 🐛 Bug Fixes
|
package/README.md
CHANGED
|
@@ -7,12 +7,14 @@ An SDK written in TypeScript for the [Inference Gateway](https://github.com/eden
|
|
|
7
7
|
- [Usage](#usage)
|
|
8
8
|
- [Creating a Client](#creating-a-client)
|
|
9
9
|
- [Listing Models](#listing-models)
|
|
10
|
+
- [Listing MCP Tools](#listing-mcp-tools)
|
|
10
11
|
- [Creating Chat Completions](#creating-chat-completions)
|
|
11
12
|
- [Streaming Chat Completions](#streaming-chat-completions)
|
|
12
13
|
- [Tool Calls](#tool-calls)
|
|
13
14
|
- [Proxying Requests](#proxying-requests)
|
|
14
15
|
- [Health Check](#health-check)
|
|
15
16
|
- [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
|
|
17
|
+
- [Examples](#examples)
|
|
16
18
|
- [Contributing](#contributing)
|
|
17
19
|
- [License](#license)
|
|
18
20
|
|
|
@@ -51,7 +53,7 @@ try {
|
|
|
51
53
|
console.log('All models:', models);
|
|
52
54
|
|
|
53
55
|
// List models from a specific provider
|
|
54
|
-
const openaiModels = await client.listModels(Provider.
|
|
56
|
+
const openaiModels = await client.listModels(Provider.openai);
|
|
55
57
|
console.log('OpenAI models:', openaiModels);
|
|
56
58
|
} catch (error) {
|
|
57
59
|
console.error('Error:', error);
|
|
@@ -235,7 +237,7 @@ To proxy requests directly to a provider:
|
|
|
235
237
|
import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
|
|
236
238
|
|
|
237
239
|
const client = new InferenceGatewayClient({
|
|
238
|
-
baseURL: 'http://localhost:8080
|
|
240
|
+
baseURL: 'http://localhost:8080',
|
|
239
241
|
});
|
|
240
242
|
|
|
241
243
|
try {
|
|
@@ -261,7 +263,7 @@ To check if the Inference Gateway is running:
|
|
|
261
263
|
import { InferenceGatewayClient } from '@inference-gateway/sdk';
|
|
262
264
|
|
|
263
265
|
const client = new InferenceGatewayClient({
|
|
264
|
-
baseURL: 'http://localhost:8080
|
|
266
|
+
baseURL: 'http://localhost:8080',
|
|
265
267
|
});
|
|
266
268
|
|
|
267
269
|
try {
|
|
@@ -292,6 +294,10 @@ const clientWithHeaders = client.withOptions({
|
|
|
292
294
|
});
|
|
293
295
|
```
|
|
294
296
|
|
|
297
|
+
### Examples
|
|
298
|
+
|
|
299
|
+
For more examples, check the [examples directory](./examples).
|
|
300
|
+
|
|
295
301
|
## Contributing
|
|
296
302
|
|
|
297
303
|
Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
|
package/dist/src/client.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
|
|
2
|
-
interface ChatCompletionStreamCallbacks {
|
|
2
|
+
export interface ChatCompletionStreamCallbacks {
|
|
3
3
|
onOpen?: () => void;
|
|
4
4
|
onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
|
|
5
5
|
onReasoning?: (reasoningContent: string) => void;
|
|
@@ -8,6 +8,7 @@ interface ChatCompletionStreamCallbacks {
|
|
|
8
8
|
onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
|
|
9
9
|
onFinish?: (response: SchemaCreateChatCompletionStreamResponse | null) => void;
|
|
10
10
|
onError?: (error: SchemaError) => void;
|
|
11
|
+
onMCPTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
|
|
11
12
|
}
|
|
12
13
|
export interface ClientOptions {
|
|
13
14
|
baseURL?: string;
|
|
@@ -53,8 +54,13 @@ export declare class InferenceGatewayClient {
|
|
|
53
54
|
* @param request - Chat completion request (must include at least model and messages)
|
|
54
55
|
* @param callbacks - Callbacks for handling streaming events
|
|
55
56
|
* @param provider - Optional provider to use for this request
|
|
57
|
+
* @param abortSignal - Optional AbortSignal to cancel the request
|
|
56
58
|
*/
|
|
57
|
-
streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
|
|
59
|
+
streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider, abortSignal?: AbortSignal): Promise<void>;
|
|
60
|
+
/**
|
|
61
|
+
* Initiates a streaming request to the chat completions endpoint
|
|
62
|
+
*/
|
|
63
|
+
private initiateStreamingRequest;
|
|
58
64
|
/**
|
|
59
65
|
* Proxy a request to a specific provider.
|
|
60
66
|
*/
|
|
@@ -64,4 +70,3 @@ export declare class InferenceGatewayClient {
|
|
|
64
70
|
*/
|
|
65
71
|
healthCheck(): Promise<boolean>;
|
|
66
72
|
}
|
|
67
|
-
export {};
|
package/dist/src/client.js
CHANGED
|
@@ -2,6 +2,205 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.InferenceGatewayClient = void 0;
|
|
4
4
|
const generated_1 = require("./types/generated");
|
|
5
|
+
/**
|
|
6
|
+
* Handles streaming response processing with enhanced support for MCP and tool calls
|
|
7
|
+
*/
|
|
8
|
+
class StreamProcessor {
|
|
9
|
+
callbacks;
|
|
10
|
+
clientProvidedTools;
|
|
11
|
+
incompleteToolCalls = new Map();
|
|
12
|
+
constructor(callbacks, clientProvidedTools) {
|
|
13
|
+
this.callbacks = callbacks;
|
|
14
|
+
this.clientProvidedTools = clientProvidedTools;
|
|
15
|
+
}
|
|
16
|
+
async processStream(body, abortSignal) {
|
|
17
|
+
const reader = body.getReader();
|
|
18
|
+
const decoder = new TextDecoder();
|
|
19
|
+
let buffer = '';
|
|
20
|
+
try {
|
|
21
|
+
while (true) {
|
|
22
|
+
if (abortSignal?.aborted) {
|
|
23
|
+
throw new Error('Stream processing was aborted');
|
|
24
|
+
}
|
|
25
|
+
const { done, value } = await reader.read();
|
|
26
|
+
if (done)
|
|
27
|
+
break;
|
|
28
|
+
buffer += decoder.decode(value, { stream: true });
|
|
29
|
+
const lines = buffer.split('\n');
|
|
30
|
+
buffer = lines.pop() || '';
|
|
31
|
+
for (const line of lines) {
|
|
32
|
+
if (line.startsWith('data: ')) {
|
|
33
|
+
const data = line.slice(5).trim();
|
|
34
|
+
await this.processSSEData(data);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
catch (error) {
|
|
40
|
+
if (abortSignal?.aborted || error.name === 'AbortError') {
|
|
41
|
+
console.log('Stream processing was cancelled');
|
|
42
|
+
return;
|
|
43
|
+
}
|
|
44
|
+
const apiError = {
|
|
45
|
+
error: error.message || 'Unknown error',
|
|
46
|
+
};
|
|
47
|
+
this.callbacks.onError?.(apiError);
|
|
48
|
+
throw error;
|
|
49
|
+
}
|
|
50
|
+
finally {
|
|
51
|
+
try {
|
|
52
|
+
reader.releaseLock();
|
|
53
|
+
}
|
|
54
|
+
catch {
|
|
55
|
+
// Reader might already be closed, ignore
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
async processSSEData(data) {
|
|
60
|
+
if (data === '[DONE]') {
|
|
61
|
+
this.finalizeIncompleteToolCalls();
|
|
62
|
+
this.callbacks.onFinish?.(null);
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
try {
|
|
66
|
+
const chunk = JSON.parse(data);
|
|
67
|
+
// Handle mid-stream errors from the Inference Gateway
|
|
68
|
+
// When providers fail during streaming, the gateway embeds error info in the stream
|
|
69
|
+
if ('error' in chunk && chunk.error) {
|
|
70
|
+
const apiError = {
|
|
71
|
+
error: typeof chunk.error === 'string'
|
|
72
|
+
? chunk.error
|
|
73
|
+
: JSON.stringify(chunk.error),
|
|
74
|
+
};
|
|
75
|
+
this.callbacks.onError?.(apiError);
|
|
76
|
+
return;
|
|
77
|
+
}
|
|
78
|
+
const validChunk = chunk;
|
|
79
|
+
this.callbacks.onChunk?.(validChunk);
|
|
80
|
+
if (validChunk.usage && this.callbacks.onUsageMetrics) {
|
|
81
|
+
this.callbacks.onUsageMetrics(validChunk.usage);
|
|
82
|
+
}
|
|
83
|
+
const choice = validChunk.choices?.[0];
|
|
84
|
+
if (!choice)
|
|
85
|
+
return;
|
|
86
|
+
this.handleReasoningContent(choice);
|
|
87
|
+
const content = choice.delta?.content;
|
|
88
|
+
if (content) {
|
|
89
|
+
this.callbacks.onContent?.(content);
|
|
90
|
+
}
|
|
91
|
+
this.handleToolCalls(choice);
|
|
92
|
+
this.handleFinishReason(choice);
|
|
93
|
+
}
|
|
94
|
+
catch (parseError) {
|
|
95
|
+
let errorMessage = `Failed to parse SSE data: ${parseError.message}`;
|
|
96
|
+
const errorMatch = data.match(/"error":\s*"([^"]+)"/);
|
|
97
|
+
if (errorMatch) {
|
|
98
|
+
errorMessage = errorMatch[1];
|
|
99
|
+
}
|
|
100
|
+
else {
|
|
101
|
+
const nestedErrorMatch = data.match(/"message":\s*"([^"]+)"/);
|
|
102
|
+
if (nestedErrorMatch) {
|
|
103
|
+
errorMessage = nestedErrorMatch[1];
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
const apiError = {
|
|
107
|
+
error: errorMessage,
|
|
108
|
+
};
|
|
109
|
+
this.callbacks.onError?.(apiError);
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
handleReasoningContent(choice) {
|
|
113
|
+
const reasoningContent = choice.delta?.reasoning_content;
|
|
114
|
+
if (reasoningContent !== undefined) {
|
|
115
|
+
this.callbacks.onReasoning?.(reasoningContent);
|
|
116
|
+
}
|
|
117
|
+
const reasoning = choice.delta?.reasoning;
|
|
118
|
+
if (reasoning !== undefined) {
|
|
119
|
+
this.callbacks.onReasoning?.(reasoning);
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
handleToolCalls(choice) {
|
|
123
|
+
const toolCalls = choice.delta?.tool_calls;
|
|
124
|
+
if (!toolCalls || toolCalls.length === 0)
|
|
125
|
+
return;
|
|
126
|
+
for (const toolCallChunk of toolCalls) {
|
|
127
|
+
const index = toolCallChunk.index;
|
|
128
|
+
if (!this.incompleteToolCalls.has(index)) {
|
|
129
|
+
this.incompleteToolCalls.set(index, {
|
|
130
|
+
id: toolCallChunk.id || '',
|
|
131
|
+
type: generated_1.ChatCompletionToolType.function,
|
|
132
|
+
function: {
|
|
133
|
+
name: toolCallChunk.function?.name || '',
|
|
134
|
+
arguments: toolCallChunk.function?.arguments || '',
|
|
135
|
+
},
|
|
136
|
+
});
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
const existingToolCall = this.incompleteToolCalls.get(index);
|
|
140
|
+
if (toolCallChunk.id) {
|
|
141
|
+
existingToolCall.id = toolCallChunk.id;
|
|
142
|
+
}
|
|
143
|
+
if (toolCallChunk.function?.name) {
|
|
144
|
+
existingToolCall.function.name = toolCallChunk.function.name;
|
|
145
|
+
}
|
|
146
|
+
if (toolCallChunk.function?.arguments) {
|
|
147
|
+
existingToolCall.function.arguments +=
|
|
148
|
+
toolCallChunk.function.arguments;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
handleFinishReason(choice) {
|
|
154
|
+
const finishReason = choice.finish_reason;
|
|
155
|
+
if (finishReason === 'tool_calls' && this.incompleteToolCalls.size > 0) {
|
|
156
|
+
this.finalizeIncompleteToolCalls();
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
finalizeIncompleteToolCalls() {
|
|
160
|
+
this.incompleteToolCalls.forEach((toolCall) => {
|
|
161
|
+
if (!toolCall.id || !toolCall.function.name) {
|
|
162
|
+
globalThis.console.warn('Incomplete tool call detected:', toolCall);
|
|
163
|
+
return;
|
|
164
|
+
}
|
|
165
|
+
const completedToolCall = {
|
|
166
|
+
id: toolCall.id,
|
|
167
|
+
type: toolCall.type,
|
|
168
|
+
function: {
|
|
169
|
+
name: toolCall.function.name,
|
|
170
|
+
arguments: toolCall.function.arguments,
|
|
171
|
+
},
|
|
172
|
+
};
|
|
173
|
+
if (this.isMCPTool(toolCall.function.name)) {
|
|
174
|
+
try {
|
|
175
|
+
if (toolCall.function.arguments) {
|
|
176
|
+
JSON.parse(toolCall.function.arguments);
|
|
177
|
+
}
|
|
178
|
+
this.callbacks.onMCPTool?.(completedToolCall);
|
|
179
|
+
}
|
|
180
|
+
catch (argError) {
|
|
181
|
+
const isIncompleteJSON = toolCall.function.arguments &&
|
|
182
|
+
!toolCall.function.arguments.trim().endsWith('}');
|
|
183
|
+
if (isIncompleteJSON) {
|
|
184
|
+
globalThis.console.warn(`Incomplete MCP tool arguments for ${toolCall.function.name} (stream was likely interrupted):`, toolCall.function.arguments);
|
|
185
|
+
}
|
|
186
|
+
else {
|
|
187
|
+
globalThis.console.warn(`Invalid MCP tool arguments for ${toolCall.function.name}:`, argError);
|
|
188
|
+
}
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
else {
|
|
192
|
+
this.callbacks.onTool?.(completedToolCall);
|
|
193
|
+
}
|
|
194
|
+
});
|
|
195
|
+
this.incompleteToolCalls.clear();
|
|
196
|
+
}
|
|
197
|
+
isMCPTool(toolName) {
|
|
198
|
+
if (!toolName || typeof toolName !== 'string') {
|
|
199
|
+
return false;
|
|
200
|
+
}
|
|
201
|
+
return !this.clientProvidedTools.has(toolName);
|
|
202
|
+
}
|
|
203
|
+
}
|
|
5
204
|
class InferenceGatewayClient {
|
|
6
205
|
baseURL;
|
|
7
206
|
apiKey;
|
|
@@ -14,7 +213,7 @@ class InferenceGatewayClient {
|
|
|
14
213
|
this.apiKey = options.apiKey;
|
|
15
214
|
this.defaultHeaders = options.defaultHeaders || {};
|
|
16
215
|
this.defaultQuery = options.defaultQuery || {};
|
|
17
|
-
this.timeout = options.timeout ||
|
|
216
|
+
this.timeout = options.timeout || 60000; // Increased default timeout to 60 seconds
|
|
18
217
|
this.fetchFn = options.fetch || globalThis.fetch;
|
|
19
218
|
}
|
|
20
219
|
/**
|
|
@@ -106,8 +305,43 @@ class InferenceGatewayClient {
|
|
|
106
305
|
* @param request - Chat completion request (must include at least model and messages)
|
|
107
306
|
* @param callbacks - Callbacks for handling streaming events
|
|
108
307
|
* @param provider - Optional provider to use for this request
|
|
308
|
+
* @param abortSignal - Optional AbortSignal to cancel the request
|
|
109
309
|
*/
|
|
110
|
-
async streamChatCompletion(request, callbacks, provider) {
|
|
310
|
+
async streamChatCompletion(request, callbacks, provider, abortSignal) {
|
|
311
|
+
try {
|
|
312
|
+
const response = await this.initiateStreamingRequest(request, provider, abortSignal);
|
|
313
|
+
if (!response.body) {
|
|
314
|
+
const error = {
|
|
315
|
+
error: 'Response body is not readable',
|
|
316
|
+
};
|
|
317
|
+
callbacks.onError?.(error);
|
|
318
|
+
throw new Error('Response body is not readable');
|
|
319
|
+
}
|
|
320
|
+
callbacks.onOpen?.();
|
|
321
|
+
// Extract tool names from client-provided tools
|
|
322
|
+
const clientProvidedTools = new Set();
|
|
323
|
+
if (request.tools) {
|
|
324
|
+
for (const tool of request.tools) {
|
|
325
|
+
if (tool.type === 'function' && tool.function?.name) {
|
|
326
|
+
clientProvidedTools.add(tool.function.name);
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
}
|
|
330
|
+
const streamProcessor = new StreamProcessor(callbacks, clientProvidedTools);
|
|
331
|
+
await streamProcessor.processStream(response.body, abortSignal);
|
|
332
|
+
}
|
|
333
|
+
catch (error) {
|
|
334
|
+
const apiError = {
|
|
335
|
+
error: error.message || 'Unknown error occurred',
|
|
336
|
+
};
|
|
337
|
+
callbacks.onError?.(apiError);
|
|
338
|
+
throw error;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
/**
|
|
342
|
+
* Initiates a streaming request to the chat completions endpoint
|
|
343
|
+
*/
|
|
344
|
+
async initiateStreamingRequest(request, provider, abortSignal) {
|
|
111
345
|
const query = {};
|
|
112
346
|
if (provider) {
|
|
113
347
|
query.provider = provider;
|
|
@@ -126,6 +360,9 @@ class InferenceGatewayClient {
|
|
|
126
360
|
headers.set('Authorization', `Bearer ${this.apiKey}`);
|
|
127
361
|
}
|
|
128
362
|
const controller = new AbortController();
|
|
363
|
+
const combinedSignal = abortSignal
|
|
364
|
+
? AbortSignal.any([abortSignal, controller.signal])
|
|
365
|
+
: controller.signal;
|
|
129
366
|
const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
|
|
130
367
|
try {
|
|
131
368
|
const response = await this.fetchFn(url, {
|
|
@@ -138,121 +375,20 @@ class InferenceGatewayClient {
|
|
|
138
375
|
include_usage: true,
|
|
139
376
|
},
|
|
140
377
|
}),
|
|
141
|
-
signal: controller.signal,
|
|
378
|
+
signal: combinedSignal,
|
|
142
379
|
});
|
|
143
380
|
if (!response.ok) {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
const reader = response.body.getReader();
|
|
152
|
-
const decoder = new TextDecoder();
|
|
153
|
-
let buffer = '';
|
|
154
|
-
const incompleteToolCalls = new Map();
|
|
155
|
-
while (true) {
|
|
156
|
-
const { done, value } = await reader.read();
|
|
157
|
-
if (done)
|
|
158
|
-
break;
|
|
159
|
-
buffer += decoder.decode(value, { stream: true });
|
|
160
|
-
const lines = buffer.split('\n');
|
|
161
|
-
buffer = lines.pop() || '';
|
|
162
|
-
for (const line of lines) {
|
|
163
|
-
if (line.startsWith('data: ')) {
|
|
164
|
-
const data = line.slice(5).trim();
|
|
165
|
-
if (data === '[DONE]') {
|
|
166
|
-
for (const [, toolCall] of incompleteToolCalls.entries()) {
|
|
167
|
-
callbacks.onTool?.({
|
|
168
|
-
id: toolCall.id,
|
|
169
|
-
type: toolCall.type,
|
|
170
|
-
function: {
|
|
171
|
-
name: toolCall.function.name,
|
|
172
|
-
arguments: toolCall.function.arguments,
|
|
173
|
-
},
|
|
174
|
-
});
|
|
175
|
-
}
|
|
176
|
-
callbacks.onFinish?.(null);
|
|
177
|
-
return;
|
|
178
|
-
}
|
|
179
|
-
try {
|
|
180
|
-
const chunk = JSON.parse(data);
|
|
181
|
-
callbacks.onChunk?.(chunk);
|
|
182
|
-
if (chunk.usage && callbacks.onUsageMetrics) {
|
|
183
|
-
callbacks.onUsageMetrics(chunk.usage);
|
|
184
|
-
}
|
|
185
|
-
const reasoning_content = chunk.choices[0]?.delta?.reasoning_content;
|
|
186
|
-
if (reasoning_content !== undefined) {
|
|
187
|
-
callbacks.onReasoning?.(reasoning_content);
|
|
188
|
-
}
|
|
189
|
-
const reasoning = chunk.choices[0]?.delta?.reasoning;
|
|
190
|
-
if (reasoning !== undefined) {
|
|
191
|
-
callbacks.onReasoning?.(reasoning);
|
|
192
|
-
}
|
|
193
|
-
const content = chunk.choices[0]?.delta?.content;
|
|
194
|
-
if (content) {
|
|
195
|
-
callbacks.onContent?.(content);
|
|
196
|
-
}
|
|
197
|
-
const toolCalls = chunk.choices[0]?.delta?.tool_calls;
|
|
198
|
-
if (toolCalls && toolCalls.length > 0) {
|
|
199
|
-
for (const toolCallChunk of toolCalls) {
|
|
200
|
-
const index = toolCallChunk.index;
|
|
201
|
-
if (!incompleteToolCalls.has(index)) {
|
|
202
|
-
incompleteToolCalls.set(index, {
|
|
203
|
-
id: toolCallChunk.id || '',
|
|
204
|
-
type: generated_1.ChatCompletionToolType.function,
|
|
205
|
-
function: {
|
|
206
|
-
name: toolCallChunk.function?.name || '',
|
|
207
|
-
arguments: toolCallChunk.function?.arguments || '',
|
|
208
|
-
},
|
|
209
|
-
});
|
|
210
|
-
}
|
|
211
|
-
else {
|
|
212
|
-
const existingToolCall = incompleteToolCalls.get(index);
|
|
213
|
-
if (toolCallChunk.id) {
|
|
214
|
-
existingToolCall.id = toolCallChunk.id;
|
|
215
|
-
}
|
|
216
|
-
if (toolCallChunk.function?.name) {
|
|
217
|
-
existingToolCall.function.name =
|
|
218
|
-
toolCallChunk.function.name;
|
|
219
|
-
}
|
|
220
|
-
if (toolCallChunk.function?.arguments) {
|
|
221
|
-
existingToolCall.function.arguments +=
|
|
222
|
-
toolCallChunk.function.arguments;
|
|
223
|
-
}
|
|
224
|
-
}
|
|
225
|
-
}
|
|
226
|
-
}
|
|
227
|
-
const finishReason = chunk.choices[0]?.finish_reason;
|
|
228
|
-
if (finishReason === 'tool_calls' &&
|
|
229
|
-
incompleteToolCalls.size > 0) {
|
|
230
|
-
for (const [, toolCall] of incompleteToolCalls.entries()) {
|
|
231
|
-
callbacks.onTool?.({
|
|
232
|
-
id: toolCall.id,
|
|
233
|
-
type: toolCall.type,
|
|
234
|
-
function: {
|
|
235
|
-
name: toolCall.function.name,
|
|
236
|
-
arguments: toolCall.function.arguments,
|
|
237
|
-
},
|
|
238
|
-
});
|
|
239
|
-
}
|
|
240
|
-
incompleteToolCalls.clear();
|
|
241
|
-
}
|
|
242
|
-
}
|
|
243
|
-
catch (e) {
|
|
244
|
-
globalThis.console.error('Error parsing SSE data:', e);
|
|
245
|
-
}
|
|
246
|
-
}
|
|
381
|
+
let errorMessage = `HTTP error! status: ${response.status}`;
|
|
382
|
+
try {
|
|
383
|
+
const error = await response.json();
|
|
384
|
+
errorMessage = error.error || errorMessage;
|
|
385
|
+
}
|
|
386
|
+
catch {
|
|
387
|
+
// Failed to parse error response as JSON, use status message
|
|
247
388
|
}
|
|
389
|
+
throw new Error(errorMessage);
|
|
248
390
|
}
|
|
249
|
-
|
|
250
|
-
catch (error) {
|
|
251
|
-
const apiError = {
|
|
252
|
-
error: error.message || 'Unknown error',
|
|
253
|
-
};
|
|
254
|
-
callbacks.onError?.(apiError);
|
|
255
|
-
throw error;
|
|
391
|
+
return response;
|
|
256
392
|
}
|
|
257
393
|
finally {
|
|
258
394
|
globalThis.clearTimeout(timeoutId);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inference-gateway/sdk",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"types": "dist/src/index.d.ts",
|