@inference-gateway/sdk 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/README.md +26 -0
- package/dist/src/client.d.ts +6 -1
- package/dist/src/client.js +13 -0
- package/dist/src/types/generated/index.d.ts +130 -15
- package/dist/tests/client.test.js +206 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project will be documented in this file.
|
|
4
4
|
|
|
5
|
+
## [0.7.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.2...v0.7.0) (2025-05-26)
|
|
6
|
+
|
|
7
|
+
### ✨ Features
|
|
8
|
+
|
|
9
|
+
* Implement MCP List Tools ([#13](https://github.com/inference-gateway/typescript-sdk/issues/13)) ([5c0a38c](https://github.com/inference-gateway/typescript-sdk/commit/5c0a38cbe825161c9d5dc1e15f59b31217aebb23))
|
|
10
|
+
|
|
11
|
+
## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
|
|
12
|
+
|
|
13
|
+
### ♻️ Improvements
|
|
14
|
+
|
|
15
|
+
* Process also groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
|
|
16
|
+
|
|
5
17
|
## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
|
|
6
18
|
|
|
7
19
|
### ♻️ Improvements
|
package/README.md
CHANGED
|
@@ -58,6 +58,32 @@ try {
|
|
|
58
58
|
}
|
|
59
59
|
```
|
|
60
60
|
|
|
61
|
+
### Listing MCP Tools
|
|
62
|
+
|
|
63
|
+
To list available Model Context Protocol (MCP) tools (only available when EXPOSE_MCP is enabled):
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
import { InferenceGatewayClient } from '@inference-gateway/sdk';
|
|
67
|
+
|
|
68
|
+
const client = new InferenceGatewayClient({
|
|
69
|
+
baseURL: 'http://localhost:8080/v1',
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
try {
|
|
73
|
+
const tools = await client.listTools();
|
|
74
|
+
console.log('Available MCP tools:', tools.data);
|
|
75
|
+
|
|
76
|
+
// Each tool has: name, description, server, and optional input_schema
|
|
77
|
+
tools.data.forEach((tool) => {
|
|
78
|
+
console.log(`Tool: ${tool.name}`);
|
|
79
|
+
console.log(`Description: ${tool.description}`);
|
|
80
|
+
console.log(`Server: ${tool.server}`);
|
|
81
|
+
});
|
|
82
|
+
} catch (error) {
|
|
83
|
+
console.error('Error:', error);
|
|
84
|
+
}
|
|
85
|
+
```
|
|
86
|
+
|
|
61
87
|
### Creating Chat Completions
|
|
62
88
|
|
|
63
89
|
To generate content using a model:
|
package/dist/src/client.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
|
|
1
|
+
import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
|
|
2
2
|
interface ChatCompletionStreamCallbacks {
|
|
3
3
|
onOpen?: () => void;
|
|
4
4
|
onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
|
|
@@ -37,6 +37,11 @@ export declare class InferenceGatewayClient {
|
|
|
37
37
|
* Lists the currently available models.
|
|
38
38
|
*/
|
|
39
39
|
listModels(provider?: Provider): Promise<SchemaListModelsResponse>;
|
|
40
|
+
/**
|
|
41
|
+
* Lists the currently available MCP tools.
|
|
42
|
+
* Only accessible when EXPOSE_MCP is enabled.
|
|
43
|
+
*/
|
|
44
|
+
listTools(): Promise<SchemaListToolsResponse>;
|
|
40
45
|
/**
|
|
41
46
|
* Creates a chat completion.
|
|
42
47
|
*/
|
package/dist/src/client.js
CHANGED
|
@@ -77,6 +77,15 @@ class InferenceGatewayClient {
|
|
|
77
77
|
}
|
|
78
78
|
return this.request('/models', { method: 'GET' }, query);
|
|
79
79
|
}
|
|
80
|
+
/**
|
|
81
|
+
* Lists the currently available MCP tools.
|
|
82
|
+
* Only accessible when EXPOSE_MCP is enabled.
|
|
83
|
+
*/
|
|
84
|
+
async listTools() {
|
|
85
|
+
return this.request('/mcp/tools', {
|
|
86
|
+
method: 'GET',
|
|
87
|
+
});
|
|
88
|
+
}
|
|
80
89
|
/**
|
|
81
90
|
* Creates a chat completion.
|
|
82
91
|
*/
|
|
@@ -177,6 +186,10 @@ class InferenceGatewayClient {
|
|
|
177
186
|
if (reasoning_content !== undefined) {
|
|
178
187
|
callbacks.onReasoning?.(reasoning_content);
|
|
179
188
|
}
|
|
189
|
+
const reasoning = chunk.choices[0]?.delta?.reasoning;
|
|
190
|
+
if (reasoning !== undefined) {
|
|
191
|
+
callbacks.onReasoning?.(reasoning);
|
|
192
|
+
}
|
|
180
193
|
const content = chunk.choices[0]?.delta?.content;
|
|
181
194
|
if (content) {
|
|
182
195
|
callbacks.onContent?.(content);
|
|
package/dist/src/types/generated/index.d.ts
CHANGED
|
@@ -47,6 +47,27 @@ export interface paths {
|
|
|
47
47
|
patch?: never;
|
|
48
48
|
trace?: never;
|
|
49
49
|
};
|
|
50
|
+
'/mcp/tools': {
|
|
51
|
+
parameters: {
|
|
52
|
+
query?: never;
|
|
53
|
+
header?: never;
|
|
54
|
+
path?: never;
|
|
55
|
+
cookie?: never;
|
|
56
|
+
};
|
|
57
|
+
/**
|
|
58
|
+
* Lists the currently available MCP tools
|
|
59
|
+
* @description Lists the currently available MCP tools. Only accessible when EXPOSE_MCP is enabled.
|
|
60
|
+
*
|
|
61
|
+
*/
|
|
62
|
+
get: operations['listTools'];
|
|
63
|
+
put?: never;
|
|
64
|
+
post?: never;
|
|
65
|
+
delete?: never;
|
|
66
|
+
options?: never;
|
|
67
|
+
head?: never;
|
|
68
|
+
patch?: never;
|
|
69
|
+
trace?: never;
|
|
70
|
+
};
|
|
50
71
|
'/proxy/{provider}/{path}': {
|
|
51
72
|
parameters: {
|
|
52
73
|
query?: never;
|
|
@@ -180,8 +201,8 @@ export interface components {
|
|
|
180
201
|
retry?: number;
|
|
181
202
|
};
|
|
182
203
|
Endpoints: {
|
|
183
|
-
models
|
|
184
|
-
chat
|
|
204
|
+
models: string;
|
|
205
|
+
chat: string;
|
|
185
206
|
};
|
|
186
207
|
Error: {
|
|
187
208
|
error?: string;
|
|
@@ -197,17 +218,19 @@ export interface components {
|
|
|
197
218
|
content: string;
|
|
198
219
|
tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
|
|
199
220
|
tool_call_id?: string;
|
|
200
|
-
reasoning
|
|
221
|
+
/** @description The reasoning content of the chunk message. */
|
|
201
222
|
reasoning_content?: string;
|
|
223
|
+
/** @description The reasoning of the chunk message. Same as reasoning_content. */
|
|
224
|
+
reasoning?: string;
|
|
202
225
|
};
|
|
203
226
|
/** @description Common model information */
|
|
204
227
|
Model: {
|
|
205
|
-
id
|
|
206
|
-
object
|
|
228
|
+
id: string;
|
|
229
|
+
object: string;
|
|
207
230
|
/** Format: int64 */
|
|
208
|
-
created
|
|
209
|
-
owned_by
|
|
210
|
-
served_by
|
|
231
|
+
created: number;
|
|
232
|
+
owned_by: string;
|
|
233
|
+
served_by: components['schemas']['Provider'];
|
|
211
234
|
};
|
|
212
235
|
/** @description Response structure for listing models */
|
|
213
236
|
ListModelsResponse: {
|
|
@@ -216,6 +239,53 @@ export interface components {
|
|
|
216
239
|
/** @default [] */
|
|
217
240
|
data: components['schemas']['Model'][];
|
|
218
241
|
};
|
|
242
|
+
/** @description Response structure for listing MCP tools */
|
|
243
|
+
ListToolsResponse: {
|
|
244
|
+
/**
|
|
245
|
+
* @description Always "list"
|
|
246
|
+
* @example list
|
|
247
|
+
*/
|
|
248
|
+
object: string;
|
|
249
|
+
/**
|
|
250
|
+
* @description Array of available MCP tools
|
|
251
|
+
* @default []
|
|
252
|
+
*/
|
|
253
|
+
data: components['schemas']['MCPTool'][];
|
|
254
|
+
};
|
|
255
|
+
/** @description An MCP tool definition */
|
|
256
|
+
MCPTool: {
|
|
257
|
+
/**
|
|
258
|
+
* @description The name of the tool
|
|
259
|
+
* @example read_file
|
|
260
|
+
*/
|
|
261
|
+
name: string;
|
|
262
|
+
/**
|
|
263
|
+
* @description A description of what the tool does
|
|
264
|
+
* @example Read content from a file
|
|
265
|
+
*/
|
|
266
|
+
description: string;
|
|
267
|
+
/**
|
|
268
|
+
* @description The MCP server that provides this tool
|
|
269
|
+
* @example http://mcp-filesystem-server:8083/mcp
|
|
270
|
+
*/
|
|
271
|
+
server: string;
|
|
272
|
+
/**
|
|
273
|
+
* @description JSON schema for the tool's input parameters
|
|
274
|
+
* @example {
|
|
275
|
+
* "type": "object",
|
|
276
|
+
* "properties": {
|
|
277
|
+
* "file_path": {
|
|
278
|
+
* "type": "string",
|
|
279
|
+
* "description": "Path to the file to read"
|
|
280
|
+
* }
|
|
281
|
+
* },
|
|
282
|
+
* "required": [
|
|
283
|
+
* "file_path"
|
|
284
|
+
* ]
|
|
285
|
+
* }
|
|
286
|
+
*/
|
|
287
|
+
input_schema?: Record<string, never>;
|
|
288
|
+
};
|
|
219
289
|
FunctionObject: {
|
|
220
290
|
/** @description A description of what the function does, used by the model to choose when and how to call the function. */
|
|
221
291
|
description?: string;
|
|
@@ -266,11 +336,8 @@ export interface components {
|
|
|
266
336
|
/** @description Options for streaming response. Only set this when you set `stream: true`.
|
|
267
337
|
* */
|
|
268
338
|
ChatCompletionStreamOptions: {
|
|
269
|
-
/**
|
|
270
|
-
*
|
|
271
|
-
*
|
|
272
|
-
* @default true
|
|
273
|
-
*/
|
|
339
|
+
/** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
|
|
340
|
+
* */
|
|
274
341
|
include_usage: boolean;
|
|
275
342
|
};
|
|
276
343
|
CreateChatCompletionRequest: {
|
|
@@ -292,6 +359,10 @@ export interface components {
|
|
|
292
359
|
/** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
|
|
293
360
|
* */
|
|
294
361
|
tools?: components['schemas']['ChatCompletionTool'][];
|
|
362
|
+
/** @description The format of the reasoning content. Can be `raw` or `parsed`.
|
|
363
|
+
* When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under `reasoning` or `reasoning_content` attribute.
|
|
364
|
+
* */
|
|
365
|
+
reasoning_format?: string;
|
|
295
366
|
};
|
|
296
367
|
/** @description The function that the model called. */
|
|
297
368
|
ChatCompletionMessageToolCallFunction: {
|
|
@@ -350,11 +421,13 @@ export interface components {
|
|
|
350
421
|
/** @description A chat completion delta generated by streamed model responses. */
|
|
351
422
|
ChatCompletionStreamResponseDelta: {
|
|
352
423
|
/** @description The contents of the chunk message. */
|
|
353
|
-
content
|
|
424
|
+
content: string;
|
|
354
425
|
/** @description The reasoning content of the chunk message. */
|
|
355
426
|
reasoning_content?: string;
|
|
427
|
+
/** @description The reasoning of the chunk message. Same as reasoning_content. */
|
|
428
|
+
reasoning?: string;
|
|
356
429
|
tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
|
|
357
|
-
role
|
|
430
|
+
role: components['schemas']['MessageRole'];
|
|
358
431
|
/** @description The refusal message generated by the model. */
|
|
359
432
|
refusal?: string;
|
|
360
433
|
};
|
|
@@ -418,6 +491,10 @@ export interface components {
|
|
|
418
491
|
/** @description The object type, which is always `chat.completion.chunk`. */
|
|
419
492
|
object: string;
|
|
420
493
|
usage?: components['schemas']['CompletionUsage'];
|
|
494
|
+
/** @description The format of the reasoning content. Can be `raw` or `parsed`.
|
|
495
|
+
* When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under reasoning_content.
|
|
496
|
+
* */
|
|
497
|
+
reasoning_format?: string;
|
|
421
498
|
};
|
|
422
499
|
Config: unknown;
|
|
423
500
|
};
|
|
@@ -449,6 +526,18 @@ export interface components {
|
|
|
449
526
|
'application/json': components['schemas']['Error'];
|
|
450
527
|
};
|
|
451
528
|
};
|
|
529
|
+
/** @description MCP tools endpoint is not exposed */
|
|
530
|
+
MCPNotExposed: {
|
|
531
|
+
headers: {
|
|
532
|
+
[name: string]: unknown;
|
|
533
|
+
};
|
|
534
|
+
content: {
|
|
535
|
+
/** @example {
|
|
536
|
+
* "error": "MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable."
|
|
537
|
+
* } */
|
|
538
|
+
'application/json': components['schemas']['Error'];
|
|
539
|
+
};
|
|
540
|
+
};
|
|
452
541
|
/** @description ProviderResponse depends on the specific provider and endpoint being called
|
|
453
542
|
* If you decide to use this approach, please follow the provider-specific documentations.
|
|
454
543
|
* */
|
|
@@ -504,6 +593,8 @@ export type SchemaMessageRole = components['schemas']['MessageRole'];
|
|
|
504
593
|
export type SchemaMessage = components['schemas']['Message'];
|
|
505
594
|
export type SchemaModel = components['schemas']['Model'];
|
|
506
595
|
export type SchemaListModelsResponse = components['schemas']['ListModelsResponse'];
|
|
596
|
+
export type SchemaListToolsResponse = components['schemas']['ListToolsResponse'];
|
|
597
|
+
export type SchemaMcpTool = components['schemas']['MCPTool'];
|
|
507
598
|
export type SchemaFunctionObject = components['schemas']['FunctionObject'];
|
|
508
599
|
export type SchemaChatCompletionTool = components['schemas']['ChatCompletionTool'];
|
|
509
600
|
export type SchemaFunctionParameters = components['schemas']['FunctionParameters'];
|
|
@@ -525,6 +616,7 @@ export type SchemaConfig = components['schemas']['Config'];
|
|
|
525
616
|
export type ResponseBadRequest = components['responses']['BadRequest'];
|
|
526
617
|
export type ResponseUnauthorized = components['responses']['Unauthorized'];
|
|
527
618
|
export type ResponseInternalError = components['responses']['InternalError'];
|
|
619
|
+
export type ResponseMcpNotExposed = components['responses']['MCPNotExposed'];
|
|
528
620
|
export type ResponseProviderResponse = components['responses']['ProviderResponse'];
|
|
529
621
|
export type RequestBodyProviderRequest = components['requestBodies']['ProviderRequest'];
|
|
530
622
|
export type RequestBodyCreateChatCompletionRequest = components['requestBodies']['CreateChatCompletionRequest'];
|
|
@@ -582,6 +674,29 @@ export interface operations {
|
|
|
582
674
|
500: components['responses']['InternalError'];
|
|
583
675
|
};
|
|
584
676
|
};
|
|
677
|
+
listTools: {
|
|
678
|
+
parameters: {
|
|
679
|
+
query?: never;
|
|
680
|
+
header?: never;
|
|
681
|
+
path?: never;
|
|
682
|
+
cookie?: never;
|
|
683
|
+
};
|
|
684
|
+
requestBody?: never;
|
|
685
|
+
responses: {
|
|
686
|
+
/** @description Successful response */
|
|
687
|
+
200: {
|
|
688
|
+
headers: {
|
|
689
|
+
[name: string]: unknown;
|
|
690
|
+
};
|
|
691
|
+
content: {
|
|
692
|
+
'application/json': components['schemas']['ListToolsResponse'];
|
|
693
|
+
};
|
|
694
|
+
};
|
|
695
|
+
401: components['responses']['Unauthorized'];
|
|
696
|
+
403: components['responses']['MCPNotExposed'];
|
|
697
|
+
500: components['responses']['InternalError'];
|
|
698
|
+
};
|
|
699
|
+
};
|
|
585
700
|
proxyGet: {
|
|
586
701
|
parameters: {
|
|
587
702
|
query?: never;
|
|
package/dist/tests/client.test.js
CHANGED
|
@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
|
|
|
57
57
|
object: 'model',
|
|
58
58
|
created: 1686935002,
|
|
59
59
|
owned_by: 'openai',
|
|
60
|
+
served_by: generated_1.Provider.openai,
|
|
60
61
|
},
|
|
61
62
|
],
|
|
62
63
|
};
|
|
@@ -81,6 +82,53 @@ describe('InferenceGatewayClient', () => {
|
|
|
81
82
|
await expect(client.listModels(generated_1.Provider.openai)).rejects.toThrow(errorMessage);
|
|
82
83
|
});
|
|
83
84
|
});
|
|
85
|
+
describe('listTools', () => {
|
|
86
|
+
it('should fetch available MCP tools', async () => {
|
|
87
|
+
const mockResponse = {
|
|
88
|
+
object: 'list',
|
|
89
|
+
data: [
|
|
90
|
+
{
|
|
91
|
+
name: 'read_file',
|
|
92
|
+
description: 'Read content from a file',
|
|
93
|
+
server: 'http://mcp-filesystem-server:8083/mcp',
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
name: 'write_file',
|
|
97
|
+
description: 'Write content to a file',
|
|
98
|
+
server: 'http://mcp-filesystem-server:8083/mcp',
|
|
99
|
+
},
|
|
100
|
+
],
|
|
101
|
+
};
|
|
102
|
+
mockFetch.mockResolvedValueOnce({
|
|
103
|
+
ok: true,
|
|
104
|
+
json: () => Promise.resolve(mockResponse),
|
|
105
|
+
});
|
|
106
|
+
const result = await client.listTools();
|
|
107
|
+
expect(result).toEqual(mockResponse);
|
|
108
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/mcp/tools', expect.objectContaining({
|
|
109
|
+
method: 'GET',
|
|
110
|
+
headers: expect.any(Headers),
|
|
111
|
+
}));
|
|
112
|
+
});
|
|
113
|
+
it('should throw error when MCP is not exposed', async () => {
|
|
114
|
+
const errorMessage = 'MCP not exposed';
|
|
115
|
+
mockFetch.mockResolvedValueOnce({
|
|
116
|
+
ok: false,
|
|
117
|
+
status: 403,
|
|
118
|
+
json: () => Promise.resolve({ error: errorMessage }),
|
|
119
|
+
});
|
|
120
|
+
await expect(client.listTools()).rejects.toThrow(errorMessage);
|
|
121
|
+
});
|
|
122
|
+
it('should throw error when unauthorized', async () => {
|
|
123
|
+
const errorMessage = 'Unauthorized';
|
|
124
|
+
mockFetch.mockResolvedValueOnce({
|
|
125
|
+
ok: false,
|
|
126
|
+
status: 401,
|
|
127
|
+
json: () => Promise.resolve({ error: errorMessage }),
|
|
128
|
+
});
|
|
129
|
+
await expect(client.listTools()).rejects.toThrow(errorMessage);
|
|
130
|
+
});
|
|
131
|
+
});
|
|
84
132
|
describe('createChatCompletion', () => {
|
|
85
133
|
it('should create a chat completion', async () => {
|
|
86
134
|
const mockRequest = {
|
|
@@ -391,6 +439,164 @@ describe('InferenceGatewayClient', () => {
|
|
|
391
439
|
}),
|
|
392
440
|
}));
|
|
393
441
|
});
|
|
442
|
+
it('should handle streaming chat completions with reasoning field', async () => {
|
|
443
|
+
const mockRequest = {
|
|
444
|
+
model: 'groq/deepseek-distilled-llama-3.1-70b',
|
|
445
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
446
|
+
};
|
|
447
|
+
const mockStream = new web_1.TransformStream();
|
|
448
|
+
const writer = mockStream.writable.getWriter();
|
|
449
|
+
const encoder = new node_util_1.TextEncoder();
|
|
450
|
+
mockFetch.mockResolvedValueOnce({
|
|
451
|
+
ok: true,
|
|
452
|
+
body: mockStream.readable,
|
|
453
|
+
});
|
|
454
|
+
const callbacks = {
|
|
455
|
+
onOpen: jest.fn(),
|
|
456
|
+
onChunk: jest.fn(),
|
|
457
|
+
onReasoning: jest.fn(),
|
|
458
|
+
onContent: jest.fn(),
|
|
459
|
+
onFinish: jest.fn(),
|
|
460
|
+
};
|
|
461
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
462
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
463
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
|
|
464
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
|
|
465
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
|
|
466
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
|
|
467
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
468
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
469
|
+
'data: [DONE]\n\n'));
|
|
470
|
+
await writer.close();
|
|
471
|
+
await streamPromise;
|
|
472
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
473
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
|
|
474
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
|
|
475
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
|
|
476
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
|
|
477
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
|
|
478
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
|
|
479
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
480
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
481
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
482
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
483
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
484
|
+
method: 'POST',
|
|
485
|
+
body: JSON.stringify({
|
|
486
|
+
...mockRequest,
|
|
487
|
+
stream: true,
|
|
488
|
+
stream_options: {
|
|
489
|
+
include_usage: true,
|
|
490
|
+
},
|
|
491
|
+
}),
|
|
492
|
+
}));
|
|
493
|
+
});
|
|
494
|
+
it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
|
|
495
|
+
const mockRequest = {
|
|
496
|
+
model: 'deepseek/deepseek-reasoner',
|
|
497
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
498
|
+
};
|
|
499
|
+
const mockStream = new web_1.TransformStream();
|
|
500
|
+
const writer = mockStream.writable.getWriter();
|
|
501
|
+
const encoder = new node_util_1.TextEncoder();
|
|
502
|
+
mockFetch.mockResolvedValueOnce({
|
|
503
|
+
ok: true,
|
|
504
|
+
body: mockStream.readable,
|
|
505
|
+
});
|
|
506
|
+
const callbacks = {
|
|
507
|
+
onOpen: jest.fn(),
|
|
508
|
+
onChunk: jest.fn(),
|
|
509
|
+
onReasoning: jest.fn(),
|
|
510
|
+
onContent: jest.fn(),
|
|
511
|
+
onFinish: jest.fn(),
|
|
512
|
+
};
|
|
513
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
514
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
515
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
|
|
516
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
|
|
517
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
|
|
518
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
|
|
519
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
|
|
520
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
521
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
522
|
+
'data: [DONE]\n\n'));
|
|
523
|
+
await writer.close();
|
|
524
|
+
await streamPromise;
|
|
525
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
526
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
|
|
527
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
|
|
528
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
|
|
529
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
|
|
530
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
|
|
531
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
|
|
532
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
|
|
533
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
534
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
535
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
536
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
537
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
538
|
+
method: 'POST',
|
|
539
|
+
body: JSON.stringify({
|
|
540
|
+
...mockRequest,
|
|
541
|
+
stream: true,
|
|
542
|
+
stream_options: {
|
|
543
|
+
include_usage: true,
|
|
544
|
+
},
|
|
545
|
+
}),
|
|
546
|
+
}));
|
|
547
|
+
});
|
|
548
|
+
it('should handle streaming chat completions with reasoning field (Groq)', async () => {
|
|
549
|
+
const mockRequest = {
|
|
550
|
+
model: 'llama-3.1-70b-versatile',
|
|
551
|
+
messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
|
|
552
|
+
};
|
|
553
|
+
const mockStream = new web_1.TransformStream();
|
|
554
|
+
const writer = mockStream.writable.getWriter();
|
|
555
|
+
const encoder = new node_util_1.TextEncoder();
|
|
556
|
+
mockFetch.mockResolvedValueOnce({
|
|
557
|
+
ok: true,
|
|
558
|
+
body: mockStream.readable,
|
|
559
|
+
});
|
|
560
|
+
const callbacks = {
|
|
561
|
+
onOpen: jest.fn(),
|
|
562
|
+
onChunk: jest.fn(),
|
|
563
|
+
onReasoning: jest.fn(),
|
|
564
|
+
onContent: jest.fn(),
|
|
565
|
+
onFinish: jest.fn(),
|
|
566
|
+
};
|
|
567
|
+
const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
|
|
568
|
+
await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
|
|
569
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
|
|
570
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
|
|
571
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
|
|
572
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
|
|
573
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
|
|
574
|
+
'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
|
|
575
|
+
'data: [DONE]\n\n'));
|
|
576
|
+
await writer.close();
|
|
577
|
+
await streamPromise;
|
|
578
|
+
expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
|
|
579
|
+
expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
|
|
580
|
+
expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
|
|
581
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
|
|
582
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
|
|
583
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
|
|
584
|
+
expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
|
|
585
|
+
expect(callbacks.onContent).toHaveBeenCalledTimes(2);
|
|
586
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
|
|
587
|
+
expect(callbacks.onContent).toHaveBeenCalledWith('!');
|
|
588
|
+
expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
|
|
589
|
+
expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
|
|
590
|
+
method: 'POST',
|
|
591
|
+
body: JSON.stringify({
|
|
592
|
+
...mockRequest,
|
|
593
|
+
stream: true,
|
|
594
|
+
stream_options: {
|
|
595
|
+
include_usage: true,
|
|
596
|
+
},
|
|
597
|
+
}),
|
|
598
|
+
}));
|
|
599
|
+
});
|
|
394
600
|
});
|
|
395
601
|
describe('proxy', () => {
|
|
396
602
|
it('should proxy requests to a specific provider', async () => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inference-gateway/sdk",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.7.0",
|
|
4
4
|
"description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"types": "dist/src/index.d.ts",
|