@inference-gateway/sdk 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
2
2
 
3
3
  All notable changes to this project will be documented in this file.
4
4
 
5
+ ## [0.7.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.2...v0.7.0) (2025-05-26)
6
+
7
+ ### ✨ Features
8
+
9
+ * Implement MCP List Tools ([#13](https://github.com/inference-gateway/typescript-sdk/issues/13)) ([5c0a38c](https://github.com/inference-gateway/typescript-sdk/commit/5c0a38cbe825161c9d5dc1e15f59b31217aebb23))
10
+
11
+ ## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
12
+
13
+ ### ♻️ Improvements
14
+
15
+ * Also process Groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
16
+
5
17
  ## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
6
18
 
7
19
  ### ♻️ Improvements
package/README.md CHANGED
@@ -58,6 +58,32 @@ try {
58
58
  }
59
59
  ```
60
60
 
61
+ ### Listing MCP Tools
62
+
63
+ To list the available Model Context Protocol (MCP) tools (this endpoint is only accessible when `EXPOSE_MCP` is enabled):
64
+
65
+ ```typescript
66
+ import { InferenceGatewayClient } from '@inference-gateway/sdk';
67
+
68
+ const client = new InferenceGatewayClient({
69
+ baseURL: 'http://localhost:8080/v1',
70
+ });
71
+
72
+ try {
73
+ const tools = await client.listTools();
74
+ console.log('Available MCP tools:', tools.data);
75
+
76
+ // Each tool has a name, a description, a server, and an optional input_schema
77
+ tools.data.forEach((tool) => {
78
+ console.log(`Tool: ${tool.name}`);
79
+ console.log(`Description: ${tool.description}`);
80
+ console.log(`Server: ${tool.server}`);
81
+ });
82
+ } catch (error) {
83
+ console.error('Error:', error);
84
+ }
85
+ ```
86
+
61
87
  ### Creating Chat Completions
62
88
 
63
89
  To generate content using a model:
@@ -1,4 +1,4 @@
1
- import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse } from './types/generated';
1
+ import type { Provider, SchemaChatCompletionMessageToolCall, SchemaCompletionUsage, SchemaCreateChatCompletionRequest, SchemaCreateChatCompletionResponse, SchemaCreateChatCompletionStreamResponse, SchemaError, SchemaListModelsResponse, SchemaListToolsResponse } from './types/generated';
2
2
  interface ChatCompletionStreamCallbacks {
3
3
  onOpen?: () => void;
4
4
  onChunk?: (chunk: SchemaCreateChatCompletionStreamResponse) => void;
@@ -37,6 +37,11 @@ export declare class InferenceGatewayClient {
37
37
  * Lists the currently available models.
38
38
  */
39
39
  listModels(provider?: Provider): Promise<SchemaListModelsResponse>;
40
+ /**
41
+ * Lists the currently available MCP tools.
42
+ * Only accessible when EXPOSE_MCP is enabled.
43
+ */
44
+ listTools(): Promise<SchemaListToolsResponse>;
40
45
  /**
41
46
  * Creates a chat completion.
42
47
  */
@@ -77,6 +77,15 @@ class InferenceGatewayClient {
77
77
  }
78
78
  return this.request('/models', { method: 'GET' }, query);
79
79
  }
80
+ /**
81
+ * Lists the currently available MCP tools.
82
+ * Only accessible when EXPOSE_MCP is enabled.
83
+ */
84
+ async listTools() {
85
+ return this.request('/mcp/tools', {
86
+ method: 'GET',
87
+ });
88
+ }
80
89
  /**
81
90
  * Creates a chat completion.
82
91
  */
@@ -177,6 +186,10 @@ class InferenceGatewayClient {
177
186
  if (reasoning_content !== undefined) {
178
187
  callbacks.onReasoning?.(reasoning_content);
179
188
  }
189
+ const reasoning = chunk.choices[0]?.delta?.reasoning;
190
+ if (reasoning !== undefined) {
191
+ callbacks.onReasoning?.(reasoning);
192
+ }
180
193
  const content = chunk.choices[0]?.delta?.content;
181
194
  if (content) {
182
195
  callbacks.onContent?.(content);
@@ -47,6 +47,27 @@ export interface paths {
47
47
  patch?: never;
48
48
  trace?: never;
49
49
  };
50
+ '/mcp/tools': {
51
+ parameters: {
52
+ query?: never;
53
+ header?: never;
54
+ path?: never;
55
+ cookie?: never;
56
+ };
57
+ /**
58
+ * Lists the currently available MCP tools
59
+ * @description Lists the currently available MCP tools. Only accessible when EXPOSE_MCP is enabled.
60
+ *
61
+ */
62
+ get: operations['listTools'];
63
+ put?: never;
64
+ post?: never;
65
+ delete?: never;
66
+ options?: never;
67
+ head?: never;
68
+ patch?: never;
69
+ trace?: never;
70
+ };
50
71
  '/proxy/{provider}/{path}': {
51
72
  parameters: {
52
73
  query?: never;
@@ -180,8 +201,8 @@ export interface components {
180
201
  retry?: number;
181
202
  };
182
203
  Endpoints: {
183
- models?: string;
184
- chat?: string;
204
+ models: string;
205
+ chat: string;
185
206
  };
186
207
  Error: {
187
208
  error?: string;
@@ -197,17 +218,19 @@ export interface components {
197
218
  content: string;
198
219
  tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
199
220
  tool_call_id?: string;
200
- reasoning?: string;
221
+ /** @description The reasoning content of the chunk message. */
201
222
  reasoning_content?: string;
223
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
224
+ reasoning?: string;
202
225
  };
203
226
  /** @description Common model information */
204
227
  Model: {
205
- id?: string;
206
- object?: string;
228
+ id: string;
229
+ object: string;
207
230
  /** Format: int64 */
208
- created?: number;
209
- owned_by?: string;
210
- served_by?: components['schemas']['Provider'];
231
+ created: number;
232
+ owned_by: string;
233
+ served_by: components['schemas']['Provider'];
211
234
  };
212
235
  /** @description Response structure for listing models */
213
236
  ListModelsResponse: {
@@ -216,6 +239,53 @@ export interface components {
216
239
  /** @default [] */
217
240
  data: components['schemas']['Model'][];
218
241
  };
242
+ /** @description Response structure for listing MCP tools */
243
+ ListToolsResponse: {
244
+ /**
245
+ * @description Always "list"
246
+ * @example list
247
+ */
248
+ object: string;
249
+ /**
250
+ * @description Array of available MCP tools
251
+ * @default []
252
+ */
253
+ data: components['schemas']['MCPTool'][];
254
+ };
255
+ /** @description An MCP tool definition */
256
+ MCPTool: {
257
+ /**
258
+ * @description The name of the tool
259
+ * @example read_file
260
+ */
261
+ name: string;
262
+ /**
263
+ * @description A description of what the tool does
264
+ * @example Read content from a file
265
+ */
266
+ description: string;
267
+ /**
268
+ * @description The MCP server that provides this tool
269
+ * @example http://mcp-filesystem-server:8083/mcp
270
+ */
271
+ server: string;
272
+ /**
273
+ * @description JSON schema for the tool's input parameters
274
+ * @example {
275
+ * "type": "object",
276
+ * "properties": {
277
+ * "file_path": {
278
+ * "type": "string",
279
+ * "description": "Path to the file to read"
280
+ * }
281
+ * },
282
+ * "required": [
283
+ * "file_path"
284
+ * ]
285
+ * }
286
+ */
287
+ input_schema?: Record<string, never>;
288
+ };
219
289
  FunctionObject: {
220
290
  /** @description A description of what the function does, used by the model to choose when and how to call the function. */
221
291
  description?: string;
@@ -266,11 +336,8 @@ export interface components {
266
336
  /** @description Options for streaming response. Only set this when you set `stream: true`.
267
337
  * */
268
338
  ChatCompletionStreamOptions: {
269
- /**
270
- * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
271
- *
272
- * @default true
273
- */
339
+ /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
340
+ * */
274
341
  include_usage: boolean;
275
342
  };
276
343
  CreateChatCompletionRequest: {
@@ -292,6 +359,10 @@ export interface components {
292
359
  /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
293
360
  * */
294
361
  tools?: components['schemas']['ChatCompletionTool'][];
362
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
363
+ * When specified as `raw`, some reasoning models will output <think /> tags. When specified as `parsed`, the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
364
+ * */
365
+ reasoning_format?: string;
295
366
  };
296
367
  /** @description The function that the model called. */
297
368
  ChatCompletionMessageToolCallFunction: {
@@ -350,11 +421,13 @@ export interface components {
350
421
  /** @description A chat completion delta generated by streamed model responses. */
351
422
  ChatCompletionStreamResponseDelta: {
352
423
  /** @description The contents of the chunk message. */
353
- content?: string;
424
+ content: string;
354
425
  /** @description The reasoning content of the chunk message. */
355
426
  reasoning_content?: string;
427
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
428
+ reasoning?: string;
356
429
  tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
357
- role?: components['schemas']['MessageRole'];
430
+ role: components['schemas']['MessageRole'];
358
431
  /** @description The refusal message generated by the model. */
359
432
  refusal?: string;
360
433
  };
@@ -418,6 +491,10 @@ export interface components {
418
491
  /** @description The object type, which is always `chat.completion.chunk`. */
419
492
  object: string;
420
493
  usage?: components['schemas']['CompletionUsage'];
494
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
495
+ * When specified as `raw`, some reasoning models will output <think /> tags. When specified as `parsed`, the model will output the reasoning under the `reasoning` or `reasoning_content` attribute.
496
+ * */
497
+ reasoning_format?: string;
421
498
  };
422
499
  Config: unknown;
423
500
  };
@@ -449,6 +526,18 @@ export interface components {
449
526
  'application/json': components['schemas']['Error'];
450
527
  };
451
528
  };
529
+ /** @description MCP tools endpoint is not exposed */
530
+ MCPNotExposed: {
531
+ headers: {
532
+ [name: string]: unknown;
533
+ };
534
+ content: {
535
+ /** @example {
536
+ * "error": "MCP tools endpoint is not exposed. Set EXPOSE_MCP=true to enable."
537
+ * } */
538
+ 'application/json': components['schemas']['Error'];
539
+ };
540
+ };
452
541
  /** @description ProviderResponse depends on the specific provider and endpoint being called
453
542
  * If you decide to use this approach, please follow the provider-specific documentations.
454
543
  * */
@@ -504,6 +593,8 @@ export type SchemaMessageRole = components['schemas']['MessageRole'];
504
593
  export type SchemaMessage = components['schemas']['Message'];
505
594
  export type SchemaModel = components['schemas']['Model'];
506
595
  export type SchemaListModelsResponse = components['schemas']['ListModelsResponse'];
596
+ export type SchemaListToolsResponse = components['schemas']['ListToolsResponse'];
597
+ export type SchemaMcpTool = components['schemas']['MCPTool'];
507
598
  export type SchemaFunctionObject = components['schemas']['FunctionObject'];
508
599
  export type SchemaChatCompletionTool = components['schemas']['ChatCompletionTool'];
509
600
  export type SchemaFunctionParameters = components['schemas']['FunctionParameters'];
@@ -525,6 +616,7 @@ export type SchemaConfig = components['schemas']['Config'];
525
616
  export type ResponseBadRequest = components['responses']['BadRequest'];
526
617
  export type ResponseUnauthorized = components['responses']['Unauthorized'];
527
618
  export type ResponseInternalError = components['responses']['InternalError'];
619
+ export type ResponseMcpNotExposed = components['responses']['MCPNotExposed'];
528
620
  export type ResponseProviderResponse = components['responses']['ProviderResponse'];
529
621
  export type RequestBodyProviderRequest = components['requestBodies']['ProviderRequest'];
530
622
  export type RequestBodyCreateChatCompletionRequest = components['requestBodies']['CreateChatCompletionRequest'];
@@ -582,6 +674,29 @@ export interface operations {
582
674
  500: components['responses']['InternalError'];
583
675
  };
584
676
  };
677
+ listTools: {
678
+ parameters: {
679
+ query?: never;
680
+ header?: never;
681
+ path?: never;
682
+ cookie?: never;
683
+ };
684
+ requestBody?: never;
685
+ responses: {
686
+ /** @description Successful response */
687
+ 200: {
688
+ headers: {
689
+ [name: string]: unknown;
690
+ };
691
+ content: {
692
+ 'application/json': components['schemas']['ListToolsResponse'];
693
+ };
694
+ };
695
+ 401: components['responses']['Unauthorized'];
696
+ 403: components['responses']['MCPNotExposed'];
697
+ 500: components['responses']['InternalError'];
698
+ };
699
+ };
585
700
  proxyGet: {
586
701
  parameters: {
587
702
  query?: never;
@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
57
57
  object: 'model',
58
58
  created: 1686935002,
59
59
  owned_by: 'openai',
60
+ served_by: generated_1.Provider.openai,
60
61
  },
61
62
  ],
62
63
  };
@@ -81,6 +82,53 @@ describe('InferenceGatewayClient', () => {
81
82
  await expect(client.listModels(generated_1.Provider.openai)).rejects.toThrow(errorMessage);
82
83
  });
83
84
  });
85
+ describe('listTools', () => {
86
+ it('should fetch available MCP tools', async () => {
87
+ const mockResponse = {
88
+ object: 'list',
89
+ data: [
90
+ {
91
+ name: 'read_file',
92
+ description: 'Read content from a file',
93
+ server: 'http://mcp-filesystem-server:8083/mcp',
94
+ },
95
+ {
96
+ name: 'write_file',
97
+ description: 'Write content to a file',
98
+ server: 'http://mcp-filesystem-server:8083/mcp',
99
+ },
100
+ ],
101
+ };
102
+ mockFetch.mockResolvedValueOnce({
103
+ ok: true,
104
+ json: () => Promise.resolve(mockResponse),
105
+ });
106
+ const result = await client.listTools();
107
+ expect(result).toEqual(mockResponse);
108
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/mcp/tools', expect.objectContaining({
109
+ method: 'GET',
110
+ headers: expect.any(Headers),
111
+ }));
112
+ });
113
+ it('should throw error when MCP is not exposed', async () => {
114
+ const errorMessage = 'MCP not exposed';
115
+ mockFetch.mockResolvedValueOnce({
116
+ ok: false,
117
+ status: 403,
118
+ json: () => Promise.resolve({ error: errorMessage }),
119
+ });
120
+ await expect(client.listTools()).rejects.toThrow(errorMessage);
121
+ });
122
+ it('should throw error when unauthorized', async () => {
123
+ const errorMessage = 'Unauthorized';
124
+ mockFetch.mockResolvedValueOnce({
125
+ ok: false,
126
+ status: 401,
127
+ json: () => Promise.resolve({ error: errorMessage }),
128
+ });
129
+ await expect(client.listTools()).rejects.toThrow(errorMessage);
130
+ });
131
+ });
84
132
  describe('createChatCompletion', () => {
85
133
  it('should create a chat completion', async () => {
86
134
  const mockRequest = {
@@ -391,6 +439,164 @@ describe('InferenceGatewayClient', () => {
391
439
  }),
392
440
  }));
393
441
  });
442
+ it('should handle streaming chat completions with reasoning field', async () => {
443
+ const mockRequest = {
444
+ model: 'groq/deepseek-distilled-llama-3.1-70b',
445
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
446
+ };
447
+ const mockStream = new web_1.TransformStream();
448
+ const writer = mockStream.writable.getWriter();
449
+ const encoder = new node_util_1.TextEncoder();
450
+ mockFetch.mockResolvedValueOnce({
451
+ ok: true,
452
+ body: mockStream.readable,
453
+ });
454
+ const callbacks = {
455
+ onOpen: jest.fn(),
456
+ onChunk: jest.fn(),
457
+ onReasoning: jest.fn(),
458
+ onContent: jest.fn(),
459
+ onFinish: jest.fn(),
460
+ };
461
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
462
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
463
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
464
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
465
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
466
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
467
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
468
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
469
+ 'data: [DONE]\n\n'));
470
+ await writer.close();
471
+ await streamPromise;
472
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
473
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
474
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
475
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
476
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
477
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
478
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
479
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
480
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
481
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
482
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
483
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
484
+ method: 'POST',
485
+ body: JSON.stringify({
486
+ ...mockRequest,
487
+ stream: true,
488
+ stream_options: {
489
+ include_usage: true,
490
+ },
491
+ }),
492
+ }));
493
+ });
494
+ it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
495
+ const mockRequest = {
496
+ model: 'deepseek/deepseek-reasoner',
497
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
498
+ };
499
+ const mockStream = new web_1.TransformStream();
500
+ const writer = mockStream.writable.getWriter();
501
+ const encoder = new node_util_1.TextEncoder();
502
+ mockFetch.mockResolvedValueOnce({
503
+ ok: true,
504
+ body: mockStream.readable,
505
+ });
506
+ const callbacks = {
507
+ onOpen: jest.fn(),
508
+ onChunk: jest.fn(),
509
+ onReasoning: jest.fn(),
510
+ onContent: jest.fn(),
511
+ onFinish: jest.fn(),
512
+ };
513
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
514
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
515
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
516
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
517
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
518
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
519
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
520
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
521
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
522
+ 'data: [DONE]\n\n'));
523
+ await writer.close();
524
+ await streamPromise;
525
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
526
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
527
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
528
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
529
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
530
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
531
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
532
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
533
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
534
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
535
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
536
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
537
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
538
+ method: 'POST',
539
+ body: JSON.stringify({
540
+ ...mockRequest,
541
+ stream: true,
542
+ stream_options: {
543
+ include_usage: true,
544
+ },
545
+ }),
546
+ }));
547
+ });
548
+ it('should handle streaming chat completions with reasoning field (Groq)', async () => {
549
+ const mockRequest = {
550
+ model: 'llama-3.1-70b-versatile',
551
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
552
+ };
553
+ const mockStream = new web_1.TransformStream();
554
+ const writer = mockStream.writable.getWriter();
555
+ const encoder = new node_util_1.TextEncoder();
556
+ mockFetch.mockResolvedValueOnce({
557
+ ok: true,
558
+ body: mockStream.readable,
559
+ });
560
+ const callbacks = {
561
+ onOpen: jest.fn(),
562
+ onChunk: jest.fn(),
563
+ onReasoning: jest.fn(),
564
+ onContent: jest.fn(),
565
+ onFinish: jest.fn(),
566
+ };
567
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
568
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
569
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
570
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
571
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
572
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
573
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
574
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
575
+ 'data: [DONE]\n\n'));
576
+ await writer.close();
577
+ await streamPromise;
578
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
579
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
580
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
581
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
582
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
583
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
584
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
585
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
586
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
587
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
588
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
589
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
590
+ method: 'POST',
591
+ body: JSON.stringify({
592
+ ...mockRequest,
593
+ stream: true,
594
+ stream_options: {
595
+ include_usage: true,
596
+ },
597
+ }),
598
+ }));
599
+ });
394
600
  });
395
601
  describe('proxy', () => {
396
602
  it('should proxy requests to a specific provider', async () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@inference-gateway/sdk",
3
- "version": "0.6.1",
3
+ "version": "0.7.0",
4
4
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
5
5
  "main": "dist/src/index.js",
6
6
  "types": "dist/src/index.d.ts",