@inference-gateway/sdk 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,18 @@
 
  All notable changes to this project will be documented in this file.
 
+ ## [0.6.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.1...v0.6.2) (2025-04-30)
+
+ ### ♻️ Improvements
+
+ * Process also groq reasoning models properly ([#12](https://github.com/inference-gateway/typescript-sdk/issues/12)) ([51ce3bb](https://github.com/inference-gateway/typescript-sdk/commit/51ce3bbbbdf03947bb7928e8edc413b977ea092a))
+
+ ## [0.6.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.6.0...v0.6.1) (2025-04-28)
+
+ ### ♻️ Improvements
+
+ * Remove redundant request option ([#11](https://github.com/inference-gateway/typescript-sdk/issues/11)) ([82e34e2](https://github.com/inference-gateway/typescript-sdk/commit/82e34e2ee9782fd224945bff1bd4daf2859a4f79))
+
  ## [0.6.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.5.1...v0.6.0) (2025-04-28)
 
  ### ✨ Features
@@ -40,11 +40,16 @@ export declare class InferenceGatewayClient {
  /**
  * Creates a chat completion.
  */
- createChatCompletion(request: SchemaCreateChatCompletionRequest, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
+ createChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream'>, provider?: Provider): Promise<SchemaCreateChatCompletionResponse>;
  /**
  * Creates a streaming chat completion.
+ * This method always sets stream=true internally, so there's no need to specify it in the request.
+ *
+ * @param request - Chat completion request (must include at least model and messages)
+ * @param callbacks - Callbacks for handling streaming events
+ * @param provider - Optional provider to use for this request
  */
- streamChatCompletion(request: SchemaCreateChatCompletionRequest, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
+ streamChatCompletion(request: Omit<SchemaCreateChatCompletionRequest, 'stream' | 'stream_options'>, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
  /**
  * Proxy a request to a specific provider.
  */
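The declaration changes above remove `stream` (and, for streaming, `stream_options`) from the caller-facing request types, since the client now sets both internally. A minimal sketch of the resulting call site, assuming the client is constructed with a base URL option (only the URL itself is attested by the tests below) and that `MessageRole` is exported from the package root:

```typescript
import { InferenceGatewayClient, MessageRole } from '@inference-gateway/sdk';

// Assumed constructor shape; the exact options object is not shown in this diff.
const client = new InferenceGatewayClient({ baseURL: 'http://localhost:8080/v1' });

// No `stream: false` in the request: the new Omit<> type rejects it,
// and the client appends it to the request body on its own.
const response = await client.createChatCompletion({
  model: 'gpt-4o',
  messages: [
    { role: MessageRole.system, content: 'You are a helpful assistant' },
    { role: MessageRole.user, content: 'Hello' },
  ],
});
```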
@@ -87,11 +87,16 @@ class InferenceGatewayClient {
  }
  return this.request('/chat/completions', {
  method: 'POST',
- body: JSON.stringify(request),
+ body: JSON.stringify({ ...request, stream: false }),
  }, query);
  }
  /**
  * Creates a streaming chat completion.
+ * This method always sets stream=true internally, so there's no need to specify it in the request.
+ *
+ * @param request - Chat completion request (must include at least model and messages)
+ * @param callbacks - Callbacks for handling streaming events
+ * @param provider - Optional provider to use for this request
  */
  async streamChatCompletion(request, callbacks, provider) {
  const query = {};
@@ -120,6 +125,9 @@ class InferenceGatewayClient {
  body: JSON.stringify({
  ...request,
  stream: true,
+ stream_options: {
+ include_usage: true,
+ },
  }),
  signal: controller.signal,
  });
@@ -169,6 +177,10 @@ class InferenceGatewayClient {
  if (reasoning_content !== undefined) {
  callbacks.onReasoning?.(reasoning_content);
  }
+ const reasoning = chunk.choices[0]?.delta?.reasoning;
+ if (reasoning !== undefined) {
+ callbacks.onReasoning?.(reasoning);
+ }
  const content = chunk.choices[0]?.delta?.content;
  if (content) {
  callbacks.onContent?.(content);
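Two behaviors change in the compiled client: streaming requests now always send `stream_options: { include_usage: true }` alongside `stream: true`, and a `reasoning` delta (the field Groq-hosted reasoning models emit) is routed to the same `onReasoning` callback that already handled `reasoning_content`. A consumer sketch, reusing the hypothetical `client` from the sketch above; the callback names match the code in this hunk:

```typescript
await client.streamChatCompletion(
  {
    model: 'deepseek/deepseek-reasoner', // model id borrowed from the tests below
    messages: [{ role: MessageRole.user, content: 'Hello' }],
    // `stream` and `stream_options` are no longer accepted here; the client
    // injects stream: true and stream_options: { include_usage: true } itself.
  },
  {
    // After this change, fires for both `reasoning_content` (DeepSeek-style)
    // and `reasoning` (Groq-style) deltas.
    onReasoning: (reasoning) => process.stdout.write(reasoning),
    onContent: (content) => process.stdout.write(content),
    onFinish: () => console.log('\n[done]'),
  }
);
```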
@@ -180,8 +180,8 @@ export interface components {
  retry?: number;
  };
  Endpoints: {
- models?: string;
- chat?: string;
+ models: string;
+ chat: string;
  };
  Error: {
  error?: string;
@@ -197,17 +197,19 @@ export interface components {
  content: string;
  tool_calls?: components['schemas']['ChatCompletionMessageToolCall'][];
  tool_call_id?: string;
- reasoning?: string;
+ /** @description The reasoning content of the chunk message. */
  reasoning_content?: string;
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
+ reasoning?: string;
  };
  /** @description Common model information */
  Model: {
- id?: string;
- object?: string;
+ id: string;
+ object: string;
  /** Format: int64 */
- created?: number;
- owned_by?: string;
- served_by?: components['schemas']['Provider'];
+ created: number;
+ owned_by: string;
+ served_by: components['schemas']['Provider'];
  };
  /** @description Response structure for listing models */
  ListModelsResponse: {
@@ -266,11 +268,8 @@ export interface components {
  /** @description Options for streaming response. Only set this when you set `stream: true`.
  * */
  ChatCompletionStreamOptions: {
- /**
- * @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
- *
- * @default true
- */
+ /** @description If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
+ * */
  include_usage: boolean;
  };
  CreateChatCompletionRequest: {
@@ -292,6 +291,10 @@ export interface components {
  /** @description A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
  * */
  tools?: components['schemas']['ChatCompletionTool'][];
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+ * When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under `reasoning` or `reasoning_content` attribute.
+ * */
+ reasoning_format?: string;
  };
  /** @description The function that the model called. */
  ChatCompletionMessageToolCallFunction: {
@@ -350,11 +353,13 @@ export interface components {
  /** @description A chat completion delta generated by streamed model responses. */
  ChatCompletionStreamResponseDelta: {
  /** @description The contents of the chunk message. */
- content?: string;
+ content: string;
  /** @description The reasoning content of the chunk message. */
  reasoning_content?: string;
+ /** @description The reasoning of the chunk message. Same as reasoning_content. */
+ reasoning?: string;
  tool_calls?: components['schemas']['ChatCompletionMessageToolCallChunk'][];
- role?: components['schemas']['MessageRole'];
+ role: components['schemas']['MessageRole'];
  /** @description The refusal message generated by the model. */
  refusal?: string;
  };
@@ -418,6 +423,10 @@ export interface components {
  /** @description The object type, which is always `chat.completion.chunk`. */
  object: string;
  usage?: components['schemas']['CompletionUsage'];
+ /** @description The format of the reasoning content. Can be `raw` or `parsed`.
+ * When specified as raw some reasoning models will output <think /> tags. When specified as parsed the model will output the reasoning under reasoning_content.
+ * */
+ reasoning_format?: string;
  };
  Config: unknown;
  };
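Beyond tightening several previously optional fields to required (`Endpoints.models`/`chat`, the `Model` fields, and `content`/`role` on the stream delta), the generated schema gains an optional `reasoning_format` string on both the request and the chunk response. A sketch of setting it on a request, assuming it passes through `streamChatCompletion` unchanged; the `'raw'`/`'parsed'` values come from the schema comments above, and the model id is borrowed from the tests below:

```typescript
await client.streamChatCompletion(
  {
    model: 'groq/deepseek-distilled-llama-3.1-70b',
    messages: [{ role: MessageRole.user, content: 'Why is the sky blue?' }],
    // 'parsed' asks for reasoning under `reasoning`/`reasoning_content`;
    // 'raw' lets some models emit inline <think /> tags instead.
    reasoning_format: 'parsed',
  },
  {
    onReasoning: (r) => console.log('reasoning:', r),
    onContent: (c) => console.log('content:', c),
  }
);
```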
@@ -57,6 +57,7 @@ describe('InferenceGatewayClient', () => {
  object: 'model',
  created: 1686935002,
  owned_by: 'openai',
+ served_by: generated_1.Provider.openai,
  },
  ],
  };
@@ -89,7 +90,6 @@ describe('InferenceGatewayClient', () => {
  { role: generated_1.MessageRole.system, content: 'You are a helpful assistant' },
  { role: generated_1.MessageRole.user, content: 'Hello' },
  ],
- stream: false,
  };
  const mockResponse = {
  id: 'chatcmpl-123',
@@ -120,14 +120,13 @@ describe('InferenceGatewayClient', () => {
  expect(result).toEqual(mockResponse);
  expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
  method: 'POST',
- body: JSON.stringify(mockRequest),
+ body: JSON.stringify({ ...mockRequest, stream: false }),
  }));
  });
  it('should create a chat completion with a specific provider', async () => {
  const mockRequest = {
  model: 'claude-3-opus-20240229',
  messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
- stream: false,
  };
  const mockResponse = {
  id: 'chatcmpl-456',
@@ -158,7 +157,7 @@ describe('InferenceGatewayClient', () => {
  expect(result).toEqual(mockResponse);
  expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions?provider=anthropic', expect.objectContaining({
  method: 'POST',
- body: JSON.stringify(mockRequest),
+ body: JSON.stringify({ ...mockRequest, stream: false }),
  }));
  });
  });
@@ -167,7 +166,6 @@ describe('InferenceGatewayClient', () => {
  const mockRequest = {
  model: 'gpt-4o',
  messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
- stream: true,
  };
  const mockStream = new web_1.TransformStream();
  const writer = mockStream.writable.getWriter();
@@ -201,6 +199,9 @@ describe('InferenceGatewayClient', () => {
  body: JSON.stringify({
  ...mockRequest,
  stream: true,
+ stream_options: {
+ include_usage: true,
+ },
  }),
  }));
  });
@@ -208,7 +209,6 @@ describe('InferenceGatewayClient', () => {
  const mockRequest = {
  model: 'gpt-4o',
  messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
- stream: true,
  };
  const mockStream = new web_1.TransformStream();
  const writer = mockStream.writable.getWriter();
@@ -253,6 +253,9 @@ describe('InferenceGatewayClient', () => {
  body: JSON.stringify({
  ...mockRequest,
  stream: true,
+ stream_options: {
+ include_usage: true,
+ },
  }),
  }));
  });
@@ -274,7 +277,6 @@ describe('InferenceGatewayClient', () => {
  },
  },
  ],
- stream: true,
  };
  const mockStream = new web_1.TransformStream();
  const writer = mockStream.writable.getWriter();
@@ -312,12 +314,21 @@ describe('InferenceGatewayClient', () => {
  },
  });
  expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+ method: 'POST',
+ body: JSON.stringify({
+ ...mockRequest,
+ stream: true,
+ stream_options: {
+ include_usage: true,
+ },
+ }),
+ }));
  });
  it('should handle errors in streaming chat completions', async () => {
  const mockRequest = {
  model: 'gpt-4o',
  messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
- stream: true,
  };
  mockFetch.mockResolvedValueOnce({
  ok: false,
@@ -334,10 +345,6 @@ describe('InferenceGatewayClient', () => {
  const mockRequest = {
  model: 'gpt-4o',
  messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
- stream: true,
- stream_options: {
- include_usage: true,
- },
  };
  const mockStream = new web_1.TransformStream();
  const writer = mockStream.writable.getWriter();
@@ -379,6 +386,167 @@ describe('InferenceGatewayClient', () => {
  body: JSON.stringify({
  ...mockRequest,
  stream: true,
+ stream_options: {
+ include_usage: true,
+ },
+ }),
+ }));
+ });
+ it('should handle streaming chat completions with reasoning field', async () => {
+ const mockRequest = {
+ model: 'groq/deepseek-distilled-llama-3.1-70b',
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+ };
+ const mockStream = new web_1.TransformStream();
+ const writer = mockStream.writable.getWriter();
+ const encoder = new node_util_1.TextEncoder();
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ body: mockStream.readable,
+ });
+ const callbacks = {
+ onOpen: jest.fn(),
+ onChunk: jest.fn(),
+ onReasoning: jest.fn(),
+ onContent: jest.fn(),
+ onFinish: jest.fn(),
+ };
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"groq/deepseek-distilled-llama-3.1-70b","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+ 'data: [DONE]\n\n'));
+ await writer.close();
+ await streamPromise;
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+ method: 'POST',
+ body: JSON.stringify({
+ ...mockRequest,
+ stream: true,
+ stream_options: {
+ include_usage: true,
+ },
+ }),
+ }));
+ });
+ it('should handle streaming chat completions with reasoning_content (DeepSeek)', async () => {
+ const mockRequest = {
+ model: 'deepseek/deepseek-reasoner',
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+ };
+ const mockStream = new web_1.TransformStream();
+ const writer = mockStream.writable.getWriter();
+ const encoder = new node_util_1.TextEncoder();
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ body: mockStream.readable,
+ });
+ const callbacks = {
+ onOpen: jest.fn(),
+ onChunk: jest.fn(),
+ onReasoning: jest.fn(),
+ onContent: jest.fn(),
+ onFinish: jest.fn(),
+ };
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":"This"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" is"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" a"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" reasoning"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"","reasoning_content":" content"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"deepseek/deepseek-reasoner","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+ 'data: [DONE]\n\n'));
+ await writer.close();
+ await streamPromise;
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(8);
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(5);
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('This');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' is');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' a');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' reasoning');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' content');
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+ method: 'POST',
+ body: JSON.stringify({
+ ...mockRequest,
+ stream: true,
+ stream_options: {
+ include_usage: true,
+ },
+ }),
+ }));
+ });
+ it('should handle streaming chat completions with reasoning field (Groq)', async () => {
+ const mockRequest = {
+ model: 'llama-3.1-70b-versatile',
+ messages: [{ role: generated_1.MessageRole.user, content: 'Hello' }],
+ };
+ const mockStream = new web_1.TransformStream();
+ const writer = mockStream.writable.getWriter();
+ const encoder = new node_util_1.TextEncoder();
+ mockFetch.mockResolvedValueOnce({
+ ok: true,
+ body: mockStream.readable,
+ });
+ const callbacks = {
+ onOpen: jest.fn(),
+ onChunk: jest.fn(),
+ onReasoning: jest.fn(),
+ onContent: jest.fn(),
+ onFinish: jest.fn(),
+ };
+ const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+ await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":"Let me"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" think"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" about"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"","reasoning":" this"},"finish_reason":"stop"}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+ 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"llama-3.1-70b-versatile","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+ 'data: [DONE]\n\n'));
+ await writer.close();
+ await streamPromise;
+ expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+ expect(callbacks.onChunk).toHaveBeenCalledTimes(7);
+ expect(callbacks.onReasoning).toHaveBeenCalledTimes(4);
+ expect(callbacks.onReasoning).toHaveBeenCalledWith('Let me');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' think');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' about');
+ expect(callbacks.onReasoning).toHaveBeenCalledWith(' this');
+ expect(callbacks.onContent).toHaveBeenCalledTimes(2);
+ expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+ expect(callbacks.onContent).toHaveBeenCalledWith('!');
+ expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+ expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+ method: 'POST',
+ body: JSON.stringify({
+ ...mockRequest,
+ stream: true,
+ stream_options: {
+ include_usage: true,
+ },
  }),
  }));
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@inference-gateway/sdk",
- "version": "0.6.0",
+ "version": "0.6.2",
  "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
  "main": "dist/src/index.js",
  "types": "dist/src/index.d.ts",