@inference-gateway/sdk 0.2.0 → 0.3.2

This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,35 @@
+ # Changelog
+
+ All notable changes to this project will be documented in this file.
+
+ ## [0.3.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.3.1...v0.3.2) (2025-03-31)
+
+ ### 👷 CI
+
+ * Add npm ci step to install project dependencies in release workflow ([84791b1](https://github.com/inference-gateway/typescript-sdk/commit/84791b1e4c319f91798c456c783ded6e22da8f81))
+
+ ## [0.3.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.3.0...v0.3.1) (2025-03-31)
+
+ ### ♻️ Improvements
+
+ * Make the SDK OpenAI compatible ([#2](https://github.com/inference-gateway/typescript-sdk/issues/2)) ([31657b3](https://github.com/inference-gateway/typescript-sdk/commit/31657b358f34ccc39acc5994248a95127f1ea46a))
+
+ ### 👷 CI
+
+ * Update GitHub Actions release workflow to use GitHub App token and improve release handling ([14835e8](https://github.com/inference-gateway/typescript-sdk/commit/14835e8f9289314f34e711c02faf865ad9af6d66))
+ * Update release configuration for semantic-release plugins and rules to be consistent with other repos ([20bd3f8](https://github.com/inference-gateway/typescript-sdk/commit/20bd3f82c68d0b1ee1d07b4fa75eb67524db4fb8))
+
+ ## [0.3.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.2.0...v0.3.0) (2025-02-02)
+
+ ### ✨ Features
+
+ * add streaming content functionality to InferenceGatewayClient and update README ([ba41d2d](https://github.com/inference-gateway/typescript-sdk/commit/ba41d2dc136b83372820af2aefa63969932e16f0))
+
+ ### 📚 Documentation
+
+ * **fix:** Update examples in README.md ([4e972fc](https://github.com/inference-gateway/typescript-sdk/commit/4e972fc2c577f41b0b443f1c87cde7561717b577))
+ * Update OpenAPI spec - download it from Inference-gateway ([9816b15](https://github.com/inference-gateway/typescript-sdk/commit/9816b151db6b48b04723f93b988daf83239a09df))
+
  ## [0.2.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.1.6...v0.2.0) (2025-01-28)

  ### ✨ Features
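
The 0.3.0 entry above ("Make the SDK OpenAI compatible") is the breaking change that drives most of this diff: the constructor now takes an options object and `generateContent` gives way to OpenAI-style chat completions. A minimal migration sketch, assembled from the two README versions shown below (model and prompt are illustrative):

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

// 0.2.0: the constructor took a bare base URL and the provider
// travelled inside the request body.
// const client = new InferenceGatewayClient('http://localhost:8080');
// const response = await client.generateContent({
//   provider: Provider.Ollama,
//   model: 'llama2',
//   messages: [{ role: MessageRole.User, content: 'Tell me a joke' }],
// });
// console.log(response.response.content);

// 0.3.x: options object, OpenAI-shaped request, provider as an
// optional trailing argument.
async function migrated() {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });
  const response = await client.createChatCompletion(
    {
      model: 'llama2',
      messages: [{ role: MessageRole.User, content: 'Tell me a joke' }],
    },
    Provider.Ollama
  );
  console.log(response.choices[0].message.content);
}
```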
package/README.md CHANGED
@@ -1,15 +1,18 @@
- # Inference Gateway Typescript SDK
+ # Inference Gateway TypeScript SDK

- An SDK written in Typescript for the [Inference Gateway](https://github.com/edenreich/inference-gateway).
+ An SDK written in TypeScript for the [Inference Gateway](https://github.com/edenreich/inference-gateway).

- - [Inference Gateway Typescript SDK](#inference-gateway-typescript-sdk)
+ - [Inference Gateway TypeScript SDK](#inference-gateway-typescript-sdk)
  - [Installation](#installation)
  - [Usage](#usage)
  - [Creating a Client](#creating-a-client)
- - [Listing All Models](#listing-all-models)
- - [List Models by Provider](#list-models-by-provider)
- - [Generating Content](#generating-content)
+ - [Listing Models](#listing-models)
+ - [Creating Chat Completions](#creating-chat-completions)
+ - [Streaming Chat Completions](#streaming-chat-completions)
+ - [Tool Calls](#tool-calls)
+ - [Proxying Requests](#proxying-requests)
  - [Health Check](#health-check)
+ - [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
  - [Contributing](#contributing)
  - [License](#license)
@@ -21,116 +24,199 @@ Run `npm i @inference-gateway/sdk`.
  ### Creating a Client

+ ```typescript
+ import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+ // Create a client with default options
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+   apiKey: 'your-api-key', // Optional
+ });
+ ```
+
+ ### Listing Models
+
+ To list all available models:
+
+ ```typescript
+ import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
+
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
+ try {
+   // List all models
+   const models = await client.listModels();
+   console.log('All models:', models);
+
+   // List models from a specific provider
+   const openaiModels = await client.listModels(Provider.OpenAI);
+   console.log('OpenAI models:', openaiModels);
+ } catch (error) {
+   console.error('Error:', error);
+ }
+ ```
+
+ ### Creating Chat Completions
+
+ To generate content using a model:
+
  ```typescript
  import {
    InferenceGatewayClient,
-   Message,
+   MessageRole,
    Provider,
  } from '@inference-gateway/sdk';

- async function main() {
-   const client = new InferenceGatewayClient('http://localhost:8080');
-
-   try {
-     // List available models
-     const models = await client.listModels();
-     models.forEach((providerModels) => {
-       console.log(`Provider: ${providerModels.provider}`);
-       providerModels.models.forEach((model) => {
-         console.log(`Model: ${model.id}`);
-       });
-     });
-
-     // Generate content
-     const response = await client.generateContent({
-       provider: Provider.Ollama,
-       model: 'llama2',
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
+ try {
+   const response = await client.createChatCompletion(
+     {
+       model: 'gpt-4o',
        messages: [
        {
          role: MessageRole.System,
-         content: 'You are a helpful llama',
+         content: 'You are a helpful assistant',
        },
        {
          role: MessageRole.User,
          content: 'Tell me a joke',
        },
        ],
-     });
+     },
+     Provider.OpenAI
+   ); // Provider is optional

-     console.log('Response:', response);
-   } catch (error) {
-     console.error('Error:', error);
-   }
+   console.log('Response:', response.choices[0].message.content);
+ } catch (error) {
+   console.error('Error:', error);
  }
-
- main();
  ```

- ### Listing All Models
+ ### Streaming Chat Completions

- To list all available models from all providers, use the `listModels` method:
+ To stream content from a model:

  ```typescript
+ import {
+   InferenceGatewayClient,
+   MessageRole,
+   Provider,
+ } from '@inference-gateway/sdk';
+
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
  try {
-   const models = await client.listModels();
-   models.forEach((providerModels) => {
-     console.log(`Provider: ${providerModels.provider}`);
-     providerModels.models.forEach((model) => {
-       console.log(`Model: ${model.id}`);
-     });
-   });
+   await client.streamChatCompletion(
+     {
+       model: 'llama-3.3-70b-versatile',
+       messages: [
+         {
+           role: MessageRole.User,
+           content: 'Tell me a story',
+         },
+       ],
+     },
+     {
+       onOpen: () => console.log('Stream opened'),
+       onContent: (content) => process.stdout.write(content),
+       onChunk: (chunk) => console.log('Received chunk:', chunk.id),
+       onFinish: () => console.log('\nStream completed'),
+       onError: (error) => console.error('Stream error:', error),
+     },
+     Provider.Groq // Provider is optional
+   );
  } catch (error) {
    console.error('Error:', error);
  }
  ```

- ### List Models by Provider
+ ### Tool Calls

- To list all available models from a specific provider, use the `listModelsByProvider` method:
+ To use tool calls with models that support them:

  ```typescript
+ import {
+   InferenceGatewayClient,
+   MessageRole,
+   Provider,
+ } from '@inference-gateway/sdk';
+
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
  try {
-   const providerModels = await client.listModelsByProvider(Provider.OpenAI);
-   console.log(`Provider: ${providerModels.provider}`);
-   providerModels.models.forEach((model) => {
-     console.log(`Model: ${model.name}`);
-   });
+   await client.streamChatCompletion(
+     {
+       model: 'gpt-4o',
+       messages: [
+         {
+           role: MessageRole.User,
+           content: "What's the weather in San Francisco?",
+         },
+       ],
+       tools: [
+         {
+           type: 'function',
+           function: {
+             name: 'get_weather',
+             parameters: {
+               type: 'object',
+               properties: {
+                 location: {
+                   type: 'string',
+                   description: 'The city and state, e.g. San Francisco, CA',
+                 },
+               },
+               required: ['location'],
+             },
+           },
+         },
+       ],
+     },
+     {
+       onTool: (toolCall) => {
+         console.log('Tool call:', toolCall.function.name);
+         console.log('Arguments:', toolCall.function.arguments);
+       },
+       onContent: (content) => process.stdout.write(content),
+       onFinish: () => console.log('\nStream completed'),
+     },
+     Provider.OpenAI
+   );
  } catch (error) {
    console.error('Error:', error);
  }
  ```

- ### Generating Content
+ ### Proxying Requests

- To generate content using a model, use the `generateContent` method:
+ To proxy requests directly to a provider:

  ```typescript
- import {
-   InferenceGatewayClient,
-   Message,
-   MessageRole,
-   Provider,
- } from '@inference-gateway/sdk';
+ import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';

- const client = new InferenceGatewayClient('http://localhost:8080');
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });

- const response = await client.generateContent({
-   provider: Provider.Ollama,
-   model: 'llama2',
-   messages: [
-     {
-       role: MessageRole.System,
-       content: 'You are a helpful llama',
-     },
-     {
-       role: MessageRole.User,
-       content: 'Tell me a joke',
-     },
-   ],
+ try {
+   const response = await client.proxy(Provider.OpenAI, 'embeddings', {
+     method: 'POST',
+     body: JSON.stringify({
+       model: 'text-embedding-ada-002',
+       input: 'Hello world',
+     }),
  });

-   console.log('Provider:', response.provider);
-   console.log('Response:', response.response);
+   console.log('Embeddings:', response);
  } catch (error) {
    console.error('Error:', error);
  }
@@ -138,9 +224,15 @@ const client = new InferenceGatewayClient('http://localhost:8080');
 
  ### Health Check

- To check if the Inference Gateway is running, use the `healthCheck` method:
+ To check if the Inference Gateway is running:

  ```typescript
+ import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
  try {
    const isHealthy = await client.healthCheck();
    console.log('API is healthy:', isHealthy);
@@ -149,6 +241,26 @@ try {
  }
  ```

+ ### Creating a Client with Custom Options
+
+ You can create a new client with custom options using the `withOptions` method:
+
+ ```typescript
+ import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+ const client = new InferenceGatewayClient({
+   baseURL: 'http://localhost:8080/v1',
+ });
+
+ // Create a new client with custom headers
+ const clientWithHeaders = client.withOptions({
+   defaultHeaders: {
+     'X-Custom-Header': 'value',
+   },
+   timeout: 60000, // 60 seconds
+ });
+ ```
+
  ## Contributing

  Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
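
The README demonstrates receiving tool calls but stops short of returning a tool result to the model. The type changes later in this diff add `MessageRole.Tool` and a `tool_call_id` field on `Message`, which make the usual OpenAI-style round trip expressible. A sketch under that assumption (the gateway's exact handling of tool results isn't shown in this diff, and the weather lookup is a stand-in):

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
});

async function toolRoundTrip() {
  const first = await client.createChatCompletion(
    {
      model: 'gpt-4o',
      messages: [
        { role: MessageRole.User, content: 'How warm is it in Paris?' },
      ],
      tools: [
        {
          type: 'function',
          function: {
            name: 'get_weather',
            parameters: {
              type: 'object',
              properties: { location: { type: 'string' } },
              required: ['location'],
            },
          },
        },
      ],
    },
    Provider.OpenAI
  );

  const message = first.choices[0].message;
  const toolCall = message.tool_calls?.[0];
  if (!toolCall) return;

  // Stand-in for a real lookup keyed on toolCall.function.arguments.
  const result = JSON.stringify({ celsius: 18 });

  const second = await client.createChatCompletion(
    {
      model: 'gpt-4o',
      messages: [
        { role: MessageRole.User, content: 'How warm is it in Paris?' },
        message, // the assistant message carrying tool_calls
        {
          role: MessageRole.Tool,
          content: result,
          tool_call_id: toolCall.id,
        },
      ],
    },
    Provider.OpenAI
  );
  console.log(second.choices[0].message.content);
}
```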
@@ -1,11 +1,46 @@
- import { GenerateContentRequest, GenerateContentResponse, Provider, ProviderModels } from './types';
+ import { ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamCallbacks, ListModelsResponse, Provider } from './types';
+ export interface ClientOptions {
+     baseURL?: string;
+     apiKey?: string;
+     defaultHeaders?: Record<string, string>;
+     defaultQuery?: Record<string, string>;
+     timeout?: number;
+     fetch?: typeof globalThis.fetch;
+ }
  export declare class InferenceGatewayClient {
-     private baseUrl;
-     private authToken?;
-     constructor(baseUrl: string, authToken?: string);
+     private baseURL;
+     private apiKey?;
+     private defaultHeaders;
+     private defaultQuery;
+     private timeout;
+     private fetchFn;
+     constructor(options?: ClientOptions);
+     /**
+      * Creates a new instance of the client with the given options merged with the existing options.
+      */
+     withOptions(options: ClientOptions): InferenceGatewayClient;
+     /**
+      * Makes a request to the API.
+      */
      private request;
-     listModels(): Promise<ProviderModels[]>;
-     listModelsByProvider(provider: Provider): Promise<ProviderModels>;
-     generateContent(params: GenerateContentRequest): Promise<GenerateContentResponse>;
+     /**
+      * Lists the currently available models.
+      */
+     listModels(provider?: Provider): Promise<ListModelsResponse>;
+     /**
+      * Creates a chat completion.
+      */
+     createChatCompletion(request: ChatCompletionRequest, provider?: Provider): Promise<ChatCompletionResponse>;
+     /**
+      * Creates a streaming chat completion.
+      */
+     streamChatCompletion(request: ChatCompletionRequest, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
+     /**
+      * Proxy a request to a specific provider.
+      */
+     proxy<T = unknown>(provider: Provider, path: string, options?: RequestInit): Promise<T>;
+     /**
+      * Health check endpoint.
+      */
      healthCheck(): Promise<boolean>;
  }
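
Note `proxy<T = unknown>` in the declarations above: proxied responses are untyped unless the caller narrows them. A sketch where `EmbeddingsResponse` is a hypothetical shape for OpenAI's embeddings endpoint, not a type shipped by the SDK:

```typescript
import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';

// Hypothetical response shape; the SDK deliberately leaves proxy
// responses as `unknown`, so the caller supplies the type.
interface EmbeddingsResponse {
  object: string;
  data: { index: number; embedding: number[] }[];
}

async function embed(text: string): Promise<number[]> {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });
  const res = await client.proxy<EmbeddingsResponse>(
    Provider.OpenAI,
    'embeddings',
    {
      method: 'POST',
      body: JSON.stringify({ model: 'text-embedding-ada-002', input: text }),
    }
  );
  return res.data[0].embedding;
}
```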
@@ -2,48 +2,201 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.InferenceGatewayClient = void 0;
  class InferenceGatewayClient {
-     baseUrl;
-     authToken;
-     constructor(baseUrl, authToken) {
-         this.baseUrl = baseUrl.replace(/\/$/, '');
-         this.authToken = authToken;
+     baseURL;
+     apiKey;
+     defaultHeaders;
+     defaultQuery;
+     timeout;
+     fetchFn;
+     constructor(options = {}) {
+         this.baseURL = options.baseURL || 'http://localhost:8080/v1';
+         this.apiKey = options.apiKey;
+         this.defaultHeaders = options.defaultHeaders || {};
+         this.defaultQuery = options.defaultQuery || {};
+         this.timeout = options.timeout || 30000;
+         this.fetchFn = options.fetch || globalThis.fetch;
      }
-     async request(path, options = {}) {
+     /**
+      * Creates a new instance of the client with the given options merged with the existing options.
+      */
+     withOptions(options) {
+         return new InferenceGatewayClient({
+             baseURL: options.baseURL || this.baseURL,
+             apiKey: options.apiKey || this.apiKey,
+             defaultHeaders: { ...this.defaultHeaders, ...options.defaultHeaders },
+             defaultQuery: { ...this.defaultQuery, ...options.defaultQuery },
+             timeout: options.timeout || this.timeout,
+             fetch: options.fetch || this.fetchFn,
+         });
+     }
+     /**
+      * Makes a request to the API.
+      */
+     async request(path, options = {}, query = {}) {
          const headers = new Headers({
              'Content-Type': 'application/json',
+             ...this.defaultHeaders,
              ...options.headers,
          });
-         if (this.authToken) {
-             headers.set('Authorization', `Bearer ${this.authToken}`);
+         if (this.apiKey) {
+             headers.set('Authorization', `Bearer ${this.apiKey}`);
          }
-         const response = await fetch(`${this.baseUrl}${path}`, {
-             ...options,
-             headers,
+         // Combine default query parameters with provided ones
+         const queryParams = new URLSearchParams({
+             ...this.defaultQuery,
+             ...query,
          });
-         if (!response.ok) {
-             const error = await response.json();
-             throw new Error(error.error || `HTTP error! status: ${response.status}`);
+         const queryString = queryParams.toString();
+         const url = `${this.baseURL}${path}${queryString ? `?${queryString}` : ''}`;
+         const controller = new AbortController();
+         const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+         try {
+             const response = await this.fetchFn(url, {
+                 ...options,
+                 headers,
+                 signal: controller.signal,
+             });
+             if (!response.ok) {
+                 const error = (await response.json());
+                 throw new Error(error.error || `HTTP error! status: ${response.status}`);
+             }
+             return response.json();
+         }
+         finally {
+             globalThis.clearTimeout(timeoutId);
          }
-         return response.json();
-     }
-     async listModels() {
-         return this.request('/llms');
      }
-     async listModelsByProvider(provider) {
-         return this.request(`/llms/${provider}`);
+     /**
+      * Lists the currently available models.
+      */
+     async listModels(provider) {
+         const query = {};
+         if (provider) {
+             query.provider = provider;
+         }
+         return this.request('/models', { method: 'GET' }, query);
      }
-     async generateContent(params) {
-         return this.request(`/llms/${params.provider}/generate`, {
+     /**
+      * Creates a chat completion.
+      */
+     async createChatCompletion(request, provider) {
+         const query = {};
+         if (provider) {
+             query.provider = provider;
+         }
+         return this.request('/chat/completions', {
              method: 'POST',
-             body: JSON.stringify({
-                 model: params.model,
-                 messages: params.messages,
-             }),
+             body: JSON.stringify(request),
+         }, query);
+     }
+     /**
+      * Creates a streaming chat completion.
+      */
+     async streamChatCompletion(request, callbacks, provider) {
+         const query = {};
+         if (provider) {
+             query.provider = provider;
+         }
+         const queryParams = new URLSearchParams({
+             ...this.defaultQuery,
+             ...query,
          });
+         const queryString = queryParams.toString();
+         const url = `${this.baseURL}/chat/completions${queryString ? `?${queryString}` : ''}`;
+         const headers = new Headers({
+             'Content-Type': 'application/json',
+             ...this.defaultHeaders,
+         });
+         if (this.apiKey) {
+             headers.set('Authorization', `Bearer ${this.apiKey}`);
+         }
+         const controller = new AbortController();
+         const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+         try {
+             const response = await this.fetchFn(url, {
+                 method: 'POST',
+                 headers,
+                 body: JSON.stringify({
+                     ...request,
+                     stream: true,
+                 }),
+                 signal: controller.signal,
+             });
+             if (!response.ok) {
+                 const error = (await response.json());
+                 throw new Error(error.error || `HTTP error! status: ${response.status}`);
+             }
+             if (!response.body) {
+                 throw new Error('Response body is not readable');
+             }
+             callbacks.onOpen?.();
+             const reader = response.body.getReader();
+             const decoder = new TextDecoder();
+             let buffer = '';
+             while (true) {
+                 const { done, value } = await reader.read();
+                 if (done)
+                     break;
+                 buffer += decoder.decode(value, { stream: true });
+                 const lines = buffer.split('\n');
+                 buffer = lines.pop() || '';
+                 for (const line of lines) {
+                     if (line.startsWith('data: ')) {
+                         const data = line.slice(5).trim();
+                         if (data === '[DONE]') {
+                             callbacks.onFinish?.(null);
+                             return;
+                         }
+                         try {
+                             const chunk = JSON.parse(data);
+                             callbacks.onChunk?.(chunk);
+                             const content = chunk.choices[0]?.delta?.content;
+                             if (content) {
+                                 callbacks.onContent?.(content);
+                             }
+                             const toolCalls = chunk.choices[0]?.delta?.tool_calls;
+                             if (toolCalls && toolCalls.length > 0) {
+                                 const toolCall = {
+                                     id: toolCalls[0].id || '',
+                                     type: 'function',
+                                     function: {
+                                         name: toolCalls[0].function?.name || '',
+                                         arguments: toolCalls[0].function?.arguments || '',
+                                     },
+                                 };
+                                 callbacks.onTool?.(toolCall);
+                             }
+                         }
+                         catch (e) {
+                             globalThis.console.error('Error parsing SSE data:', e);
+                         }
+                     }
+                 }
+             }
+         }
+         catch (error) {
+             const apiError = {
+                 error: error.message || 'Unknown error',
+             };
+             callbacks.onError?.(apiError);
+             throw error;
+         }
+         finally {
+             globalThis.clearTimeout(timeoutId);
+         }
+     }
+     /**
+      * Proxy a request to a specific provider.
+      */
+     async proxy(provider, path, options = {}) {
+         return this.request(`/proxy/${provider}/${path}`, options);
      }
+     /**
+      * Health check endpoint.
+      */
      async healthCheck() {
          try {
-             await this.request('/health');
+             await this.fetchFn(`${this.baseURL.replace('/v1', '')}/health`);
              return true;
          }
          catch {
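
Two implementation details above are worth surfacing: every request is aborted via `AbortController` after `timeout` milliseconds (30000 by default), and the `fetch` option replaces the global fetch wholesale. A sketch of both options; the logging wrapper is illustrative (the client always passes a string URL to its fetch function):

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

// Illustrative wrapper: logs each request, then delegates to the real fetch.
const loggingFetch: typeof fetch = (input, init) => {
  console.log('→', init?.method ?? 'GET', input);
  return fetch(input, init);
};

const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1',
  timeout: 5000, // abort in-flight requests after 5 s (default is 30 s)
  defaultQuery: { provider: 'groq' }, // merged into every request's query string
  fetch: loggingFetch,
});
```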
@@ -2,37 +2,127 @@ export declare enum Provider {
      Ollama = "ollama",
      Groq = "groq",
      OpenAI = "openai",
-     Google = "google",
      Cloudflare = "cloudflare",
      Cohere = "cohere",
-     Anthropic = "anthropic"
+     Anthropic = "anthropic",
+     DeepSeek = "deepseek"
  }
  export declare enum MessageRole {
      System = "system",
      User = "user",
-     Assistant = "assistant"
+     Assistant = "assistant",
+     Tool = "tool"
  }
  export interface Message {
      role: MessageRole;
      content: string;
+     tool_calls?: ChatCompletionMessageToolCall[];
+     tool_call_id?: string;
  }
  export interface Model {
+     id: string;
+     object: string;
+     created: number;
+     owned_by: string;
+ }
+ export interface ListModelsResponse {
+     object: string;
+     data: Model[];
+ }
+ export interface ChatCompletionMessageToolCallFunction {
+     name: string;
+     arguments: string;
+ }
+ export interface ChatCompletionMessageToolCall {
+     id: string;
+     type: 'function';
+     function: ChatCompletionMessageToolCallFunction;
+ }
+ export interface ChatCompletionMessageToolCallChunk {
+     index: number;
+     id?: string;
+     type?: string;
+     function?: {
+         name?: string;
+         arguments?: string;
+     };
+ }
+ export interface FunctionParameters {
+     type: string;
+     properties?: Record<string, unknown>;
+     required?: string[];
+ }
+ export interface FunctionObject {
+     description?: string;
      name: string;
+     parameters: FunctionParameters;
+     strict?: boolean;
  }
- export interface ProviderModels {
-     provider: Provider;
-     models: Model[];
+ export interface ChatCompletionTool {
+     type: 'function';
+     function: FunctionObject;
  }
- export interface GenerateContentRequest {
-     provider: Provider;
+ export interface ChatCompletionRequest {
      model: string;
      messages: Message[];
+     max_tokens?: number;
+     stream?: boolean;
+     stream_options?: ChatCompletionStreamOptions;
+     tools?: ChatCompletionTool[];
+     temperature?: number;
+     top_p?: number;
+     top_k?: number;
  }
- export interface GenerateContentResponse {
-     provider: string;
-     response: {
-         role: 'assistant';
-         model: string;
-         content: string;
-     };
+ export interface ChatCompletionStreamOptions {
+     include_usage?: boolean;
+ }
+ export interface ChatCompletionChoice {
+     finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
+     index: number;
+     message: Message;
+     logprobs?: Record<string, unknown>;
+ }
+ export interface CompletionUsage {
+     prompt_tokens: number;
+     completion_tokens: number;
+     total_tokens: number;
+ }
+ export interface ChatCompletionResponse {
+     id: string;
+     choices: ChatCompletionChoice[];
+     created: number;
+     model: string;
+     object: string;
+     usage?: CompletionUsage;
+ }
+ export interface ChatCompletionStreamChoice {
+     delta: ChatCompletionStreamResponseDelta;
+     finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
+     index: number;
+     logprobs?: Record<string, unknown>;
+ }
+ export interface ChatCompletionStreamResponseDelta {
+     content?: string;
+     tool_calls?: ChatCompletionMessageToolCallChunk[];
+     role?: MessageRole;
+     refusal?: string;
+ }
+ export interface ChatCompletionStreamResponse {
+     id: string;
+     choices: ChatCompletionStreamChoice[];
+     created: number;
+     model: string;
+     object: string;
+     usage?: CompletionUsage;
+ }
+ export interface ChatCompletionStreamCallbacks {
+     onOpen?: () => void;
+     onChunk?: (chunk: ChatCompletionStreamResponse) => void;
+     onContent?: (content: string) => void;
+     onTool?: (toolCall: ChatCompletionMessageToolCall) => void;
+     onFinish?: (response: ChatCompletionStreamResponse) => void;
+     onError?: (error: Error) => void;
+ }
+ export interface Error {
+     error: string;
  }
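
With the OpenAI-style types above, a request can be type-checked before it is sent. A sketch assuming the package root re-exports these types alongside `MessageRole` and `Provider`, as the README's imports suggest:

```typescript
import type {
  ChatCompletionRequest,
  ChatCompletionTool,
} from '@inference-gateway/sdk';
import { MessageRole } from '@inference-gateway/sdk';

const weatherTool: ChatCompletionTool = {
  type: 'function',
  function: {
    name: 'get_weather',
    description: 'Look up current weather for a location',
    parameters: {
      type: 'object',
      properties: {
        location: { type: 'string', description: 'City and state' },
      },
      required: ['location'],
    },
  },
};

// The compiler now rejects misspelled fields or a missing model/messages.
const request: ChatCompletionRequest = {
  model: 'gpt-4o',
  messages: [{ role: MessageRole.User, content: 'Weather in Paris?' }],
  tools: [weatherTool],
  max_tokens: 256,
  temperature: 0.2,
};
```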
@@ -6,14 +6,15 @@ var Provider;
      Provider["Ollama"] = "ollama";
      Provider["Groq"] = "groq";
      Provider["OpenAI"] = "openai";
-     Provider["Google"] = "google";
      Provider["Cloudflare"] = "cloudflare";
      Provider["Cohere"] = "cohere";
      Provider["Anthropic"] = "anthropic";
+     Provider["DeepSeek"] = "deepseek";
  })(Provider || (exports.Provider = Provider = {}));
  var MessageRole;
  (function (MessageRole) {
      MessageRole["System"] = "system";
      MessageRole["User"] = "user";
      MessageRole["Assistant"] = "assistant";
+     MessageRole["Tool"] = "tool";
  })(MessageRole || (exports.MessageRole = MessageRole = {}));
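
The compiled enum above confirms a breaking change the changelog does not call out: `Provider.Google` is removed while `Provider.DeepSeek` is added, so callers pinned to the Google provider must migrate. A quick sketch using the new provider:

```typescript
import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';

async function listDeepSeekModels() {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });
  // Provider.Google no longer compiles against 0.3.x types.
  return client.listModels(Provider.DeepSeek);
}
```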
@@ -2,124 +2,338 @@
  Object.defineProperty(exports, "__esModule", { value: true });
  const client_1 = require("@/client");
  const types_1 = require("@/types");
+ const web_1 = require("node:stream/web");
+ const node_util_1 = require("node:util");
  describe('InferenceGatewayClient', () => {
      let client;
-     const mockBaseUrl = 'http://localhost:8080';
+     const mockFetch = jest.fn();
      beforeEach(() => {
-         client = new client_1.InferenceGatewayClient(mockBaseUrl);
-         global.fetch = jest.fn();
+         client = new client_1.InferenceGatewayClient({
+             baseURL: 'http://localhost:8080/v1',
+             fetch: mockFetch,
+         });
+     });
+     afterEach(() => {
+         jest.clearAllMocks();
      });
      describe('listModels', () => {
          it('should fetch available models', async () => {
-             const mockResponse = [
-                 {
-                     provider: types_1.Provider.Ollama,
-                     models: [
-                         {
-                             name: 'llama2',
-                         },
-                     ],
-                 },
-             ];
-             global.fetch.mockResolvedValueOnce({
+             const mockResponse = {
+                 object: 'list',
+                 data: [
+                     {
+                         id: 'gpt-4o',
+                         object: 'model',
+                         created: 1686935002,
+                         owned_by: 'openai',
+                     },
+                     {
+                         id: 'llama-3.3-70b-versatile',
+                         object: 'model',
+                         created: 1723651281,
+                         owned_by: 'groq',
+                     },
+                 ],
+             };
+             mockFetch.mockResolvedValueOnce({
                  ok: true,
                  json: () => Promise.resolve(mockResponse),
              });
              const result = await client.listModels();
              expect(result).toEqual(mockResponse);
-             expect(global.fetch).toHaveBeenCalledWith(`${mockBaseUrl}/llms`, expect.objectContaining({
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models', expect.objectContaining({
+                 method: 'GET',
                  headers: expect.any(Headers),
              }));
          });
-     });
-     describe('listModelsByProvider', () => {
          it('should fetch models for a specific provider', async () => {
              const mockResponse = {
-                 provider: types_1.Provider.OpenAI,
-                 models: [
+                 object: 'list',
+                 data: [
                      {
-                         name: 'gpt-4',
+                         id: 'gpt-4o',
+                         object: 'model',
+                         created: 1686935002,
+                         owned_by: 'openai',
                      },
                  ],
              };
-             global.fetch.mockResolvedValueOnce({
+             mockFetch.mockResolvedValueOnce({
                  ok: true,
                  json: () => Promise.resolve(mockResponse),
              });
-             const result = await client.listModelsByProvider(types_1.Provider.OpenAI);
+             const result = await client.listModels(types_1.Provider.OpenAI);
              expect(result).toEqual(mockResponse);
-             expect(global.fetch).toHaveBeenCalledWith(`${mockBaseUrl}/llms/${types_1.Provider.OpenAI}`, expect.objectContaining({
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models?provider=openai', expect.objectContaining({
+                 method: 'GET',
                  headers: expect.any(Headers),
              }));
          });
-         it('should throw error when provider request fails', async () => {
+         it('should throw error when request fails', async () => {
              const errorMessage = 'Provider not found';
-             global.fetch.mockResolvedValueOnce({
+             mockFetch.mockResolvedValueOnce({
                  ok: false,
                  status: 404,
                  json: () => Promise.resolve({ error: errorMessage }),
              });
-             await expect(client.listModelsByProvider(types_1.Provider.OpenAI)).rejects.toThrow(errorMessage);
+             await expect(client.listModels(types_1.Provider.OpenAI)).rejects.toThrow(errorMessage);
          });
      });
-     describe('generateContent', () => {
-         it('should generate content with the specified provider', async () => {
+     describe('createChatCompletion', () => {
+         it('should create a chat completion', async () => {
              const mockRequest = {
-                 provider: types_1.Provider.Ollama,
-                 model: 'llama2',
+                 model: 'gpt-4o',
                  messages: [
                      { role: types_1.MessageRole.System, content: 'You are a helpful assistant' },
                      { role: types_1.MessageRole.User, content: 'Hello' },
                  ],
              };
              const mockResponse = {
-                 provider: types_1.Provider.Ollama,
-                 response: {
-                     role: types_1.MessageRole.Assistant,
-                     model: 'llama2',
-                     content: 'Hi there!',
+                 id: 'chatcmpl-123',
+                 object: 'chat.completion',
+                 created: 1677652288,
+                 model: 'gpt-4o',
+                 choices: [
+                     {
+                         index: 0,
+                         message: {
+                             role: types_1.MessageRole.Assistant,
+                             content: 'Hello! How can I help you today?',
+                         },
+                         finish_reason: 'stop',
+                     },
+                 ],
+                 usage: {
+                     prompt_tokens: 10,
+                     completion_tokens: 8,
+                     total_tokens: 18,
                  },
              };
-             global.fetch.mockResolvedValueOnce({
+             mockFetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: () => Promise.resolve(mockResponse),
+             });
+             const result = await client.createChatCompletion(mockRequest);
+             expect(result).toEqual(mockResponse);
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                 method: 'POST',
+                 body: JSON.stringify(mockRequest),
+             }));
+         });
+         it('should create a chat completion with a specific provider', async () => {
+             const mockRequest = {
+                 model: 'claude-3-opus-20240229',
+                 messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+             };
+             const mockResponse = {
+                 id: 'chatcmpl-456',
+                 object: 'chat.completion',
+                 created: 1677652288,
+                 model: 'claude-3-opus-20240229',
+                 choices: [
+                     {
+                         index: 0,
+                         message: {
+                             role: types_1.MessageRole.Assistant,
+                             content: 'Hello! How can I assist you today?',
+                         },
+                         finish_reason: 'stop',
+                     },
+                 ],
+                 usage: {
+                     prompt_tokens: 5,
+                     completion_tokens: 8,
+                     total_tokens: 13,
+                 },
+             };
+             mockFetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: () => Promise.resolve(mockResponse),
+             });
+             const result = await client.createChatCompletion(mockRequest, types_1.Provider.Anthropic);
+             expect(result).toEqual(mockResponse);
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions?provider=anthropic', expect.objectContaining({
+                 method: 'POST',
+                 body: JSON.stringify(mockRequest),
+             }));
+         });
+     });
+     describe('streamChatCompletion', () => {
+         it('should handle streaming chat completions', async () => {
+             const mockRequest = {
+                 model: 'gpt-4o',
+                 messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+             };
+             const mockStream = new web_1.TransformStream();
+             const writer = mockStream.writable.getWriter();
+             const encoder = new node_util_1.TextEncoder();
+             mockFetch.mockResolvedValueOnce({
+                 ok: true,
+                 body: mockStream.readable,
+             });
+             const callbacks = {
+                 onOpen: jest.fn(),
+                 onChunk: jest.fn(),
+                 onContent: jest.fn(),
+                 onFinish: jest.fn(),
+                 onError: jest.fn(),
+             };
+             const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+             // Simulate SSE events
+             await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
+                 'data: [DONE]\n\n'));
+             await writer.close();
+             await streamPromise;
+             expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+             expect(callbacks.onChunk).toHaveBeenCalledTimes(4);
+             expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+             expect(callbacks.onContent).toHaveBeenCalledWith('!');
+             expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                 method: 'POST',
+                 body: JSON.stringify({
+                     ...mockRequest,
+                     stream: true,
+                 }),
+             }));
+         });
+         it('should handle tool calls in streaming chat completions', async () => {
+             const mockRequest = {
+                 model: 'gpt-4o',
+                 messages: [
+                     {
+                         role: types_1.MessageRole.User,
+                         content: 'What is the weather in San Francisco?',
+                     },
+                 ],
+                 tools: [
+                     {
+                         type: 'function',
+                         function: {
+                             name: 'get_weather',
+                             parameters: {
+                                 type: 'object',
+                                 properties: {
+                                     location: {
+                                         type: 'string',
+                                         description: 'The city and state, e.g. San Francisco, CA',
+                                     },
+                                 },
+                                 required: ['location'],
+                             },
+                         },
+                     },
+                 ],
+             };
+             const mockStream = new web_1.TransformStream();
+             const writer = mockStream.writable.getWriter();
+             const encoder = new node_util_1.TextEncoder();
+             mockFetch.mockResolvedValueOnce({
+                 ok: true,
+                 body: mockStream.readable,
+             });
+             const callbacks = {
+                 onOpen: jest.fn(),
+                 onChunk: jest.fn(),
+                 onTool: jest.fn(),
+                 onFinish: jest.fn(),
+             };
+             const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+             // Simulate SSE events with tool calls
+             await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_123","type":"function","function":{"name":"get_weather"}}]},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"location\\""}}]},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\\"San Francisco, CA\\""}}]},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"}"}}]},"finish_reason":null}]}\n\n' +
+                 'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}\n\n' +
+                 'data: [DONE]\n\n'));
+             await writer.close();
+             await streamPromise;
+             expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+             expect(callbacks.onChunk).toHaveBeenCalledTimes(6);
+             expect(callbacks.onTool).toHaveBeenCalledTimes(4); // Called for each chunk with tool_calls
+             expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+         });
+         it('should handle errors in streaming chat completions', async () => {
+             const mockRequest = {
+                 model: 'gpt-4o',
+                 messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+             };
+             mockFetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 400,
+                 json: () => Promise.resolve({ error: 'Bad Request' }),
+             });
+             const callbacks = {
+                 onError: jest.fn(),
+             };
+             await expect(client.streamChatCompletion(mockRequest, callbacks)).rejects.toThrow('Bad Request');
+             expect(callbacks.onError).toHaveBeenCalledTimes(1);
+         });
+     });
+     describe('proxy', () => {
+         it('should proxy requests to a specific provider', async () => {
+             const mockResponse = { result: 'success' };
+             mockFetch.mockResolvedValueOnce({
                  ok: true,
                  json: () => Promise.resolve(mockResponse),
              });
-             const result = await client.generateContent(mockRequest);
+             const result = await client.proxy(types_1.Provider.OpenAI, 'embeddings', {
+                 method: 'POST',
+                 body: JSON.stringify({
+                     model: 'text-embedding-ada-002',
+                     input: 'Hello world',
+                 }),
+             });
              expect(result).toEqual(mockResponse);
-             expect(global.fetch).toHaveBeenCalledWith(`${mockBaseUrl}/llms/${mockRequest.provider}/generate`, expect.objectContaining({
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/proxy/openai/embeddings', expect.objectContaining({
                  method: 'POST',
                  body: JSON.stringify({
-                     model: mockRequest.model,
-                     messages: mockRequest.messages,
+                     model: 'text-embedding-ada-002',
+                     input: 'Hello world',
                  }),
              }));
          });
      });
      describe('healthCheck', () => {
          it('should return true when API is healthy', async () => {
-             global.fetch.mockResolvedValueOnce({
+             mockFetch.mockResolvedValueOnce({
                  ok: true,
-                 json: () => Promise.resolve({}),
              });
              const result = await client.healthCheck();
              expect(result).toBe(true);
-             expect(global.fetch).toHaveBeenCalledWith(`${mockBaseUrl}/health`, expect.any(Object));
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/health');
          });
          it('should return false when API is unhealthy', async () => {
-             global.fetch.mockRejectedValueOnce(new Error('API error'));
+             mockFetch.mockRejectedValueOnce(new Error('API error'));
              const result = await client.healthCheck();
              expect(result).toBe(false);
          });
      });
-     describe('error handling', () => {
-         it('should throw error when API request fails', async () => {
-             const errorMessage = 'Bad Request';
-             global.fetch.mockResolvedValueOnce({
-                 ok: false,
-                 status: 400,
-                 json: () => Promise.resolve({ error: errorMessage }),
+     describe('withOptions', () => {
+         it('should create a new client with merged options', () => {
+             const originalClient = new client_1.InferenceGatewayClient({
+                 baseURL: 'http://localhost:8080/v1',
+                 apiKey: 'test-key',
+                 fetch: mockFetch,
+             });
+             const newClient = originalClient.withOptions({
+                 defaultHeaders: { 'X-Custom-Header': 'value' },
              });
-             await expect(client.listModels()).rejects.toThrow(errorMessage);
+             expect(newClient).toBeInstanceOf(client_1.InferenceGatewayClient);
+             expect(newClient).not.toBe(originalClient);
+             // We can't directly test private properties, but we can test behavior
+             mockFetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: () => Promise.resolve({}),
+             });
+             newClient.listModels();
+             expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models', expect.objectContaining({
+                 headers: expect.any(Headers),
+             }));
          });
      });
  });
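
The test suite above leans on the new `fetch` option for dependency injection, and the same pattern works in a consumer's own test suite. A minimal sketch (Jest assumed; the response body is a stand-in):

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

it('lists models without touching the network', async () => {
  // Injected fetch double; no real HTTP request is made.
  const mockFetch = jest.fn().mockResolvedValue({
    ok: true,
    json: () => Promise.resolve({ object: 'list', data: [] }),
  });
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
    fetch: mockFetch as unknown as typeof fetch,
  });

  await expect(client.listModels()).resolves.toEqual({ object: 'list', data: [] });
  expect(mockFetch).toHaveBeenCalledWith(
    'http://localhost:8080/v1/models',
    expect.objectContaining({ method: 'GET' })
  );
});
```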
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@inference-gateway/sdk",
-   "version": "0.2.0",
+   "version": "0.3.2",
    "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
    "main": "dist/src/index.js",
    "types": "dist/src/index.d.ts",
@@ -18,7 +18,8 @@
      "ollama",
      "cloudflare",
      "cohere",
-     "typescript"
+     "typescript",
+     "deepseek"
    ],
    "author": "Eden Reich <eden.reich@gmail.com>",
    "license": "MIT",