@inference-gateway/sdk 0.2.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +185 -73
- package/dist/src/client.d.ts +42 -7
- package/dist/src/client.js +180 -27
- package/dist/src/types/index.d.ts +105 -15
- package/dist/src/types/index.js +2 -1
- package/dist/tests/client.test.js +266 -52
- package/package.json +3 -2
package/CHANGELOG.md
CHANGED

```diff
@@ -1,3 +1,35 @@
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+## [0.3.2](https://github.com/inference-gateway/typescript-sdk/compare/v0.3.1...v0.3.2) (2025-03-31)
+
+### 👷 CI
+
+* Add npm ci step to install project dependencies in release workflow ([84791b1](https://github.com/inference-gateway/typescript-sdk/commit/84791b1e4c319f91798c456c783ded6e22da8f81))
+
+## [0.3.1](https://github.com/inference-gateway/typescript-sdk/compare/v0.3.0...v0.3.1) (2025-03-31)
+
+### ♻️ Improvements
+
+* Make the SDK OpenAI compatible ([#2](https://github.com/inference-gateway/typescript-sdk/issues/2)) ([31657b3](https://github.com/inference-gateway/typescript-sdk/commit/31657b358f34ccc39acc5994248a95127f1ea46a))
+
+### 👷 CI
+
+* Update GitHub Actions release workflow to use GitHub App token and improve release handling ([14835e8](https://github.com/inference-gateway/typescript-sdk/commit/14835e8f9289314f34e711c02faf865ad9af6d66))
+* Update release configuration for semantic-release plugins and rules to be consistent with other repos ([20bd3f8](https://github.com/inference-gateway/typescript-sdk/commit/20bd3f82c68d0b1ee1d07b4fa75eb67524db4fb8))
+
+## [0.3.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.2.0...v0.3.0) (2025-02-02)
+
+### ✨ Features
+
+* add streaming content functionality to InferenceGatewayClient and update README ([ba41d2d](https://github.com/inference-gateway/typescript-sdk/commit/ba41d2dc136b83372820af2aefa63969932e16f0))
+
+### 📚 Documentation
+
+* **fix:** Update examples in README.md ([4e972fc](https://github.com/inference-gateway/typescript-sdk/commit/4e972fc2c577f41b0b443f1c87cde7561717b577))
+* Update OpenAPI spec - download it from Inference-gateway ([9816b15](https://github.com/inference-gateway/typescript-sdk/commit/9816b151db6b48b04723f93b988daf83239a09df))
+
 ## [0.2.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.1.6...v0.2.0) (2025-01-28)
 
 ### ✨ Features
```
package/README.md
CHANGED

````diff
@@ -1,15 +1,18 @@
-# Inference Gateway
+# Inference Gateway TypeScript SDK
 
-An SDK written in
+An SDK written in TypeScript for the [Inference Gateway](https://github.com/edenreich/inference-gateway).
 
-- [Inference Gateway
+- [Inference Gateway TypeScript SDK](#inference-gateway-typescript-sdk)
 - [Installation](#installation)
 - [Usage](#usage)
 - [Creating a Client](#creating-a-client)
-- [Listing
-- [
-- [
+- [Listing Models](#listing-models)
+- [Creating Chat Completions](#creating-chat-completions)
+- [Streaming Chat Completions](#streaming-chat-completions)
+- [Tool Calls](#tool-calls)
+- [Proxying Requests](#proxying-requests)
 - [Health Check](#health-check)
+- [Creating a Client with Custom Options](#creating-a-client-with-custom-options)
 - [Contributing](#contributing)
 - [License](#license)
 
@@ -21,116 +24,199 @@ Run `npm i @inference-gateway/sdk`.
 
 ### Creating a Client
 
+```typescript
+import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+// Create a client with default options
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+  apiKey: 'your-api-key', // Optional
+});
+```
+
+### Listing Models
+
+To list all available models:
+
+```typescript
+import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
+
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
+try {
+  // List all models
+  const models = await client.listModels();
+  console.log('All models:', models);
+
+  // List models from a specific provider
+  const openaiModels = await client.listModels(Provider.OpenAI);
+  console.log('OpenAI models:', openaiModels);
+} catch (error) {
+  console.error('Error:', error);
+}
+```
+
+### Creating Chat Completions
+
+To generate content using a model:
+
 ```typescript
 import {
   InferenceGatewayClient,
-
+  MessageRole,
   Provider,
 } from '@inference-gateway/sdk';
 
-
-
-
-
-
-
-
-
-providerModels.models.forEach((model) => {
-  console.log(`Model: ${model.id}`);
-});
-});
-
-// Generate content
-const response = await client.generateContent({
-  provider: Provider.Ollama,
-  model: 'llama2',
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
+try {
+  const response = await client.createChatCompletion(
+    {
+      model: 'gpt-4o',
       messages: [
         {
           role: MessageRole.System,
-          content: 'You are a helpful
+          content: 'You are a helpful assistant',
         },
         {
           role: MessageRole.User,
           content: 'Tell me a joke',
         },
       ],
-}
+    },
+    Provider.OpenAI
+  ); // Provider is optional
 
-
-
-
-}
+  console.log('Response:', response.choices[0].message.content);
+} catch (error) {
+  console.error('Error:', error);
 }
-
-main();
 ```
 
-###
+### Streaming Chat Completions
 
-To
+To stream content from a model:
 
 ```typescript
+import {
+  InferenceGatewayClient,
+  MessageRole,
+  Provider,
+} from '@inference-gateway/sdk';
+
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
 try {
-
-
-
-
-
-
-
+  await client.streamChatCompletion(
+    {
+      model: 'llama-3.3-70b-versatile',
+      messages: [
+        {
+          role: MessageRole.User,
+          content: 'Tell me a story',
+        },
+      ],
+    },
+    {
+      onOpen: () => console.log('Stream opened'),
+      onContent: (content) => process.stdout.write(content),
+      onChunk: (chunk) => console.log('Received chunk:', chunk.id),
+      onFinish: () => console.log('\nStream completed'),
+      onError: (error) => console.error('Stream error:', error),
+    },
+    Provider.Groq // Provider is optional
+  );
 } catch (error) {
   console.error('Error:', error);
 }
 ```
 
-###
+### Tool Calls
 
-To
+To use tool calls with models that support them:
 
 ```typescript
+import {
+  InferenceGatewayClient,
+  MessageRole,
+  Provider,
+} from '@inference-gateway/sdk';
+
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
 try {
-
-
-
-
-
+  await client.streamChatCompletion(
+    {
+      model: 'gpt-4o',
+      messages: [
+        {
+          role: MessageRole.User,
+          content: "What's the weather in San Francisco?",
+        },
+      ],
+      tools: [
+        {
+          type: 'function',
+          function: {
+            name: 'get_weather',
+            parameters: {
+              type: 'object',
+              properties: {
+                location: {
+                  type: 'string',
+                  description: 'The city and state, e.g. San Francisco, CA',
+                },
+              },
+              required: ['location'],
+            },
+          },
+        },
+      ],
+    },
+    {
+      onTool: (toolCall) => {
+        console.log('Tool call:', toolCall.function.name);
+        console.log('Arguments:', toolCall.function.arguments);
+      },
+      onContent: (content) => process.stdout.write(content),
+      onFinish: () => console.log('\nStream completed'),
+    },
+    Provider.OpenAI
+  );
 } catch (error) {
   console.error('Error:', error);
 }
 ```
 
-###
+### Proxying Requests
 
-To
+To proxy requests directly to a provider:
 
 ```typescript
-import {
-  InferenceGatewayClient,
-  Message,
-  MessageRole,
-  Provider,
-} from '@inference-gateway/sdk';
+import { InferenceGatewayClient, Provider } from '@inference-gateway/sdk';
 
-const client = new InferenceGatewayClient(
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
 
-
-
-
-
-
-
-    },
-    {
-      role: MessageRole.User,
-      content: 'Tell me a joke',
-    },
-  ],
+try {
+  const response = await client.proxy(Provider.OpenAI, 'embeddings', {
+    method: 'POST',
+    body: JSON.stringify({
+      model: 'text-embedding-ada-002',
+      input: 'Hello world',
+    }),
   });
 
-  console.log('
-  console.log('Response:', response.response);
+  console.log('Embeddings:', response);
 } catch (error) {
   console.error('Error:', error);
 }
@@ -138,9 +224,15 @@ const client = new InferenceGatewayClient('http://localhost:8080');
 
 ### Health Check
 
-To check if the Inference Gateway is running
+To check if the Inference Gateway is running:
 
 ```typescript
+import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
 try {
   const isHealthy = await client.healthCheck();
   console.log('API is healthy:', isHealthy);
@@ -149,6 +241,26 @@ try {
 }
 ```
 
+### Creating a Client with Custom Options
+
+You can create a new client with custom options using the `withOptions` method:
+
+```typescript
+import { InferenceGatewayClient } from '@inference-gateway/sdk';
+
+const client = new InferenceGatewayClient({
+  baseURL: 'http://localhost:8080/v1',
+});
+
+// Create a new client with custom headers
+const clientWithHeaders = client.withOptions({
+  defaultHeaders: {
+    'X-Custom-Header': 'value',
+  },
+  timeout: 60000, // 60 seconds
+});
+```
+
 ## Contributing
 
 Please refer to the [CONTRIBUTING.md](CONTRIBUTING.md) file for information about how to get involved. We welcome issues, questions, and pull requests.
````
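Note the breaking API change the README diff documents: the 0.2.0 `generateContent({ provider, model, messages })` call visible in the removed lines is replaced by the OpenAI-style `createChatCompletion(request, provider?)`. A minimal migration sketch under that reading of the diff; the model names and prompt are illustrative:

```typescript
import {
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

async function main() {
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
  });

  // 0.2.0 (removed): the provider lived inside the request body and the
  // reply was read from `response.response`.
  // const old = await client.generateContent({
  //   provider: Provider.Ollama,
  //   model: 'llama2',
  //   messages: [{ role: MessageRole.User, content: 'Tell me a joke' }],
  // });

  // 0.3.x: the provider is an optional second argument and the response
  // follows the OpenAI chat-completion shape.
  const response = await client.createChatCompletion(
    {
      model: 'gpt-4o',
      messages: [{ role: MessageRole.User, content: 'Tell me a joke' }],
    },
    Provider.OpenAI
  );
  console.log(response.choices[0].message.content);
}

main().catch(console.error);
```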
package/dist/src/client.d.ts
CHANGED

```diff
@@ -1,11 +1,46 @@
-import {
+import { ChatCompletionRequest, ChatCompletionResponse, ChatCompletionStreamCallbacks, ListModelsResponse, Provider } from './types';
+export interface ClientOptions {
+    baseURL?: string;
+    apiKey?: string;
+    defaultHeaders?: Record<string, string>;
+    defaultQuery?: Record<string, string>;
+    timeout?: number;
+    fetch?: typeof globalThis.fetch;
+}
 export declare class InferenceGatewayClient {
-    private
-    private
-
+    private baseURL;
+    private apiKey?;
+    private defaultHeaders;
+    private defaultQuery;
+    private timeout;
+    private fetchFn;
+    constructor(options?: ClientOptions);
+    /**
+     * Creates a new instance of the client with the given options merged with the existing options.
+     */
+    withOptions(options: ClientOptions): InferenceGatewayClient;
+    /**
+     * Makes a request to the API.
+     */
     private request;
-
-
-
+    /**
+     * Lists the currently available models.
+     */
+    listModels(provider?: Provider): Promise<ListModelsResponse>;
+    /**
+     * Creates a chat completion.
+     */
+    createChatCompletion(request: ChatCompletionRequest, provider?: Provider): Promise<ChatCompletionResponse>;
+    /**
+     * Creates a streaming chat completion.
+     */
+    streamChatCompletion(request: ChatCompletionRequest, callbacks: ChatCompletionStreamCallbacks, provider?: Provider): Promise<void>;
+    /**
+     * Proxy a request to a specific provider.
+     */
+    proxy<T = unknown>(provider: Provider, path: string, options?: RequestInit): Promise<T>;
+    /**
+     * Health check endpoint.
+     */
    healthCheck(): Promise<boolean>;
 }
```
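The new `ClientOptions.fetch` hook is what the rewritten test suite further down relies on, and it is also the easiest seam for stubbing the client in consumer tests. A small sketch assuming a Jest environment; the cast and the empty model list are illustrative:

```typescript
import { InferenceGatewayClient } from '@inference-gateway/sdk';

it('lists models without a running gateway', async () => {
  // Inject a mock fetch through the new ClientOptions.fetch option.
  const mockFetch = jest.fn().mockResolvedValue({
    ok: true,
    json: () => Promise.resolve({ object: 'list', data: [] }),
  });
  const client = new InferenceGatewayClient({
    baseURL: 'http://localhost:8080/v1',
    fetch: mockFetch as unknown as typeof fetch,
  });

  await expect(client.listModels()).resolves.toEqual({ object: 'list', data: [] });
  expect(mockFetch).toHaveBeenCalledWith(
    'http://localhost:8080/v1/models',
    expect.objectContaining({ method: 'GET' })
  );
});
```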
package/dist/src/client.js
CHANGED

```diff
@@ -2,48 +2,201 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.InferenceGatewayClient = void 0;
 class InferenceGatewayClient {
-
-
-
-
-
+    baseURL;
+    apiKey;
+    defaultHeaders;
+    defaultQuery;
+    timeout;
+    fetchFn;
+    constructor(options = {}) {
+        this.baseURL = options.baseURL || 'http://localhost:8080/v1';
+        this.apiKey = options.apiKey;
+        this.defaultHeaders = options.defaultHeaders || {};
+        this.defaultQuery = options.defaultQuery || {};
+        this.timeout = options.timeout || 30000;
+        this.fetchFn = options.fetch || globalThis.fetch;
     }
-
+    /**
+     * Creates a new instance of the client with the given options merged with the existing options.
+     */
+    withOptions(options) {
+        return new InferenceGatewayClient({
+            baseURL: options.baseURL || this.baseURL,
+            apiKey: options.apiKey || this.apiKey,
+            defaultHeaders: { ...this.defaultHeaders, ...options.defaultHeaders },
+            defaultQuery: { ...this.defaultQuery, ...options.defaultQuery },
+            timeout: options.timeout || this.timeout,
+            fetch: options.fetch || this.fetchFn,
+        });
+    }
+    /**
+     * Makes a request to the API.
+     */
+    async request(path, options = {}, query = {}) {
         const headers = new Headers({
             'Content-Type': 'application/json',
+            ...this.defaultHeaders,
             ...options.headers,
         });
-        if (this.
-            headers.set('Authorization', `Bearer ${this.
+        if (this.apiKey) {
+            headers.set('Authorization', `Bearer ${this.apiKey}`);
         }
-
-
-
+        // Combine default query parameters with provided ones
+        const queryParams = new URLSearchParams({
+            ...this.defaultQuery,
+            ...query,
         });
-
-
-
+        const queryString = queryParams.toString();
+        const url = `${this.baseURL}${path}${queryString ? `?${queryString}` : ''}`;
+        const controller = new AbortController();
+        const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+        try {
+            const response = await this.fetchFn(url, {
+                ...options,
+                headers,
+                signal: controller.signal,
+            });
+            if (!response.ok) {
+                const error = (await response.json());
+                throw new Error(error.error || `HTTP error! status: ${response.status}`);
+            }
+            return response.json();
+        }
+        finally {
+            globalThis.clearTimeout(timeoutId);
         }
-        return response.json();
-    }
-    async listModels() {
-        return this.request('/llms');
     }
-
-
+    /**
+     * Lists the currently available models.
+     */
+    async listModels(provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        return this.request('/models', { method: 'GET' }, query);
     }
-
-
+    /**
+     * Creates a chat completion.
+     */
+    async createChatCompletion(request, provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        return this.request('/chat/completions', {
             method: 'POST',
-            body: JSON.stringify(
-
-
-
+            body: JSON.stringify(request),
+        }, query);
+    }
+    /**
+     * Creates a streaming chat completion.
+     */
+    async streamChatCompletion(request, callbacks, provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        const queryParams = new URLSearchParams({
+            ...this.defaultQuery,
+            ...query,
         });
+        const queryString = queryParams.toString();
+        const url = `${this.baseURL}/chat/completions${queryString ? `?${queryString}` : ''}`;
+        const headers = new Headers({
+            'Content-Type': 'application/json',
+            ...this.defaultHeaders,
+        });
+        if (this.apiKey) {
+            headers.set('Authorization', `Bearer ${this.apiKey}`);
+        }
+        const controller = new AbortController();
+        const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+        try {
+            const response = await this.fetchFn(url, {
+                method: 'POST',
+                headers,
+                body: JSON.stringify({
+                    ...request,
+                    stream: true,
+                }),
+                signal: controller.signal,
+            });
+            if (!response.ok) {
+                const error = (await response.json());
+                throw new Error(error.error || `HTTP error! status: ${response.status}`);
+            }
+            if (!response.body) {
+                throw new Error('Response body is not readable');
+            }
+            callbacks.onOpen?.();
+            const reader = response.body.getReader();
+            const decoder = new TextDecoder();
+            let buffer = '';
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                buffer += decoder.decode(value, { stream: true });
+                const lines = buffer.split('\n');
+                buffer = lines.pop() || '';
+                for (const line of lines) {
+                    if (line.startsWith('data: ')) {
+                        const data = line.slice(5).trim();
+                        if (data === '[DONE]') {
+                            callbacks.onFinish?.(null);
+                            return;
+                        }
+                        try {
+                            const chunk = JSON.parse(data);
+                            callbacks.onChunk?.(chunk);
+                            const content = chunk.choices[0]?.delta?.content;
+                            if (content) {
+                                callbacks.onContent?.(content);
+                            }
+                            const toolCalls = chunk.choices[0]?.delta?.tool_calls;
+                            if (toolCalls && toolCalls.length > 0) {
+                                const toolCall = {
+                                    id: toolCalls[0].id || '',
+                                    type: 'function',
+                                    function: {
+                                        name: toolCalls[0].function?.name || '',
+                                        arguments: toolCalls[0].function?.arguments || '',
+                                    },
+                                };
+                                callbacks.onTool?.(toolCall);
+                            }
+                        }
+                        catch (e) {
+                            globalThis.console.error('Error parsing SSE data:', e);
+                        }
+                    }
+                }
+            }
+        }
+        catch (error) {
+            const apiError = {
+                error: error.message || 'Unknown error',
+            };
+            callbacks.onError?.(apiError);
+            throw error;
+        }
+        finally {
+            globalThis.clearTimeout(timeoutId);
+        }
+    }
+    /**
+     * Proxy a request to a specific provider.
+     */
+    async proxy(provider, path, options = {}) {
+        return this.request(`/proxy/${provider}/${path}`, options);
     }
+    /**
+     * Health check endpoint.
+     */
     async healthCheck() {
         try {
-            await this.
+            await this.fetchFn(`${this.baseURL.replace('/v1', '')}/health`);
             return true;
         }
         catch {
```
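The streaming loop added above buffers decoded bytes, splits on newlines, and only acts on complete `data: ` frames, treating `[DONE]` as the end-of-stream sentinel. Its core reduces to this standalone sketch (the function name is illustrative, not something the SDK exports):

```typescript
// Mirrors the SSE handling inside streamChatCompletion: act only on
// complete `data: ` frames; `[DONE]` terminates the stream.
function handleSseLines(text: string, onJson: (chunk: unknown) => void): void {
  for (const line of text.split('\n')) {
    if (!line.startsWith('data: ')) continue; // skip blank/comment lines
    const data = line.slice(5).trim();
    if (data === '[DONE]') return; // end-of-stream sentinel
    try {
      onJson(JSON.parse(data)); // one chat.completion.chunk per frame
    } catch {
      // the real client keeps incomplete frames in its buffer instead
    }
  }
}

handleSseLines(
  'data: {"choices":[{"delta":{"content":"Hi"}}]}\n\ndata: [DONE]\n\n',
  (chunk) => console.log(chunk)
);
```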
package/dist/src/types/index.d.ts
CHANGED

```diff
@@ -2,37 +2,127 @@ export declare enum Provider {
     Ollama = "ollama",
     Groq = "groq",
     OpenAI = "openai",
-    Google = "google",
     Cloudflare = "cloudflare",
     Cohere = "cohere",
-    Anthropic = "anthropic"
+    Anthropic = "anthropic",
+    DeepSeek = "deepseek"
 }
 export declare enum MessageRole {
     System = "system",
     User = "user",
-    Assistant = "assistant"
+    Assistant = "assistant",
+    Tool = "tool"
 }
 export interface Message {
     role: MessageRole;
     content: string;
+    tool_calls?: ChatCompletionMessageToolCall[];
+    tool_call_id?: string;
 }
 export interface Model {
+    id: string;
+    object: string;
+    created: number;
+    owned_by: string;
+}
+export interface ListModelsResponse {
+    object: string;
+    data: Model[];
+}
+export interface ChatCompletionMessageToolCallFunction {
+    name: string;
+    arguments: string;
+}
+export interface ChatCompletionMessageToolCall {
+    id: string;
+    type: 'function';
+    function: ChatCompletionMessageToolCallFunction;
+}
+export interface ChatCompletionMessageToolCallChunk {
+    index: number;
+    id?: string;
+    type?: string;
+    function?: {
+        name?: string;
+        arguments?: string;
+    };
+}
+export interface FunctionParameters {
+    type: string;
+    properties?: Record<string, unknown>;
+    required?: string[];
+}
+export interface FunctionObject {
+    description?: string;
     name: string;
+    parameters: FunctionParameters;
+    strict?: boolean;
 }
-export interface
-
-
+export interface ChatCompletionTool {
+    type: 'function';
+    function: FunctionObject;
 }
-export interface
-    provider: Provider;
+export interface ChatCompletionRequest {
     model: string;
     messages: Message[];
+    max_tokens?: number;
+    stream?: boolean;
+    stream_options?: ChatCompletionStreamOptions;
+    tools?: ChatCompletionTool[];
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
 }
-export interface
-
-
-
-
-
+export interface ChatCompletionStreamOptions {
+    include_usage?: boolean;
+}
+export interface ChatCompletionChoice {
+    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
+    index: number;
+    message: Message;
+    logprobs?: Record<string, unknown>;
+}
+export interface CompletionUsage {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+}
+export interface ChatCompletionResponse {
+    id: string;
+    choices: ChatCompletionChoice[];
+    created: number;
+    model: string;
+    object: string;
+    usage?: CompletionUsage;
+}
+export interface ChatCompletionStreamChoice {
+    delta: ChatCompletionStreamResponseDelta;
+    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
+    index: number;
+    logprobs?: Record<string, unknown>;
+}
+export interface ChatCompletionStreamResponseDelta {
+    content?: string;
+    tool_calls?: ChatCompletionMessageToolCallChunk[];
+    role?: MessageRole;
+    refusal?: string;
+}
+export interface ChatCompletionStreamResponse {
+    id: string;
+    choices: ChatCompletionStreamChoice[];
+    created: number;
+    model: string;
+    object: string;
+    usage?: CompletionUsage;
+}
+export interface ChatCompletionStreamCallbacks {
+    onOpen?: () => void;
+    onChunk?: (chunk: ChatCompletionStreamResponse) => void;
+    onContent?: (content: string) => void;
+    onTool?: (toolCall: ChatCompletionMessageToolCall) => void;
+    onFinish?: (response: ChatCompletionStreamResponse) => void;
+    onError?: (error: Error) => void;
+}
+export interface Error {
+    error: string;
 }
```
package/dist/src/types/index.js
CHANGED

```diff
@@ -6,14 +6,15 @@ var Provider;
     Provider["Ollama"] = "ollama";
     Provider["Groq"] = "groq";
     Provider["OpenAI"] = "openai";
-    Provider["Google"] = "google";
     Provider["Cloudflare"] = "cloudflare";
     Provider["Cohere"] = "cohere";
     Provider["Anthropic"] = "anthropic";
+    Provider["DeepSeek"] = "deepseek";
 })(Provider || (exports.Provider = Provider = {}));
 var MessageRole;
 (function (MessageRole) {
     MessageRole["System"] = "system";
     MessageRole["User"] = "user";
     MessageRole["Assistant"] = "assistant";
+    MessageRole["Tool"] = "tool";
 })(MessageRole || (exports.MessageRole = MessageRole = {}));
```
package/dist/tests/client.test.js
CHANGED

```diff
@@ -2,124 +2,338 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 const client_1 = require("@/client");
 const types_1 = require("@/types");
+const web_1 = require("node:stream/web");
+const node_util_1 = require("node:util");
 describe('InferenceGatewayClient', () => {
     let client;
-    const
+    const mockFetch = jest.fn();
     beforeEach(() => {
-        client = new client_1.InferenceGatewayClient(
-
+        client = new client_1.InferenceGatewayClient({
+            baseURL: 'http://localhost:8080/v1',
+            fetch: mockFetch,
+        });
+    });
+    afterEach(() => {
+        jest.clearAllMocks();
     });
     describe('listModels', () => {
         it('should fetch available models', async () => {
-            const mockResponse =
-
-
-
-
-
-
-
-
-
+            const mockResponse = {
+                object: 'list',
+                data: [
+                    {
+                        id: 'gpt-4o',
+                        object: 'model',
+                        created: 1686935002,
+                        owned_by: 'openai',
+                    },
+                    {
+                        id: 'llama-3.3-70b-versatile',
+                        object: 'model',
+                        created: 1723651281,
+                        owned_by: 'groq',
+                    },
+                ],
+            };
+            mockFetch.mockResolvedValueOnce({
                 ok: true,
                 json: () => Promise.resolve(mockResponse),
             });
             const result = await client.listModels();
             expect(result).toEqual(mockResponse);
-            expect(
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models', expect.objectContaining({
+                method: 'GET',
                 headers: expect.any(Headers),
             }));
         });
-    });
-    describe('listModelsByProvider', () => {
         it('should fetch models for a specific provider', async () => {
             const mockResponse = {
-
-
+                object: 'list',
+                data: [
                     {
-
+                        id: 'gpt-4o',
+                        object: 'model',
+                        created: 1686935002,
+                        owned_by: 'openai',
                     },
                 ],
             };
-
+            mockFetch.mockResolvedValueOnce({
                 ok: true,
                 json: () => Promise.resolve(mockResponse),
             });
-            const result = await client.
+            const result = await client.listModels(types_1.Provider.OpenAI);
             expect(result).toEqual(mockResponse);
-            expect(
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models?provider=openai', expect.objectContaining({
+                method: 'GET',
                 headers: expect.any(Headers),
             }));
         });
-        it('should throw error when
+        it('should throw error when request fails', async () => {
             const errorMessage = 'Provider not found';
-
+            mockFetch.mockResolvedValueOnce({
                 ok: false,
                 status: 404,
                 json: () => Promise.resolve({ error: errorMessage }),
             });
-            await expect(client.
+            await expect(client.listModels(types_1.Provider.OpenAI)).rejects.toThrow(errorMessage);
         });
     });
-    describe('
-        it('should
+    describe('createChatCompletion', () => {
+        it('should create a chat completion', async () => {
             const mockRequest = {
-
-                model: 'llama2',
+                model: 'gpt-4o',
                 messages: [
                     { role: types_1.MessageRole.System, content: 'You are a helpful assistant' },
                     { role: types_1.MessageRole.User, content: 'Hello' },
                 ],
             };
             const mockResponse = {
-
-
-
-
-
+                id: 'chatcmpl-123',
+                object: 'chat.completion',
+                created: 1677652288,
+                model: 'gpt-4o',
+                choices: [
+                    {
+                        index: 0,
+                        message: {
+                            role: types_1.MessageRole.Assistant,
+                            content: 'Hello! How can I help you today?',
+                        },
+                        finish_reason: 'stop',
+                    },
+                ],
+                usage: {
+                    prompt_tokens: 10,
+                    completion_tokens: 8,
+                    total_tokens: 18,
                 },
             };
-
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                json: () => Promise.resolve(mockResponse),
+            });
+            const result = await client.createChatCompletion(mockRequest);
+            expect(result).toEqual(mockResponse);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify(mockRequest),
+            }));
+        });
+        it('should create a chat completion with a specific provider', async () => {
+            const mockRequest = {
+                model: 'claude-3-opus-20240229',
+                messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+            };
+            const mockResponse = {
+                id: 'chatcmpl-456',
+                object: 'chat.completion',
+                created: 1677652288,
+                model: 'claude-3-opus-20240229',
+                choices: [
+                    {
+                        index: 0,
+                        message: {
+                            role: types_1.MessageRole.Assistant,
+                            content: 'Hello! How can I assist you today?',
+                        },
+                        finish_reason: 'stop',
+                    },
+                ],
+                usage: {
+                    prompt_tokens: 5,
+                    completion_tokens: 8,
+                    total_tokens: 13,
+                },
+            };
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                json: () => Promise.resolve(mockResponse),
+            });
+            const result = await client.createChatCompletion(mockRequest, types_1.Provider.Anthropic);
+            expect(result).toEqual(mockResponse);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions?provider=anthropic', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify(mockRequest),
+            }));
+        });
+    });
+    describe('streamChatCompletion', () => {
+        it('should handle streaming chat completions', async () => {
+            const mockRequest = {
+                model: 'gpt-4o',
+                messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onContent: jest.fn(),
+                onFinish: jest.fn(),
+                onError: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            // Simulate SSE events
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(4);
+            expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+            expect(callbacks.onContent).toHaveBeenCalledWith('!');
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/chat/completions', expect.objectContaining({
+                method: 'POST',
+                body: JSON.stringify({
+                    ...mockRequest,
+                    stream: true,
+                }),
+            }));
+        });
+        it('should handle tool calls in streaming chat completions', async () => {
+            const mockRequest = {
+                model: 'gpt-4o',
+                messages: [
+                    {
+                        role: types_1.MessageRole.User,
+                        content: 'What is the weather in San Francisco?',
+                    },
+                ],
+                tools: [
+                    {
+                        type: 'function',
+                        function: {
+                            name: 'get_weather',
+                            parameters: {
+                                type: 'object',
+                                properties: {
+                                    location: {
+                                        type: 'string',
+                                        description: 'The city and state, e.g. San Francisco, CA',
+                                    },
+                                },
+                                required: ['location'],
+                            },
+                        },
+                    },
+                ],
+            };
+            const mockStream = new web_1.TransformStream();
+            const writer = mockStream.writable.getWriter();
+            const encoder = new node_util_1.TextEncoder();
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                body: mockStream.readable,
+            });
+            const callbacks = {
+                onOpen: jest.fn(),
+                onChunk: jest.fn(),
+                onTool: jest.fn(),
+                onFinish: jest.fn(),
+            };
+            const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+            // Simulate SSE events with tool calls
+            await writer.write(encoder.encode('data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"id":"call_123","type":"function","function":{"name":"get_weather"}}]},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\\"location\\""}}]},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":":\\"San Francisco, CA\\""}}]},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"}"}}]},"finish_reason":null}]}\n\n' +
+                'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"tool_calls"}]}\n\n' +
+                'data: [DONE]\n\n'));
+            await writer.close();
+            await streamPromise;
+            expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+            expect(callbacks.onChunk).toHaveBeenCalledTimes(6);
+            expect(callbacks.onTool).toHaveBeenCalledTimes(4); // Called for each chunk with tool_calls
+            expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+        });
+        it('should handle errors in streaming chat completions', async () => {
+            const mockRequest = {
+                model: 'gpt-4o',
+                messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
+            };
+            mockFetch.mockResolvedValueOnce({
+                ok: false,
+                status: 400,
+                json: () => Promise.resolve({ error: 'Bad Request' }),
+            });
+            const callbacks = {
+                onError: jest.fn(),
+            };
+            await expect(client.streamChatCompletion(mockRequest, callbacks)).rejects.toThrow('Bad Request');
+            expect(callbacks.onError).toHaveBeenCalledTimes(1);
+        });
+    });
+    describe('proxy', () => {
+        it('should proxy requests to a specific provider', async () => {
+            const mockResponse = { result: 'success' };
+            mockFetch.mockResolvedValueOnce({
                 ok: true,
                 json: () => Promise.resolve(mockResponse),
             });
-            const result = await client.
+            const result = await client.proxy(types_1.Provider.OpenAI, 'embeddings', {
+                method: 'POST',
+                body: JSON.stringify({
+                    model: 'text-embedding-ada-002',
+                    input: 'Hello world',
+                }),
+            });
             expect(result).toEqual(mockResponse);
-            expect(
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/proxy/openai/embeddings', expect.objectContaining({
                 method: 'POST',
                 body: JSON.stringify({
-                    model:
-
+                    model: 'text-embedding-ada-002',
+                    input: 'Hello world',
                 }),
             }));
         });
     });
     describe('healthCheck', () => {
         it('should return true when API is healthy', async () => {
-
+            mockFetch.mockResolvedValueOnce({
                 ok: true,
-                json: () => Promise.resolve({}),
             });
             const result = await client.healthCheck();
             expect(result).toBe(true);
-            expect(
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/health');
         });
         it('should return false when API is unhealthy', async () => {
-
+            mockFetch.mockRejectedValueOnce(new Error('API error'));
             const result = await client.healthCheck();
             expect(result).toBe(false);
         });
     });
-    describe('
-        it('should
-            const
-
-
-
+    describe('withOptions', () => {
+        it('should create a new client with merged options', () => {
+            const originalClient = new client_1.InferenceGatewayClient({
+                baseURL: 'http://localhost:8080/v1',
+                apiKey: 'test-key',
+                fetch: mockFetch,
+            });
+            const newClient = originalClient.withOptions({
+                defaultHeaders: { 'X-Custom-Header': 'value' },
             });
-
+            expect(newClient).toBeInstanceOf(client_1.InferenceGatewayClient);
+            expect(newClient).not.toBe(originalClient);
+            // We can't directly test private properties, but we can test behavior
+            mockFetch.mockResolvedValueOnce({
+                ok: true,
+                json: () => Promise.resolve({}),
+            });
+            newClient.listModels();
+            expect(mockFetch).toHaveBeenCalledWith('http://localhost:8080/v1/models', expect.objectContaining({
+                headers: expect.any(Headers),
+            }));
         });
     });
 });
```
package/package.json
CHANGED

```diff
@@ -1,6 +1,6 @@
 {
   "name": "@inference-gateway/sdk",
-  "version": "0.2.0",
+  "version": "0.3.2",
   "description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
   "main": "dist/src/index.js",
   "types": "dist/src/index.d.ts",
@@ -18,7 +18,8 @@
     "ollama",
     "cloudflare",
     "cohere",
-    "typescript"
+    "typescript",
+    "deepseek"
   ],
   "author": "Eden Reich <eden.reich@gmail.com>",
   "license": "MIT",
```