@inference-gateway/sdk 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -0
- package/README.md +29 -2
- package/dist/src/client.d.ts +2 -1
- package/dist/src/client.js +58 -0
- package/dist/src/types/index.d.ts +9 -0
- package/dist/tests/client.test.js +105 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
## [0.3.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.2.0...v0.3.0) (2025-02-02)
|
|
2
|
+
|
|
3
|
+
### ✨ Features
|
|
4
|
+
|
|
5
|
+
* add streaming content functionality to InferenceGatewayClient and update README ([ba41d2d](https://github.com/inference-gateway/typescript-sdk/commit/ba41d2dc136b83372820af2aefa63969932e16f0))
|
|
6
|
+
|
|
7
|
+
### 📚 Documentation
|
|
8
|
+
|
|
9
|
+
* **fix:** Update examples in README.md ([4e972fc](https://github.com/inference-gateway/typescript-sdk/commit/4e972fc2c577f41b0b443f1c87cde7561717b577))
|
|
10
|
+
* Update OpenAPI spec - download it from Inference-gateway ([9816b15](https://github.com/inference-gateway/typescript-sdk/commit/9816b151db6b48b04723f93b988daf83239a09df))
|
|
11
|
+
|
|
1
12
|
## [0.2.0](https://github.com/inference-gateway/typescript-sdk/compare/v0.1.6...v0.2.0) (2025-01-28)
|
|
2
13
|
|
|
3
14
|
### ✨ Features
|
package/README.md
CHANGED
|
@@ -9,6 +9,7 @@ An SDK written in Typescript for the [Inference Gateway](https://github.com/eden
|
|
|
9
9
|
- [Listing All Models](#listing-all-models)
|
|
10
10
|
- [List Models by Provider](#list-models-by-provider)
|
|
11
11
|
- [Generating Content](#generating-content)
|
|
12
|
+
- [Streaming Content](#streaming-content)
|
|
12
13
|
- [Health Check](#health-check)
|
|
13
14
|
- [Contributing](#contributing)
|
|
14
15
|
- [License](#license)
|
|
@@ -37,7 +38,7 @@ async function main() {
|
|
|
37
38
|
models.forEach((providerModels) => {
|
|
38
39
|
console.log(`Provider: ${providerModels.provider}`);
|
|
39
40
|
providerModels.models.forEach((model) => {
|
|
40
|
-
console.log(`Model: ${model.
|
|
41
|
+
console.log(`Model: ${model.name}`);
|
|
41
42
|
});
|
|
42
43
|
});
|
|
43
44
|
|
|
@@ -76,7 +77,7 @@ try {
|
|
|
76
77
|
models.forEach((providerModels) => {
|
|
77
78
|
console.log(`Provider: ${providerModels.provider}`);
|
|
78
79
|
providerModels.models.forEach((model) => {
|
|
79
|
-
console.log(`Model: ${model.
|
|
80
|
+
console.log(`Model: ${model.name}`);
|
|
80
81
|
});
|
|
81
82
|
});
|
|
82
83
|
} catch (error) {
|
|
@@ -136,6 +137,32 @@ const client = new InferenceGatewayClient('http://localhost:8080');
|
|
|
136
137
|
}
|
|
137
138
|
```
|
|
138
139
|
|
|
140
|
+
### Streaming Content
|
|
141
|
+
|
|
142
|
+
To stream content using a model, use the `generateContentStream` method:
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
const client = new InferenceGatewayClient('http://localhost:8080');
|
|
146
|
+
|
|
147
|
+
await client.generateContentStream(
|
|
148
|
+
{
|
|
149
|
+
provider: Provider.Groq,
|
|
150
|
+
model: 'deepseek-r1-distill-llama-70b',
|
|
151
|
+
messages: [
|
|
152
|
+
{
|
|
153
|
+
role: MessageRole.User,
|
|
154
|
+
content: 'Tell me a story',
|
|
155
|
+
},
|
|
156
|
+
],
|
|
157
|
+
},
|
|
158
|
+
{
|
|
159
|
+
onMessageStart: (role) => console.log('Message started:', role),
|
|
160
|
+
onContentDelta: (content) => process.stdout.write(content),
|
|
161
|
+
onStreamEnd: () => console.log('\nStream completed'),
|
|
162
|
+
}
|
|
163
|
+
);
|
|
164
|
+
```
|
|
165
|
+
|
|
139
166
|
### Health Check
|
|
140
167
|
|
|
141
168
|
To check if the Inference Gateway is running, use the `healthCheck` method:
|
package/dist/src/client.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { GenerateContentRequest, GenerateContentResponse, Provider, ProviderModels } from './types';
|
|
1
|
+
import { GenerateContentOptions, GenerateContentRequest, GenerateContentResponse, Provider, ProviderModels } from './types';
|
|
2
2
|
export declare class InferenceGatewayClient {
|
|
3
3
|
private baseUrl;
|
|
4
4
|
private authToken?;
|
|
@@ -7,5 +7,6 @@ export declare class InferenceGatewayClient {
|
|
|
7
7
|
listModels(): Promise<ProviderModels[]>;
|
|
8
8
|
listModelsByProvider(provider: Provider): Promise<ProviderModels>;
|
|
9
9
|
generateContent(params: GenerateContentRequest): Promise<GenerateContentResponse>;
|
|
10
|
+
generateContentStream(params: GenerateContentRequest, options?: GenerateContentOptions): Promise<void>;
|
|
10
11
|
healthCheck(): Promise<boolean>;
|
|
11
12
|
}
|
package/dist/src/client.js
CHANGED
|
@@ -41,6 +41,64 @@ class InferenceGatewayClient {
|
|
|
41
41
|
}),
|
|
42
42
|
});
|
|
43
43
|
}
|
|
44
|
+
async generateContentStream(params, options) {
|
|
45
|
+
const response = await fetch(`${this.baseUrl}/llms/${params.provider}/generate`, {
|
|
46
|
+
method: 'POST',
|
|
47
|
+
headers: {
|
|
48
|
+
'Content-Type': 'application/json',
|
|
49
|
+
...(this.authToken && { Authorization: `Bearer ${this.authToken}` }),
|
|
50
|
+
},
|
|
51
|
+
body: JSON.stringify({
|
|
52
|
+
model: params.model,
|
|
53
|
+
messages: params.messages,
|
|
54
|
+
stream: true,
|
|
55
|
+
ssevents: true,
|
|
56
|
+
}),
|
|
57
|
+
});
|
|
58
|
+
if (!response.ok) {
|
|
59
|
+
const error = await response.json();
|
|
60
|
+
throw new Error(error.error || `HTTP error! status: ${response.status}`);
|
|
61
|
+
}
|
|
62
|
+
const reader = response.body?.getReader();
|
|
63
|
+
if (!reader)
|
|
64
|
+
throw new Error('Response body is not readable');
|
|
65
|
+
const decoder = new TextDecoder();
|
|
66
|
+
while (true) {
|
|
67
|
+
const { done, value } = await reader.read();
|
|
68
|
+
if (done)
|
|
69
|
+
break;
|
|
70
|
+
const events = decoder.decode(value).split('\n\n');
|
|
71
|
+
for (const event of events) {
|
|
72
|
+
if (!event.trim())
|
|
73
|
+
continue;
|
|
74
|
+
const [eventType, ...data] = event.split('\n');
|
|
75
|
+
const eventData = JSON.parse(data.join('\n').replace('data: ', ''));
|
|
76
|
+
switch (eventType.replace('event: ', '')) {
|
|
77
|
+
case 'message-start':
|
|
78
|
+
options?.onMessageStart?.(eventData.role);
|
|
79
|
+
break;
|
|
80
|
+
case 'stream-start':
|
|
81
|
+
options?.onStreamStart?.();
|
|
82
|
+
break;
|
|
83
|
+
case 'content-start':
|
|
84
|
+
options?.onContentStart?.();
|
|
85
|
+
break;
|
|
86
|
+
case 'content-delta':
|
|
87
|
+
options?.onContentDelta?.(eventData.content);
|
|
88
|
+
break;
|
|
89
|
+
case 'content-end':
|
|
90
|
+
options?.onContentEnd?.();
|
|
91
|
+
break;
|
|
92
|
+
case 'message-end':
|
|
93
|
+
options?.onMessageEnd?.();
|
|
94
|
+
break;
|
|
95
|
+
case 'stream-end':
|
|
96
|
+
options?.onStreamEnd?.();
|
|
97
|
+
break;
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
44
102
|
async healthCheck() {
|
|
45
103
|
try {
|
|
46
104
|
await this.request('/health');
|
|
package/dist/src/types/index.d.ts
CHANGED
|
@@ -36,3 +36,12 @@ export interface GenerateContentResponse {
|
|
|
36
36
|
content: string;
|
|
37
37
|
};
|
|
38
38
|
}
|
|
39
|
+
export interface GenerateContentOptions {
|
|
40
|
+
onMessageStart?: (role: string) => void;
|
|
41
|
+
onStreamStart?: () => void;
|
|
42
|
+
onContentStart?: () => void;
|
|
43
|
+
onContentDelta?: (content: string) => void;
|
|
44
|
+
onContentEnd?: () => void;
|
|
45
|
+
onMessageEnd?: () => void;
|
|
46
|
+
onStreamEnd?: () => void;
|
|
47
|
+
}
|
|
package/dist/tests/client.test.js
CHANGED
|
@@ -122,4 +122,109 @@ describe('InferenceGatewayClient', () => {
|
|
|
122
122
|
await expect(client.listModels()).rejects.toThrow(errorMessage);
|
|
123
123
|
});
|
|
124
124
|
});
|
|
125
|
+
describe('generateContentStream', () => {
|
|
126
|
+
it('should handle SSE events correctly', async () => {
|
|
127
|
+
const mockRequest = {
|
|
128
|
+
provider: types_1.Provider.Ollama,
|
|
129
|
+
model: 'llama2',
|
|
130
|
+
messages: [
|
|
131
|
+
{ role: types_1.MessageRole.System, content: 'You are a helpful assistant' },
|
|
132
|
+
{ role: types_1.MessageRole.User, content: 'Hello' },
|
|
133
|
+
],
|
|
134
|
+
};
|
|
135
|
+
const mockStream = new TransformStream();
|
|
136
|
+
const writer = mockStream.writable.getWriter();
|
|
137
|
+
const encoder = new TextEncoder();
|
|
138
|
+
global.fetch.mockResolvedValueOnce({
|
|
139
|
+
ok: true,
|
|
140
|
+
body: mockStream.readable,
|
|
141
|
+
});
|
|
142
|
+
const callbacks = {
|
|
143
|
+
onMessageStart: jest.fn(),
|
|
144
|
+
onStreamStart: jest.fn(),
|
|
145
|
+
onContentStart: jest.fn(),
|
|
146
|
+
onContentDelta: jest.fn(),
|
|
147
|
+
onContentEnd: jest.fn(),
|
|
148
|
+
onMessageEnd: jest.fn(),
|
|
149
|
+
onStreamEnd: jest.fn(),
|
|
150
|
+
};
|
|
151
|
+
const streamPromise = client.generateContentStream(mockRequest, callbacks);
|
|
152
|
+
await writer.write(encoder.encode('event: message-start\ndata: {"role": "assistant"}\n\n' +
|
|
153
|
+
'event: stream-start\ndata: {}\n\n' +
|
|
154
|
+
'event: content-start\ndata: {}\n\n' +
|
|
155
|
+
'event: content-delta\ndata: {"content": "Hello"}\n\n' +
|
|
156
|
+
'event: content-delta\ndata: {"content": " there!"}\n\n' +
|
|
157
|
+
'event: content-end\ndata: {}\n\n' +
|
|
158
|
+
'event: message-end\ndata: {}\n\n' +
|
|
159
|
+
'event: stream-end\ndata: {}\n\n'));
|
|
160
|
+
await writer.close();
|
|
161
|
+
await streamPromise;
|
|
162
|
+
expect(callbacks.onMessageStart).toHaveBeenCalledWith('assistant');
|
|
163
|
+
expect(callbacks.onStreamStart).toHaveBeenCalledTimes(1);
|
|
164
|
+
expect(callbacks.onContentStart).toHaveBeenCalledTimes(1);
|
|
165
|
+
expect(callbacks.onContentDelta).toHaveBeenCalledWith('Hello');
|
|
166
|
+
expect(callbacks.onContentDelta).toHaveBeenCalledWith(' there!');
|
|
167
|
+
expect(callbacks.onContentEnd).toHaveBeenCalledTimes(1);
|
|
168
|
+
expect(callbacks.onMessageEnd).toHaveBeenCalledTimes(1);
|
|
169
|
+
expect(callbacks.onStreamEnd).toHaveBeenCalledTimes(1);
|
|
170
|
+
expect(global.fetch).toHaveBeenCalledWith(`${mockBaseUrl}/llms/${mockRequest.provider}/generate`, expect.objectContaining({
|
|
171
|
+
method: 'POST',
|
|
172
|
+
body: JSON.stringify({
|
|
173
|
+
model: mockRequest.model,
|
|
174
|
+
messages: mockRequest.messages,
|
|
175
|
+
stream: true,
|
|
176
|
+
ssevents: true,
|
|
177
|
+
}),
|
|
178
|
+
}));
|
|
179
|
+
});
|
|
180
|
+
it('should handle errors in the stream response', async () => {
|
|
181
|
+
const mockRequest = {
|
|
182
|
+
provider: types_1.Provider.Ollama,
|
|
183
|
+
model: 'llama2',
|
|
184
|
+
messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
|
|
185
|
+
};
|
|
186
|
+
global.fetch.mockResolvedValueOnce({
|
|
187
|
+
ok: false,
|
|
188
|
+
status: 400,
|
|
189
|
+
json: () => Promise.resolve({ error: 'Bad Request' }),
|
|
190
|
+
});
|
|
191
|
+
await expect(client.generateContentStream(mockRequest, {})).rejects.toThrow('Bad Request');
|
|
192
|
+
});
|
|
193
|
+
it('should handle non-readable response body', async () => {
|
|
194
|
+
const mockRequest = {
|
|
195
|
+
provider: types_1.Provider.Ollama,
|
|
196
|
+
model: 'llama2',
|
|
197
|
+
messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
|
|
198
|
+
};
|
|
199
|
+
global.fetch.mockResolvedValueOnce({
|
|
200
|
+
ok: true,
|
|
201
|
+
body: null,
|
|
202
|
+
});
|
|
203
|
+
await expect(client.generateContentStream(mockRequest, {})).rejects.toThrow('Response body is not readable');
|
|
204
|
+
});
|
|
205
|
+
it('should handle empty events in the stream', async () => {
|
|
206
|
+
const mockRequest = {
|
|
207
|
+
provider: types_1.Provider.Ollama,
|
|
208
|
+
model: 'llama2',
|
|
209
|
+
messages: [{ role: types_1.MessageRole.User, content: 'Hello' }],
|
|
210
|
+
};
|
|
211
|
+
const mockStream = new TransformStream();
|
|
212
|
+
const writer = mockStream.writable.getWriter();
|
|
213
|
+
const encoder = new TextEncoder();
|
|
214
|
+
global.fetch.mockResolvedValueOnce({
|
|
215
|
+
ok: true,
|
|
216
|
+
body: mockStream.readable,
|
|
217
|
+
});
|
|
218
|
+
const callbacks = {
|
|
219
|
+
onContentDelta: jest.fn(),
|
|
220
|
+
};
|
|
221
|
+
const streamPromise = client.generateContentStream(mockRequest, callbacks);
|
|
222
|
+
await writer.write(encoder.encode('\n\n'));
|
|
223
|
+
await writer.write(encoder.encode('event: content-delta\ndata: {"content": "Hello"}\n\n'));
|
|
224
|
+
await writer.close();
|
|
225
|
+
await streamPromise;
|
|
226
|
+
expect(callbacks.onContentDelta).toHaveBeenCalledTimes(1);
|
|
227
|
+
expect(callbacks.onContentDelta).toHaveBeenCalledWith('Hello');
|
|
228
|
+
});
|
|
229
|
+
});
|
|
125
230
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@inference-gateway/sdk",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "An SDK written in Typescript for the [Inference Gateway](https://github.com/inference-gateway/inference-gateway).",
|
|
5
5
|
"main": "dist/src/index.js",
|
|
6
6
|
"types": "dist/src/index.d.ts",
|