@inference-gateway/sdk 0.3.0 → 0.3.2

This diff shows the changes between the published contents of these two package versions as they appear in their public registry, and is provided for informational purposes only. In short, 0.3.2 reworks the client around a single options object and OpenAI-style /models and /chat/completions routes (replacing the /llms endpoints and generateContent/generateContentStream), adds tool-calling types, per-request timeouts, withOptions(), and a proxy() helper, adds the DeepSeek provider, and removes the Google provider.
@@ -2,106 +2,201 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.InferenceGatewayClient = void 0;
 class InferenceGatewayClient {
-    baseUrl;
-    authToken;
-    constructor(baseUrl, authToken) {
-        this.baseUrl = baseUrl.replace(/\/$/, '');
-        this.authToken = authToken;
+    baseURL;
+    apiKey;
+    defaultHeaders;
+    defaultQuery;
+    timeout;
+    fetchFn;
+    constructor(options = {}) {
+        this.baseURL = options.baseURL || 'http://localhost:8080/v1';
+        this.apiKey = options.apiKey;
+        this.defaultHeaders = options.defaultHeaders || {};
+        this.defaultQuery = options.defaultQuery || {};
+        this.timeout = options.timeout || 30000;
+        this.fetchFn = options.fetch || globalThis.fetch;
     }
-    async request(path, options = {}) {
+    /**
+     * Creates a new instance of the client with the given options merged with the existing options.
+     */
+    withOptions(options) {
+        return new InferenceGatewayClient({
+            baseURL: options.baseURL || this.baseURL,
+            apiKey: options.apiKey || this.apiKey,
+            defaultHeaders: { ...this.defaultHeaders, ...options.defaultHeaders },
+            defaultQuery: { ...this.defaultQuery, ...options.defaultQuery },
+            timeout: options.timeout || this.timeout,
+            fetch: options.fetch || this.fetchFn,
+        });
+    }
+    /**
+     * Makes a request to the API.
+     */
+    async request(path, options = {}, query = {}) {
         const headers = new Headers({
             'Content-Type': 'application/json',
+            ...this.defaultHeaders,
             ...options.headers,
         });
-        if (this.authToken) {
-            headers.set('Authorization', `Bearer ${this.authToken}`);
+        if (this.apiKey) {
+            headers.set('Authorization', `Bearer ${this.apiKey}`);
         }
-        const response = await fetch(`${this.baseUrl}${path}`, {
-            ...options,
-            headers,
+        // Combine default query parameters with provided ones
+        const queryParams = new URLSearchParams({
+            ...this.defaultQuery,
+            ...query,
         });
-        if (!response.ok) {
-            const error = await response.json();
-            throw new Error(error.error || `HTTP error! status: ${response.status}`);
+        const queryString = queryParams.toString();
+        const url = `${this.baseURL}${path}${queryString ? `?${queryString}` : ''}`;
+        const controller = new AbortController();
+        const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+        try {
+            const response = await this.fetchFn(url, {
+                ...options,
+                headers,
+                signal: controller.signal,
+            });
+            if (!response.ok) {
+                const error = (await response.json());
+                throw new Error(error.error || `HTTP error! status: ${response.status}`);
+            }
+            return response.json();
+        }
+        finally {
+            globalThis.clearTimeout(timeoutId);
         }
-        return response.json();
-    }
-    async listModels() {
-        return this.request('/llms');
     }
-    async listModelsByProvider(provider) {
-        return this.request(`/llms/${provider}`);
+    /**
+     * Lists the currently available models.
+     */
+    async listModels(provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        return this.request('/models', { method: 'GET' }, query);
     }
-    async generateContent(params) {
-        return this.request(`/llms/${params.provider}/generate`, {
+    /**
+     * Creates a chat completion.
+     */
+    async createChatCompletion(request, provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        return this.request('/chat/completions', {
             method: 'POST',
-            body: JSON.stringify({
-                model: params.model,
-                messages: params.messages,
-            }),
-        });
+            body: JSON.stringify(request),
+        }, query);
     }
-    async generateContentStream(params, options) {
-        const response = await fetch(`${this.baseUrl}/llms/${params.provider}/generate`, {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                ...(this.authToken && { Authorization: `Bearer ${this.authToken}` }),
-            },
-            body: JSON.stringify({
-                model: params.model,
-                messages: params.messages,
-                stream: true,
-                ssevents: true,
-            }),
+    /**
+     * Creates a streaming chat completion.
+     */
+    async streamChatCompletion(request, callbacks, provider) {
+        const query = {};
+        if (provider) {
+            query.provider = provider;
+        }
+        const queryParams = new URLSearchParams({
+            ...this.defaultQuery,
+            ...query,
         });
-        if (!response.ok) {
-            const error = await response.json();
-            throw new Error(error.error || `HTTP error! status: ${response.status}`);
+        const queryString = queryParams.toString();
+        const url = `${this.baseURL}/chat/completions${queryString ? `?${queryString}` : ''}`;
+        const headers = new Headers({
+            'Content-Type': 'application/json',
+            ...this.defaultHeaders,
+        });
+        if (this.apiKey) {
+            headers.set('Authorization', `Bearer ${this.apiKey}`);
         }
-        const reader = response.body?.getReader();
-        if (!reader)
-            throw new Error('Response body is not readable');
-        const decoder = new TextDecoder();
-        while (true) {
-            const { done, value } = await reader.read();
-            if (done)
-                break;
-            const events = decoder.decode(value).split('\n\n');
-            for (const event of events) {
-                if (!event.trim())
-                    continue;
-                const [eventType, ...data] = event.split('\n');
-                const eventData = JSON.parse(data.join('\n').replace('data: ', ''));
-                switch (eventType.replace('event: ', '')) {
-                    case 'message-start':
-                        options?.onMessageStart?.(eventData.role);
-                        break;
-                    case 'stream-start':
-                        options?.onStreamStart?.();
-                        break;
-                    case 'content-start':
-                        options?.onContentStart?.();
-                        break;
-                    case 'content-delta':
-                        options?.onContentDelta?.(eventData.content);
-                        break;
-                    case 'content-end':
-                        options?.onContentEnd?.();
-                        break;
-                    case 'message-end':
-                        options?.onMessageEnd?.();
-                        break;
-                    case 'stream-end':
-                        options?.onStreamEnd?.();
-                        break;
+        const controller = new AbortController();
+        const timeoutId = globalThis.setTimeout(() => controller.abort(), this.timeout);
+        try {
+            const response = await this.fetchFn(url, {
+                method: 'POST',
+                headers,
+                body: JSON.stringify({
+                    ...request,
+                    stream: true,
+                }),
+                signal: controller.signal,
+            });
+            if (!response.ok) {
+                const error = (await response.json());
+                throw new Error(error.error || `HTTP error! status: ${response.status}`);
+            }
+            if (!response.body) {
+                throw new Error('Response body is not readable');
+            }
+            callbacks.onOpen?.();
+            const reader = response.body.getReader();
+            const decoder = new TextDecoder();
+            let buffer = '';
+            while (true) {
+                const { done, value } = await reader.read();
+                if (done)
+                    break;
+                buffer += decoder.decode(value, { stream: true });
+                const lines = buffer.split('\n');
+                buffer = lines.pop() || '';
+                for (const line of lines) {
+                    if (line.startsWith('data: ')) {
+                        const data = line.slice(5).trim();
+                        if (data === '[DONE]') {
+                            callbacks.onFinish?.(null);
+                            return;
+                        }
+                        try {
+                            const chunk = JSON.parse(data);
+                            callbacks.onChunk?.(chunk);
+                            const content = chunk.choices[0]?.delta?.content;
+                            if (content) {
+                                callbacks.onContent?.(content);
+                            }
+                            const toolCalls = chunk.choices[0]?.delta?.tool_calls;
+                            if (toolCalls && toolCalls.length > 0) {
+                                const toolCall = {
+                                    id: toolCalls[0].id || '',
+                                    type: 'function',
+                                    function: {
+                                        name: toolCalls[0].function?.name || '',
+                                        arguments: toolCalls[0].function?.arguments || '',
+                                    },
+                                };
+                                callbacks.onTool?.(toolCall);
+                            }
+                        }
+                        catch (e) {
+                            globalThis.console.error('Error parsing SSE data:', e);
+                        }
+                    }
                 }
             }
         }
+        catch (error) {
+            const apiError = {
+                error: error.message || 'Unknown error',
+            };
+            callbacks.onError?.(apiError);
+            throw error;
+        }
+        finally {
+            globalThis.clearTimeout(timeoutId);
+        }
+    }
+    /**
+     * Proxy a request to a specific provider.
+     */
+    async proxy(provider, path, options = {}) {
+        return this.request(`/proxy/${provider}/${path}`, options);
     }
+    /**
+     * Health check endpoint.
+     */
     async healthCheck() {
         try {
-            await this.request('/health');
+            await this.fetchFn(`${this.baseURL.replace('/v1', '')}/health`);
             return true;
         }
         catch {
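
Taken together, this hunk replaces the 0.3.0 surface (positional constructor, /llms routes, generateContent/generateContentStream) with an options-based client and OpenAI-style routes. Below is a minimal usage sketch of the new surface, not taken from the package itself; the model ids, prompt text, and the INFERENCE_GATEWAY_API_KEY environment variable are placeholder assumptions:

```typescript
import { InferenceGatewayClient, MessageRole, Provider } from '@inference-gateway/sdk';

// 0.3.2 takes a single optional options object instead of the old
// positional (baseUrl, authToken) arguments; every field has a default.
const client = new InferenceGatewayClient({
  baseURL: 'http://localhost:8080/v1', // the default shown in the diff
  apiKey: process.env.INFERENCE_GATEWAY_API_KEY, // sent as a Bearer token when set
  timeout: 60_000, // requests abort via AbortController after this many ms (default 30000)
});

// withOptions() derives a new client with merged defaults; the original is untouched.
const groq = client.withOptions({ defaultQuery: { provider: Provider.Groq } });

// Because of the defaultQuery above, this issues GET /models?provider=groq.
const models = await groq.listModels();
console.log(models.data.map((m) => m.id));

// createChatCompletion() replaces generateContent(): the body is an OpenAI-style
// request, and the provider moves from the path into a query parameter.
const completion = await client.createChatCompletion(
  {
    model: 'llama3.2', // placeholder model id
    messages: [{ role: MessageRole.User, content: 'Say hello.' }],
  },
  Provider.Ollama
);
console.log(completion.choices[0]?.message.content);

// healthCheck() now pings /health on the server root, outside the /v1 prefix.
console.log('healthy:', await client.healthCheck());
```

Note that withOptions() constructs a fresh client rather than mutating the receiver, so per-provider variants can be derived from one base client safely.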
@@ -2,46 +2,127 @@ export declare enum Provider {
     Ollama = "ollama",
     Groq = "groq",
     OpenAI = "openai",
-    Google = "google",
     Cloudflare = "cloudflare",
     Cohere = "cohere",
-    Anthropic = "anthropic"
+    Anthropic = "anthropic",
+    DeepSeek = "deepseek"
 }
 export declare enum MessageRole {
     System = "system",
     User = "user",
-    Assistant = "assistant"
+    Assistant = "assistant",
+    Tool = "tool"
 }
 export interface Message {
     role: MessageRole;
     content: string;
+    tool_calls?: ChatCompletionMessageToolCall[];
+    tool_call_id?: string;
 }
 export interface Model {
+    id: string;
+    object: string;
+    created: number;
+    owned_by: string;
+}
+export interface ListModelsResponse {
+    object: string;
+    data: Model[];
+}
+export interface ChatCompletionMessageToolCallFunction {
     name: string;
+    arguments: string;
+}
+export interface ChatCompletionMessageToolCall {
+    id: string;
+    type: 'function';
+    function: ChatCompletionMessageToolCallFunction;
+}
+export interface ChatCompletionMessageToolCallChunk {
+    index: number;
+    id?: string;
+    type?: string;
+    function?: {
+        name?: string;
+        arguments?: string;
+    };
 }
-export interface ProviderModels {
-    provider: Provider;
-    models: Model[];
+export interface FunctionParameters {
+    type: string;
+    properties?: Record<string, unknown>;
+    required?: string[];
 }
-export interface GenerateContentRequest {
-    provider: Provider;
+export interface FunctionObject {
+    description?: string;
+    name: string;
+    parameters: FunctionParameters;
+    strict?: boolean;
+}
+export interface ChatCompletionTool {
+    type: 'function';
+    function: FunctionObject;
+}
+export interface ChatCompletionRequest {
     model: string;
     messages: Message[];
+    max_tokens?: number;
+    stream?: boolean;
+    stream_options?: ChatCompletionStreamOptions;
+    tools?: ChatCompletionTool[];
+    temperature?: number;
+    top_p?: number;
+    top_k?: number;
 }
-export interface GenerateContentResponse {
-    provider: string;
-    response: {
-        role: 'assistant';
-        model: string;
-        content: string;
-    };
+export interface ChatCompletionStreamOptions {
+    include_usage?: boolean;
+}
+export interface ChatCompletionChoice {
+    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call';
+    index: number;
+    message: Message;
+    logprobs?: Record<string, unknown>;
+}
+export interface CompletionUsage {
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+}
+export interface ChatCompletionResponse {
+    id: string;
+    choices: ChatCompletionChoice[];
+    created: number;
+    model: string;
+    object: string;
+    usage?: CompletionUsage;
+}
+export interface ChatCompletionStreamChoice {
+    delta: ChatCompletionStreamResponseDelta;
+    finish_reason: 'stop' | 'length' | 'tool_calls' | 'content_filter' | 'function_call' | null;
+    index: number;
+    logprobs?: Record<string, unknown>;
+}
+export interface ChatCompletionStreamResponseDelta {
+    content?: string;
+    tool_calls?: ChatCompletionMessageToolCallChunk[];
+    role?: MessageRole;
+    refusal?: string;
+}
+export interface ChatCompletionStreamResponse {
+    id: string;
+    choices: ChatCompletionStreamChoice[];
+    created: number;
+    model: string;
+    object: string;
+    usage?: CompletionUsage;
+}
+export interface ChatCompletionStreamCallbacks {
+    onOpen?: () => void;
+    onChunk?: (chunk: ChatCompletionStreamResponse) => void;
+    onContent?: (content: string) => void;
+    onTool?: (toolCall: ChatCompletionMessageToolCall) => void;
+    onFinish?: (response: ChatCompletionStreamResponse) => void;
+    onError?: (error: Error) => void;
 }
-export interface GenerateContentOptions {
-    onMessageStart?: (role: string) => void;
-    onStreamStart?: () => void;
-    onContentStart?: () => void;
-    onContentDelta?: (content: string) => void;
-    onContentEnd?: () => void;
-    onMessageEnd?: () => void;
-    onStreamEnd?: () => void;
+export interface Error {
+    error: string;
 }
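
These declarations pair with the streaming implementation in the first hunk: each SSE `data:` line is parsed into a ChatCompletionStreamResponse, content deltas are forwarded to onContent, and the first tool-call delta in a chunk is normalized into a ChatCompletionMessageToolCall for onTool. A streaming sketch under those types follows; the get_weather tool, model id, and prompt are illustrative assumptions, not part of the package:

```typescript
import {
  ChatCompletionTool,
  InferenceGatewayClient,
  MessageRole,
  Provider,
} from '@inference-gateway/sdk';

// A made-up tool definition, shaped by the FunctionObject/FunctionParameters types above.
const tools: ChatCompletionTool[] = [
  {
    type: 'function',
    function: {
      name: 'get_weather', // hypothetical function name
      description: 'Look up the current weather for a city',
      parameters: {
        type: 'object',
        properties: { city: { type: 'string' } },
        required: ['city'],
      },
    },
  },
];

const client = new InferenceGatewayClient({
  apiKey: process.env.INFERENCE_GATEWAY_API_KEY, // placeholder env var
});

// streamChatCompletion() sets stream: true on the request body itself.
await client.streamChatCompletion(
  {
    model: 'gpt-4o', // placeholder model id
    messages: [{ role: MessageRole.User, content: 'What is the weather in Berlin?' }],
    tools,
  },
  {
    onOpen: () => console.log('stream opened'),
    onContent: (delta) => process.stdout.write(delta),
    onTool: (call) => console.log('\ntool call:', call.function.name, call.function.arguments),
    onError: (err) => console.error('stream error:', err.error),
    onFinish: () => console.log('\nstream finished'),
  },
  Provider.OpenAI
);
```

Per the implementation, onFinish fires when the stream ends with the `[DONE]` sentinel, and a parse failure on an individual SSE line is logged rather than thrown.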
@@ -6,14 +6,15 @@ var Provider;
     Provider["Ollama"] = "ollama";
     Provider["Groq"] = "groq";
     Provider["OpenAI"] = "openai";
-    Provider["Google"] = "google";
     Provider["Cloudflare"] = "cloudflare";
     Provider["Cohere"] = "cohere";
     Provider["Anthropic"] = "anthropic";
+    Provider["DeepSeek"] = "deepseek";
 })(Provider || (exports.Provider = Provider = {}));
 var MessageRole;
 (function (MessageRole) {
     MessageRole["System"] = "system";
     MessageRole["User"] = "user";
     MessageRole["Assistant"] = "assistant";
+    MessageRole["Tool"] = "tool";
 })(MessageRole || (exports.MessageRole = MessageRole = {}));
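
On the enum side, Provider.Google is removed, Provider.DeepSeek is added, and MessageRole.Tool is new for returning tool output to the model. A short sketch; the tool_call_id value is a placeholder:

```typescript
import { Message, MessageRole, Provider } from '@inference-gateway/sdk';

// New provider in 0.3.2; code that referenced Provider.Google must migrate.
const provider = Provider.DeepSeek;

// Messages can now carry a tool result, keyed to the assistant's earlier tool call.
const toolResult: Message = {
  role: MessageRole.Tool,
  content: JSON.stringify({ temperature_c: 21 }),
  tool_call_id: 'call_abc123', // placeholder: echo the id from the tool_calls entry
};

console.log(provider, toolResult);
```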