genai-lite 0.3.3 → 0.4.0
This diff compares publicly released versions of the package as they appear in a supported public registry. It is provided for informational purposes only.
- package/README.md +374 -14
- package/dist/index.d.ts +5 -0
- package/dist/index.js +8 -1
- package/dist/llm/LLMService.test.js +28 -9
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +116 -0
- package/dist/llm/clients/LlamaCppClientAdapter.js +289 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.js +447 -0
- package/dist/llm/clients/LlamaCppServerClient.d.ts +161 -0
- package/dist/llm/clients/LlamaCppServerClient.js +192 -0
- package/dist/llm/clients/LlamaCppServerClient.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppServerClient.test.js +294 -0
- package/dist/llm/config.d.ts +12 -0
- package/dist/llm/config.js +77 -0
- package/dist/llm/services/ModelResolver.js +13 -13
- package/dist/llm/services/ModelResolver.test.js +25 -4
- package/dist/llm/types.d.ts +6 -0
- package/dist/providers/fromEnvironment.d.ts +4 -0
- package/dist/providers/fromEnvironment.js +8 -0
- package/dist/providers/fromEnvironment.test.js +13 -0
- package/package.json +1 -1
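
The headline change in 0.4.0 is first-class support for a local llama.cpp server: a new `LlamaCppClientAdapter` for OpenAI-compatible chat completions, a `LlamaCppServerClient` for the server's utility endpoints, and matching config/environment-provider updates. Below is a minimal usage sketch based on the tests and type declarations in this diff; it assumes both classes are re-exported from the package root (`dist/index.d.ts` gains five lines, but the exact export list is not shown here).

```typescript
// Sketch only: constructor options, defaults, and the 'llamacpp' provider id come from
// LlamaCppClientAdapter.test.js below; the root import path is an assumption.
import { LlamaCppClientAdapter, LlamaCppServerClient } from 'genai-lite';

async function demo() {
  // Utility endpoints: no API key, just the server URL (default http://localhost:8080).
  const server = new LlamaCppServerClient('http://localhost:8080');
  const health = await server.getHealth(); // { status: 'ok' | 'loading' | 'error', error? }
  const { tokens } = await server.tokenize('Hello world');
  console.log(health.status, tokens.length);

  // Chat completions via llama.cpp's OpenAI-compatible API (the adapter wraps the openai SDK,
  // as the mock in the test file shows).
  const adapter = new LlamaCppClientAdapter({
    baseURL: 'http://localhost:8080', // default when omitted
    checkHealth: true, // optional /health pre-flight; defaults to false
  });
  const response = await adapter.sendMessage(
    {
      providerId: 'llamacpp',
      modelId: 'llama-3-8b-instruct',
      messages: [{ role: 'user', content: 'Hello, how are you?' }],
      settings: { temperature: 0.7, maxTokens: 1000 },
    } as any, // the full request/settings shape is defined in dist/llm/types.d.ts
    'not-needed' // no API key required; validateApiKey() always returns true
  );
  if (response.object === 'chat.completion') {
    console.log(response.choices[0].message.content);
  }
}

demo().catch(console.error);
```

The health check is best-effort: per the tests below, an `error` or `loading` status turns into a structured error response, but if the `/health` call itself throws, the chat request still proceeds.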

package/dist/llm/clients/LlamaCppClientAdapter.test.js
@@ -0,0 +1,447 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const LlamaCppClientAdapter_1 = require("./LlamaCppClientAdapter");
+// Mock OpenAI SDK
+jest.mock('openai', () => {
+    return {
+        __esModule: true,
+        default: jest.fn().mockImplementation(() => ({
+            chat: {
+                completions: {
+                    create: mockCreate,
+                },
+            },
+        })),
+    };
+});
+// Mock LlamaCppServerClient
+jest.mock('./LlamaCppServerClient', () => {
+    return {
+        LlamaCppServerClient: jest.fn().mockImplementation(() => ({
+            getHealth: mockGetHealth,
+        })),
+    };
+});
+const mockCreate = jest.fn();
+const mockGetHealth = jest.fn();
+describe('LlamaCppClientAdapter', () => {
+    let adapter;
+    let basicRequest;
+    beforeEach(() => {
+        jest.clearAllMocks();
+        adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter();
+        basicRequest = {
+            providerId: 'llamacpp',
+            modelId: 'llama-3-8b-instruct',
+            messages: [
+                { role: 'user', content: 'Hello, how are you?' },
+            ],
+            settings: {
+                temperature: 0.7,
+                maxTokens: 1000,
+                topP: 0.95,
+                stopSequences: [],
+                frequencyPenalty: 0.0,
+                presencePenalty: 0.0,
+                supportsSystemMessage: true,
+                user: '',
+                geminiSafetySettings: [],
+                reasoning: {
+                    enabled: false,
+                    exclude: false,
+                },
+                thinkingExtraction: {
+                    enabled: false,
+                    tag: 'thinking',
+                    onMissing: 'auto',
+                },
+            },
+        };
+    });
+    describe('constructor', () => {
+        it('should use default baseURL when not provided', () => {
+            const adapterInfo = adapter.getAdapterInfo();
+            expect(adapterInfo.baseURL).toBe('http://localhost:8080');
+        });
+        it('should use custom baseURL when provided', () => {
+            const customAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({
+                baseURL: 'http://localhost:9090',
+            });
+            const adapterInfo = customAdapter.getAdapterInfo();
+            expect(adapterInfo.baseURL).toBe('http://localhost:9090');
+        });
+        it('should set checkHealth to false by default', () => {
+            const adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter();
+            expect(adapter.checkHealth).toBe(false);
+        });
+        it('should set checkHealth when provided', () => {
+            const adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+            expect(adapter.checkHealth).toBe(true);
+        });
+    });
+    describe('sendMessage', () => {
+        it('should send message successfully', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-123',
+                object: 'chat.completion',
+                created: 1677652288,
+                model: 'llama-3-8b-instruct',
+                choices: [
+                    {
+                        index: 0,
+                        message: {
+                            role: 'assistant',
+                            content: 'I am doing well, thank you!',
+                        },
+                        finish_reason: 'stop',
+                    },
+                ],
+                usage: {
+                    prompt_tokens: 10,
+                    completion_tokens: 8,
+                    total_tokens: 18,
+                },
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('chat.completion');
+            if (response.object === 'chat.completion') {
+                expect(response.choices[0].message.content).toBe('I am doing well, thank you!');
+                expect(response.choices[0].finish_reason).toBe('stop');
+                expect(response.usage).toEqual({
+                    prompt_tokens: 10,
+                    completion_tokens: 8,
+                    total_tokens: 18,
+                });
+            }
+        });
+        it('should include system message when provided', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-124',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const requestWithSystem = {
+                ...basicRequest,
+                systemMessage: 'You are a helpful assistant.',
+            };
+            await adapter.sendMessage(requestWithSystem, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                messages: expect.arrayContaining([
+                    { role: 'system', content: 'You are a helpful assistant.' },
+                ]),
+            }));
+        });
+        it('should pass stop sequences when provided', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-125',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const requestWithStop = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    stopSequences: ['END', 'STOP'],
+                },
+            };
+            await adapter.sendMessage(requestWithStop, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                stop: ['END', 'STOP'],
+            }));
+        });
+        it('should handle length finish reason', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-126',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response...' },
+                        finish_reason: 'length',
+                    },
+                ],
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('chat.completion');
+            if (response.object === 'chat.completion') {
+                expect(response.choices[0].finish_reason).toBe('length');
+            }
+        });
+        it('should handle completion without usage data', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-127',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+                // No usage field
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('chat.completion');
+            if (response.object === 'chat.completion') {
+                expect(response.usage).toBeUndefined();
+            }
+        });
+        it('should handle multiple choices', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-128',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'First response' },
+                        finish_reason: 'stop',
+                    },
+                    {
+                        message: { role: 'assistant', content: 'Second response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('chat.completion');
+            if (response.object === 'chat.completion') {
+                expect(response.choices[0].message.content).toBe('First response');
+                expect(response.choices).toHaveLength(2);
+                expect(response.choices[1].message.content).toBe('Second response');
+            }
+        });
+        it('should check health before request when enabled', async () => {
+            const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+            mockGetHealth.mockResolvedValueOnce({ status: 'ok' });
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-129',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+            expect(mockGetHealth).toHaveBeenCalled();
+            expect(response.object).toBe('chat.completion');
+        });
+        it('should return error when health check fails with error status', async () => {
+            const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+            mockGetHealth.mockResolvedValueOnce({
+                status: 'error',
+                error: 'Model load failed'
+            });
+            const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('error');
+            if (response.object === 'error') {
+                expect(response.error.message).toContain('server not ready');
+                expect(response.error.message).toContain('Model load failed');
+                expect(response.error.code).toBe('PROVIDER_ERROR');
+            }
+        });
+        it('should return error when health check fails with loading status', async () => {
+            const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+            mockGetHealth.mockResolvedValueOnce({ status: 'loading' });
+            const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('error');
+            if (response.object === 'error') {
+                expect(response.error.message).toContain('loading');
+            }
+        });
+        it('should proceed with request if health check throws error', async () => {
+            const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+            mockGetHealth.mockRejectedValueOnce(new Error('Connection refused'));
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-130',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('chat.completion');
+        });
+        it('should handle connection error to server', async () => {
+            mockCreate.mockRejectedValueOnce(new Error('fetch failed: ECONNREFUSED'));
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('error');
+            if (response.object === 'error') {
+                expect(response.error.message).toContain('Cannot connect to llama.cpp server');
+                expect(response.error.message).toContain('Is the server running?');
+                expect(response.error.code).toBe('NETWORK_ERROR');
+            }
+        });
+        it('should handle API errors', async () => {
+            mockCreate.mockRejectedValueOnce({
+                status: 400,
+                message: 'Invalid request',
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('error');
+            if (response.object === 'error') {
+                expect(response.error.code).toBeDefined();
+            }
+        });
+        it('should handle error when no choices in response', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-131',
+                choices: [],
+            });
+            const response = await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(response.object).toBe('error');
+        });
+    });
+    describe('validateApiKey', () => {
+        it('should always return true (no API key required)', () => {
+            expect(adapter.validateApiKey('')).toBe(true);
+            expect(adapter.validateApiKey('any-string')).toBe(true);
+            expect(adapter.validateApiKey('not-needed')).toBe(true);
+        });
+    });
+    describe('getAdapterInfo', () => {
+        it('should return adapter information', () => {
+            const info = adapter.getAdapterInfo();
+            expect(info.providerId).toBe('llamacpp');
+            expect(info.name).toBe('llama.cpp Client Adapter');
+            expect(info.version).toBe('1.0.0');
+            expect(info.baseURL).toBe('http://localhost:8080');
+        });
+        it('should include custom baseURL in info', () => {
+            const customAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({
+                baseURL: 'http://gpu-server:8080',
+            });
+            const info = customAdapter.getAdapterInfo();
+            expect(info.baseURL).toBe('http://gpu-server:8080');
+        });
+    });
+    describe('getServerClient', () => {
+        it('should return the underlying server client', () => {
+            const serverClient = adapter.getServerClient();
+            expect(serverClient).toBeDefined();
+        });
+    });
+    describe('message formatting', () => {
+        it('should format user messages correctly', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-132',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            await adapter.sendMessage(basicRequest, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                messages: expect.arrayContaining([
+                    { role: 'user', content: 'Hello, how are you?' },
+                ]),
+            }));
+        });
+        it('should format assistant messages correctly', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-133',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const requestWithHistory = {
+                ...basicRequest,
+                messages: [
+                    { role: 'user', content: 'Hi' },
+                    { role: 'assistant', content: 'Hello!' },
+                    { role: 'user', content: 'How are you?' },
+                ],
+            };
+            await adapter.sendMessage(requestWithHistory, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                messages: [
+                    { role: 'user', content: 'Hi' },
+                    { role: 'assistant', content: 'Hello!' },
+                    { role: 'user', content: 'How are you?' },
+                ],
+            }));
+        });
+        it('should handle system messages in conversation', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-134',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const requestWithSystemInMessages = {
+                ...basicRequest,
+                messages: [
+                    { role: 'system', content: 'Be concise' },
+                    { role: 'user', content: 'Explain AI' },
+                ],
+            };
+            await adapter.sendMessage(requestWithSystemInMessages, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                messages: expect.arrayContaining([
+                    { role: 'system', content: 'Be concise' },
+                ]),
+            }));
+        });
+    });
+    describe('API parameter mapping', () => {
+        it('should pass all standard parameters', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-135',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            const fullRequest = {
+                ...basicRequest,
+                settings: {
+                    ...basicRequest.settings,
+                    temperature: 0.9,
+                    maxTokens: 2000,
+                    topP: 0.8,
+                    frequencyPenalty: 0.5,
+                    presencePenalty: 0.3,
+                    stopSequences: ['END'],
+                },
+            };
+            await adapter.sendMessage(fullRequest, 'not-needed');
+            expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                model: 'llama-3-8b-instruct',
+                temperature: 0.9,
+                max_tokens: 2000,
+                top_p: 0.8,
+                frequency_penalty: 0.5,
+                presence_penalty: 0.3,
+                stop: ['END'],
+            }));
+        });
+        it('should omit frequency penalty when zero', async () => {
+            mockCreate.mockResolvedValueOnce({
+                id: 'chatcmpl-136',
+                choices: [
+                    {
+                        message: { role: 'assistant', content: 'Response' },
+                        finish_reason: 'stop',
+                    },
+                ],
+            });
+            await adapter.sendMessage(basicRequest, 'not-needed');
+            const callArgs = mockCreate.mock.calls[0][0];
+            expect(callArgs.frequency_penalty).toBeUndefined();
+        });
+    });
+});

package/dist/llm/clients/LlamaCppServerClient.d.ts
@@ -0,0 +1,161 @@
+/**
+ * Response from the /health endpoint
+ */
+export interface LlamaCppHealthResponse {
+    status: 'loading' | 'error' | 'ok';
+    error?: string;
+}
+/**
+ * Response from the /tokenize endpoint
+ */
+export interface LlamaCppTokenizeResponse {
+    tokens: number[];
+}
+/**
+ * Response from the /detokenize endpoint
+ */
+export interface LlamaCppDetokenizeResponse {
+    content: string;
+}
+/**
+ * Response from the /embedding endpoint
+ */
+export interface LlamaCppEmbeddingResponse {
+    embedding: number[];
+}
+/**
+ * Response from the /infill endpoint
+ */
+export interface LlamaCppInfillResponse {
+    content: string;
+    tokens?: number[];
+    stop?: boolean;
+}
+/**
+ * Response from the /props endpoint
+ */
+export interface LlamaCppPropsResponse {
+    assistant_name?: string;
+    user_name?: string;
+    default_generation_settings?: Record<string, any>;
+    total_slots?: number;
+    [key: string]: any;
+}
+/**
+ * Response from the /metrics endpoint
+ */
+export interface LlamaCppMetricsResponse {
+    [key: string]: any;
+}
+/**
+ * Individual slot information from /slots endpoint
+ */
+export interface LlamaCppSlot {
+    id: number;
+    state: number;
+    prompt?: string;
+    [key: string]: any;
+}
+/**
+ * Response from the /slots endpoint
+ */
+export interface LlamaCppSlotsResponse {
+    slots: LlamaCppSlot[];
+}
+/**
+ * Client for interacting with llama.cpp server's management and utility endpoints
+ *
+ * This class provides access to non-LLM endpoints like tokenization, embeddings,
+ * health checks, and server properties. For chat completions, use LlamaCppClientAdapter.
+ *
+ * @example
+ * ```typescript
+ * const client = new LlamaCppServerClient('http://localhost:8080');
+ *
+ * // Check if server is ready
+ * const health = await client.getHealth();
+ * console.log(health.status); // 'ok', 'loading', or 'error'
+ *
+ * // Tokenize text
+ * const { tokens } = await client.tokenize('Hello world');
+ * console.log(tokens); // [123, 456, 789]
+ *
+ * // Generate embeddings
+ * const { embedding } = await client.createEmbedding('Some text');
+ * ```
+ */
+export declare class LlamaCppServerClient {
+    private baseURL;
+    /**
+     * Creates a new llama.cpp server client
+     *
+     * @param baseURL - The base URL of the llama.cpp server (e.g., 'http://localhost:8080')
+     */
+    constructor(baseURL: string);
+    /**
+     * Checks the health and readiness of the server
+     *
+     * @returns Promise resolving to health status
+     * @throws Error if the request fails
+     */
+    getHealth(): Promise<LlamaCppHealthResponse>;
+    /**
+     * Converts text to tokens using the loaded model's tokenizer
+     *
+     * @param content - The text to tokenize
+     * @returns Promise resolving to array of token IDs
+     * @throws Error if the request fails
+     */
+    tokenize(content: string): Promise<LlamaCppTokenizeResponse>;
+    /**
+     * Converts tokens back to text using the loaded model's tokenizer
+     *
+     * @param tokens - Array of token IDs to convert
+     * @returns Promise resolving to the decoded text
+     * @throws Error if the request fails
+     */
+    detokenize(tokens: number[]): Promise<LlamaCppDetokenizeResponse>;
+    /**
+     * Generates an embedding vector for the given text
+     *
+     * @param content - The text to embed
+     * @param imageData - Optional base64-encoded image data for multimodal models
+     * @returns Promise resolving to the embedding vector
+     * @throws Error if the request fails
+     */
+    createEmbedding(content: string, imageData?: string): Promise<LlamaCppEmbeddingResponse>;
+    /**
+     * Performs code infilling (completing code between prefix and suffix)
+     *
+     * @param inputPrefix - The code before the cursor/gap
+     * @param inputSuffix - The code after the cursor/gap
+     * @returns Promise resolving to the infilled completion
+     * @throws Error if the request fails
+     */
+    infill(inputPrefix: string, inputSuffix: string): Promise<LlamaCppInfillResponse>;
+    /**
+     * Retrieves server properties and configuration
+     *
+     * @returns Promise resolving to server properties
+     * @throws Error if the request fails
+     */
+    getProps(): Promise<LlamaCppPropsResponse>;
+    /**
+     * Retrieves performance metrics from the server
+     *
+     * @returns Promise resolving to metrics data
+     * @throws Error if the request fails
+     */
+    getMetrics(): Promise<LlamaCppMetricsResponse>;
+    /**
+     * Retrieves processing slot status (debugging endpoint)
+     *
+     * WARNING: This endpoint may expose sensitive information including prompt content.
+     * The llama.cpp documentation strongly advises against enabling this in production.
+     * Only use this endpoint in development/debugging environments.
+     *
+     * @returns Promise resolving to slot status information
+     * @throws Error if the request fails or endpoint is not enabled
+     */
+    getSlots(): Promise<LlamaCppSlotsResponse>;
+}
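
Beyond the health, tokenize, and embedding calls shown in the class's own `@example`, the declaration above also exposes `infill`, `getProps`, `getMetrics`, and `getSlots`. A short sketch of how these might be called, assuming the same root export and reusing the adapter's `getServerClient()` accessor exercised in the tests above:

```typescript
// Sketch only: method names and response shapes come from LlamaCppServerClient.d.ts above;
// the import path and surrounding wiring are assumptions.
import { LlamaCppClientAdapter } from 'genai-lite';

async function inspectServer() {
  const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });
  const server = adapter.getServerClient();

  // Fill in code between a prefix and a suffix (for fill-in-the-middle capable models).
  const fill = await server.infill('function add(a, b) {\n  return ', ';\n}');
  console.log(fill.content);

  // Server configuration and performance counters.
  const props = await server.getProps();
  console.log(props.total_slots, props.default_generation_settings);
  const metrics = await server.getMetrics();
  console.log(Object.keys(metrics));

  // Debugging only: /slots can expose prompt contents, so keep it disabled in production.
  const { slots } = await server.getSlots();
  console.log(slots.map((s) => ({ id: s.id, state: s.state })));
}

inspectServer().catch(console.error);
```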