genai-lite 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const LlamaCppClientAdapter_1 = require("./LlamaCppClientAdapter");
+ // Mock OpenAI SDK
+ jest.mock('openai', () => {
+     return {
+         __esModule: true,
+         default: jest.fn().mockImplementation(() => ({
+             chat: {
+                 completions: {
+                     create: mockCreate,
+                 },
+             },
+         })),
+     };
+ });
+ // Mock LlamaCppServerClient
+ jest.mock('./LlamaCppServerClient', () => {
+     return {
+         LlamaCppServerClient: jest.fn().mockImplementation(() => ({
+             getHealth: mockGetHealth,
+         })),
+     };
+ });
+ const mockCreate = jest.fn();
+ const mockGetHealth = jest.fn();
+ describe('LlamaCppClientAdapter', () => {
+     let adapter;
+     let basicRequest;
+     beforeEach(() => {
+         jest.clearAllMocks();
+         adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter();
+         basicRequest = {
+             providerId: 'llamacpp',
+             modelId: 'llama-3-8b-instruct',
+             messages: [
+                 { role: 'user', content: 'Hello, how are you?' },
+             ],
+             settings: {
+                 temperature: 0.7,
+                 maxTokens: 1000,
+                 topP: 0.95,
+                 stopSequences: [],
+                 frequencyPenalty: 0.0,
+                 presencePenalty: 0.0,
+                 supportsSystemMessage: true,
+                 user: '',
+                 geminiSafetySettings: [],
+                 reasoning: {
+                     enabled: false,
+                     exclude: false,
+                 },
+                 thinkingExtraction: {
+                     enabled: false,
+                     tag: 'thinking',
+                     onMissing: 'auto',
+                 },
+             },
+         };
+     });
+     describe('constructor', () => {
+         it('should use default baseURL when not provided', () => {
+             const adapterInfo = adapter.getAdapterInfo();
+             expect(adapterInfo.baseURL).toBe('http://localhost:8080');
+         });
+         it('should use custom baseURL when provided', () => {
+             const customAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({
+                 baseURL: 'http://localhost:9090',
+             });
+             const adapterInfo = customAdapter.getAdapterInfo();
+             expect(adapterInfo.baseURL).toBe('http://localhost:9090');
+         });
+         it('should set checkHealth to false by default', () => {
+             const adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter();
+             expect(adapter.checkHealth).toBe(false);
+         });
+         it('should set checkHealth when provided', () => {
+             const adapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+             expect(adapter.checkHealth).toBe(true);
+         });
+     });
+     describe('sendMessage', () => {
+         it('should send message successfully', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-123',
+                 object: 'chat.completion',
+                 created: 1677652288,
+                 model: 'llama-3-8b-instruct',
+                 choices: [
+                     {
+                         index: 0,
+                         message: {
+                             role: 'assistant',
+                             content: 'I am doing well, thank you!',
+                         },
+                         finish_reason: 'stop',
+                     },
+                 ],
+                 usage: {
+                     prompt_tokens: 10,
+                     completion_tokens: 8,
+                     total_tokens: 18,
+                 },
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('chat.completion');
+             if (response.object === 'chat.completion') {
+                 expect(response.choices[0].message.content).toBe('I am doing well, thank you!');
+                 expect(response.choices[0].finish_reason).toBe('stop');
+                 expect(response.usage).toEqual({
+                     prompt_tokens: 10,
+                     completion_tokens: 8,
+                     total_tokens: 18,
+                 });
+             }
+         });
+         it('should include system message when provided', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-124',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const requestWithSystem = {
+                 ...basicRequest,
+                 systemMessage: 'You are a helpful assistant.',
+             };
+             await adapter.sendMessage(requestWithSystem, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 messages: expect.arrayContaining([
+                     { role: 'system', content: 'You are a helpful assistant.' },
+                 ]),
+             }));
+         });
+         it('should pass stop sequences when provided', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-125',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const requestWithStop = {
+                 ...basicRequest,
+                 settings: {
+                     ...basicRequest.settings,
+                     stopSequences: ['END', 'STOP'],
+                 },
+             };
+             await adapter.sendMessage(requestWithStop, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 stop: ['END', 'STOP'],
+             }));
+         });
+         it('should handle length finish reason', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-126',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response...' },
+                         finish_reason: 'length',
+                     },
+                 ],
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('chat.completion');
+             if (response.object === 'chat.completion') {
+                 expect(response.choices[0].finish_reason).toBe('length');
+             }
+         });
+         it('should handle completion without usage data', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-127',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+                 // No usage field
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('chat.completion');
+             if (response.object === 'chat.completion') {
+                 expect(response.usage).toBeUndefined();
+             }
+         });
+         it('should handle multiple choices', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-128',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'First response' },
+                         finish_reason: 'stop',
+                     },
+                     {
+                         message: { role: 'assistant', content: 'Second response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('chat.completion');
+             if (response.object === 'chat.completion') {
+                 expect(response.choices[0].message.content).toBe('First response');
+                 expect(response.choices).toHaveLength(2);
+                 expect(response.choices[1].message.content).toBe('Second response');
+             }
+         });
+         it('should check health before request when enabled', async () => {
+             const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+             mockGetHealth.mockResolvedValueOnce({ status: 'ok' });
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-129',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+             expect(mockGetHealth).toHaveBeenCalled();
+             expect(response.object).toBe('chat.completion');
+         });
+         it('should return error when health check fails with error status', async () => {
+             const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+             mockGetHealth.mockResolvedValueOnce({
+                 status: 'error',
+                 error: 'Model load failed'
+             });
+             const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('error');
+             if (response.object === 'error') {
+                 expect(response.error.message).toContain('server not ready');
+                 expect(response.error.message).toContain('Model load failed');
+                 expect(response.error.code).toBe('PROVIDER_ERROR');
+             }
+         });
+         it('should return error when health check fails with loading status', async () => {
+             const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+             mockGetHealth.mockResolvedValueOnce({ status: 'loading' });
+             const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('error');
+             if (response.object === 'error') {
+                 expect(response.error.message).toContain('loading');
+             }
+         });
+         it('should proceed with request if health check throws error', async () => {
+             const healthCheckAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({ checkHealth: true });
+             mockGetHealth.mockRejectedValueOnce(new Error('Connection refused'));
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-130',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const response = await healthCheckAdapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('chat.completion');
+         });
+         it('should handle connection error to server', async () => {
+             mockCreate.mockRejectedValueOnce(new Error('fetch failed: ECONNREFUSED'));
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('error');
+             if (response.object === 'error') {
+                 expect(response.error.message).toContain('Cannot connect to llama.cpp server');
+                 expect(response.error.message).toContain('Is the server running?');
+                 expect(response.error.code).toBe('NETWORK_ERROR');
+             }
+         });
+         it('should handle API errors', async () => {
+             mockCreate.mockRejectedValueOnce({
+                 status: 400,
+                 message: 'Invalid request',
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('error');
+             if (response.object === 'error') {
+                 expect(response.error.code).toBeDefined();
+             }
+         });
+         it('should handle error when no choices in response', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-131',
+                 choices: [],
+             });
+             const response = await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(response.object).toBe('error');
+         });
+     });
+     describe('validateApiKey', () => {
+         it('should always return true (no API key required)', () => {
+             expect(adapter.validateApiKey('')).toBe(true);
+             expect(adapter.validateApiKey('any-string')).toBe(true);
+             expect(adapter.validateApiKey('not-needed')).toBe(true);
+         });
+     });
+     describe('getAdapterInfo', () => {
+         it('should return adapter information', () => {
+             const info = adapter.getAdapterInfo();
+             expect(info.providerId).toBe('llamacpp');
+             expect(info.name).toBe('llama.cpp Client Adapter');
+             expect(info.version).toBe('1.0.0');
+             expect(info.baseURL).toBe('http://localhost:8080');
+         });
+         it('should include custom baseURL in info', () => {
+             const customAdapter = new LlamaCppClientAdapter_1.LlamaCppClientAdapter({
+                 baseURL: 'http://gpu-server:8080',
+             });
+             const info = customAdapter.getAdapterInfo();
+             expect(info.baseURL).toBe('http://gpu-server:8080');
+         });
+     });
+     describe('getServerClient', () => {
+         it('should return the underlying server client', () => {
+             const serverClient = adapter.getServerClient();
+             expect(serverClient).toBeDefined();
+         });
+     });
+     describe('message formatting', () => {
+         it('should format user messages correctly', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-132',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             await adapter.sendMessage(basicRequest, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 messages: expect.arrayContaining([
+                     { role: 'user', content: 'Hello, how are you?' },
+                 ]),
+             }));
+         });
+         it('should format assistant messages correctly', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-133',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const requestWithHistory = {
+                 ...basicRequest,
+                 messages: [
+                     { role: 'user', content: 'Hi' },
+                     { role: 'assistant', content: 'Hello!' },
+                     { role: 'user', content: 'How are you?' },
+                 ],
+             };
+             await adapter.sendMessage(requestWithHistory, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 messages: [
+                     { role: 'user', content: 'Hi' },
+                     { role: 'assistant', content: 'Hello!' },
+                     { role: 'user', content: 'How are you?' },
+                 ],
+             }));
+         });
+         it('should handle system messages in conversation', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-134',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const requestWithSystemInMessages = {
+                 ...basicRequest,
+                 messages: [
+                     { role: 'system', content: 'Be concise' },
+                     { role: 'user', content: 'Explain AI' },
+                 ],
+             };
+             await adapter.sendMessage(requestWithSystemInMessages, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 messages: expect.arrayContaining([
+                     { role: 'system', content: 'Be concise' },
+                 ]),
+             }));
+         });
+     });
+     describe('API parameter mapping', () => {
+         it('should pass all standard parameters', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-135',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             const fullRequest = {
+                 ...basicRequest,
+                 settings: {
+                     ...basicRequest.settings,
+                     temperature: 0.9,
+                     maxTokens: 2000,
+                     topP: 0.8,
+                     frequencyPenalty: 0.5,
+                     presencePenalty: 0.3,
+                     stopSequences: ['END'],
+                 },
+             };
+             await adapter.sendMessage(fullRequest, 'not-needed');
+             expect(mockCreate).toHaveBeenCalledWith(expect.objectContaining({
+                 model: 'llama-3-8b-instruct',
+                 temperature: 0.9,
+                 max_tokens: 2000,
+                 top_p: 0.8,
+                 frequency_penalty: 0.5,
+                 presence_penalty: 0.3,
+                 stop: ['END'],
+             }));
+         });
+         it('should omit frequency penalty when zero', async () => {
+             mockCreate.mockResolvedValueOnce({
+                 id: 'chatcmpl-136',
+                 choices: [
+                     {
+                         message: { role: 'assistant', content: 'Response' },
+                         finish_reason: 'stop',
+                     },
+                 ],
+             });
+             await adapter.sendMessage(basicRequest, 'not-needed');
+             const callArgs = mockCreate.mock.calls[0][0];
+             expect(callArgs.frequency_penalty).toBeUndefined();
+         });
+     });
+ });
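The suite above pins down the adapter's observable contract: it builds an OpenAI-compatible request body (`model`, `temperature`, `max_tokens`, `top_p`, `frequency_penalty`, `presence_penalty`, `stop`), returns a discriminated result whose `object` field is either `'chat.completion'` or `'error'`, optionally probes `/health` before each call, and accepts any string in place of an API key since llama.cpp needs none. The sketch below mirrors those shapes; the `genai-lite` import path and the use of a partial `settings` object are assumptions, since the diff does not show the package's export surface or the request type.

```typescript
import { LlamaCppClientAdapter } from 'genai-lite'; // assumed export path

async function main() {
  // Defaults shown in the tests: baseURL 'http://localhost:8080', checkHealth false.
  const adapter = new LlamaCppClientAdapter({
    baseURL: 'http://localhost:8080',
    checkHealth: true, // probe the server's /health endpoint before sending
  });

  const response = await adapter.sendMessage(
    {
      providerId: 'llamacpp',
      modelId: 'llama-3-8b-instruct',
      messages: [{ role: 'user', content: 'Hello, how are you?' }],
      settings: { temperature: 0.7, maxTokens: 1000, topP: 0.95 }, // partial settings: an assumption
    },
    'not-needed', // no API key required; validateApiKey accepts any string
  );

  if (response.object === 'chat.completion') {
    console.log(response.choices[0].message.content);
  } else {
    // Codes exercised in the tests include NETWORK_ERROR and PROVIDER_ERROR.
    console.error(`${response.error.code}: ${response.error.message}`);
  }
}

main().catch(console.error);
```

Note the health-check semantics the tests encode: a `/health` status of `'loading'` or `'error'` short-circuits to an error result, while an exception from the health call itself (for example, connection refused) is ignored and the chat request proceeds anyway.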
@@ -0,0 +1,161 @@
+ /**
+  * Response from the /health endpoint
+  */
+ export interface LlamaCppHealthResponse {
+     status: 'loading' | 'error' | 'ok';
+     error?: string;
+ }
+ /**
+  * Response from the /tokenize endpoint
+  */
+ export interface LlamaCppTokenizeResponse {
+     tokens: number[];
+ }
+ /**
+  * Response from the /detokenize endpoint
+  */
+ export interface LlamaCppDetokenizeResponse {
+     content: string;
+ }
+ /**
+  * Response from the /embedding endpoint
+  */
+ export interface LlamaCppEmbeddingResponse {
+     embedding: number[];
+ }
+ /**
+  * Response from the /infill endpoint
+  */
+ export interface LlamaCppInfillResponse {
+     content: string;
+     tokens?: number[];
+     stop?: boolean;
+ }
+ /**
+  * Response from the /props endpoint
+  */
+ export interface LlamaCppPropsResponse {
+     assistant_name?: string;
+     user_name?: string;
+     default_generation_settings?: Record<string, any>;
+     total_slots?: number;
+     [key: string]: any;
+ }
+ /**
+  * Response from the /metrics endpoint
+  */
+ export interface LlamaCppMetricsResponse {
+     [key: string]: any;
+ }
+ /**
+  * Individual slot information from /slots endpoint
+  */
+ export interface LlamaCppSlot {
+     id: number;
+     state: number;
+     prompt?: string;
+     [key: string]: any;
+ }
+ /**
+  * Response from the /slots endpoint
+  */
+ export interface LlamaCppSlotsResponse {
+     slots: LlamaCppSlot[];
+ }
+ /**
+  * Client for interacting with llama.cpp server's management and utility endpoints
+  *
+  * This class provides access to non-LLM endpoints like tokenization, embeddings,
+  * health checks, and server properties. For chat completions, use LlamaCppClientAdapter.
+  *
+  * @example
+  * ```typescript
+  * const client = new LlamaCppServerClient('http://localhost:8080');
+  *
+  * // Check if server is ready
+  * const health = await client.getHealth();
+  * console.log(health.status); // 'ok', 'loading', or 'error'
+  *
+  * // Tokenize text
+  * const { tokens } = await client.tokenize('Hello world');
+  * console.log(tokens); // [123, 456, 789]
+  *
+  * // Generate embeddings
+  * const { embedding } = await client.createEmbedding('Some text');
+  * ```
+  */
+ export declare class LlamaCppServerClient {
+     private baseURL;
+     /**
+      * Creates a new llama.cpp server client
+      *
+      * @param baseURL - The base URL of the llama.cpp server (e.g., 'http://localhost:8080')
+      */
+     constructor(baseURL: string);
+     /**
+      * Checks the health and readiness of the server
+      *
+      * @returns Promise resolving to health status
+      * @throws Error if the request fails
+      */
+     getHealth(): Promise<LlamaCppHealthResponse>;
+     /**
+      * Converts text to tokens using the loaded model's tokenizer
+      *
+      * @param content - The text to tokenize
+      * @returns Promise resolving to array of token IDs
+      * @throws Error if the request fails
+      */
+     tokenize(content: string): Promise<LlamaCppTokenizeResponse>;
+     /**
+      * Converts tokens back to text using the loaded model's tokenizer
+      *
+      * @param tokens - Array of token IDs to convert
+      * @returns Promise resolving to the decoded text
+      * @throws Error if the request fails
+      */
+     detokenize(tokens: number[]): Promise<LlamaCppDetokenizeResponse>;
+     /**
+      * Generates an embedding vector for the given text
+      *
+      * @param content - The text to embed
+      * @param imageData - Optional base64-encoded image data for multimodal models
+      * @returns Promise resolving to the embedding vector
+      * @throws Error if the request fails
+      */
+     createEmbedding(content: string, imageData?: string): Promise<LlamaCppEmbeddingResponse>;
+     /**
+      * Performs code infilling (completing code between prefix and suffix)
+      *
+      * @param inputPrefix - The code before the cursor/gap
+      * @param inputSuffix - The code after the cursor/gap
+      * @returns Promise resolving to the infilled completion
+      * @throws Error if the request fails
+      */
+     infill(inputPrefix: string, inputSuffix: string): Promise<LlamaCppInfillResponse>;
+     /**
+      * Retrieves server properties and configuration
+      *
+      * @returns Promise resolving to server properties
+      * @throws Error if the request fails
+      */
+     getProps(): Promise<LlamaCppPropsResponse>;
+     /**
+      * Retrieves performance metrics from the server
+      *
+      * @returns Promise resolving to metrics data
+      * @throws Error if the request fails
+      */
+     getMetrics(): Promise<LlamaCppMetricsResponse>;
+     /**
+      * Retrieves processing slot status (debugging endpoint)
+      *
+      * WARNING: This endpoint may expose sensitive information including prompt content.
+      * The llama.cpp documentation strongly advises against enabling this in production.
+      * Only use this endpoint in development/debugging environments.
+      *
+      * @returns Promise resolving to slot status information
+      * @throws Error if the request fails or endpoint is not enabled
+      */
+     getSlots(): Promise<LlamaCppSlotsResponse>;
+ }
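The declaration file above covers llama.cpp's utility endpoints (`/health`, `/tokenize`, `/detokenize`, `/embedding`, `/infill`, `/props`, `/metrics`, `/slots`) rather than chat completions, which go through `LlamaCppClientAdapter`. A short sketch of how the declared methods compose follows; the `genai-lite` import path is an assumption, and it presumes a running llama.cpp server with these endpoints enabled.

```typescript
import { LlamaCppServerClient } from 'genai-lite'; // assumed export path

async function inspectServer(): Promise<void> {
  const client = new LlamaCppServerClient('http://localhost:8080');

  // Gate on readiness, much like the adapter's optional checkHealth does.
  const health = await client.getHealth();
  if (health.status !== 'ok') {
    throw new Error(`llama.cpp server not ready: ${health.status} ${health.error ?? ''}`);
  }

  // Round-trip text through the loaded model's tokenizer.
  const { tokens } = await client.tokenize('Hello world');
  const { content } = await client.detokenize(tokens);

  // Complete code between a prefix and a suffix (requires an infill-capable model).
  const fill = await client.infill('function add(a, b) {\n  return ', ';\n}');

  console.log({ tokenCount: tokens.length, roundTrip: content, infill: fill.content });
}

inspectServer().catch(console.error);
```

`getSlots()` is deliberately left out of the sketch: as its doc comment warns, the `/slots` endpoint can expose prompt content and is intended for debugging only.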