genai-lite 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +380 -15
- package/dist/index.d.ts +5 -0
- package/dist/index.js +8 -1
- package/dist/llm/LLMService.js +8 -0
- package/dist/llm/LLMService.test.js +57 -9
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +116 -0
- package/dist/llm/clients/LlamaCppClientAdapter.js +289 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.js +447 -0
- package/dist/llm/clients/LlamaCppServerClient.d.ts +161 -0
- package/dist/llm/clients/LlamaCppServerClient.js +192 -0
- package/dist/llm/clients/LlamaCppServerClient.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppServerClient.test.js +294 -0
- package/dist/llm/config.d.ts +12 -0
- package/dist/llm/config.js +77 -0
- package/dist/llm/services/ModelResolver.js +13 -13
- package/dist/llm/services/ModelResolver.test.js +25 -4
- package/dist/llm/types.d.ts +8 -0
- package/dist/providers/fromEnvironment.d.ts +4 -0
- package/dist/providers/fromEnvironment.js +8 -0
- package/dist/providers/fromEnvironment.test.js +13 -0
- package/package.json +1 -1
package/dist/llm/clients/LlamaCppServerClient.js
ADDED
@@ -0,0 +1,192 @@
+"use strict";
+// AI Summary: Utility client for interacting with llama.cpp server's non-LLM endpoints.
+// Provides methods for tokenization, embeddings, health checks, and server management.
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.LlamaCppServerClient = void 0;
+/**
+ * Client for interacting with llama.cpp server's management and utility endpoints
+ *
+ * This class provides access to non-LLM endpoints like tokenization, embeddings,
+ * health checks, and server properties. For chat completions, use LlamaCppClientAdapter.
+ *
+ * @example
+ * ```typescript
+ * const client = new LlamaCppServerClient('http://localhost:8080');
+ *
+ * // Check if server is ready
+ * const health = await client.getHealth();
+ * console.log(health.status); // 'ok', 'loading', or 'error'
+ *
+ * // Tokenize text
+ * const { tokens } = await client.tokenize('Hello world');
+ * console.log(tokens); // [123, 456, 789]
+ *
+ * // Generate embeddings
+ * const { embedding } = await client.createEmbedding('Some text');
+ * ```
+ */
+class LlamaCppServerClient {
+    /**
+     * Creates a new llama.cpp server client
+     *
+     * @param baseURL - The base URL of the llama.cpp server (e.g., 'http://localhost:8080')
+     */
+    constructor(baseURL) {
+        // Remove trailing slash if present
+        this.baseURL = baseURL.replace(/\/$/, '');
+    }
+    /**
+     * Checks the health and readiness of the server
+     *
+     * @returns Promise resolving to health status
+     * @throws Error if the request fails
+     */
+    async getHealth() {
+        const response = await fetch(`${this.baseURL}/health`);
+        if (!response.ok) {
+            throw new Error(`Health check failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Converts text to tokens using the loaded model's tokenizer
+     *
+     * @param content - The text to tokenize
+     * @returns Promise resolving to array of token IDs
+     * @throws Error if the request fails
+     */
+    async tokenize(content) {
+        const response = await fetch(`${this.baseURL}/tokenize`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ content }),
+        });
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`Tokenize failed: ${response.status} ${response.statusText} - ${errorText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Converts tokens back to text using the loaded model's tokenizer
+     *
+     * @param tokens - Array of token IDs to convert
+     * @returns Promise resolving to the decoded text
+     * @throws Error if the request fails
+     */
+    async detokenize(tokens) {
+        const response = await fetch(`${this.baseURL}/detokenize`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ tokens }),
+        });
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`Detokenize failed: ${response.status} ${response.statusText} - ${errorText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Generates an embedding vector for the given text
+     *
+     * @param content - The text to embed
+     * @param imageData - Optional base64-encoded image data for multimodal models
+     * @returns Promise resolving to the embedding vector
+     * @throws Error if the request fails
+     */
+    async createEmbedding(content, imageData) {
+        const body = { content };
+        if (imageData) {
+            body.image_data = imageData;
+        }
+        const response = await fetch(`${this.baseURL}/embedding`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify(body),
+        });
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`Embedding failed: ${response.status} ${response.statusText} - ${errorText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Performs code infilling (completing code between prefix and suffix)
+     *
+     * @param inputPrefix - The code before the cursor/gap
+     * @param inputSuffix - The code after the cursor/gap
+     * @returns Promise resolving to the infilled completion
+     * @throws Error if the request fails
+     */
+    async infill(inputPrefix, inputSuffix) {
+        const response = await fetch(`${this.baseURL}/infill`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({
+                input_prefix: inputPrefix,
+                input_suffix: inputSuffix,
+            }),
+        });
+        if (!response.ok) {
+            const errorText = await response.text();
+            throw new Error(`Infill failed: ${response.status} ${response.statusText} - ${errorText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Retrieves server properties and configuration
+     *
+     * @returns Promise resolving to server properties
+     * @throws Error if the request fails
+     */
+    async getProps() {
+        const response = await fetch(`${this.baseURL}/props`);
+        if (!response.ok) {
+            throw new Error(`Get props failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
+    /**
+     * Retrieves performance metrics from the server
+     *
+     * @returns Promise resolving to metrics data
+     * @throws Error if the request fails
+     */
+    async getMetrics() {
+        const response = await fetch(`${this.baseURL}/metrics`);
+        if (!response.ok) {
+            throw new Error(`Get metrics failed: ${response.status} ${response.statusText}`);
+        }
+        // Metrics endpoint might return Prometheus format or JSON
+        const contentType = response.headers.get('content-type');
+        if (contentType?.includes('application/json')) {
+            return await response.json();
+        }
+        else {
+            // Return raw text for Prometheus format
+            const text = await response.text();
+            return { raw: text };
+        }
+    }
+    /**
+     * Retrieves processing slot status (debugging endpoint)
+     *
+     * WARNING: This endpoint may expose sensitive information including prompt content.
+     * The llama.cpp documentation strongly advises against enabling this in production.
+     * Only use this endpoint in development/debugging environments.
+     *
+     * @returns Promise resolving to slot status information
+     * @throws Error if the request fails or endpoint is not enabled
+     */
+    async getSlots() {
+        const response = await fetch(`${this.baseURL}/slots`);
+        if (!response.ok) {
+            if (response.status === 404) {
+                throw new Error('Slots endpoint not enabled. Start server with --slots flag to enable.');
+            }
+            throw new Error(`Get slots failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
+}
+exports.LlamaCppServerClient = LlamaCppServerClient;
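Taken together, the new class is a thin fetch wrapper around the llama.cpp server's utility endpoints. A minimal usage sketch, assuming a llama.cpp server is listening on localhost:8080 and that the class is re-exported from the package root (the index.js/index.d.ts changes in this release suggest an export, but the exact path is not shown in this section):

```typescript
import { LlamaCppServerClient } from 'genai-lite'; // assumed export path

async function main() {
  const client = new LlamaCppServerClient('http://localhost:8080');

  // Gate further calls on server readiness, mirroring the class's own JSDoc example
  const health = await client.getHealth();
  if (health.status !== 'ok') {
    throw new Error(`Server not ready: ${health.status}`);
  }

  // Round-trip text through the loaded model's tokenizer
  const { tokens } = await client.tokenize('Hello world');
  const { content } = await client.detokenize(tokens);
  console.log(tokens.length, content);
}

main().catch(console.error);
```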
package/dist/llm/clients/LlamaCppServerClient.test.d.ts
ADDED
@@ -0,0 +1 @@
+export {};
package/dist/llm/clients/LlamaCppServerClient.test.js
ADDED
@@ -0,0 +1,294 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+// Mock global fetch
+global.fetch = jest.fn();
+describe('LlamaCppServerClient', () => {
+    let client;
+    const baseURL = 'http://localhost:8080';
+    beforeEach(() => {
+        client = new LlamaCppServerClient_1.LlamaCppServerClient(baseURL);
+        jest.clearAllMocks();
+    });
+    describe('constructor', () => {
+        it('should remove trailing slash from baseURL', () => {
+            const clientWithSlash = new LlamaCppServerClient_1.LlamaCppServerClient('http://localhost:8080/');
+            expect(clientWithSlash.baseURL).toBe('http://localhost:8080');
+        });
+        it('should preserve baseURL without trailing slash', () => {
+            expect(client.baseURL).toBe('http://localhost:8080');
+        });
+    });
+    describe('getHealth', () => {
+        it('should return health status when server is ok', async () => {
+            const mockResponse = { status: 'ok' };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.getHealth();
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/health`);
+            expect(result).toEqual(mockResponse);
+        });
+        it('should return loading status', async () => {
+            const mockResponse = { status: 'loading' };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.getHealth();
+            expect(result.status).toBe('loading');
+        });
+        it('should return error status with message', async () => {
+            const mockResponse = { status: 'error', error: 'Model load failed' };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.getHealth();
+            expect(result.status).toBe('error');
+            expect(result.error).toBe('Model load failed');
+        });
+        it('should throw error when request fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 500,
+                statusText: 'Internal Server Error',
+            });
+            await expect(client.getHealth()).rejects.toThrow('Health check failed: 500 Internal Server Error');
+        });
+    });
+    describe('tokenize', () => {
+        it('should tokenize text and return token IDs', async () => {
+            const mockResponse = { tokens: [12, 345, 6789] };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.tokenize('Hello world');
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/tokenize`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ content: 'Hello world' }),
+            });
+            expect(result).toEqual(mockResponse);
+            expect(result.tokens).toHaveLength(3);
+        });
+        it('should handle empty string', async () => {
+            const mockResponse = { tokens: [] };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.tokenize('');
+            expect(result.tokens).toEqual([]);
+        });
+        it('should throw error when tokenization fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 400,
+                statusText: 'Bad Request',
+                text: async () => 'Invalid input',
+            });
+            await expect(client.tokenize('test')).rejects.toThrow('Tokenize failed: 400 Bad Request - Invalid input');
+        });
+    });
+    describe('detokenize', () => {
+        it('should convert tokens back to text', async () => {
+            const mockResponse = { content: 'Hello world' };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.detokenize([12, 345, 6789]);
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/detokenize`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ tokens: [12, 345, 6789] }),
+            });
+            expect(result).toEqual(mockResponse);
+            expect(result.content).toBe('Hello world');
+        });
+        it('should handle empty token array', async () => {
+            const mockResponse = { content: '' };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.detokenize([]);
+            expect(result.content).toBe('');
+        });
+        it('should throw error when detokenization fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 400,
+                statusText: 'Bad Request',
+                text: async () => 'Invalid tokens',
+            });
+            await expect(client.detokenize([1, 2, 3])).rejects.toThrow('Detokenize failed: 400 Bad Request - Invalid tokens');
+        });
+    });
+    describe('createEmbedding', () => {
+        it('should generate embedding for text', async () => {
+            const mockEmbedding = [0.1, 0.2, 0.3, 0.4];
+            const mockResponse = { embedding: mockEmbedding };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.createEmbedding('Hello world');
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/embedding`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ content: 'Hello world' }),
+            });
+            expect(result.embedding).toEqual(mockEmbedding);
+        });
+        it('should include image data when provided', async () => {
+            const mockResponse = { embedding: [0.1, 0.2] };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const imageData = 'base64encodedimage';
+            await client.createEmbedding('Text with image', imageData);
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/embedding`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    content: 'Text with image',
+                    image_data: imageData,
+                }),
+            });
+        });
+        it('should throw error when embedding fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 500,
+                statusText: 'Server Error',
+                text: async () => 'Model error',
+            });
+            await expect(client.createEmbedding('test')).rejects.toThrow('Embedding failed: 500 Server Error - Model error');
+        });
+    });
+    describe('infill', () => {
+        it('should complete code between prefix and suffix', async () => {
+            const mockResponse = { content: ' return x + y;\n', tokens: [1, 2, 3], stop: true };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.infill('def add(x, y):\n', '\nprint(add(2, 3))');
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/infill`, {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({
+                    input_prefix: 'def add(x, y):\n',
+                    input_suffix: '\nprint(add(2, 3))',
+                }),
+            });
+            expect(result.content).toBe(' return x + y;\n');
+            expect(result.stop).toBe(true);
+        });
+        it('should throw error when infill fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 400,
+                statusText: 'Bad Request',
+                text: async () => 'Invalid code',
+            });
+            await expect(client.infill('prefix', 'suffix')).rejects.toThrow('Infill failed: 400 Bad Request - Invalid code');
+        });
+    });
+    describe('getProps', () => {
+        it('should retrieve server properties', async () => {
+            const mockResponse = {
+                assistant_name: 'Assistant',
+                user_name: 'User',
+                default_generation_settings: { temperature: 0.8 },
+                total_slots: 4,
+            };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.getProps();
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/props`);
+            expect(result).toEqual(mockResponse);
+            expect(result.total_slots).toBe(4);
+        });
+        it('should throw error when getting props fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 404,
+                statusText: 'Not Found',
+            });
+            await expect(client.getProps()).rejects.toThrow('Get props failed: 404 Not Found');
+        });
+    });
+    describe('getMetrics', () => {
+        it('should retrieve JSON metrics', async () => {
+            const mockResponse = { requests_total: 100, tokens_generated: 50000 };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                headers: new Map([['content-type', 'application/json']]),
+                json: async () => mockResponse,
+            });
+            const result = await client.getMetrics();
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/metrics`);
+            expect(result).toEqual(mockResponse);
+        });
+        it('should handle Prometheus format metrics', async () => {
+            const prometheusText = '# HELP metric_name Description\nmetric_name 42\n';
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                headers: new Map([['content-type', 'text/plain']]),
+                text: async () => prometheusText,
+            });
+            const result = await client.getMetrics();
+            expect(result).toEqual({ raw: prometheusText });
+        });
+        it('should throw error when getting metrics fails', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 500,
+                statusText: 'Server Error',
+            });
+            await expect(client.getMetrics()).rejects.toThrow('Get metrics failed: 500 Server Error');
+        });
+    });
+    describe('getSlots', () => {
+        it('should retrieve slot information', async () => {
+            const mockResponse = {
+                slots: [
+                    { id: 0, state: 1, prompt: 'Hello' },
+                    { id: 1, state: 0 },
+                ],
+            };
+            global.fetch.mockResolvedValueOnce({
+                ok: true,
+                json: async () => mockResponse,
+            });
+            const result = await client.getSlots();
+            expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/slots`);
+            expect(result.slots).toHaveLength(2);
+            expect(result.slots[0].id).toBe(0);
+        });
+        it('should throw specific error when endpoint is not enabled', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 404,
+                statusText: 'Not Found',
+            });
+            await expect(client.getSlots()).rejects.toThrow('Slots endpoint not enabled. Start server with --slots flag to enable.');
+        });
+        it('should throw error for other failures', async () => {
+            global.fetch.mockResolvedValueOnce({
+                ok: false,
+                status: 500,
+                statusText: 'Server Error',
+            });
+            await expect(client.getSlots()).rejects.toThrow('Get slots failed: 500 Server Error');
+        });
+    });
+});
package/dist/llm/config.d.ts
CHANGED
@@ -6,6 +6,7 @@ import type { ILLMClientAdapter } from "./clients/types";
  */
 export declare const ADAPTER_CONSTRUCTORS: Partial<Record<ApiProviderId, new (config?: {
     baseURL?: string;
+    checkHealth?: boolean;
 }) => ILLMClientAdapter>>;
 /**
  * Optional configuration objects for each adapter
@@ -73,6 +74,17 @@ export declare function isProviderSupported(providerId: string): boolean;
  * @returns True if the model is supported for the provider
  */
 export declare function isModelSupported(modelId: string, providerId: string): boolean;
+/**
+ * Creates a fallback ModelInfo for unknown/unregistered models
+ *
+ * Used when allowUnknownModels is enabled for a provider, or as a permissive
+ * fallback when strict validation is disabled. Provides sensible defaults.
+ *
+ * @param modelId - The model ID to create info for
+ * @param providerId - The provider ID
+ * @returns ModelInfo with default/placeholder values
+ */
+export declare function createFallbackModelInfo(modelId: string, providerId: string): ModelInfo;
 /**
  * Gets merged default settings for a specific model and provider
  *
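The widened constructor type above means any registered adapter can now be constructed with an optional checkHealth flag alongside baseURL. A hypothetical sketch of constructing the new adapter with that config shape; the flag's runtime behavior lives in LlamaCppClientAdapter.js, which is not shown in this section, and the export path is assumed:

```typescript
import { LlamaCppClientAdapter } from 'genai-lite'; // assumed export path

// Config shape matches the ADAPTER_CONSTRUCTORS signature above:
// { baseURL?: string; checkHealth?: boolean }
const adapter = new LlamaCppClientAdapter({
  baseURL: process.env.LLAMACPP_API_BASE_URL ?? 'http://localhost:8080',
  checkHealth: true, // presumably probes the server before use; behavior not shown here
});
```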
package/dist/llm/config.js
CHANGED
@@ -8,11 +8,13 @@ exports.getModelById = getModelById;
 exports.getModelsByProvider = getModelsByProvider;
 exports.isProviderSupported = isProviderSupported;
 exports.isModelSupported = isModelSupported;
+exports.createFallbackModelInfo = createFallbackModelInfo;
 exports.getDefaultSettingsForModel = getDefaultSettingsForModel;
 exports.validateLLMSettings = validateLLMSettings;
 const OpenAIClientAdapter_1 = require("./clients/OpenAIClientAdapter");
 const AnthropicClientAdapter_1 = require("./clients/AnthropicClientAdapter");
 const GeminiClientAdapter_1 = require("./clients/GeminiClientAdapter");
+const LlamaCppClientAdapter_1 = require("./clients/LlamaCppClientAdapter");
 // Placeholder for future imports:
 // import { MistralClientAdapter } from './clients/MistralClientAdapter';
 /**
@@ -23,6 +25,7 @@ exports.ADAPTER_CONSTRUCTORS = {
     openai: OpenAIClientAdapter_1.OpenAIClientAdapter,
     anthropic: AnthropicClientAdapter_1.AnthropicClientAdapter,
     gemini: GeminiClientAdapter_1.GeminiClientAdapter,
+    llamacpp: LlamaCppClientAdapter_1.LlamaCppClientAdapter,
     // 'mistral': MistralClientAdapter, // Uncomment and add when Mistral adapter is ready
 };
 /**
@@ -36,6 +39,9 @@ exports.ADAPTER_CONFIGS = {
     anthropic: {
         baseURL: process.env.ANTHROPIC_API_BASE_URL || undefined,
     },
+    llamacpp: {
+        baseURL: process.env.LLAMACPP_API_BASE_URL || 'http://localhost:8080',
+    },
     // 'gemini': { /* ... Gemini specific config ... */ },
     // 'mistral': { /* ... Mistral specific config ... */ },
 };
@@ -116,6 +122,16 @@ exports.SUPPORTED_PROVIDERS = [
         id: "mistral",
         name: "Mistral AI",
     },
+    {
+        id: "llamacpp",
+        name: "llama.cpp",
+        allowUnknownModels: true, // Users load arbitrary GGUF models with custom names
+    },
+    {
+        id: "mock",
+        name: "Mock Provider",
+        allowUnknownModels: true, // Test provider accepts any model
+    },
 ];
 /**
  * Supported LLM models with their configurations
@@ -412,6 +428,43 @@ exports.SUPPORTED_MODELS = [
         supportsImages: false,
         supportsPromptCache: false,
     },
+    // llama.cpp Models (examples - users can specify any loaded model)
+    {
+        id: "llama-3-8b-instruct",
+        name: "Llama 3 8B Instruct",
+        providerId: "llamacpp",
+        contextWindow: 8192,
+        inputPrice: 0.0,
+        outputPrice: 0.0,
+        description: "Local Llama 3 8B model via llama.cpp server",
+        maxTokens: 4096,
+        supportsImages: false,
+        supportsPromptCache: false,
+    },
+    {
+        id: "llama-3-70b-instruct",
+        name: "Llama 3 70B Instruct",
+        providerId: "llamacpp",
+        contextWindow: 8192,
+        inputPrice: 0.0,
+        outputPrice: 0.0,
+        description: "Local Llama 3 70B model via llama.cpp server",
+        maxTokens: 4096,
+        supportsImages: false,
+        supportsPromptCache: false,
+    },
+    {
+        id: "mistral-7b-instruct",
+        name: "Mistral 7B Instruct",
+        providerId: "llamacpp",
+        contextWindow: 32768,
+        inputPrice: 0.0,
+        outputPrice: 0.0,
+        description: "Local Mistral 7B model via llama.cpp server",
+        maxTokens: 4096,
+        supportsImages: false,
+        supportsPromptCache: false,
+    },
 ];
 /**
  * Gets provider information by ID
@@ -460,6 +513,30 @@ function isProviderSupported(providerId) {
 function isModelSupported(modelId, providerId) {
     return exports.SUPPORTED_MODELS.some((model) => model.id === modelId && model.providerId === providerId);
 }
+/**
+ * Creates a fallback ModelInfo for unknown/unregistered models
+ *
+ * Used when allowUnknownModels is enabled for a provider, or as a permissive
+ * fallback when strict validation is disabled. Provides sensible defaults.
+ *
+ * @param modelId - The model ID to create info for
+ * @param providerId - The provider ID
+ * @returns ModelInfo with default/placeholder values
+ */
+function createFallbackModelInfo(modelId, providerId) {
+    return {
+        id: modelId,
+        name: modelId,
+        providerId: providerId,
+        contextWindow: 4096,
+        maxTokens: 2048,
+        inputPrice: 0,
+        outputPrice: 0,
+        description: `Unknown model (using defaults)`,
+        supportsImages: false,
+        supportsPromptCache: false,
+    };
+}
 /**
  * Gets merged default settings for a specific model and provider
  *
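The fallback defaults above are fixed, so for any unregistered model id the returned ModelInfo is fully predictable. A small sketch of what a caller gets back; the import path targets the compiled module shown in this diff and the model name is a made-up example:

```typescript
import { createFallbackModelInfo, isModelSupported } from './dist/llm/config';

// 'my-custom-model.Q4_K_M' is a hypothetical GGUF model name not in SUPPORTED_MODELS
console.log(isModelSupported('my-custom-model.Q4_K_M', 'llamacpp')); // false

const info = createFallbackModelInfo('my-custom-model.Q4_K_M', 'llamacpp');
console.log(info.contextWindow, info.maxTokens, info.inputPrice);
// 4096 2048 0, the fixed defaults from the function above
```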
package/dist/llm/services/ModelResolver.js
CHANGED
@@ -90,20 +90,20 @@ class ModelResolver {
                 }
             };
         }
-
+        let modelInfo = (0, config_1.getModelById)(options.modelId, options.providerId);
         if (!modelInfo) {
-
-
-
-
-
-
-
-
-
-
-
-            }
+            // Check if provider allows unknown models
+            const provider = (0, config_1.getProviderById)(options.providerId);
+            if (provider?.allowUnknownModels) {
+                // Flexible provider (e.g., llamacpp) - silent fallback
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+            }
+            else {
+                // Strict provider - warn but allow
+                console.warn(`⚠️ Unknown model "${options.modelId}" for provider "${options.providerId}". ` +
+                    `Using default settings. This may fail at the provider API if the model doesn't exist.`);
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+            }
         }
         return {
             providerId: options.providerId,