genai-lite 0.3.2 → 0.4.0
- package/README.md +380 -15
- package/dist/index.d.ts +5 -0
- package/dist/index.js +8 -1
- package/dist/llm/LLMService.js +8 -0
- package/dist/llm/LLMService.test.js +57 -9
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +116 -0
- package/dist/llm/clients/LlamaCppClientAdapter.js +289 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.js +447 -0
- package/dist/llm/clients/LlamaCppServerClient.d.ts +161 -0
- package/dist/llm/clients/LlamaCppServerClient.js +192 -0
- package/dist/llm/clients/LlamaCppServerClient.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppServerClient.test.js +294 -0
- package/dist/llm/config.d.ts +12 -0
- package/dist/llm/config.js +77 -0
- package/dist/llm/services/ModelResolver.js +13 -13
- package/dist/llm/services/ModelResolver.test.js +25 -4
- package/dist/llm/types.d.ts +8 -0
- package/dist/providers/fromEnvironment.d.ts +4 -0
- package/dist/providers/fromEnvironment.js +8 -0
- package/dist/providers/fromEnvironment.test.js +13 -0
- package/package.json +1 -1
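The headline change is a new llama.cpp provider: a `LlamaCppClientAdapter` backed by a `LlamaCppServerClient`, plus supporting config and provider-resolution updates. A minimal usage sketch, lifted from the JSDoc example shipped in the new adapter, follows; it assumes `LLMService` and `LlamaCppClientAdapter` are re-exported from the package root (the `dist/index.*` changes suggest new exports, but the exact names are not shown in this excerpt) and that the model name matches whatever the local server has loaded.

```typescript
import { LLMService, LlamaCppClientAdapter } from 'genai-lite'; // assumed root exports

async function demo(service: LLMService): Promise<void> {
  // Point the adapter at a locally running llama.cpp server.
  const adapter = new LlamaCppClientAdapter({
    baseURL: 'http://localhost:8080', // default
    checkHealth: true,                // optional pre-flight health check
  });

  // Register under the 'llamacpp' provider id, then route requests through it.
  service.registerAdapter('llamacpp', adapter);

  const response = await service.sendMessage({
    providerId: 'llamacpp',
    modelId: 'llama-3-8b-instruct', // whatever model the server has loaded
    messages: [{ role: 'user', content: 'Hello!' }],
  });

  if (response.object !== 'error') {
    console.log(response.choices[0].message.content);
  }
}
```

Since llama.cpp runs locally and does not require an API key, no key configuration is needed for this provider.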
package/dist/llm/clients/LlamaCppClientAdapter.d.ts
@@ -0,0 +1,116 @@
import type { LLMResponse, LLMFailureResponse } from "../types";
import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
import { LlamaCppServerClient } from "./LlamaCppServerClient";
/**
 * Configuration options for LlamaCppClientAdapter
 */
export interface LlamaCppClientConfig {
    /** Base URL of the llama.cpp server (default: http://localhost:8080) */
    baseURL?: string;
    /** Whether to check server health before sending requests (default: false) */
    checkHealth?: boolean;
}
/**
 * Client adapter for llama.cpp server integration
 *
 * This adapter provides integration with llama.cpp server via its OpenAI-compatible
 * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
 * with llama.cpp's OpenAI-compatible API.
 *
 * Key features:
 * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
 * - Optional health check before requests
 * - No API key required (llama.cpp is a local server)
 * - Supports all standard LLM settings
 *
 * Note: Model IDs are not validated against a predefined list since llama.cpp
 * serves whatever model is loaded. Users must specify the correct model name.
 *
 * @example
 * ```typescript
 * // Create adapter for local server
 * const adapter = new LlamaCppClientAdapter({
 *   baseURL: 'http://localhost:8080',
 *   checkHealth: true
 * });
 *
 * // Register with LLMService
 * service.registerAdapter('llamacpp', adapter);
 *
 * // Use via LLMService
 * const response = await service.sendMessage({
 *   providerId: 'llamacpp',
 *   modelId: 'llama-3-8b-instruct',
 *   messages: [{ role: 'user', content: 'Hello!' }]
 * });
 * ```
 */
export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
    private baseURL;
    private checkHealth;
    private serverClient;
    /**
     * Creates a new llama.cpp client adapter
     *
     * @param config Optional configuration for the adapter
     */
    constructor(config?: LlamaCppClientConfig);
    /**
     * Sends a chat message to llama.cpp server
     *
     * @param request - The internal LLM request with applied settings
     * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
     * @returns Promise resolving to success or failure response
     */
    sendMessage(request: InternalLLMChatRequest, apiKey: string): Promise<LLMResponse | LLMFailureResponse>;
    /**
     * Validates API key format
     *
     * For llama.cpp, API keys are not required, so this always returns true.
     * The method is implemented for interface compatibility.
     *
     * @param apiKey - The API key (ignored)
     * @returns Always true
     */
    validateApiKey(apiKey: string): boolean;
    /**
     * Gets adapter information
     */
    getAdapterInfo(): {
        providerId: "llamacpp";
        name: string;
        version: string;
        baseURL: string;
    };
    /**
     * Gets the underlying server client for advanced operations
     *
     * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
     *
     * @returns The LlamaCppServerClient instance
     */
    getServerClient(): LlamaCppServerClient;
    /**
     * Formats messages for OpenAI-compatible API
     *
     * @param request - The internal LLM request
     * @returns Formatted messages array
     */
    private formatMessages;
    /**
     * Creates a standardized success response from llama.cpp's response
     *
     * @param completion - Raw OpenAI-compatible completion response
     * @param request - Original request for context
     * @returns Standardized LLM response
     */
    private createSuccessResponse;
    /**
     * Creates a standardized error response from an error
     *
     * @param error - The error that occurred
     * @param request - Original request for context
     * @returns Standardized LLM failure response
     */
    private createErrorResponse;
}
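Beyond chat, `getServerClient()` exposes the underlying `LlamaCppServerClient` for the server's other endpoints (health, tokenize, embedding). A small sketch of the health path; only `getHealth()` is actually visible in this excerpt, and its `{ status, error? }` shape is inferred from how the compiled `sendMessage()` consumes it below.

```typescript
import { LlamaCppClientAdapter } from 'genai-lite'; // assumed root export

async function serverIsReady(): Promise<boolean> {
  const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });

  // getHealth() return shape inferred from the adapter's own health-check branch.
  const health = await adapter.getServerClient().getHealth();
  if (health.status !== 'ok') {
    console.warn(`llama.cpp server not ready: ${health.status}`, health.error ?? '');
    return false;
  }
  return true;
}
```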
package/dist/llm/clients/LlamaCppClientAdapter.js
@@ -0,0 +1,289 @@
"use strict";
// AI Summary: Client adapter for llama.cpp server using OpenAI-compatible API.
// Provides LLM chat completions via llama.cpp's /v1/chat/completions endpoint.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.LlamaCppClientAdapter = void 0;
const openai_1 = __importDefault(require("openai"));
const types_1 = require("./types");
const adapterErrorUtils_1 = require("./adapterErrorUtils");
const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
/**
 * Client adapter for llama.cpp server integration
 *
 * This adapter provides integration with llama.cpp server via its OpenAI-compatible
 * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
 * with llama.cpp's OpenAI-compatible API.
 *
 * Key features:
 * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
 * - Optional health check before requests
 * - No API key required (llama.cpp is a local server)
 * - Supports all standard LLM settings
 *
 * Note: Model IDs are not validated against a predefined list since llama.cpp
 * serves whatever model is loaded. Users must specify the correct model name.
 *
 * @example
 * ```typescript
 * // Create adapter for local server
 * const adapter = new LlamaCppClientAdapter({
 *   baseURL: 'http://localhost:8080',
 *   checkHealth: true
 * });
 *
 * // Register with LLMService
 * service.registerAdapter('llamacpp', adapter);
 *
 * // Use via LLMService
 * const response = await service.sendMessage({
 *   providerId: 'llamacpp',
 *   modelId: 'llama-3-8b-instruct',
 *   messages: [{ role: 'user', content: 'Hello!' }]
 * });
 * ```
 */
class LlamaCppClientAdapter {
    /**
     * Creates a new llama.cpp client adapter
     *
     * @param config Optional configuration for the adapter
     */
    constructor(config) {
        this.baseURL = config?.baseURL || 'http://localhost:8080';
        this.checkHealth = config?.checkHealth || false;
        this.serverClient = new LlamaCppServerClient_1.LlamaCppServerClient(this.baseURL);
    }
    /**
     * Sends a chat message to llama.cpp server
     *
     * @param request - The internal LLM request with applied settings
     * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
     * @returns Promise resolving to success or failure response
     */
    async sendMessage(request, apiKey) {
        try {
            // Optional health check before making request
            if (this.checkHealth) {
                try {
                    const health = await this.serverClient.getHealth();
                    if (health.status !== 'ok') {
                        return {
                            provider: request.providerId,
                            model: request.modelId,
                            error: {
                                message: `llama.cpp server not ready: ${health.status}${health.error ? ' - ' + health.error : ''}`,
                                code: types_1.ADAPTER_ERROR_CODES.PROVIDER_ERROR,
                                type: 'server_not_ready',
                            },
                            object: 'error',
                        };
                    }
                }
                catch (healthError) {
                    console.warn('Health check failed, proceeding with request anyway:', healthError);
                }
            }
            // Initialize OpenAI client with llama.cpp base URL
            // API key is not used by llama.cpp but required by SDK
            const openai = new openai_1.default({
                apiKey: apiKey || 'not-needed',
                baseURL: `${this.baseURL}/v1`,
            });
            // Format messages for OpenAI-compatible API
            const messages = this.formatMessages(request);
            // Prepare API call parameters
            const completionParams = {
                model: request.modelId,
                messages: messages,
                temperature: request.settings.temperature,
                max_tokens: request.settings.maxTokens,
                top_p: request.settings.topP,
                ...(request.settings.stopSequences.length > 0 && {
                    stop: request.settings.stopSequences,
                }),
                ...(request.settings.frequencyPenalty !== 0 && {
                    frequency_penalty: request.settings.frequencyPenalty,
                }),
                ...(request.settings.presencePenalty !== 0 && {
                    presence_penalty: request.settings.presencePenalty,
                }),
            };
            console.log(`llama.cpp API parameters:`, {
                baseURL: this.baseURL,
                model: completionParams.model,
                temperature: completionParams.temperature,
                max_tokens: completionParams.max_tokens,
                top_p: completionParams.top_p,
            });
            console.log(`Making llama.cpp API call for model: ${request.modelId}`);
            // Make the API call
            const completion = await openai.chat.completions.create(completionParams);
            // Type guard to ensure we have a non-streaming response
            if ('id' in completion && 'choices' in completion) {
                console.log(`llama.cpp API call successful, response ID: ${completion.id}`);
                return this.createSuccessResponse(completion, request);
            }
            else {
                throw new Error('Unexpected streaming response from llama.cpp server');
            }
        }
        catch (error) {
            console.error("llama.cpp API error:", error);
            return this.createErrorResponse(error, request);
        }
    }
    /**
     * Validates API key format
     *
     * For llama.cpp, API keys are not required, so this always returns true.
     * The method is implemented for interface compatibility.
     *
     * @param apiKey - The API key (ignored)
     * @returns Always true
     */
    validateApiKey(apiKey) {
        // llama.cpp doesn't require API keys, accept any value
        return true;
    }
    /**
     * Gets adapter information
     */
    getAdapterInfo() {
        return {
            providerId: "llamacpp",
            name: "llama.cpp Client Adapter",
            version: "1.0.0",
            baseURL: this.baseURL,
        };
    }
    /**
     * Gets the underlying server client for advanced operations
     *
     * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
     *
     * @returns The LlamaCppServerClient instance
     */
    getServerClient() {
        return this.serverClient;
    }
    /**
     * Formats messages for OpenAI-compatible API
     *
     * @param request - The internal LLM request
     * @returns Formatted messages array
     */
    formatMessages(request) {
        const messages = [];
        // Add system message if provided
        if (request.systemMessage) {
            messages.push({
                role: "system",
                content: request.systemMessage,
            });
        }
        // Add conversation messages
        for (const message of request.messages) {
            if (message.role === "system") {
                messages.push({
                    role: "system",
                    content: message.content,
                });
            }
            else if (message.role === "user") {
                messages.push({
                    role: "user",
                    content: message.content,
                });
            }
            else if (message.role === "assistant") {
                messages.push({
                    role: "assistant",
                    content: message.content,
                });
            }
        }
        return messages;
    }
    /**
     * Creates a standardized success response from llama.cpp's response
     *
     * @param completion - Raw OpenAI-compatible completion response
     * @param request - Original request for context
     * @returns Standardized LLM response
     */
    createSuccessResponse(completion, request) {
        const choice = completion.choices[0];
        if (!choice || !choice.message) {
            throw new Error("No valid choices in llama.cpp completion response");
        }
        return {
            id: completion.id,
            provider: request.providerId,
            model: completion.model || request.modelId,
            created: completion.created,
            choices: completion.choices.map((c) => ({
                message: {
                    role: "assistant",
                    content: c.message.content || "",
                },
                finish_reason: c.finish_reason,
                index: c.index,
            })),
            usage: completion.usage
                ? {
                    prompt_tokens: completion.usage.prompt_tokens,
                    completion_tokens: completion.usage.completion_tokens,
                    total_tokens: completion.usage.total_tokens,
                }
                : undefined,
            object: "chat.completion",
        };
    }
    /**
     * Creates a standardized error response from an error
     *
     * @param error - The error that occurred
     * @param request - Original request for context
     * @returns Standardized LLM failure response
     */
    createErrorResponse(error, request) {
        const errorMessage = error?.message || String(error);
        let errorCode, errorType, status;
        // Check for connection errors (server not running)
        if (errorMessage.includes("ECONNREFUSED") ||
            errorMessage.includes("fetch failed") ||
            errorMessage.includes("connect")) {
            errorCode = types_1.ADAPTER_ERROR_CODES.NETWORK_ERROR;
            errorType = "connection_error";
            return {
                provider: request.providerId,
                model: request.modelId,
                error: {
                    message: `Cannot connect to llama.cpp server at ${this.baseURL}. Is the server running?`,
                    code: errorCode,
                    type: errorType,
                    providerError: error,
                },
                object: "error",
            };
        }
        // Use common error mapping for other errors
        const mappedError = (0, adapterErrorUtils_1.getCommonMappedErrorDetails)(error);
        return {
            provider: request.providerId,
            model: request.modelId,
            error: {
                message: mappedError.errorMessage,
                code: mappedError.errorCode,
                type: mappedError.errorType,
                ...(mappedError.status && { status: mappedError.status }),
                providerError: error,
            },
            object: "error",
        };
    }
}
exports.LlamaCppClientAdapter = LlamaCppClientAdapter;
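Note that the compiled adapter does not throw to the caller: connection failures (`ECONNREFUSED`, `fetch failed`) come back as a failure response with `type: 'connection_error'` and a "Cannot connect to llama.cpp server at ..." message, while other errors go through the shared `getCommonMappedErrorDetails` mapping. A hedged sketch of branching on that shape, reusing the `service` setup from the earlier example:

```typescript
const response = await service.sendMessage({
  providerId: 'llamacpp',
  modelId: 'llama-3-8b-instruct',
  messages: [{ role: 'user', content: 'Hello!' }],
});

if (response.object === 'error') {
  if (response.error.type === 'connection_error') {
    // Server is probably not running, or baseURL points at the wrong host/port.
    console.error(response.error.message);
  } else {
    console.error(`llama.cpp error [${response.error.code}]: ${response.error.message}`);
  }
} else {
  console.log(response.choices[0].message.content);
}
```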
package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts
@@ -0,0 +1 @@
export {};