genai-lite 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries.
@@ -0,0 +1,116 @@
+import type { LLMResponse, LLMFailureResponse } from "../types";
+import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
+import { LlamaCppServerClient } from "./LlamaCppServerClient";
+/**
+ * Configuration options for LlamaCppClientAdapter
+ */
+export interface LlamaCppClientConfig {
+    /** Base URL of the llama.cpp server (default: http://localhost:8080) */
+    baseURL?: string;
+    /** Whether to check server health before sending requests (default: false) */
+    checkHealth?: boolean;
+}
+/**
+ * Client adapter for llama.cpp server integration
+ *
+ * This adapter provides integration with llama.cpp server via its OpenAI-compatible
+ * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
+ * with llama.cpp's OpenAI-compatible API.
+ *
+ * Key features:
+ * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
+ * - Optional health check before requests
+ * - No API key required (llama.cpp is a local server)
+ * - Supports all standard LLM settings
+ *
+ * Note: Model IDs are not validated against a predefined list since llama.cpp
+ * serves whatever model is loaded. Users must specify the correct model name.
+ *
+ * @example
+ * ```typescript
+ * // Create adapter for local server
+ * const adapter = new LlamaCppClientAdapter({
+ *   baseURL: 'http://localhost:8080',
+ *   checkHealth: true
+ * });
+ *
+ * // Register with LLMService
+ * service.registerAdapter('llamacpp', adapter);
+ *
+ * // Use via LLMService
+ * const response = await service.sendMessage({
+ *   providerId: 'llamacpp',
+ *   modelId: 'llama-3-8b-instruct',
+ *   messages: [{ role: 'user', content: 'Hello!' }]
+ * });
+ * ```
+ */
+export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
+    private baseURL;
+    private checkHealth;
+    private serverClient;
+    /**
+     * Creates a new llama.cpp client adapter
+     *
+     * @param config Optional configuration for the adapter
+     */
+    constructor(config?: LlamaCppClientConfig);
+    /**
+     * Sends a chat message to llama.cpp server
+     *
+     * @param request - The internal LLM request with applied settings
+     * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
+     * @returns Promise resolving to success or failure response
+     */
+    sendMessage(request: InternalLLMChatRequest, apiKey: string): Promise<LLMResponse | LLMFailureResponse>;
+    /**
+     * Validates API key format
+     *
+     * For llama.cpp, API keys are not required, so this always returns true.
+     * The method is implemented for interface compatibility.
+     *
+     * @param apiKey - The API key (ignored)
+     * @returns Always true
+     */
+    validateApiKey(apiKey: string): boolean;
+    /**
+     * Gets adapter information
+     */
+    getAdapterInfo(): {
+        providerId: "llamacpp";
+        name: string;
+        version: string;
+        baseURL: string;
+    };
+    /**
+     * Gets the underlying server client for advanced operations
+     *
+     * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
+     *
+     * @returns The LlamaCppServerClient instance
+     */
+    getServerClient(): LlamaCppServerClient;
+    /**
+     * Formats messages for OpenAI-compatible API
+     *
+     * @param request - The internal LLM request
+     * @returns Formatted messages array
+     */
+    private formatMessages;
+    /**
+     * Creates a standardized success response from llama.cpp's response
+     *
+     * @param completion - Raw OpenAI-compatible completion response
+     * @param request - Original request for context
+     * @returns Standardized LLM response
+     */
+    private createSuccessResponse;
+    /**
+     * Creates a standardized error response from an error
+     *
+     * @param error - The error that occurred
+     * @param request - Original request for context
+     * @returns Standardized LLM failure response
+     */
+    private createErrorResponse;
+}
@@ -0,0 +1,289 @@
+"use strict";
+// AI Summary: Client adapter for llama.cpp server using OpenAI-compatible API.
+// Provides LLM chat completions via llama.cpp's /v1/chat/completions endpoint.
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.LlamaCppClientAdapter = void 0;
+const openai_1 = __importDefault(require("openai"));
+const types_1 = require("./types");
+const adapterErrorUtils_1 = require("./adapterErrorUtils");
+const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+/**
+ * Client adapter for llama.cpp server integration
+ *
+ * This adapter provides integration with llama.cpp server via its OpenAI-compatible
+ * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
+ * with llama.cpp's OpenAI-compatible API.
+ *
+ * Key features:
+ * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
+ * - Optional health check before requests
+ * - No API key required (llama.cpp is a local server)
+ * - Supports all standard LLM settings
+ *
+ * Note: Model IDs are not validated against a predefined list since llama.cpp
+ * serves whatever model is loaded. Users must specify the correct model name.
+ *
+ * @example
+ * ```typescript
+ * // Create adapter for local server
+ * const adapter = new LlamaCppClientAdapter({
+ *   baseURL: 'http://localhost:8080',
+ *   checkHealth: true
+ * });
+ *
+ * // Register with LLMService
+ * service.registerAdapter('llamacpp', adapter);
+ *
+ * // Use via LLMService
+ * const response = await service.sendMessage({
+ *   providerId: 'llamacpp',
+ *   modelId: 'llama-3-8b-instruct',
+ *   messages: [{ role: 'user', content: 'Hello!' }]
+ * });
+ * ```
+ */
+class LlamaCppClientAdapter {
+    /**
+     * Creates a new llama.cpp client adapter
+     *
+     * @param config Optional configuration for the adapter
+     */
+    constructor(config) {
+        this.baseURL = config?.baseURL || 'http://localhost:8080';
+        this.checkHealth = config?.checkHealth || false;
+        this.serverClient = new LlamaCppServerClient_1.LlamaCppServerClient(this.baseURL);
+    }
+    /**
+     * Sends a chat message to llama.cpp server
+     *
+     * @param request - The internal LLM request with applied settings
+     * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
+     * @returns Promise resolving to success or failure response
+     */
+    async sendMessage(request, apiKey) {
+        try {
+            // Optional health check before making request
+            if (this.checkHealth) {
+                try {
+                    const health = await this.serverClient.getHealth();
+                    if (health.status !== 'ok') {
+                        return {
+                            provider: request.providerId,
+                            model: request.modelId,
+                            error: {
+                                message: `llama.cpp server not ready: ${health.status}${health.error ? ' - ' + health.error : ''}`,
+                                code: types_1.ADAPTER_ERROR_CODES.PROVIDER_ERROR,
+                                type: 'server_not_ready',
+                            },
+                            object: 'error',
+                        };
+                    }
+                }
+                catch (healthError) {
+                    console.warn('Health check failed, proceeding with request anyway:', healthError);
+                }
+            }
+            // Initialize OpenAI client with llama.cpp base URL
+            // API key is not used by llama.cpp but required by SDK
+            const openai = new openai_1.default({
+                apiKey: apiKey || 'not-needed',
+                baseURL: `${this.baseURL}/v1`,
+            });
+            // Format messages for OpenAI-compatible API
+            const messages = this.formatMessages(request);
+            // Prepare API call parameters
+            const completionParams = {
+                model: request.modelId,
+                messages: messages,
+                temperature: request.settings.temperature,
+                max_tokens: request.settings.maxTokens,
+                top_p: request.settings.topP,
+                ...(request.settings.stopSequences.length > 0 && {
+                    stop: request.settings.stopSequences,
+                }),
+                ...(request.settings.frequencyPenalty !== 0 && {
+                    frequency_penalty: request.settings.frequencyPenalty,
+                }),
+                ...(request.settings.presencePenalty !== 0 && {
+                    presence_penalty: request.settings.presencePenalty,
+                }),
+            };
+            console.log(`llama.cpp API parameters:`, {
+                baseURL: this.baseURL,
+                model: completionParams.model,
+                temperature: completionParams.temperature,
+                max_tokens: completionParams.max_tokens,
+                top_p: completionParams.top_p,
+            });
+            console.log(`Making llama.cpp API call for model: ${request.modelId}`);
+            // Make the API call
+            const completion = await openai.chat.completions.create(completionParams);
+            // Type guard to ensure we have a non-streaming response
+            if ('id' in completion && 'choices' in completion) {
+                console.log(`llama.cpp API call successful, response ID: ${completion.id}`);
+                return this.createSuccessResponse(completion, request);
+            }
+            else {
+                throw new Error('Unexpected streaming response from llama.cpp server');
+            }
+        }
+        catch (error) {
+            console.error("llama.cpp API error:", error);
+            return this.createErrorResponse(error, request);
+        }
+    }
+    /**
+     * Validates API key format
+     *
+     * For llama.cpp, API keys are not required, so this always returns true.
+     * The method is implemented for interface compatibility.
+     *
+     * @param apiKey - The API key (ignored)
+     * @returns Always true
+     */
+    validateApiKey(apiKey) {
+        // llama.cpp doesn't require API keys, accept any value
+        return true;
+    }
+    /**
+     * Gets adapter information
+     */
+    getAdapterInfo() {
+        return {
+            providerId: "llamacpp",
+            name: "llama.cpp Client Adapter",
+            version: "1.0.0",
+            baseURL: this.baseURL,
+        };
+    }
+    /**
+     * Gets the underlying server client for advanced operations
+     *
+     * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
+     *
+     * @returns The LlamaCppServerClient instance
+     */
+    getServerClient() {
+        return this.serverClient;
+    }
+    /**
+     * Formats messages for OpenAI-compatible API
+     *
+     * @param request - The internal LLM request
+     * @returns Formatted messages array
+     */
+    formatMessages(request) {
+        const messages = [];
+        // Add system message if provided
+        if (request.systemMessage) {
+            messages.push({
+                role: "system",
+                content: request.systemMessage,
+            });
+        }
+        // Add conversation messages
+        for (const message of request.messages) {
+            if (message.role === "system") {
+                messages.push({
+                    role: "system",
+                    content: message.content,
+                });
+            }
+            else if (message.role === "user") {
+                messages.push({
+                    role: "user",
+                    content: message.content,
+                });
+            }
+            else if (message.role === "assistant") {
+                messages.push({
+                    role: "assistant",
+                    content: message.content,
+                });
+            }
+        }
+        return messages;
+    }
+    /**
+     * Creates a standardized success response from llama.cpp's response
+     *
+     * @param completion - Raw OpenAI-compatible completion response
+     * @param request - Original request for context
+     * @returns Standardized LLM response
+     */
+    createSuccessResponse(completion, request) {
+        const choice = completion.choices[0];
+        if (!choice || !choice.message) {
+            throw new Error("No valid choices in llama.cpp completion response");
+        }
+        return {
+            id: completion.id,
+            provider: request.providerId,
+            model: completion.model || request.modelId,
+            created: completion.created,
+            choices: completion.choices.map((c) => ({
+                message: {
+                    role: "assistant",
+                    content: c.message.content || "",
+                },
+                finish_reason: c.finish_reason,
+                index: c.index,
+            })),
+            usage: completion.usage
+                ? {
+                    prompt_tokens: completion.usage.prompt_tokens,
+                    completion_tokens: completion.usage.completion_tokens,
+                    total_tokens: completion.usage.total_tokens,
+                }
+                : undefined,
+            object: "chat.completion",
+        };
+    }
+    /**
+     * Creates a standardized error response from an error
+     *
+     * @param error - The error that occurred
+     * @param request - Original request for context
+     * @returns Standardized LLM failure response
+     */
+    createErrorResponse(error, request) {
+        const errorMessage = error?.message || String(error);
+        let errorCode, errorType, status;
+        // Check for connection errors (server not running)
+        if (errorMessage.includes("ECONNREFUSED") ||
+            errorMessage.includes("fetch failed") ||
+            errorMessage.includes("connect")) {
+            errorCode = types_1.ADAPTER_ERROR_CODES.NETWORK_ERROR;
+            errorType = "connection_error";
+            return {
+                provider: request.providerId,
+                model: request.modelId,
+                error: {
+                    message: `Cannot connect to llama.cpp server at ${this.baseURL}. Is the server running?`,
+                    code: errorCode,
+                    type: errorType,
+                    providerError: error,
+                },
+                object: "error",
+            };
+        }
+        // Use common error mapping for other errors
+        const mappedError = (0, adapterErrorUtils_1.getCommonMappedErrorDetails)(error);
+        return {
+            provider: request.providerId,
+            model: request.modelId,
+            error: {
+                message: mappedError.errorMessage,
+                code: mappedError.errorCode,
+                type: mappedError.errorType,
+                ...(mappedError.status && { status: mappedError.status }),
+                providerError: error,
+            },
+            object: "error",
+        };
+    }
+}
+exports.LlamaCppClientAdapter = LlamaCppClientAdapter;
@@ -0,0 +1 @@
+export {};
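
For readers skimming the diff, here is a minimal usage sketch of the new adapter. It is based on the `@example` block and the declarations shown above; the `genai-lite` import path and the `LLMService` construction are assumptions for illustration, not verified package exports. The adapter options, `registerAdapter`/`sendMessage` calls, `getServerClient()`/`getHealth()`, and the `object: 'error'` discriminator all come from the diffed files.

```typescript
// Illustrative sketch only: import path and LLMService constructor are assumed.
import { LLMService, LlamaCppClientAdapter } from 'genai-lite';

async function main() {
  // Options come from LlamaCppClientConfig in the declaration file above.
  const adapter = new LlamaCppClientAdapter({
    baseURL: 'http://localhost:8080', // llama.cpp server default
    checkHealth: true,                // probe the server before each request
  });

  // getServerClient() exposes the underlying LlamaCppServerClient,
  // e.g. the getHealth() call that sendMessage() uses internally.
  const health = await adapter.getServerClient().getHealth();
  if (health.status !== 'ok') {
    console.warn(`llama.cpp server not ready: ${health.status}`);
  }

  // Registration and chat usage mirror the @example block in the JSDoc.
  const service = new LLMService(/* ...service options (assumed)... */);
  service.registerAdapter('llamacpp', adapter);

  const response = await service.sendMessage({
    providerId: 'llamacpp',
    modelId: 'llama-3-8b-instruct',
    messages: [{ role: 'user', content: 'Hello!' }],
  });

  // Failure responses use object: 'error' (see createErrorResponse above);
  // successes use object: 'chat.completion'.
  if (response.object === 'error') {
    console.error(response.error.message);
  } else {
    console.log(response.choices[0].message.content);
  }
}

main().catch(console.error);
```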