genai-lite 0.3.3 → 0.4.0

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
@@ -0,0 +1,192 @@
+ "use strict";
+ // AI Summary: Utility client for interacting with llama.cpp server's non-LLM endpoints.
+ // Provides methods for tokenization, embeddings, health checks, and server management.
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.LlamaCppServerClient = void 0;
+ /**
+  * Client for interacting with llama.cpp server's management and utility endpoints
+  *
+  * This class provides access to non-LLM endpoints like tokenization, embeddings,
+  * health checks, and server properties. For chat completions, use LlamaCppClientAdapter.
+  *
+  * @example
+  * ```typescript
+  * const client = new LlamaCppServerClient('http://localhost:8080');
+  *
+  * // Check if server is ready
+  * const health = await client.getHealth();
+  * console.log(health.status); // 'ok', 'loading', or 'error'
+  *
+  * // Tokenize text
+  * const { tokens } = await client.tokenize('Hello world');
+  * console.log(tokens); // [123, 456, 789]
+  *
+  * // Generate embeddings
+  * const { embedding } = await client.createEmbedding('Some text');
+  * ```
+  */
+ class LlamaCppServerClient {
+     /**
+      * Creates a new llama.cpp server client
+      *
+      * @param baseURL - The base URL of the llama.cpp server (e.g., 'http://localhost:8080')
+      */
+     constructor(baseURL) {
+         // Remove trailing slash if present
+         this.baseURL = baseURL.replace(/\/$/, '');
+     }
+     /**
+      * Checks the health and readiness of the server
+      *
+      * @returns Promise resolving to health status
+      * @throws Error if the request fails
+      */
+     async getHealth() {
+         const response = await fetch(`${this.baseURL}/health`);
+         if (!response.ok) {
+             throw new Error(`Health check failed: ${response.status} ${response.statusText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Converts text to tokens using the loaded model's tokenizer
+      *
+      * @param content - The text to tokenize
+      * @returns Promise resolving to array of token IDs
+      * @throws Error if the request fails
+      */
+     async tokenize(content) {
+         const response = await fetch(`${this.baseURL}/tokenize`, {
+             method: 'POST',
+             headers: { 'Content-Type': 'application/json' },
+             body: JSON.stringify({ content }),
+         });
+         if (!response.ok) {
+             const errorText = await response.text();
+             throw new Error(`Tokenize failed: ${response.status} ${response.statusText} - ${errorText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Converts tokens back to text using the loaded model's tokenizer
+      *
+      * @param tokens - Array of token IDs to convert
+      * @returns Promise resolving to the decoded text
+      * @throws Error if the request fails
+      */
+     async detokenize(tokens) {
+         const response = await fetch(`${this.baseURL}/detokenize`, {
+             method: 'POST',
+             headers: { 'Content-Type': 'application/json' },
+             body: JSON.stringify({ tokens }),
+         });
+         if (!response.ok) {
+             const errorText = await response.text();
+             throw new Error(`Detokenize failed: ${response.status} ${response.statusText} - ${errorText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Generates an embedding vector for the given text
+      *
+      * @param content - The text to embed
+      * @param imageData - Optional base64-encoded image data for multimodal models
+      * @returns Promise resolving to the embedding vector
+      * @throws Error if the request fails
+      */
+     async createEmbedding(content, imageData) {
+         const body = { content };
+         if (imageData) {
+             body.image_data = imageData;
+         }
+         const response = await fetch(`${this.baseURL}/embedding`, {
+             method: 'POST',
+             headers: { 'Content-Type': 'application/json' },
+             body: JSON.stringify(body),
+         });
+         if (!response.ok) {
+             const errorText = await response.text();
+             throw new Error(`Embedding failed: ${response.status} ${response.statusText} - ${errorText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Performs code infilling (completing code between prefix and suffix)
+      *
+      * @param inputPrefix - The code before the cursor/gap
+      * @param inputSuffix - The code after the cursor/gap
+      * @returns Promise resolving to the infilled completion
+      * @throws Error if the request fails
+      */
+     async infill(inputPrefix, inputSuffix) {
+         const response = await fetch(`${this.baseURL}/infill`, {
+             method: 'POST',
+             headers: { 'Content-Type': 'application/json' },
+             body: JSON.stringify({
+                 input_prefix: inputPrefix,
+                 input_suffix: inputSuffix,
+             }),
+         });
+         if (!response.ok) {
+             const errorText = await response.text();
+             throw new Error(`Infill failed: ${response.status} ${response.statusText} - ${errorText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Retrieves server properties and configuration
+      *
+      * @returns Promise resolving to server properties
+      * @throws Error if the request fails
+      */
+     async getProps() {
+         const response = await fetch(`${this.baseURL}/props`);
+         if (!response.ok) {
+             throw new Error(`Get props failed: ${response.status} ${response.statusText}`);
+         }
+         return await response.json();
+     }
+     /**
+      * Retrieves performance metrics from the server
+      *
+      * @returns Promise resolving to metrics data
+      * @throws Error if the request fails
+      */
+     async getMetrics() {
+         const response = await fetch(`${this.baseURL}/metrics`);
+         if (!response.ok) {
+             throw new Error(`Get metrics failed: ${response.status} ${response.statusText}`);
+         }
+         // Metrics endpoint might return Prometheus format or JSON
+         const contentType = response.headers.get('content-type');
+         if (contentType?.includes('application/json')) {
+             return await response.json();
+         }
+         else {
+             // Return raw text for Prometheus format
+             const text = await response.text();
+             return { raw: text };
+         }
+     }
+     /**
+      * Retrieves processing slot status (debugging endpoint)
+      *
+      * WARNING: This endpoint may expose sensitive information including prompt content.
+      * The llama.cpp documentation strongly advises against enabling this in production.
+      * Only use this endpoint in development/debugging environments.
+      *
+      * @returns Promise resolving to slot status information
+      * @throws Error if the request fails or endpoint is not enabled
+      */
+     async getSlots() {
+         const response = await fetch(`${this.baseURL}/slots`);
+         if (!response.ok) {
+             if (response.status === 404) {
+                 throw new Error('Slots endpoint not enabled. Start server with --slots flag to enable.');
+             }
+             throw new Error(`Get slots failed: ${response.status} ${response.statusText}`);
+         }
+         return await response.json();
+     }
+ }
+ exports.LlamaCppServerClient = LlamaCppServerClient;
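
Taken together, the new `LlamaCppServerClient` is a thin `fetch` wrapper over llama.cpp's utility endpoints. A minimal sketch of how the pieces compose against a locally running server; the import path, port, and error handling are assumptions, not prescribed by the diff:

```typescript
import { LlamaCppServerClient } from 'genai-lite'; // import path assumed

async function roundTrip(text: string): Promise<void> {
  // Trailing slash is stripped by the constructor, per the code above.
  const client = new LlamaCppServerClient('http://localhost:8080/');

  // Wait for the model to finish loading before issuing requests.
  const health = await client.getHealth();
  if (health.status !== 'ok') {
    throw new Error(`Server not ready: ${health.status}`);
  }

  // Tokenize, detokenize, and confirm the text survives the round trip.
  const { tokens } = await client.tokenize(text);
  const { content } = await client.detokenize(tokens);
  console.log(`${tokens.length} tokens, lossless: ${content === text}`);
}

roundTrip('Hello world').catch(console.error);
```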
@@ -0,0 +1 @@
+ export {};
@@ -0,0 +1,294 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+ // Mock global fetch
+ global.fetch = jest.fn();
+ describe('LlamaCppServerClient', () => {
+     let client;
+     const baseURL = 'http://localhost:8080';
+     beforeEach(() => {
+         client = new LlamaCppServerClient_1.LlamaCppServerClient(baseURL);
+         jest.clearAllMocks();
+     });
+     describe('constructor', () => {
+         it('should remove trailing slash from baseURL', () => {
+             const clientWithSlash = new LlamaCppServerClient_1.LlamaCppServerClient('http://localhost:8080/');
+             expect(clientWithSlash.baseURL).toBe('http://localhost:8080');
+         });
+         it('should preserve baseURL without trailing slash', () => {
+             expect(client.baseURL).toBe('http://localhost:8080');
+         });
+     });
+     describe('getHealth', () => {
+         it('should return health status when server is ok', async () => {
+             const mockResponse = { status: 'ok' };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.getHealth();
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/health`);
+             expect(result).toEqual(mockResponse);
+         });
+         it('should return loading status', async () => {
+             const mockResponse = { status: 'loading' };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.getHealth();
+             expect(result.status).toBe('loading');
+         });
+         it('should return error status with message', async () => {
+             const mockResponse = { status: 'error', error: 'Model load failed' };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.getHealth();
+             expect(result.status).toBe('error');
+             expect(result.error).toBe('Model load failed');
+         });
+         it('should throw error when request fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 500,
+                 statusText: 'Internal Server Error',
+             });
+             await expect(client.getHealth()).rejects.toThrow('Health check failed: 500 Internal Server Error');
+         });
+     });
+     describe('tokenize', () => {
+         it('should tokenize text and return token IDs', async () => {
+             const mockResponse = { tokens: [12, 345, 6789] };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.tokenize('Hello world');
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/tokenize`, {
+                 method: 'POST',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({ content: 'Hello world' }),
+             });
+             expect(result).toEqual(mockResponse);
+             expect(result.tokens).toHaveLength(3);
+         });
+         it('should handle empty string', async () => {
+             const mockResponse = { tokens: [] };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.tokenize('');
+             expect(result.tokens).toEqual([]);
+         });
+         it('should throw error when tokenization fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 400,
+                 statusText: 'Bad Request',
+                 text: async () => 'Invalid input',
+             });
+             await expect(client.tokenize('test')).rejects.toThrow('Tokenize failed: 400 Bad Request - Invalid input');
+         });
+     });
+     describe('detokenize', () => {
+         it('should convert tokens back to text', async () => {
+             const mockResponse = { content: 'Hello world' };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.detokenize([12, 345, 6789]);
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/detokenize`, {
+                 method: 'POST',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({ tokens: [12, 345, 6789] }),
+             });
+             expect(result).toEqual(mockResponse);
+             expect(result.content).toBe('Hello world');
+         });
+         it('should handle empty token array', async () => {
+             const mockResponse = { content: '' };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.detokenize([]);
+             expect(result.content).toBe('');
+         });
+         it('should throw error when detokenization fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 400,
+                 statusText: 'Bad Request',
+                 text: async () => 'Invalid tokens',
+             });
+             await expect(client.detokenize([1, 2, 3])).rejects.toThrow('Detokenize failed: 400 Bad Request - Invalid tokens');
+         });
+     });
+     describe('createEmbedding', () => {
+         it('should generate embedding for text', async () => {
+             const mockEmbedding = [0.1, 0.2, 0.3, 0.4];
+             const mockResponse = { embedding: mockEmbedding };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.createEmbedding('Hello world');
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/embedding`, {
+                 method: 'POST',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({ content: 'Hello world' }),
+             });
+             expect(result.embedding).toEqual(mockEmbedding);
+         });
+         it('should include image data when provided', async () => {
+             const mockResponse = { embedding: [0.1, 0.2] };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const imageData = 'base64encodedimage';
+             await client.createEmbedding('Text with image', imageData);
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/embedding`, {
+                 method: 'POST',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({
+                     content: 'Text with image',
+                     image_data: imageData,
+                 }),
+             });
+         });
+         it('should throw error when embedding fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 500,
+                 statusText: 'Server Error',
+                 text: async () => 'Model error',
+             });
+             await expect(client.createEmbedding('test')).rejects.toThrow('Embedding failed: 500 Server Error - Model error');
+         });
+     });
+     describe('infill', () => {
+         it('should complete code between prefix and suffix', async () => {
+             const mockResponse = { content: ' return x + y;\n', tokens: [1, 2, 3], stop: true };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.infill('def add(x, y):\n', '\nprint(add(2, 3))');
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/infill`, {
+                 method: 'POST',
+                 headers: { 'Content-Type': 'application/json' },
+                 body: JSON.stringify({
+                     input_prefix: 'def add(x, y):\n',
+                     input_suffix: '\nprint(add(2, 3))',
+                 }),
+             });
+             expect(result.content).toBe(' return x + y;\n');
+             expect(result.stop).toBe(true);
+         });
+         it('should throw error when infill fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 400,
+                 statusText: 'Bad Request',
+                 text: async () => 'Invalid code',
+             });
+             await expect(client.infill('prefix', 'suffix')).rejects.toThrow('Infill failed: 400 Bad Request - Invalid code');
+         });
+     });
+     describe('getProps', () => {
+         it('should retrieve server properties', async () => {
+             const mockResponse = {
+                 assistant_name: 'Assistant',
+                 user_name: 'User',
+                 default_generation_settings: { temperature: 0.8 },
+                 total_slots: 4,
+             };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.getProps();
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/props`);
+             expect(result).toEqual(mockResponse);
+             expect(result.total_slots).toBe(4);
+         });
+         it('should throw error when getting props fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 404,
+                 statusText: 'Not Found',
+             });
+             await expect(client.getProps()).rejects.toThrow('Get props failed: 404 Not Found');
+         });
+     });
+     describe('getMetrics', () => {
+         it('should retrieve JSON metrics', async () => {
+             const mockResponse = { requests_total: 100, tokens_generated: 50000 };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 headers: new Map([['content-type', 'application/json']]),
+                 json: async () => mockResponse,
+             });
+             const result = await client.getMetrics();
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/metrics`);
+             expect(result).toEqual(mockResponse);
+         });
+         it('should handle Prometheus format metrics', async () => {
+             const prometheusText = '# HELP metric_name Description\nmetric_name 42\n';
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 headers: new Map([['content-type', 'text/plain']]),
+                 text: async () => prometheusText,
+             });
+             const result = await client.getMetrics();
+             expect(result).toEqual({ raw: prometheusText });
+         });
+         it('should throw error when getting metrics fails', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 500,
+                 statusText: 'Server Error',
+             });
+             await expect(client.getMetrics()).rejects.toThrow('Get metrics failed: 500 Server Error');
+         });
+     });
+     describe('getSlots', () => {
+         it('should retrieve slot information', async () => {
+             const mockResponse = {
+                 slots: [
+                     { id: 0, state: 1, prompt: 'Hello' },
+                     { id: 1, state: 0 },
+                 ],
+             };
+             global.fetch.mockResolvedValueOnce({
+                 ok: true,
+                 json: async () => mockResponse,
+             });
+             const result = await client.getSlots();
+             expect(global.fetch).toHaveBeenCalledWith(`${baseURL}/slots`);
+             expect(result.slots).toHaveLength(2);
+             expect(result.slots[0].id).toBe(0);
+         });
+         it('should throw specific error when endpoint is not enabled', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 404,
+                 statusText: 'Not Found',
+             });
+             await expect(client.getSlots()).rejects.toThrow('Slots endpoint not enabled. Start server with --slots flag to enable.');
+         });
+         it('should throw error for other failures', async () => {
+             global.fetch.mockResolvedValueOnce({
+                 ok: false,
+                 status: 500,
+                 statusText: 'Server Error',
+             });
+             await expect(client.getSlots()).rejects.toThrow('Get slots failed: 500 Server Error');
+         });
+     });
+ });
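
A detail worth noting in this suite: the mocked responses implement only the slice of the Fetch `Response` surface the client actually reads. A plain `Map` stands in for `Headers` because `getMetrics` only ever calls `headers.get('content-type')`, and `text()` is stubbed only on the error-path mocks that read the body. A hypothetical helper in the same spirit (the name `mockFetchOnce` is illustrative, not part of the package):

```typescript
// Illustrative test helper: fakes only the Response members the client reads.
function mockFetchOnce(
  body: unknown,
  init: { ok?: boolean; status?: number; statusText?: string; contentType?: string } = {},
): void {
  (global.fetch as jest.Mock).mockResolvedValueOnce({
    ok: init.ok ?? true,
    status: init.status ?? 200,
    statusText: init.statusText ?? 'OK',
    // A Map satisfies the single headers.get(...) call made by getMetrics.
    headers: new Map([['content-type', init.contentType ?? 'application/json']]),
    json: async () => body,
    text: async () => (typeof body === 'string' ? body : JSON.stringify(body)),
  });
}

// Usage: mockFetchOnce({ status: 'ok' }); await client.getHealth();
```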
@@ -6,6 +6,7 @@ import type { ILLMClientAdapter } from "./clients/types";
   */
  export declare const ADAPTER_CONSTRUCTORS: Partial<Record<ApiProviderId, new (config?: {
      baseURL?: string;
+     checkHealth?: boolean;
  }) => ILLMClientAdapter>>;
  /**
   * Optional configuration objects for each adapter
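
The adapter constructor config gains an optional `checkHealth` flag here. The diff does not show its implementation; going by the name and the new `/health` support, it plausibly opts the adapter into a readiness probe, but that semantics is an assumption. At the type level it is used like this (import path assumed):

```typescript
import { ADAPTER_CONSTRUCTORS } from 'genai-lite'; // import path assumed

// ADAPTER_CONSTRUCTORS is a Partial record, so guard before constructing.
const LlamaCpp = ADAPTER_CONSTRUCTORS.llamacpp;
if (LlamaCpp) {
  // checkHealth is declared in the constructor config; whether it eagerly
  // probes /health is inferred from the name, not shown in this diff.
  const adapter = new LlamaCpp({ baseURL: 'http://localhost:8080', checkHealth: true });
}
```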
@@ -73,6 +74,17 @@ export declare function isProviderSupported(providerId: string): boolean;
   * @returns True if the model is supported for the provider
   */
  export declare function isModelSupported(modelId: string, providerId: string): boolean;
+ /**
+  * Creates a fallback ModelInfo for unknown/unregistered models
+  *
+  * Used when allowUnknownModels is enabled for a provider, or as a permissive
+  * fallback when strict validation is disabled. Provides sensible defaults.
+  *
+  * @param modelId - The model ID to create info for
+  * @param providerId - The provider ID
+  * @returns ModelInfo with default/placeholder values
+  */
+ export declare function createFallbackModelInfo(modelId: string, providerId: string): ModelInfo;
  /**
   * Gets merged default settings for a specific model and provider
   *
@@ -8,11 +8,13 @@ exports.getModelById = getModelById;
  exports.getModelsByProvider = getModelsByProvider;
  exports.isProviderSupported = isProviderSupported;
  exports.isModelSupported = isModelSupported;
+ exports.createFallbackModelInfo = createFallbackModelInfo;
  exports.getDefaultSettingsForModel = getDefaultSettingsForModel;
  exports.validateLLMSettings = validateLLMSettings;
  const OpenAIClientAdapter_1 = require("./clients/OpenAIClientAdapter");
  const AnthropicClientAdapter_1 = require("./clients/AnthropicClientAdapter");
  const GeminiClientAdapter_1 = require("./clients/GeminiClientAdapter");
+ const LlamaCppClientAdapter_1 = require("./clients/LlamaCppClientAdapter");
  // Placeholder for future imports:
  // import { MistralClientAdapter } from './clients/MistralClientAdapter';
  /**
@@ -23,6 +25,7 @@ exports.ADAPTER_CONSTRUCTORS = {
      openai: OpenAIClientAdapter_1.OpenAIClientAdapter,
      anthropic: AnthropicClientAdapter_1.AnthropicClientAdapter,
      gemini: GeminiClientAdapter_1.GeminiClientAdapter,
+     llamacpp: LlamaCppClientAdapter_1.LlamaCppClientAdapter,
      // 'mistral': MistralClientAdapter, // Uncomment and add when Mistral adapter is ready
  };
  /**
@@ -36,6 +39,9 @@ exports.ADAPTER_CONFIGS = {
      anthropic: {
          baseURL: process.env.ANTHROPIC_API_BASE_URL || undefined,
      },
+     llamacpp: {
+         baseURL: process.env.LLAMACPP_API_BASE_URL || 'http://localhost:8080',
+     },
      // 'gemini': { /* ... Gemini specific config ... */ },
      // 'mistral': { /* ... Mistral specific config ... */ },
  };
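
Because `ADAPTER_CONFIGS` reads `LLAMACPP_API_BASE_URL` when the module is first evaluated, pointing the adapter at a non-default host is an environment concern that must happen before import. A sketch (ESM top-level `await` and the import path are assumptions):

```typescript
// Must run before genai-lite is evaluated: ADAPTER_CONFIGS captures
// process.env.LLAMACPP_API_BASE_URL at module load time.
process.env.LLAMACPP_API_BASE_URL = 'http://192.168.1.50:8080';

const { ADAPTER_CONFIGS } = await import('genai-lite'); // path assumed
console.log(ADAPTER_CONFIGS.llamacpp?.baseURL); // 'http://192.168.1.50:8080'
```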
@@ -116,6 +122,16 @@ exports.SUPPORTED_PROVIDERS = [
          id: "mistral",
          name: "Mistral AI",
      },
+     {
+         id: "llamacpp",
+         name: "llama.cpp",
+         allowUnknownModels: true, // Users load arbitrary GGUF models with custom names
+     },
+     {
+         id: "mock",
+         name: "Mock Provider",
+         allowUnknownModels: true, // Test provider accepts any model
+     },
  ];
  /**
   * Supported LLM models with their configurations
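
Both new providers carry the new `allowUnknownModels` flag, which splits the registry into strict providers (fixed model catalogs) and flexible ones (llama.cpp serves whatever GGUF file was loaded). A quick way to see which providers are flexible, assuming `SUPPORTED_PROVIDERS` is importable from the package:

```typescript
import { SUPPORTED_PROVIDERS } from 'genai-lite'; // import path assumed

// New in 0.4.0: providers that accept arbitrary model IDs.
const flexible = SUPPORTED_PROVIDERS.filter((p) => p.allowUnknownModels).map((p) => p.id);
console.log(flexible); // ['llamacpp', 'mock']
```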
@@ -412,6 +428,43 @@ exports.SUPPORTED_MODELS = [
          supportsImages: false,
          supportsPromptCache: false,
      },
+     // llama.cpp Models (examples - users can specify any loaded model)
+     {
+         id: "llama-3-8b-instruct",
+         name: "Llama 3 8B Instruct",
+         providerId: "llamacpp",
+         contextWindow: 8192,
+         inputPrice: 0.0,
+         outputPrice: 0.0,
+         description: "Local Llama 3 8B model via llama.cpp server",
+         maxTokens: 4096,
+         supportsImages: false,
+         supportsPromptCache: false,
+     },
+     {
+         id: "llama-3-70b-instruct",
+         name: "Llama 3 70B Instruct",
+         providerId: "llamacpp",
+         contextWindow: 8192,
+         inputPrice: 0.0,
+         outputPrice: 0.0,
+         description: "Local Llama 3 70B model via llama.cpp server",
+         maxTokens: 4096,
+         supportsImages: false,
+         supportsPromptCache: false,
+     },
+     {
+         id: "mistral-7b-instruct",
+         name: "Mistral 7B Instruct",
+         providerId: "llamacpp",
+         contextWindow: 32768,
+         inputPrice: 0.0,
+         outputPrice: 0.0,
+         description: "Local Mistral 7B model via llama.cpp server",
+         maxTokens: 4096,
+         supportsImages: false,
+         supportsPromptCache: false,
+     },
  ];
  /**
   * Gets provider information by ID
@@ -460,6 +513,30 @@ function isProviderSupported(providerId) {
  function isModelSupported(modelId, providerId) {
      return exports.SUPPORTED_MODELS.some((model) => model.id === modelId && model.providerId === providerId);
  }
+ /**
+  * Creates a fallback ModelInfo for unknown/unregistered models
+  *
+  * Used when allowUnknownModels is enabled for a provider, or as a permissive
+  * fallback when strict validation is disabled. Provides sensible defaults.
+  *
+  * @param modelId - The model ID to create info for
+  * @param providerId - The provider ID
+  * @returns ModelInfo with default/placeholder values
+  */
+ function createFallbackModelInfo(modelId, providerId) {
+     return {
+         id: modelId,
+         name: modelId,
+         providerId: providerId,
+         contextWindow: 4096,
+         maxTokens: 2048,
+         inputPrice: 0,
+         outputPrice: 0,
+         description: `Unknown model (using defaults)`,
+         supportsImages: false,
+         supportsPromptCache: false,
+     };
+ }
  /**
   * Gets merged default settings for a specific model and provider
   *
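
The fallback metadata is deliberately conservative (a 4096-token context window, 2048 max output tokens, and zero pricing), so an unrecognized model errs toward shorter requests rather than overruns. What callers get back (model ID is illustrative; import path assumed):

```typescript
import { createFallbackModelInfo } from 'genai-lite'; // import path assumed

const info = createFallbackModelInfo('my-local-gguf-q4_k_m', 'llamacpp');
console.log(info.name);                          // 'my-local-gguf-q4_k_m' - ID doubles as display name
console.log(info.contextWindow, info.maxTokens); // 4096 2048 - conservative defaults
console.log(info.inputPrice, info.outputPrice);  // 0 0 - local inference is unpriced
```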
@@ -90,20 +90,20 @@ class ModelResolver {
                  }
              };
          }
-         const modelInfo = (0, config_1.getModelById)(options.modelId, options.providerId);
+         let modelInfo = (0, config_1.getModelById)(options.modelId, options.providerId);
          if (!modelInfo) {
-             return {
-                 error: {
-                     provider: options.providerId,
-                     model: options.modelId,
-                     error: {
-                         message: `Unsupported model: ${options.modelId} for provider: ${options.providerId}`,
-                         code: 'UNSUPPORTED_MODEL',
-                         type: 'validation_error',
-                     },
-                     object: 'error',
-                 }
-             };
+             // Check if provider allows unknown models
+             const provider = (0, config_1.getProviderById)(options.providerId);
+             if (provider?.allowUnknownModels) {
+                 // Flexible provider (e.g., llamacpp) - silent fallback
+                 modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+             }
+             else {
+                 // Strict provider - warn but allow
+                 console.warn(`⚠️ Unknown model "${options.modelId}" for provider "${options.providerId}". ` +
+                     `Using default settings. This may fail at the provider API if the model doesn't exist.`);
+                 modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+             }
          }
          return {
              providerId: options.providerId,
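
The net effect of this resolver change: an unknown model is no longer a hard `UNSUPPORTED_MODEL` validation error. Providers flagged `allowUnknownModels` fall back silently; all others emit a console warning and proceed with default settings, deferring final validation to the provider's API. Restated as a standalone sketch that paraphrases the branch above (the import paths are assumptions; this is not additional library API):

```typescript
import { getModelById, getProviderById, createFallbackModelInfo } from 'genai-lite'; // paths assumed
import type { ModelInfo } from 'genai-lite';

// Paraphrase of the new ModelResolver branch: resolution never rejects a model ID.
function resolveModelInfo(modelId: string, providerId: string): ModelInfo {
  const known = getModelById(modelId, providerId);
  if (known) {
    return known;
  }
  const provider = getProviderById(providerId);
  if (!provider?.allowUnknownModels) {
    // Strict providers still resolve, just noisily.
    console.warn(`Unknown model "${modelId}" for provider "${providerId}"; using defaults.`);
  }
  return createFallbackModelInfo(modelId, providerId);
}
```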