genai-lite 0.3.3 → 0.4.0

package/README.md CHANGED
@@ -1,10 +1,11 @@
1
1
  # genai-lite
2
2
 
3
- A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers (OpenAI, Anthropic, Google Gemini, Mistral, and more).
3
+ A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers, both cloud-based (OpenAI, Anthropic, Google Gemini, Mistral) and local (llama.cpp).
4
4
 
5
5
  ## Features
6
6
 
7
7
  - 🔌 **Unified API** - Single interface for multiple AI providers
8
+ - 🏠 **Local & Cloud Models** - Run models locally with llama.cpp or use cloud APIs
8
9
  - 🔐 **Flexible API Key Management** - Bring your own key storage solution
9
10
  - 📦 **Zero Electron Dependencies** - Works in any Node.js environment
10
11
  - 🎯 **TypeScript First** - Full type safety and IntelliSense support
@@ -21,13 +22,14 @@ npm install genai-lite
21
22
 
22
23
  ## Quick Start
23
24
 
25
+ ### Cloud Providers (OpenAI, Anthropic, Gemini, Mistral)
26
+
24
27
  ```typescript
25
28
  import { LLMService, fromEnvironment } from 'genai-lite';
26
29
 
27
30
  // Create service with environment variable API key provider
28
31
  const llmService = new LLMService(fromEnvironment);
29
32
 
30
- // Option 1: Direct message sending
31
33
  const response = await llmService.sendMessage({
32
34
  providerId: 'openai',
33
35
  modelId: 'gpt-4.1-mini',
@@ -37,26 +39,47 @@ const response = await llmService.sendMessage({
37
39
  ]
38
40
  });
39
41
 
40
- // Option 2: Create messages from template (recommended for complex prompts)
41
- const { messages } = await llmService.createMessages({
42
- template: '<SYSTEM>You are a helpful assistant.</SYSTEM><USER>Hello, how are you?</USER>',
43
- providerId: 'openai',
44
- modelId: 'gpt-4.1-mini'
45
- });
42
+ if (response.object === 'chat.completion') {
43
+ console.log(response.choices[0].message.content);
44
+ } else {
45
+ console.error('Error:', response.error.message);
46
+ }
47
+ ```
46
48
 
47
- const response2 = await llmService.sendMessage({
48
- providerId: 'openai',
49
- modelId: 'gpt-4.1-mini',
50
- messages
49
+ ### Local Models (llama.cpp)
50
+
51
+ ```typescript
52
+ import { LLMService } from 'genai-lite';
53
+
54
+ // Start llama.cpp server first: llama-server -m /path/to/model.gguf --port 8080
55
+ const llmService = new LLMService(async () => 'not-needed');
56
+
57
+ const response = await llmService.sendMessage({
58
+ providerId: 'llamacpp',
59
+ modelId: 'llama-3-8b-instruct', // Must match your loaded model
60
+ messages: [
61
+ { role: 'system', content: 'You are a helpful assistant.' },
62
+ { role: 'user', content: 'Explain quantum computing briefly.' }
63
+ ]
51
64
  });
52
65
 
53
66
  if (response.object === 'chat.completion') {
54
67
  console.log(response.choices[0].message.content);
55
- } else {
56
- console.error('Error:', response.error.message);
57
68
  }
58
69
  ```
59
70
 
71
+ See the [llama.cpp Integration](#llamacpp-integration) section for setup details.
72
+
73
+ ## Example Application
74
+
75
+ For a complete, production-ready example showcasing all genai-lite capabilities, see the **[chat-demo](examples/chat-demo)** interactive web application. The demo includes:
76
+ - Multi-provider chat interface with all supported providers
77
+ - Template rendering and model presets
78
+ - llama.cpp utilities (tokenization, embeddings, health checks)
79
+ - Settings persistence, export/import features
80
+
81
+ The chat-demo serves as both a comprehensive showcase and a quick-test environment for library changes.
82
+
60
83
  ## API Key Management
61
84
 
62
85
  genai-lite uses a flexible API key provider pattern. You can use the built-in environment variable provider or create your own:
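
For instance, a custom provider can load the key from any async source. The sketch below assumes the provider is simply an async function that resolves to the key string, matching the `new LLMService(async () => '...')` usage shown elsewhere in this README; the secrets file path and field name are illustrative.

```typescript
import { readFile } from 'node:fs/promises';
import { LLMService } from 'genai-lite';

// Illustrative custom key provider: read the key from a local JSON file
// instead of environment variables. Any async source (keychain, vault,
// database) would work the same way.
const myKeyProvider = async () => {
  const secrets = JSON.parse(await readFile('./secrets.json', 'utf8'));
  return secrets.OPENAI_API_KEY as string;
};

const llmService = new LLMService(myKeyProvider);
```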
@@ -124,6 +147,64 @@ const llmService = new LLMService(myKeyProvider);
124
147
  - `codestral-2501` - Specialized for code generation
125
148
  - `devstral-small-2505` - Compact development-focused model
126
149
 
150
+ ### llama.cpp (Local Models)
151
+
152
+ Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name; they're not validated since you load your own GGUF models.
153
+
154
+ **Example models:**
155
+ - `llama-3-8b-instruct` - Llama 3 8B Instruct
156
+ - `llama-3-70b-instruct` - Llama 3 70B Instruct
157
+ - `mistral-7b-instruct` - Mistral 7B Instruct
158
+ - `my-custom-model` - Any custom model you've loaded
159
+
160
+ **Setup:**
161
+
162
+ 1. Start llama.cpp server with your model:
163
+ ```bash
164
+ llama-server -m /path/to/model.gguf --port 8080
165
+ ```
166
+
167
+ 2. Use with genai-lite (no API key needed):
168
+ ```typescript
169
+ import { LLMService } from 'genai-lite';
170
+
171
+ // API key can be any string for llama.cpp
172
+ const service = new LLMService(async () => 'not-needed');
173
+
174
+ const response = await service.sendMessage({
175
+ providerId: 'llamacpp',
176
+ modelId: 'llama-3-8b-instruct', // Must match your loaded model name
177
+ messages: [{ role: 'user', content: 'Hello!' }]
178
+ });
179
+ ```
180
+
181
+ 3. Configure server URL via environment variable:
182
+ ```bash
183
+ export LLAMACPP_API_BASE_URL=http://localhost:8080
184
+ ```
185
+
186
+ **Advanced features** - Access non-LLM endpoints:
187
+
188
+ ```typescript
189
+ import { LlamaCppServerClient } from 'genai-lite';
190
+
191
+ const client = new LlamaCppServerClient('http://localhost:8080');
192
+
193
+ // Check server health
194
+ const health = await client.getHealth();
195
+
196
+ // Tokenize text
197
+ const { tokens } = await client.tokenize('Hello world');
198
+
199
+ // Generate embeddings
200
+ const { embedding } = await client.createEmbedding('Some text');
201
+
202
+ // Code completion
203
+ const result = await client.infill('def hello():\n', '\nprint("done")');
204
+ ```
205
+
206
+ See the [llama.cpp Integration](#llamacpp-integration) section for details.
207
+
127
208
  ### Models with Reasoning Support
128
209
 
129
210
  Some models include advanced reasoning/thinking capabilities that enhance their problem-solving abilities:
@@ -666,6 +747,261 @@ if (response.object === 'error') {
666
747
  }
667
748
  ```
668
749
 
750
+ ## llama.cpp Integration
751
+
752
+ `genai-lite` provides comprehensive support for running local LLMs via [llama.cpp](https://github.com/ggml-org/llama.cpp) server, enabling completely offline AI capabilities with the same unified interface.
753
+
754
+ ### Why llama.cpp?
755
+
756
+ - **Privacy**: All model inference runs locally on your hardware
757
+ - **Cost**: No API costs after initial model download
758
+ - **Control**: Use any GGUF model from Hugging Face
759
+ - **Performance**: Optimized C++ implementation with hardware acceleration
760
+
761
+ ### Setup
762
+
763
+ #### 1. Install llama.cpp
764
+
765
+ ```bash
766
+ # Clone and build llama.cpp
767
+ git clone https://github.com/ggml-org/llama.cpp
768
+ cd llama.cpp
769
+ make
770
+
771
+ # Or download pre-built binaries from releases
772
+ ```
773
+
774
+ #### 2. Download a Model
775
+
776
+ Get GGUF models from Hugging Face, for example:
777
+ - [Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
778
+ - [Mistral-7B-Instruct-v0.3-GGUF](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
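
One way to fetch one of the models listed above is the Hugging Face CLI; the exact quantization file name below is illustrative and depends on the repository.

```bash
# Download a quantized GGUF with the Hugging Face CLI (file name is illustrative)
pip install -U "huggingface_hub[cli]"
huggingface-cli download bartowski/Meta-Llama-3.1-8B-Instruct-GGUF \
  Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf --local-dir ./models
```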
779
+
780
+ #### 3. Start the Server
781
+
782
+ ```bash
783
+ # Basic usage
784
+ llama-server -m /path/to/model.gguf --port 8080
785
+
786
+ # With more options (-c: context size, -np: parallel request slots, --threads: CPU threads)
787
+ llama-server -m /path/to/model.gguf \
788
+   --port 8080 \
789
+   -c 4096 \
790
+   -np 4 \
791
+   --threads 8
792
+ ```
793
+
794
+ ### Basic Usage
795
+
796
+ ```typescript
797
+ import { LLMService } from 'genai-lite';
798
+
799
+ // llama.cpp doesn't need API keys
800
+ const service = new LLMService(async () => 'not-needed');
801
+
802
+ const response = await service.sendMessage({
803
+ providerId: 'llamacpp',
804
+ modelId: 'llama-3-8b-instruct', // Arbitrary name matching your model
805
+ messages: [
806
+ { role: 'system', content: 'You are a helpful assistant.' },
807
+ { role: 'user', content: 'Explain quantum computing in simple terms.' }
808
+ ],
809
+ settings: {
810
+ temperature: 0.7,
811
+ maxTokens: 500
812
+ }
813
+ });
814
+
815
+ if (response.object === 'chat.completion') {
816
+ console.log(response.choices[0].message.content);
817
+ }
818
+ ```
819
+
820
+ ### Configuration
821
+
822
+ #### Environment Variable
823
+
824
+ Set the server URL via environment variable (default: `http://localhost:8080`):
825
+
826
+ ```bash
827
+ export LLAMACPP_API_BASE_URL=http://localhost:8080
828
+ ```
829
+
830
+ #### Multiple Servers
831
+
832
+ Register multiple llama.cpp instances for different models:
833
+
834
+ ```typescript
835
+ import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
836
+
837
+ const service = new LLMService(async () => 'not-needed');
838
+
839
+ // Register adapters for different servers/models
840
+ service.registerAdapter(
841
+ 'llamacpp-small',
842
+ new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' })
843
+ );
844
+
845
+ service.registerAdapter(
846
+ 'llamacpp-large',
847
+ new LlamaCppClientAdapter({ baseURL: 'http://localhost:8081' })
848
+ );
849
+
850
+ // Use them
851
+ const response = await service.sendMessage({
852
+ providerId: 'llamacpp-small',
853
+ modelId: 'llama-3-8b',
854
+ messages: [{ role: 'user', content: 'Hello!' }]
855
+ });
856
+ ```
857
+
858
+ #### Health Checking
859
+
860
+ Enable automatic health checks before requests:
861
+
862
+ ```typescript
863
+ import { LlamaCppClientAdapter } from 'genai-lite';
864
+
865
+ const adapter = new LlamaCppClientAdapter({
866
+ baseURL: 'http://localhost:8080',
867
+ checkHealth: true // Check server status before each request
868
+ });
869
+
870
+ service.registerAdapter('llamacpp', adapter);
871
+ ```
872
+
873
+ ### Advanced Features
874
+
875
+ #### Server Management
876
+
877
+ The `LlamaCppServerClient` class provides access to all llama.cpp server endpoints:
878
+
879
+ ```typescript
880
+ import { LlamaCppServerClient } from 'genai-lite';
881
+
882
+ const client = new LlamaCppServerClient('http://localhost:8080');
883
+
884
+ // Health monitoring
885
+ const health = await client.getHealth();
886
+ console.log(health.status); // 'ok', 'loading', or 'error'
887
+
888
+ // Server properties
889
+ const props = await client.getProps();
890
+ console.log(props.total_slots); // Number of available slots
891
+
892
+ // Performance metrics (if enabled)
893
+ const metrics = await client.getMetrics();
894
+ ```
895
+
896
+ #### Tokenization
897
+
898
+ ```typescript
899
+ const client = new LlamaCppServerClient('http://localhost:8080');
900
+
901
+ // Tokenize text
902
+ const { tokens } = await client.tokenize('Hello, world!');
903
+ console.log(tokens); // [123, 456, 789]
904
+
905
+ // Count tokens before sending to LLM
906
+ const prompt = 'Long text...';
907
+ const { tokens: promptTokens } = await client.tokenize(prompt);
908
+ if (promptTokens.length > 4000) {
909
+ console.log('Prompt too long, truncating...');
910
+ }
911
+
912
+ // Detokenize back to text
913
+ const { content } = await client.detokenize([123, 456, 789]);
914
+ console.log(content); // 'Hello, world!'
915
+ ```
916
+
917
+ #### Text Embeddings
918
+
919
+ ```typescript
920
+ const client = new LlamaCppServerClient('http://localhost:8080');
921
+
922
+ // Generate embeddings for semantic search
923
+ const { embedding } = await client.createEmbedding('Search query text');
924
+ console.log(embedding.length); // e.g., 768 dimensions
925
+
926
+ // With images (for multimodal models)
927
+ const { embedding: multimodalEmbed } = await client.createEmbedding(
928
+ 'Describe this image',
929
+ 'base64_image_data_here'
930
+ );
931
+ ```
932
+
933
+ #### Code Infilling
934
+
935
+ Perfect for code completion in IDEs:
936
+
937
+ ```typescript
938
+ const client = new LlamaCppServerClient('http://localhost:8080');
939
+
940
+ const result = await client.infill(
941
+ 'def calculate_fibonacci(n):\n ', // Prefix (before cursor)
942
+ '\n return result' // Suffix (after cursor)
943
+ );
944
+
945
+ console.log(result.content);
946
+ // Output: "if n <= 1:\n return n\n result = calculate_fibonacci(n-1) + calculate_fibonacci(n-2)"
947
+ ```
948
+
949
+ ### Error Handling
950
+
951
+ ```typescript
952
+ const response = await service.sendMessage({
953
+ providerId: 'llamacpp',
954
+ modelId: 'my-model',
955
+ messages: [{ role: 'user', content: 'Hello' }]
956
+ });
957
+
958
+ if (response.object === 'error') {
959
+ switch (response.error.code) {
960
+ case 'NETWORK_ERROR':
961
+ console.error('Server not running or unreachable');
962
+ break;
963
+ case 'PROVIDER_ERROR':
964
+ console.error('Server error:', response.error.message);
965
+ break;
966
+ default:
967
+ console.error('Unknown error:', response.error);
968
+ }
969
+ }
970
+ ```
971
+
972
+ ### Best Practices
973
+
974
+ 1. **Model Naming**: Use descriptive model IDs (e.g., `llama-3-8b-instruct`) since llama.cpp accepts any name
975
+ 2. **Context Size**: Set appropriate context (`-c` flag) when starting the server
976
+ 3. **Parallel Requests**: Configure slots (`-np`) based on your hardware
977
+ 4. **Health Monitoring**: Enable `checkHealth` for production to detect server issues early (see the combined sketch below)
978
+ 5. **Resource Management**: Monitor memory usage; large models need significant RAM
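
Putting practices 2-4 together, a minimal configuration might look like the following; every flag and option is one already shown earlier in this section.

```typescript
import { LLMService, LlamaCppClientAdapter } from 'genai-lite';

// Server started beforehand with explicit context size and parallel slots, e.g.:
//   llama-server -m /path/to/model.gguf --port 8080 -c 4096 -np 4
const service = new LLMService(async () => 'not-needed');

service.registerAdapter(
  'llamacpp',
  new LlamaCppClientAdapter({
    baseURL: 'http://localhost:8080',
    checkHealth: true, // detect a down or still-loading server before each request
  })
);
```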
979
+
980
+ ### Troubleshooting
981
+
982
+ **Server not responding:**
983
+ ```bash
984
+ # Check if server is running
985
+ curl http://localhost:8080/health
986
+
987
+ # Should return: {"status":"ok"}
988
+ ```
989
+
990
+ **Model loading errors:**
991
+ ```bash
992
+ # Increase memory or reduce context size
993
+ llama-server -m model.gguf --port 8080 -c 2048
994
+ ```
995
+
996
+ **Slow responses:**
997
+ ```bash
998
+ # Use quantized models (smaller but faster)
999
+ # e.g., Q4_K_M, Q5_K_M instead of F16
1000
+
1001
+ # Increase threads
1002
+ llama-server -m model.gguf --threads 16
1003
+ ```
1004
+
669
1005
  ## Using with Electron
670
1006
 
671
1007
  `genai-lite` is designed to work seamlessly within an Electron application's main process, especially when paired with a secure storage solution like `genai-key-storage-lite`.
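
A schematic of that setup is sketched below; the IPC channel name and the key-lookup helper are placeholders, and `genai-key-storage-lite`'s actual API is not shown in this diff.

```typescript
// main.ts (Electron main process): schematic sketch only
import { ipcMain } from 'electron';
import { LLMService } from 'genai-lite';

// Hypothetical helper standing in for genai-key-storage-lite or any secure store.
declare function loadApiKeyFromSecureStorage(providerId: string): Promise<string>;

const llmService = new LLMService(async () => loadApiKeyFromSecureStorage('openai'));

// Expose the service to renderer processes over IPC rather than importing it there.
ipcMain.handle('llm:sendMessage', (_event, request) => llmService.sendMessage(request));
```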
@@ -725,6 +1061,26 @@ import type {
725
1061
  CreateMessagesResult,
726
1062
  TemplateMetadata
727
1063
  } from 'genai-lite';
1064
+
1065
+ // llama.cpp integration types and classes
1066
+ import {
1067
+ LlamaCppClientAdapter,
1068
+ LlamaCppServerClient,
1069
+ createFallbackModelInfo
1070
+ } from 'genai-lite';
1071
+
1072
+ import type {
1073
+ LlamaCppClientConfig,
1074
+ LlamaCppHealthResponse,
1075
+ LlamaCppTokenizeResponse,
1076
+ LlamaCppDetokenizeResponse,
1077
+ LlamaCppEmbeddingResponse,
1078
+ LlamaCppInfillResponse,
1079
+ LlamaCppPropsResponse,
1080
+ LlamaCppMetricsResponse,
1081
+ LlamaCppSlot,
1082
+ LlamaCppSlotsResponse
1083
+ } from 'genai-lite';
728
1084
  ```
729
1085
 
730
1086
  ## Utilities
@@ -1106,6 +1462,10 @@ These utilities enable:
1106
1462
  - **Template Reusability**: Define templates once, use with different variables (see the sketch below)
1107
1463
  - **Type Safety**: Full TypeScript support with LLMMessage types
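
As a reminder of how these pieces fit together, the 0.3.x quick start (removed earlier in this diff) drove `createMessages` with an inline template; assuming that API is unchanged in 0.4.0, reusing a template looks roughly like this.

```typescript
import { LLMService, fromEnvironment } from 'genai-lite';

const llmService = new LLMService(fromEnvironment);

// Define once, reuse across providers and models.
const template = '<SYSTEM>You are a helpful assistant.</SYSTEM><USER>Hello, how are you?</USER>';

const { messages } = await llmService.createMessages({
  template,
  providerId: 'openai',
  modelId: 'gpt-4.1-mini',
});

const response = await llmService.sendMessage({
  providerId: 'openai',
  modelId: 'gpt-4.1-mini',
  messages,
});
```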
1108
1464
 
1465
+ ## Examples
1466
+
1467
+ See the **[chat-demo](examples/chat-demo)** application for a complete working example that demonstrates all library features in a production-ready React + Express application.
1468
+
1109
1469
  ## Contributing
1110
1470
 
1111
1471
  Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
package/dist/index.d.ts CHANGED
@@ -5,7 +5,12 @@ export type { ModelPreset } from "./types/presets";
5
5
  export * from "./llm/types";
6
6
  export * from "./llm/clients/types";
7
7
  export { fromEnvironment } from "./providers/fromEnvironment";
8
+ export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
9
+ export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
10
+ export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
11
+ export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
8
12
  export { renderTemplate } from "./prompting/template";
9
13
  export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
10
14
  export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
11
15
  export type { TemplateMetadata } from "./prompting/parser";
16
+ export { createFallbackModelInfo } from "./llm/config";
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
15
  };
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.fromEnvironment = exports.LLMService = void 0;
17
+ exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
18
18
  // --- LLM Service ---
19
19
  var LLMService_1 = require("./llm/LLMService");
20
20
  Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -25,6 +25,11 @@ __exportStar(require("./llm/clients/types"), exports);
25
25
  // --- API Key Providers ---
26
26
  var fromEnvironment_1 = require("./providers/fromEnvironment");
27
27
  Object.defineProperty(exports, "fromEnvironment", { enumerable: true, get: function () { return fromEnvironment_1.fromEnvironment; } });
28
+ // --- llama.cpp Integration ---
29
+ var LlamaCppClientAdapter_1 = require("./llm/clients/LlamaCppClientAdapter");
30
+ Object.defineProperty(exports, "LlamaCppClientAdapter", { enumerable: true, get: function () { return LlamaCppClientAdapter_1.LlamaCppClientAdapter; } });
31
+ var LlamaCppServerClient_1 = require("./llm/clients/LlamaCppServerClient");
32
+ Object.defineProperty(exports, "LlamaCppServerClient", { enumerable: true, get: function () { return LlamaCppServerClient_1.LlamaCppServerClient; } });
28
33
  // --- Utilities ---
29
34
  var template_1 = require("./prompting/template");
30
35
  Object.defineProperty(exports, "renderTemplate", { enumerable: true, get: function () { return template_1.renderTemplate; } });
@@ -37,3 +42,5 @@ Object.defineProperty(exports, "parseStructuredContent", { enumerable: true, get
37
42
  Object.defineProperty(exports, "parseRoleTags", { enumerable: true, get: function () { return parser_1.parseRoleTags; } });
38
43
  Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true, get: function () { return parser_1.extractInitialTaggedContent; } });
39
44
  Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
45
+ var config_1 = require("./llm/config");
46
+ Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
@@ -44,17 +44,34 @@ describe('LLMService', () => {
44
44
  expect(errorResponse.error.code).toBe('UNSUPPORTED_PROVIDER');
45
45
  expect(errorResponse.error.message).toContain('Unsupported provider');
46
46
  });
47
- it('should return validation error for unsupported model', async () => {
47
+ it('should succeed with fallback for unknown model', async () => {
48
48
  const request = {
49
- providerId: 'openai',
49
+ providerId: 'mock', // Use mock provider to avoid real API calls
50
50
  modelId: 'unsupported-model',
51
51
  messages: [{ role: 'user', content: 'Hello' }]
52
52
  };
53
53
  const response = await service.sendMessage(request);
54
- expect(response.object).toBe('error');
55
- const errorResponse = response;
56
- expect(errorResponse.error.code).toBe('UNSUPPORTED_MODEL');
57
- expect(errorResponse.error.message).toContain('Unsupported model');
54
+ // Should succeed with mock response (not error) even for unknown model
55
+ expect(response.object).toBe('chat.completion');
56
+ });
57
+ it('should silently work with flexible providers unknown models (no warning)', async () => {
58
+ const warnings = [];
59
+ const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation((msg) => {
60
+ warnings.push(msg);
61
+ });
62
+ // Test with mock provider (which has allowUnknownModels: true)
63
+ const request = {
64
+ providerId: 'mock',
65
+ modelId: 'totally-unknown-model-xyz',
66
+ messages: [{ role: 'user', content: 'Testing flexible provider' }]
67
+ };
68
+ const response = await service.sendMessage(request);
69
+ // Should succeed with mock response
70
+ expect(response.object).toBe('chat.completion');
71
+ // Should NOT warn about unknown model (filter out adapter constructor warnings)
72
+ const unknownModelWarnings = warnings.filter(w => !w.includes('No adapter constructor'));
73
+ expect(unknownModelWarnings.length).toBe(0); // No warnings for flexible providers
74
+ consoleWarnSpy.mockRestore();
58
75
  });
59
76
  it('should return validation error for empty messages', async () => {
60
77
  const request = {
@@ -160,8 +177,8 @@ describe('LLMService', () => {
160
177
  // Second request to same provider
161
178
  request.messages = [{ role: 'user', content: 'Second request' }];
162
179
  await service.sendMessage(request);
163
- // API key provider should be called for each request with mock provider
164
- expect(mockApiKeyProvider).toHaveBeenCalledTimes(0); // Mock provider doesn't need API keys
180
+ // API key provider should be called once per unique provider (mock provider now registered)
181
+ expect(mockApiKeyProvider).toHaveBeenCalledTimes(2);
165
182
  });
166
183
  });
167
184
  describe('settings management', () => {
@@ -325,11 +342,13 @@ describe('LLMService', () => {
325
342
  describe('getProviders', () => {
326
343
  it('should return all supported providers', async () => {
327
344
  const providers = await service.getProviders();
328
- expect(providers).toHaveLength(4);
345
+ expect(providers).toHaveLength(6);
329
346
  expect(providers.find(p => p.id === 'openai')).toBeDefined();
330
347
  expect(providers.find(p => p.id === 'anthropic')).toBeDefined();
331
348
  expect(providers.find(p => p.id === 'gemini')).toBeDefined();
332
349
  expect(providers.find(p => p.id === 'mistral')).toBeDefined();
350
+ expect(providers.find(p => p.id === 'llamacpp')).toBeDefined();
351
+ expect(providers.find(p => p.id === 'mock')).toBeDefined();
333
352
  });
334
353
  it('should include provider metadata', async () => {
335
354
  const providers = await service.getProviders();
@@ -0,0 +1,116 @@
1
+ import type { LLMResponse, LLMFailureResponse } from "../types";
2
+ import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
3
+ import { LlamaCppServerClient } from "./LlamaCppServerClient";
4
+ /**
5
+ * Configuration options for LlamaCppClientAdapter
6
+ */
7
+ export interface LlamaCppClientConfig {
8
+ /** Base URL of the llama.cpp server (default: http://localhost:8080) */
9
+ baseURL?: string;
10
+ /** Whether to check server health before sending requests (default: false) */
11
+ checkHealth?: boolean;
12
+ }
13
+ /**
14
+ * Client adapter for llama.cpp server integration
15
+ *
16
+ * This adapter provides integration with llama.cpp server via its OpenAI-compatible
17
+ * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
18
+ * with llama.cpp's OpenAI-compatible API.
19
+ *
20
+ * Key features:
21
+ * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
22
+ * - Optional health check before requests
23
+ * - No API key required (llama.cpp is a local server)
24
+ * - Supports all standard LLM settings
25
+ *
26
+ * Note: Model IDs are not validated against a predefined list since llama.cpp
27
+ * serves whatever model is loaded. Users must specify the correct model name.
28
+ *
29
+ * @example
30
+ * ```typescript
31
+ * // Create adapter for local server
32
+ * const adapter = new LlamaCppClientAdapter({
33
+ * baseURL: 'http://localhost:8080',
34
+ * checkHealth: true
35
+ * });
36
+ *
37
+ * // Register with LLMService
38
+ * service.registerAdapter('llamacpp', adapter);
39
+ *
40
+ * // Use via LLMService
41
+ * const response = await service.sendMessage({
42
+ * providerId: 'llamacpp',
43
+ * modelId: 'llama-3-8b-instruct',
44
+ * messages: [{ role: 'user', content: 'Hello!' }]
45
+ * });
46
+ * ```
47
+ */
48
+ export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
49
+ private baseURL;
50
+ private checkHealth;
51
+ private serverClient;
52
+ /**
53
+ * Creates a new llama.cpp client adapter
54
+ *
55
+ * @param config Optional configuration for the adapter
56
+ */
57
+ constructor(config?: LlamaCppClientConfig);
58
+ /**
59
+ * Sends a chat message to llama.cpp server
60
+ *
61
+ * @param request - The internal LLM request with applied settings
62
+ * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
63
+ * @returns Promise resolving to success or failure response
64
+ */
65
+ sendMessage(request: InternalLLMChatRequest, apiKey: string): Promise<LLMResponse | LLMFailureResponse>;
66
+ /**
67
+ * Validates API key format
68
+ *
69
+ * For llama.cpp, API keys are not required, so this always returns true.
70
+ * The method is implemented for interface compatibility.
71
+ *
72
+ * @param apiKey - The API key (ignored)
73
+ * @returns Always true
74
+ */
75
+ validateApiKey(apiKey: string): boolean;
76
+ /**
77
+ * Gets adapter information
78
+ */
79
+ getAdapterInfo(): {
80
+ providerId: "llamacpp";
81
+ name: string;
82
+ version: string;
83
+ baseURL: string;
84
+ };
85
+ /**
86
+ * Gets the underlying server client for advanced operations
87
+ *
88
+ * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
89
+ *
90
+ * @returns The LlamaCppServerClient instance
91
+ */
92
+ getServerClient(): LlamaCppServerClient;
93
+ /**
94
+ * Formats messages for OpenAI-compatible API
95
+ *
96
+ * @param request - The internal LLM request
97
+ * @returns Formatted messages array
98
+ */
99
+ private formatMessages;
100
+ /**
101
+ * Creates a standardized success response from llama.cpp's response
102
+ *
103
+ * @param completion - Raw OpenAI-compatible completion response
104
+ * @param request - Original request for context
105
+ * @returns Standardized LLM response
106
+ */
107
+ private createSuccessResponse;
108
+ /**
109
+ * Creates a standardized error response from an error
110
+ *
111
+ * @param error - The error that occurred
112
+ * @param request - Original request for context
113
+ * @returns Standardized LLM failure response
114
+ */
115
+ private createErrorResponse;
116
+ }
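
For reference, the `getServerClient()` accessor declared above lets a single adapter serve both chat and utility endpoints; a minimal sketch follows, with the `tokenize` call shape taken from the README examples.

```typescript
import { LLMService, LlamaCppClientAdapter } from 'genai-lite';

const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });

const service = new LLMService(async () => 'not-needed');
service.registerAdapter('llamacpp', adapter);

// Reuse the adapter's underlying server client for non-chat endpoints.
const serverClient = adapter.getServerClient();
const { tokens } = await serverClient.tokenize('How long is this prompt?');
console.log(`Prompt is ${tokens.length} tokens`);
```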