genai-lite 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +374 -14
- package/dist/index.d.ts +5 -0
- package/dist/index.js +8 -1
- package/dist/llm/LLMService.test.js +28 -9
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +116 -0
- package/dist/llm/clients/LlamaCppClientAdapter.js +289 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.js +447 -0
- package/dist/llm/clients/LlamaCppServerClient.d.ts +161 -0
- package/dist/llm/clients/LlamaCppServerClient.js +192 -0
- package/dist/llm/clients/LlamaCppServerClient.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppServerClient.test.js +294 -0
- package/dist/llm/config.d.ts +12 -0
- package/dist/llm/config.js +77 -0
- package/dist/llm/services/ModelResolver.js +13 -13
- package/dist/llm/services/ModelResolver.test.js +25 -4
- package/dist/llm/types.d.ts +6 -0
- package/dist/providers/fromEnvironment.d.ts +4 -0
- package/dist/providers/fromEnvironment.js +8 -0
- package/dist/providers/fromEnvironment.test.js +13 -0
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,10 +1,11 @@
 # genai-lite
 
-A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers (OpenAI, Anthropic, Google Gemini, Mistral
+A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers, both cloud-based (OpenAI, Anthropic, Google Gemini, Mistral) and local (llama.cpp).
 
 ## Features
 
 - **Unified API** - Single interface for multiple AI providers
+- **Local & Cloud Models** - Run models locally with llama.cpp or use cloud APIs
 - **Flexible API Key Management** - Bring your own key storage solution
 - **Zero Electron Dependencies** - Works in any Node.js environment
 - **TypeScript First** - Full type safety and IntelliSense support
@@ -21,13 +22,14 @@ npm install genai-lite
 
 ## Quick Start
 
+### Cloud Providers (OpenAI, Anthropic, Gemini, Mistral)
+
 ```typescript
 import { LLMService, fromEnvironment } from 'genai-lite';
 
 // Create service with environment variable API key provider
 const llmService = new LLMService(fromEnvironment);
 
-// Option 1: Direct message sending
 const response = await llmService.sendMessage({
   providerId: 'openai',
   modelId: 'gpt-4.1-mini',
@@ -37,26 +39,47 @@ const response = await llmService.sendMessage({
   ]
 });
 
-
-
-
-
-
-
+if (response.object === 'chat.completion') {
+  console.log(response.choices[0].message.content);
+} else {
+  console.error('Error:', response.error.message);
+}
+```
 
-
-
-
-
+### Local Models (llama.cpp)
+
+```typescript
+import { LLMService } from 'genai-lite';
+
+// Start llama.cpp server first: llama-server -m /path/to/model.gguf --port 8080
+const llmService = new LLMService(async () => 'not-needed');
+
+const response = await llmService.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Must match your loaded model
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user', content: 'Explain quantum computing briefly.' }
+  ]
 });
 
 if (response.object === 'chat.completion') {
   console.log(response.choices[0].message.content);
-} else {
-  console.error('Error:', response.error.message);
 }
 ```
 
+See the [llama.cpp Integration](#llamacpp-integration) section for setup details.
+
+## Example Application
+
+For a complete, production-ready example showcasing all genai-lite capabilities, see the **[chat-demo](examples/chat-demo)** interactive web application. The demo includes:
+- Multi-provider chat interface with all supported providers
+- Template rendering and model presets
+- llama.cpp utilities (tokenization, embeddings, health checks)
+- Settings persistence, export/import features
+
+The chat-demo serves as both a comprehensive showcase and a quick-test environment for library changes.
+
 ## API Key Management
 
 genai-lite uses a flexible API key provider pattern. You can use the built-in environment variable provider or create your own:
@@ -124,6 +147,64 @@ const llmService = new LLMService(myKeyProvider);
 - `codestral-2501` - Specialized for code generation
 - `devstral-small-2505` - Compact development-focused model
 
+### llama.cpp (Local Models)
+
+Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name; they're not validated since you load your own GGUF models.
+
+**Example models:**
+- `llama-3-8b-instruct` - Llama 3 8B Instruct
+- `llama-3-70b-instruct` - Llama 3 70B Instruct
+- `mistral-7b-instruct` - Mistral 7B Instruct
+- `my-custom-model` - Any custom model you've loaded
+
+**Setup:**
+
+1. Start llama.cpp server with your model:
+```bash
+llama-server -m /path/to/model.gguf --port 8080
+```
+
+2. Use with genai-lite (no API key needed):
+```typescript
+import { LLMService } from 'genai-lite';
+
+// API key can be any string for llama.cpp
+const service = new LLMService(async () => 'not-needed');
+
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Must match your loaded model name
+  messages: [{ role: 'user', content: 'Hello!' }]
+});
+```
+
+3. Configure server URL via environment variable:
+```bash
+export LLAMACPP_API_BASE_URL=http://localhost:8080
+```
+
+**Advanced features** - Access non-LLM endpoints:
+
+```typescript
+import { LlamaCppServerClient } from 'genai-lite';
+
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Check server health
+const health = await client.getHealth();
+
+// Tokenize text
+const { tokens } = await client.tokenize('Hello world');
+
+// Generate embeddings
+const { embedding } = await client.createEmbedding('Some text');
+
+// Code completion
+const result = await client.infill('def hello():\n', '\nprint("done")');
+```
+
+See the [llama.cpp Integration](#llamacpp-integration) section for details.
+
 ### Models with Reasoning Support
 
 Some models include advanced reasoning/thinking capabilities that enhance their problem-solving abilities:
@@ -666,6 +747,261 @@ if (response.object === 'error') {
 }
 ```
 
+## llama.cpp Integration
+
+`genai-lite` provides comprehensive support for running local LLMs via [llama.cpp](https://github.com/ggml-org/llama.cpp) server, enabling completely offline AI capabilities with the same unified interface.
+
+### Why llama.cpp?
+
+- **Privacy**: All model inference runs locally on your hardware
+- **Cost**: No API costs after initial model download
+- **Control**: Use any GGUF model from Hugging Face
+- **Performance**: Optimized C++ implementation with hardware acceleration
+
+### Setup
+
+#### 1. Install llama.cpp
+
+```bash
+# Clone and build llama.cpp
+git clone https://github.com/ggml-org/llama.cpp
+cd llama.cpp
+make
+
+# Or download pre-built binaries from releases
+```
+
+#### 2. Download a Model
+
+Get GGUF models from Hugging Face, for example:
+- [Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
+- [Mistral-7B-Instruct-v0.3-GGUF](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
+
+#### 3. Start the Server
+
+```bash
+# Basic usage
+llama-server -m /path/to/model.gguf --port 8080
+
+# With more options
+llama-server -m /path/to/model.gguf \
+  --port 8080 \
+  -c 4096 \ # Context size
+  -np 4 \ # Parallel requests
+  --threads 8 # CPU threads
+```
+
+### Basic Usage
+
+```typescript
+import { LLMService } from 'genai-lite';
+
+// llama.cpp doesn't need API keys
+const service = new LLMService(async () => 'not-needed');
+
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Arbitrary name matching your model
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user', content: 'Explain quantum computing in simple terms.' }
+  ],
+  settings: {
+    temperature: 0.7,
+    maxTokens: 500
+  }
+});
+
+if (response.object === 'chat.completion') {
+  console.log(response.choices[0].message.content);
+}
+```
+
+### Configuration
+
+#### Environment Variable
+
+Set the server URL via environment variable (default: `http://localhost:8080`):
+
+```bash
+export LLAMACPP_API_BASE_URL=http://localhost:8080
+```
+
+#### Multiple Servers
+
+Register multiple llama.cpp instances for different models:
+
+```typescript
+import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
+
+const service = new LLMService(async () => 'not-needed');
+
+// Register adapters for different servers/models
+service.registerAdapter(
+  'llamacpp-small',
+  new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' })
+);
+
+service.registerAdapter(
+  'llamacpp-large',
+  new LlamaCppClientAdapter({ baseURL: 'http://localhost:8081' })
+);
+
+// Use them
+const response = await service.sendMessage({
+  providerId: 'llamacpp-small',
+  modelId: 'llama-3-8b',
+  messages: [{ role: 'user', content: 'Hello!' }]
+});
+```
+
+#### Health Checking
+
+Enable automatic health checks before requests:
+
+```typescript
+import { LlamaCppClientAdapter } from 'genai-lite';
+
+const adapter = new LlamaCppClientAdapter({
+  baseURL: 'http://localhost:8080',
+  checkHealth: true // Check server status before each request
+});
+
+service.registerAdapter('llamacpp', adapter);
+```
+
+### Advanced Features
+
+#### Server Management
+
+The `LlamaCppServerClient` class provides access to all llama.cpp server endpoints:
+
+```typescript
+import { LlamaCppServerClient } from 'genai-lite';
+
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Health monitoring
+const health = await client.getHealth();
+console.log(health.status); // 'ok', 'loading', or 'error'
+
+// Server properties
+const props = await client.getProps();
+console.log(props.total_slots); // Number of available slots
+
+// Performance metrics (if enabled)
+const metrics = await client.getMetrics();
+```
+
+#### Tokenization
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Tokenize text
+const { tokens } = await client.tokenize('Hello, world!');
+console.log(tokens); // [123, 456, 789]
+
+// Count tokens before sending to LLM
+const prompt = 'Long text...';
+const { tokens: promptTokens } = await client.tokenize(prompt);
+if (promptTokens.length > 4000) {
+  console.log('Prompt too long, truncating...');
+}
+
+// Detokenize back to text
+const { content } = await client.detokenize([123, 456, 789]);
+console.log(content); // 'Hello, world!'
+```
+
+#### Text Embeddings
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Generate embeddings for semantic search
+const { embedding } = await client.createEmbedding('Search query text');
+console.log(embedding.length); // e.g., 768 dimensions
+
+// With images (for multimodal models)
+const { embedding: multimodalEmbed } = await client.createEmbedding(
+  'Describe this image',
+  'base64_image_data_here'
+);
+```
+
+#### Code Infilling
+
+Perfect for code completion in IDEs:
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+const result = await client.infill(
+  'def calculate_fibonacci(n):\n ', // Prefix (before cursor)
+  '\n return result' // Suffix (after cursor)
+);
+
+console.log(result.content);
+// Output: "if n <= 1:\n return n\n result = calculate_fibonacci(n-1) + calculate_fibonacci(n-2)"
+```
+
+### Error Handling
+
+```typescript
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'my-model',
+  messages: [{ role: 'user', content: 'Hello' }]
+});
+
+if (response.object === 'error') {
+  switch (response.error.code) {
+    case 'NETWORK_ERROR':
+      console.error('Server not running or unreachable');
+      break;
+    case 'PROVIDER_ERROR':
+      console.error('Server error:', response.error.message);
+      break;
+    default:
+      console.error('Unknown error:', response.error);
+  }
+}
+```
+
+### Best Practices
+
+1. **Model Naming**: Use descriptive model IDs (e.g., `llama-3-8b-instruct`) since llama.cpp accepts any name
+2. **Context Size**: Set appropriate context (`-c` flag) when starting the server
+3. **Parallel Requests**: Configure slots (`-np`) based on your hardware
+4. **Health Monitoring**: Enable `checkHealth` for production to detect server issues early
+5. **Resource Management**: Monitor memory usage; large models need significant RAM
+
+### Troubleshooting
+
+**Server not responding:**
+```bash
+# Check if server is running
+curl http://localhost:8080/health
+
+# Should return: {"status":"ok"}
+```
+
+**Model loading errors:**
+```bash
+# Increase memory or reduce context size
+llama-server -m model.gguf --port 8080 -c 2048
+```
+
+**Slow responses:**
+```bash
+# Use quantized models (smaller but faster)
+# e.g., Q4_K_M, Q5_K_M instead of F16
+
+# Increase threads
+llama-server -m model.gguf --threads 16
+```
+
 ## Using with Electron
 
 `genai-lite` is designed to work seamlessly within an Electron application's main process, especially when paired with a secure storage solution like `genai-key-storage-lite`.
@@ -725,6 +1061,26 @@ import type {
   CreateMessagesResult,
   TemplateMetadata
 } from 'genai-lite';
+
+// llama.cpp integration types and classes
+import {
+  LlamaCppClientAdapter,
+  LlamaCppServerClient,
+  createFallbackModelInfo
+} from 'genai-lite';
+
+import type {
+  LlamaCppClientConfig,
+  LlamaCppHealthResponse,
+  LlamaCppTokenizeResponse,
+  LlamaCppDetokenizeResponse,
+  LlamaCppEmbeddingResponse,
+  LlamaCppInfillResponse,
+  LlamaCppPropsResponse,
+  LlamaCppMetricsResponse,
+  LlamaCppSlot,
+  LlamaCppSlotsResponse
+} from 'genai-lite';
 ```
 
 ## Utilities
@@ -1106,6 +1462,10 @@ These utilities enable:
 - **Template Reusability**: Define templates once, use with different variables
 - **Type Safety**: Full TypeScript support with LLMMessage types
 
+## Examples
+
+See the **[chat-demo](examples/chat-demo)** application for a complete working example that demonstrates all library features in a production-ready React + Express application.
+
 ## Contributing
 
 Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
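One pattern the README additions above imply but do not spell out is combining the `LlamaCppServerClient` utilities with `LLMService` in a single workflow, for example checking prompt length before dispatching a chat request. A minimal sketch, assuming the APIs shown in the README; the 4096-token budget, server URL, and model name are illustrative:

```typescript
import { LLMService, LlamaCppServerClient } from 'genai-lite';

const service = new LLMService(async () => 'not-needed'); // llama.cpp ignores the key
const client = new LlamaCppServerClient('http://localhost:8080');

const prompt = 'Long user prompt...';

// Guard against overflowing the server's context window before sending (budget is illustrative).
const { tokens } = await client.tokenize(prompt);
if (tokens.length > 4096) {
  throw new Error(`Prompt is ${tokens.length} tokens; trim it before sending.`);
}

const response = await service.sendMessage({
  providerId: 'llamacpp',
  modelId: 'llama-3-8b-instruct', // must match the model loaded by llama-server
  messages: [{ role: 'user', content: prompt }],
});

if (response.object === 'chat.completion') {
  console.log(response.choices[0].message.content);
}
```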
package/dist/index.d.ts
CHANGED
@@ -5,7 +5,12 @@ export type { ModelPreset } from "./types/presets";
 export * from "./llm/types";
 export * from "./llm/clients/types";
 export { fromEnvironment } from "./providers/fromEnvironment";
+export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
+export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
+export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
+export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
 export { renderTemplate } from "./prompting/template";
 export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
 export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
 export type { TemplateMetadata } from "./prompting/parser";
+export { createFallbackModelInfo } from "./llm/config";
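For orientation, here is a short sketch (not taken from the package) of how the surface declared above looks from consumer code. The `reportHealth` helper and the server URL are illustrative assumptions, and `createFallbackModelInfo`, while also exported, is omitted because its signature does not appear in this diff:

```typescript
import { LlamaCppClientAdapter, LlamaCppServerClient } from 'genai-lite';
import type { LlamaCppClientConfig, LlamaCppHealthResponse } from 'genai-lite';

const config: LlamaCppClientConfig = {
  baseURL: 'http://localhost:8080', // documented default
  checkHealth: true,                // optional pre-request health check
};

const adapter = new LlamaCppClientAdapter(config);
const server: LlamaCppServerClient = adapter.getServerClient();

// Hypothetical helper: report server status using the newly exported response type.
async function reportHealth(): Promise<void> {
  const health: LlamaCppHealthResponse = await server.getHealth();
  console.log('llama.cpp server status:', health.status); // 'ok', 'loading', or 'error'
}
```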
package/dist/index.js
CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.fromEnvironment = exports.LLMService = void 0;
+exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
 // --- LLM Service ---
 var LLMService_1 = require("./llm/LLMService");
 Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -25,6 +25,11 @@ __exportStar(require("./llm/clients/types"), exports);
 // --- API Key Providers ---
 var fromEnvironment_1 = require("./providers/fromEnvironment");
 Object.defineProperty(exports, "fromEnvironment", { enumerable: true, get: function () { return fromEnvironment_1.fromEnvironment; } });
+// --- llama.cpp Integration ---
+var LlamaCppClientAdapter_1 = require("./llm/clients/LlamaCppClientAdapter");
+Object.defineProperty(exports, "LlamaCppClientAdapter", { enumerable: true, get: function () { return LlamaCppClientAdapter_1.LlamaCppClientAdapter; } });
+var LlamaCppServerClient_1 = require("./llm/clients/LlamaCppServerClient");
+Object.defineProperty(exports, "LlamaCppServerClient", { enumerable: true, get: function () { return LlamaCppServerClient_1.LlamaCppServerClient; } });
 // --- Utilities ---
 var template_1 = require("./prompting/template");
 Object.defineProperty(exports, "renderTemplate", { enumerable: true, get: function () { return template_1.renderTemplate; } });
@@ -37,3 +42,5 @@ Object.defineProperty(exports, "parseStructuredContent", { enumerable: true, get
 Object.defineProperty(exports, "parseRoleTags", { enumerable: true, get: function () { return parser_1.parseRoleTags; } });
 Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true, get: function () { return parser_1.extractInitialTaggedContent; } });
 Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
+var config_1 = require("./llm/config");
+Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
package/dist/llm/LLMService.test.js
CHANGED
@@ -44,17 +44,34 @@ describe('LLMService', () => {
         expect(errorResponse.error.code).toBe('UNSUPPORTED_PROVIDER');
         expect(errorResponse.error.message).toContain('Unsupported provider');
     });
-    it('should
+    it('should succeed with fallback for unknown model', async () => {
         const request = {
-            providerId: '
+            providerId: 'mock', // Use mock provider to avoid real API calls
             modelId: 'unsupported-model',
             messages: [{ role: 'user', content: 'Hello' }]
         };
         const response = await service.sendMessage(request);
-
-
-
-
+        // Should succeed with mock response (not error) even for unknown model
+        expect(response.object).toBe('chat.completion');
+    });
+    it('should silently work with flexible providers unknown models (no warning)', async () => {
+        const warnings = [];
+        const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation((msg) => {
+            warnings.push(msg);
+        });
+        // Test with mock provider (which has allowUnknownModels: true)
+        const request = {
+            providerId: 'mock',
+            modelId: 'totally-unknown-model-xyz',
+            messages: [{ role: 'user', content: 'Testing flexible provider' }]
+        };
+        const response = await service.sendMessage(request);
+        // Should succeed with mock response
+        expect(response.object).toBe('chat.completion');
+        // Should NOT warn about unknown model (filter out adapter constructor warnings)
+        const unknownModelWarnings = warnings.filter(w => !w.includes('No adapter constructor'));
+        expect(unknownModelWarnings.length).toBe(0); // No warnings for flexible providers
+        consoleWarnSpy.mockRestore();
     });
     it('should return validation error for empty messages', async () => {
         const request = {
@@ -160,8 +177,8 @@ describe('LLMService', () => {
         // Second request to same provider
         request.messages = [{ role: 'user', content: 'Second request' }];
         await service.sendMessage(request);
-        // API key provider should be called
-        expect(mockApiKeyProvider).toHaveBeenCalledTimes(
+        // API key provider should be called once per unique provider (mock provider now registered)
+        expect(mockApiKeyProvider).toHaveBeenCalledTimes(2);
     });
 });
 describe('settings management', () => {
@@ -325,11 +342,13 @@ describe('LLMService', () => {
     describe('getProviders', () => {
         it('should return all supported providers', async () => {
             const providers = await service.getProviders();
-            expect(providers).toHaveLength(
+            expect(providers).toHaveLength(6);
             expect(providers.find(p => p.id === 'openai')).toBeDefined();
             expect(providers.find(p => p.id === 'anthropic')).toBeDefined();
             expect(providers.find(p => p.id === 'gemini')).toBeDefined();
             expect(providers.find(p => p.id === 'mistral')).toBeDefined();
+            expect(providers.find(p => p.id === 'llamacpp')).toBeDefined();
+            expect(providers.find(p => p.id === 'mock')).toBeDefined();
         });
         it('should include provider metadata', async () => {
             const providers = await service.getProviders();
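The test changes above pin down the new runtime behavior: `getProviders()` now reports six providers (including `llamacpp` and a `mock` provider), and unknown model IDs fall back cleanly for flexible providers. A small consumer-side sketch of the same availability check; `assertLlamaCppAvailable` is a hypothetical helper name:

```typescript
import { LLMService, fromEnvironment } from 'genai-lite';

const service = new LLMService(fromEnvironment);

// Hypothetical startup check mirroring the assertion in the updated test suite.
async function assertLlamaCppAvailable(): Promise<void> {
  const providers = await service.getProviders();
  if (!providers.find((p) => p.id === 'llamacpp')) {
    throw new Error('llamacpp provider is not registered in this build of genai-lite');
  }
}
```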
package/dist/llm/clients/LlamaCppClientAdapter.d.ts
ADDED
@@ -0,0 +1,116 @@
+import type { LLMResponse, LLMFailureResponse } from "../types";
+import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
+import { LlamaCppServerClient } from "./LlamaCppServerClient";
+/**
+ * Configuration options for LlamaCppClientAdapter
+ */
+export interface LlamaCppClientConfig {
+    /** Base URL of the llama.cpp server (default: http://localhost:8080) */
+    baseURL?: string;
+    /** Whether to check server health before sending requests (default: false) */
+    checkHealth?: boolean;
+}
+/**
+ * Client adapter for llama.cpp server integration
+ *
+ * This adapter provides integration with llama.cpp server via its OpenAI-compatible
+ * /v1/chat/completions endpoint. It uses the OpenAI SDK internally, making it compatible
+ * with llama.cpp's OpenAI-compatible API.
+ *
+ * Key features:
+ * - Uses llama.cpp's OpenAI-compatible chat completions endpoint
+ * - Optional health check before requests
+ * - No API key required (llama.cpp is a local server)
+ * - Supports all standard LLM settings
+ *
+ * Note: Model IDs are not validated against a predefined list since llama.cpp
+ * serves whatever model is loaded. Users must specify the correct model name.
+ *
+ * @example
+ * ```typescript
+ * // Create adapter for local server
+ * const adapter = new LlamaCppClientAdapter({
+ *   baseURL: 'http://localhost:8080',
+ *   checkHealth: true
+ * });
+ *
+ * // Register with LLMService
+ * service.registerAdapter('llamacpp', adapter);
+ *
+ * // Use via LLMService
+ * const response = await service.sendMessage({
+ *   providerId: 'llamacpp',
+ *   modelId: 'llama-3-8b-instruct',
+ *   messages: [{ role: 'user', content: 'Hello!' }]
+ * });
+ * ```
+ */
+export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
+    private baseURL;
+    private checkHealth;
+    private serverClient;
+    /**
+     * Creates a new llama.cpp client adapter
+     *
+     * @param config Optional configuration for the adapter
+     */
+    constructor(config?: LlamaCppClientConfig);
+    /**
+     * Sends a chat message to llama.cpp server
+     *
+     * @param request - The internal LLM request with applied settings
+     * @param apiKey - Not used for llama.cpp (local server), but kept for interface compatibility
+     * @returns Promise resolving to success or failure response
+     */
+    sendMessage(request: InternalLLMChatRequest, apiKey: string): Promise<LLMResponse | LLMFailureResponse>;
+    /**
+     * Validates API key format
+     *
+     * For llama.cpp, API keys are not required, so this always returns true.
+     * The method is implemented for interface compatibility.
+     *
+     * @param apiKey - The API key (ignored)
+     * @returns Always true
+     */
+    validateApiKey(apiKey: string): boolean;
+    /**
+     * Gets adapter information
+     */
+    getAdapterInfo(): {
+        providerId: "llamacpp";
+        name: string;
+        version: string;
+        baseURL: string;
+    };
+    /**
+     * Gets the underlying server client for advanced operations
+     *
+     * This allows access to non-LLM endpoints like tokenize, embedding, health, etc.
+     *
+     * @returns The LlamaCppServerClient instance
+     */
+    getServerClient(): LlamaCppServerClient;
+    /**
+     * Formats messages for OpenAI-compatible API
+     *
+     * @param request - The internal LLM request
+     * @returns Formatted messages array
+     */
+    private formatMessages;
+    /**
+     * Creates a standardized success response from llama.cpp's response
+     *
+     * @param completion - Raw OpenAI-compatible completion response
+     * @param request - Original request for context
+     * @returns Standardized LLM response
+     */
+    private createSuccessResponse;
+    /**
+     * Creates a standardized error response from an error
+     *
+     * @param error - The error that occurred
+     * @param request - Original request for context
+     * @returns Standardized LLM failure response
+     */
+    private createErrorResponse;
+}