genai-lite 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +380 -15
- package/dist/index.d.ts +5 -0
- package/dist/index.js +8 -1
- package/dist/llm/LLMService.js +8 -0
- package/dist/llm/LLMService.test.js +57 -9
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +116 -0
- package/dist/llm/clients/LlamaCppClientAdapter.js +289 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppClientAdapter.test.js +447 -0
- package/dist/llm/clients/LlamaCppServerClient.d.ts +161 -0
- package/dist/llm/clients/LlamaCppServerClient.js +192 -0
- package/dist/llm/clients/LlamaCppServerClient.test.d.ts +1 -0
- package/dist/llm/clients/LlamaCppServerClient.test.js +294 -0
- package/dist/llm/config.d.ts +12 -0
- package/dist/llm/config.js +77 -0
- package/dist/llm/services/ModelResolver.js +13 -13
- package/dist/llm/services/ModelResolver.test.js +25 -4
- package/dist/llm/types.d.ts +8 -0
- package/dist/providers/fromEnvironment.d.ts +4 -0
- package/dist/providers/fromEnvironment.js +8 -0
- package/dist/providers/fromEnvironment.test.js +13 -0
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,10 +1,11 @@
 # genai-lite
 
-A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers (OpenAI, Anthropic, Google Gemini, Mistral
+A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers - both cloud-based (OpenAI, Anthropic, Google Gemini, Mistral) and local (llama.cpp).
 
 ## Features
 
 - **Unified API** - Single interface for multiple AI providers
+- **Local & Cloud Models** - Run models locally with llama.cpp or use cloud APIs
 - **Flexible API Key Management** - Bring your own key storage solution
 - **Zero Electron Dependencies** - Works in any Node.js environment
 - **TypeScript First** - Full type safety and IntelliSense support
@@ -21,13 +22,14 @@ npm install genai-lite
 
 ## Quick Start
 
+### Cloud Providers (OpenAI, Anthropic, Gemini, Mistral)
+
 ```typescript
 import { LLMService, fromEnvironment } from 'genai-lite';
 
 // Create service with environment variable API key provider
 const llmService = new LLMService(fromEnvironment);
 
-// Option 1: Direct message sending
 const response = await llmService.sendMessage({
   providerId: 'openai',
   modelId: 'gpt-4.1-mini',
@@ -37,26 +39,47 @@ const response = await llmService.sendMessage({
   ]
 });
 
-
-
-
-
-
-
+if (response.object === 'chat.completion') {
+  console.log(response.choices[0].message.content);
+} else {
+  console.error('Error:', response.error.message);
+}
+```
 
-
-
-
-
+### Local Models (llama.cpp)
+
+```typescript
+import { LLMService } from 'genai-lite';
+
+// Start llama.cpp server first: llama-server -m /path/to/model.gguf --port 8080
+const llmService = new LLMService(async () => 'not-needed');
+
+const response = await llmService.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Must match your loaded model
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user', content: 'Explain quantum computing briefly.' }
+  ]
 });
 
 if (response.object === 'chat.completion') {
   console.log(response.choices[0].message.content);
-} else {
-  console.error('Error:', response.error.message);
 }
 ```
 
+See the [llama.cpp Integration](#llamacpp-integration) section for setup details.
+
+## Example Application
+
+For a complete, production-ready example showcasing all genai-lite capabilities, see the **[chat-demo](examples/chat-demo)** interactive web application. The demo includes:
+- Multi-provider chat interface with all supported providers
+- Template rendering and model presets
+- llama.cpp utilities (tokenization, embeddings, health checks)
+- Settings persistence, export/import features
+
+The chat-demo serves as both a comprehensive showcase and a quick-test environment for library changes.
+
 ## API Key Management
 
 genai-lite uses a flexible API key provider pattern. You can use the built-in environment variable provider or create your own:
@@ -124,6 +147,64 @@ const llmService = new LLMService(myKeyProvider);
 - `codestral-2501` - Specialized for code generation
 - `devstral-small-2505` - Compact development-focused model
 
+### llama.cpp (Local Models)
+
+Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name - they're not validated since you load your own GGUF models.
+
+**Example models:**
+- `llama-3-8b-instruct` - Llama 3 8B Instruct
+- `llama-3-70b-instruct` - Llama 3 70B Instruct
+- `mistral-7b-instruct` - Mistral 7B Instruct
+- `my-custom-model` - Any custom model you've loaded
+
+**Setup:**
+
+1. Start llama.cpp server with your model:
+```bash
+llama-server -m /path/to/model.gguf --port 8080
+```
+
+2. Use with genai-lite (no API key needed):
+```typescript
+import { LLMService } from 'genai-lite';
+
+// API key can be any string for llama.cpp
+const service = new LLMService(async () => 'not-needed');
+
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Must match your loaded model name
+  messages: [{ role: 'user', content: 'Hello!' }]
+});
+```
+
+3. Configure server URL via environment variable:
+```bash
+export LLAMACPP_API_BASE_URL=http://localhost:8080
+```
+
+**Advanced features** - Access non-LLM endpoints:
+
+```typescript
+import { LlamaCppServerClient } from 'genai-lite';
+
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Check server health
+const health = await client.getHealth();
+
+// Tokenize text
+const { tokens } = await client.tokenize('Hello world');
+
+// Generate embeddings
+const { embedding } = await client.createEmbedding('Some text');
+
+// Code completion
+const result = await client.infill('def hello():\n', '\nprint("done")');
+```
+
+See the [llama.cpp Integration](#llamacpp-integration) section for details.
+
 ### Models with Reasoning Support
 
 Some models include advanced reasoning/thinking capabilities that enhance their problem-solving abilities:
@@ -268,7 +349,7 @@ The `onMissing` property controls what happens when the expected thinking tag is
 
 - `'ignore'`: Silently continue without the tag
 - `'warn'`: Log a warning but continue processing
-- `'error'`: Return an error response
+- `'error'`: Return an error response with the original response preserved in `partialResponse`
 - `'auto'` (default): Intelligently decide based on the model's native reasoning capabilities
 
 **How `'auto'` Mode Works:**
@@ -290,6 +371,7 @@ const response = await llmService.sendMessage({
   }
 });
 // Result: ERROR if <thinking> tag is missing (strict enforcement)
+// The response is still accessible via errorResponse.partialResponse
 
 // With native reasoning models (e.g., Claude with reasoning enabled)
 const response = await llmService.sendMessage({
@@ -654,6 +736,10 @@ if (response.object === 'error') {
       break;
     case 'validation_error':
      console.error('Invalid request:', response.error.message);
+      // For validation errors, the response may still be available
+      if (response.partialResponse) {
+        console.log('Partial response:', response.partialResponse.choices[0].message.content);
+      }
      break;
    default:
      console.error('Error:', response.error.message);
@@ -661,6 +747,261 @@ if (response.object === 'error') {
 }
 ```
 
+## llama.cpp Integration
+
+`genai-lite` provides comprehensive support for running local LLMs via [llama.cpp](https://github.com/ggml-org/llama.cpp) server, enabling completely offline AI capabilities with the same unified interface.
+
+### Why llama.cpp?
+
+- **Privacy**: All model inference runs locally on your hardware
+- **Cost**: No API costs after initial model download
+- **Control**: Use any GGUF model from Hugging Face
+- **Performance**: Optimized C++ implementation with hardware acceleration
+
+### Setup
+
+#### 1. Install llama.cpp
+
+```bash
+# Clone and build llama.cpp
+git clone https://github.com/ggml-org/llama.cpp
+cd llama.cpp
+make
+
+# Or download pre-built binaries from releases
+```
+
+#### 2. Download a Model
+
+Get GGUF models from Hugging Face, for example:
+- [Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
+- [Mistral-7B-Instruct-v0.3-GGUF](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
+
+#### 3. Start the Server
+
+```bash
+# Basic usage
+llama-server -m /path/to/model.gguf --port 8080
+
+# With more options
+llama-server -m /path/to/model.gguf \
+  --port 8080 \
+  -c 4096 \ # Context size
+  -np 4 \ # Parallel requests
+  --threads 8 # CPU threads
+```
+
+### Basic Usage
+
+```typescript
+import { LLMService } from 'genai-lite';
+
+// llama.cpp doesn't need API keys
+const service = new LLMService(async () => 'not-needed');
+
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'llama-3-8b-instruct', // Arbitrary name matching your model
+  messages: [
+    { role: 'system', content: 'You are a helpful assistant.' },
+    { role: 'user', content: 'Explain quantum computing in simple terms.' }
+  ],
+  settings: {
+    temperature: 0.7,
+    maxTokens: 500
+  }
+});
+
+if (response.object === 'chat.completion') {
+  console.log(response.choices[0].message.content);
+}
+```
+
+### Configuration
+
+#### Environment Variable
+
+Set the server URL via environment variable (default: `http://localhost:8080`):
+
+```bash
+export LLAMACPP_API_BASE_URL=http://localhost:8080
+```
+
+#### Multiple Servers
+
+Register multiple llama.cpp instances for different models:
+
+```typescript
+import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
+
+const service = new LLMService(async () => 'not-needed');
+
+// Register adapters for different servers/models
+service.registerAdapter(
+  'llamacpp-small',
+  new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' })
+);
+
+service.registerAdapter(
+  'llamacpp-large',
+  new LlamaCppClientAdapter({ baseURL: 'http://localhost:8081' })
+);
+
+// Use them
+const response = await service.sendMessage({
+  providerId: 'llamacpp-small',
+  modelId: 'llama-3-8b',
+  messages: [{ role: 'user', content: 'Hello!' }]
+});
+```
+
+#### Health Checking
+
+Enable automatic health checks before requests:
+
+```typescript
+import { LlamaCppClientAdapter } from 'genai-lite';
+
+const adapter = new LlamaCppClientAdapter({
+  baseURL: 'http://localhost:8080',
+  checkHealth: true // Check server status before each request
+});
+
+service.registerAdapter('llamacpp', adapter);
+```
+
+### Advanced Features
+
+#### Server Management
+
+The `LlamaCppServerClient` class provides access to all llama.cpp server endpoints:
+
+```typescript
+import { LlamaCppServerClient } from 'genai-lite';
+
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Health monitoring
+const health = await client.getHealth();
+console.log(health.status); // 'ok', 'loading', or 'error'
+
+// Server properties
+const props = await client.getProps();
+console.log(props.total_slots); // Number of available slots
+
+// Performance metrics (if enabled)
+const metrics = await client.getMetrics();
+```
+
+#### Tokenization
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Tokenize text
+const { tokens } = await client.tokenize('Hello, world!');
+console.log(tokens); // [123, 456, 789]
+
+// Count tokens before sending to LLM
+const prompt = 'Long text...';
+const { tokens: promptTokens } = await client.tokenize(prompt);
+if (promptTokens.length > 4000) {
+  console.log('Prompt too long, truncating...');
+}
+
+// Detokenize back to text
+const { content } = await client.detokenize([123, 456, 789]);
+console.log(content); // 'Hello, world!'
+```
+
+#### Text Embeddings
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+// Generate embeddings for semantic search
+const { embedding } = await client.createEmbedding('Search query text');
+console.log(embedding.length); // e.g., 768 dimensions
+
+// With images (for multimodal models)
+const { embedding: multimodalEmbed } = await client.createEmbedding(
+  'Describe this image',
+  'base64_image_data_here'
+);
+```
+
+#### Code Infilling
+
+Perfect for code completion in IDEs:
+
+```typescript
+const client = new LlamaCppServerClient('http://localhost:8080');
+
+const result = await client.infill(
+  'def calculate_fibonacci(n):\n ', // Prefix (before cursor)
+  '\n return result' // Suffix (after cursor)
+);
+
+console.log(result.content);
+// Output: "if n <= 1:\n return n\n result = calculate_fibonacci(n-1) + calculate_fibonacci(n-2)"
+```
+
+### Error Handling
+
+```typescript
+const response = await service.sendMessage({
+  providerId: 'llamacpp',
+  modelId: 'my-model',
+  messages: [{ role: 'user', content: 'Hello' }]
+});
+
+if (response.object === 'error') {
+  switch (response.error.code) {
+    case 'NETWORK_ERROR':
+      console.error('Server not running or unreachable');
+      break;
+    case 'PROVIDER_ERROR':
+      console.error('Server error:', response.error.message);
+      break;
+    default:
+      console.error('Unknown error:', response.error);
+  }
+}
+```
+
+### Best Practices
+
+1. **Model Naming**: Use descriptive model IDs (e.g., `llama-3-8b-instruct`) since llama.cpp accepts any name
+2. **Context Size**: Set appropriate context (`-c` flag) when starting the server
+3. **Parallel Requests**: Configure slots (`-np`) based on your hardware
+4. **Health Monitoring**: Enable `checkHealth` for production to detect server issues early
+5. **Resource Management**: Monitor memory usage; large models need significant RAM
+
+### Troubleshooting
+
+**Server not responding:**
+```bash
+# Check if server is running
+curl http://localhost:8080/health
+
+# Should return: {"status":"ok"}
+```
+
+**Model loading errors:**
+```bash
+# Increase memory or reduce context size
+llama-server -m model.gguf --port 8080 -c 2048
+```
+
+**Slow responses:**
+```bash
+# Use quantized models (smaller but faster)
+# e.g., Q4_K_M, Q5_K_M instead of F16
+
+# Increase threads
+llama-server -m model.gguf --threads 16
+```
+
 ## Using with Electron
 
 `genai-lite` is designed to work seamlessly within an Electron application's main process, especially when paired with a secure storage solution like `genai-key-storage-lite`.
@@ -720,6 +1061,26 @@ import type {
   CreateMessagesResult,
   TemplateMetadata
 } from 'genai-lite';
+
+// llama.cpp integration types and classes
+import {
+  LlamaCppClientAdapter,
+  LlamaCppServerClient,
+  createFallbackModelInfo
+} from 'genai-lite';
+
+import type {
+  LlamaCppClientConfig,
+  LlamaCppHealthResponse,
+  LlamaCppTokenizeResponse,
+  LlamaCppDetokenizeResponse,
+  LlamaCppEmbeddingResponse,
+  LlamaCppInfillResponse,
+  LlamaCppPropsResponse,
+  LlamaCppMetricsResponse,
+  LlamaCppSlot,
+  LlamaCppSlotsResponse
+} from 'genai-lite';
 ```
 
 ## Utilities
@@ -1101,6 +1462,10 @@ These utilities enable:
 - **Template Reusability**: Define templates once, use with different variables
 - **Type Safety**: Full TypeScript support with LLMMessage types
 
+## Examples
+
+See the **[chat-demo](examples/chat-demo)** application for a complete working example that demonstrates all library features in a production-ready React + Express application.
+
 ## Contributing
 
 Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
package/dist/index.d.ts
CHANGED
@@ -5,7 +5,12 @@ export type { ModelPreset } from "./types/presets";
 export * from "./llm/types";
 export * from "./llm/clients/types";
 export { fromEnvironment } from "./providers/fromEnvironment";
+export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
+export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
+export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
+export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
 export { renderTemplate } from "./prompting/template";
 export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
 export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
 export type { TemplateMetadata } from "./prompting/parser";
+export { createFallbackModelInfo } from "./llm/config";
package/dist/index.js
CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.fromEnvironment = exports.LLMService = void 0;
+exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
 // --- LLM Service ---
 var LLMService_1 = require("./llm/LLMService");
 Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -25,6 +25,11 @@ __exportStar(require("./llm/clients/types"), exports);
 // --- API Key Providers ---
 var fromEnvironment_1 = require("./providers/fromEnvironment");
 Object.defineProperty(exports, "fromEnvironment", { enumerable: true, get: function () { return fromEnvironment_1.fromEnvironment; } });
+// --- llama.cpp Integration ---
+var LlamaCppClientAdapter_1 = require("./llm/clients/LlamaCppClientAdapter");
+Object.defineProperty(exports, "LlamaCppClientAdapter", { enumerable: true, get: function () { return LlamaCppClientAdapter_1.LlamaCppClientAdapter; } });
+var LlamaCppServerClient_1 = require("./llm/clients/LlamaCppServerClient");
+Object.defineProperty(exports, "LlamaCppServerClient", { enumerable: true, get: function () { return LlamaCppServerClient_1.LlamaCppServerClient; } });
 // --- Utilities ---
 var template_1 = require("./prompting/template");
 Object.defineProperty(exports, "renderTemplate", { enumerable: true, get: function () { return template_1.renderTemplate; } });
@@ -37,3 +42,5 @@ Object.defineProperty(exports, "parseStructuredContent", { enumerable: true, get
 Object.defineProperty(exports, "parseRoleTags", { enumerable: true, get: function () { return parser_1.parseRoleTags; } });
 Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true, get: function () { return parser_1.extractInitialTaggedContent; } });
 Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
+var config_1 = require("./llm/config");
+Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
package/dist/llm/LLMService.js
CHANGED
@@ -193,6 +193,14 @@ class LLMService {
       type: "validation_error",
     },
     object: "error",
+    partialResponse: {
+      id: result.id,
+      provider: result.provider,
+      model: result.model,
+      created: result.created,
+      choices: result.choices,
+      usage: result.usage
+    }
   };
 }
 else if (effectiveOnMissing === 'warn') {
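The hunk above shows the fields copied into the new `partialResponse` property when thinking-tag validation fails. As a rough orientation, a type-level sketch of the resulting error envelope; the property types are inferred from the README and tests in this diff, not copied from the package's published `.d.ts` files.

```typescript
// Inferred sketch only; see package/dist/llm/types.d.ts for the library's actual definitions.
interface PartialChatCompletion {
  id: string;
  provider: string;
  model: string;
  created: number;
  choices: Array<{ message: { role: string; content: string } }>;
  usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
}

interface MissingTagErrorResponse {
  object: 'error';
  error: {
    code: 'MISSING_EXPECTED_TAG';
    type: 'validation_error';
    message: string;
  };
  // New in 0.4.0: the original completion is preserved instead of being discarded.
  partialResponse?: PartialChatCompletion;
}
```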
package/dist/llm/LLMService.test.js
CHANGED
@@ -44,17 +44,34 @@ describe('LLMService', () => {
     expect(errorResponse.error.code).toBe('UNSUPPORTED_PROVIDER');
     expect(errorResponse.error.message).toContain('Unsupported provider');
   });
-  it('should
+  it('should succeed with fallback for unknown model', async () => {
     const request = {
-      providerId: '
+      providerId: 'mock', // Use mock provider to avoid real API calls
       modelId: 'unsupported-model',
       messages: [{ role: 'user', content: 'Hello' }]
     };
     const response = await service.sendMessage(request);
-
-
-
-
+    // Should succeed with mock response (not error) even for unknown model
+    expect(response.object).toBe('chat.completion');
+  });
+  it('should silently work with flexible providers unknown models (no warning)', async () => {
+    const warnings = [];
+    const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation((msg) => {
+      warnings.push(msg);
+    });
+    // Test with mock provider (which has allowUnknownModels: true)
+    const request = {
+      providerId: 'mock',
+      modelId: 'totally-unknown-model-xyz',
+      messages: [{ role: 'user', content: 'Testing flexible provider' }]
+    };
+    const response = await service.sendMessage(request);
+    // Should succeed with mock response
+    expect(response.object).toBe('chat.completion');
+    // Should NOT warn about unknown model (filter out adapter constructor warnings)
+    const unknownModelWarnings = warnings.filter(w => !w.includes('No adapter constructor'));
+    expect(unknownModelWarnings.length).toBe(0); // No warnings for flexible providers
+    consoleWarnSpy.mockRestore();
   });
   it('should return validation error for empty messages', async () => {
     const request = {
@@ -160,8 +177,8 @@ describe('LLMService', () => {
     // Second request to same provider
     request.messages = [{ role: 'user', content: 'Second request' }];
     await service.sendMessage(request);
-    // API key provider should be called
-    expect(mockApiKeyProvider).toHaveBeenCalledTimes(
+    // API key provider should be called once per unique provider (mock provider now registered)
+    expect(mockApiKeyProvider).toHaveBeenCalledTimes(2);
   });
 });
 describe('settings management', () => {
@@ -325,11 +342,13 @@ describe('LLMService', () => {
 describe('getProviders', () => {
   it('should return all supported providers', async () => {
     const providers = await service.getProviders();
-    expect(providers).toHaveLength(
+    expect(providers).toHaveLength(6);
     expect(providers.find(p => p.id === 'openai')).toBeDefined();
     expect(providers.find(p => p.id === 'anthropic')).toBeDefined();
     expect(providers.find(p => p.id === 'gemini')).toBeDefined();
     expect(providers.find(p => p.id === 'mistral')).toBeDefined();
+    expect(providers.find(p => p.id === 'llamacpp')).toBeDefined();
+    expect(providers.find(p => p.id === 'mock')).toBeDefined();
   });
   it('should include provider metadata', async () => {
     const providers = await service.getProviders();
@@ -488,6 +507,9 @@ describe('LLMService', () => {
     expect(errorResponse.error.type).toBe('validation_error');
     expect(errorResponse.error.message).toContain('response was expected to start with a <thinking> tag');
     expect(errorResponse.error.message).toContain('does not have native reasoning active');
+    // Check that partial response is included
+    expect(errorResponse.partialResponse).toBeDefined();
+    expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
   });
   it('should handle missing tag for non-reasoning model with warn', async () => {
     const consoleSpy = jest.spyOn(console, 'warn').mockImplementation();
@@ -509,6 +531,27 @@ describe('LLMService', () => {
     expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Expected <thinking> tag was not found'));
     consoleSpy.mockRestore();
   });
+  it('should handle missing tag with explicit error mode', async () => {
+    const request = {
+      providerId: 'mistral',
+      modelId: 'codestral-2501',
+      messages: [{ role: 'user', content: 'test_thinking:Response without thinking tag.' }],
+      settings: {
+        thinkingExtraction: {
+          enabled: true,
+          onMissing: 'error' // Explicitly set to error
+        }
+      }
+    };
+    const response = await service.sendMessage(request);
+    expect(response.object).toBe('error');
+    const errorResponse = response;
+    expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
+    expect(errorResponse.error.message).toContain('response was expected to start with a <thinking> tag');
+    // Check that partial response is included
+    expect(errorResponse.partialResponse).toBeDefined();
+    expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
+  });
   it('should handle missing tag for non-reasoning model with ignore', async () => {
     const request = {
       providerId: 'mistral',
@@ -543,6 +586,8 @@ describe('LLMService', () => {
     expect(response.object).toBe('error');
     const errorResponse = response;
     expect(errorResponse.error.message).toContain('expected to start with a <reasoning> tag');
+    expect(errorResponse.partialResponse).toBeDefined();
+    expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without custom tag.');
   });
 });
 describe('auto mode with native reasoning detection', () => {
   it('should enforce thinking tags for non-reasoning models by default', async () => {
@@ -564,6 +609,8 @@ describe('LLMService', () => {
     const errorResponse = response;
     expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
     expect(errorResponse.error.message).toContain('does not have native reasoning active');
+    expect(errorResponse.partialResponse).toBeDefined();
+    expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
   });
   it('should respect explicit reasoning.enabled: false even for models with enabledByDefault', async () => {
     // This is the key test for the fix
@@ -584,6 +631,7 @@ describe('LLMService', () => {
     expect(response.object).toBe('error');
     const errorResponse = response;
     expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
+    expect(errorResponse.partialResponse).toBeDefined();
   });
 });
 });
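Taken together, the new tests pin down how `onMissing: 'error'` behaves in 0.4.0. A short usage sketch mirroring what they assert; the provider, model, and prompt below are placeholders, not values the tests require.

```typescript
import { LLMService, fromEnvironment } from 'genai-lite';

const service = new LLMService(fromEnvironment);

const response = await service.sendMessage({
  providerId: 'mistral',        // placeholder; any supported provider works
  modelId: 'codestral-2501',
  messages: [{ role: 'user', content: 'Think step by step, then answer.' }],
  settings: {
    thinkingExtraction: { enabled: true, onMissing: 'error' }
  }
});

if (response.object === 'error') {
  // New in 0.4.0: the model's raw completion survives the MISSING_EXPECTED_TAG error.
  console.error(response.error.code, response.error.message);
  if (response.partialResponse) {
    console.log('Recovered content:', response.partialResponse.choices[0].message.content);
  }
} else {
  console.log(response.choices[0].message.content);
}
```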