genai-lite 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,10 +1,11 @@
  # genai-lite

- A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers (OpenAI, Anthropic, Google Gemini, Mistral, and more).
+ A lightweight, portable Node.js/TypeScript library providing a unified interface for interacting with multiple Generative AI providers, both cloud-based (OpenAI, Anthropic, Google Gemini, Mistral) and local (llama.cpp).

  ## Features

  - 🔌 **Unified API** - Single interface for multiple AI providers
+ - 🏠 **Local & Cloud Models** - Run models locally with llama.cpp or use cloud APIs
  - 🔐 **Flexible API Key Management** - Bring your own key storage solution
  - 📦 **Zero Electron Dependencies** - Works in any Node.js environment
  - 🎯 **TypeScript First** - Full type safety and IntelliSense support
@@ -21,13 +22,14 @@ npm install genai-lite

  ## Quick Start

+ ### Cloud Providers (OpenAI, Anthropic, Gemini, Mistral)
+
  ```typescript
  import { LLMService, fromEnvironment } from 'genai-lite';

  // Create service with environment variable API key provider
  const llmService = new LLMService(fromEnvironment);

- // Option 1: Direct message sending
  const response = await llmService.sendMessage({
    providerId: 'openai',
    modelId: 'gpt-4.1-mini',
@@ -37,26 +39,47 @@ const response = await llmService.sendMessage({
    ]
  });

- // Option 2: Create messages from template (recommended for complex prompts)
- const { messages } = await llmService.createMessages({
-   template: '<SYSTEM>You are a helpful assistant.</SYSTEM><USER>Hello, how are you?</USER>',
-   providerId: 'openai',
-   modelId: 'gpt-4.1-mini'
- });
+ if (response.object === 'chat.completion') {
+   console.log(response.choices[0].message.content);
+ } else {
+   console.error('Error:', response.error.message);
+ }
+ ```

- const response2 = await llmService.sendMessage({
-   providerId: 'openai',
-   modelId: 'gpt-4.1-mini',
-   messages
+ ### Local Models (llama.cpp)
+
+ ```typescript
+ import { LLMService } from 'genai-lite';
+
+ // Start llama.cpp server first: llama-server -m /path/to/model.gguf --port 8080
+ const llmService = new LLMService(async () => 'not-needed');
+
+ const response = await llmService.sendMessage({
+   providerId: 'llamacpp',
+   modelId: 'llama-3-8b-instruct', // Must match your loaded model
+   messages: [
+     { role: 'system', content: 'You are a helpful assistant.' },
+     { role: 'user', content: 'Explain quantum computing briefly.' }
+   ]
  });

  if (response.object === 'chat.completion') {
    console.log(response.choices[0].message.content);
- } else {
-   console.error('Error:', response.error.message);
  }
  ```

+ See the [llama.cpp Integration](#llamacpp-integration) section for setup details.
+
+ ## Example Application
+
+ For a complete, production-ready example showcasing all genai-lite capabilities, see the **[chat-demo](examples/chat-demo)** interactive web application. The demo includes:
+ - Multi-provider chat interface with all supported providers
+ - Template rendering and model presets
+ - llama.cpp utilities (tokenization, embeddings, health checks)
+ - Settings persistence, export/import features
+
+ The chat-demo serves as both a comprehensive showcase and a quick-test environment for library changes.
+
  ## API Key Management

  genai-lite uses a flexible API key provider pattern. You can use the built-in environment variable provider or create your own:
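As an editorial sketch of the provider pattern referenced above: the `myKeyProvider` name appears in the hunk header below, and the examples in this diff pass an async function to `new LLMService(...)`, so a custom provider can plausibly be written as an async function that receives a provider id and resolves to a key. The exact `ApiKeyProvider` signature should be checked against the library's exported types.

```typescript
import { LLMService } from 'genai-lite';

// Sketch of a custom key provider. Assumption: a provider is an async
// (providerId) => string function, matching the usage shown in this diff.
const myKeyProvider = async (providerId: string): Promise<string> => {
  const keys: Record<string, string> = {
    openai: process.env.MY_OPENAI_KEY ?? '',
    anthropic: process.env.MY_ANTHROPIC_KEY ?? '',
  };
  // llama.cpp does not need a real key, so any string works there
  return keys[providerId] ?? 'not-needed';
};

const llmService = new LLMService(myKeyProvider);
```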
@@ -124,6 +147,64 @@ const llmService = new LLMService(myKeyProvider);
  - `codestral-2501` - Specialized for code generation
  - `devstral-small-2505` - Compact development-focused model

+ ### llama.cpp (Local Models)
+
+ Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name; they are not validated, since you load your own GGUF models.
+
+ **Example models:**
+ - `llama-3-8b-instruct` - Llama 3 8B Instruct
+ - `llama-3-70b-instruct` - Llama 3 70B Instruct
+ - `mistral-7b-instruct` - Mistral 7B Instruct
+ - `my-custom-model` - Any custom model you've loaded
+
+ **Setup:**
+
+ 1. Start llama.cpp server with your model:
+ ```bash
+ llama-server -m /path/to/model.gguf --port 8080
+ ```
+
+ 2. Use with genai-lite (no API key needed):
+ ```typescript
+ import { LLMService } from 'genai-lite';
+
+ // API key can be any string for llama.cpp
+ const service = new LLMService(async () => 'not-needed');
+
+ const response = await service.sendMessage({
+   providerId: 'llamacpp',
+   modelId: 'llama-3-8b-instruct', // Must match your loaded model name
+   messages: [{ role: 'user', content: 'Hello!' }]
+ });
+ ```
+
+ 3. Configure server URL via environment variable:
+ ```bash
+ export LLAMACPP_API_BASE_URL=http://localhost:8080
+ ```
+
+ **Advanced features** - Access non-LLM endpoints:
+
+ ```typescript
+ import { LlamaCppServerClient } from 'genai-lite';
+
+ const client = new LlamaCppServerClient('http://localhost:8080');
+
+ // Check server health
+ const health = await client.getHealth();
+
+ // Tokenize text
+ const { tokens } = await client.tokenize('Hello world');
+
+ // Generate embeddings
+ const { embedding } = await client.createEmbedding('Some text');
+
+ // Code completion
+ const result = await client.infill('def hello():\n', '\nprint("done")');
+ ```
+
+ See the [llama.cpp Integration](#llamacpp-integration) section for details.
+
  ### Models with Reasoning Support

  Some models include advanced reasoning/thinking capabilities that enhance their problem-solving abilities:
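Before the model list that follows in the README, here is an editorial sketch of what enabling reasoning per request might look like. The `settings.reasoning.enabled` flag is inferred from test names elsewhere in this diff ("reasoning.enabled: false") and should be verified against the library's documented settings; the model id is a placeholder.

```typescript
// Sketch only: enable native reasoning for a model that supports it.
// Assumption: reasoning is toggled via settings.reasoning.enabled.
const reasoned = await llmService.sendMessage({
  providerId: 'anthropic',
  modelId: 'your-reasoning-model', // placeholder: pick a model from the list below
  messages: [{ role: 'user', content: 'Walk through the problem step by step.' }],
  settings: {
    reasoning: { enabled: true }
  }
});
```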
@@ -268,7 +349,7 @@ The `onMissing` property controls what happens when the expected thinking tag is

  - `'ignore'`: Silently continue without the tag
  - `'warn'`: Log a warning but continue processing
- - `'error'`: Return an error response
+ - `'error'`: Return an error response with the original response preserved in `partialResponse` (see the sketch below)
  - `'auto'` (default): Intelligently decide based on the model's native reasoning capabilities
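A minimal sketch of the `'error'` mode in practice, using only fields that appear elsewhere in this diff (`thinkingExtraction`, the `MISSING_EXPECTED_TAG` code, and `partialResponse`); treat it as illustrative rather than canonical.

```typescript
const strict = await llmService.sendMessage({
  providerId: 'openai',
  modelId: 'gpt-4.1-mini',
  messages: [{ role: 'user', content: 'Think first, then answer.' }],
  settings: {
    thinkingExtraction: { enabled: true, onMissing: 'error' }
  }
});

if (strict.object === 'error' && strict.error.code === 'MISSING_EXPECTED_TAG') {
  // The model's raw output is still available even though the tag was missing
  console.log(strict.partialResponse?.choices[0].message.content);
}
```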

  **How `'auto'` Mode Works:**
@@ -290,6 +371,7 @@ const response = await llmService.sendMessage({
    }
  });
  // Result: ERROR if <thinking> tag is missing (strict enforcement)
+ // The response is still accessible via errorResponse.partialResponse

  // With native reasoning models (e.g., Claude with reasoning enabled)
  const response = await llmService.sendMessage({
@@ -654,6 +736,10 @@ if (response.object === 'error') {
      break;
    case 'validation_error':
      console.error('Invalid request:', response.error.message);
+     // For validation errors, the response may still be available
+     if (response.partialResponse) {
+       console.log('Partial response:', response.partialResponse.choices[0].message.content);
+     }
      break;
    default:
      console.error('Error:', response.error.message);
@@ -661,6 +747,261 @@ if (response.object === 'error') {
  }
  ```

+ ## llama.cpp Integration
+
+ `genai-lite` provides comprehensive support for running local LLMs via [llama.cpp](https://github.com/ggml-org/llama.cpp) server, enabling completely offline AI capabilities with the same unified interface.
+
+ ### Why llama.cpp?
+
+ - **Privacy**: All model inference runs locally on your hardware
+ - **Cost**: No API costs after initial model download
+ - **Control**: Use any GGUF model from Hugging Face
+ - **Performance**: Optimized C++ implementation with hardware acceleration
+
+ ### Setup
+
+ #### 1. Install llama.cpp
+
+ ```bash
+ # Clone and build llama.cpp
+ git clone https://github.com/ggml-org/llama.cpp
+ cd llama.cpp
+ make
+
+ # Or download pre-built binaries from releases
+ ```
+
+ #### 2. Download a Model
+
+ Get GGUF models from Hugging Face, for example:
+ - [Meta-Llama-3.1-8B-Instruct-GGUF](https://huggingface.co/bartowski/Meta-Llama-3.1-8B-Instruct-GGUF)
+ - [Mistral-7B-Instruct-v0.3-GGUF](https://huggingface.co/bartowski/Mistral-7B-Instruct-v0.3-GGUF)
+
+ #### 3. Start the Server
+
+ ```bash
+ # Basic usage
+ llama-server -m /path/to/model.gguf --port 8080
+
+ # With more options:
+ #   -c 4096      context size
+ #   -np 4        parallel requests
+ #   --threads 8  CPU threads
+ llama-server -m /path/to/model.gguf \
+   --port 8080 \
+   -c 4096 \
+   -np 4 \
+   --threads 8
+ ```
+
+ ### Basic Usage
+
+ ```typescript
+ import { LLMService } from 'genai-lite';
+
+ // llama.cpp doesn't need API keys
+ const service = new LLMService(async () => 'not-needed');
+
+ const response = await service.sendMessage({
+   providerId: 'llamacpp',
+   modelId: 'llama-3-8b-instruct', // Arbitrary name matching your model
+   messages: [
+     { role: 'system', content: 'You are a helpful assistant.' },
+     { role: 'user', content: 'Explain quantum computing in simple terms.' }
+   ],
+   settings: {
+     temperature: 0.7,
+     maxTokens: 500
+   }
+ });
+
+ if (response.object === 'chat.completion') {
+   console.log(response.choices[0].message.content);
+ }
+ ```
+
+ ### Configuration
+
+ #### Environment Variable
+
+ Set the server URL via environment variable (default: `http://localhost:8080`):
+
+ ```bash
+ export LLAMACPP_API_BASE_URL=http://localhost:8080
+ ```
+
+ #### Multiple Servers
+
+ Register multiple llama.cpp instances for different models:
+
+ ```typescript
+ import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
+
+ const service = new LLMService(async () => 'not-needed');
+
+ // Register adapters for different servers/models
+ service.registerAdapter(
+   'llamacpp-small',
+   new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' })
+ );
+
+ service.registerAdapter(
+   'llamacpp-large',
+   new LlamaCppClientAdapter({ baseURL: 'http://localhost:8081' })
+ );
+
+ // Use them
+ const response = await service.sendMessage({
+   providerId: 'llamacpp-small',
+   modelId: 'llama-3-8b',
+   messages: [{ role: 'user', content: 'Hello!' }]
+ });
+ ```
+
+ #### Health Checking
+
+ Enable automatic health checks before requests:
+
+ ```typescript
+ import { LlamaCppClientAdapter } from 'genai-lite';
+
+ const adapter = new LlamaCppClientAdapter({
+   baseURL: 'http://localhost:8080',
+   checkHealth: true // Check server status before each request
+ });
+
+ service.registerAdapter('llamacpp', adapter);
+ ```
+
+ ### Advanced Features
+
+ #### Server Management
+
+ The `LlamaCppServerClient` class provides access to all llama.cpp server endpoints:
+
+ ```typescript
+ import { LlamaCppServerClient } from 'genai-lite';
+
+ const client = new LlamaCppServerClient('http://localhost:8080');
+
+ // Health monitoring
+ const health = await client.getHealth();
+ console.log(health.status); // 'ok', 'loading', or 'error'
+
+ // Server properties
+ const props = await client.getProps();
+ console.log(props.total_slots); // Number of available slots
+
+ // Performance metrics (if enabled)
+ const metrics = await client.getMetrics();
+ ```
+
+ #### Tokenization
+
+ ```typescript
+ const client = new LlamaCppServerClient('http://localhost:8080');
+
+ // Tokenize text
+ const { tokens } = await client.tokenize('Hello, world!');
+ console.log(tokens); // [123, 456, 789]
+
+ // Count tokens before sending to LLM
+ const prompt = 'Long text...';
+ const { tokens: promptTokens } = await client.tokenize(prompt);
+ if (promptTokens.length > 4000) {
+   console.log('Prompt too long, truncating...');
+ }
+
+ // Detokenize back to text
+ const { content } = await client.detokenize([123, 456, 789]);
+ console.log(content); // 'Hello, world!'
+ ```
+
+ #### Text Embeddings
+
+ ```typescript
+ const client = new LlamaCppServerClient('http://localhost:8080');
+
+ // Generate embeddings for semantic search
+ const { embedding } = await client.createEmbedding('Search query text');
+ console.log(embedding.length); // e.g., 768 dimensions
+
+ // With images (for multimodal models)
+ const { embedding: multimodalEmbed } = await client.createEmbedding(
+   'Describe this image',
+   'base64_image_data_here'
+ );
+ ```
+
+ #### Code Infilling
+
+ Perfect for code completion in IDEs:
+
+ ```typescript
+ const client = new LlamaCppServerClient('http://localhost:8080');
+
+ const result = await client.infill(
+   'def calculate_fibonacci(n):\n ', // Prefix (before cursor)
+   '\n return result' // Suffix (after cursor)
+ );
+
+ console.log(result.content);
+ // Output: "if n <= 1:\n return n\n result = calculate_fibonacci(n-1) + calculate_fibonacci(n-2)"
+ ```
+
+ ### Error Handling
+
+ ```typescript
+ const response = await service.sendMessage({
+   providerId: 'llamacpp',
+   modelId: 'my-model',
+   messages: [{ role: 'user', content: 'Hello' }]
+ });
+
+ if (response.object === 'error') {
+   switch (response.error.code) {
+     case 'NETWORK_ERROR':
+       console.error('Server not running or unreachable');
+       break;
+     case 'PROVIDER_ERROR':
+       console.error('Server error:', response.error.message);
+       break;
+     default:
+       console.error('Unknown error:', response.error);
+   }
+ }
+ ```
+
+ ### Best Practices
+
+ 1. **Model Naming**: Use descriptive model IDs (e.g., `llama-3-8b-instruct`) since llama.cpp accepts any name
+ 2. **Context Size**: Set appropriate context (`-c` flag) when starting the server
+ 3. **Parallel Requests**: Configure slots (`-np`) based on your hardware
+ 4. **Health Monitoring**: Enable `checkHealth` for production to detect server issues early
+ 5. **Resource Management**: Monitor memory usage; large models need significant RAM
+
+ ### Troubleshooting
+
+ **Server not responding:**
+ ```bash
+ # Check if server is running
+ curl http://localhost:8080/health
+
+ # Should return: {"status":"ok"}
+ ```
+
+ **Model loading errors:**
+ ```bash
+ # Increase memory or reduce context size
+ llama-server -m model.gguf --port 8080 -c 2048
+ ```
+
+ **Slow responses:**
+ ```bash
+ # Use quantized models (smaller but faster)
+ # e.g., Q4_K_M, Q5_K_M instead of F16
+
+ # Increase threads
+ llama-server -m model.gguf --threads 16
+ ```
+
  ## Using with Electron

  `genai-lite` is designed to work seamlessly within an Electron application's main process, especially when paired with a secure storage solution like `genai-key-storage-lite`.
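As an editorial sketch (not from the package docs), this is roughly how the service could be wired into an Electron main process behind an IPC handler. The channel name is illustrative, and `fromEnvironment` stands in for a real key store since `genai-key-storage-lite`'s API is not shown in this diff.

```typescript
import { app, ipcMain } from 'electron';
import { LLMService, fromEnvironment } from 'genai-lite';

// Keep the service (and API keys) in the main process only
const llmService = new LLMService(fromEnvironment);

app.whenReady().then(() => {
  // Renderer calls ipcRenderer.invoke('llm:send-message', request)
  ipcMain.handle('llm:send-message', async (_event, request) => {
    return llmService.sendMessage(request);
  });
});
```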
@@ -720,6 +1061,26 @@ import type {
    CreateMessagesResult,
    TemplateMetadata
  } from 'genai-lite';
+
+ // llama.cpp integration types and classes
+ import {
+   LlamaCppClientAdapter,
+   LlamaCppServerClient,
+   createFallbackModelInfo
+ } from 'genai-lite';
+
+ import type {
+   LlamaCppClientConfig,
+   LlamaCppHealthResponse,
+   LlamaCppTokenizeResponse,
+   LlamaCppDetokenizeResponse,
+   LlamaCppEmbeddingResponse,
+   LlamaCppInfillResponse,
+   LlamaCppPropsResponse,
+   LlamaCppMetricsResponse,
+   LlamaCppSlot,
+   LlamaCppSlotsResponse
+ } from 'genai-lite';
  ```
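A small editorial sketch of how the llama.cpp exports above might be combined, assuming `LlamaCppClientConfig` mirrors the adapter options (`baseURL`, `checkHealth`) used earlier in this README:

```typescript
import { LLMService, LlamaCppClientAdapter } from 'genai-lite';
import type { LlamaCppClientConfig } from 'genai-lite';

// Assumption: LlamaCppClientConfig exposes the same options shown above
const config: LlamaCppClientConfig = {
  baseURL: 'http://localhost:8080',
  checkHealth: true,
};

const service = new LLMService(async () => 'not-needed');
service.registerAdapter('llamacpp', new LlamaCppClientAdapter(config));
```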

  ## Utilities
@@ -1101,6 +1462,10 @@ These utilities enable:
  - **Template Reusability**: Define templates once, use with different variables
  - **Type Safety**: Full TypeScript support with LLMMessage types

+ ## Examples
+
+ See the **[chat-demo](examples/chat-demo)** application for a complete working example that demonstrates all library features in a production-ready React + Express application.
+
  ## Contributing

  Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
package/dist/index.d.ts CHANGED
@@ -5,7 +5,12 @@ export type { ModelPreset } from "./types/presets";
  export * from "./llm/types";
  export * from "./llm/clients/types";
  export { fromEnvironment } from "./providers/fromEnvironment";
+ export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
+ export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
+ export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
+ export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
  export { renderTemplate } from "./prompting/template";
  export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
  export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
  export type { TemplateMetadata } from "./prompting/parser";
+ export { createFallbackModelInfo } from "./llm/config";
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
      for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
  };
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.fromEnvironment = exports.LLMService = void 0;
+ exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
  // --- LLM Service ---
  var LLMService_1 = require("./llm/LLMService");
  Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -25,6 +25,11 @@ __exportStar(require("./llm/clients/types"), exports);
  // --- API Key Providers ---
  var fromEnvironment_1 = require("./providers/fromEnvironment");
  Object.defineProperty(exports, "fromEnvironment", { enumerable: true, get: function () { return fromEnvironment_1.fromEnvironment; } });
+ // --- llama.cpp Integration ---
+ var LlamaCppClientAdapter_1 = require("./llm/clients/LlamaCppClientAdapter");
+ Object.defineProperty(exports, "LlamaCppClientAdapter", { enumerable: true, get: function () { return LlamaCppClientAdapter_1.LlamaCppClientAdapter; } });
+ var LlamaCppServerClient_1 = require("./llm/clients/LlamaCppServerClient");
+ Object.defineProperty(exports, "LlamaCppServerClient", { enumerable: true, get: function () { return LlamaCppServerClient_1.LlamaCppServerClient; } });
  // --- Utilities ---
  var template_1 = require("./prompting/template");
  Object.defineProperty(exports, "renderTemplate", { enumerable: true, get: function () { return template_1.renderTemplate; } });
@@ -37,3 +42,5 @@ Object.defineProperty(exports, "parseStructuredContent", { enumerable: true, get
  Object.defineProperty(exports, "parseRoleTags", { enumerable: true, get: function () { return parser_1.parseRoleTags; } });
  Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true, get: function () { return parser_1.extractInitialTaggedContent; } });
  Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
+ var config_1 = require("./llm/config");
+ Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
@@ -193,6 +193,14 @@ class LLMService {
        type: "validation_error",
      },
      object: "error",
+     partialResponse: {
+       id: result.id,
+       provider: result.provider,
+       model: result.model,
+       created: result.created,
+       choices: result.choices,
+       usage: result.usage
+     }
    };
  }
  else if (effectiveOnMissing === 'warn') {
@@ -44,17 +44,34 @@ describe('LLMService', () => {
    expect(errorResponse.error.code).toBe('UNSUPPORTED_PROVIDER');
    expect(errorResponse.error.message).toContain('Unsupported provider');
  });
- it('should return validation error for unsupported model', async () => {
+ it('should succeed with fallback for unknown model', async () => {
    const request = {
-     providerId: 'openai',
+     providerId: 'mock', // Use mock provider to avoid real API calls
      modelId: 'unsupported-model',
      messages: [{ role: 'user', content: 'Hello' }]
    };
    const response = await service.sendMessage(request);
-   expect(response.object).toBe('error');
-   const errorResponse = response;
-   expect(errorResponse.error.code).toBe('UNSUPPORTED_MODEL');
-   expect(errorResponse.error.message).toContain('Unsupported model');
+   // Should succeed with mock response (not error) even for unknown model
+   expect(response.object).toBe('chat.completion');
+ });
+ it('should silently work with flexible providers unknown models (no warning)', async () => {
+   const warnings = [];
+   const consoleWarnSpy = jest.spyOn(console, 'warn').mockImplementation((msg) => {
+     warnings.push(msg);
+   });
+   // Test with mock provider (which has allowUnknownModels: true)
+   const request = {
+     providerId: 'mock',
+     modelId: 'totally-unknown-model-xyz',
+     messages: [{ role: 'user', content: 'Testing flexible provider' }]
+   };
+   const response = await service.sendMessage(request);
+   // Should succeed with mock response
+   expect(response.object).toBe('chat.completion');
+   // Should NOT warn about unknown model (filter out adapter constructor warnings)
+   const unknownModelWarnings = warnings.filter(w => !w.includes('No adapter constructor'));
+   expect(unknownModelWarnings.length).toBe(0); // No warnings for flexible providers
+   consoleWarnSpy.mockRestore();
  });
  it('should return validation error for empty messages', async () => {
    const request = {
@@ -160,8 +177,8 @@ describe('LLMService', () => {
    // Second request to same provider
    request.messages = [{ role: 'user', content: 'Second request' }];
    await service.sendMessage(request);
-   // API key provider should be called for each request with mock provider
-   expect(mockApiKeyProvider).toHaveBeenCalledTimes(0); // Mock provider doesn't need API keys
+   // API key provider should be called once per unique provider (mock provider now registered)
+   expect(mockApiKeyProvider).toHaveBeenCalledTimes(2);
  });
  });
  describe('settings management', () => {
@@ -325,11 +342,13 @@ describe('LLMService', () => {
  describe('getProviders', () => {
    it('should return all supported providers', async () => {
      const providers = await service.getProviders();
-     expect(providers).toHaveLength(4);
+     expect(providers).toHaveLength(6);
      expect(providers.find(p => p.id === 'openai')).toBeDefined();
      expect(providers.find(p => p.id === 'anthropic')).toBeDefined();
      expect(providers.find(p => p.id === 'gemini')).toBeDefined();
      expect(providers.find(p => p.id === 'mistral')).toBeDefined();
+     expect(providers.find(p => p.id === 'llamacpp')).toBeDefined();
+     expect(providers.find(p => p.id === 'mock')).toBeDefined();
    });
    it('should include provider metadata', async () => {
      const providers = await service.getProviders();
@@ -488,6 +507,9 @@ describe('LLMService', () => {
    expect(errorResponse.error.type).toBe('validation_error');
    expect(errorResponse.error.message).toContain('response was expected to start with a <thinking> tag');
    expect(errorResponse.error.message).toContain('does not have native reasoning active');
+   // Check that partial response is included
+   expect(errorResponse.partialResponse).toBeDefined();
+   expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
  });
  it('should handle missing tag for non-reasoning model with warn', async () => {
    const consoleSpy = jest.spyOn(console, 'warn').mockImplementation();
@@ -509,6 +531,27 @@ describe('LLMService', () => {
    expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Expected <thinking> tag was not found'));
    consoleSpy.mockRestore();
  });
+ it('should handle missing tag with explicit error mode', async () => {
+   const request = {
+     providerId: 'mistral',
+     modelId: 'codestral-2501',
+     messages: [{ role: 'user', content: 'test_thinking:Response without thinking tag.' }],
+     settings: {
+       thinkingExtraction: {
+         enabled: true,
+         onMissing: 'error' // Explicitly set to error
+       }
+     }
+   };
+   const response = await service.sendMessage(request);
+   expect(response.object).toBe('error');
+   const errorResponse = response;
+   expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
+   expect(errorResponse.error.message).toContain('response was expected to start with a <thinking> tag');
+   // Check that partial response is included
+   expect(errorResponse.partialResponse).toBeDefined();
+   expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
+ });
  it('should handle missing tag for non-reasoning model with ignore', async () => {
    const request = {
      providerId: 'mistral',
@@ -543,6 +586,8 @@ describe('LLMService', () => {
    expect(response.object).toBe('error');
    const errorResponse = response;
    expect(errorResponse.error.message).toContain('expected to start with a <reasoning> tag');
+   expect(errorResponse.partialResponse).toBeDefined();
+   expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without custom tag.');
  });
  describe('auto mode with native reasoning detection', () => {
    it('should enforce thinking tags for non-reasoning models by default', async () => {
@@ -564,6 +609,8 @@ describe('LLMService', () => {
    const errorResponse = response;
    expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
    expect(errorResponse.error.message).toContain('does not have native reasoning active');
+   expect(errorResponse.partialResponse).toBeDefined();
+   expect(errorResponse.partialResponse.choices[0].message.content).toBe('Response without thinking tag.');
  });
  it('should respect explicit reasoning.enabled: false even for models with enabledByDefault', async () => {
    // This is the key test for the fix
@@ -584,6 +631,7 @@ describe('LLMService', () => {
    expect(response.object).toBe('error');
    const errorResponse = response;
    expect(errorResponse.error.code).toBe('MISSING_EXPECTED_TAG');
+   expect(errorResponse.partialResponse).toBeDefined();
  });
  });
  });