genai-lite 0.4.2 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -6
- package/dist/index.d.ts +3 -2
- package/dist/index.js +3 -1
- package/dist/llm/LLMService.js +3 -3
- package/dist/llm/clients/LlamaCppClientAdapter.d.ts +19 -1
- package/dist/llm/clients/LlamaCppClientAdapter.js +88 -8
- package/dist/llm/clients/LlamaCppServerClient.d.ts +36 -0
- package/dist/llm/clients/LlamaCppServerClient.js +25 -0
- package/dist/llm/config.d.ts +43 -2
- package/dist/llm/config.js +167 -4
- package/dist/llm/services/ModelResolver.d.ts +4 -2
- package/dist/llm/services/ModelResolver.js +22 -5
- package/package.json +1 -1

package/README.md
CHANGED
@@ -151,6 +151,8 @@ const llmService = new LLMService(myKeyProvider);
 
 Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name—they're not validated since you load your own GGUF models.
 
+**Automatic Capability Detection:** genai-lite automatically detects capabilities (reasoning support, context windows, token limits) for known open-weights models (Qwen3, etc.) by matching the GGUF filename from the server. No configuration needed.
+
 **Example models:**
 - `llama-3-8b-instruct` - Llama 3 8B Instruct
 - `llama-3-70b-instruct` - Llama 3 70B Instruct
@@ -212,6 +214,7 @@ Some models include advanced reasoning/thinking capabilities that enhance their
 - **Anthropic**: Claude Sonnet 4, Claude Opus 4, Claude 3.7 Sonnet
 - **Google Gemini**: Gemini 2.5 Pro (always on), Gemini 2.5 Flash, Gemini 2.5 Flash-Lite Preview
 - **OpenAI**: o4-mini (always on)
+- **llama.cpp**: Qwen3, DeepSeek-R1, GPT-OSS (requires `--reasoning-format deepseek` server flag)
 
 See the [Reasoning Mode](#reasoning-mode) section for usage details.
 
@@ -794,12 +797,20 @@ Get GGUF models from Hugging Face, for example:
 # Basic usage
 llama-server -m /path/to/model.gguf --port 8080
 
-# With
+# With reasoning support (for Qwen3, DeepSeek-R1, etc.)
+llama-server -m /path/to/model.gguf \
+  --port 8080 \
+  --jinja \
+  --reasoning-format deepseek
+
+# Full options
 llama-server -m /path/to/model.gguf \
   --port 8080 \
-
-
-
+  --jinja \                      # Required for reasoning
+  --reasoning-format deepseek \  # Extract reasoning from <think> tags
+  -c 4096 \                      # Context size
+  -np 4 \                        # Parallel requests
+  --threads 8                    # CPU threads
 ```
 
 ### Basic Usage
@@ -1077,7 +1088,9 @@ import type {
 import {
   LlamaCppClientAdapter,
   LlamaCppServerClient,
-  createFallbackModelInfo
+  createFallbackModelInfo,
+  detectGgufCapabilities,
+  KNOWN_GGUF_MODELS
 } from 'genai-lite';
 
 import type {
@@ -1090,7 +1103,10 @@ import type {
   LlamaCppPropsResponse,
   LlamaCppMetricsResponse,
   LlamaCppSlot,
-  LlamaCppSlotsResponse
+  LlamaCppSlotsResponse,
+  LlamaCppModel,
+  LlamaCppModelsResponse,
+  GgufModelPattern
 } from 'genai-lite';
 ```
 
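
The README additions above describe the new GGUF capability detection but only name the exports. A minimal sketch of how the two new helpers could be called directly, based on the signatures that appear later in this diff (the filename here is illustrative only):

```typescript
import { detectGgufCapabilities, KNOWN_GGUF_MODELS } from 'genai-lite';

// Illustrative filename; in practice this is whatever GGUF your llama-server reports.
const caps = detectGgufCapabilities('Qwen3-8B-Q4_K_M.gguf');

if (caps?.reasoning?.supported) {
  console.log(`Reasoning supported, context window: ${caps.contextWindow}`);
}

// The pattern table itself is exported, so you can see which models are detectable.
console.log(KNOWN_GGUF_MODELS.map((m) => m.pattern));
```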

package/dist/index.d.ts
CHANGED
@@ -8,9 +8,10 @@ export { fromEnvironment } from "./providers/fromEnvironment";
 export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
 export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
 export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
-export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
+export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, LlamaCppModel, LlamaCppModelsResponse, } from "./llm/clients/LlamaCppServerClient";
 export { renderTemplate } from "./prompting/template";
 export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
 export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
 export type { TemplateMetadata } from "./prompting/parser";
-export { createFallbackModelInfo } from "./llm/config";
+export { createFallbackModelInfo, detectGgufCapabilities, KNOWN_GGUF_MODELS } from "./llm/config";
+export type { GgufModelPattern } from "./llm/config";

package/dist/index.js
CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
+exports.KNOWN_GGUF_MODELS = exports.detectGgufCapabilities = exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
 // --- LLM Service ---
 var LLMService_1 = require("./llm/LLMService");
 Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -44,3 +44,5 @@ Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true
 Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
 var config_1 = require("./llm/config");
 Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
+Object.defineProperty(exports, "detectGgufCapabilities", { enumerable: true, get: function () { return config_1.detectGgufCapabilities; } });
+Object.defineProperty(exports, "KNOWN_GGUF_MODELS", { enumerable: true, get: function () { return config_1.KNOWN_GGUF_MODELS; } });

package/dist/llm/LLMService.js
CHANGED
@@ -31,7 +31,7 @@ class LLMService {
         this.adapterRegistry = new AdapterRegistry_1.AdapterRegistry();
         this.requestValidator = new RequestValidator_1.RequestValidator();
         this.settingsManager = new SettingsManager_1.SettingsManager();
-        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager);
+        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager, this.adapterRegistry);
     }
     /**
      * Gets list of supported LLM providers
@@ -69,7 +69,7 @@ class LLMService {
         console.log(`LLMService.sendMessage called with presetId: ${request.presetId}, provider: ${request.providerId}, model: ${request.modelId}`);
         try {
             // Resolve model information from preset or direct IDs
-            const resolved = this.modelResolver.resolve(request);
+            const resolved = await this.modelResolver.resolve(request);
             if (resolved.error) {
                 return resolved.error;
             }
@@ -327,7 +327,7 @@ class LLMService {
         let modelContext = null;
         if (options.presetId || (options.providerId && options.modelId)) {
             // Resolve model information
-            const resolved = this.modelResolver.resolve({
+            const resolved = await this.modelResolver.resolve({
                 presetId: options.presetId,
                 providerId: options.providerId,
                 modelId: options.modelId,

package/dist/llm/clients/LlamaCppClientAdapter.d.ts
CHANGED
@@ -1,4 +1,4 @@
-import type { LLMResponse, LLMFailureResponse } from "../types";
+import type { LLMResponse, LLMFailureResponse, ModelInfo } from "../types";
 import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
 import { LlamaCppServerClient } from "./LlamaCppServerClient";
 /**
@@ -49,12 +49,30 @@ export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
     private baseURL;
     private checkHealth;
     private serverClient;
+    private cachedModelCapabilities;
+    private detectionAttempted;
     /**
      * Creates a new llama.cpp client adapter
      *
      * @param config Optional configuration for the adapter
      */
     constructor(config?: LlamaCppClientConfig);
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    getModelCapabilities(): Promise<Partial<ModelInfo> | null>;
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache(): void;
     /**
      * Sends a chat message to llama.cpp server
      *

package/dist/llm/clients/LlamaCppClientAdapter.js
CHANGED
@@ -10,6 +10,7 @@ const openai_1 = __importDefault(require("openai"));
 const types_1 = require("./types");
 const adapterErrorUtils_1 = require("./adapterErrorUtils");
 const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+const config_1 = require("../config");
 /**
  * Client adapter for llama.cpp server integration
  *
@@ -52,10 +53,68 @@ class LlamaCppClientAdapter {
      * @param config Optional configuration for the adapter
      */
     constructor(config) {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
         this.baseURL = config?.baseURL || 'http://localhost:8080';
         this.checkHealth = config?.checkHealth || false;
         this.serverClient = new LlamaCppServerClient_1.LlamaCppServerClient(this.baseURL);
     }
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    async getModelCapabilities() {
+        // Return cached result if available
+        if (this.cachedModelCapabilities !== null) {
+            return this.cachedModelCapabilities;
+        }
+        // Return null if we already tried and failed
+        if (this.detectionAttempted) {
+            return null;
+        }
+        // Attempt detection
+        try {
+            console.log(`Detecting model capabilities from llama.cpp server at ${this.baseURL}`);
+            const { data } = await this.serverClient.getModels();
+            if (!data || data.length === 0) {
+                console.warn('No models loaded in llama.cpp server');
+                this.detectionAttempted = true;
+                return null;
+            }
+            const ggufFilename = data[0].id;
+            const capabilities = (0, config_1.detectGgufCapabilities)(ggufFilename);
+            // Cache the result (even if null)
+            this.cachedModelCapabilities = capabilities;
+            this.detectionAttempted = true;
+            if (capabilities) {
+                console.log(`Cached model capabilities for: ${ggufFilename}`);
+            }
+            else {
+                console.log(`No known pattern matched for: ${ggufFilename}`);
+            }
+            return capabilities;
+        }
+        catch (error) {
+            console.warn('Failed to detect model capabilities:', error);
+            this.detectionAttempted = true;
+            return null;
+        }
+    }
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache() {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
+        console.log('Cleared model capabilities cache');
+    }
     /**
      * Sends a chat message to llama.cpp server
      *
@@ -132,6 +191,13 @@ class LlamaCppClientAdapter {
         }
         catch (error) {
             console.error("llama.cpp API error:", error);
+            // Clear cache on connection errors so we re-detect on next request
+            const errorMessage = error?.message || String(error);
+            if (errorMessage.includes("ECONNREFUSED") ||
+                errorMessage.includes("fetch failed") ||
+                errorMessage.includes("connect")) {
+                this.clearModelCache();
+            }
             return this.createErrorResponse(error, request);
         }
     }
@@ -219,19 +285,33 @@ class LlamaCppClientAdapter {
         if (!choice || !choice.message) {
             throw new Error("No valid choices in llama.cpp completion response");
         }
+        // Extract reasoning content if available
+        // llama.cpp returns reasoning in reasoning_content field when using --reasoning-format
+        let reasoning;
+        if (choice.message.reasoning_content) {
+            reasoning = choice.message.reasoning_content;
+        }
         return {
             id: completion.id,
             provider: request.providerId,
             model: completion.model || request.modelId,
             created: completion.created,
-            choices: completion.choices.map((c) =>
-
-
-
-
-
-
-
+            choices: completion.choices.map((c) => {
+                const mappedChoice = {
+                    message: {
+                        role: "assistant",
+                        content: c.message.content || "",
+                    },
+                    finish_reason: c.finish_reason,
+                    index: c.index,
+                };
+                // Include reasoning if available and not excluded
+                const messageReasoning = c.message.reasoning_content;
+                if (messageReasoning && request.settings.reasoning && !request.settings.reasoning.exclude) {
+                    mappedChoice.reasoning = messageReasoning;
+                }
+                return mappedChoice;
+            }),
             usage: completion.usage
                 ? {
                     prompt_tokens: completion.usage.prompt_tokens,
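
Taken together, the adapter changes wrap the new server call in a small detection cache. A rough usage sketch of the two public additions (the base URL is an assumption; llama.cpp defaults to port 8080):

```typescript
import { LlamaCppClientAdapter } from 'genai-lite';

const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });

// First call hits /v1/models, matches the GGUF filename, and caches the result.
const caps = await adapter.getModelCapabilities();
console.log(caps ?? 'no known pattern matched');

// After restarting llama-server with a different GGUF, drop the cache manually.
adapter.clearModelCache();
```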

package/dist/llm/clients/LlamaCppServerClient.d.ts
CHANGED
@@ -62,6 +62,23 @@ export interface LlamaCppSlot {
 export interface LlamaCppSlotsResponse {
     slots: LlamaCppSlot[];
 }
+/**
+ * Individual model information from /v1/models endpoint
+ */
+export interface LlamaCppModel {
+    id: string;
+    object?: string;
+    created?: number;
+    owned_by?: string;
+    [key: string]: any;
+}
+/**
+ * Response from the /v1/models endpoint
+ */
+export interface LlamaCppModelsResponse {
+    object: string;
+    data: LlamaCppModel[];
+}
 /**
  * Client for interacting with llama.cpp server's management and utility endpoints
  *
@@ -158,4 +175,23 @@ export declare class LlamaCppServerClient {
      * @throws Error if the request fails or endpoint is not enabled
      */
     getSlots(): Promise<LlamaCppSlotsResponse>;
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    getModels(): Promise<LlamaCppModelsResponse>;
 }

package/dist/llm/clients/LlamaCppServerClient.js
CHANGED
@@ -188,5 +188,30 @@ class LlamaCppServerClient {
         }
         return await response.json();
     }
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    async getModels() {
+        const response = await fetch(`${this.baseURL}/v1/models`);
+        if (!response.ok) {
+            throw new Error(`Get models failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
 }
 exports.LlamaCppServerClient = LlamaCppServerClient;
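
The new endpoint wrapper and the detection helper compose into the same two-step flow the adapter uses internally. A minimal standalone sketch, assuming a llama-server running locally with a model loaded:

```typescript
import { LlamaCppServerClient, detectGgufCapabilities } from 'genai-lite';

const client = new LlamaCppServerClient('http://localhost:8080');

// /v1/models reports the loaded GGUF filename as the model id...
const { data } = await client.getModels();

// ...which is then matched against KNOWN_GGUF_MODELS (first match wins).
console.log(detectGgufCapabilities(data[0].id));
```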

package/dist/llm/config.d.ts
CHANGED
@@ -31,6 +31,46 @@ export declare const MODEL_DEFAULT_SETTINGS: Record<string, Partial<LLMSettings>
  * Supported LLM providers
  */
 export declare const SUPPORTED_PROVIDERS: ProviderInfo[];
+/**
+ * Pattern definition for detecting GGUF model capabilities
+ */
+export interface GgufModelPattern {
+    /** Pattern to match in the GGUF filename (case-insensitive substring match) */
+    pattern: string;
+    /** Human-readable name for the model */
+    name: string;
+    /** Optional description */
+    description?: string;
+    /** Model capabilities (reasoning config, context window, etc.) */
+    capabilities: Partial<ModelInfo>;
+}
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+export declare const KNOWN_GGUF_MODELS: GgufModelPattern[];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+export declare function detectGgufCapabilities(ggufFilename: string): Partial<ModelInfo> | null;
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
@@ -82,9 +122,10 @@ export declare function isModelSupported(modelId: string, providerId: string): b
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-export declare function createFallbackModelInfo(modelId: string, providerId: string): ModelInfo;
+export declare function createFallbackModelInfo(modelId: string, providerId: string, capabilities?: Partial<ModelInfo>): ModelInfo;
 /**
  * Gets merged default settings for a specific model and provider
 *

package/dist/llm/config.js
CHANGED
@@ -2,7 +2,8 @@
 // AI Summary: Configuration for LLM module including default settings, supported providers, and models.
 // Defines operational parameters and available LLM options for the application.
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.SUPPORTED_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.SUPPORTED_MODELS = exports.KNOWN_GGUF_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.detectGgufCapabilities = detectGgufCapabilities;
 exports.getProviderById = getProviderById;
 exports.getModelById = getModelById;
 exports.getModelsByProvider = getModelsByProvider;
@@ -133,6 +134,150 @@ exports.SUPPORTED_PROVIDERS = [
         allowUnknownModels: true, // Test provider accepts any model
     },
 ];
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+exports.KNOWN_GGUF_MODELS = [
+    // Qwen 3 Series - All support thinking/reasoning
+    {
+        pattern: "qwen3-30b",
+        name: "Qwen 3 30B",
+        description: "Qwen 3 30B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 16384,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-14b",
+        name: "Qwen 3 14B",
+        description: "Qwen 3 14B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-8b",
+        name: "Qwen 3 8B",
+        description: "Qwen 3 8B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-4b",
+        name: "Qwen 3 4B",
+        description: "Qwen 3 4B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-1.7b",
+        name: "Qwen 3 1.7B",
+        description: "Qwen 3 1.7B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-0.6b",
+        name: "Qwen 3 0.6B",
+        description: "Qwen 3 0.6B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    // Add more model patterns here as needed
+    // DeepSeek, Llama, etc.
+];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+function detectGgufCapabilities(ggufFilename) {
+    const lowerFilename = ggufFilename.toLowerCase();
+    // First match wins (array is pre-ordered from specific to generic)
+    for (const model of exports.KNOWN_GGUF_MODELS) {
+        if (lowerFilename.includes(model.pattern.toLowerCase())) {
+            console.log(`Detected GGUF model: ${model.name} (pattern: ${model.pattern})`);
+            return model.capabilities;
+        }
+    }
+    // No match found
+    return null;
+}
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
@@ -521,10 +666,11 @@ function isModelSupported(modelId, providerId) {
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-function createFallbackModelInfo(modelId, providerId) {
-
+function createFallbackModelInfo(modelId, providerId, capabilities) {
+    const defaults = {
         id: modelId,
         name: modelId,
         providerId: providerId,
@@ -536,6 +682,23 @@ function createFallbackModelInfo(modelId, providerId) {
         supportsImages: false,
         supportsPromptCache: false,
     };
+    // Merge detected capabilities if provided
+    if (capabilities) {
+        return {
+            ...defaults,
+            ...capabilities,
+            // Always preserve these from defaults/params
+            id: modelId,
+            name: capabilities.name || modelId,
+            providerId: providerId,
+            // For local models, pricing is always 0
+            inputPrice: 0,
+            outputPrice: 0,
+            cacheWritesPrice: undefined,
+            cacheReadsPrice: undefined,
+        };
+    }
+    return defaults;
 }
 /**
  * Gets merged default settings for a specific model and provider
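
The fallback helper's new optional parameter is what lets detected capabilities flow into an otherwise unknown local model. A short sketch of the merge behavior shown above (model ID and filename are illustrative):

```typescript
import { createFallbackModelInfo, detectGgufCapabilities } from 'genai-lite';

// Illustrative filename; the adapter normally supplies this from /v1/models.
const detected = detectGgufCapabilities('Qwen3-4B-Q4_K_M.gguf');

// Unknown llamacpp model: defaults are filled in, detected capabilities merged,
// and pricing is forced to 0 for local models.
const info = createFallbackModelInfo('my-local-model', 'llamacpp', detected ?? undefined);

console.log(info.contextWindow, info.reasoning?.supported, info.inputPrice);
```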

package/dist/llm/services/ModelResolver.d.ts
CHANGED
@@ -1,5 +1,6 @@
 import type { LLMFailureResponse, LLMSettings, ModelInfo } from "../types";
 import { PresetManager } from "./PresetManager";
+import { AdapterRegistry } from "./AdapterRegistry";
 /**
  * Options for model selection
  */
@@ -24,12 +25,13 @@ export interface ModelResolution {
  */
 export declare class ModelResolver {
     private presetManager;
-
+    private adapterRegistry;
+    constructor(presetManager: PresetManager, adapterRegistry: AdapterRegistry);
     /**
      * Resolves model information from either a preset ID or provider/model IDs
      *
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options: ModelSelectionOptions): ModelResolution
+    resolve(options: ModelSelectionOptions): Promise<ModelResolution>;
 }

package/dist/llm/services/ModelResolver.js
CHANGED
@@ -6,8 +6,9 @@ const config_1 = require("../config");
  * Resolves model information from presets or direct provider/model IDs
  */
 class ModelResolver {
-    constructor(presetManager) {
+    constructor(presetManager, adapterRegistry) {
         this.presetManager = presetManager;
+        this.adapterRegistry = adapterRegistry;
     }
     /**
      * Resolves model information from either a preset ID or provider/model IDs
@@ -15,7 +16,7 @@ class ModelResolver {
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options) {
+    async resolve(options) {
         // If presetId is provided, use it
         if (options.presetId) {
             const preset = this.presetManager.resolvePreset(options.presetId);
@@ -94,15 +95,31 @@ class ModelResolver {
         if (!modelInfo) {
             // Check if provider allows unknown models
             const provider = (0, config_1.getProviderById)(options.providerId);
+            // For llamacpp, try to detect capabilities from the adapter's cache
+            let detectedCapabilities;
+            if (options.providerId === 'llamacpp') {
+                try {
+                    const adapter = this.adapterRegistry.getAdapter('llamacpp');
+                    // Check if adapter has the getModelCapabilities method
+                    if (adapter && typeof adapter.getModelCapabilities === 'function') {
+                        const capabilities = await adapter.getModelCapabilities();
+                        detectedCapabilities = capabilities || undefined;
+                    }
+                }
+                catch (error) {
+                    console.warn('Failed to detect GGUF model capabilities:', error);
+                    // Continue with fallback
+                }
+            }
             if (provider?.allowUnknownModels) {
-                // Flexible provider (e.g., llamacpp) - silent fallback
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                // Flexible provider (e.g., llamacpp) - silent fallback with detected capabilities
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
             else {
                 // Strict provider - warn but allow
                 console.warn(`⚠️ Unknown model "${options.modelId}" for provider "${options.providerId}". ` +
                     `Using default settings. This may fail at the provider API if the model doesn't exist.`);
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
         }
         return {