genai-lite 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -151,6 +151,8 @@ const llmService = new LLMService(myKeyProvider);
 
 Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name—they're not validated since you load your own GGUF models.
 
+**Automatic Capability Detection:** genai-lite automatically detects capabilities (reasoning support, context windows, token limits) for known open-weights models (Qwen3, etc.) by matching the GGUF filename from the server. No configuration needed.
+
 **Example models:**
 - `llama-3-8b-instruct` - Llama 3 8B Instruct
 - `llama-3-70b-instruct` - Llama 3 70B Instruct
@@ -212,6 +214,7 @@ Some models include advanced reasoning/thinking capabilities that enhance their
 - **Anthropic**: Claude Sonnet 4, Claude Opus 4, Claude 3.7 Sonnet
 - **Google Gemini**: Gemini 2.5 Pro (always on), Gemini 2.5 Flash, Gemini 2.5 Flash-Lite Preview
 - **OpenAI**: o4-mini (always on)
+- **llama.cpp**: Qwen3, DeepSeek-R1, GPT-OSS (requires `--reasoning-format deepseek` server flag)
 
 See the [Reasoning Mode](#reasoning-mode) section for usage details.
 
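To make the new llama.cpp entry concrete, here is a minimal request sketch. It relies only on shapes visible elsewhere in this diff (`providerId`/`modelId`, a `settings.reasoning` object with an `exclude` flag, and a `reasoning` field on returned choices); the `enabled` flag name is an assumption, not confirmed by this diff.

```typescript
import { LLMService, fromEnvironment } from 'genai-lite';

const llmService = new LLMService(fromEnvironment);

const response = await llmService.sendMessage({
  providerId: 'llamacpp',
  modelId: 'qwen3-8b', // any name; capabilities are detected from the GGUF filename
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  settings: { reasoning: { enabled: true } }, // 'enabled' is assumed here
});

if (response.object !== 'error') {
  // Populated from llama.cpp's reasoning_content when --reasoning-format is set
  console.log(response.choices[0].reasoning);
}
```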
@@ -794,12 +797,20 @@ Get GGUF models from Hugging Face, for example:
 # Basic usage
 llama-server -m /path/to/model.gguf --port 8080
 
-# With more options
+# With reasoning support (for Qwen3, DeepSeek-R1, etc.)
+llama-server -m /path/to/model.gguf \
+  --port 8080 \
+  --jinja \
+  --reasoning-format deepseek
+
+# Full options
 llama-server -m /path/to/model.gguf \
   --port 8080 \
-  -c 4096 \ # Context size
-  -np 4 \ # Parallel requests
-  --threads 8 # CPU threads
+  --jinja \ # Required for reasoning
+  --reasoning-format deepseek \ # Extract reasoning from <think> tags
+  -c 4096 \ # Context size
+  -np 4 \ # Parallel requests
+  --threads 8 # CPU threads
 ```
 
 ### Basic Usage
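On the client side, the adapter only needs the server's address. A small sketch of pointing `LlamaCppClientAdapter` at the server started above; both options appear in the adapter's constructor in this diff, though the exact health-check behavior is not shown here and is assumed.

```typescript
import { LlamaCppClientAdapter } from 'genai-lite';

// baseURL defaults to http://localhost:8080 when omitted
const adapter = new LlamaCppClientAdapter({
  baseURL: 'http://localhost:8080',
  checkHealth: true, // assumed: pre-flight health probe before sending requests
});
```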
@@ -1077,7 +1088,9 @@ import type {
 import {
   LlamaCppClientAdapter,
   LlamaCppServerClient,
-  createFallbackModelInfo
+  createFallbackModelInfo,
+  detectGgufCapabilities,
+  KNOWN_GGUF_MODELS
 } from 'genai-lite';
 
 import type {
@@ -1090,7 +1103,10 @@ import type {
   LlamaCppPropsResponse,
   LlamaCppMetricsResponse,
   LlamaCppSlot,
-  LlamaCppSlotsResponse
+  LlamaCppSlotsResponse,
+  LlamaCppModel,
+  LlamaCppModelsResponse,
+  GgufModelPattern
 } from 'genai-lite';
 ```
 
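Taken together, a short sketch of the newly exported helpers in use (the filename is illustrative; behavior follows the implementations later in this diff):

```typescript
import {
  detectGgufCapabilities,
  KNOWN_GGUF_MODELS,
  LlamaCppServerClient,
} from 'genai-lite';

// Case-insensitive substring match; first pattern in the array wins.
const caps = detectGgufCapabilities('Qwen3-8B-Instruct-Q4_K_M.gguf');
console.log(caps?.contextWindow); // 131072 for the qwen3-8b pattern

// Inspect which filename substrings are currently recognized.
for (const { pattern, name } of KNOWN_GGUF_MODELS) {
  console.log(`${pattern} -> ${name}`);
}

// Or query the server directly for the loaded GGUF filename.
const client = new LlamaCppServerClient('http://localhost:8080');
const { data } = await client.getModels();
console.log(data[0]?.id);
```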
package/dist/index.d.ts CHANGED
@@ -8,9 +8,10 @@ export { fromEnvironment } from "./providers/fromEnvironment";
 export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
 export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
 export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
-export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
+export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, LlamaCppModel, LlamaCppModelsResponse, } from "./llm/clients/LlamaCppServerClient";
 export { renderTemplate } from "./prompting/template";
 export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
 export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
 export type { TemplateMetadata } from "./prompting/parser";
-export { createFallbackModelInfo } from "./llm/config";
+export { createFallbackModelInfo, detectGgufCapabilities, KNOWN_GGUF_MODELS } from "./llm/config";
+export type { GgufModelPattern } from "./llm/config";
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
 for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
+exports.KNOWN_GGUF_MODELS = exports.detectGgufCapabilities = exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
 // --- LLM Service ---
 var LLMService_1 = require("./llm/LLMService");
 Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -44,3 +44,5 @@ Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true
 Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
 var config_1 = require("./llm/config");
 Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
+Object.defineProperty(exports, "detectGgufCapabilities", { enumerable: true, get: function () { return config_1.detectGgufCapabilities; } });
+Object.defineProperty(exports, "KNOWN_GGUF_MODELS", { enumerable: true, get: function () { return config_1.KNOWN_GGUF_MODELS; } });
package/dist/llm/LLMService.js CHANGED
@@ -31,7 +31,7 @@ class LLMService {
         this.adapterRegistry = new AdapterRegistry_1.AdapterRegistry();
         this.requestValidator = new RequestValidator_1.RequestValidator();
         this.settingsManager = new SettingsManager_1.SettingsManager();
-        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager);
+        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager, this.adapterRegistry);
     }
     /**
      * Gets list of supported LLM providers
@@ -69,7 +69,7 @@ class LLMService {
         console.log(`LLMService.sendMessage called with presetId: ${request.presetId}, provider: ${request.providerId}, model: ${request.modelId}`);
         try {
             // Resolve model information from preset or direct IDs
-            const resolved = this.modelResolver.resolve(request);
+            const resolved = await this.modelResolver.resolve(request);
             if (resolved.error) {
                 return resolved.error;
             }
@@ -143,6 +143,19 @@ class LLMService {
                 object: "error",
             };
         }
+        // Validate API key format if adapter supports it
+        if (clientAdapter.validateApiKey && !clientAdapter.validateApiKey(apiKey)) {
+            return {
+                provider: providerId,
+                model: modelId,
+                error: {
+                    message: `Invalid API key format for provider '${providerId}'. Please check your API key.`,
+                    code: "INVALID_API_KEY",
+                    type: "authentication_error",
+                },
+                object: "error",
+            };
+        }
         console.log(`Making LLM request with ${clientAdapter.constructor.name} for provider: ${providerId}`);
         const result = await clientAdapter.sendMessage(internalRequest, apiKey);
         // Post-process for thinking tag fallback
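The new guard calls an optional `validateApiKey` hook before dispatching. A sketch of an adapter opting in; the hook's signature is inferred from the call site above, and the interface name below is only for illustration (whether `ILLMClientAdapter` is exported from the package root is not shown in this diff).

```typescript
// Inferred shape of the optional hook used by LLMService above.
interface ValidatingAdapter {
  validateApiKey?: (apiKey: string) => boolean;
}

// Example: a cheap format check that runs before any network call,
// turning obviously malformed keys into INVALID_API_KEY failures.
const adapter: ValidatingAdapter = {
  validateApiKey: (apiKey) => /^sk-[A-Za-z0-9_-]{20,}$/.test(apiKey),
};
```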
@@ -314,7 +327,7 @@ class LLMService {
         let modelContext = null;
         if (options.presetId || (options.providerId && options.modelId)) {
             // Resolve model information
-            const resolved = this.modelResolver.resolve({
+            const resolved = await this.modelResolver.resolve({
                 presetId: options.presetId,
                 providerId: options.providerId,
                 modelId: options.modelId,
package/dist/llm/clients/LlamaCppClientAdapter.d.ts CHANGED
@@ -1,4 +1,4 @@
-import type { LLMResponse, LLMFailureResponse } from "../types";
+import type { LLMResponse, LLMFailureResponse, ModelInfo } from "../types";
 import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
 import { LlamaCppServerClient } from "./LlamaCppServerClient";
 /**
@@ -49,12 +49,30 @@ export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
     private baseURL;
     private checkHealth;
     private serverClient;
+    private cachedModelCapabilities;
+    private detectionAttempted;
     /**
      * Creates a new llama.cpp client adapter
      *
      * @param config Optional configuration for the adapter
      */
     constructor(config?: LlamaCppClientConfig);
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    getModelCapabilities(): Promise<Partial<ModelInfo> | null>;
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache(): void;
     /**
      * Sends a chat message to llama.cpp server
      *
package/dist/llm/clients/LlamaCppClientAdapter.js CHANGED
@@ -10,6 +10,7 @@ const openai_1 = __importDefault(require("openai"));
 const types_1 = require("./types");
 const adapterErrorUtils_1 = require("./adapterErrorUtils");
 const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+const config_1 = require("../config");
 /**
  * Client adapter for llama.cpp server integration
  *
@@ -52,10 +53,68 @@ class LlamaCppClientAdapter {
      * @param config Optional configuration for the adapter
      */
     constructor(config) {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
         this.baseURL = config?.baseURL || 'http://localhost:8080';
         this.checkHealth = config?.checkHealth || false;
         this.serverClient = new LlamaCppServerClient_1.LlamaCppServerClient(this.baseURL);
     }
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    async getModelCapabilities() {
+        // Return cached result if available
+        if (this.cachedModelCapabilities !== null) {
+            return this.cachedModelCapabilities;
+        }
+        // Return null if we already tried and failed
+        if (this.detectionAttempted) {
+            return null;
+        }
+        // Attempt detection
+        try {
+            console.log(`Detecting model capabilities from llama.cpp server at ${this.baseURL}`);
+            const { data } = await this.serverClient.getModels();
+            if (!data || data.length === 0) {
+                console.warn('No models loaded in llama.cpp server');
+                this.detectionAttempted = true;
+                return null;
+            }
+            const ggufFilename = data[0].id;
+            const capabilities = (0, config_1.detectGgufCapabilities)(ggufFilename);
+            // Cache the result (even if null)
+            this.cachedModelCapabilities = capabilities;
+            this.detectionAttempted = true;
+            if (capabilities) {
+                console.log(`Cached model capabilities for: ${ggufFilename}`);
+            }
+            else {
+                console.log(`No known pattern matched for: ${ggufFilename}`);
+            }
+            return capabilities;
+        }
+        catch (error) {
+            console.warn('Failed to detect model capabilities:', error);
+            this.detectionAttempted = true;
+            return null;
+        }
+    }
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache() {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
+        console.log('Cleared model capabilities cache');
+    }
     /**
      * Sends a chat message to llama.cpp server
      *
@@ -132,6 +191,13 @@ class LlamaCppClientAdapter {
         }
         catch (error) {
             console.error("llama.cpp API error:", error);
+            // Clear cache on connection errors so we re-detect on next request
+            const errorMessage = error?.message || String(error);
+            if (errorMessage.includes("ECONNREFUSED") ||
+                errorMessage.includes("fetch failed") ||
+                errorMessage.includes("connect")) {
+                this.clearModelCache();
+            }
             return this.createErrorResponse(error, request);
         }
     }
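The caching logic above is also reachable directly through the exported adapter. A brief sketch of the cache lifecycle, using only the methods declared in this diff:

```typescript
import { LlamaCppClientAdapter } from 'genai-lite';

const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });

// First call issues GET /v1/models; later calls return the cached result.
const caps = await adapter.getModelCapabilities();
console.log(caps?.reasoning?.supported ?? false);

// After restarting llama-server with a different GGUF file,
// reset detection manually (it also resets on connection errors).
adapter.clearModelCache();
await adapter.getModelCapabilities(); // re-detects against the new model
```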
@@ -219,19 +285,33 @@ class LlamaCppClientAdapter {
         if (!choice || !choice.message) {
             throw new Error("No valid choices in llama.cpp completion response");
         }
+        // Extract reasoning content if available
+        // llama.cpp returns reasoning in reasoning_content field when using --reasoning-format
+        let reasoning;
+        if (choice.message.reasoning_content) {
+            reasoning = choice.message.reasoning_content;
+        }
         return {
             id: completion.id,
             provider: request.providerId,
             model: completion.model || request.modelId,
             created: completion.created,
-            choices: completion.choices.map((c) => ({
-                message: {
-                    role: "assistant",
-                    content: c.message.content || "",
-                },
-                finish_reason: c.finish_reason,
-                index: c.index,
-            })),
+            choices: completion.choices.map((c) => {
+                const mappedChoice = {
+                    message: {
+                        role: "assistant",
+                        content: c.message.content || "",
+                    },
+                    finish_reason: c.finish_reason,
+                    index: c.index,
+                };
+                // Include reasoning if available and not excluded
+                const messageReasoning = c.message.reasoning_content;
+                if (messageReasoning && request.settings.reasoning && !request.settings.reasoning.exclude) {
+                    mappedChoice.reasoning = messageReasoning;
+                }
+                return mappedChoice;
+            }),
             usage: completion.usage
                 ? {
                     prompt_tokens: completion.usage.prompt_tokens,
package/dist/llm/clients/LlamaCppServerClient.d.ts CHANGED
@@ -62,6 +62,23 @@ export interface LlamaCppSlot {
 export interface LlamaCppSlotsResponse {
     slots: LlamaCppSlot[];
 }
+/**
+ * Individual model information from /v1/models endpoint
+ */
+export interface LlamaCppModel {
+    id: string;
+    object?: string;
+    created?: number;
+    owned_by?: string;
+    [key: string]: any;
+}
+/**
+ * Response from the /v1/models endpoint
+ */
+export interface LlamaCppModelsResponse {
+    object: string;
+    data: LlamaCppModel[];
+}
 /**
  * Client for interacting with llama.cpp server's management and utility endpoints
  *
@@ -158,4 +175,23 @@ export declare class LlamaCppServerClient {
      * @throws Error if the request fails or endpoint is not enabled
      */
     getSlots(): Promise<LlamaCppSlotsResponse>;
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    getModels(): Promise<LlamaCppModelsResponse>;
 }
package/dist/llm/clients/LlamaCppServerClient.js CHANGED
@@ -188,5 +188,30 @@ class LlamaCppServerClient {
         }
         return await response.json();
     }
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    async getModels() {
+        const response = await fetch(`${this.baseURL}/v1/models`);
+        if (!response.ok) {
+            throw new Error(`Get models failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
 }
 exports.LlamaCppServerClient = LlamaCppServerClient;
package/dist/llm/config.d.ts CHANGED
@@ -31,6 +31,46 @@ export declare const MODEL_DEFAULT_SETTINGS: Record<string, Partial<LLMSettings>
  * Supported LLM providers
  */
 export declare const SUPPORTED_PROVIDERS: ProviderInfo[];
+/**
+ * Pattern definition for detecting GGUF model capabilities
+ */
+export interface GgufModelPattern {
+    /** Pattern to match in the GGUF filename (case-insensitive substring match) */
+    pattern: string;
+    /** Human-readable name for the model */
+    name: string;
+    /** Optional description */
+    description?: string;
+    /** Model capabilities (reasoning config, context window, etc.) */
+    capabilities: Partial<ModelInfo>;
+}
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+export declare const KNOWN_GGUF_MODELS: GgufModelPattern[];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+export declare function detectGgufCapabilities(ggufFilename: string): Partial<ModelInfo> | null;
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
@@ -82,9 +122,10 @@ export declare function isModelSupported(modelId: string, providerId: string): b
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @returns ModelInfo with default/placeholder values
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-export declare function createFallbackModelInfo(modelId: string, providerId: string): ModelInfo;
+export declare function createFallbackModelInfo(modelId: string, providerId: string, capabilities?: Partial<ModelInfo>): ModelInfo;
 /**
  * Gets merged default settings for a specific model and provider
  *
package/dist/llm/config.js CHANGED
@@ -2,7 +2,8 @@
 // AI Summary: Configuration for LLM module including default settings, supported providers, and models.
 // Defines operational parameters and available LLM options for the application.
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.SUPPORTED_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.SUPPORTED_MODELS = exports.KNOWN_GGUF_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.detectGgufCapabilities = detectGgufCapabilities;
 exports.getProviderById = getProviderById;
 exports.getModelById = getModelById;
 exports.getModelsByProvider = getModelsByProvider;
@@ -133,6 +134,150 @@ exports.SUPPORTED_PROVIDERS = [
         allowUnknownModels: true, // Test provider accepts any model
     },
 ];
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+exports.KNOWN_GGUF_MODELS = [
+    // Qwen 3 Series - All support thinking/reasoning
+    {
+        pattern: "qwen3-30b",
+        name: "Qwen 3 30B",
+        description: "Qwen 3 30B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 16384,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-14b",
+        name: "Qwen 3 14B",
+        description: "Qwen 3 14B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-8b",
+        name: "Qwen 3 8B",
+        description: "Qwen 3 8B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-4b",
+        name: "Qwen 3 4B",
+        description: "Qwen 3 4B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-1.7b",
+        name: "Qwen 3 1.7B",
+        description: "Qwen 3 1.7B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-0.6b",
+        name: "Qwen 3 0.6B",
+        description: "Qwen 3 0.6B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    // Add more model patterns here as needed
+    // DeepSeek, Llama, etc.
+];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+function detectGgufCapabilities(ggufFilename) {
+    const lowerFilename = ggufFilename.toLowerCase();
+    // First match wins (array is pre-ordered from specific to generic)
+    for (const model of exports.KNOWN_GGUF_MODELS) {
+        if (lowerFilename.includes(model.pattern.toLowerCase())) {
+            console.log(`Detected GGUF model: ${model.name} (pattern: ${model.pattern})`);
+            return model.capabilities;
+        }
+    }
+    // No match found
+    return null;
+}
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
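Since `KNOWN_GGUF_MODELS` is exported as a plain array and `detectGgufCapabilities` iterates it in order, callers can in principle register their own patterns. A sketch under that assumption; the diff neither documents nor forbids mutating the array, and the DeepSeek numbers below are illustrative, not from the package:

```typescript
import { KNOWN_GGUF_MODELS, detectGgufCapabilities } from 'genai-lite';

// unshift so the custom pattern wins over any more generic entry
KNOWN_GGUF_MODELS.unshift({
  pattern: 'deepseek-r1',
  name: 'DeepSeek R1 (custom)',
  capabilities: {
    maxTokens: 8192, // illustrative values only
    contextWindow: 65536,
    supportsImages: false,
    supportsPromptCache: false,
    reasoning: { supported: true, enabledByDefault: true, canDisable: false, maxBudget: 32768 },
  },
});

console.log(detectGgufCapabilities('DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf')?.reasoning);
```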
@@ -521,10 +666,11 @@ function isModelSupported(modelId, providerId) {
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @returns ModelInfo with default/placeholder values
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-function createFallbackModelInfo(modelId, providerId) {
-    return {
+function createFallbackModelInfo(modelId, providerId, capabilities) {
+    const defaults = {
         id: modelId,
         name: modelId,
         providerId: providerId,
@@ -536,6 +682,23 @@ function createFallbackModelInfo(modelId, providerId) {
         supportsImages: false,
         supportsPromptCache: false,
     };
+    // Merge detected capabilities if provided
+    if (capabilities) {
+        return {
+            ...defaults,
+            ...capabilities,
+            // Always preserve these from defaults/params
+            id: modelId,
+            name: capabilities.name || modelId,
+            providerId: providerId,
+            // For local models, pricing is always 0
+            inputPrice: 0,
+            outputPrice: 0,
+            cacheWritesPrice: undefined,
+            cacheReadsPrice: undefined,
+        };
+    }
+    return defaults;
 }
 /**
  * Gets merged default settings for a specific model and provider
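A quick sketch of the merge semantics implemented above, using the qwen3-4b pattern defined earlier in this diff (field names follow `ModelInfo` as it appears here):

```typescript
import { createFallbackModelInfo, detectGgufCapabilities } from 'genai-lite';

const caps = detectGgufCapabilities('Qwen3-4B-Q5_K_M.gguf');
const info = createFallbackModelInfo('qwen3-4b', 'llamacpp', caps ?? undefined);

console.log(info.contextWindow);        // 131072, from the matched pattern
console.log(info.reasoning?.supported); // true
console.log(info.inputPrice, info.outputPrice); // 0 0, local pricing is forced to zero
```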
package/dist/llm/ModelResolver.d.ts CHANGED
@@ -1,5 +1,6 @@
 import type { LLMFailureResponse, LLMSettings, ModelInfo } from "../types";
 import { PresetManager } from "./PresetManager";
+import { AdapterRegistry } from "./AdapterRegistry";
 /**
  * Options for model selection
  */
@@ -24,12 +25,13 @@ export interface ModelResolution {
  */
 export declare class ModelResolver {
     private presetManager;
-    constructor(presetManager: PresetManager);
+    private adapterRegistry;
+    constructor(presetManager: PresetManager, adapterRegistry: AdapterRegistry);
     /**
      * Resolves model information from either a preset ID or provider/model IDs
      *
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options: ModelSelectionOptions): ModelResolution;
+    resolve(options: ModelSelectionOptions): Promise<ModelResolution>;
 }
package/dist/llm/ModelResolver.js CHANGED
@@ -6,8 +6,9 @@ const config_1 = require("../config");
  * Resolves model information from presets or direct provider/model IDs
  */
 class ModelResolver {
-    constructor(presetManager) {
+    constructor(presetManager, adapterRegistry) {
         this.presetManager = presetManager;
+        this.adapterRegistry = adapterRegistry;
     }
     /**
      * Resolves model information from either a preset ID or provider/model IDs
@@ -15,7 +16,7 @@ class ModelResolver {
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options) {
+    async resolve(options) {
         // If presetId is provided, use it
         if (options.presetId) {
             const preset = this.presetManager.resolvePreset(options.presetId);
@@ -94,15 +95,31 @@ class ModelResolver {
         if (!modelInfo) {
             // Check if provider allows unknown models
             const provider = (0, config_1.getProviderById)(options.providerId);
+            // For llamacpp, try to detect capabilities from the adapter's cache
+            let detectedCapabilities;
+            if (options.providerId === 'llamacpp') {
+                try {
+                    const adapter = this.adapterRegistry.getAdapter('llamacpp');
+                    // Check if adapter has the getModelCapabilities method
+                    if (adapter && typeof adapter.getModelCapabilities === 'function') {
+                        const capabilities = await adapter.getModelCapabilities();
+                        detectedCapabilities = capabilities || undefined;
+                    }
+                }
+                catch (error) {
+                    console.warn('Failed to detect GGUF model capabilities:', error);
+                    // Continue with fallback
+                }
+            }
             if (provider?.allowUnknownModels) {
-                // Flexible provider (e.g., llamacpp) - silent fallback
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                // Flexible provider (e.g., llamacpp) - silent fallback with detected capabilities
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
             else {
                 // Strict provider - warn but allow
                 console.warn(`⚠️ Unknown model "${options.modelId}" for provider "${options.providerId}". ` +
                     `Using default settings. This may fail at the provider API if the model doesn't exist.`);
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
         }
         return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "genai-lite",
-  "version": "0.4.1",
+  "version": "0.4.3",
   "description": "A lightweight, portable toolkit for interacting with various Generative AI APIs.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",