genai-lite 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -151,6 +151,8 @@ const llmService = new LLMService(myKeyProvider);
 
 Run models locally via [llama.cpp](https://github.com/ggml-org/llama.cpp) server. Model IDs can be any name—they're not validated since you load your own GGUF models.
 
+**Automatic Capability Detection:** genai-lite automatically detects capabilities (reasoning support, context windows, token limits) for known open-weights models (Qwen3, etc.) by matching the GGUF filename from the server. No configuration needed.
+
 **Example models:**
 - `llama-3-8b-instruct` - Llama 3 8B Instruct
 - `llama-3-70b-instruct` - Llama 3 70B Instruct
@@ -212,6 +214,7 @@ Some models include advanced reasoning/thinking capabilities that enhance their
 - **Anthropic**: Claude Sonnet 4, Claude Opus 4, Claude 3.7 Sonnet
 - **Google Gemini**: Gemini 2.5 Pro (always on), Gemini 2.5 Flash, Gemini 2.5 Flash-Lite Preview
 - **OpenAI**: o4-mini (always on)
+- **llama.cpp**: Qwen3, DeepSeek-R1, GPT-OSS (requires `--reasoning-format deepseek` server flag)
 
 See the [Reasoning Mode](#reasoning-mode) section for usage details.
 
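To make the new llama.cpp entry concrete, here is a minimal request sketch. It relies only on shapes visible elsewhere in this diff (`providerId`/`modelId`, a `settings.reasoning` object with an `exclude` flag, and a `reasoning` field on returned choices); the `enabled` flag name is an assumption, not confirmed by this diff.

```typescript
import { LLMService, fromEnvironment } from 'genai-lite';

const llmService = new LLMService(fromEnvironment);

const response = await llmService.sendMessage({
  providerId: 'llamacpp',
  modelId: 'qwen3-8b', // any name; capabilities are detected from the GGUF filename
  messages: [{ role: 'user', content: 'Why is the sky blue?' }],
  settings: { reasoning: { enabled: true } }, // 'enabled' is assumed here
});

if (response.object !== 'error') {
  // Populated from llama.cpp's reasoning_content when --reasoning-format is set
  console.log(response.choices[0].reasoning);
}
```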
@@ -794,12 +797,20 @@ Get GGUF models from Hugging Face, for example:
 # Basic usage
 llama-server -m /path/to/model.gguf --port 8080
 
-# With more options
+# With reasoning support (for Qwen3, DeepSeek-R1, etc.)
+llama-server -m /path/to/model.gguf \
+  --port 8080 \
+  --jinja \
+  --reasoning-format deepseek
+
+# Full options
 llama-server -m /path/to/model.gguf \
   --port 8080 \
-  -c 4096 \ # Context size
-  -np 4 \ # Parallel requests
-  --threads 8 # CPU threads
+  --jinja \ # Required for reasoning
+  --reasoning-format deepseek \ # Extract reasoning from <think> tags
+  -c 4096 \ # Context size
+  -np 4 \ # Parallel requests
+  --threads 8 # CPU threads
 ```
 
 ### Basic Usage
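On the client side, the adapter only needs the server's address. A small sketch of pointing `LlamaCppClientAdapter` at the server started above; both options appear in the adapter's constructor in this diff, though the exact health-check behavior is not shown here and is assumed.

```typescript
import { LlamaCppClientAdapter } from 'genai-lite';

// baseURL defaults to http://localhost:8080 when omitted
const adapter = new LlamaCppClientAdapter({
  baseURL: 'http://localhost:8080',
  checkHealth: true, // assumed: pre-flight health probe before sending requests
});
```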
@@ -1077,7 +1088,9 @@ import type {
 import {
   LlamaCppClientAdapter,
   LlamaCppServerClient,
-  createFallbackModelInfo
+  createFallbackModelInfo,
+  detectGgufCapabilities,
+  KNOWN_GGUF_MODELS
 } from 'genai-lite';
 
 import type {
@@ -1090,7 +1103,10 @@ import type {
   LlamaCppPropsResponse,
   LlamaCppMetricsResponse,
   LlamaCppSlot,
-  LlamaCppSlotsResponse
+  LlamaCppSlotsResponse,
+  LlamaCppModel,
+  LlamaCppModelsResponse,
+  GgufModelPattern
 } from 'genai-lite';
 ```
 
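Taken together, a short sketch of the newly exported helpers in use (the filename is illustrative; behavior follows the implementations later in this diff):

```typescript
import {
  detectGgufCapabilities,
  KNOWN_GGUF_MODELS,
  LlamaCppServerClient,
} from 'genai-lite';

// Case-insensitive substring match; first pattern in the array wins.
const caps = detectGgufCapabilities('Qwen3-8B-Instruct-Q4_K_M.gguf');
console.log(caps?.contextWindow); // 131072 for the qwen3-8b pattern

// Inspect which filename substrings are currently recognized.
for (const { pattern, name } of KNOWN_GGUF_MODELS) {
  console.log(`${pattern} -> ${name}`);
}

// Or query the server directly for the loaded GGUF filename.
const client = new LlamaCppServerClient('http://localhost:8080');
const { data } = await client.getModels();
console.log(data[0]?.id);
```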
package/dist/index.d.ts CHANGED
@@ -8,9 +8,10 @@ export { fromEnvironment } from "./providers/fromEnvironment";
 export { LlamaCppClientAdapter } from "./llm/clients/LlamaCppClientAdapter";
 export { LlamaCppServerClient } from "./llm/clients/LlamaCppServerClient";
 export type { LlamaCppClientConfig, } from "./llm/clients/LlamaCppClientAdapter";
-export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, } from "./llm/clients/LlamaCppServerClient";
+export type { LlamaCppHealthResponse, LlamaCppTokenizeResponse, LlamaCppDetokenizeResponse, LlamaCppEmbeddingResponse, LlamaCppInfillResponse, LlamaCppPropsResponse, LlamaCppMetricsResponse, LlamaCppSlot, LlamaCppSlotsResponse, LlamaCppModel, LlamaCppModelsResponse, } from "./llm/clients/LlamaCppServerClient";
 export { renderTemplate } from "./prompting/template";
 export { countTokens, getSmartPreview, extractRandomVariables } from "./prompting/content";
 export { parseStructuredContent, parseRoleTags, extractInitialTaggedContent, parseTemplateWithMetadata } from "./prompting/parser";
 export type { TemplateMetadata } from "./prompting/parser";
-export { createFallbackModelInfo } from "./llm/config";
+export { createFallbackModelInfo, detectGgufCapabilities, KNOWN_GGUF_MODELS } from "./llm/config";
+export type { GgufModelPattern } from "./llm/config";
package/dist/index.js CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
 for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
+exports.KNOWN_GGUF_MODELS = exports.detectGgufCapabilities = exports.createFallbackModelInfo = exports.parseTemplateWithMetadata = exports.extractInitialTaggedContent = exports.parseRoleTags = exports.parseStructuredContent = exports.extractRandomVariables = exports.getSmartPreview = exports.countTokens = exports.renderTemplate = exports.LlamaCppServerClient = exports.LlamaCppClientAdapter = exports.fromEnvironment = exports.LLMService = void 0;
 // --- LLM Service ---
 var LLMService_1 = require("./llm/LLMService");
 Object.defineProperty(exports, "LLMService", { enumerable: true, get: function () { return LLMService_1.LLMService; } });
@@ -44,3 +44,5 @@ Object.defineProperty(exports, "extractInitialTaggedContent", { enumerable: true
 Object.defineProperty(exports, "parseTemplateWithMetadata", { enumerable: true, get: function () { return parser_1.parseTemplateWithMetadata; } });
 var config_1 = require("./llm/config");
 Object.defineProperty(exports, "createFallbackModelInfo", { enumerable: true, get: function () { return config_1.createFallbackModelInfo; } });
+Object.defineProperty(exports, "detectGgufCapabilities", { enumerable: true, get: function () { return config_1.detectGgufCapabilities; } });
+Object.defineProperty(exports, "KNOWN_GGUF_MODELS", { enumerable: true, get: function () { return config_1.KNOWN_GGUF_MODELS; } });
package/dist/llm/LLMService.js CHANGED
@@ -31,7 +31,7 @@ class LLMService {
         this.adapterRegistry = new AdapterRegistry_1.AdapterRegistry();
         this.requestValidator = new RequestValidator_1.RequestValidator();
         this.settingsManager = new SettingsManager_1.SettingsManager();
-        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager);
+        this.modelResolver = new ModelResolver_1.ModelResolver(this.presetManager, this.adapterRegistry);
     }
     /**
      * Gets list of supported LLM providers
@@ -69,7 +69,7 @@ class LLMService {
         console.log(`LLMService.sendMessage called with presetId: ${request.presetId}, provider: ${request.providerId}, model: ${request.modelId}`);
         try {
             // Resolve model information from preset or direct IDs
-            const resolved = this.modelResolver.resolve(request);
+            const resolved = await this.modelResolver.resolve(request);
             if (resolved.error) {
                 return resolved.error;
             }
@@ -143,6 +143,19 @@ class LLMService {
                 object: "error",
             };
         }
+        // Validate API key format if adapter supports it
+        if (clientAdapter.validateApiKey && !clientAdapter.validateApiKey(apiKey)) {
+            return {
+                provider: providerId,
+                model: modelId,
+                error: {
+                    message: `Invalid API key format for provider '${providerId}'. Please check your API key.`,
+                    code: "INVALID_API_KEY",
+                    type: "authentication_error",
+                },
+                object: "error",
+            };
+        }
         console.log(`Making LLM request with ${clientAdapter.constructor.name} for provider: ${providerId}`);
         const result = await clientAdapter.sendMessage(internalRequest, apiKey);
         // Post-process for thinking tag fallback
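The new guard calls an optional `validateApiKey` hook before dispatching. A sketch of an adapter opting in; the hook's signature is inferred from the call site above, and the interface name below is only for illustration (whether `ILLMClientAdapter` is exported from the package root is not shown in this diff).

```typescript
// Inferred shape of the optional hook used by LLMService above.
interface ValidatingAdapter {
  validateApiKey?: (apiKey: string) => boolean;
}

// Example: a cheap format check that runs before any network call,
// turning obviously malformed keys into INVALID_API_KEY failures.
const adapter: ValidatingAdapter = {
  validateApiKey: (apiKey) => /^sk-[A-Za-z0-9_-]{20,}$/.test(apiKey),
};
```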
@@ -314,7 +327,7 @@ class LLMService {
         let modelContext = null;
         if (options.presetId || (options.providerId && options.modelId)) {
             // Resolve model information
-            const resolved = this.modelResolver.resolve({
+            const resolved = await this.modelResolver.resolve({
                 presetId: options.presetId,
                 providerId: options.providerId,
                 modelId: options.modelId,
package/dist/llm/clients/LlamaCppClientAdapter.d.ts CHANGED
@@ -1,4 +1,4 @@
-import type { LLMResponse, LLMFailureResponse } from "../types";
+import type { LLMResponse, LLMFailureResponse, ModelInfo } from "../types";
 import type { ILLMClientAdapter, InternalLLMChatRequest } from "./types";
 import { LlamaCppServerClient } from "./LlamaCppServerClient";
 /**
@@ -49,12 +49,30 @@ export declare class LlamaCppClientAdapter implements ILLMClientAdapter {
     private baseURL;
     private checkHealth;
     private serverClient;
+    private cachedModelCapabilities;
+    private detectionAttempted;
     /**
      * Creates a new llama.cpp client adapter
      *
      * @param config Optional configuration for the adapter
      */
     constructor(config?: LlamaCppClientConfig);
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    getModelCapabilities(): Promise<Partial<ModelInfo> | null>;
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache(): void;
     /**
      * Sends a chat message to llama.cpp server
      *
package/dist/llm/clients/LlamaCppClientAdapter.js CHANGED
@@ -10,6 +10,7 @@ const openai_1 = __importDefault(require("openai"));
 const types_1 = require("./types");
 const adapterErrorUtils_1 = require("./adapterErrorUtils");
 const LlamaCppServerClient_1 = require("./LlamaCppServerClient");
+const config_1 = require("../config");
 /**
  * Client adapter for llama.cpp server integration
  *
@@ -52,10 +53,68 @@ class LlamaCppClientAdapter {
      * @param config Optional configuration for the adapter
      */
     constructor(config) {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
         this.baseURL = config?.baseURL || 'http://localhost:8080';
         this.checkHealth = config?.checkHealth || false;
         this.serverClient = new LlamaCppServerClient_1.LlamaCppServerClient(this.baseURL);
     }
+    /**
+     * Gets model capabilities by detecting the loaded GGUF model
+     *
+     * This method caches the result to avoid repeated HTTP calls.
+     * Cache is automatically cleared on connection errors in sendMessage().
+     *
+     * @returns Detected model capabilities or null if detection fails
+     */
+    async getModelCapabilities() {
+        // Return cached result if available
+        if (this.cachedModelCapabilities !== null) {
+            return this.cachedModelCapabilities;
+        }
+        // Return null if we already tried and failed
+        if (this.detectionAttempted) {
+            return null;
+        }
+        // Attempt detection
+        try {
+            console.log(`Detecting model capabilities from llama.cpp server at ${this.baseURL}`);
+            const { data } = await this.serverClient.getModels();
+            if (!data || data.length === 0) {
+                console.warn('No models loaded in llama.cpp server');
+                this.detectionAttempted = true;
+                return null;
+            }
+            const ggufFilename = data[0].id;
+            const capabilities = (0, config_1.detectGgufCapabilities)(ggufFilename);
+            // Cache the result (even if null)
+            this.cachedModelCapabilities = capabilities;
+            this.detectionAttempted = true;
+            if (capabilities) {
+                console.log(`Cached model capabilities for: ${ggufFilename}`);
+            }
+            else {
+                console.log(`No known pattern matched for: ${ggufFilename}`);
+            }
+            return capabilities;
+        }
+        catch (error) {
+            console.warn('Failed to detect model capabilities:', error);
+            this.detectionAttempted = true;
+            return null;
+        }
+    }
+    /**
+     * Clears the cached model capabilities
+     *
+     * Called automatically on connection errors, or can be called manually
+     * if the server has been restarted with a different model.
+     */
+    clearModelCache() {
+        this.cachedModelCapabilities = null;
+        this.detectionAttempted = false;
+        console.log('Cleared model capabilities cache');
+    }
     /**
      * Sends a chat message to llama.cpp server
      *
@@ -132,6 +191,13 @@ class LlamaCppClientAdapter {
         }
         catch (error) {
             console.error("llama.cpp API error:", error);
+            // Clear cache on connection errors so we re-detect on next request
+            const errorMessage = error?.message || String(error);
+            if (errorMessage.includes("ECONNREFUSED") ||
+                errorMessage.includes("fetch failed") ||
+                errorMessage.includes("connect")) {
+                this.clearModelCache();
+            }
             return this.createErrorResponse(error, request);
         }
     }
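The caching logic above is also reachable directly through the exported adapter. A brief sketch of the cache lifecycle, using only the methods declared in this diff:

```typescript
import { LlamaCppClientAdapter } from 'genai-lite';

const adapter = new LlamaCppClientAdapter({ baseURL: 'http://localhost:8080' });

// First call issues GET /v1/models; later calls return the cached result.
const caps = await adapter.getModelCapabilities();
console.log(caps?.reasoning?.supported ?? false);

// After restarting llama-server with a different GGUF file,
// reset detection manually (it also resets on connection errors).
adapter.clearModelCache();
await adapter.getModelCapabilities(); // re-detects against the new model
```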
@@ -219,19 +285,33 @@ class LlamaCppClientAdapter {
         if (!choice || !choice.message) {
             throw new Error("No valid choices in llama.cpp completion response");
         }
+        // Extract reasoning content if available
+        // llama.cpp returns reasoning in reasoning_content field when using --reasoning-format
+        let reasoning;
+        if (choice.message.reasoning_content) {
+            reasoning = choice.message.reasoning_content;
+        }
         return {
             id: completion.id,
             provider: request.providerId,
             model: completion.model || request.modelId,
             created: completion.created,
-            choices: completion.choices.map((c) => ({
-                message: {
-                    role: "assistant",
-                    content: c.message.content || "",
-                },
-                finish_reason: c.finish_reason,
-                index: c.index,
-            })),
+            choices: completion.choices.map((c) => {
+                const mappedChoice = {
+                    message: {
+                        role: "assistant",
+                        content: c.message.content || "",
+                    },
+                    finish_reason: c.finish_reason,
+                    index: c.index,
+                };
+                // Include reasoning if available and not excluded
+                const messageReasoning = c.message.reasoning_content;
+                if (messageReasoning && request.settings.reasoning && !request.settings.reasoning.exclude) {
+                    mappedChoice.reasoning = messageReasoning;
+                }
+                return mappedChoice;
+            }),
             usage: completion.usage
                 ? {
                     prompt_tokens: completion.usage.prompt_tokens,
package/dist/llm/clients/LlamaCppServerClient.d.ts CHANGED
@@ -62,6 +62,23 @@ export interface LlamaCppSlot {
 export interface LlamaCppSlotsResponse {
     slots: LlamaCppSlot[];
 }
+/**
+ * Individual model information from /v1/models endpoint
+ */
+export interface LlamaCppModel {
+    id: string;
+    object?: string;
+    created?: number;
+    owned_by?: string;
+    [key: string]: any;
+}
+/**
+ * Response from the /v1/models endpoint
+ */
+export interface LlamaCppModelsResponse {
+    object: string;
+    data: LlamaCppModel[];
+}
 /**
  * Client for interacting with llama.cpp server's management and utility endpoints
  *
@@ -158,4 +175,23 @@ export declare class LlamaCppServerClient {
      * @throws Error if the request fails or endpoint is not enabled
      */
     getSlots(): Promise<LlamaCppSlotsResponse>;
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    getModels(): Promise<LlamaCppModelsResponse>;
 }
package/dist/llm/clients/LlamaCppServerClient.js CHANGED
@@ -188,5 +188,30 @@ class LlamaCppServerClient {
         }
         return await response.json();
     }
+    /**
+     * Retrieves the list of models loaded in the llama.cpp server
+     *
+     * This uses the OpenAI-compatible /v1/models endpoint to get information
+     * about the currently loaded model(s). Typically llama.cpp serves one model
+     * at a time, but this returns an array for API compatibility.
+     *
+     * @returns Promise resolving to models response with array of loaded models
+     * @throws Error if the request fails
+     *
+     * @example
+     * ```typescript
+     * const client = new LlamaCppServerClient('http://localhost:8080');
+     * const { data } = await client.getModels();
+     * console.log('Loaded model:', data[0].id);
+     * // Output: "Qwen2.5-7B-Instruct-Q4_K_M.gguf"
+     * ```
+     */
+    async getModels() {
+        const response = await fetch(`${this.baseURL}/v1/models`);
+        if (!response.ok) {
+            throw new Error(`Get models failed: ${response.status} ${response.statusText}`);
+        }
+        return await response.json();
+    }
 }
 exports.LlamaCppServerClient = LlamaCppServerClient;
package/dist/llm/config.d.ts CHANGED
@@ -31,6 +31,46 @@ export declare const MODEL_DEFAULT_SETTINGS: Record<string, Partial<LLMSettings>
  * Supported LLM providers
  */
 export declare const SUPPORTED_PROVIDERS: ProviderInfo[];
+/**
+ * Pattern definition for detecting GGUF model capabilities
+ */
+export interface GgufModelPattern {
+    /** Pattern to match in the GGUF filename (case-insensitive substring match) */
+    pattern: string;
+    /** Human-readable name for the model */
+    name: string;
+    /** Optional description */
+    description?: string;
+    /** Model capabilities (reasoning config, context window, etc.) */
+    capabilities: Partial<ModelInfo>;
+}
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+export declare const KNOWN_GGUF_MODELS: GgufModelPattern[];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+export declare function detectGgufCapabilities(ggufFilename: string): Partial<ModelInfo> | null;
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
@@ -82,9 +122,10 @@ export declare function isModelSupported(modelId: string, providerId: string): b
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @returns ModelInfo with default/placeholder values
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-export declare function createFallbackModelInfo(modelId: string, providerId: string): ModelInfo;
+export declare function createFallbackModelInfo(modelId: string, providerId: string, capabilities?: Partial<ModelInfo>): ModelInfo;
 /**
  * Gets merged default settings for a specific model and provider
  *
package/dist/llm/config.js CHANGED
@@ -2,7 +2,8 @@
 // AI Summary: Configuration for LLM module including default settings, supported providers, and models.
 // Defines operational parameters and available LLM options for the application.
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.SUPPORTED_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.SUPPORTED_MODELS = exports.KNOWN_GGUF_MODELS = exports.SUPPORTED_PROVIDERS = exports.MODEL_DEFAULT_SETTINGS = exports.PROVIDER_DEFAULT_SETTINGS = exports.DEFAULT_LLM_SETTINGS = exports.ADAPTER_CONFIGS = exports.ADAPTER_CONSTRUCTORS = void 0;
+exports.detectGgufCapabilities = detectGgufCapabilities;
 exports.getProviderById = getProviderById;
 exports.getModelById = getModelById;
 exports.getModelsByProvider = getModelsByProvider;
@@ -133,6 +134,150 @@ exports.SUPPORTED_PROVIDERS = [
         allowUnknownModels: true, // Test provider accepts any model
     },
 ];
+/**
+ * Known GGUF model patterns for capability detection
+ *
+ * Order matters: more specific patterns should come before generic ones.
+ * First matching pattern wins.
+ *
+ * Example: "Qwen3-0.6B-0522" should be before "Qwen3-0.6B"
+ */
+exports.KNOWN_GGUF_MODELS = [
+    // Qwen 3 Series - All support thinking/reasoning
+    {
+        pattern: "qwen3-30b",
+        name: "Qwen 3 30B",
+        description: "Qwen 3 30B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 16384,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-14b",
+        name: "Qwen 3 14B",
+        description: "Qwen 3 14B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-8b",
+        name: "Qwen 3 8B",
+        description: "Qwen 3 8B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-4b",
+        name: "Qwen 3 4B",
+        description: "Qwen 3 4B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 131072,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 38912,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-1.7b",
+        name: "Qwen 3 1.7B",
+        description: "Qwen 3 1.7B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    {
+        pattern: "qwen3-0.6b",
+        name: "Qwen 3 0.6B",
+        description: "Qwen 3 0.6B model with thinking capabilities",
+        capabilities: {
+            maxTokens: 8192,
+            contextWindow: 32768,
+            supportsImages: false,
+            supportsPromptCache: false,
+            reasoning: {
+                supported: true,
+                enabledByDefault: false,
+                canDisable: true,
+                maxBudget: 30720,
+            },
+        },
+    },
+    // Add more model patterns here as needed
+    // DeepSeek, Llama, etc.
+];
+/**
+ * Detects model capabilities from GGUF filename
+ *
+ * Performs case-insensitive substring matching against known model patterns.
+ * Returns the first matching pattern's capabilities (array order determines priority).
+ *
+ * @param ggufFilename - The GGUF model filename (e.g., "Qwen3-8B-Instruct-Q4_K_M.gguf")
+ * @returns Partial ModelInfo with detected capabilities, or null if no match
+ *
+ * @example
+ * ```typescript
+ * const caps = detectGgufCapabilities("Qwen3-8B-Instruct-Q4_K_M.gguf");
+ * if (caps?.reasoning?.supported) {
+ *   console.log("This model supports thinking!");
+ * }
+ * ```
+ */
+function detectGgufCapabilities(ggufFilename) {
+    const lowerFilename = ggufFilename.toLowerCase();
+    // First match wins (array is pre-ordered from specific to generic)
+    for (const model of exports.KNOWN_GGUF_MODELS) {
+        if (lowerFilename.includes(model.pattern.toLowerCase())) {
+            console.log(`Detected GGUF model: ${model.name} (pattern: ${model.pattern})`);
+            return model.capabilities;
+        }
+    }
+    // No match found
+    return null;
+}
 /**
  * Supported LLM models with their configurations
  * ModelInfo is similar to Cline model info
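Since `KNOWN_GGUF_MODELS` is exported as a plain array and `detectGgufCapabilities` iterates it in order, callers can in principle register their own patterns. A sketch under that assumption; the diff neither documents nor forbids mutating the array, and the DeepSeek numbers below are illustrative, not from the package:

```typescript
import { KNOWN_GGUF_MODELS, detectGgufCapabilities } from 'genai-lite';

// unshift so the custom pattern wins over any more generic entry
KNOWN_GGUF_MODELS.unshift({
  pattern: 'deepseek-r1',
  name: 'DeepSeek R1 (custom)',
  capabilities: {
    maxTokens: 8192, // illustrative values only
    contextWindow: 65536,
    supportsImages: false,
    supportsPromptCache: false,
    reasoning: { supported: true, enabledByDefault: true, canDisable: false, maxBudget: 32768 },
  },
});

console.log(detectGgufCapabilities('DeepSeek-R1-Distill-Qwen-7B-Q4_K_M.gguf')?.reasoning);
```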
@@ -521,10 +666,11 @@ function isModelSupported(modelId, providerId) {
  *
  * @param modelId - The model ID to create info for
  * @param providerId - The provider ID
- * @returns ModelInfo with default/placeholder values
+ * @param capabilities - Optional detected capabilities to merge (e.g., from GGUF detection)
+ * @returns ModelInfo with default/placeholder values, enhanced with detected capabilities
  */
-function createFallbackModelInfo(modelId, providerId) {
-    return {
+function createFallbackModelInfo(modelId, providerId, capabilities) {
+    const defaults = {
         id: modelId,
         name: modelId,
         providerId: providerId,
@@ -536,6 +682,23 @@ function createFallbackModelInfo(modelId, providerId) {
         supportsImages: false,
         supportsPromptCache: false,
     };
+    // Merge detected capabilities if provided
+    if (capabilities) {
+        return {
+            ...defaults,
+            ...capabilities,
+            // Always preserve these from defaults/params
+            id: modelId,
+            name: capabilities.name || modelId,
+            providerId: providerId,
+            // For local models, pricing is always 0
+            inputPrice: 0,
+            outputPrice: 0,
+            cacheWritesPrice: undefined,
+            cacheReadsPrice: undefined,
+        };
+    }
+    return defaults;
 }
 /**
  * Gets merged default settings for a specific model and provider
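A quick sketch of the merge semantics implemented above, using the qwen3-4b pattern defined earlier in this diff (field names follow `ModelInfo` as it appears here):

```typescript
import { createFallbackModelInfo, detectGgufCapabilities } from 'genai-lite';

const caps = detectGgufCapabilities('Qwen3-4B-Q5_K_M.gguf');
const info = createFallbackModelInfo('qwen3-4b', 'llamacpp', caps ?? undefined);

console.log(info.contextWindow);        // 131072, from the matched pattern
console.log(info.reasoning?.supported); // true
console.log(info.inputPrice, info.outputPrice); // 0 0, local pricing is forced to zero
```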
package/dist/llm/ModelResolver.d.ts CHANGED
@@ -1,5 +1,6 @@
 import type { LLMFailureResponse, LLMSettings, ModelInfo } from "../types";
 import { PresetManager } from "./PresetManager";
+import { AdapterRegistry } from "./AdapterRegistry";
 /**
  * Options for model selection
  */
@@ -24,12 +25,13 @@ export interface ModelResolution {
  */
 export declare class ModelResolver {
     private presetManager;
-    constructor(presetManager: PresetManager);
+    private adapterRegistry;
+    constructor(presetManager: PresetManager, adapterRegistry: AdapterRegistry);
     /**
      * Resolves model information from either a preset ID or provider/model IDs
      *
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options: ModelSelectionOptions): ModelResolution;
+    resolve(options: ModelSelectionOptions): Promise<ModelResolution>;
 }
package/dist/llm/ModelResolver.js CHANGED
@@ -6,8 +6,9 @@ const config_1 = require("../config");
  * Resolves model information from presets or direct provider/model IDs
  */
 class ModelResolver {
-    constructor(presetManager) {
+    constructor(presetManager, adapterRegistry) {
         this.presetManager = presetManager;
+        this.adapterRegistry = adapterRegistry;
     }
     /**
      * Resolves model information from either a preset ID or provider/model IDs
@@ -15,7 +16,7 @@ class ModelResolver {
      * @param options Options containing either presetId or providerId/modelId
      * @returns Resolved model info and settings or error response
      */
-    resolve(options) {
+    async resolve(options) {
         // If presetId is provided, use it
         if (options.presetId) {
             const preset = this.presetManager.resolvePreset(options.presetId);
@@ -94,15 +95,31 @@ class ModelResolver {
         if (!modelInfo) {
             // Check if provider allows unknown models
             const provider = (0, config_1.getProviderById)(options.providerId);
+            // For llamacpp, try to detect capabilities from the adapter's cache
+            let detectedCapabilities;
+            if (options.providerId === 'llamacpp') {
+                try {
+                    const adapter = this.adapterRegistry.getAdapter('llamacpp');
+                    // Check if adapter has the getModelCapabilities method
+                    if (adapter && typeof adapter.getModelCapabilities === 'function') {
+                        const capabilities = await adapter.getModelCapabilities();
+                        detectedCapabilities = capabilities || undefined;
+                    }
+                }
+                catch (error) {
+                    console.warn('Failed to detect GGUF model capabilities:', error);
+                    // Continue with fallback
+                }
+            }
             if (provider?.allowUnknownModels) {
-                // Flexible provider (e.g., llamacpp) - silent fallback
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                // Flexible provider (e.g., llamacpp) - silent fallback with detected capabilities
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
             else {
                 // Strict provider - warn but allow
                 console.warn(`⚠️ Unknown model "${options.modelId}" for provider "${options.providerId}". ` +
                     `Using default settings. This may fail at the provider API if the model doesn't exist.`);
-                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId);
+                modelInfo = (0, config_1.createFallbackModelInfo)(options.modelId, options.providerId, detectedCapabilities);
             }
         }
         return {
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "genai-lite",
-  "version": "0.4.1",
+  "version": "0.4.3",
   "description": "A lightweight, portable toolkit for interacting with various Generative AI APIs.",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",