npm - lynkr - Versions diffs - 7.2.3 → 7.2.5 - Mend

lynkr 7.2.3 → 7.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/package.json +4 -4
package/src/clients/databricks.js +4 -4
package/src/clients/retry.js +6 -1
package/src/clients/standard-tools.js +6 -2
package/src/config/index.js +6 -0
package/src/headroom/launcher.js +8 -3
package/src/orchestrator/index.js +106 -2
package/src/routing/model-registry.js +437 -0
package/src/tools/smart-selection.js +7 -1
package/src/tools/workspace.js +35 -4
package/src/workspace/index.js +30 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "lynkr",
-  "version": "7.2.3",
+  "version": "7.2.5",
   "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure  adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
   "main": "index.js",
   "bin": {
@@ -8,9 +8,9 @@
     "lynkr-setup": "./scripts/setup.js"
   },
   "scripts": {
-    "prestart": "docker compose --profile headroom up -d headroom 2>/dev/null || echo 'Headroom container not started (Docker may not be running)'",
+    "prestart": "node -e \"if(process.env.HEADROOM_ENABLED==='true'&&process.env.HEADROOM_DOCKER_ENABLED!=='false'){process.exit(0)}else{process.exit(1)}\" && docker compose --profile headroom up -d headroom 2>/dev/null || echo 'Headroom skipped (disabled or Docker not running)'",
     "start": "node index.js 2>&1 | npx pino-pretty --sync",
-    "stop": "docker compose --profile headroom down",
+    "stop": "node -e \"if(process.env.HEADROOM_ENABLED==='true'&&process.env.HEADROOM_DOCKER_ENABLED!=='false'){process.exit(0)}else{process.exit(1)}\" && docker compose --profile headroom down || echo 'Headroom skipped (disabled or Docker not running)'",
     "dev": "nodemon index.js",
     "lint": "eslint src index.js",
     "test": "npm run test:unit && npm run test:performance",
@@ -49,7 +49,6 @@
     "@babel/traverse": "^7.29.0",
     "compression": "^1.7.4",
     "diff": "^5.2.0",
-    "dockerode": "^4.0.2",
     "dotenv": "^16.4.5",
     "express": "^5.1.0",
     "express-rate-limit": "^8.2.1",
@@ -62,6 +61,7 @@
   },
   "optionalDependencies": {
     "better-sqlite3": "^12.6.2",
+    "dockerode": "^4.0.2",
     "tree-sitter": "^0.21.1",
     "tree-sitter-javascript": "^0.21.0",
     "tree-sitter-python": "^0.21.0",

package/src/clients/databricks.js CHANGED Viewed

@@ -309,7 +309,7 @@ async function invokeOllama(body) {
   }
   const ollamaBody = {
-    model: config.ollama.model,
+    model: body._suggestionModeModel || config.ollama.model,
     messages: deduplicated,
     stream: false,  // Force non-streaming for Ollama - streaming format conversion not yet implemented
     options: {
@@ -410,7 +410,7 @@ async function invokeOpenRouter(body) {
   }
   const openRouterBody = {
-    model: config.openrouter.model,
+    model: body._suggestionModeModel || config.openrouter.model,
     messages,
     temperature: body.temperature ?? 0.7,
     max_tokens: body.max_tokens ?? 4096,
@@ -496,7 +496,7 @@ async function invokeAzureOpenAI(body) {
     max_tokens: Math.min(body.max_tokens ?? 4096, 16384),  // Cap at Azure OpenAI's limit
     top_p: body.top_p ?? 1.0,
     stream: false,  // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
-    model: config.azureOpenAI.deployment
+    model: body._suggestionModeModel || config.azureOpenAI.deployment
   };
   // Add tools - inject standard tools if client didn't send any (passthrough mode)
@@ -842,7 +842,7 @@ async function invokeOpenAI(body) {
   }
   const openAIBody = {
-    model: config.openai.model || "gpt-4o",
+    model: body._suggestionModeModel || config.openai.model || "gpt-4o",
     messages,
     temperature: body.temperature ?? 0.7,
     max_tokens: body.max_tokens ?? 4096,

package/src/clients/retry.js CHANGED Viewed

@@ -10,7 +10,7 @@ const DEFAULT_CONFIG = {
   backoffMultiplier: 2,
   jitterFactor: 0.1, // 10% jitter
   retryableStatuses: [429, 500, 502, 503, 504],
-  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'],
+  retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'],
 };
 /**
@@ -44,6 +44,11 @@ function isRetryable(error, response, config) {
     return true;
   }
+  // Check nested cause (Node undici wraps connection errors as TypeError)
+  if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) {
+    return true;
+  }
   // Check for network errors
   if (error && (error.name === 'FetchError' || error.name === 'AbortError')) {
     return true;

package/src/clients/standard-tools.js CHANGED Viewed

@@ -24,13 +24,17 @@ const STANDARD_TOOLS = [
   },
   {
     name: "Read",
-    description: "Reads a file from the local filesystem. You can access any file directly by using this tool.",
+    description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.",
     input_schema: {
       type: "object",
       properties: {
         file_path: {
           type: "string",
-          description: "Relative path within workspace (e.g., 'config.js', 'src/index.ts'). DO NOT use absolute paths."
+          description: "Path to the file. Use relative paths for workspace files (e.g., 'src/index.ts'). For files outside the workspace use absolute paths or ~ for the home directory (e.g., '~/Documents/notes.md', '/etc/hosts'). Each call reads ONE file only — do not pass multiple paths."
+        },
+        user_approved: {
+          type: "boolean",
+          description: "Set to true ONLY after the user has explicitly approved reading a file outside the workspace. Never set this to true without asking the user first."
         },
         limit: {
           type: "number",

package/src/config/index.js CHANGED Viewed

@@ -136,6 +136,10 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7";
 const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null;
 const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
+// Suggestion mode model override
+// Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name
+const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
 // Hot reload configuration
 const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true
 const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10);
@@ -596,6 +600,7 @@ var config = {
   modelProvider: {
     type: modelProvider,
     defaultModel,
+    suggestionModeModel,
     // Hybrid routing settings
     preferOllama,
     fallbackEnabled,
@@ -885,6 +890,7 @@ function reloadConfig() {
   config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true";
   config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false";
   config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
+  config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
   // Log level
   config.logger.level = process.env.LOG_LEVEL ?? "info";

package/src/headroom/launcher.js CHANGED Viewed

@@ -5,12 +5,17 @@
  * Provides automatic container creation, health checking, and graceful shutdown.
  */
-const Docker = require("dockerode");
+let Docker;
+try {
+  Docker = require("dockerode");
+} catch {
+  Docker = null;
+}
 const logger = require("../logger");
 const config = require("../config");
-// Initialize Docker client
-const docker = new Docker();
+// Initialize Docker client (only if dockerode is available)
+const docker = Docker ? new Docker() : null;
 // Launcher state
 let containerInstance = null;

package/src/orchestrator/index.js CHANGED Viewed

@@ -1234,6 +1234,15 @@ function sanitizePayload(payload) {
     toolCount: clean.tools?.length ?? 0
   }, '[CONTEXT_FLOW] After sanitizePayload');
+  // === Suggestion mode: tag request and override model if configured ===
+  const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages);
+  clean._requestMode = isSuggestion ? "suggestion" : "main";
+  const smConfig = config.modelProvider?.suggestionModeModel ?? "default";
+  if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") {
+    clean.model = smConfig;
+    clean._suggestionModeModel = smConfig;
+  }
   return clean;
 }
@@ -1694,7 +1703,33 @@ IMPORTANT TOOL USAGE RULES:
     });
   }
-  const databricksResponse = await invokeModel(cleanPayload);
+  let databricksResponse;
+  try {
+    databricksResponse = await invokeModel(cleanPayload);
+  } catch (modelError) {
+    const isConnectionError = modelError.cause?.code === 'ECONNREFUSED'
+      || modelError.message?.includes('fetch failed')
+      || modelError.code === 'ECONNREFUSED';
+    if (isConnectionError) {
+      logger.error(`Provider ${providerType} is unreachable (connection refused). Is it running?`);
+      return {
+        response: {
+          status: 503,
+          body: {
+            error: {
+              type: "provider_unreachable",
+              message: `Provider ${providerType} is unreachable. Is the service running?`,
+            },
+          },
+          terminationReason: "provider_unreachable",
+        },
+        steps,
+        durationMs: Date.now() - start,
+        terminationReason: "provider_unreachable",
+      };
+    }
+    throw modelError;
+  }
   // Extract and log actual token usage
   const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false
@@ -1876,11 +1911,26 @@ IMPORTANT TOOL USAGE RULES:
       toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
     }
+    // Guard: drop hallucinated tool calls when no tools were sent to the model.
+    // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation
+    // history even when the request contained zero tool definitions.
+    const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0;
+    if (toolCalls.length > 0 && !toolsWereSent) {
+      logger.warn({
+        sessionId: session?.id ?? null,
+        step: steps,
+        hallucinated: toolCalls.map(tc => tc.function?.name || tc.name),
+        noToolInjection: !!cleanPayload._noToolInjection,
+      }, "Dropped hallucinated tool calls (no tools were sent to model)");
+      toolCalls = [];
+      // If there's also no text content, treat as empty response (handled below)
+    }
     if (toolCalls.length > 0) {
       // Convert OpenAI/OpenRouter format to Anthropic format for session storage
       let sessionContent;
       if (providerType === "azure-anthropic") {
-        // Azure Anthropic already returns content in Anthropic format
+        // Azure Anthropic already returns content in Anthropic format
         sessionContent = databricksResponse.json?.content ?? [];
       } else {
         // Convert OpenAI/OpenRouter format to Anthropic content blocks
@@ -3217,6 +3267,34 @@ IMPORTANT TOOL USAGE RULES:
   };
 }
+/**
+ * Detect if the current request is a suggestion mode call.
+ * Checks only the most recent user message for the literal "[SUGGESTION MODE:" substring.
+ * @param {Array} messages - The conversation messages
+ * @returns {{ isSuggestionMode: boolean }}
+ */
+function detectSuggestionMode(messages) {
+  if (!Array.isArray(messages) || messages.length === 0) {
+    return { isSuggestionMode: false };
+  }
+  // Scan from the end to find the last user message
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i];
+    if (msg?.role !== 'user') continue;
+    const content = typeof msg.content === 'string'
+      ? msg.content
+      : Array.isArray(msg.content)
+        ? msg.content.map(b => b.text || '').join(' ')
+        : '';
+    if (content.includes('[SUGGESTION MODE:')) {
+      return { isSuggestionMode: true };
+    }
+    // Only check the last user message
+    break;
+  }
+  return { isSuggestionMode: false };
+}
 async function processMessage({ payload, headers, session, cwd, options = {} }) {
   const requestedModel =
     payload?.model ??
@@ -3226,6 +3304,32 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
     typeof headers?.["anthropic-beta"] === "string" &&
     headers["anthropic-beta"].includes("interleaved-thinking");
+  // === SUGGESTION MODE: Early return when SUGGESTION_MODE_MODEL=none ===
+  const { isSuggestionMode } = detectSuggestionMode(payload?.messages);
+  const suggestionModelConfig = config.modelProvider?.suggestionModeModel ?? "default";
+  if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") {
+    logger.info('Suggestion mode: skipping LLM call (SUGGESTION_MODE_MODEL=none)');
+    return {
+      response: {
+        json: {
+          id: `msg_suggestion_skip_${Date.now()}`,
+          type: "message",
+          role: "assistant",
+          content: [{ type: "text", text: "" }],
+          model: requestedModel,
+          stop_reason: "end_turn",
+          stop_sequence: null,
+          usage: { input_tokens: 0, output_tokens: 0 },
+        },
+        ok: true,
+        status: 200,
+      },
+      steps: 0,
+      durationMs: 0,
+      terminationReason: "suggestion_mode_skip",
+    };
+  }
   // === TOOL LOOP GUARD (EARLY CHECK) ===
   // Check BEFORE sanitization since sanitizePayload removes conversation history
   const toolLoopThreshold = config.policy?.toolLoopThreshold ?? 3;

package/src/routing/model-registry.js ADDED Viewed

@@ -0,0 +1,437 @@
+/**
+ * Model Registry
+ * Multi-source pricing: LiteLLM -> models.dev -> Databricks fallback
+ * Caches data locally with 24h TTL
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+// API URLs
+const LITELLM_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
+const MODELS_DEV_URL = 'https://models.dev/api.json';
+// Cache settings
+const CACHE_FILE = path.join(__dirname, '../../data/model-prices-cache.json');
+const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
+// Databricks fallback pricing (based on Anthropic direct API prices)
+const DATABRICKS_FALLBACK = {
+  // Claude models
+  'databricks-claude-opus-4-6': { input: 5.0, output: 25.0, context: 1000000 },
+  'databricks-claude-opus-4-5': { input: 5.0, output: 25.0, context: 200000 },
+  'databricks-claude-opus-4-1': { input: 15.0, output: 75.0, context: 200000 },
+  'databricks-claude-sonnet-4-5': { input: 3.0, output: 15.0, context: 200000 },
+  'databricks-claude-sonnet-4': { input: 3.0, output: 15.0, context: 200000 },
+  'databricks-claude-3-7-sonnet': { input: 3.0, output: 15.0, context: 200000 },
+  'databricks-claude-haiku-4-5': { input: 1.0, output: 5.0, context: 200000 },
+  // Llama models
+  'databricks-llama-4-maverick': { input: 1.0, output: 1.0, context: 128000 },
+  'databricks-meta-llama-3-3-70b-instruct': { input: 0.9, output: 0.9, context: 128000 },
+  'databricks-meta-llama-3-1-405b-instruct': { input: 2.0, output: 2.0, context: 128000 },
+  'databricks-meta-llama-3-1-8b-instruct': { input: 0.2, output: 0.2, context: 128000 },
+  // GPT models via Databricks
+  'databricks-gpt-5-2': { input: 5.0, output: 15.0, context: 200000 },
+  'databricks-gpt-5-1': { input: 3.0, output: 12.0, context: 200000 },
+  'databricks-gpt-5': { input: 2.5, output: 10.0, context: 128000 },
+  'databricks-gpt-5-mini': { input: 0.5, output: 1.5, context: 128000 },
+  'databricks-gpt-5-nano': { input: 0.15, output: 0.6, context: 128000 },
+  // Gemini models via Databricks
+  'databricks-gemini-3-flash': { input: 0.075, output: 0.3, context: 1000000 },
+  'databricks-gemini-3-pro': { input: 1.25, output: 5.0, context: 2000000 },
+  'databricks-gemini-2-5-pro': { input: 1.25, output: 5.0, context: 1000000 },
+  'databricks-gemini-2-5-flash': { input: 0.075, output: 0.3, context: 1000000 },
+  // DBRX
+  'databricks-dbrx-instruct': { input: 0.75, output: 2.25, context: 32000 },
+  // Embedding models (price per 1M tokens)
+  'databricks-gte-large-en': { input: 0.02, output: 0, context: 8192 },
+  'databricks-bge-large-en': { input: 0.02, output: 0, context: 512 },
+};
+// Default cost for unknown models
+const DEFAULT_COST = { input: 1.0, output: 3.0, context: 128000 };
+class ModelRegistry {
+  constructor() {
+    this.litellmPrices = {};
+    this.modelsDevPrices = {};
+    this.loaded = false;
+    this.lastFetch = 0;
+    this.modelIndex = new Map();
+  }
+  /**
+   * Initialize registry - load from cache or fetch fresh data
+   */
+  async initialize() {
+    if (this.loaded) return;
+    // Try cache first
+    if (this._loadFromCache()) {
+      this.loaded = true;
+      // Background refresh if stale
+      if (Date.now() - this.lastFetch > CACHE_TTL_MS) {
+        this._fetchAll().catch(err =>
+          logger.warn({ err: err.message }, '[ModelRegistry] Background refresh failed')
+        );
+      }
+      return;
+    }
+    // Fetch fresh data
+    await this._fetchAll();
+    this.loaded = true;
+  }
+  /**
+   * Fetch from both sources
+   */
+  async _fetchAll() {
+    const results = await Promise.allSettled([
+      this._fetchLiteLLM(),
+      this._fetchModelsDev(),
+    ]);
+    const litellmOk = results[0].status === 'fulfilled';
+    const modelsDevOk = results[1].status === 'fulfilled';
+    if (litellmOk || modelsDevOk) {
+      this._buildIndex();
+      this._saveToCache();
+      this.lastFetch = Date.now();
+      logger.info({
+        litellm: litellmOk ? Object.keys(this.litellmPrices).length : 0,
+        modelsDev: modelsDevOk ? Object.keys(this.modelsDevPrices).length : 0,
+        total: this.modelIndex.size,
+      }, '[ModelRegistry] Loaded pricing data');
+    } else {
+      logger.warn('[ModelRegistry] All sources failed, using Databricks fallback only');
+    }
+  }
+  /**
+   * Fetch LiteLLM pricing
+   */
+  async _fetchLiteLLM() {
+    try {
+      const response = await fetch(LITELLM_URL, {
+        signal: AbortSignal.timeout(15000),
+        headers: { 'Accept': 'application/json' },
+      });
+      if (!response.ok) throw new Error(`HTTP ${response.status}`);
+      const data = await response.json();
+      this.litellmPrices = this._processLiteLLM(data);
+      logger.debug({ count: Object.keys(this.litellmPrices).length }, '[ModelRegistry] LiteLLM loaded');
+    } catch (err) {
+      logger.warn({ err: err.message }, '[ModelRegistry] LiteLLM fetch failed');
+      throw err;
+    }
+  }
+  /**
+   * Process LiteLLM format into our format
+   * LiteLLM uses cost per token, we use cost per 1M tokens
+   */
+  _processLiteLLM(data) {
+    const prices = {};
+    for (const [modelId, info] of Object.entries(data)) {
+      if (!info || typeof info !== 'object') continue;
+      // Convert per-token to per-million-tokens
+      const inputCost = (info.input_cost_per_token || 0) * 1_000_000;
+      const outputCost = (info.output_cost_per_token || 0) * 1_000_000;
+      prices[modelId.toLowerCase()] = {
+        input: inputCost,
+        output: outputCost,
+        context: info.max_input_tokens || info.max_tokens || 128000,
+        maxOutput: info.max_output_tokens || 4096,
+        toolCall: info.supports_function_calling ?? true,
+        vision: info.supports_vision ?? false,
+        source: 'litellm',
+      };
+      // Also index without provider prefix for flexible lookup
+      const shortName = modelId.split('/').pop().toLowerCase();
+      if (shortName !== modelId.toLowerCase()) {
+        prices[shortName] = prices[modelId.toLowerCase()];
+      }
+    }
+    return prices;
+  }
+  /**
+   * Fetch models.dev pricing
+   */
+  async _fetchModelsDev() {
+    try {
+      const response = await fetch(MODELS_DEV_URL, {
+        signal: AbortSignal.timeout(15000),
+        headers: { 'Accept': 'application/json' },
+      });
+      if (!response.ok) throw new Error(`HTTP ${response.status}`);
+      const data = await response.json();
+      this.modelsDevPrices = this._processModelsDev(data);
+      logger.debug({ count: Object.keys(this.modelsDevPrices).length }, '[ModelRegistry] models.dev loaded');
+    } catch (err) {
+      logger.warn({ err: err.message }, '[ModelRegistry] models.dev fetch failed');
+      throw err;
+    }
+  }
+  /**
+   * Process models.dev format into our format
+   */
+  _processModelsDev(data) {
+    const prices = {};
+    for (const [providerId, providerData] of Object.entries(data)) {
+      if (!providerData?.models) continue;
+      for (const [modelId, info] of Object.entries(providerData.models)) {
+        const fullId = `${providerId}/${modelId}`.toLowerCase();
+        prices[fullId] = {
+          input: info.cost?.input || 0,
+          output: info.cost?.output || 0,
+          cacheRead: info.cost?.cache_read,
+          cacheWrite: info.cost?.cache_write,
+          context: info.context || 128000,
+          maxOutput: info.output || 4096,
+          toolCall: info.tool_call ?? false,
+          reasoning: info.reasoning ?? false,
+          vision: Array.isArray(info.input) && info.input.includes('image'),
+          source: 'models.dev',
+        };
+        // Also index by short name
+        prices[modelId.toLowerCase()] = prices[fullId];
+      }
+    }
+    return prices;
+  }
+  /**
+   * Build unified index from all sources
+   */
+  _buildIndex() {
+    this.modelIndex.clear();
+    // Add Databricks fallback first (lowest priority)
+    for (const [modelId, info] of Object.entries(DATABRICKS_FALLBACK)) {
+      this.modelIndex.set(modelId.toLowerCase(), { ...info, source: 'databricks-fallback' });
+    }
+    // Add models.dev (medium priority)
+    for (const [modelId, info] of Object.entries(this.modelsDevPrices)) {
+      this.modelIndex.set(modelId, info);
+    }
+    // Add LiteLLM (highest priority)
+    for (const [modelId, info] of Object.entries(this.litellmPrices)) {
+      this.modelIndex.set(modelId, info);
+    }
+  }
+  /**
+   * Get cost for a model
+   * @param {string} modelName - Model name/ID
+   * @returns {Object} Cost info { input, output, context, ... }
+   */
+  getCost(modelName) {
+    if (!modelName) return { ...DEFAULT_COST, source: 'default' };
+    const normalizedName = modelName.toLowerCase();
+    // Direct lookup
+    if (this.modelIndex.has(normalizedName)) {
+      return this.modelIndex.get(normalizedName);
+    }
+    // Try common variations
+    const variations = [
+      normalizedName,
+      normalizedName.replace('databricks-', ''),
+      normalizedName.replace('azure/', ''),
+      normalizedName.replace('bedrock/', ''),
+      normalizedName.replace('anthropic.', ''),
+      normalizedName.split('/').pop(),
+    ];
+    for (const variant of variations) {
+      if (this.modelIndex.has(variant)) {
+        return this.modelIndex.get(variant);
+      }
+    }
+    // Fuzzy match for partial names
+    for (const [key, value] of this.modelIndex.entries()) {
+      if (key.includes(normalizedName) || normalizedName.includes(key)) {
+        return value;
+      }
+    }
+    logger.debug({ model: modelName }, '[ModelRegistry] Model not found, using default');
+    return { ...DEFAULT_COST, source: 'default' };
+  }
+  /**
+   * Get model info by name
+   */
+  getModel(modelName) {
+    return this.getCost(modelName);
+  }
+  /**
+   * Check if model is free (input and output cost are both 0, e.g. a local model)
+   */
+  isFree(modelName) {
+    const cost = this.getCost(modelName);
+    return cost.input === 0 && cost.output === 0;
+  }
+  /**
+   * Check if model supports tool calling
+   */
+  supportsTools(modelName) {
+    const model = this.getCost(modelName);
+    return model.toolCall === true;
+  }
+  /**
+   * Find models matching criteria
+   */
+  findModels(criteria = {}) {
+    const results = [];
+    for (const [modelId, info] of this.modelIndex.entries()) {
+      if (criteria.maxInputCost && info.input > criteria.maxInputCost) continue;
+      if (criteria.minContext && info.context < criteria.minContext) continue;
+      if (criteria.toolCall && !info.toolCall) continue;
+      if (criteria.reasoning && !info.reasoning) continue;
+      if (criteria.vision && !info.vision) continue;
+      results.push({ modelId, ...info });
+    }
+    // Sort by input cost ascending
+    return results.sort((a, b) => a.input - b.input);
+  }
+  /**
+   * Get stats for metrics endpoint
+   */
+  getStats() {
+    const sources = { litellm: 0, 'models.dev': 0, 'databricks-fallback': 0, default: 0 };
+    for (const info of this.modelIndex.values()) {
+      const source = info.source || 'default';
+      sources[source] = (sources[source] || 0) + 1;
+    }
+    return {
+      totalModels: this.modelIndex.size,
+      bySource: sources,
+      lastFetch: this.lastFetch,
+      cacheAge: this.lastFetch ? Date.now() - this.lastFetch : null,
+      cacheTTL: CACHE_TTL_MS,
+    };
+  }
+  /**
+   * Force refresh from APIs
+   */
+  async refresh() {
+    await this._fetchAll();
+  }
+  // Cache management
+  _loadFromCache() {
+    try {
+      if (!fs.existsSync(CACHE_FILE)) return false;
+      const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
+      this.litellmPrices = cache.litellm || {};
+      this.modelsDevPrices = cache.modelsDev || {};
+      this.lastFetch = cache.timestamp || 0;
+      this._buildIndex();
+      logger.debug({
+        age: Math.round((Date.now() - this.lastFetch) / 60000) + 'min',
+        models: this.modelIndex.size,
+      }, '[ModelRegistry] Loaded from cache');
+      return true;
+    } catch (err) {
+      logger.debug({ err: err.message }, '[ModelRegistry] Cache load failed');
+      return false;
+    }
+  }
+  _saveToCache() {
+    try {
+      const dir = path.dirname(CACHE_FILE);
+      if (!fs.existsSync(dir)) {
+        fs.mkdirSync(dir, { recursive: true });
+      }
+      const cache = {
+        litellm: this.litellmPrices,
+        modelsDev: this.modelsDevPrices,
+        timestamp: Date.now(),
+      };
+      fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2));
+      logger.debug('[ModelRegistry] Cache saved');
+    } catch (err) {
+      logger.warn({ err: err.message }, '[ModelRegistry] Cache save failed');
+    }
+  }
+}
+// Singleton with lazy initialization
+let instance = null;
+async function getModelRegistry() {
+  if (!instance) {
+    instance = new ModelRegistry();
+    await instance.initialize();
+  }
+  return instance;
+}
+// Sync getter (uses cache only, no network)
+function getModelRegistrySync() {
+  if (!instance) {
+    instance = new ModelRegistry();
+    instance._loadFromCache();
+    instance._buildIndex();
+    instance.loaded = true;
+  }
+  return instance;
+}
+module.exports = {
+  ModelRegistry,
+  getModelRegistry,
+  getModelRegistrySync,
+  DATABRICKS_FALLBACK,
+  DEFAULT_COST,
+};

package/src/tools/smart-selection.js CHANGED Viewed

@@ -9,6 +9,9 @@
 const logger = require('../logger');
+// Strip system-reminder blocks injected by the CLI before classification
+const SYSTEM_REMINDER_PATTERN = /<system-reminder>[\s\S]*?<\/system-reminder>/g;
 // Pre-compiled regex patterns for performance (avoid recompiling on every request)
 const GREETING_PATTERN = /^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings|sup|yo)[\s\.\!\?]*$/i;
 const QUESTION_PATTERN = /^(what is|what's|how does|when|where|why|explain|define|tell me about|can you explain)/i;
@@ -190,7 +193,10 @@ function classifyRequestType(payload) {
     return { type: 'coding', confidence: 0.5, keywords: [] };
   }
-  const content = extractContent(lastMessage);
+  const rawContent = extractContent(lastMessage);
+  // Strip <system-reminder> blocks before classification to prevent
+  // CLI-injected keywords (search, explain, documentation) from polluting results
+  const content = rawContent.replace(SYSTEM_REMINDER_PATTERN, '').trim();
   const contentLower = content.toLowerCase();
   const messageCount = payload.messages?.length ?? 0;

package/src/tools/workspace.js CHANGED Viewed

@@ -1,8 +1,12 @@
+const path = require("path");
 const {
   readFile,
   writeFile,
   applyFilePatch,
   resolveWorkspacePath,
+  expandTilde,
+  isExternalPath,
+  readExternalFile,
   fileExists,
   workspaceRoot,
 } = require("../workspace");
@@ -30,17 +34,44 @@ function registerWorkspaceTools() {
   registerTool(
     "fs_read",
     async ({ args = {} }) => {
-      const relativePath = validateString(args.path ?? args.file, "path");
+      const targetPath = validateString(args.path ?? args.file ?? args.file_path, "path");
       const encoding = normalizeEncoding(args.encoding);
-      const content = await readFile(relativePath, encoding);
+      // Check if path is outside workspace
+      if (isExternalPath(targetPath)) {
+        if (args.user_approved !== true) {
+          const expanded = expandTilde(targetPath);
+          const resolved = path.resolve(expanded);
+          return {
+            ok: false,
+            status: 403,
+            content: JSON.stringify({
+              error: "external_path_requires_approval",
+              message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`,
+              resolved_path: resolved,
+            }),
+          };
+        }
+        // User approved — read external file
+        const { content, resolvedPath } = await readExternalFile(targetPath, encoding);
+        return {
+          ok: true,
+          status: 200,
+          content,
+          metadata: { path: targetPath, encoding, resolved_path: resolvedPath },
+        };
+      }
+      // Normal workspace read (unchanged)
+      const content = await readFile(targetPath, encoding);
       return {
         ok: true,
         status: 200,
         content,
         metadata: {
-          path: relativePath,
+          path: targetPath,
           encoding,
-          resolved_path: resolveWorkspacePath(relativePath),
+          resolved_path: resolveWorkspacePath(targetPath),
         },
       };
     },

package/src/workspace/index.js CHANGED Viewed

@@ -10,6 +10,33 @@ if (!fs.existsSync(workspaceRoot)) {
   fs.mkdirSync(workspaceRoot, { recursive: true });
 }
+function expandTilde(targetPath) {
+  if (typeof targetPath !== "string") return targetPath;
+  if (targetPath.startsWith("~")) {
+    const home = process.env.HOME || process.env.USERPROFILE;
+    if (home) {
+      return path.join(home, targetPath.slice(1));
+    }
+  }
+  return targetPath;
+}
+function isExternalPath(targetPath) {
+  const expanded = expandTilde(targetPath);
+  const resolved = path.resolve(workspaceRoot, expanded);
+  return !resolved.startsWith(workspaceRoot);
+}
+async function readExternalFile(targetPath, encoding = "utf8") {
+  const expanded = expandTilde(targetPath);
+  const resolved = path.resolve(expanded);
+  const stats = await fsp.stat(resolved);
+  if (!stats.isFile()) {
+    throw new Error("Requested path is not a file.");
+  }
+  return { content: await fsp.readFile(resolved, { encoding }), resolvedPath: resolved };
+}
 function resolveWorkspacePath(targetPath) {
   if (!targetPath || typeof targetPath !== "string") {
     throw new Error("Path must be a non-empty string.");
@@ -110,6 +137,9 @@ function validateCwd(cwd) {
 module.exports = {
   workspaceRoot,
   resolveWorkspacePath,
+  expandTilde,
+  isExternalPath,
+  readExternalFile,
   readFile,
   writeFile,
   fileExists,