lynkr 7.2.3 → 7.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "7.2.3",
3
+ "version": "7.2.5",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -8,9 +8,9 @@
8
8
  "lynkr-setup": "./scripts/setup.js"
9
9
  },
10
10
  "scripts": {
11
- "prestart": "docker compose --profile headroom up -d headroom 2>/dev/null || echo 'Headroom container not started (Docker may not be running)'",
11
+ "prestart": "node -e \"if(process.env.HEADROOM_ENABLED==='true'&&process.env.HEADROOM_DOCKER_ENABLED!=='false'){process.exit(0)}else{process.exit(1)}\" && docker compose --profile headroom up -d headroom 2>/dev/null || echo 'Headroom skipped (disabled or Docker not running)'",
12
12
  "start": "node index.js 2>&1 | npx pino-pretty --sync",
13
- "stop": "docker compose --profile headroom down",
13
+ "stop": "node -e \"if(process.env.HEADROOM_ENABLED==='true'&&process.env.HEADROOM_DOCKER_ENABLED!=='false'){process.exit(0)}else{process.exit(1)}\" && docker compose --profile headroom down || echo 'Headroom skipped (disabled or Docker not running)'",
14
14
  "dev": "nodemon index.js",
15
15
  "lint": "eslint src index.js",
16
16
  "test": "npm run test:unit && npm run test:performance",
@@ -49,7 +49,6 @@
49
49
  "@babel/traverse": "^7.29.0",
50
50
  "compression": "^1.7.4",
51
51
  "diff": "^5.2.0",
52
- "dockerode": "^4.0.2",
53
52
  "dotenv": "^16.4.5",
54
53
  "express": "^5.1.0",
55
54
  "express-rate-limit": "^8.2.1",
@@ -62,6 +61,7 @@
62
61
  },
63
62
  "optionalDependencies": {
64
63
  "better-sqlite3": "^12.6.2",
64
+ "dockerode": "^4.0.2",
65
65
  "tree-sitter": "^0.21.1",
66
66
  "tree-sitter-javascript": "^0.21.0",
67
67
  "tree-sitter-python": "^0.21.0",
@@ -309,7 +309,7 @@ async function invokeOllama(body) {
309
309
  }
310
310
 
311
311
  const ollamaBody = {
312
- model: config.ollama.model,
312
+ model: body._suggestionModeModel || config.ollama.model,
313
313
  messages: deduplicated,
314
314
  stream: false, // Force non-streaming for Ollama - streaming format conversion not yet implemented
315
315
  options: {
@@ -410,7 +410,7 @@ async function invokeOpenRouter(body) {
410
410
  }
411
411
 
412
412
  const openRouterBody = {
413
- model: config.openrouter.model,
413
+ model: body._suggestionModeModel || config.openrouter.model,
414
414
  messages,
415
415
  temperature: body.temperature ?? 0.7,
416
416
  max_tokens: body.max_tokens ?? 4096,
@@ -496,7 +496,7 @@ async function invokeAzureOpenAI(body) {
496
496
  max_tokens: Math.min(body.max_tokens ?? 4096, 16384), // Cap at Azure OpenAI's limit
497
497
  top_p: body.top_p ?? 1.0,
498
498
  stream: false, // Force non-streaming for Azure OpenAI - streaming format conversion not yet implemented
499
- model: config.azureOpenAI.deployment
499
+ model: body._suggestionModeModel || config.azureOpenAI.deployment
500
500
  };
501
501
 
502
502
  // Add tools - inject standard tools if client didn't send any (passthrough mode)
@@ -842,7 +842,7 @@ async function invokeOpenAI(body) {
842
842
  }
843
843
 
844
844
  const openAIBody = {
845
- model: config.openai.model || "gpt-4o",
845
+ model: body._suggestionModeModel || config.openai.model || "gpt-4o",
846
846
  messages,
847
847
  temperature: body.temperature ?? 0.7,
848
848
  max_tokens: body.max_tokens ?? 4096,
@@ -10,7 +10,7 @@ const DEFAULT_CONFIG = {
10
10
  backoffMultiplier: 2,
11
11
  jitterFactor: 0.1, // 10% jitter
12
12
  retryableStatuses: [429, 500, 502, 503, 504],
13
- retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH'],
13
+ retryableErrors: ['ECONNRESET', 'ETIMEDOUT', 'ENOTFOUND', 'ENETUNREACH', 'ECONNREFUSED'],
14
14
  };
15
15
 
16
16
  /**
@@ -44,6 +44,11 @@ function isRetryable(error, response, config) {
44
44
  return true;
45
45
  }
46
46
 
47
+ // Check nested cause (Node undici wraps connection errors as TypeError)
48
+ if (error && error.cause?.code && config.retryableErrors.includes(error.cause.code)) {
49
+ return true;
50
+ }
51
+
47
52
  // Check for network errors
48
53
  if (error && (error.name === 'FetchError' || error.name === 'AbortError')) {
49
54
  return true;
@@ -24,13 +24,17 @@ const STANDARD_TOOLS = [
24
24
  },
25
25
  {
26
26
  name: "Read",
27
- description: "Reads a file from the local filesystem. You can access any file directly by using this tool.",
27
+ description: "Reads a file from the local filesystem. You can access any file directly by using this tool. For files outside the workspace, the user must approve access first.",
28
28
  input_schema: {
29
29
  type: "object",
30
30
  properties: {
31
31
  file_path: {
32
32
  type: "string",
33
- description: "Relative path within workspace (e.g., 'config.js', 'src/index.ts'). DO NOT use absolute paths."
33
+ description: "Path to the file. Use relative paths for workspace files (e.g., 'src/index.ts'). For files outside the workspace use absolute paths or ~ for the home directory (e.g., '~/Documents/notes.md', '/etc/hosts'). Each call reads ONE file only — do not pass multiple paths."
34
+ },
35
+ user_approved: {
36
+ type: "boolean",
37
+ description: "Set to true ONLY after the user has explicitly approved reading a file outside the workspace. Never set this to true without asking the user first."
34
38
  },
35
39
  limit: {
36
40
  type: "number",
@@ -136,6 +136,10 @@ const zaiModel = process.env.ZAI_MODEL?.trim() || "GLM-4.7";
136
136
  const vertexApiKey = process.env.VERTEX_API_KEY?.trim() || process.env.GOOGLE_API_KEY?.trim() || null;
137
137
  const vertexModel = process.env.VERTEX_MODEL?.trim() || "gemini-2.0-flash";
138
138
 
139
+ // Suggestion mode model override
140
+ // Values: "default" (use MODEL_DEFAULT), "none" (skip LLM call), or a model name
141
+ const suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
142
+
139
143
  // Hot reload configuration
140
144
  const hotReloadEnabled = process.env.HOT_RELOAD_ENABLED !== "false"; // default true
141
145
  const hotReloadDebounceMs = Number.parseInt(process.env.HOT_RELOAD_DEBOUNCE_MS ?? "1000", 10);
@@ -596,6 +600,7 @@ var config = {
596
600
  modelProvider: {
597
601
  type: modelProvider,
598
602
  defaultModel,
603
+ suggestionModeModel,
599
604
  // Hybrid routing settings
600
605
  preferOllama,
601
606
  fallbackEnabled,
@@ -885,6 +890,7 @@ function reloadConfig() {
885
890
  config.modelProvider.preferOllama = process.env.PREFER_OLLAMA === "true";
886
891
  config.modelProvider.fallbackEnabled = process.env.FALLBACK_ENABLED !== "false";
887
892
  config.modelProvider.fallbackProvider = (process.env.FALLBACK_PROVIDER ?? "databricks").toLowerCase();
893
+ config.modelProvider.suggestionModeModel = (process.env.SUGGESTION_MODE_MODEL ?? "default").trim();
888
894
 
889
895
  // Log level
890
896
  config.logger.level = process.env.LOG_LEVEL ?? "info";
@@ -5,12 +5,17 @@
5
5
  * Provides automatic container creation, health checking, and graceful shutdown.
6
6
  */
7
7
 
8
- const Docker = require("dockerode");
8
+ let Docker;
9
+ try {
10
+ Docker = require("dockerode");
11
+ } catch {
12
+ Docker = null;
13
+ }
9
14
  const logger = require("../logger");
10
15
  const config = require("../config");
11
16
 
12
- // Initialize Docker client
13
- const docker = new Docker();
17
+ // Initialize Docker client (only if dockerode is available)
18
+ const docker = Docker ? new Docker() : null;
14
19
 
15
20
  // Launcher state
16
21
  let containerInstance = null;
@@ -1234,6 +1234,15 @@ function sanitizePayload(payload) {
1234
1234
  toolCount: clean.tools?.length ?? 0
1235
1235
  }, '[CONTEXT_FLOW] After sanitizePayload');
1236
1236
 
1237
+ // === Suggestion mode: tag request and override model if configured ===
1238
+ const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages);
1239
+ clean._requestMode = isSuggestion ? "suggestion" : "main";
1240
+ const smConfig = config.modelProvider?.suggestionModeModel ?? "default";
1241
+ if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") {
1242
+ clean.model = smConfig;
1243
+ clean._suggestionModeModel = smConfig;
1244
+ }
1245
+
1237
1246
  return clean;
1238
1247
  }
1239
1248
 
@@ -1694,7 +1703,33 @@ IMPORTANT TOOL USAGE RULES:
1694
1703
  });
1695
1704
  }
1696
1705
 
1697
- const databricksResponse = await invokeModel(cleanPayload);
1706
+ let databricksResponse;
1707
+ try {
1708
+ databricksResponse = await invokeModel(cleanPayload);
1709
+ } catch (modelError) {
1710
+ const isConnectionError = modelError.cause?.code === 'ECONNREFUSED'
1711
+ || modelError.message?.includes('fetch failed')
1712
+ || modelError.code === 'ECONNREFUSED';
1713
+ if (isConnectionError) {
1714
+ logger.error(`Provider ${providerType} is unreachable (connection refused). Is it running?`);
1715
+ return {
1716
+ response: {
1717
+ status: 503,
1718
+ body: {
1719
+ error: {
1720
+ type: "provider_unreachable",
1721
+ message: `Provider ${providerType} is unreachable. Is the service running?`,
1722
+ },
1723
+ },
1724
+ terminationReason: "provider_unreachable",
1725
+ },
1726
+ steps,
1727
+ durationMs: Date.now() - start,
1728
+ terminationReason: "provider_unreachable",
1729
+ };
1730
+ }
1731
+ throw modelError;
1732
+ }
1698
1733
 
1699
1734
  // Extract and log actual token usage
1700
1735
  const actualUsage = databricksResponse.ok && config.tokenTracking?.enabled !== false
@@ -1876,11 +1911,26 @@ IMPORTANT TOOL USAGE RULES:
1876
1911
  toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : [];
1877
1912
  }
1878
1913
 
1914
+ // Guard: drop hallucinated tool calls when no tools were sent to the model.
1915
+ // Some models (e.g. Llama 3.1) hallucinate tool_call blocks from conversation
1916
+ // history even when the request contained zero tool definitions.
1917
+ const toolsWereSent = Array.isArray(cleanPayload.tools) && cleanPayload.tools.length > 0;
1918
+ if (toolCalls.length > 0 && !toolsWereSent) {
1919
+ logger.warn({
1920
+ sessionId: session?.id ?? null,
1921
+ step: steps,
1922
+ hallucinated: toolCalls.map(tc => tc.function?.name || tc.name),
1923
+ noToolInjection: !!cleanPayload._noToolInjection,
1924
+ }, "Dropped hallucinated tool calls (no tools were sent to model)");
1925
+ toolCalls = [];
1926
+ // If there's also no text content, treat as empty response (handled below)
1927
+ }
1928
+
1879
1929
  if (toolCalls.length > 0) {
1880
1930
  // Convert OpenAI/OpenRouter format to Anthropic format for session storage
1881
1931
  let sessionContent;
1882
1932
  if (providerType === "azure-anthropic") {
1883
- // Azure Anthropic already returns content in Anthropic format
1933
+ // Azure Anthropic already returns content in Anthropic format
1884
1934
  sessionContent = databricksResponse.json?.content ?? [];
1885
1935
  } else {
1886
1936
  // Convert OpenAI/OpenRouter format to Anthropic content blocks
@@ -3217,6 +3267,34 @@ IMPORTANT TOOL USAGE RULES:
3217
3267
  };
3218
3268
  }
3219
3269
 
3270
/**
 * Detect whether the current request is a suggestion mode call.
 *
 * Only the LAST message with role "user" is inspected; earlier user messages
 * are ignored even if they carry the marker. The message text is searched for
 * the literal "[SUGGESTION MODE:" marker. String content is searched directly;
 * array content is flattened by joining each block's `text` with a space.
 *
 * @param {Array} messages - The conversation messages
 * @returns {{ isSuggestionMode: boolean }}
 */
function detectSuggestionMode(messages) {
  if (!Array.isArray(messages) || messages.length === 0) {
    return { isSuggestionMode: false };
  }

  // Walk backwards to locate the most recent user message.
  let lastUserMessage = null;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role === 'user') {
      lastUserMessage = messages[i];
      break;
    }
  }
  if (!lastUserMessage) {
    return { isSuggestionMode: false };
  }

  const { content } = lastUserMessage;
  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    // Blocks may be null or lack a text field (e.g. image blocks); treat
    // those as empty text instead of throwing on property access.
    text = content.map((block) => block?.text || '').join(' ');
  }

  return { isSuggestionMode: text.includes('[SUGGESTION MODE:') };
}
3297
+
3220
3298
  async function processMessage({ payload, headers, session, cwd, options = {} }) {
3221
3299
  const requestedModel =
3222
3300
  payload?.model ??
@@ -3226,6 +3304,32 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
3226
3304
  typeof headers?.["anthropic-beta"] === "string" &&
3227
3305
  headers["anthropic-beta"].includes("interleaved-thinking");
3228
3306
 
3307
+ // === SUGGESTION MODE: Early return when SUGGESTION_MODE_MODEL=none ===
3308
+ const { isSuggestionMode } = detectSuggestionMode(payload?.messages);
3309
+ const suggestionModelConfig = config.modelProvider?.suggestionModeModel ?? "default";
3310
+ if (isSuggestionMode && suggestionModelConfig.toLowerCase() === "none") {
3311
+ logger.info('Suggestion mode: skipping LLM call (SUGGESTION_MODE_MODEL=none)');
3312
+ return {
3313
+ response: {
3314
+ json: {
3315
+ id: `msg_suggestion_skip_${Date.now()}`,
3316
+ type: "message",
3317
+ role: "assistant",
3318
+ content: [{ type: "text", text: "" }],
3319
+ model: requestedModel,
3320
+ stop_reason: "end_turn",
3321
+ stop_sequence: null,
3322
+ usage: { input_tokens: 0, output_tokens: 0 },
3323
+ },
3324
+ ok: true,
3325
+ status: 200,
3326
+ },
3327
+ steps: 0,
3328
+ durationMs: 0,
3329
+ terminationReason: "suggestion_mode_skip",
3330
+ };
3331
+ }
3332
+
3229
3333
  // === TOOL LOOP GUARD (EARLY CHECK) ===
3230
3334
  // Check BEFORE sanitization since sanitizePayload removes conversation history
3231
3335
  const toolLoopThreshold = config.policy?.toolLoopThreshold ?? 3;
@@ -0,0 +1,437 @@
1
+ /**
2
+ * Model Registry
3
+ * Multi-source pricing: LiteLLM -> models.dev -> Databricks fallback
4
+ * Caches data locally with 24h TTL
5
+ */
6
+
7
+ const fs = require('fs');
8
+ const path = require('path');
9
+ const logger = require('../logger');
10
+
11
+ // API URLs
12
+ const LITELLM_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json';
13
+ const MODELS_DEV_URL = 'https://models.dev/api.json';
14
+
15
+ // Cache settings
16
+ const CACHE_FILE = path.join(__dirname, '../../data/model-prices-cache.json');
17
+ const CACHE_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours
18
+
19
+ // Databricks fallback pricing (based on Anthropic direct API prices)
20
+ const DATABRICKS_FALLBACK = {
21
+ // Claude models
22
+ 'databricks-claude-opus-4-6': { input: 5.0, output: 25.0, context: 1000000 },
23
+ 'databricks-claude-opus-4-5': { input: 5.0, output: 25.0, context: 200000 },
24
+ 'databricks-claude-opus-4-1': { input: 15.0, output: 75.0, context: 200000 },
25
+ 'databricks-claude-sonnet-4-5': { input: 3.0, output: 15.0, context: 200000 },
26
+ 'databricks-claude-sonnet-4': { input: 3.0, output: 15.0, context: 200000 },
27
+ 'databricks-claude-3-7-sonnet': { input: 3.0, output: 15.0, context: 200000 },
28
+ 'databricks-claude-haiku-4-5': { input: 1.0, output: 5.0, context: 200000 },
29
+
30
+ // Llama models
31
+ 'databricks-llama-4-maverick': { input: 1.0, output: 1.0, context: 128000 },
32
+ 'databricks-meta-llama-3-3-70b-instruct': { input: 0.9, output: 0.9, context: 128000 },
33
+ 'databricks-meta-llama-3-1-405b-instruct': { input: 2.0, output: 2.0, context: 128000 },
34
+ 'databricks-meta-llama-3-1-8b-instruct': { input: 0.2, output: 0.2, context: 128000 },
35
+
36
+ // GPT models via Databricks
37
+ 'databricks-gpt-5-2': { input: 5.0, output: 15.0, context: 200000 },
38
+ 'databricks-gpt-5-1': { input: 3.0, output: 12.0, context: 200000 },
39
+ 'databricks-gpt-5': { input: 2.5, output: 10.0, context: 128000 },
40
+ 'databricks-gpt-5-mini': { input: 0.5, output: 1.5, context: 128000 },
41
+ 'databricks-gpt-5-nano': { input: 0.15, output: 0.6, context: 128000 },
42
+
43
+ // Gemini models via Databricks
44
+ 'databricks-gemini-3-flash': { input: 0.075, output: 0.3, context: 1000000 },
45
+ 'databricks-gemini-3-pro': { input: 1.25, output: 5.0, context: 2000000 },
46
+ 'databricks-gemini-2-5-pro': { input: 1.25, output: 5.0, context: 1000000 },
47
+ 'databricks-gemini-2-5-flash': { input: 0.075, output: 0.3, context: 1000000 },
48
+
49
+ // DBRX
50
+ 'databricks-dbrx-instruct': { input: 0.75, output: 2.25, context: 32000 },
51
+
52
+ // Embedding models (price per 1M tokens)
53
+ 'databricks-gte-large-en': { input: 0.02, output: 0, context: 8192 },
54
+ 'databricks-bge-large-en': { input: 0.02, output: 0, context: 512 },
55
+ };
56
+
57
+ // Default cost for unknown models
58
+ const DEFAULT_COST = { input: 1.0, output: 3.0, context: 128000 };
59
+
60
/**
 * In-memory registry of per-model pricing and capability metadata.
 *
 * Data is merged from three sources into `modelIndex`, later sources
 * overriding earlier ones: the built-in Databricks fallback table, then
 * models.dev, then LiteLLM. Fetched data is persisted to CACHE_FILE and
 * refreshed in the background once it is older than CACHE_TTL_MS.
 * All prices are expressed per 1M tokens.
 */
class ModelRegistry {
  constructor() {
    this.litellmPrices = {};
    this.modelsDevPrices = {};
    this.loaded = false;
    this.lastFetch = 0;
    this.modelIndex = new Map();
  }

  /**
   * Initialize registry - load from cache or fetch fresh data.
   * A usable cache makes this return immediately; a stale cache additionally
   * triggers a fire-and-forget refresh whose failure is only logged.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.loaded) return;

    // Try cache first
    if (this._loadFromCache()) {
      this.loaded = true;
      // Background refresh if stale
      if (Date.now() - this.lastFetch > CACHE_TTL_MS) {
        this._fetchAll().catch(err =>
          logger.warn({ err: err.message }, '[ModelRegistry] Background refresh failed')
        );
      }
      return;
    }

    // Fetch fresh data
    await this._fetchAll();
    this.loaded = true;
  }

  /**
   * Fetch from both sources in parallel. If at least one succeeds the index
   * is rebuilt and the cache rewritten; if both fail only a warning is logged.
   * @returns {Promise<void>}
   */
  async _fetchAll() {
    const results = await Promise.allSettled([
      this._fetchLiteLLM(),
      this._fetchModelsDev(),
    ]);

    const litellmOk = results[0].status === 'fulfilled';
    const modelsDevOk = results[1].status === 'fulfilled';

    if (litellmOk || modelsDevOk) {
      this._buildIndex();
      this._saveToCache();
      this.lastFetch = Date.now();

      logger.info({
        litellm: litellmOk ? Object.keys(this.litellmPrices).length : 0,
        modelsDev: modelsDevOk ? Object.keys(this.modelsDevPrices).length : 0,
        total: this.modelIndex.size,
      }, '[ModelRegistry] Loaded pricing data');
    } else {
      logger.warn('[ModelRegistry] All sources failed, using Databricks fallback only');
    }
  }

  /**
   * Fetch LiteLLM pricing and store it in `litellmPrices`.
   * @returns {Promise<void>}
   * @throws rethrows any network / HTTP / JSON error after logging it
   */
  async _fetchLiteLLM() {
    try {
      const response = await fetch(LITELLM_URL, {
        signal: AbortSignal.timeout(15000),
        headers: { 'Accept': 'application/json' },
      });

      if (!response.ok) throw new Error(`HTTP ${response.status}`);

      const data = await response.json();
      this.litellmPrices = this._processLiteLLM(data);

      logger.debug({ count: Object.keys(this.litellmPrices).length }, '[ModelRegistry] LiteLLM loaded');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] LiteLLM fetch failed');
      throw err;
    }
  }

  /**
   * Process LiteLLM format into our format.
   * LiteLLM uses cost per token, we use cost per 1M tokens.
   * @param {Object} data - Parsed LiteLLM price map keyed by model id
   * @returns {Object} Price entries keyed by lower-cased id (and short name)
   */
  _processLiteLLM(data) {
    const prices = {};

    for (const [modelId, info] of Object.entries(data)) {
      if (!info || typeof info !== 'object') continue;
      // "sample_spec" is LiteLLM's documentation template (its fields hold
      // descriptive strings), not a model; indexing it would add a bogus
      // zero-cost entry.
      if (modelId === 'sample_spec') continue;

      const normalizedId = modelId.toLowerCase();

      // Convert per-token to per-million-tokens
      const inputCost = (info.input_cost_per_token || 0) * 1_000_000;
      const outputCost = (info.output_cost_per_token || 0) * 1_000_000;

      prices[normalizedId] = {
        input: inputCost,
        output: outputCost,
        context: info.max_input_tokens || info.max_tokens || 128000,
        maxOutput: info.max_output_tokens || 4096,
        toolCall: info.supports_function_calling ?? true,
        vision: info.supports_vision ?? false,
        source: 'litellm',
      };

      // Also index without provider prefix for flexible lookup
      const shortName = modelId.split('/').pop().toLowerCase();
      if (shortName !== normalizedId) {
        prices[shortName] = prices[normalizedId];
      }
    }

    return prices;
  }

  /**
   * Fetch models.dev pricing and store it in `modelsDevPrices`.
   * @returns {Promise<void>}
   * @throws rethrows any network / HTTP / JSON error after logging it
   */
  async _fetchModelsDev() {
    try {
      const response = await fetch(MODELS_DEV_URL, {
        signal: AbortSignal.timeout(15000),
        headers: { 'Accept': 'application/json' },
      });

      if (!response.ok) throw new Error(`HTTP ${response.status}`);

      const data = await response.json();
      this.modelsDevPrices = this._processModelsDev(data);

      logger.debug({ count: Object.keys(this.modelsDevPrices).length }, '[ModelRegistry] models.dev loaded');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] models.dev fetch failed');
      throw err;
    }
  }

  /**
   * Process models.dev format into our format.
   * @param {Object} data - Parsed models.dev payload keyed by provider id
   * @returns {Object} Price entries keyed by "provider/model" and by model id
   */
  _processModelsDev(data) {
    const prices = {};

    for (const [providerId, providerData] of Object.entries(data)) {
      if (!providerData?.models) continue;

      for (const [modelId, info] of Object.entries(providerData.models)) {
        if (!info || typeof info !== 'object') continue;

        const fullId = `${providerId}/${modelId}`.toLowerCase();

        // models.dev nests limits under `limit` and modalities under
        // `modalities`; the flat fields are kept as a fallback so payloads
        // (or caches) in the previously assumed shape still work.
        const inputModalities = info.modalities?.input ?? info.input;

        prices[fullId] = {
          input: info.cost?.input || 0,
          output: info.cost?.output || 0,
          cacheRead: info.cost?.cache_read,
          cacheWrite: info.cost?.cache_write,
          context: info.limit?.context || info.context || 128000,
          maxOutput: info.limit?.output || info.output || 4096,
          toolCall: info.tool_call ?? false,
          reasoning: info.reasoning ?? false,
          vision: Array.isArray(inputModalities) && inputModalities.includes('image'),
          source: 'models.dev',
        };

        // Also index by short name
        prices[modelId.toLowerCase()] = prices[fullId];
      }
    }

    return prices;
  }

  /**
   * Build unified index from all sources.
   * Insertion order defines priority: later `set` calls overwrite earlier ones.
   */
  _buildIndex() {
    this.modelIndex.clear();

    // Add Databricks fallback first (lowest priority)
    for (const [modelId, info] of Object.entries(DATABRICKS_FALLBACK)) {
      this.modelIndex.set(modelId.toLowerCase(), { ...info, source: 'databricks-fallback' });
    }

    // Add models.dev (medium priority)
    for (const [modelId, info] of Object.entries(this.modelsDevPrices)) {
      this.modelIndex.set(modelId, info);
    }

    // Add LiteLLM (highest priority)
    for (const [modelId, info] of Object.entries(this.litellmPrices)) {
      this.modelIndex.set(modelId, info);
    }
  }

  /**
   * Get cost for a model.
   * Lookup order: exact (case-insensitive) id, then the id with common
   * provider prefixes stripped, then the first index key that contains or is
   * contained in the name. Falls back to DEFAULT_COST when nothing matches.
   * @param {string} modelName - Model name/ID
   * @returns {Object} Cost info { input, output, context, ... }
   */
  getCost(modelName) {
    // Non-string names cannot be normalised; treat them like a missing name.
    if (!modelName || typeof modelName !== 'string') {
      return { ...DEFAULT_COST, source: 'default' };
    }

    const normalizedName = modelName.toLowerCase();

    // Direct lookup
    if (this.modelIndex.has(normalizedName)) {
      return this.modelIndex.get(normalizedName);
    }

    // Try common variations
    const variations = [
      normalizedName.replace('databricks-', ''),
      normalizedName.replace('azure/', ''),
      normalizedName.replace('bedrock/', ''),
      normalizedName.replace('anthropic.', ''),
      normalizedName.split('/').pop(),
    ];

    for (const variant of variations) {
      if (this.modelIndex.has(variant)) {
        return this.modelIndex.get(variant);
      }
    }

    // Fuzzy match for partial names
    for (const [key, value] of this.modelIndex.entries()) {
      if (key.includes(normalizedName) || normalizedName.includes(key)) {
        return value;
      }
    }

    logger.debug({ model: modelName }, '[ModelRegistry] Model not found, using default');
    return { ...DEFAULT_COST, source: 'default' };
  }

  /**
   * Get model info by name (alias of getCost).
   * @param {string} modelName - Model name/ID
   * @returns {Object} Same entry getCost returns
   */
  getModel(modelName) {
    return this.getCost(modelName);
  }

  /**
   * Check if model is free, i.e. both input and output cost are exactly 0.
   * @param {string} modelName - Model name/ID
   * @returns {boolean}
   */
  isFree(modelName) {
    const cost = this.getCost(modelName);
    return cost.input === 0 && cost.output === 0;
  }

  /**
   * Check if model supports tool calling.
   * @param {string} modelName - Model name/ID
   * @returns {boolean} true only when the entry's toolCall flag is exactly true
   */
  supportsTools(modelName) {
    const model = this.getCost(modelName);
    return model.toolCall === true;
  }

  /**
   * Find models matching criteria.
   * @param {Object} [criteria]
   * @param {number} [criteria.maxInputCost] - Upper bound on input price (0 = free only)
   * @param {number} [criteria.minContext] - Lower bound on context window
   * @param {boolean} [criteria.toolCall] - Require tool calling support
   * @param {boolean} [criteria.reasoning] - Require reasoning support
   * @param {boolean} [criteria.vision] - Require vision support
   * @returns {Array<Object>} Matching entries with `modelId`, sorted by input cost ascending
   */
  findModels(criteria = {}) {
    const results = [];

    for (const [modelId, info] of this.modelIndex.entries()) {
      // Compare against null/undefined rather than truthiness so that a
      // bound of 0 (e.g. "free models only") is honoured.
      if (criteria.maxInputCost != null && info.input > criteria.maxInputCost) continue;
      if (criteria.minContext != null && info.context < criteria.minContext) continue;
      if (criteria.toolCall && !info.toolCall) continue;
      if (criteria.reasoning && !info.reasoning) continue;
      if (criteria.vision && !info.vision) continue;

      results.push({ modelId, ...info });
    }

    // Sort by input cost ascending
    return results.sort((a, b) => a.input - b.input);
  }

  /**
   * Get stats for metrics endpoint.
   * @returns {{ totalModels: number, bySource: Object, lastFetch: number, cacheAge: (number|null), cacheTTL: number }}
   */
  getStats() {
    const sources = { litellm: 0, 'models.dev': 0, 'databricks-fallback': 0, default: 0 };

    for (const info of this.modelIndex.values()) {
      const source = info.source || 'default';
      sources[source] = (sources[source] || 0) + 1;
    }

    return {
      totalModels: this.modelIndex.size,
      bySource: sources,
      lastFetch: this.lastFetch,
      cacheAge: this.lastFetch ? Date.now() - this.lastFetch : null,
      cacheTTL: CACHE_TTL_MS,
    };
  }

  /**
   * Force refresh from APIs.
   * @returns {Promise<void>}
   */
  async refresh() {
    await this._fetchAll();
  }

  /**
   * Load price data from CACHE_FILE and rebuild the index.
   * @returns {boolean} true when the cache was read and indexed; false when
   *   the file is missing or any read/parse step throws
   */
  _loadFromCache() {
    try {
      if (!fs.existsSync(CACHE_FILE)) return false;

      const cache = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf8'));
      this.litellmPrices = cache.litellm || {};
      this.modelsDevPrices = cache.modelsDev || {};
      this.lastFetch = cache.timestamp || 0;

      this._buildIndex();

      logger.debug({
        age: Math.round((Date.now() - this.lastFetch) / 60000) + 'min',
        models: this.modelIndex.size,
      }, '[ModelRegistry] Loaded from cache');

      return true;
    } catch (err) {
      logger.debug({ err: err.message }, '[ModelRegistry] Cache load failed');
      return false;
    }
  }

  /**
   * Persist the current source data to CACHE_FILE, creating its directory if
   * needed. Best effort: failures are logged and not rethrown.
   */
  _saveToCache() {
    try {
      const dir = path.dirname(CACHE_FILE);
      if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
      }

      const cache = {
        litellm: this.litellmPrices,
        modelsDev: this.modelsDevPrices,
        timestamp: Date.now(),
      };

      fs.writeFileSync(CACHE_FILE, JSON.stringify(cache, null, 2));
      logger.debug('[ModelRegistry] Cache saved');
    } catch (err) {
      logger.warn({ err: err.message }, '[ModelRegistry] Cache save failed');
    }
  }
}
408
+
409
// Singleton with lazy initialization
let instance = null;
// Promise for the singleton's (in-flight or finished) initialization, shared
// by every caller of getModelRegistry().
let instanceReady = null;

/**
 * Get the shared ModelRegistry, initializing it on first use.
 *
 * Concurrent callers all await the same initialization, so nobody receives
 * the registry before initialize() has completed. If initialization rejects,
 * the stored promise is cleared so a later call can retry.
 *
 * @returns {Promise<ModelRegistry>} The initialized singleton
 */
async function getModelRegistry() {
  if (!instanceReady) {
    if (!instance) {
      instance = new ModelRegistry();
    }
    const registry = instance;
    instanceReady = registry.initialize().then(
      () => registry,
      (err) => {
        instanceReady = null;
        throw err;
      }
    );
  }
  return instanceReady;
}
419
+
420
/**
 * Sync getter (uses cache only, no network).
 *
 * Creates the singleton on first use from the on-disk cache. The instance is
 * marked as loaded, so no network fetch is started from here.
 *
 * @returns {ModelRegistry} The shared registry instance
 */
function getModelRegistrySync() {
  if (!instance) {
    const registry = new ModelRegistry();
    // _loadFromCache() rebuilds the index itself when it succeeds; build it
    // explicitly only when no usable cache exists, so the Databricks fallback
    // entries are still indexed without doing the work twice.
    if (!registry._loadFromCache()) {
      registry._buildIndex();
    }
    registry.loaded = true;
    // Publish only after the registry is fully populated.
    instance = registry;
  }
  return instance;
}
430
+
431
+ module.exports = {
432
+ ModelRegistry,
433
+ getModelRegistry,
434
+ getModelRegistrySync,
435
+ DATABRICKS_FALLBACK,
436
+ DEFAULT_COST,
437
+ };
@@ -9,6 +9,9 @@
9
9
 
10
10
  const logger = require('../logger');
11
11
 
12
+ // Strip system-reminder blocks injected by the CLI before classification
13
+ const SYSTEM_REMINDER_PATTERN = /<system-reminder>[\s\S]*?<\/system-reminder>/g;
14
+
12
15
  // Pre-compiled regex patterns for performance (avoid recompiling on every request)
13
16
  const GREETING_PATTERN = /^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings|sup|yo)[\s\.\!\?]*$/i;
14
17
  const QUESTION_PATTERN = /^(what is|what's|how does|when|where|why|explain|define|tell me about|can you explain)/i;
@@ -190,7 +193,10 @@ function classifyRequestType(payload) {
190
193
  return { type: 'coding', confidence: 0.5, keywords: [] };
191
194
  }
192
195
 
193
- const content = extractContent(lastMessage);
196
+ const rawContent = extractContent(lastMessage);
197
+ // Strip <system-reminder> blocks before classification to prevent
198
+ // CLI-injected keywords (search, explain, documentation) from polluting results
199
+ const content = rawContent.replace(SYSTEM_REMINDER_PATTERN, '').trim();
194
200
  const contentLower = content.toLowerCase();
195
201
  const messageCount = payload.messages?.length ?? 0;
196
202
 
@@ -1,8 +1,12 @@
1
+ const path = require("path");
1
2
  const {
2
3
  readFile,
3
4
  writeFile,
4
5
  applyFilePatch,
5
6
  resolveWorkspacePath,
7
+ expandTilde,
8
+ isExternalPath,
9
+ readExternalFile,
6
10
  fileExists,
7
11
  workspaceRoot,
8
12
  } = require("../workspace");
@@ -30,17 +34,44 @@ function registerWorkspaceTools() {
30
34
  registerTool(
31
35
  "fs_read",
32
36
  async ({ args = {} }) => {
33
- const relativePath = validateString(args.path ?? args.file, "path");
37
+ const targetPath = validateString(args.path ?? args.file ?? args.file_path, "path");
34
38
  const encoding = normalizeEncoding(args.encoding);
35
- const content = await readFile(relativePath, encoding);
39
+
40
+ // Check if path is outside workspace
41
+ if (isExternalPath(targetPath)) {
42
+ if (args.user_approved !== true) {
43
+ const expanded = expandTilde(targetPath);
44
+ const resolved = path.resolve(expanded);
45
+ return {
46
+ ok: false,
47
+ status: 403,
48
+ content: JSON.stringify({
49
+ error: "external_path_requires_approval",
50
+ message: `The file "${targetPath}" resolves to "${resolved}" which is outside the workspace. You MUST ask the user for permission before reading this file. If the user approves, call this tool again with the same path and set user_approved to true.`,
51
+ resolved_path: resolved,
52
+ }),
53
+ };
54
+ }
55
+ // User approved — read external file
56
+ const { content, resolvedPath } = await readExternalFile(targetPath, encoding);
57
+ return {
58
+ ok: true,
59
+ status: 200,
60
+ content,
61
+ metadata: { path: targetPath, encoding, resolved_path: resolvedPath },
62
+ };
63
+ }
64
+
65
+ // Normal workspace read (unchanged)
66
+ const content = await readFile(targetPath, encoding);
36
67
  return {
37
68
  ok: true,
38
69
  status: 200,
39
70
  content,
40
71
  metadata: {
41
- path: relativePath,
72
+ path: targetPath,
42
73
  encoding,
43
- resolved_path: resolveWorkspacePath(relativePath),
74
+ resolved_path: resolveWorkspacePath(targetPath),
44
75
  },
45
76
  };
46
77
  },
@@ -10,6 +10,33 @@ if (!fs.existsSync(workspaceRoot)) {
10
10
  fs.mkdirSync(workspaceRoot, { recursive: true });
11
11
  }
12
12
 
13
/**
 * Expand a leading home-directory shorthand ("~", "~/..." or "~\...") using
 * the HOME or USERPROFILE environment variable.
 *
 * Paths that merely begin with a tilde character - such as "~backup.txt" or
 * the "~user/..." form - are returned unchanged, because they do not refer to
 * the current user's home directory.
 *
 * @param {*} targetPath - Path to expand; non-strings are returned as-is
 * @returns {*} The expanded path, or the input when no expansion applies or
 *   no home directory is configured in the environment
 */
function expandTilde(targetPath) {
  if (typeof targetPath !== "string") return targetPath;

  const refersToHome =
    targetPath === "~" || targetPath.startsWith("~/") || targetPath.startsWith("~\\");
  if (!refersToHome) return targetPath;

  const home = process.env.HOME || process.env.USERPROFILE;
  return home ? path.join(home, targetPath.slice(1)) : targetPath;
}
23
+
24
/**
 * Report whether a path resolves to a location outside the workspace root.
 *
 * The path is tilde-expanded and resolved relative to `workspaceRoot`.
 * Containment is decided on path segments rather than a raw string prefix,
 * so a sibling directory whose name merely starts with the workspace
 * directory's name (e.g. "<root>-backup") is correctly treated as external.
 *
 * @param {string} targetPath - Relative, absolute or "~"-prefixed path
 * @returns {boolean} true when the resolved path is not the workspace root
 *   or a descendant of it
 */
function isExternalPath(targetPath) {
  const expanded = expandTilde(targetPath);
  const root = path.resolve(workspaceRoot);
  const resolved = path.resolve(root, expanded);
  const relativeToRoot = path.relative(root, resolved);

  // Leaving the root shows up as a leading ".." segment; an absolute result
  // means there is no relative route at all (e.g. a different drive).
  return (
    relativeToRoot === ".." ||
    relativeToRoot.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeToRoot)
  );
}
29
+
30
/**
 * Read a regular file that lives outside the workspace.
 *
 * The path is tilde-expanded and resolved relative to `workspaceRoot` - the
 * same base isExternalPath() uses - so the file that gets read is the one the
 * external-path check evaluated, independent of the process working
 * directory. Absolute paths are unaffected by the base.
 *
 * @param {string} targetPath - Relative, absolute or "~"-prefixed path
 * @param {string} [encoding="utf8"] - Encoding passed to the file read
 * @returns {Promise<{ content: string, resolvedPath: string }>} File content
 *   and the absolute path it was read from
 * @throws {Error} "Requested path is not a file." when the path is not a
 *   regular file; errors from stat/readFile (e.g. a missing path) propagate
 */
async function readExternalFile(targetPath, encoding = "utf8") {
  const resolvedPath = path.resolve(workspaceRoot, expandTilde(targetPath));

  const stats = await fsp.stat(resolvedPath);
  if (!stats.isFile()) {
    throw new Error("Requested path is not a file.");
  }

  const content = await fsp.readFile(resolvedPath, { encoding });
  return { content, resolvedPath };
}
39
+
13
40
  function resolveWorkspacePath(targetPath) {
14
41
  if (!targetPath || typeof targetPath !== "string") {
15
42
  throw new Error("Path must be a non-empty string.");
@@ -110,6 +137,9 @@ function validateCwd(cwd) {
110
137
  module.exports = {
111
138
  workspaceRoot,
112
139
  resolveWorkspacePath,
140
+ expandTilde,
141
+ isExternalPath,
142
+ readExternalFile,
113
143
  readFile,
114
144
  writeFile,
115
145
  fileExists,