npm - lynkr - Versions diffs - 7.2.5 → 8.0.1 - Mend

lynkr 7.2.5 → 8.0.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (124)

package/README.md +3 -3
package/config/model-tiers.json +89 -0
package/install.sh +6 -1
package/package.json +4 -2
package/scripts/setup.js +0 -1
package/src/agents/executor.js +14 -6
package/src/api/middleware/session.js +15 -2
package/src/api/openai-router.js +162 -37
package/src/api/providers-handler.js +15 -1
package/src/api/router.js +107 -2
package/src/budget/index.js +4 -3
package/src/clients/databricks.js +431 -234
package/src/clients/gpt-utils.js +181 -0
package/src/clients/ollama-utils.js +66 -140
package/src/clients/routing.js +0 -1
package/src/clients/standard-tools.js +99 -3
package/src/config/index.js +133 -35
package/src/context/toon.js +173 -0
package/src/logger/index.js +23 -0
package/src/orchestrator/index.js +688 -213
package/src/routing/agentic-detector.js +320 -0
package/src/routing/complexity-analyzer.js +202 -2
package/src/routing/cost-optimizer.js +305 -0
package/src/routing/index.js +168 -159
package/src/routing/model-tiers.js +365 -0
package/src/server.js +4 -14
package/src/sessions/cleanup.js +3 -3
package/src/sessions/record.js +10 -1
package/src/sessions/store.js +7 -2
package/src/tools/agent-task.js +48 -1
package/src/tools/index.js +19 -2
package/src/tools/lazy-loader.js +7 -0
package/src/tools/tinyfish.js +358 -0
package/src/tools/truncate.js +1 -0
package/.github/FUNDING.yml +0 -15
package/.github/workflows/README.md +0 -215
package/.github/workflows/ci.yml +0 -69
package/.github/workflows/index.yml +0 -62
package/.github/workflows/web-tools-tests.yml +0 -56
package/CITATIONS.bib +0 -6
package/CLAWROUTER_ROUTING_PLAN.md +0 -910
package/DEPLOYMENT.md +0 -1001
package/LYNKR-TUI-PLAN.md +0 -984
package/PERFORMANCE-REPORT.md +0 -866
package/PLAN-per-client-model-routing.md +0 -252
package/ROUTER_COMPARISON.md +0 -173
package/TIER_ROUTING_PLAN.md +0 -771
package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
package/docs/BingSiteAuth.xml +0 -4
package/docs/docs-style.css +0 -478
package/docs/docs.html +0 -197
package/docs/google5be250e608e6da39.html +0 -1
package/docs/index.html +0 -577
package/docs/index.md +0 -577
package/docs/robots.txt +0 -4
package/docs/sitemap.xml +0 -44
package/docs/style.css +0 -1223
package/documentation/README.md +0 -100
package/documentation/api.md +0 -806
package/documentation/claude-code-cli.md +0 -672
package/documentation/codex-cli.md +0 -397
package/documentation/contributing.md +0 -571
package/documentation/cursor-integration.md +0 -731
package/documentation/docker.md +0 -867
package/documentation/embeddings.md +0 -760
package/documentation/faq.md +0 -659
package/documentation/features.md +0 -396
package/documentation/headroom.md +0 -519
package/documentation/installation.md +0 -706
package/documentation/memory-system.md +0 -476
package/documentation/production.md +0 -601
package/documentation/providers.md +0 -906
package/documentation/testing.md +0 -629
package/documentation/token-optimization.md +0 -323
package/documentation/tools.md +0 -697
package/documentation/troubleshooting.md +0 -893
package/final-test.js +0 -33
package/headroom-sidecar/config.py +0 -93
package/headroom-sidecar/requirements.txt +0 -14
package/headroom-sidecar/server.py +0 -451
package/monitor-agents.sh +0 -31
package/scripts/audit-log-reader.js +0 -399
package/scripts/compact-dictionary.js +0 -204
package/scripts/test-deduplication.js +0 -448
package/src/db/database.sqlite +0 -0
package/test/README.md +0 -212
package/test/azure-openai-config.test.js +0 -204
package/test/azure-openai-error-resilience.test.js +0 -238
package/test/azure-openai-format-conversion.test.js +0 -354
package/test/azure-openai-integration.test.js +0 -281
package/test/azure-openai-routing.test.js +0 -177
package/test/azure-openai-streaming.test.js +0 -171
package/test/bedrock-integration.test.js +0 -471
package/test/comprehensive-test-suite.js +0 -928
package/test/config-validation.test.js +0 -207
package/test/cursor-integration.test.js +0 -484
package/test/format-conversion.test.js +0 -578
package/test/hybrid-routing-integration.test.js +0 -254
package/test/hybrid-routing-performance.test.js +0 -418
package/test/llamacpp-integration.test.js +0 -863
package/test/lmstudio-integration.test.js +0 -335
package/test/memory/extractor.test.js +0 -398
package/test/memory/retriever.test.js +0 -613
package/test/memory/retriever.test.js.bak +0 -585
package/test/memory/search.test.js +0 -537
package/test/memory/search.test.js.bak +0 -389
package/test/memory/store.test.js +0 -344
package/test/memory/store.test.js.bak +0 -312
package/test/memory/surprise.test.js +0 -300
package/test/memory-performance.test.js +0 -472
package/test/openai-integration.test.js +0 -686
package/test/openrouter-error-resilience.test.js +0 -418
package/test/passthrough-mode.test.js +0 -385
package/test/performance-benchmark.js +0 -351
package/test/performance-tests.js +0 -528
package/test/routing.test.js +0 -219
package/test/web-tools.test.js +0 -329
package/test-agents-simple.js +0 -43
package/test-cli-connection.sh +0 -33
package/test-learning-unit.js +0 -126
package/test-learning.js +0 -112
package/test-parallel-agents.sh +0 -124
package/test-parallel-direct.js +0 -155
package/test-subagents.sh +0 -117

package/README.md CHANGED Viewed

@@ -50,7 +50,7 @@ Lynkr is a **self-hosted proxy server** that unlocks Claude Code CLI , Cursor ID
 npm install -g pino-pretty
 npm install -g lynkr
-lynk start
+lynkr start
 ```
 **Option 2: Git Clone**
@@ -238,7 +238,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c
 ### Getting Started
 - 📦 **[Installation Guide](documentation/installation.md)** - Detailed installation for all methods
-- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 9+ providers
+- ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 12+ providers
 - 🎯 **[Quick Start Examples](documentation/installation.md#quick-start-examples)** - Copy-paste configs
 ### IDE & CLI Integration
@@ -277,7 +277,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c
 ## Key Features Highlights
-- ✅ **Multi-Provider Support** - 9+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter)
+- ✅ **Multi-Provider Support** - 12+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter, Moonshot AI)
 - ✅ **60-80% Cost Reduction** - Token optimization with smart tool selection, prompt caching, memory deduplication
 - ✅ **100% Local Option** - Run completely offline with Ollama/llama.cpp (zero cloud dependencies)
 - ✅ **OpenAI Compatible** - Works with Cursor IDE, Continue.dev, and any OpenAI-compatible client

package/config/model-tiers.json ADDED Viewed

@@ -0,0 +1,89 @@
+{
+  "tiers": {
+    "SIMPLE": {
+      "description": "Greetings, simple Q&A, confirmations, basic lookups",
+      "range": [0, 25],
+      "priority": 1,
+      "preferred": {
+        "ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b", "mistral"],
+        "llamacpp": ["default"],
+        "lmstudio": ["default"],
+        "openai": ["gpt-4o-mini", "gpt-3.5-turbo"],
+        "azure-openai": ["gpt-4o-mini", "gpt-35-turbo"],
+        "anthropic": ["claude-3-haiku-20240307", "claude-3-5-haiku-20241022"],
+        "bedrock": ["anthropic.claude-3-haiku-20240307-v1:0", "amazon.nova-lite-v1:0"],
+        "databricks": ["databricks-claude-haiku-4-5", "databricks-gpt-5-nano"],
+        "google": ["gemini-2.0-flash", "gemini-1.5-flash"],
+        "openrouter": ["google/gemini-flash-1.5", "deepseek/deepseek-chat"],
+        "zai": ["GLM-4-Flash"],
+        "moonshot": ["kimi-k2-turbo-preview"]
+      }
+    },
+    "MEDIUM": {
+      "description": "Code reading, simple edits, research, documentation",
+      "range": [26, 50],
+      "priority": 2,
+      "preferred": {
+        "ollama": ["qwen2.5:32b", "deepseek-coder:33b", "codellama:34b"],
+        "llamacpp": ["default"],
+        "lmstudio": ["default"],
+        "openai": ["gpt-4o", "gpt-4-turbo"],
+        "azure-openai": ["gpt-4o", "gpt-4"],
+        "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
+        "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0", "amazon.nova-pro-v1:0"],
+        "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1"],
+        "google": ["gemini-1.5-pro", "gemini-2.0-pro"],
+        "openrouter": ["anthropic/claude-3.5-sonnet", "openai/gpt-4o"],
+        "zai": ["GLM-4.7"],
+        "moonshot": ["kimi-k2-turbo-preview"]
+      }
+    },
+    "COMPLEX": {
+      "description": "Multi-file changes, debugging, architecture, refactoring",
+      "range": [51, 75],
+      "priority": 3,
+      "preferred": {
+        "ollama": ["qwen2.5:72b", "llama3.1:70b", "deepseek-coder-v2:236b"],
+        "openai": ["o1-mini", "o3-mini", "gpt-4o"],
+        "azure-openai": ["o1-mini", "gpt-4o"],
+        "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
+        "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0"],
+        "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1-codex-max"],
+        "google": ["gemini-2.5-pro", "gemini-1.5-pro"],
+        "openrouter": ["anthropic/claude-3.5-sonnet", "meta-llama/llama-3.1-405b"],
+        "zai": ["GLM-4.7"],
+        "moonshot": ["kimi-k2-turbo-preview"]
+      }
+    },
+    "REASONING": {
+      "description": "Complex analysis, security audits, novel problems, deep thinking",
+      "range": [76, 100],
+      "priority": 4,
+      "preferred": {
+        "openai": ["o1", "o1-pro", "o3"],
+        "azure-openai": ["o1", "o1-pro"],
+        "anthropic": ["claude-opus-4-20250514", "claude-3-opus-20240229"],
+        "bedrock": ["anthropic.claude-3-opus-20240229-v1:0"],
+        "databricks": ["databricks-claude-opus-4-6", "databricks-claude-opus-4-5", "databricks-gpt-5-2"],
+        "google": ["gemini-2.5-pro"],
+        "openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner", "openai/o1"],
+        "deepseek": ["deepseek-reasoner", "deepseek-r1"],
+        "moonshot": ["kimi-k2-thinking", "kimi-k2-turbo-preview"]
+      }
+    }
+  },
+  "localProviders": {
+    "ollama": { "free": true, "defaultTier": "SIMPLE" },
+    "llamacpp": { "free": true, "defaultTier": "SIMPLE" },
+    "lmstudio": { "free": true, "defaultTier": "SIMPLE" }
+  },
+  "providerAliases": {
+    "azure": "azure-openai",
+    "aws": "bedrock",
+    "amazon": "bedrock",
+    "claude": "anthropic",
+    "gemini": "google",
+    "vertex": "google",
+    "kimi": "moonshot"
+  }
+}

package/install.sh CHANGED Viewed

@@ -134,10 +134,15 @@ MODEL_PROVIDER=ollama
 PORT=8080
 # Ollama Configuration (default for local development)
-PREFER_OLLAMA=true
 OLLAMA_MODEL=qwen2.5-coder:7b
 OLLAMA_ENDPOINT=http://localhost:11434
+# Tier-based routing (uncomment and configure to enable)
+# TIER_SIMPLE=ollama:qwen2.5-coder:7b
+# TIER_MEDIUM=ollama:qwen2.5-coder:7b
+# TIER_COMPLEX=ollama:qwen2.5-coder:7b
+# TIER_REASONING=ollama:qwen2.5-coder:7b
 # Long-Term Memory System (Titans-Inspired) - Enabled by default
 MEMORY_ENABLED=true
 MEMORY_RETRIEVAL_LIMIT=5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "lynkr",
-  "version": "7.2.5",
+  "version": "8.0.1",
   "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure  adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
   "main": "index.js",
   "bin": {
@@ -14,7 +14,7 @@
     "dev": "nodemon index.js",
     "lint": "eslint src index.js",
     "test": "npm run test:unit && npm run test:performance",
-    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
+    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
     "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
     "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
     "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
@@ -47,6 +47,7 @@
     "@azure/openai": "^2.0.0",
     "@babel/parser": "^7.29.0",
     "@babel/traverse": "^7.29.0",
+    "@toon-format/toon": "^2.1.0",
     "compression": "^1.7.4",
     "diff": "^5.2.0",
     "dotenv": "^16.4.5",
@@ -57,6 +58,7 @@
     "openai": "^6.14.0",
     "pino": "^8.17.2",
     "pino-http": "^8.6.0",
+    "pino-roll": "^4.0.0",
     "undici": "^6.22.0"
   },
   "optionalDependencies": {

package/scripts/setup.js CHANGED Viewed

@@ -251,7 +251,6 @@ async function createEnvFile() {
     if (ollamaOnly) {
       let envContent = fs.readFileSync(envPath, "utf-8");
       envContent = envContent.replace(/^# MODEL_PROVIDER=databricks/m, "MODEL_PROVIDER=ollama");
-      envContent = envContent.replace(/^PREFER_OLLAMA=true/m, "# PREFER_OLLAMA=true  # Not needed when MODEL_PROVIDER=ollama");
       envContent = envContent.replace(/^FALLBACK_ENABLED=true/m, "FALLBACK_ENABLED=false");
       fs.writeFileSync(envPath, envContent);
     }

package/src/agents/executor.js CHANGED Viewed

@@ -162,14 +162,22 @@ class SubagentExecutor {
       payload.tools = filteredTools;
     }
-    // Determine provider based on model - subagents should use the specified model
+    // Determine provider based on model family.
+    // Subagents should use the currently configured MODEL_PROVIDER and avoid
+    // hard-fallbacks to Azure when Azure is not selected/configured.
     let forceProvider = null;
-    if (payload.model?.includes('claude') || payload.model?.includes('sonnet') || payload.model?.includes('haiku') || payload.model?.includes('opus')) {
-      // Route Claude models to the configured Claude provider (azure-openai, databricks, etc.)
+    const modelLower = String(payload.model || "").toLowerCase();
+    const isClaudeFamilyModel =
+      modelLower.includes("claude") ||
+      modelLower.includes("sonnet") ||
+      modelLower.includes("haiku") ||
+      modelLower.includes("opus");
+    const isGptFamilyModel = modelLower.includes("gpt");
+    if (isClaudeFamilyModel || isGptFamilyModel) {
       const config = require('../config');
-      forceProvider = config.modelProvider?.provider || 'azure-openai';
-    } else if (payload.model?.includes('gpt')) {
-      forceProvider = 'azure-openai';
+      // `type` is the canonical key; `provider` kept as legacy fallback.
+      forceProvider = config.modelProvider?.type || config.modelProvider?.provider || null;
     }
     logger.debug({

package/src/api/middleware/session.js CHANGED Viewed

@@ -45,8 +45,21 @@ function sessionMiddleware(req, res, next) {
     // Add sessionId to logger context for this request
     req.log = logger.child({ sessionId });
-    const session = getOrCreateSession(sessionId);
-    req.session = session;
+    // Skip DB persistence for auto-generated (ephemeral) session IDs.
+    // These are created when the client doesn't send a session header,
+    // so storing them just bloats the DB with throwaway records.
+    if (req.generatedSessionId) {
+      req.session = {
+        id: sessionId,
+        createdAt: Date.now(),
+        updatedAt: Date.now(),
+        metadata: {},
+        history: [],
+        _ephemeral: true,
+      };
+    } else {
+      req.session = getOrCreateSession(sessionId);
+    }
     return next();
   } catch (err) {
     return next(err);

package/src/api/openai-router.js CHANGED Viewed

@@ -21,9 +21,9 @@ const orchestrator = require("../orchestrator");
 const { getSession } = require("../sessions");
 const {
   convertOpenAIToAnthropic,
-  convertAnthropicToOpenAI,
-  convertAnthropicStreamChunkToOpenAI
+  convertAnthropicToOpenAI
 } = require("../clients/openai-format");
+const { IDE_SAFE_TOOLS } = require("../clients/standard-tools");
 const router = express.Router();
@@ -60,13 +60,14 @@ function detectClient(headers) {
  */
 const CLIENT_TOOL_MAPPINGS = {
   // ============== CODEX CLI ==============
-  // Tools: shell_command, read_file, write_file, apply_patch, glob_file_search, rg, list_dir
+  // Confirmed tools: shell, apply_patch, read_file, write_file, list_dir, glob_file_search,
+  //   rg, web_search, update_plan, view_image, memory
+  // NOT supported: spawn_agent/spawn_thread (Task has no Codex equivalent)
   codex: {
     "Bash": {
-      name: "shell_command",
+      name: "shell",
       mapArgs: (a) => ({
-        command: a.command || "",
-        workdir: a.cwd || a.working_directory
+        command: ["bash", "-c", a.command || ""]
       })
     },
     "Read": {
@@ -113,6 +114,25 @@ const CLIENT_TOOL_MAPPINGS = {
       mapArgs: (a) => ({
         path: a.path || a.directory
       })
+    },
+    "TodoWrite": {
+      name: "update_plan",
+      mapArgs: (a) => ({
+        todos: a.todos || []
+      })
+    },
+    "WebSearch": {
+      name: "web_search",
+      mapArgs: (a) => ({
+        query: a.query || ""
+      })
+    },
+    "WebAgent": {
+      name: "web_agent",
+      mapArgs: (a) => ({
+        url: a.url || "",
+        goal: a.goal || ""
+      })
     }
   },
@@ -168,6 +188,13 @@ const CLIENT_TOOL_MAPPINGS = {
         path: a.path || a.directory || ".",
         recursive: false
       })
+    },
+    "WebAgent": {
+      name: "web_agent",
+      mapArgs: (a) => ({
+        url: a.url || "",
+        goal: a.goal || ""
+      })
     }
   },
@@ -224,6 +251,13 @@ const CLIENT_TOOL_MAPPINGS = {
         path: a.path || a.directory || ".",
         recursive: false
       })
+    },
+    "WebAgent": {
+      name: "web_agent",
+      mapArgs: (a) => ({
+        url: a.url || "",
+        goal: a.goal || ""
+      })
     }
   },
@@ -274,6 +308,13 @@ const CLIENT_TOOL_MAPPINGS = {
       mapArgs: (a) => ({
         filepath: a.path || a.directory || "."
       })
+    },
+    "WebAgent": {
+      name: "web_agent",
+      mapArgs: (a) => ({
+        url: a.url || "",
+        goal: a.goal || ""
+      })
     }
   }
 };
@@ -321,14 +362,7 @@ function mapToolForClient(toolName, argsJson, clientType) {
   };
 }
-/**
- * Check if client is a known AI coding tool that needs tool mapping
- * @param {Object} headers - Request headers
- * @returns {boolean}
- */
-function isKnownClient(headers) {
-  return detectClient(headers) !== "unknown";
-}
 /**
  * POST /v1/chat/completions
@@ -374,7 +408,7 @@ router.post("/chat/completions", async (req, res) => {
         : JSON.stringify(m.content).substring(0, 200)
     }));
-    logger.info({
+    logger.debug({
       endpoint: "/v1/chat/completions",
       model: req.body.model,
       messageCount: req.body.messages?.length,
@@ -393,6 +427,32 @@ router.post("/chat/completions", async (req, res) => {
     // Convert OpenAI request to Anthropic format
     const anthropicRequest = convertOpenAIToAnthropic(req.body);
+    // Inject tools if client didn't send any.
+    // Two-layer filtering:
+    //   1. IDE_SAFE_TOOLS = STANDARD_TOOLS minus AskUserQuestion (can't work through proxy)
+    //   2. For known clients (codex, cline, etc.), further filter to only tools
+    //      that have a mapping in CLIENT_TOOL_MAPPINGS — this ensures clients like
+    //      Codex don't see tools they can't handle (Task, WebFetch, NotebookEdit)
+    //      while Claude Code (unknown client) gets the full IDE_SAFE_TOOLS set.
+    // Skip injection if client explicitly opted out (tool_choice: "none" or empty tools array).
+    const clientType = detectClient(req.headers);
+    const clientExplicitlyDisabledTools = req.body.tool_choice === "none" || Array.isArray(req.body.tools);
+    if (!clientExplicitlyDisabledTools && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) {
+      const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
+      const clientTools = clientMappings
+        ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
+        : IDE_SAFE_TOOLS;
+      anthropicRequest.tools = clientTools;
+      logger.debug({
+        clientType,
+        injectedToolCount: clientTools.length,
+        injectedToolNames: clientTools.map(t => t.name),
+        reason: clientMappings
+          ? `Known client '${clientType}' — filtered to mapped tools only`
+          : "Unknown client — injecting full IDE_SAFE_TOOLS"
+      }, "=== INJECTING TOOLS ===");
+    }
     // Get or create session
     const session = getSession(sessionId);
@@ -420,7 +480,7 @@ router.post("/chat/completions", async (req, res) => {
         });
         // Check if we have a valid response body
-        logger.info({
+        logger.debug({
           hasResult: !!result,
           resultKeys: result ? Object.keys(result) : null,
           hasBody: result && !!result.body,
@@ -442,7 +502,7 @@ router.post("/chat/completions", async (req, res) => {
         const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
         // Debug: Log what we're about to stream
-        logger.info({
+        logger.debug({
           openaiResponseId: openaiResponse.id,
           messageContent: openaiResponse.choices[0]?.message?.content?.substring(0, 100),
           contentLength: openaiResponse.choices[0]?.message?.content?.length || 0,
@@ -454,7 +514,25 @@ router.post("/chat/completions", async (req, res) => {
         // Simulate streaming by sending the complete response as chunks
         const content = openaiResponse.choices[0].message.content || "";
-        const toolCalls = openaiResponse.choices[0].message.tool_calls;
+        let toolCalls = openaiResponse.choices[0].message.tool_calls;
+        // Map tool names for known IDE clients
+        if (clientType !== "unknown" && toolCalls && toolCalls.length > 0) {
+          toolCalls = toolCalls.map(tc => {
+            const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
+            return {
+              ...tc,
+              function: {
+                name: mapped.name,
+                arguments: mapped.arguments
+              }
+            };
+          });
+          logger.debug({
+            mappedTools: toolCalls.map(t => t.function?.name),
+            clientType
+          }, "Tool names mapped for streaming chat/completions");
+        }
         // Send start chunk with role
         const startChunk = {
@@ -493,7 +571,7 @@ router.post("/chat/completions", async (req, res) => {
             }]
           };
           const contentWriteOk = res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
-          logger.info({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
+          logger.debug({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
         }
         // Send tool calls if present
@@ -545,7 +623,7 @@ router.post("/chat/completions", async (req, res) => {
         res.write("data: [DONE]\n\n");
         // Ensure data is flushed before ending
-        logger.info({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
+        logger.debug({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
         res.end();
         logger.info({
@@ -558,10 +636,7 @@ router.post("/chat/completions", async (req, res) => {
       } catch (streamError) {
         logger.error({
           error: streamError.message,
-          stack: streamError.stack,
-          resultWasNull: !result,
-          resultBodyWasNull: result && !result.body,
-          resultKeys: result ? Object.keys(result) : null
+          stack: streamError.stack
         }, "=== STREAMING ERROR ===");
         // Send error in OpenAI streaming format
@@ -606,6 +681,24 @@ router.post("/chat/completions", async (req, res) => {
       // Convert Anthropic response to OpenAI format
       const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
+      // Map tool names for known IDE clients
+      if (clientType !== "unknown" && openaiResponse.choices?.[0]?.message?.tool_calls?.length > 0) {
+        openaiResponse.choices[0].message.tool_calls = openaiResponse.choices[0].message.tool_calls.map(tc => {
+          const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
+          return {
+            ...tc,
+            function: {
+              name: mapped.name,
+              arguments: mapped.arguments
+            }
+          };
+        });
+        logger.debug({
+          mappedTools: openaiResponse.choices[0].message.tool_calls.map(t => t.function?.name),
+          clientType
+        }, "Tool names mapped for non-streaming chat/completions");
+      }
       logger.info({
         duration: Date.now() - startTime,
         mode: "non-streaming",
@@ -767,6 +860,18 @@ function getConfiguredProviders() {
     });
   }
+  // Check Moonshot AI (Kimi)
+  if (config.moonshot?.apiKey) {
+    providers.push({
+      name: "moonshot",
+      type: "moonshot-ai",
+      models: [
+        config.moonshot.model || "kimi-k2-turbo-preview",
+        "kimi-k2-turbo-preview"
+      ]
+    });
+  }
   // Check Vertex AI (Google Cloud)
   if (config.vertex?.projectId) {
     providers.push({
@@ -1013,7 +1118,7 @@ function determineEmbeddingProvider(requestedModel = null) {
 async function generateOllamaEmbeddings(inputs, embeddingConfig) {
   const { model, endpoint } = embeddingConfig;
-  logger.info({
+  logger.debug({
     model,
     endpoint,
     inputCount: inputs.length
@@ -1079,7 +1184,7 @@ async function generateOllamaEmbeddings(inputs, embeddingConfig) {
 async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
   const { model, endpoint } = embeddingConfig;
-  logger.info({
+  logger.debug({
     model,
     endpoint,
     inputCount: inputs.length
@@ -1147,7 +1252,7 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
 async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
   const { model, apiKey, endpoint } = embeddingConfig;
-  logger.info({
+  logger.debug({
     model,
     inputCount: inputs.length
   }, "Generating embeddings with OpenRouter");
@@ -1181,7 +1286,7 @@ async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
 async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
   const { model, apiKey, endpoint } = embeddingConfig;
-  logger.info({
+  logger.debug({
     model,
     inputCount: inputs.length
   }, "Generating embeddings with OpenAI");
@@ -1233,7 +1338,7 @@ router.post("/embeddings", async (req, res) => {
     // Convert input to array if string
     const inputs = Array.isArray(input) ? input : [input];
-    logger.info({
+    logger.debug({
       endpoint: "/v1/embeddings",
       model: model || "auto-detect",
       inputCount: inputs.length,
@@ -1335,7 +1440,7 @@ router.post("/responses", async (req, res) => {
     const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
     // Comprehensive debug logging
-    logger.info({
+    logger.debug({
       endpoint: "/v1/responses",
       inputType: typeof req.body.input,
       inputIsArray: Array.isArray(req.body.input),
@@ -1354,7 +1459,7 @@ router.post("/responses", async (req, res) => {
     // Convert Responses API to Chat Completions format
     const chatRequest = convertResponsesToChat(req.body);
-    logger.info({
+    logger.debug({
       chatRequestMessageCount: chatRequest.messages?.length,
       chatRequestMessages: chatRequest.messages?.map(m => ({
         role: m.role,
@@ -1366,7 +1471,7 @@ router.post("/responses", async (req, res) => {
     // Convert to Anthropic format
     const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
-    logger.info({
+    logger.debug({
       anthropicMessageCount: anthropicRequest.messages?.length,
       anthropicMessages: anthropicRequest.messages?.map(m => ({
         role: m.role,
@@ -1374,6 +1479,26 @@ router.post("/responses", async (req, res) => {
       }))
     }, "After Chat→Anthropic conversion");
+    // Inject tools if client didn't send any (same two-layer filtering as chat/completions).
+    // Skip injection if client explicitly opted out (tool_choice: "none" or empty tools array).
+    const clientType = detectClient(req.headers);
+    const clientExplicitlyDisabledTools = req.body.tool_choice === "none" || Array.isArray(req.body.tools);
+    if (!clientExplicitlyDisabledTools && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) {
+      const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
+      const clientTools = clientMappings
+        ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
+        : IDE_SAFE_TOOLS;
+      anthropicRequest.tools = clientTools;
+      logger.debug({
+        clientType,
+        injectedToolCount: clientTools.length,
+        injectedToolNames: clientTools.map(t => t.name),
+        reason: clientMappings
+          ? `Known client '${clientType}' — filtered to mapped tools only`
+          : "Unknown client — injecting full IDE_SAFE_TOOLS"
+      }, "=== INJECTING TOOLS (responses) ===");
+    }
     // Get session
     const session = getSession(sessionId);
@@ -1400,7 +1525,7 @@ router.post("/responses", async (req, res) => {
         });
         // Debug: Log what orchestrator returned
-        logger.info({
+        logger.debug({
           hasResult: !!result,
           hasBody: !!result?.body,
           bodyKeys: result?.body ? Object.keys(result.body) : null,
@@ -1412,7 +1537,7 @@ router.post("/responses", async (req, res) => {
         // Convert back: Anthropic → OpenAI → Responses
         const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
-        logger.info({
+        logger.debug({
           chatContent: chatResponse.choices?.[0]?.message?.content?.substring(0, 200),
           chatContentLength: chatResponse.choices?.[0]?.message?.content?.length || 0,
           hasToolCalls: !!chatResponse.choices?.[0]?.message?.tool_calls,
@@ -1433,7 +1558,7 @@ router.post("/responses", async (req, res) => {
         // Check if client is a known AI coding tool and map tool names accordingly
         const clientType = detectClient(req.headers);
         if (clientType !== "unknown" && toolCalls.length > 0) {
-          logger.info({
+          logger.debug({
             originalTools: toolCalls.map(t => t.function?.name),
             clientType,
             userAgent: req.headers["user-agent"]
@@ -1451,12 +1576,12 @@ router.post("/responses", async (req, res) => {
             };
           });
-          logger.info({
+          logger.debug({
             mappedTools: toolCalls.map(t => t.function?.name)
           }, `Tool names mapped for ${clientType}`);
         }
-        logger.info({
+        logger.debug({
           content: content.substring(0, 100),
           contentLength: content.length,
           toolCallCount: toolCalls.length,

package/src/api/providers-handler.js CHANGED Viewed

@@ -179,6 +179,20 @@ function getConfiguredProviders() {
     });
   }
+  // Check Moonshot AI (Kimi)
+  if (config.moonshot?.apiKey) {
+    providers.push({
+      name: "moonshot",
+      type: "moonshot-ai",
+      baseUrl: config.moonshot.endpoint || "https://api.moonshot.ai/v1",
+      enabled: true,
+      models: [
+        { id: config.moonshot.model || "kimi-k2-turbo-preview", name: "Configured Model" },
+        { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview" },
+      ]
+    });
+  }
   // Check Vertex AI (Google Cloud)
   if (config.vertex?.projectId) {
     const region = config.vertex.region || "us-east5";
@@ -369,7 +383,7 @@ router.get("/config", (req, res) => {
       model_provider: config.modelProvider?.type || "databricks",
       fallback_provider: config.modelProvider?.fallbackProvider || null,
       fallback_enabled: config.modelProvider?.fallbackEnabled || false,
-      prefer_ollama: config.modelProvider?.preferOllama || false,
+      tier_routing_enabled: config.modelTiers?.enabled || false,
       tool_execution_mode: config.toolExecutionMode || "server",
       configured_providers: providers.map(p => p.name),
       memory_enabled: config.memory?.enabled || false,