lynkr 7.2.5 → 8.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +3 -3
  2. package/config/model-tiers.json +89 -0
  3. package/install.sh +6 -1
  4. package/package.json +4 -2
  5. package/scripts/setup.js +0 -1
  6. package/src/agents/executor.js +14 -6
  7. package/src/api/middleware/session.js +15 -2
  8. package/src/api/openai-router.js +162 -37
  9. package/src/api/providers-handler.js +15 -1
  10. package/src/api/router.js +107 -2
  11. package/src/budget/index.js +4 -3
  12. package/src/clients/databricks.js +431 -234
  13. package/src/clients/gpt-utils.js +181 -0
  14. package/src/clients/ollama-utils.js +66 -140
  15. package/src/clients/routing.js +0 -1
  16. package/src/clients/standard-tools.js +99 -3
  17. package/src/config/index.js +133 -35
  18. package/src/context/toon.js +173 -0
  19. package/src/logger/index.js +23 -0
  20. package/src/orchestrator/index.js +688 -213
  21. package/src/routing/agentic-detector.js +320 -0
  22. package/src/routing/complexity-analyzer.js +202 -2
  23. package/src/routing/cost-optimizer.js +305 -0
  24. package/src/routing/index.js +168 -159
  25. package/src/routing/model-tiers.js +365 -0
  26. package/src/server.js +4 -14
  27. package/src/sessions/cleanup.js +3 -3
  28. package/src/sessions/record.js +10 -1
  29. package/src/sessions/store.js +7 -2
  30. package/src/tools/agent-task.js +48 -1
  31. package/src/tools/index.js +19 -2
  32. package/src/tools/lazy-loader.js +7 -0
  33. package/src/tools/tinyfish.js +358 -0
  34. package/src/tools/truncate.js +1 -0
  35. package/.github/FUNDING.yml +0 -15
  36. package/.github/workflows/README.md +0 -215
  37. package/.github/workflows/ci.yml +0 -69
  38. package/.github/workflows/index.yml +0 -62
  39. package/.github/workflows/web-tools-tests.yml +0 -56
  40. package/CITATIONS.bib +0 -6
  41. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  42. package/DEPLOYMENT.md +0 -1001
  43. package/LYNKR-TUI-PLAN.md +0 -984
  44. package/PERFORMANCE-REPORT.md +0 -866
  45. package/PLAN-per-client-model-routing.md +0 -252
  46. package/ROUTER_COMPARISON.md +0 -173
  47. package/TIER_ROUTING_PLAN.md +0 -771
  48. package/docs/42642f749da6234f41b6b425c3bb07c9.txt +0 -1
  49. package/docs/BingSiteAuth.xml +0 -4
  50. package/docs/docs-style.css +0 -478
  51. package/docs/docs.html +0 -197
  52. package/docs/google5be250e608e6da39.html +0 -1
  53. package/docs/index.html +0 -577
  54. package/docs/index.md +0 -577
  55. package/docs/robots.txt +0 -4
  56. package/docs/sitemap.xml +0 -44
  57. package/docs/style.css +0 -1223
  58. package/documentation/README.md +0 -100
  59. package/documentation/api.md +0 -806
  60. package/documentation/claude-code-cli.md +0 -672
  61. package/documentation/codex-cli.md +0 -397
  62. package/documentation/contributing.md +0 -571
  63. package/documentation/cursor-integration.md +0 -731
  64. package/documentation/docker.md +0 -867
  65. package/documentation/embeddings.md +0 -760
  66. package/documentation/faq.md +0 -659
  67. package/documentation/features.md +0 -396
  68. package/documentation/headroom.md +0 -519
  69. package/documentation/installation.md +0 -706
  70. package/documentation/memory-system.md +0 -476
  71. package/documentation/production.md +0 -601
  72. package/documentation/providers.md +0 -906
  73. package/documentation/testing.md +0 -629
  74. package/documentation/token-optimization.md +0 -323
  75. package/documentation/tools.md +0 -697
  76. package/documentation/troubleshooting.md +0 -893
  77. package/final-test.js +0 -33
  78. package/headroom-sidecar/config.py +0 -93
  79. package/headroom-sidecar/requirements.txt +0 -14
  80. package/headroom-sidecar/server.py +0 -451
  81. package/monitor-agents.sh +0 -31
  82. package/scripts/audit-log-reader.js +0 -399
  83. package/scripts/compact-dictionary.js +0 -204
  84. package/scripts/test-deduplication.js +0 -448
  85. package/src/db/database.sqlite +0 -0
  86. package/test/README.md +0 -212
  87. package/test/azure-openai-config.test.js +0 -204
  88. package/test/azure-openai-error-resilience.test.js +0 -238
  89. package/test/azure-openai-format-conversion.test.js +0 -354
  90. package/test/azure-openai-integration.test.js +0 -281
  91. package/test/azure-openai-routing.test.js +0 -177
  92. package/test/azure-openai-streaming.test.js +0 -171
  93. package/test/bedrock-integration.test.js +0 -471
  94. package/test/comprehensive-test-suite.js +0 -928
  95. package/test/config-validation.test.js +0 -207
  96. package/test/cursor-integration.test.js +0 -484
  97. package/test/format-conversion.test.js +0 -578
  98. package/test/hybrid-routing-integration.test.js +0 -254
  99. package/test/hybrid-routing-performance.test.js +0 -418
  100. package/test/llamacpp-integration.test.js +0 -863
  101. package/test/lmstudio-integration.test.js +0 -335
  102. package/test/memory/extractor.test.js +0 -398
  103. package/test/memory/retriever.test.js +0 -613
  104. package/test/memory/retriever.test.js.bak +0 -585
  105. package/test/memory/search.test.js +0 -537
  106. package/test/memory/search.test.js.bak +0 -389
  107. package/test/memory/store.test.js +0 -344
  108. package/test/memory/store.test.js.bak +0 -312
  109. package/test/memory/surprise.test.js +0 -300
  110. package/test/memory-performance.test.js +0 -472
  111. package/test/openai-integration.test.js +0 -686
  112. package/test/openrouter-error-resilience.test.js +0 -418
  113. package/test/passthrough-mode.test.js +0 -385
  114. package/test/performance-benchmark.js +0 -351
  115. package/test/performance-tests.js +0 -528
  116. package/test/routing.test.js +0 -219
  117. package/test/web-tools.test.js +0 -329
  118. package/test-agents-simple.js +0 -43
  119. package/test-cli-connection.sh +0 -33
  120. package/test-learning-unit.js +0 -126
  121. package/test-learning.js +0 -112
  122. package/test-parallel-agents.sh +0 -124
  123. package/test-parallel-direct.js +0 -155
  124. package/test-subagents.sh +0 -117
package/README.md CHANGED
@@ -50,7 +50,7 @@ Lynkr is a **self-hosted proxy server** that unlocks Claude Code CLI , Cursor ID
50
50
  npm install -g pino-pretty
51
51
  npm install -g lynkr
52
52
 
53
- lynk start
53
+ lynkr start
54
54
  ```
55
55
 
56
56
  **Option 2: Git Clone**
@@ -238,7 +238,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c
238
238
 
239
239
  ### Getting Started
240
240
  - 📦 **[Installation Guide](documentation/installation.md)** - Detailed installation for all methods
241
- - ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 9+ providers
241
+ - ⚙️ **[Provider Configuration](documentation/providers.md)** - Complete setup for all 12+ providers
242
242
  - 🎯 **[Quick Start Examples](documentation/installation.md#quick-start-examples)** - Copy-paste configs
243
243
 
244
244
  ### IDE & CLI Integration
@@ -277,7 +277,7 @@ Lynkr supports [ClawdBot](https://github.com/openclaw/openclaw) via its OpenAI-c
277
277
 
278
278
  ## Key Features Highlights
279
279
 
280
- - ✅ **Multi-Provider Support** - 9+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter)
280
+ - ✅ **Multi-Provider Support** - 12+ providers including local (Ollama, llama.cpp) and cloud (Bedrock, Databricks, OpenRouter, Moonshot AI)
281
281
  - ✅ **60-80% Cost Reduction** - Token optimization with smart tool selection, prompt caching, memory deduplication
282
282
  - ✅ **100% Local Option** - Run completely offline with Ollama/llama.cpp (zero cloud dependencies)
283
283
  - ✅ **OpenAI Compatible** - Works with Cursor IDE, Continue.dev, and any OpenAI-compatible client
package/config/model-tiers.json ADDED
@@ -0,0 +1,89 @@
1
+ {
2
+ "tiers": {
3
+ "SIMPLE": {
4
+ "description": "Greetings, simple Q&A, confirmations, basic lookups",
5
+ "range": [0, 25],
6
+ "priority": 1,
7
+ "preferred": {
8
+ "ollama": ["llama3.2", "gemma2", "phi3", "qwen2.5:7b", "mistral"],
9
+ "llamacpp": ["default"],
10
+ "lmstudio": ["default"],
11
+ "openai": ["gpt-4o-mini", "gpt-3.5-turbo"],
12
+ "azure-openai": ["gpt-4o-mini", "gpt-35-turbo"],
13
+ "anthropic": ["claude-3-haiku-20240307", "claude-3-5-haiku-20241022"],
14
+ "bedrock": ["anthropic.claude-3-haiku-20240307-v1:0", "amazon.nova-lite-v1:0"],
15
+ "databricks": ["databricks-claude-haiku-4-5", "databricks-gpt-5-nano"],
16
+ "google": ["gemini-2.0-flash", "gemini-1.5-flash"],
17
+ "openrouter": ["google/gemini-flash-1.5", "deepseek/deepseek-chat"],
18
+ "zai": ["GLM-4-Flash"],
19
+ "moonshot": ["kimi-k2-turbo-preview"]
20
+ }
21
+ },
22
+ "MEDIUM": {
23
+ "description": "Code reading, simple edits, research, documentation",
24
+ "range": [26, 50],
25
+ "priority": 2,
26
+ "preferred": {
27
+ "ollama": ["qwen2.5:32b", "deepseek-coder:33b", "codellama:34b"],
28
+ "llamacpp": ["default"],
29
+ "lmstudio": ["default"],
30
+ "openai": ["gpt-4o", "gpt-4-turbo"],
31
+ "azure-openai": ["gpt-4o", "gpt-4"],
32
+ "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
33
+ "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0", "amazon.nova-pro-v1:0"],
34
+ "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1"],
35
+ "google": ["gemini-1.5-pro", "gemini-2.0-pro"],
36
+ "openrouter": ["anthropic/claude-3.5-sonnet", "openai/gpt-4o"],
37
+ "zai": ["GLM-4.7"],
38
+ "moonshot": ["kimi-k2-turbo-preview"]
39
+ }
40
+ },
41
+ "COMPLEX": {
42
+ "description": "Multi-file changes, debugging, architecture, refactoring",
43
+ "range": [51, 75],
44
+ "priority": 3,
45
+ "preferred": {
46
+ "ollama": ["qwen2.5:72b", "llama3.1:70b", "deepseek-coder-v2:236b"],
47
+ "openai": ["o1-mini", "o3-mini", "gpt-4o"],
48
+ "azure-openai": ["o1-mini", "gpt-4o"],
49
+ "anthropic": ["claude-sonnet-4-20250514", "claude-3-5-sonnet-20241022"],
50
+ "bedrock": ["anthropic.claude-3-5-sonnet-20241022-v2:0"],
51
+ "databricks": ["databricks-claude-sonnet-4-5", "databricks-gpt-5-1-codex-max"],
52
+ "google": ["gemini-2.5-pro", "gemini-1.5-pro"],
53
+ "openrouter": ["anthropic/claude-3.5-sonnet", "meta-llama/llama-3.1-405b"],
54
+ "zai": ["GLM-4.7"],
55
+ "moonshot": ["kimi-k2-turbo-preview"]
56
+ }
57
+ },
58
+ "REASONING": {
59
+ "description": "Complex analysis, security audits, novel problems, deep thinking",
60
+ "range": [76, 100],
61
+ "priority": 4,
62
+ "preferred": {
63
+ "openai": ["o1", "o1-pro", "o3"],
64
+ "azure-openai": ["o1", "o1-pro"],
65
+ "anthropic": ["claude-opus-4-20250514", "claude-3-opus-20240229"],
66
+ "bedrock": ["anthropic.claude-3-opus-20240229-v1:0"],
67
+ "databricks": ["databricks-claude-opus-4-6", "databricks-claude-opus-4-5", "databricks-gpt-5-2"],
68
+ "google": ["gemini-2.5-pro"],
69
+ "openrouter": ["anthropic/claude-3-opus", "deepseek/deepseek-reasoner", "openai/o1"],
70
+ "deepseek": ["deepseek-reasoner", "deepseek-r1"],
71
+ "moonshot": ["kimi-k2-thinking", "kimi-k2-turbo-preview"]
72
+ }
73
+ }
74
+ },
75
+ "localProviders": {
76
+ "ollama": { "free": true, "defaultTier": "SIMPLE" },
77
+ "llamacpp": { "free": true, "defaultTier": "SIMPLE" },
78
+ "lmstudio": { "free": true, "defaultTier": "SIMPLE" }
79
+ },
80
+ "providerAliases": {
81
+ "azure": "azure-openai",
82
+ "aws": "bedrock",
83
+ "amazon": "bedrock",
84
+ "claude": "anthropic",
85
+ "gemini": "google",
86
+ "vertex": "google",
87
+ "kimi": "moonshot"
88
+ }
89
+ }
package/install.sh CHANGED
@@ -134,10 +134,15 @@ MODEL_PROVIDER=ollama
134
134
  PORT=8080
135
135
 
136
136
  # Ollama Configuration (default for local development)
137
- PREFER_OLLAMA=true
138
137
  OLLAMA_MODEL=qwen2.5-coder:7b
139
138
  OLLAMA_ENDPOINT=http://localhost:11434
140
139
 
140
+ # Tier-based routing (uncomment and configure to enable)
141
+ # TIER_SIMPLE=ollama:qwen2.5-coder:7b
142
+ # TIER_MEDIUM=ollama:qwen2.5-coder:7b
143
+ # TIER_COMPLEX=ollama:qwen2.5-coder:7b
144
+ # TIER_REASONING=ollama:qwen2.5-coder:7b
145
+
141
146
  # Long-Term Memory System (Titans-Inspired) - Enabled by default
142
147
  MEMORY_ENABLED=true
143
148
  MEMORY_RETRIEVAL_LIMIT=5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "lynkr",
3
- "version": "7.2.5",
3
+ "version": "8.0.1",
4
4
  "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
5
5
  "main": "index.js",
6
6
  "bin": {
@@ -14,7 +14,7 @@
14
14
  "dev": "nodemon index.js",
15
15
  "lint": "eslint src index.js",
16
16
  "test": "npm run test:unit && npm run test:performance",
17
- "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
17
+ "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
18
18
  "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
19
19
  "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
20
20
  "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
@@ -47,6 +47,7 @@
47
47
  "@azure/openai": "^2.0.0",
48
48
  "@babel/parser": "^7.29.0",
49
49
  "@babel/traverse": "^7.29.0",
50
+ "@toon-format/toon": "^2.1.0",
50
51
  "compression": "^1.7.4",
51
52
  "diff": "^5.2.0",
52
53
  "dotenv": "^16.4.5",
@@ -57,6 +58,7 @@
57
58
  "openai": "^6.14.0",
58
59
  "pino": "^8.17.2",
59
60
  "pino-http": "^8.6.0",
61
+ "pino-roll": "^4.0.0",
60
62
  "undici": "^6.22.0"
61
63
  },
62
64
  "optionalDependencies": {
package/scripts/setup.js CHANGED
@@ -251,7 +251,6 @@ async function createEnvFile() {
251
251
  if (ollamaOnly) {
252
252
  let envContent = fs.readFileSync(envPath, "utf-8");
253
253
  envContent = envContent.replace(/^# MODEL_PROVIDER=databricks/m, "MODEL_PROVIDER=ollama");
254
- envContent = envContent.replace(/^PREFER_OLLAMA=true/m, "# PREFER_OLLAMA=true # Not needed when MODEL_PROVIDER=ollama");
255
254
  envContent = envContent.replace(/^FALLBACK_ENABLED=true/m, "FALLBACK_ENABLED=false");
256
255
  fs.writeFileSync(envPath, envContent);
257
256
  }
package/src/agents/executor.js CHANGED
@@ -162,14 +162,22 @@ class SubagentExecutor {
162
162
  payload.tools = filteredTools;
163
163
  }
164
164
 
165
- // Determine provider based on model - subagents should use the specified model
165
+ // Determine provider based on model family.
166
+ // Subagents should use the currently configured MODEL_PROVIDER and avoid
167
+ // hard-fallbacks to Azure when Azure is not selected/configured.
166
168
  let forceProvider = null;
167
- if (payload.model?.includes('claude') || payload.model?.includes('sonnet') || payload.model?.includes('haiku') || payload.model?.includes('opus')) {
168
- // Route Claude models to the configured Claude provider (azure-openai, databricks, etc.)
169
+ const modelLower = String(payload.model || "").toLowerCase();
170
+ const isClaudeFamilyModel =
171
+ modelLower.includes("claude") ||
172
+ modelLower.includes("sonnet") ||
173
+ modelLower.includes("haiku") ||
174
+ modelLower.includes("opus");
175
+ const isGptFamilyModel = modelLower.includes("gpt");
176
+
177
+ if (isClaudeFamilyModel || isGptFamilyModel) {
169
178
  const config = require('../config');
170
- forceProvider = config.modelProvider?.provider || 'azure-openai';
171
- } else if (payload.model?.includes('gpt')) {
172
- forceProvider = 'azure-openai';
179
+ // `type` is the canonical key; `provider` kept as legacy fallback.
180
+ forceProvider = config.modelProvider?.type || config.modelProvider?.provider || null;
173
181
  }
174
182
 
175
183
  logger.debug({
package/src/api/middleware/session.js CHANGED
@@ -45,8 +45,21 @@ function sessionMiddleware(req, res, next) {
45
45
  // Add sessionId to logger context for this request
46
46
  req.log = logger.child({ sessionId });
47
47
 
48
- const session = getOrCreateSession(sessionId);
49
- req.session = session;
48
+ // Skip DB persistence for auto-generated (ephemeral) session IDs.
49
+ // These are created when the client doesn't send a session header,
50
+ // so storing them just bloats the DB with throwaway records.
51
+ if (req.generatedSessionId) {
52
+ req.session = {
53
+ id: sessionId,
54
+ createdAt: Date.now(),
55
+ updatedAt: Date.now(),
56
+ metadata: {},
57
+ history: [],
58
+ _ephemeral: true,
59
+ };
60
+ } else {
61
+ req.session = getOrCreateSession(sessionId);
62
+ }
50
63
  return next();
51
64
  } catch (err) {
52
65
  return next(err);
package/src/api/openai-router.js CHANGED
@@ -21,9 +21,9 @@ const orchestrator = require("../orchestrator");
21
21
  const { getSession } = require("../sessions");
22
22
  const {
23
23
  convertOpenAIToAnthropic,
24
- convertAnthropicToOpenAI,
25
- convertAnthropicStreamChunkToOpenAI
24
+ convertAnthropicToOpenAI
26
25
  } = require("../clients/openai-format");
26
+ const { IDE_SAFE_TOOLS } = require("../clients/standard-tools");
27
27
 
28
28
  const router = express.Router();
29
29
 
@@ -60,13 +60,14 @@ function detectClient(headers) {
60
60
  */
61
61
  const CLIENT_TOOL_MAPPINGS = {
62
62
  // ============== CODEX CLI ==============
63
- // Tools: shell_command, read_file, write_file, apply_patch, glob_file_search, rg, list_dir
63
+ // Confirmed tools: shell, apply_patch, read_file, write_file, list_dir, glob_file_search,
64
+ // rg, web_search, update_plan, view_image, memory
65
+ // NOT supported: spawn_agent/spawn_thread (Task has no Codex equivalent)
64
66
  codex: {
65
67
  "Bash": {
66
- name: "shell_command",
68
+ name: "shell",
67
69
  mapArgs: (a) => ({
68
- command: a.command || "",
69
- workdir: a.cwd || a.working_directory
70
+ command: ["bash", "-c", a.command || ""]
70
71
  })
71
72
  },
72
73
  "Read": {
@@ -113,6 +114,25 @@ const CLIENT_TOOL_MAPPINGS = {
113
114
  mapArgs: (a) => ({
114
115
  path: a.path || a.directory
115
116
  })
117
+ },
118
+ "TodoWrite": {
119
+ name: "update_plan",
120
+ mapArgs: (a) => ({
121
+ todos: a.todos || []
122
+ })
123
+ },
124
+ "WebSearch": {
125
+ name: "web_search",
126
+ mapArgs: (a) => ({
127
+ query: a.query || ""
128
+ })
129
+ },
130
+ "WebAgent": {
131
+ name: "web_agent",
132
+ mapArgs: (a) => ({
133
+ url: a.url || "",
134
+ goal: a.goal || ""
135
+ })
116
136
  }
117
137
  },
118
138
 
@@ -168,6 +188,13 @@ const CLIENT_TOOL_MAPPINGS = {
168
188
  path: a.path || a.directory || ".",
169
189
  recursive: false
170
190
  })
191
+ },
192
+ "WebAgent": {
193
+ name: "web_agent",
194
+ mapArgs: (a) => ({
195
+ url: a.url || "",
196
+ goal: a.goal || ""
197
+ })
171
198
  }
172
199
  },
173
200
 
@@ -224,6 +251,13 @@ const CLIENT_TOOL_MAPPINGS = {
224
251
  path: a.path || a.directory || ".",
225
252
  recursive: false
226
253
  })
254
+ },
255
+ "WebAgent": {
256
+ name: "web_agent",
257
+ mapArgs: (a) => ({
258
+ url: a.url || "",
259
+ goal: a.goal || ""
260
+ })
227
261
  }
228
262
  },
229
263
 
@@ -274,6 +308,13 @@ const CLIENT_TOOL_MAPPINGS = {
274
308
  mapArgs: (a) => ({
275
309
  filepath: a.path || a.directory || "."
276
310
  })
311
+ },
312
+ "WebAgent": {
313
+ name: "web_agent",
314
+ mapArgs: (a) => ({
315
+ url: a.url || "",
316
+ goal: a.goal || ""
317
+ })
277
318
  }
278
319
  }
279
320
  };
@@ -321,14 +362,7 @@ function mapToolForClient(toolName, argsJson, clientType) {
321
362
  };
322
363
  }
323
364
 
324
- /**
325
- * Check if client is a known AI coding tool that needs tool mapping
326
- * @param {Object} headers - Request headers
327
- * @returns {boolean}
328
- */
329
- function isKnownClient(headers) {
330
- return detectClient(headers) !== "unknown";
331
- }
365
+
332
366
 
333
367
  /**
334
368
  * POST /v1/chat/completions
@@ -374,7 +408,7 @@ router.post("/chat/completions", async (req, res) => {
374
408
  : JSON.stringify(m.content).substring(0, 200)
375
409
  }));
376
410
 
377
- logger.info({
411
+ logger.debug({
378
412
  endpoint: "/v1/chat/completions",
379
413
  model: req.body.model,
380
414
  messageCount: req.body.messages?.length,
@@ -393,6 +427,32 @@ router.post("/chat/completions", async (req, res) => {
393
427
  // Convert OpenAI request to Anthropic format
394
428
  const anthropicRequest = convertOpenAIToAnthropic(req.body);
395
429
 
430
+ // Inject tools if client didn't send any.
431
+ // Two-layer filtering:
432
+ // 1. IDE_SAFE_TOOLS = STANDARD_TOOLS minus AskUserQuestion (can't work through proxy)
433
+ // 2. For known clients (codex, cline, etc.), further filter to only tools
434
+ // that have a mapping in CLIENT_TOOL_MAPPINGS — this ensures clients like
435
+ // Codex don't see tools they can't handle (Task, WebFetch, NotebookEdit)
436
+ // while Claude Code (unknown client) gets the full IDE_SAFE_TOOLS set.
437
+ // Skip injection if client explicitly opted out (tool_choice: "none" or empty tools array).
438
+ const clientType = detectClient(req.headers);
439
+ const clientExplicitlyDisabledTools = req.body.tool_choice === "none" || Array.isArray(req.body.tools);
440
+ if (!clientExplicitlyDisabledTools && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) {
441
+ const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
442
+ const clientTools = clientMappings
443
+ ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
444
+ : IDE_SAFE_TOOLS;
445
+ anthropicRequest.tools = clientTools;
446
+ logger.debug({
447
+ clientType,
448
+ injectedToolCount: clientTools.length,
449
+ injectedToolNames: clientTools.map(t => t.name),
450
+ reason: clientMappings
451
+ ? `Known client '${clientType}' — filtered to mapped tools only`
452
+ : "Unknown client — injecting full IDE_SAFE_TOOLS"
453
+ }, "=== INJECTING TOOLS ===");
454
+ }
455
+
396
456
  // Get or create session
397
457
  const session = getSession(sessionId);
398
458
 
@@ -420,7 +480,7 @@ router.post("/chat/completions", async (req, res) => {
420
480
  });
421
481
 
422
482
  // Check if we have a valid response body
423
- logger.info({
483
+ logger.debug({
424
484
  hasResult: !!result,
425
485
  resultKeys: result ? Object.keys(result) : null,
426
486
  hasBody: result && !!result.body,
@@ -442,7 +502,7 @@ router.post("/chat/completions", async (req, res) => {
442
502
  const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
443
503
 
444
504
  // Debug: Log what we're about to stream
445
- logger.info({
505
+ logger.debug({
446
506
  openaiResponseId: openaiResponse.id,
447
507
  messageContent: openaiResponse.choices[0]?.message?.content?.substring(0, 100),
448
508
  contentLength: openaiResponse.choices[0]?.message?.content?.length || 0,
@@ -454,7 +514,25 @@ router.post("/chat/completions", async (req, res) => {
454
514
 
455
515
  // Simulate streaming by sending the complete response as chunks
456
516
  const content = openaiResponse.choices[0].message.content || "";
457
- const toolCalls = openaiResponse.choices[0].message.tool_calls;
517
+ let toolCalls = openaiResponse.choices[0].message.tool_calls;
518
+
519
+ // Map tool names for known IDE clients
520
+ if (clientType !== "unknown" && toolCalls && toolCalls.length > 0) {
521
+ toolCalls = toolCalls.map(tc => {
522
+ const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
523
+ return {
524
+ ...tc,
525
+ function: {
526
+ name: mapped.name,
527
+ arguments: mapped.arguments
528
+ }
529
+ };
530
+ });
531
+ logger.debug({
532
+ mappedTools: toolCalls.map(t => t.function?.name),
533
+ clientType
534
+ }, "Tool names mapped for streaming chat/completions");
535
+ }
458
536
 
459
537
  // Send start chunk with role
460
538
  const startChunk = {
@@ -493,7 +571,7 @@ router.post("/chat/completions", async (req, res) => {
493
571
  }]
494
572
  };
495
573
  const contentWriteOk = res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
496
- logger.info({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
574
+ logger.debug({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
497
575
  }
498
576
 
499
577
  // Send tool calls if present
@@ -545,7 +623,7 @@ router.post("/chat/completions", async (req, res) => {
545
623
  res.write("data: [DONE]\n\n");
546
624
 
547
625
  // Ensure data is flushed before ending
548
- logger.info({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
626
+ logger.debug({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
549
627
  res.end();
550
628
 
551
629
  logger.info({
@@ -558,10 +636,7 @@ router.post("/chat/completions", async (req, res) => {
558
636
  } catch (streamError) {
559
637
  logger.error({
560
638
  error: streamError.message,
561
- stack: streamError.stack,
562
- resultWasNull: !result,
563
- resultBodyWasNull: result && !result.body,
564
- resultKeys: result ? Object.keys(result) : null
639
+ stack: streamError.stack
565
640
  }, "=== STREAMING ERROR ===");
566
641
 
567
642
  // Send error in OpenAI streaming format
@@ -606,6 +681,24 @@ router.post("/chat/completions", async (req, res) => {
606
681
  // Convert Anthropic response to OpenAI format
607
682
  const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
608
683
 
684
+ // Map tool names for known IDE clients
685
+ if (clientType !== "unknown" && openaiResponse.choices?.[0]?.message?.tool_calls?.length > 0) {
686
+ openaiResponse.choices[0].message.tool_calls = openaiResponse.choices[0].message.tool_calls.map(tc => {
687
+ const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
688
+ return {
689
+ ...tc,
690
+ function: {
691
+ name: mapped.name,
692
+ arguments: mapped.arguments
693
+ }
694
+ };
695
+ });
696
+ logger.debug({
697
+ mappedTools: openaiResponse.choices[0].message.tool_calls.map(t => t.function?.name),
698
+ clientType
699
+ }, "Tool names mapped for non-streaming chat/completions");
700
+ }
701
+
609
702
  logger.info({
610
703
  duration: Date.now() - startTime,
611
704
  mode: "non-streaming",
@@ -767,6 +860,18 @@ function getConfiguredProviders() {
767
860
  });
768
861
  }
769
862
 
863
+ // Check Moonshot AI (Kimi)
864
+ if (config.moonshot?.apiKey) {
865
+ providers.push({
866
+ name: "moonshot",
867
+ type: "moonshot-ai",
868
+ models: [
869
+ config.moonshot.model || "kimi-k2-turbo-preview",
870
+ "kimi-k2-turbo-preview"
871
+ ]
872
+ });
873
+ }
874
+
770
875
  // Check Vertex AI (Google Cloud)
771
876
  if (config.vertex?.projectId) {
772
877
  providers.push({
@@ -1013,7 +1118,7 @@ function determineEmbeddingProvider(requestedModel = null) {
1013
1118
  async function generateOllamaEmbeddings(inputs, embeddingConfig) {
1014
1119
  const { model, endpoint } = embeddingConfig;
1015
1120
 
1016
- logger.info({
1121
+ logger.debug({
1017
1122
  model,
1018
1123
  endpoint,
1019
1124
  inputCount: inputs.length
@@ -1079,7 +1184,7 @@ async function generateOllamaEmbeddings(inputs, embeddingConfig) {
1079
1184
  async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
1080
1185
  const { model, endpoint } = embeddingConfig;
1081
1186
 
1082
- logger.info({
1187
+ logger.debug({
1083
1188
  model,
1084
1189
  endpoint,
1085
1190
  inputCount: inputs.length
@@ -1147,7 +1252,7 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
1147
1252
  async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
1148
1253
  const { model, apiKey, endpoint } = embeddingConfig;
1149
1254
 
1150
- logger.info({
1255
+ logger.debug({
1151
1256
  model,
1152
1257
  inputCount: inputs.length
1153
1258
  }, "Generating embeddings with OpenRouter");
@@ -1181,7 +1286,7 @@ async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
1181
1286
  async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
1182
1287
  const { model, apiKey, endpoint } = embeddingConfig;
1183
1288
 
1184
- logger.info({
1289
+ logger.debug({
1185
1290
  model,
1186
1291
  inputCount: inputs.length
1187
1292
  }, "Generating embeddings with OpenAI");
@@ -1233,7 +1338,7 @@ router.post("/embeddings", async (req, res) => {
1233
1338
  // Convert input to array if string
1234
1339
  const inputs = Array.isArray(input) ? input : [input];
1235
1340
 
1236
- logger.info({
1341
+ logger.debug({
1237
1342
  endpoint: "/v1/embeddings",
1238
1343
  model: model || "auto-detect",
1239
1344
  inputCount: inputs.length,
@@ -1335,7 +1440,7 @@ router.post("/responses", async (req, res) => {
1335
1440
  const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
1336
1441
 
1337
1442
  // Comprehensive debug logging
1338
- logger.info({
1443
+ logger.debug({
1339
1444
  endpoint: "/v1/responses",
1340
1445
  inputType: typeof req.body.input,
1341
1446
  inputIsArray: Array.isArray(req.body.input),
@@ -1354,7 +1459,7 @@ router.post("/responses", async (req, res) => {
1354
1459
  // Convert Responses API to Chat Completions format
1355
1460
  const chatRequest = convertResponsesToChat(req.body);
1356
1461
 
1357
- logger.info({
1462
+ logger.debug({
1358
1463
  chatRequestMessageCount: chatRequest.messages?.length,
1359
1464
  chatRequestMessages: chatRequest.messages?.map(m => ({
1360
1465
  role: m.role,
@@ -1366,7 +1471,7 @@ router.post("/responses", async (req, res) => {
1366
1471
  // Convert to Anthropic format
1367
1472
  const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
1368
1473
 
1369
- logger.info({
1474
+ logger.debug({
1370
1475
  anthropicMessageCount: anthropicRequest.messages?.length,
1371
1476
  anthropicMessages: anthropicRequest.messages?.map(m => ({
1372
1477
  role: m.role,
@@ -1374,6 +1479,26 @@ router.post("/responses", async (req, res) => {
1374
1479
  }))
1375
1480
  }, "After Chat→Anthropic conversion");
1376
1481
 
1482
+ // Inject tools if client didn't send any (same two-layer filtering as chat/completions).
1483
+ // Skip injection if client explicitly opted out (tool_choice: "none" or empty tools array).
1484
+ const clientType = detectClient(req.headers);
1485
+ const clientExplicitlyDisabledTools = req.body.tool_choice === "none" || Array.isArray(req.body.tools);
1486
+ if (!clientExplicitlyDisabledTools && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) {
1487
+ const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
1488
+ const clientTools = clientMappings
1489
+ ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
1490
+ : IDE_SAFE_TOOLS;
1491
+ anthropicRequest.tools = clientTools;
1492
+ logger.debug({
1493
+ clientType,
1494
+ injectedToolCount: clientTools.length,
1495
+ injectedToolNames: clientTools.map(t => t.name),
1496
+ reason: clientMappings
1497
+ ? `Known client '${clientType}' — filtered to mapped tools only`
1498
+ : "Unknown client — injecting full IDE_SAFE_TOOLS"
1499
+ }, "=== INJECTING TOOLS (responses) ===");
1500
+ }
1501
+
1377
1502
  // Get session
1378
1503
  const session = getSession(sessionId);
1379
1504
 
@@ -1400,7 +1525,7 @@ router.post("/responses", async (req, res) => {
1400
1525
  });
1401
1526
 
1402
1527
  // Debug: Log what orchestrator returned
1403
- logger.info({
1528
+ logger.debug({
1404
1529
  hasResult: !!result,
1405
1530
  hasBody: !!result?.body,
1406
1531
  bodyKeys: result?.body ? Object.keys(result.body) : null,
@@ -1412,7 +1537,7 @@ router.post("/responses", async (req, res) => {
1412
1537
  // Convert back: Anthropic → OpenAI → Responses
1413
1538
  const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
1414
1539
 
1415
- logger.info({
1540
+ logger.debug({
1416
1541
  chatContent: chatResponse.choices?.[0]?.message?.content?.substring(0, 200),
1417
1542
  chatContentLength: chatResponse.choices?.[0]?.message?.content?.length || 0,
1418
1543
  hasToolCalls: !!chatResponse.choices?.[0]?.message?.tool_calls,
@@ -1433,7 +1558,7 @@ router.post("/responses", async (req, res) => {
1433
1558
  // Check if client is a known AI coding tool and map tool names accordingly
1434
1559
  const clientType = detectClient(req.headers);
1435
1560
  if (clientType !== "unknown" && toolCalls.length > 0) {
1436
- logger.info({
1561
+ logger.debug({
1437
1562
  originalTools: toolCalls.map(t => t.function?.name),
1438
1563
  clientType,
1439
1564
  userAgent: req.headers["user-agent"]
@@ -1451,12 +1576,12 @@ router.post("/responses", async (req, res) => {
1451
1576
  };
1452
1577
  });
1453
1578
 
1454
- logger.info({
1579
+ logger.debug({
1455
1580
  mappedTools: toolCalls.map(t => t.function?.name)
1456
1581
  }, `Tool names mapped for ${clientType}`);
1457
1582
  }
1458
1583
 
1459
- logger.info({
1584
+ logger.debug({
1460
1585
  content: content.substring(0, 100),
1461
1586
  contentLength: content.length,
1462
1587
  toolCallCount: toolCalls.length,
package/src/api/providers-handler.js CHANGED
@@ -179,6 +179,20 @@ function getConfiguredProviders() {
179
179
  });
180
180
  }
181
181
 
182
+ // Check Moonshot AI (Kimi)
183
+ if (config.moonshot?.apiKey) {
184
+ providers.push({
185
+ name: "moonshot",
186
+ type: "moonshot-ai",
187
+ baseUrl: config.moonshot.endpoint || "https://api.moonshot.ai/v1",
188
+ enabled: true,
189
+ models: [
190
+ { id: config.moonshot.model || "kimi-k2-turbo-preview", name: "Configured Model" },
191
+ { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview" },
192
+ ]
193
+ });
194
+ }
195
+
182
196
  // Check Vertex AI (Google Cloud)
183
197
  if (config.vertex?.projectId) {
184
198
  const region = config.vertex.region || "us-east5";
@@ -369,7 +383,7 @@ router.get("/config", (req, res) => {
369
383
  model_provider: config.modelProvider?.type || "databricks",
370
384
  fallback_provider: config.modelProvider?.fallbackProvider || null,
371
385
  fallback_enabled: config.modelProvider?.fallbackEnabled || false,
372
- prefer_ollama: config.modelProvider?.preferOllama || false,
386
+ tier_routing_enabled: config.modelTiers?.enabled || false,
373
387
  tool_execution_mode: config.toolExecutionMode || "server",
374
388
  configured_providers: providers.map(p => p.name),
375
389
  memory_enabled: config.memory?.enabled || false,