lynkr 7.2.4 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/README.md +2 -2
  2. package/config/model-tiers.json +89 -0
  3. package/docs/docs.html +1 -0
  4. package/docs/index.md +7 -0
  5. package/docs/toon-integration-spec.md +130 -0
  6. package/documentation/README.md +3 -2
  7. package/documentation/claude-code-cli.md +23 -16
  8. package/documentation/cursor-integration.md +17 -14
  9. package/documentation/docker.md +11 -4
  10. package/documentation/embeddings.md +7 -5
  11. package/documentation/faq.md +66 -12
  12. package/documentation/features.md +22 -15
  13. package/documentation/installation.md +66 -14
  14. package/documentation/production.md +43 -8
  15. package/documentation/providers.md +145 -42
  16. package/documentation/routing.md +476 -0
  17. package/documentation/token-optimization.md +7 -5
  18. package/documentation/troubleshooting.md +81 -5
  19. package/install.sh +6 -1
  20. package/package.json +5 -3
  21. package/scripts/setup.js +0 -1
  22. package/src/agents/executor.js +14 -6
  23. package/src/api/middleware/session.js +15 -2
  24. package/src/api/openai-router.js +130 -37
  25. package/src/api/providers-handler.js +15 -1
  26. package/src/api/router.js +107 -2
  27. package/src/budget/index.js +4 -3
  28. package/src/clients/databricks.js +431 -234
  29. package/src/clients/gpt-utils.js +181 -0
  30. package/src/clients/ollama-utils.js +66 -140
  31. package/src/clients/routing.js +0 -1
  32. package/src/clients/standard-tools.js +82 -5
  33. package/src/config/index.js +119 -35
  34. package/src/context/toon.js +173 -0
  35. package/src/headroom/launcher.js +8 -3
  36. package/src/logger/index.js +23 -0
  37. package/src/orchestrator/index.js +765 -212
  38. package/src/routing/agentic-detector.js +320 -0
  39. package/src/routing/complexity-analyzer.js +202 -2
  40. package/src/routing/cost-optimizer.js +305 -0
  41. package/src/routing/index.js +168 -159
  42. package/src/routing/model-registry.js +437 -0
  43. package/src/routing/model-tiers.js +365 -0
  44. package/src/server.js +2 -2
  45. package/src/sessions/cleanup.js +3 -3
  46. package/src/sessions/record.js +10 -1
  47. package/src/sessions/store.js +7 -2
  48. package/src/tools/agent-task.js +48 -1
  49. package/src/tools/index.js +15 -2
  50. package/src/tools/workspace.js +35 -4
  51. package/src/workspace/index.js +30 -0
  52. package/te +11622 -0
  53. package/test/README.md +1 -1
  54. package/test/azure-openai-config.test.js +17 -8
  55. package/test/azure-openai-integration.test.js +7 -1
  56. package/test/azure-openai-routing.test.js +41 -43
  57. package/test/bedrock-integration.test.js +18 -32
  58. package/test/hybrid-routing-integration.test.js +35 -20
  59. package/test/hybrid-routing-performance.test.js +74 -64
  60. package/test/llamacpp-integration.test.js +28 -9
  61. package/test/lmstudio-integration.test.js +20 -8
  62. package/test/openai-integration.test.js +17 -20
  63. package/test/performance-tests.js +1 -1
  64. package/test/routing.test.js +65 -59
  65. package/test/toon-compression.test.js +131 -0
  66. package/CLAWROUTER_ROUTING_PLAN.md +0 -910
  67. package/ROUTER_COMPARISON.md +0 -173
  68. package/TIER_ROUTING_PLAN.md +0 -771
@@ -162,14 +162,22 @@ class SubagentExecutor {
162
162
  payload.tools = filteredTools;
163
163
  }
164
164
 
165
- // Determine provider based on model - subagents should use the specified model
165
+ // Determine provider based on model family.
166
+ // Subagents should use the currently configured MODEL_PROVIDER and avoid
167
+ // hard-fallbacks to Azure when Azure is not selected/configured.
166
168
  let forceProvider = null;
167
- if (payload.model?.includes('claude') || payload.model?.includes('sonnet') || payload.model?.includes('haiku') || payload.model?.includes('opus')) {
168
- // Route Claude models to the configured Claude provider (azure-openai, databricks, etc.)
169
+ const modelLower = String(payload.model || "").toLowerCase();
170
+ const isClaudeFamilyModel =
171
+ modelLower.includes("claude") ||
172
+ modelLower.includes("sonnet") ||
173
+ modelLower.includes("haiku") ||
174
+ modelLower.includes("opus");
175
+ const isGptFamilyModel = modelLower.includes("gpt");
176
+
177
+ if (isClaudeFamilyModel || isGptFamilyModel) {
169
178
  const config = require('../config');
170
- forceProvider = config.modelProvider?.provider || 'azure-openai';
171
- } else if (payload.model?.includes('gpt')) {
172
- forceProvider = 'azure-openai';
179
+ // `type` is the canonical key; `provider` kept as legacy fallback.
180
+ forceProvider = config.modelProvider?.type || config.modelProvider?.provider || null;
173
181
  }
174
182
 
175
183
  logger.debug({
@@ -45,8 +45,21 @@ function sessionMiddleware(req, res, next) {
45
45
  // Add sessionId to logger context for this request
46
46
  req.log = logger.child({ sessionId });
47
47
 
48
- const session = getOrCreateSession(sessionId);
49
- req.session = session;
48
+ // Skip DB persistence for auto-generated (ephemeral) session IDs.
49
+ // These are created when the client doesn't send a session header,
50
+ // so storing them just bloats the DB with throwaway records.
51
+ if (req.generatedSessionId) {
52
+ req.session = {
53
+ id: sessionId,
54
+ createdAt: Date.now(),
55
+ updatedAt: Date.now(),
56
+ metadata: {},
57
+ history: [],
58
+ _ephemeral: true,
59
+ };
60
+ } else {
61
+ req.session = getOrCreateSession(sessionId);
62
+ }
50
63
  return next();
51
64
  } catch (err) {
52
65
  return next(err);
@@ -21,9 +21,9 @@ const orchestrator = require("../orchestrator");
21
21
  const { getSession } = require("../sessions");
22
22
  const {
23
23
  convertOpenAIToAnthropic,
24
- convertAnthropicToOpenAI,
25
- convertAnthropicStreamChunkToOpenAI
24
+ convertAnthropicToOpenAI
26
25
  } = require("../clients/openai-format");
26
+ const { IDE_SAFE_TOOLS } = require("../clients/standard-tools");
27
27
 
28
28
  const router = express.Router();
29
29
 
@@ -60,13 +60,14 @@ function detectClient(headers) {
60
60
  */
61
61
  const CLIENT_TOOL_MAPPINGS = {
62
62
  // ============== CODEX CLI ==============
63
- // Tools: shell_command, read_file, write_file, apply_patch, glob_file_search, rg, list_dir
63
+ // Confirmed tools: shell, apply_patch, read_file, write_file, list_dir, glob_file_search,
64
+ // rg, web_search, update_plan, view_image, memory
65
+ // NOT supported: spawn_agent/spawn_thread (Task has no Codex equivalent)
64
66
  codex: {
65
67
  "Bash": {
66
- name: "shell_command",
68
+ name: "shell",
67
69
  mapArgs: (a) => ({
68
- command: a.command || "",
69
- workdir: a.cwd || a.working_directory
70
+ command: ["bash", "-c", a.command || ""]
70
71
  })
71
72
  },
72
73
  "Read": {
@@ -113,6 +114,18 @@ const CLIENT_TOOL_MAPPINGS = {
113
114
  mapArgs: (a) => ({
114
115
  path: a.path || a.directory
115
116
  })
117
+ },
118
+ "TodoWrite": {
119
+ name: "update_plan",
120
+ mapArgs: (a) => ({
121
+ todos: a.todos || []
122
+ })
123
+ },
124
+ "WebSearch": {
125
+ name: "web_search",
126
+ mapArgs: (a) => ({
127
+ query: a.query || ""
128
+ })
116
129
  }
117
130
  },
118
131
 
@@ -321,14 +334,7 @@ function mapToolForClient(toolName, argsJson, clientType) {
321
334
  };
322
335
  }
323
336
 
324
- /**
325
- * Check if client is a known AI coding tool that needs tool mapping
326
- * @param {Object} headers - Request headers
327
- * @returns {boolean}
328
- */
329
- function isKnownClient(headers) {
330
- return detectClient(headers) !== "unknown";
331
- }
337
+
332
338
 
333
339
  /**
334
340
  * POST /v1/chat/completions
@@ -374,7 +380,7 @@ router.post("/chat/completions", async (req, res) => {
374
380
  : JSON.stringify(m.content).substring(0, 200)
375
381
  }));
376
382
 
377
- logger.info({
383
+ logger.debug({
378
384
  endpoint: "/v1/chat/completions",
379
385
  model: req.body.model,
380
386
  messageCount: req.body.messages?.length,
@@ -393,6 +399,30 @@ router.post("/chat/completions", async (req, res) => {
393
399
  // Convert OpenAI request to Anthropic format
394
400
  const anthropicRequest = convertOpenAIToAnthropic(req.body);
395
401
 
402
+ // Inject tools if client didn't send any.
403
+ // Two-layer filtering:
404
+ // 1. IDE_SAFE_TOOLS = STANDARD_TOOLS minus AskUserQuestion (can't work through proxy)
405
+ // 2. For known clients (codex, cline, etc.), further filter to only tools
406
+ // that have a mapping in CLIENT_TOOL_MAPPINGS — this ensures clients like
407
+ // Codex don't see tools they can't handle (Task, WebFetch, NotebookEdit)
408
+ // while Claude Code (unknown client) gets the full IDE_SAFE_TOOLS set.
409
+ const clientType = detectClient(req.headers);
410
+ if (!anthropicRequest.tools || anthropicRequest.tools.length === 0) {
411
+ const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
412
+ const clientTools = clientMappings
413
+ ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
414
+ : IDE_SAFE_TOOLS;
415
+ anthropicRequest.tools = clientTools;
416
+ logger.debug({
417
+ clientType,
418
+ injectedToolCount: clientTools.length,
419
+ injectedToolNames: clientTools.map(t => t.name),
420
+ reason: clientMappings
421
+ ? `Known client '${clientType}' — filtered to mapped tools only`
422
+ : "Unknown client — injecting full IDE_SAFE_TOOLS"
423
+ }, "=== INJECTING TOOLS ===");
424
+ }
425
+
396
426
  // Get or create session
397
427
  const session = getSession(sessionId);
398
428
 
@@ -420,7 +450,7 @@ router.post("/chat/completions", async (req, res) => {
420
450
  });
421
451
 
422
452
  // Check if we have a valid response body
423
- logger.info({
453
+ logger.debug({
424
454
  hasResult: !!result,
425
455
  resultKeys: result ? Object.keys(result) : null,
426
456
  hasBody: result && !!result.body,
@@ -442,7 +472,7 @@ router.post("/chat/completions", async (req, res) => {
442
472
  const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
443
473
 
444
474
  // Debug: Log what we're about to stream
445
- logger.info({
475
+ logger.debug({
446
476
  openaiResponseId: openaiResponse.id,
447
477
  messageContent: openaiResponse.choices[0]?.message?.content?.substring(0, 100),
448
478
  contentLength: openaiResponse.choices[0]?.message?.content?.length || 0,
@@ -454,7 +484,25 @@ router.post("/chat/completions", async (req, res) => {
454
484
 
455
485
  // Simulate streaming by sending the complete response as chunks
456
486
  const content = openaiResponse.choices[0].message.content || "";
457
- const toolCalls = openaiResponse.choices[0].message.tool_calls;
487
+ let toolCalls = openaiResponse.choices[0].message.tool_calls;
488
+
489
+ // Map tool names for known IDE clients
490
+ if (clientType !== "unknown" && toolCalls && toolCalls.length > 0) {
491
+ toolCalls = toolCalls.map(tc => {
492
+ const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
493
+ return {
494
+ ...tc,
495
+ function: {
496
+ name: mapped.name,
497
+ arguments: mapped.arguments
498
+ }
499
+ };
500
+ });
501
+ logger.debug({
502
+ mappedTools: toolCalls.map(t => t.function?.name),
503
+ clientType
504
+ }, "Tool names mapped for streaming chat/completions");
505
+ }
458
506
 
459
507
  // Send start chunk with role
460
508
  const startChunk = {
@@ -493,7 +541,7 @@ router.post("/chat/completions", async (req, res) => {
493
541
  }]
494
542
  };
495
543
  const contentWriteOk = res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
496
- logger.info({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
544
+ logger.debug({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
497
545
  }
498
546
 
499
547
  // Send tool calls if present
@@ -545,7 +593,7 @@ router.post("/chat/completions", async (req, res) => {
545
593
  res.write("data: [DONE]\n\n");
546
594
 
547
595
  // Ensure data is flushed before ending
548
- logger.info({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
596
+ logger.debug({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
549
597
  res.end();
550
598
 
551
599
  logger.info({
@@ -558,10 +606,7 @@ router.post("/chat/completions", async (req, res) => {
558
606
  } catch (streamError) {
559
607
  logger.error({
560
608
  error: streamError.message,
561
- stack: streamError.stack,
562
- resultWasNull: !result,
563
- resultBodyWasNull: result && !result.body,
564
- resultKeys: result ? Object.keys(result) : null
609
+ stack: streamError.stack
565
610
  }, "=== STREAMING ERROR ===");
566
611
 
567
612
  // Send error in OpenAI streaming format
@@ -606,6 +651,24 @@ router.post("/chat/completions", async (req, res) => {
606
651
  // Convert Anthropic response to OpenAI format
607
652
  const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
608
653
 
654
+ // Map tool names for known IDE clients
655
+ if (clientType !== "unknown" && openaiResponse.choices?.[0]?.message?.tool_calls?.length > 0) {
656
+ openaiResponse.choices[0].message.tool_calls = openaiResponse.choices[0].message.tool_calls.map(tc => {
657
+ const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
658
+ return {
659
+ ...tc,
660
+ function: {
661
+ name: mapped.name,
662
+ arguments: mapped.arguments
663
+ }
664
+ };
665
+ });
666
+ logger.debug({
667
+ mappedTools: openaiResponse.choices[0].message.tool_calls.map(t => t.function?.name),
668
+ clientType
669
+ }, "Tool names mapped for non-streaming chat/completions");
670
+ }
671
+
609
672
  logger.info({
610
673
  duration: Date.now() - startTime,
611
674
  mode: "non-streaming",
@@ -767,6 +830,18 @@ function getConfiguredProviders() {
767
830
  });
768
831
  }
769
832
 
833
+ // Check Moonshot AI (Kimi)
834
+ if (config.moonshot?.apiKey) {
835
+ providers.push({
836
+ name: "moonshot",
837
+ type: "moonshot-ai",
838
+ models: [
839
+ config.moonshot.model || "kimi-k2-turbo-preview",
840
+ "kimi-k2-turbo-preview"
841
+ ]
842
+ });
843
+ }
844
+
770
845
  // Check Vertex AI (Google Cloud)
771
846
  if (config.vertex?.projectId) {
772
847
  providers.push({
@@ -1013,7 +1088,7 @@ function determineEmbeddingProvider(requestedModel = null) {
1013
1088
  async function generateOllamaEmbeddings(inputs, embeddingConfig) {
1014
1089
  const { model, endpoint } = embeddingConfig;
1015
1090
 
1016
- logger.info({
1091
+ logger.debug({
1017
1092
  model,
1018
1093
  endpoint,
1019
1094
  inputCount: inputs.length
@@ -1079,7 +1154,7 @@ async function generateOllamaEmbeddings(inputs, embeddingConfig) {
1079
1154
  async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
1080
1155
  const { model, endpoint } = embeddingConfig;
1081
1156
 
1082
- logger.info({
1157
+ logger.debug({
1083
1158
  model,
1084
1159
  endpoint,
1085
1160
  inputCount: inputs.length
@@ -1147,7 +1222,7 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
1147
1222
  async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
1148
1223
  const { model, apiKey, endpoint } = embeddingConfig;
1149
1224
 
1150
- logger.info({
1225
+ logger.debug({
1151
1226
  model,
1152
1227
  inputCount: inputs.length
1153
1228
  }, "Generating embeddings with OpenRouter");
@@ -1181,7 +1256,7 @@ async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
1181
1256
  async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
1182
1257
  const { model, apiKey, endpoint } = embeddingConfig;
1183
1258
 
1184
- logger.info({
1259
+ logger.debug({
1185
1260
  model,
1186
1261
  inputCount: inputs.length
1187
1262
  }, "Generating embeddings with OpenAI");
@@ -1233,7 +1308,7 @@ router.post("/embeddings", async (req, res) => {
1233
1308
  // Convert input to array if string
1234
1309
  const inputs = Array.isArray(input) ? input : [input];
1235
1310
 
1236
- logger.info({
1311
+ logger.debug({
1237
1312
  endpoint: "/v1/embeddings",
1238
1313
  model: model || "auto-detect",
1239
1314
  inputCount: inputs.length,
@@ -1335,7 +1410,7 @@ router.post("/responses", async (req, res) => {
1335
1410
  const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
1336
1411
 
1337
1412
  // Comprehensive debug logging
1338
- logger.info({
1413
+ logger.debug({
1339
1414
  endpoint: "/v1/responses",
1340
1415
  inputType: typeof req.body.input,
1341
1416
  inputIsArray: Array.isArray(req.body.input),
@@ -1354,7 +1429,7 @@ router.post("/responses", async (req, res) => {
1354
1429
  // Convert Responses API to Chat Completions format
1355
1430
  const chatRequest = convertResponsesToChat(req.body);
1356
1431
 
1357
- logger.info({
1432
+ logger.debug({
1358
1433
  chatRequestMessageCount: chatRequest.messages?.length,
1359
1434
  chatRequestMessages: chatRequest.messages?.map(m => ({
1360
1435
  role: m.role,
@@ -1366,7 +1441,7 @@ router.post("/responses", async (req, res) => {
1366
1441
  // Convert to Anthropic format
1367
1442
  const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
1368
1443
 
1369
- logger.info({
1444
+ logger.debug({
1370
1445
  anthropicMessageCount: anthropicRequest.messages?.length,
1371
1446
  anthropicMessages: anthropicRequest.messages?.map(m => ({
1372
1447
  role: m.role,
@@ -1374,6 +1449,24 @@ router.post("/responses", async (req, res) => {
1374
1449
  }))
1375
1450
  }, "After Chat→Anthropic conversion");
1376
1451
 
1452
+ // Inject tools if client didn't send any (same two-layer filtering as chat/completions).
1453
+ const clientType = detectClient(req.headers);
1454
+ if (!anthropicRequest.tools || anthropicRequest.tools.length === 0) {
1455
+ const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
1456
+ const clientTools = clientMappings
1457
+ ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
1458
+ : IDE_SAFE_TOOLS;
1459
+ anthropicRequest.tools = clientTools;
1460
+ logger.debug({
1461
+ clientType,
1462
+ injectedToolCount: clientTools.length,
1463
+ injectedToolNames: clientTools.map(t => t.name),
1464
+ reason: clientMappings
1465
+ ? `Known client '${clientType}' — filtered to mapped tools only`
1466
+ : "Unknown client — injecting full IDE_SAFE_TOOLS"
1467
+ }, "=== INJECTING TOOLS (responses) ===");
1468
+ }
1469
+
1377
1470
  // Get session
1378
1471
  const session = getSession(sessionId);
1379
1472
 
@@ -1400,7 +1493,7 @@ router.post("/responses", async (req, res) => {
1400
1493
  });
1401
1494
 
1402
1495
  // Debug: Log what orchestrator returned
1403
- logger.info({
1496
+ logger.debug({
1404
1497
  hasResult: !!result,
1405
1498
  hasBody: !!result?.body,
1406
1499
  bodyKeys: result?.body ? Object.keys(result.body) : null,
@@ -1412,7 +1505,7 @@ router.post("/responses", async (req, res) => {
1412
1505
  // Convert back: Anthropic → OpenAI → Responses
1413
1506
  const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
1414
1507
 
1415
- logger.info({
1508
+ logger.debug({
1416
1509
  chatContent: chatResponse.choices?.[0]?.message?.content?.substring(0, 200),
1417
1510
  chatContentLength: chatResponse.choices?.[0]?.message?.content?.length || 0,
1418
1511
  hasToolCalls: !!chatResponse.choices?.[0]?.message?.tool_calls,
@@ -1433,7 +1526,7 @@ router.post("/responses", async (req, res) => {
1433
1526
  // Check if client is a known AI coding tool and map tool names accordingly
1434
1527
  const clientType = detectClient(req.headers);
1435
1528
  if (clientType !== "unknown" && toolCalls.length > 0) {
1436
- logger.info({
1529
+ logger.debug({
1437
1530
  originalTools: toolCalls.map(t => t.function?.name),
1438
1531
  clientType,
1439
1532
  userAgent: req.headers["user-agent"]
@@ -1451,12 +1544,12 @@ router.post("/responses", async (req, res) => {
1451
1544
  };
1452
1545
  });
1453
1546
 
1454
- logger.info({
1547
+ logger.debug({
1455
1548
  mappedTools: toolCalls.map(t => t.function?.name)
1456
1549
  }, `Tool names mapped for ${clientType}`);
1457
1550
  }
1458
1551
 
1459
- logger.info({
1552
+ logger.debug({
1460
1553
  content: content.substring(0, 100),
1461
1554
  contentLength: content.length,
1462
1555
  toolCallCount: toolCalls.length,
@@ -179,6 +179,20 @@ function getConfiguredProviders() {
179
179
  });
180
180
  }
181
181
 
182
+ // Check Moonshot AI (Kimi)
183
+ if (config.moonshot?.apiKey) {
184
+ providers.push({
185
+ name: "moonshot",
186
+ type: "moonshot-ai",
187
+ baseUrl: config.moonshot.endpoint || "https://api.moonshot.ai/v1",
188
+ enabled: true,
189
+ models: [
190
+ { id: config.moonshot.model || "kimi-k2-turbo-preview", name: "Configured Model" },
191
+ { id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview" },
192
+ ]
193
+ });
194
+ }
195
+
182
196
  // Check Vertex AI (Google Cloud)
183
197
  if (config.vertex?.projectId) {
184
198
  const region = config.vertex.region || "us-east5";
@@ -369,7 +383,7 @@ router.get("/config", (req, res) => {
369
383
  model_provider: config.modelProvider?.type || "databricks",
370
384
  fallback_provider: config.modelProvider?.fallbackProvider || null,
371
385
  fallback_enabled: config.modelProvider?.fallbackEnabled || false,
372
- prefer_ollama: config.modelProvider?.preferOllama || false,
386
+ tier_routing_enabled: config.modelTiers?.enabled || false,
373
387
  tool_execution_mode: config.toolExecutionMode || "server",
374
388
  configured_providers: providers.map(p => p.name),
375
389
  memory_enabled: config.memory?.enabled || false,
package/src/api/router.js CHANGED
@@ -2,10 +2,11 @@ const express = require("express");
2
2
  const { processMessage } = require("../orchestrator");
3
3
  const { getSession } = require("../sessions");
4
4
  const metrics = require("../metrics");
5
+ const logger = require("../logger");
5
6
  const { createRateLimiter } = require("./middleware/rate-limiter");
6
7
  const openaiRouter = require("./openai-router");
7
8
  const providersRouter = require("./providers-handler");
8
- const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing");
9
+ const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
9
10
  const { validateCwd } = require("../workspace");
10
11
 
11
12
  const router = express.Router();
@@ -71,6 +72,99 @@ router.get("/routing/stats", (req, res) => {
71
72
  });
72
73
  });
73
74
 
75
+ // Model registry info (from LiteLLM + models.dev APIs)
76
+ router.get("/routing/models", async (req, res) => {
77
+ try {
78
+ const { getModelRegistry } = require("../routing/model-registry");
79
+ const registry = await getModelRegistry();
80
+ res.json({
81
+ status: "ok",
82
+ ...registry.getStats(),
83
+ });
84
+ } catch (err) {
85
+ res.status(500).json({ error: err.message });
86
+ }
87
+ });
88
+
89
+ // Get specific model info
90
+ router.get("/routing/models/:model", async (req, res) => {
91
+ try {
92
+ const { getModelRegistry } = require("../routing/model-registry");
93
+ const registry = await getModelRegistry();
94
+ const model = registry.getModel(req.params.model);
95
+ if (!model || model.source === "default") {
96
+ return res.status(404).json({ error: "Model not found", model: req.params.model });
97
+ }
98
+ res.json({ status: "ok", model: req.params.model, ...model });
99
+ } catch (err) {
100
+ res.status(500).json({ error: err.message });
101
+ }
102
+ });
103
+
104
+ // Routing tier information
105
+ router.get("/routing/tiers", (req, res) => {
106
+ try {
107
+ const { getModelTierSelector } = require("../routing/model-tiers");
108
+ const selector = getModelTierSelector();
109
+ res.json({
110
+ status: "ok",
111
+ ...selector.getTierStats(),
112
+ });
113
+ } catch (err) {
114
+ res.status(500).json({ error: err.message });
115
+ }
116
+ });
117
+
118
+ // Cost optimization stats
119
+ router.get("/metrics/cost-optimization", (req, res) => {
120
+ try {
121
+ const { getCostOptimizer } = require("../routing/cost-optimizer");
122
+ const optimizer = getCostOptimizer();
123
+ res.json({
124
+ status: "ok",
125
+ ...optimizer.getStats(),
126
+ });
127
+ } catch (err) {
128
+ res.status(500).json({ error: err.message });
129
+ }
130
+ });
131
+
132
+ // Request analysis test endpoint
133
+ router.post("/routing/analyze", async (req, res) => {
134
+ try {
135
+ const { getAgenticDetector } = require("../routing/agentic-detector");
136
+ const { getModelTierSelector } = require("../routing/model-tiers");
137
+ const { getModelRegistry } = require("../routing/model-registry");
138
+
139
+ const analysis = analyzeComplexity(req.body, { weighted: req.query.weighted === "true" });
140
+ const agentic = getAgenticDetector().detect(req.body);
141
+ const selector = getModelTierSelector();
142
+ const tier = selector.getTier(analysis.score);
143
+
144
+ // Get recommended model for tier
145
+ const provider = req.query.provider || "openai";
146
+ const modelSelection = selector.selectModel(tier, provider);
147
+
148
+ // Get model cost info
149
+ let modelInfo = null;
150
+ if (modelSelection.model) {
151
+ const registry = await getModelRegistry();
152
+ modelInfo = registry.getCost(modelSelection.model);
153
+ }
154
+
155
+ res.json({
156
+ status: "ok",
157
+ analysis,
158
+ agentic,
159
+ tier,
160
+ modelSelection,
161
+ modelInfo,
162
+ });
163
+ } catch (err) {
164
+ res.status(500).json({ error: err.message });
165
+ }
166
+ });
167
+
74
168
  router.get("/debug/session", (req, res) => {
75
169
  if (!req.sessionId) {
76
170
  return res.status(400).json({ error: "missing_session_id", message: "Provide x-session-id header" });
@@ -123,8 +217,19 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
123
217
 
124
218
  // Analyze complexity for routing headers (Phase 3)
125
219
  const complexity = analyzeComplexity(req.body);
220
+ let preRouteProvider = 'cloud';
221
+ if (complexity.recommendation === 'local') {
222
+ // Use tier config to determine actual provider instead of hardcoding 'ollama'
223
+ try {
224
+ const selector = getModelTierSelector();
225
+ const tierResult = selector.selectModel('SIMPLE', null);
226
+ preRouteProvider = tierResult.provider;
227
+ } catch (_) {
228
+ preRouteProvider = 'ollama';
229
+ }
230
+ }
126
231
  const routingHeaders = getRoutingHeaders({
127
- provider: complexity.recommendation === 'local' ? 'ollama' : 'cloud',
232
+ provider: preRouteProvider,
128
233
  score: complexity.score,
129
234
  threshold: complexity.threshold,
130
235
  method: 'complexity',
@@ -11,13 +11,14 @@ const logger = require('../logger');
11
11
  class BudgetManager {
12
12
  constructor(options = {}) {
13
13
  this.enabled = options.enabled !== false;
14
+ let dbPath = null;
14
15
  if (!this.enabled || !Database) {
15
16
  this.enabled = false;
16
17
  return;
17
18
  }
18
19
 
19
20
  try {
20
- const dbPath = path.join(process.cwd(), 'data', 'budgets.db');
21
+ dbPath = path.join(process.cwd(), 'data', 'budgets.db');
21
22
  const dbDir = path.dirname(dbPath);
22
23
 
23
24
  if (!fs.existsSync(dbDir)) {
@@ -25,14 +26,14 @@ class BudgetManager {
25
26
  }
26
27
 
27
28
  this.db = new Database(dbPath);
29
+ this.dbPath = dbPath;
28
30
  this.initDatabase();
31
+ logger.info({ dbPath }, 'Budget manager initialized');
29
32
  } catch (err) {
30
33
  logger.warn({ err: err.message }, "BudgetManager: better-sqlite3 not available");
31
34
  this.enabled = false;
32
35
  return;
33
36
  }
34
-
35
- logger.info({ dbPath }, 'Budget manager initialized');
36
37
  }
37
38
 
38
39
  initDatabase() {