lynkr 7.2.4 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/config/model-tiers.json +89 -0
- package/docs/docs.html +1 -0
- package/docs/index.md +7 -0
- package/docs/toon-integration-spec.md +130 -0
- package/documentation/README.md +3 -2
- package/documentation/claude-code-cli.md +23 -16
- package/documentation/cursor-integration.md +17 -14
- package/documentation/docker.md +11 -4
- package/documentation/embeddings.md +7 -5
- package/documentation/faq.md +66 -12
- package/documentation/features.md +22 -15
- package/documentation/installation.md +66 -14
- package/documentation/production.md +43 -8
- package/documentation/providers.md +145 -42
- package/documentation/routing.md +476 -0
- package/documentation/token-optimization.md +7 -5
- package/documentation/troubleshooting.md +81 -5
- package/install.sh +6 -1
- package/package.json +5 -3
- package/scripts/setup.js +0 -1
- package/src/agents/executor.js +14 -6
- package/src/api/middleware/session.js +15 -2
- package/src/api/openai-router.js +130 -37
- package/src/api/providers-handler.js +15 -1
- package/src/api/router.js +107 -2
- package/src/budget/index.js +4 -3
- package/src/clients/databricks.js +431 -234
- package/src/clients/gpt-utils.js +181 -0
- package/src/clients/ollama-utils.js +66 -140
- package/src/clients/routing.js +0 -1
- package/src/clients/standard-tools.js +82 -5
- package/src/config/index.js +119 -35
- package/src/context/toon.js +173 -0
- package/src/headroom/launcher.js +8 -3
- package/src/logger/index.js +23 -0
- package/src/orchestrator/index.js +765 -212
- package/src/routing/agentic-detector.js +320 -0
- package/src/routing/complexity-analyzer.js +202 -2
- package/src/routing/cost-optimizer.js +305 -0
- package/src/routing/index.js +168 -159
- package/src/routing/model-registry.js +437 -0
- package/src/routing/model-tiers.js +365 -0
- package/src/server.js +2 -2
- package/src/sessions/cleanup.js +3 -3
- package/src/sessions/record.js +10 -1
- package/src/sessions/store.js +7 -2
- package/src/tools/agent-task.js +48 -1
- package/src/tools/index.js +15 -2
- package/src/tools/workspace.js +35 -4
- package/src/workspace/index.js +30 -0
- package/te +11622 -0
- package/test/README.md +1 -1
- package/test/azure-openai-config.test.js +17 -8
- package/test/azure-openai-integration.test.js +7 -1
- package/test/azure-openai-routing.test.js +41 -43
- package/test/bedrock-integration.test.js +18 -32
- package/test/hybrid-routing-integration.test.js +35 -20
- package/test/hybrid-routing-performance.test.js +74 -64
- package/test/llamacpp-integration.test.js +28 -9
- package/test/lmstudio-integration.test.js +20 -8
- package/test/openai-integration.test.js +17 -20
- package/test/performance-tests.js +1 -1
- package/test/routing.test.js +65 -59
- package/test/toon-compression.test.js +131 -0
- package/CLAWROUTER_ROUTING_PLAN.md +0 -910
- package/ROUTER_COMPARISON.md +0 -173
- package/TIER_ROUTING_PLAN.md +0 -771
package/src/agents/executor.js
CHANGED
|
@@ -162,14 +162,22 @@ class SubagentExecutor {
|
|
|
162
162
|
payload.tools = filteredTools;
|
|
163
163
|
}
|
|
164
164
|
|
|
165
|
-
// Determine provider based on model
|
|
165
|
+
// Determine provider based on model family.
|
|
166
|
+
// Subagents should use the currently configured MODEL_PROVIDER and avoid
|
|
167
|
+
// hard-fallbacks to Azure when Azure is not selected/configured.
|
|
166
168
|
let forceProvider = null;
|
|
167
|
-
|
|
168
|
-
|
|
169
|
+
const modelLower = String(payload.model || "").toLowerCase();
|
|
170
|
+
const isClaudeFamilyModel =
|
|
171
|
+
modelLower.includes("claude") ||
|
|
172
|
+
modelLower.includes("sonnet") ||
|
|
173
|
+
modelLower.includes("haiku") ||
|
|
174
|
+
modelLower.includes("opus");
|
|
175
|
+
const isGptFamilyModel = modelLower.includes("gpt");
|
|
176
|
+
|
|
177
|
+
if (isClaudeFamilyModel || isGptFamilyModel) {
|
|
169
178
|
const config = require('../config');
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
forceProvider = 'azure-openai';
|
|
179
|
+
// `type` is the canonical key; `provider` kept as legacy fallback.
|
|
180
|
+
forceProvider = config.modelProvider?.type || config.modelProvider?.provider || null;
|
|
173
181
|
}
|
|
174
182
|
|
|
175
183
|
logger.debug({
|
package/src/api/middleware/session.js
CHANGED
|
@@ -45,8 +45,21 @@ function sessionMiddleware(req, res, next) {
|
|
|
45
45
|
// Add sessionId to logger context for this request
|
|
46
46
|
req.log = logger.child({ sessionId });
|
|
47
47
|
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
// Skip DB persistence for auto-generated (ephemeral) session IDs.
|
|
49
|
+
// These are created when the client doesn't send a session header,
|
|
50
|
+
// so storing them just bloats the DB with throwaway records.
|
|
51
|
+
if (req.generatedSessionId) {
|
|
52
|
+
req.session = {
|
|
53
|
+
id: sessionId,
|
|
54
|
+
createdAt: Date.now(),
|
|
55
|
+
updatedAt: Date.now(),
|
|
56
|
+
metadata: {},
|
|
57
|
+
history: [],
|
|
58
|
+
_ephemeral: true,
|
|
59
|
+
};
|
|
60
|
+
} else {
|
|
61
|
+
req.session = getOrCreateSession(sessionId);
|
|
62
|
+
}
|
|
50
63
|
return next();
|
|
51
64
|
} catch (err) {
|
|
52
65
|
return next(err);
|
package/src/api/openai-router.js
CHANGED
|
@@ -21,9 +21,9 @@ const orchestrator = require("../orchestrator");
|
|
|
21
21
|
const { getSession } = require("../sessions");
|
|
22
22
|
const {
|
|
23
23
|
convertOpenAIToAnthropic,
|
|
24
|
-
convertAnthropicToOpenAI
|
|
25
|
-
convertAnthropicStreamChunkToOpenAI
|
|
24
|
+
convertAnthropicToOpenAI
|
|
26
25
|
} = require("../clients/openai-format");
|
|
26
|
+
const { IDE_SAFE_TOOLS } = require("../clients/standard-tools");
|
|
27
27
|
|
|
28
28
|
const router = express.Router();
|
|
29
29
|
|
|
@@ -60,13 +60,14 @@ function detectClient(headers) {
|
|
|
60
60
|
*/
|
|
61
61
|
const CLIENT_TOOL_MAPPINGS = {
|
|
62
62
|
// ============== CODEX CLI ==============
|
|
63
|
-
//
|
|
63
|
+
// Confirmed tools: shell, apply_patch, read_file, write_file, list_dir, glob_file_search,
|
|
64
|
+
// rg, web_search, update_plan, view_image, memory
|
|
65
|
+
// NOT supported: spawn_agent/spawn_thread (Task has no Codex equivalent)
|
|
64
66
|
codex: {
|
|
65
67
|
"Bash": {
|
|
66
|
-
name: "
|
|
68
|
+
name: "shell",
|
|
67
69
|
mapArgs: (a) => ({
|
|
68
|
-
command: a.command || ""
|
|
69
|
-
workdir: a.cwd || a.working_directory
|
|
70
|
+
command: ["bash", "-c", a.command || ""]
|
|
70
71
|
})
|
|
71
72
|
},
|
|
72
73
|
"Read": {
|
|
@@ -113,6 +114,18 @@ const CLIENT_TOOL_MAPPINGS = {
|
|
|
113
114
|
mapArgs: (a) => ({
|
|
114
115
|
path: a.path || a.directory
|
|
115
116
|
})
|
|
117
|
+
},
|
|
118
|
+
"TodoWrite": {
|
|
119
|
+
name: "update_plan",
|
|
120
|
+
mapArgs: (a) => ({
|
|
121
|
+
todos: a.todos || []
|
|
122
|
+
})
|
|
123
|
+
},
|
|
124
|
+
"WebSearch": {
|
|
125
|
+
name: "web_search",
|
|
126
|
+
mapArgs: (a) => ({
|
|
127
|
+
query: a.query || ""
|
|
128
|
+
})
|
|
116
129
|
}
|
|
117
130
|
},
|
|
118
131
|
|
|
@@ -321,14 +334,7 @@ function mapToolForClient(toolName, argsJson, clientType) {
|
|
|
321
334
|
};
|
|
322
335
|
}
|
|
323
336
|
|
|
324
|
-
|
|
325
|
-
* Check if client is a known AI coding tool that needs tool mapping
|
|
326
|
-
* @param {Object} headers - Request headers
|
|
327
|
-
* @returns {boolean}
|
|
328
|
-
*/
|
|
329
|
-
function isKnownClient(headers) {
|
|
330
|
-
return detectClient(headers) !== "unknown";
|
|
331
|
-
}
|
|
337
|
+
|
|
332
338
|
|
|
333
339
|
/**
|
|
334
340
|
* POST /v1/chat/completions
|
|
@@ -374,7 +380,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
374
380
|
: JSON.stringify(m.content).substring(0, 200)
|
|
375
381
|
}));
|
|
376
382
|
|
|
377
|
-
logger.
|
|
383
|
+
logger.debug({
|
|
378
384
|
endpoint: "/v1/chat/completions",
|
|
379
385
|
model: req.body.model,
|
|
380
386
|
messageCount: req.body.messages?.length,
|
|
@@ -393,6 +399,30 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
393
399
|
// Convert OpenAI request to Anthropic format
|
|
394
400
|
const anthropicRequest = convertOpenAIToAnthropic(req.body);
|
|
395
401
|
|
|
402
|
+
// Inject tools if client didn't send any.
|
|
403
|
+
// Two-layer filtering:
|
|
404
|
+
// 1. IDE_SAFE_TOOLS = STANDARD_TOOLS minus AskUserQuestion (can't work through proxy)
|
|
405
|
+
// 2. For known clients (codex, cline, etc.), further filter to only tools
|
|
406
|
+
// that have a mapping in CLIENT_TOOL_MAPPINGS — this ensures clients like
|
|
407
|
+
// Codex don't see tools they can't handle (Task, WebFetch, NotebookEdit)
|
|
408
|
+
// while Claude Code (unknown client) gets the full IDE_SAFE_TOOLS set.
|
|
409
|
+
const clientType = detectClient(req.headers);
|
|
410
|
+
if (!anthropicRequest.tools || anthropicRequest.tools.length === 0) {
|
|
411
|
+
const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
|
|
412
|
+
const clientTools = clientMappings
|
|
413
|
+
? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
|
|
414
|
+
: IDE_SAFE_TOOLS;
|
|
415
|
+
anthropicRequest.tools = clientTools;
|
|
416
|
+
logger.debug({
|
|
417
|
+
clientType,
|
|
418
|
+
injectedToolCount: clientTools.length,
|
|
419
|
+
injectedToolNames: clientTools.map(t => t.name),
|
|
420
|
+
reason: clientMappings
|
|
421
|
+
? `Known client '${clientType}' — filtered to mapped tools only`
|
|
422
|
+
: "Unknown client — injecting full IDE_SAFE_TOOLS"
|
|
423
|
+
}, "=== INJECTING TOOLS ===");
|
|
424
|
+
}
|
|
425
|
+
|
|
396
426
|
// Get or create session
|
|
397
427
|
const session = getSession(sessionId);
|
|
398
428
|
|
|
@@ -420,7 +450,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
420
450
|
});
|
|
421
451
|
|
|
422
452
|
// Check if we have a valid response body
|
|
423
|
-
logger.
|
|
453
|
+
logger.debug({
|
|
424
454
|
hasResult: !!result,
|
|
425
455
|
resultKeys: result ? Object.keys(result) : null,
|
|
426
456
|
hasBody: result && !!result.body,
|
|
@@ -442,7 +472,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
442
472
|
const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
|
|
443
473
|
|
|
444
474
|
// Debug: Log what we're about to stream
|
|
445
|
-
logger.
|
|
475
|
+
logger.debug({
|
|
446
476
|
openaiResponseId: openaiResponse.id,
|
|
447
477
|
messageContent: openaiResponse.choices[0]?.message?.content?.substring(0, 100),
|
|
448
478
|
contentLength: openaiResponse.choices[0]?.message?.content?.length || 0,
|
|
@@ -454,7 +484,25 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
454
484
|
|
|
455
485
|
// Simulate streaming by sending the complete response as chunks
|
|
456
486
|
const content = openaiResponse.choices[0].message.content || "";
|
|
457
|
-
|
|
487
|
+
let toolCalls = openaiResponse.choices[0].message.tool_calls;
|
|
488
|
+
|
|
489
|
+
// Map tool names for known IDE clients
|
|
490
|
+
if (clientType !== "unknown" && toolCalls && toolCalls.length > 0) {
|
|
491
|
+
toolCalls = toolCalls.map(tc => {
|
|
492
|
+
const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
|
|
493
|
+
return {
|
|
494
|
+
...tc,
|
|
495
|
+
function: {
|
|
496
|
+
name: mapped.name,
|
|
497
|
+
arguments: mapped.arguments
|
|
498
|
+
}
|
|
499
|
+
};
|
|
500
|
+
});
|
|
501
|
+
logger.debug({
|
|
502
|
+
mappedTools: toolCalls.map(t => t.function?.name),
|
|
503
|
+
clientType
|
|
504
|
+
}, "Tool names mapped for streaming chat/completions");
|
|
505
|
+
}
|
|
458
506
|
|
|
459
507
|
// Send start chunk with role
|
|
460
508
|
const startChunk = {
|
|
@@ -493,7 +541,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
493
541
|
}]
|
|
494
542
|
};
|
|
495
543
|
const contentWriteOk = res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
|
|
496
|
-
logger.
|
|
544
|
+
logger.debug({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk");
|
|
497
545
|
}
|
|
498
546
|
|
|
499
547
|
// Send tool calls if present
|
|
@@ -545,7 +593,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
545
593
|
res.write("data: [DONE]\n\n");
|
|
546
594
|
|
|
547
595
|
// Ensure data is flushed before ending
|
|
548
|
-
logger.
|
|
596
|
+
logger.debug({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ===");
|
|
549
597
|
res.end();
|
|
550
598
|
|
|
551
599
|
logger.info({
|
|
@@ -558,10 +606,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
558
606
|
} catch (streamError) {
|
|
559
607
|
logger.error({
|
|
560
608
|
error: streamError.message,
|
|
561
|
-
stack: streamError.stack
|
|
562
|
-
resultWasNull: !result,
|
|
563
|
-
resultBodyWasNull: result && !result.body,
|
|
564
|
-
resultKeys: result ? Object.keys(result) : null
|
|
609
|
+
stack: streamError.stack
|
|
565
610
|
}, "=== STREAMING ERROR ===");
|
|
566
611
|
|
|
567
612
|
// Send error in OpenAI streaming format
|
|
@@ -606,6 +651,24 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
606
651
|
// Convert Anthropic response to OpenAI format
|
|
607
652
|
const openaiResponse = convertAnthropicToOpenAI(result.body, req.body.model);
|
|
608
653
|
|
|
654
|
+
// Map tool names for known IDE clients
|
|
655
|
+
if (clientType !== "unknown" && openaiResponse.choices?.[0]?.message?.tool_calls?.length > 0) {
|
|
656
|
+
openaiResponse.choices[0].message.tool_calls = openaiResponse.choices[0].message.tool_calls.map(tc => {
|
|
657
|
+
const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType);
|
|
658
|
+
return {
|
|
659
|
+
...tc,
|
|
660
|
+
function: {
|
|
661
|
+
name: mapped.name,
|
|
662
|
+
arguments: mapped.arguments
|
|
663
|
+
}
|
|
664
|
+
};
|
|
665
|
+
});
|
|
666
|
+
logger.debug({
|
|
667
|
+
mappedTools: openaiResponse.choices[0].message.tool_calls.map(t => t.function?.name),
|
|
668
|
+
clientType
|
|
669
|
+
}, "Tool names mapped for non-streaming chat/completions");
|
|
670
|
+
}
|
|
671
|
+
|
|
609
672
|
logger.info({
|
|
610
673
|
duration: Date.now() - startTime,
|
|
611
674
|
mode: "non-streaming",
|
|
@@ -767,6 +830,18 @@ function getConfiguredProviders() {
|
|
|
767
830
|
});
|
|
768
831
|
}
|
|
769
832
|
|
|
833
|
+
// Check Moonshot AI (Kimi)
|
|
834
|
+
if (config.moonshot?.apiKey) {
|
|
835
|
+
providers.push({
|
|
836
|
+
name: "moonshot",
|
|
837
|
+
type: "moonshot-ai",
|
|
838
|
+
models: [
|
|
839
|
+
config.moonshot.model || "kimi-k2-turbo-preview",
|
|
840
|
+
"kimi-k2-turbo-preview"
|
|
841
|
+
]
|
|
842
|
+
});
|
|
843
|
+
}
|
|
844
|
+
|
|
770
845
|
// Check Vertex AI (Google Cloud)
|
|
771
846
|
if (config.vertex?.projectId) {
|
|
772
847
|
providers.push({
|
|
@@ -1013,7 +1088,7 @@ function determineEmbeddingProvider(requestedModel = null) {
|
|
|
1013
1088
|
async function generateOllamaEmbeddings(inputs, embeddingConfig) {
|
|
1014
1089
|
const { model, endpoint } = embeddingConfig;
|
|
1015
1090
|
|
|
1016
|
-
logger.
|
|
1091
|
+
logger.debug({
|
|
1017
1092
|
model,
|
|
1018
1093
|
endpoint,
|
|
1019
1094
|
inputCount: inputs.length
|
|
@@ -1079,7 +1154,7 @@ async function generateOllamaEmbeddings(inputs, embeddingConfig) {
|
|
|
1079
1154
|
async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
|
|
1080
1155
|
const { model, endpoint } = embeddingConfig;
|
|
1081
1156
|
|
|
1082
|
-
logger.
|
|
1157
|
+
logger.debug({
|
|
1083
1158
|
model,
|
|
1084
1159
|
endpoint,
|
|
1085
1160
|
inputCount: inputs.length
|
|
@@ -1147,7 +1222,7 @@ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) {
|
|
|
1147
1222
|
async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
|
|
1148
1223
|
const { model, apiKey, endpoint } = embeddingConfig;
|
|
1149
1224
|
|
|
1150
|
-
logger.
|
|
1225
|
+
logger.debug({
|
|
1151
1226
|
model,
|
|
1152
1227
|
inputCount: inputs.length
|
|
1153
1228
|
}, "Generating embeddings with OpenRouter");
|
|
@@ -1181,7 +1256,7 @@ async function generateOpenRouterEmbeddings(inputs, embeddingConfig) {
|
|
|
1181
1256
|
async function generateOpenAIEmbeddings(inputs, embeddingConfig) {
|
|
1182
1257
|
const { model, apiKey, endpoint } = embeddingConfig;
|
|
1183
1258
|
|
|
1184
|
-
logger.
|
|
1259
|
+
logger.debug({
|
|
1185
1260
|
model,
|
|
1186
1261
|
inputCount: inputs.length
|
|
1187
1262
|
}, "Generating embeddings with OpenAI");
|
|
@@ -1233,7 +1308,7 @@ router.post("/embeddings", async (req, res) => {
|
|
|
1233
1308
|
// Convert input to array if string
|
|
1234
1309
|
const inputs = Array.isArray(input) ? input : [input];
|
|
1235
1310
|
|
|
1236
|
-
logger.
|
|
1311
|
+
logger.debug({
|
|
1237
1312
|
endpoint: "/v1/embeddings",
|
|
1238
1313
|
model: model || "auto-detect",
|
|
1239
1314
|
inputCount: inputs.length,
|
|
@@ -1335,7 +1410,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1335
1410
|
const { convertResponsesToChat, convertChatToResponses } = require("../clients/responses-format");
|
|
1336
1411
|
|
|
1337
1412
|
// Comprehensive debug logging
|
|
1338
|
-
logger.
|
|
1413
|
+
logger.debug({
|
|
1339
1414
|
endpoint: "/v1/responses",
|
|
1340
1415
|
inputType: typeof req.body.input,
|
|
1341
1416
|
inputIsArray: Array.isArray(req.body.input),
|
|
@@ -1354,7 +1429,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1354
1429
|
// Convert Responses API to Chat Completions format
|
|
1355
1430
|
const chatRequest = convertResponsesToChat(req.body);
|
|
1356
1431
|
|
|
1357
|
-
logger.
|
|
1432
|
+
logger.debug({
|
|
1358
1433
|
chatRequestMessageCount: chatRequest.messages?.length,
|
|
1359
1434
|
chatRequestMessages: chatRequest.messages?.map(m => ({
|
|
1360
1435
|
role: m.role,
|
|
@@ -1366,7 +1441,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1366
1441
|
// Convert to Anthropic format
|
|
1367
1442
|
const anthropicRequest = convertOpenAIToAnthropic(chatRequest);
|
|
1368
1443
|
|
|
1369
|
-
logger.
|
|
1444
|
+
logger.debug({
|
|
1370
1445
|
anthropicMessageCount: anthropicRequest.messages?.length,
|
|
1371
1446
|
anthropicMessages: anthropicRequest.messages?.map(m => ({
|
|
1372
1447
|
role: m.role,
|
|
@@ -1374,6 +1449,24 @@ router.post("/responses", async (req, res) => {
|
|
|
1374
1449
|
}))
|
|
1375
1450
|
}, "After Chat→Anthropic conversion");
|
|
1376
1451
|
|
|
1452
|
+
// Inject tools if client didn't send any (same two-layer filtering as chat/completions).
|
|
1453
|
+
const clientType = detectClient(req.headers);
|
|
1454
|
+
if (!anthropicRequest.tools || anthropicRequest.tools.length === 0) {
|
|
1455
|
+
const clientMappings = CLIENT_TOOL_MAPPINGS[clientType];
|
|
1456
|
+
const clientTools = clientMappings
|
|
1457
|
+
? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name])
|
|
1458
|
+
: IDE_SAFE_TOOLS;
|
|
1459
|
+
anthropicRequest.tools = clientTools;
|
|
1460
|
+
logger.debug({
|
|
1461
|
+
clientType,
|
|
1462
|
+
injectedToolCount: clientTools.length,
|
|
1463
|
+
injectedToolNames: clientTools.map(t => t.name),
|
|
1464
|
+
reason: clientMappings
|
|
1465
|
+
? `Known client '${clientType}' — filtered to mapped tools only`
|
|
1466
|
+
: "Unknown client — injecting full IDE_SAFE_TOOLS"
|
|
1467
|
+
}, "=== INJECTING TOOLS (responses) ===");
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1377
1470
|
// Get session
|
|
1378
1471
|
const session = getSession(sessionId);
|
|
1379
1472
|
|
|
@@ -1400,7 +1493,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1400
1493
|
});
|
|
1401
1494
|
|
|
1402
1495
|
// Debug: Log what orchestrator returned
|
|
1403
|
-
logger.
|
|
1496
|
+
logger.debug({
|
|
1404
1497
|
hasResult: !!result,
|
|
1405
1498
|
hasBody: !!result?.body,
|
|
1406
1499
|
bodyKeys: result?.body ? Object.keys(result.body) : null,
|
|
@@ -1412,7 +1505,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1412
1505
|
// Convert back: Anthropic → OpenAI → Responses
|
|
1413
1506
|
const chatResponse = convertAnthropicToOpenAI(result.body, req.body.model);
|
|
1414
1507
|
|
|
1415
|
-
logger.
|
|
1508
|
+
logger.debug({
|
|
1416
1509
|
chatContent: chatResponse.choices?.[0]?.message?.content?.substring(0, 200),
|
|
1417
1510
|
chatContentLength: chatResponse.choices?.[0]?.message?.content?.length || 0,
|
|
1418
1511
|
hasToolCalls: !!chatResponse.choices?.[0]?.message?.tool_calls,
|
|
@@ -1433,7 +1526,7 @@ router.post("/responses", async (req, res) => {
|
|
|
1433
1526
|
// Check if client is a known AI coding tool and map tool names accordingly
|
|
1434
1527
|
const clientType = detectClient(req.headers);
|
|
1435
1528
|
if (clientType !== "unknown" && toolCalls.length > 0) {
|
|
1436
|
-
logger.
|
|
1529
|
+
logger.debug({
|
|
1437
1530
|
originalTools: toolCalls.map(t => t.function?.name),
|
|
1438
1531
|
clientType,
|
|
1439
1532
|
userAgent: req.headers["user-agent"]
|
|
@@ -1451,12 +1544,12 @@ router.post("/responses", async (req, res) => {
|
|
|
1451
1544
|
};
|
|
1452
1545
|
});
|
|
1453
1546
|
|
|
1454
|
-
logger.
|
|
1547
|
+
logger.debug({
|
|
1455
1548
|
mappedTools: toolCalls.map(t => t.function?.name)
|
|
1456
1549
|
}, `Tool names mapped for ${clientType}`);
|
|
1457
1550
|
}
|
|
1458
1551
|
|
|
1459
|
-
logger.
|
|
1552
|
+
logger.debug({
|
|
1460
1553
|
content: content.substring(0, 100),
|
|
1461
1554
|
contentLength: content.length,
|
|
1462
1555
|
toolCallCount: toolCalls.length,
|
package/src/api/providers-handler.js
CHANGED
|
@@ -179,6 +179,20 @@ function getConfiguredProviders() {
|
|
|
179
179
|
});
|
|
180
180
|
}
|
|
181
181
|
|
|
182
|
+
// Check Moonshot AI (Kimi)
|
|
183
|
+
if (config.moonshot?.apiKey) {
|
|
184
|
+
providers.push({
|
|
185
|
+
name: "moonshot",
|
|
186
|
+
type: "moonshot-ai",
|
|
187
|
+
baseUrl: config.moonshot.endpoint || "https://api.moonshot.ai/v1",
|
|
188
|
+
enabled: true,
|
|
189
|
+
models: [
|
|
190
|
+
{ id: config.moonshot.model || "kimi-k2-turbo-preview", name: "Configured Model" },
|
|
191
|
+
{ id: "kimi-k2-turbo-preview", name: "Kimi K2 Turbo Preview" },
|
|
192
|
+
]
|
|
193
|
+
});
|
|
194
|
+
}
|
|
195
|
+
|
|
182
196
|
// Check Vertex AI (Google Cloud)
|
|
183
197
|
if (config.vertex?.projectId) {
|
|
184
198
|
const region = config.vertex.region || "us-east5";
|
|
@@ -369,7 +383,7 @@ router.get("/config", (req, res) => {
|
|
|
369
383
|
model_provider: config.modelProvider?.type || "databricks",
|
|
370
384
|
fallback_provider: config.modelProvider?.fallbackProvider || null,
|
|
371
385
|
fallback_enabled: config.modelProvider?.fallbackEnabled || false,
|
|
372
|
-
|
|
386
|
+
tier_routing_enabled: config.modelTiers?.enabled || false,
|
|
373
387
|
tool_execution_mode: config.toolExecutionMode || "server",
|
|
374
388
|
configured_providers: providers.map(p => p.name),
|
|
375
389
|
memory_enabled: config.memory?.enabled || false,
|
package/src/api/router.js
CHANGED
|
@@ -2,10 +2,11 @@ const express = require("express");
|
|
|
2
2
|
const { processMessage } = require("../orchestrator");
|
|
3
3
|
const { getSession } = require("../sessions");
|
|
4
4
|
const metrics = require("../metrics");
|
|
5
|
+
const logger = require("../logger");
|
|
5
6
|
const { createRateLimiter } = require("./middleware/rate-limiter");
|
|
6
7
|
const openaiRouter = require("./openai-router");
|
|
7
8
|
const providersRouter = require("./providers-handler");
|
|
8
|
-
const { getRoutingHeaders, getRoutingStats, analyzeComplexity } = require("../routing");
|
|
9
|
+
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
|
|
9
10
|
const { validateCwd } = require("../workspace");
|
|
10
11
|
|
|
11
12
|
const router = express.Router();
|
|
@@ -71,6 +72,99 @@ router.get("/routing/stats", (req, res) => {
|
|
|
71
72
|
});
|
|
72
73
|
});
|
|
73
74
|
|
|
75
|
+
// Model registry info (from LiteLLM + models.dev APIs)
|
|
76
|
+
router.get("/routing/models", async (req, res) => {
|
|
77
|
+
try {
|
|
78
|
+
const { getModelRegistry } = require("../routing/model-registry");
|
|
79
|
+
const registry = await getModelRegistry();
|
|
80
|
+
res.json({
|
|
81
|
+
status: "ok",
|
|
82
|
+
...registry.getStats(),
|
|
83
|
+
});
|
|
84
|
+
} catch (err) {
|
|
85
|
+
res.status(500).json({ error: err.message });
|
|
86
|
+
}
|
|
87
|
+
});
|
|
88
|
+
|
|
89
|
+
// Get specific model info
|
|
90
|
+
router.get("/routing/models/:model", async (req, res) => {
|
|
91
|
+
try {
|
|
92
|
+
const { getModelRegistry } = require("../routing/model-registry");
|
|
93
|
+
const registry = await getModelRegistry();
|
|
94
|
+
const model = registry.getModel(req.params.model);
|
|
95
|
+
if (!model || model.source === "default") {
|
|
96
|
+
return res.status(404).json({ error: "Model not found", model: req.params.model });
|
|
97
|
+
}
|
|
98
|
+
res.json({ status: "ok", model: req.params.model, ...model });
|
|
99
|
+
} catch (err) {
|
|
100
|
+
res.status(500).json({ error: err.message });
|
|
101
|
+
}
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
// Routing tier information
|
|
105
|
+
router.get("/routing/tiers", (req, res) => {
|
|
106
|
+
try {
|
|
107
|
+
const { getModelTierSelector } = require("../routing/model-tiers");
|
|
108
|
+
const selector = getModelTierSelector();
|
|
109
|
+
res.json({
|
|
110
|
+
status: "ok",
|
|
111
|
+
...selector.getTierStats(),
|
|
112
|
+
});
|
|
113
|
+
} catch (err) {
|
|
114
|
+
res.status(500).json({ error: err.message });
|
|
115
|
+
}
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
// Cost optimization stats
|
|
119
|
+
router.get("/metrics/cost-optimization", (req, res) => {
|
|
120
|
+
try {
|
|
121
|
+
const { getCostOptimizer } = require("../routing/cost-optimizer");
|
|
122
|
+
const optimizer = getCostOptimizer();
|
|
123
|
+
res.json({
|
|
124
|
+
status: "ok",
|
|
125
|
+
...optimizer.getStats(),
|
|
126
|
+
});
|
|
127
|
+
} catch (err) {
|
|
128
|
+
res.status(500).json({ error: err.message });
|
|
129
|
+
}
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
// Request analysis test endpoint
|
|
133
|
+
router.post("/routing/analyze", async (req, res) => {
|
|
134
|
+
try {
|
|
135
|
+
const { getAgenticDetector } = require("../routing/agentic-detector");
|
|
136
|
+
const { getModelTierSelector } = require("../routing/model-tiers");
|
|
137
|
+
const { getModelRegistry } = require("../routing/model-registry");
|
|
138
|
+
|
|
139
|
+
const analysis = analyzeComplexity(req.body, { weighted: req.query.weighted === "true" });
|
|
140
|
+
const agentic = getAgenticDetector().detect(req.body);
|
|
141
|
+
const selector = getModelTierSelector();
|
|
142
|
+
const tier = selector.getTier(analysis.score);
|
|
143
|
+
|
|
144
|
+
// Get recommended model for tier
|
|
145
|
+
const provider = req.query.provider || "openai";
|
|
146
|
+
const modelSelection = selector.selectModel(tier, provider);
|
|
147
|
+
|
|
148
|
+
// Get model cost info
|
|
149
|
+
let modelInfo = null;
|
|
150
|
+
if (modelSelection.model) {
|
|
151
|
+
const registry = await getModelRegistry();
|
|
152
|
+
modelInfo = registry.getCost(modelSelection.model);
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
res.json({
|
|
156
|
+
status: "ok",
|
|
157
|
+
analysis,
|
|
158
|
+
agentic,
|
|
159
|
+
tier,
|
|
160
|
+
modelSelection,
|
|
161
|
+
modelInfo,
|
|
162
|
+
});
|
|
163
|
+
} catch (err) {
|
|
164
|
+
res.status(500).json({ error: err.message });
|
|
165
|
+
}
|
|
166
|
+
});
|
|
167
|
+
|
|
74
168
|
router.get("/debug/session", (req, res) => {
|
|
75
169
|
if (!req.sessionId) {
|
|
76
170
|
return res.status(400).json({ error: "missing_session_id", message: "Provide x-session-id header" });
|
|
@@ -123,8 +217,19 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
123
217
|
|
|
124
218
|
// Analyze complexity for routing headers (Phase 3)
|
|
125
219
|
const complexity = analyzeComplexity(req.body);
|
|
220
|
+
let preRouteProvider = 'cloud';
|
|
221
|
+
if (complexity.recommendation === 'local') {
|
|
222
|
+
// Use tier config to determine actual provider instead of hardcoding 'ollama'
|
|
223
|
+
try {
|
|
224
|
+
const selector = getModelTierSelector();
|
|
225
|
+
const tierResult = selector.selectModel('SIMPLE', null);
|
|
226
|
+
preRouteProvider = tierResult.provider;
|
|
227
|
+
} catch (_) {
|
|
228
|
+
preRouteProvider = 'ollama';
|
|
229
|
+
}
|
|
230
|
+
}
|
|
126
231
|
const routingHeaders = getRoutingHeaders({
|
|
127
|
-
provider:
|
|
232
|
+
provider: preRouteProvider,
|
|
128
233
|
score: complexity.score,
|
|
129
234
|
threshold: complexity.threshold,
|
|
130
235
|
method: 'complexity',
|
package/src/budget/index.js
CHANGED
|
@@ -11,13 +11,14 @@ const logger = require('../logger');
|
|
|
11
11
|
class BudgetManager {
|
|
12
12
|
constructor(options = {}) {
|
|
13
13
|
this.enabled = options.enabled !== false;
|
|
14
|
+
let dbPath = null;
|
|
14
15
|
if (!this.enabled || !Database) {
|
|
15
16
|
this.enabled = false;
|
|
16
17
|
return;
|
|
17
18
|
}
|
|
18
19
|
|
|
19
20
|
try {
|
|
20
|
-
|
|
21
|
+
dbPath = path.join(process.cwd(), 'data', 'budgets.db');
|
|
21
22
|
const dbDir = path.dirname(dbPath);
|
|
22
23
|
|
|
23
24
|
if (!fs.existsSync(dbDir)) {
|
|
@@ -25,14 +26,14 @@ class BudgetManager {
|
|
|
25
26
|
}
|
|
26
27
|
|
|
27
28
|
this.db = new Database(dbPath);
|
|
29
|
+
this.dbPath = dbPath;
|
|
28
30
|
this.initDatabase();
|
|
31
|
+
logger.info({ dbPath }, 'Budget manager initialized');
|
|
29
32
|
} catch (err) {
|
|
30
33
|
logger.warn({ err: err.message }, "BudgetManager: better-sqlite3 not available");
|
|
31
34
|
this.enabled = false;
|
|
32
35
|
return;
|
|
33
36
|
}
|
|
34
|
-
|
|
35
|
-
logger.info({ dbPath }, 'Budget manager initialized');
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
initDatabase() {
|