lynkr 9.0.2 → 9.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +2 -2
- package/public/dashboard.html +665 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +185 -47
- package/src/clients/databricks.js +9 -5
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +7 -0
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +62 -5
- package/src/orchestrator/preflight.js +188 -0
- package/src/routing/index.js +61 -0
- package/src/routing/interaction.js +183 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/telemetry.js +7 -0
- package/src/server.js +3 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
package/src/api/files-router.js
CHANGED
|
@@ -33,7 +33,7 @@ router.post("/files", async (req, res) => {
|
|
|
33
33
|
filename = parsed.filename || filename;
|
|
34
34
|
mimeType = parsed.mimeType || mimeType;
|
|
35
35
|
purpose = parsed.purpose || purpose;
|
|
36
|
-
const entry = fileStore.storeFile(parsed.file, { filename, purpose, mimeType });
|
|
36
|
+
const entry = await fileStore.storeFile(parsed.file, { filename, purpose, mimeType });
|
|
37
37
|
return res.json(entry);
|
|
38
38
|
}
|
|
39
39
|
}
|
|
@@ -43,7 +43,7 @@ router.post("/files", async (req, res) => {
|
|
|
43
43
|
mimeType = contentType.split(";")[0].trim() || mimeType;
|
|
44
44
|
filename = req.headers["x-filename"] || filename;
|
|
45
45
|
purpose = req.query.purpose || purpose;
|
|
46
|
-
const entry = fileStore.storeFile(buffer, { filename, purpose, mimeType });
|
|
46
|
+
const entry = await fileStore.storeFile(buffer, { filename, purpose, mimeType });
|
|
47
47
|
res.json(entry);
|
|
48
48
|
} catch (err) {
|
|
49
49
|
logger.error({ err }, "File upload failed");
|
|
@@ -62,18 +62,18 @@ router.get("/files/:id", (req, res) => {
|
|
|
62
62
|
res.json(file);
|
|
63
63
|
});
|
|
64
64
|
|
|
65
|
-
router.get("/files/:id/content", (req, res) => {
|
|
65
|
+
router.get("/files/:id/content", async (req, res) => {
|
|
66
66
|
const file = fileStore.getFile(req.params.id);
|
|
67
67
|
if (!file) return res.status(404).json({ error: { message: "File not found" } });
|
|
68
|
-
const content = fileStore.getFileContent(req.params.id);
|
|
68
|
+
const content = await fileStore.getFileContent(req.params.id);
|
|
69
69
|
if (!content) return res.status(404).json({ error: { message: "File content not found" } });
|
|
70
70
|
res.setHeader("Content-Type", file.mime_type);
|
|
71
71
|
res.setHeader("Content-Disposition", `attachment; filename="${file.filename}"`);
|
|
72
72
|
res.send(content);
|
|
73
73
|
});
|
|
74
74
|
|
|
75
|
-
router.delete("/files/:id", (req, res) => {
|
|
76
|
-
const deleted = fileStore.deleteFile(req.params.id);
|
|
75
|
+
router.delete("/files/:id", async (req, res) => {
|
|
76
|
+
const deleted = await fileStore.deleteFile(req.params.id);
|
|
77
77
|
if (!deleted) return res.status(404).json({ error: { message: "File not found" } });
|
|
78
78
|
res.json({ id: req.params.id, object: "file", deleted: true });
|
|
79
79
|
});
|
|
@@ -57,12 +57,30 @@ function budgetMiddleware(req, res, next) {
|
|
|
57
57
|
}, 'Budget warning: approaching limits');
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
-
// Attach budget info to request for usage recording later
|
|
61
60
|
req.budgetInfo = {
|
|
62
61
|
userId,
|
|
63
62
|
budgetCheck,
|
|
63
|
+
startTime: Date.now(),
|
|
64
64
|
};
|
|
65
65
|
|
|
66
|
+
// Record usage after response completes
|
|
67
|
+
res.on('finish', () => {
|
|
68
|
+
try {
|
|
69
|
+
const usage = res.locals.usage;
|
|
70
|
+
if (!usage) return;
|
|
71
|
+
budgetManager.recordUsage(userId, req.session?.id || null, {
|
|
72
|
+
tokensInput: usage.prompt_tokens || usage.input_tokens || 0,
|
|
73
|
+
tokensOutput: usage.completion_tokens || usage.output_tokens || 0,
|
|
74
|
+
costUsd: usage.cost_usd || 0,
|
|
75
|
+
model: usage.model || null,
|
|
76
|
+
endpoint: req.path,
|
|
77
|
+
latencyMs: Date.now() - req.budgetInfo.startTime,
|
|
78
|
+
});
|
|
79
|
+
} catch (err) {
|
|
80
|
+
logger.warn({ err: err.message }, 'Failed to record usage after response');
|
|
81
|
+
}
|
|
82
|
+
});
|
|
83
|
+
|
|
66
84
|
next();
|
|
67
85
|
}
|
|
68
86
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
const os = require("os");
|
|
1
2
|
const logger = require("../../logger");
|
|
2
3
|
const { ServiceUnavailableError } = require("./error-handling");
|
|
3
4
|
|
|
@@ -55,6 +56,20 @@ class LoadShedder {
|
|
|
55
56
|
return true;
|
|
56
57
|
}
|
|
57
58
|
|
|
59
|
+
// Check RSS / system memory
|
|
60
|
+
const rssPercent = memUsage.rss / os.totalmem();
|
|
61
|
+
if (rssPercent > this.memoryThreshold) {
|
|
62
|
+
logger.warn(
|
|
63
|
+
{
|
|
64
|
+
rssPercent: (rssPercent * 100).toFixed(2),
|
|
65
|
+
threshold: (this.memoryThreshold * 100).toFixed(2),
|
|
66
|
+
},
|
|
67
|
+
"Load shedding: RSS memory usage exceeded threshold"
|
|
68
|
+
);
|
|
69
|
+
this.cachedOverloadState = true;
|
|
70
|
+
return true;
|
|
71
|
+
}
|
|
72
|
+
|
|
58
73
|
// Check active requests
|
|
59
74
|
if (this.activeRequests > this.activeRequestsThreshold) {
|
|
60
75
|
logger.warn(
|
|
@@ -81,8 +96,10 @@ class LoadShedder {
|
|
|
81
96
|
activeRequests: this.activeRequests,
|
|
82
97
|
totalShed: this.totalShed,
|
|
83
98
|
heapUsedPercent: ((memUsage.heapUsed / memUsage.heapTotal) * 100).toFixed(2),
|
|
99
|
+
rssPercent: ((memUsage.rss / os.totalmem()) * 100).toFixed(2),
|
|
84
100
|
thresholds: {
|
|
85
101
|
heapThreshold: (this.heapThreshold * 100).toFixed(2),
|
|
102
|
+
memoryThreshold: (this.memoryThreshold * 100).toFixed(2),
|
|
86
103
|
activeRequestsThreshold: this.activeRequestsThreshold,
|
|
87
104
|
},
|
|
88
105
|
};
|
package/src/api/openai-router.js
CHANGED
|
@@ -366,7 +366,7 @@ router.post("/chat/completions", async (req, res) => {
|
|
|
366
366
|
role: m.role,
|
|
367
367
|
contentPreview: typeof m.content === 'string'
|
|
368
368
|
? m.content.substring(0, 200)
|
|
369
|
-
: JSON.stringify(m.content).substring(0, 200)
|
|
369
|
+
: (m.content == null ? null : (JSON.stringify(m.content) ?? '').substring(0, 200))
|
|
370
370
|
}));
|
|
371
371
|
|
|
372
372
|
logger.debug({
|
package/src/api/router.js
CHANGED
|
@@ -6,8 +6,10 @@ const logger = require("../logger");
|
|
|
6
6
|
const { createRateLimiter } = require("./middleware/rate-limiter");
|
|
7
7
|
const openaiRouter = require("./openai-router");
|
|
8
8
|
const providersRouter = require("./providers-handler");
|
|
9
|
-
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
|
|
9
|
+
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector, analyzeRisk } = require("../routing");
|
|
10
|
+
const { buildInteractionBlock } = require("../routing/interaction");
|
|
10
11
|
const { validateCwd } = require("../workspace");
|
|
12
|
+
const { renderText } = require("../utils/markdown-ansi");
|
|
11
13
|
|
|
12
14
|
const router = express.Router();
|
|
13
15
|
|
|
@@ -63,6 +65,24 @@ router.get("/health", (req, res) => {
|
|
|
63
65
|
res.json({ status: "ok" });
|
|
64
66
|
});
|
|
65
67
|
|
|
68
|
+
// Usage report — same data as `lynkr usage` CLI, served as JSON for
|
|
69
|
+
// dashboards / agents / scripts that want to surface spend & savings.
|
|
70
|
+
router.get("/v1/usage", (req, res) => {
|
|
71
|
+
try {
|
|
72
|
+
const aggregator = require("../usage/aggregator");
|
|
73
|
+
const window = req.query.window || (req.query.days ? `${parseInt(req.query.days, 10)}d` : "30d");
|
|
74
|
+
const usage = aggregator.getUsage({
|
|
75
|
+
window,
|
|
76
|
+
flagship: req.query.flagship,
|
|
77
|
+
provider: req.query.provider,
|
|
78
|
+
model: req.query.model,
|
|
79
|
+
});
|
|
80
|
+
res.json(usage);
|
|
81
|
+
} catch (err) {
|
|
82
|
+
res.status(500).json({ error: err.message });
|
|
83
|
+
}
|
|
84
|
+
});
|
|
85
|
+
|
|
66
86
|
// Routing stats endpoint (Phase 3: Metrics)
|
|
67
87
|
router.get("/routing/stats", (req, res) => {
|
|
68
88
|
const stats = getRoutingStats();
|
|
@@ -260,24 +280,70 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
260
280
|
// Analyze complexity for routing headers (Phase 3)
|
|
261
281
|
const complexity = await analyzeComplexity(req.body);
|
|
262
282
|
timer.mark("analyzeComplexity");
|
|
283
|
+
|
|
284
|
+
// Risk axis runs alongside complexity. Cheap pure-string scan, no I/O.
|
|
285
|
+
let preRouteRisk = null;
|
|
286
|
+
try {
|
|
287
|
+
preRouteRisk = analyzeRisk(req.body);
|
|
288
|
+
} catch (err) {
|
|
289
|
+
logger.debug({ err: err.message }, '[Router] Risk analysis failed in pre-route');
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
// Pre-route tier: high-risk forces COMPLEX, otherwise tier is
|
|
293
|
+
// inferred from the complexity recommendation. The actual final
|
|
294
|
+
// tier may differ (invokeModel re-runs determineProviderSmart) —
|
|
295
|
+
// this is best-effort for header surfacing.
|
|
263
296
|
let preRouteProvider = 'cloud';
|
|
264
|
-
|
|
265
|
-
|
|
297
|
+
let preRouteTier = null;
|
|
298
|
+
let preRouteModel = null;
|
|
299
|
+
let preRouteMethod = 'complexity';
|
|
300
|
+
let preRouteReason = complexity.breakdown?.taskType?.reason || complexity.recommendation;
|
|
301
|
+
|
|
302
|
+
if (preRouteRisk?.level === 'high') {
|
|
266
303
|
try {
|
|
267
304
|
const selector = getModelTierSelector();
|
|
268
|
-
const tierResult = selector.selectModel('
|
|
305
|
+
const tierResult = selector.selectModel('COMPLEX', null);
|
|
269
306
|
preRouteProvider = tierResult.provider;
|
|
307
|
+
preRouteTier = 'COMPLEX';
|
|
308
|
+
preRouteModel = tierResult.model;
|
|
309
|
+
preRouteMethod = 'risk';
|
|
310
|
+
preRouteReason = 'high_risk_forced_tier';
|
|
270
311
|
} catch (_) {
|
|
271
|
-
|
|
312
|
+
// Risk-forced tier not configured; fall back to normal flow.
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
if (!preRouteTier) {
|
|
317
|
+
if (complexity.recommendation === 'local') {
|
|
318
|
+
try {
|
|
319
|
+
const selector = getModelTierSelector();
|
|
320
|
+
const tierResult = selector.selectModel('SIMPLE', null);
|
|
321
|
+
preRouteProvider = tierResult.provider;
|
|
322
|
+
preRouteTier = 'SIMPLE';
|
|
323
|
+
preRouteModel = tierResult.model;
|
|
324
|
+
} catch (_) {
|
|
325
|
+
preRouteProvider = 'ollama';
|
|
326
|
+
}
|
|
272
327
|
}
|
|
273
328
|
}
|
|
274
|
-
|
|
329
|
+
|
|
330
|
+
const preRouteDecision = {
|
|
275
331
|
provider: preRouteProvider,
|
|
332
|
+
tier: preRouteTier,
|
|
333
|
+
model: preRouteModel,
|
|
334
|
+
method: preRouteMethod,
|
|
335
|
+
reason: preRouteReason,
|
|
276
336
|
score: complexity.score,
|
|
277
337
|
threshold: complexity.threshold,
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
338
|
+
risk: preRouteRisk,
|
|
339
|
+
};
|
|
340
|
+
|
|
341
|
+
const routingHeaders = getRoutingHeaders(preRouteDecision);
|
|
342
|
+
|
|
343
|
+
// Build the interaction block once. It travels in headers always
|
|
344
|
+
// (X-Lynkr-Interaction-* derived fields) and optionally into the
|
|
345
|
+
// response body when LYNKR_VISIBLE_ROUTING=true.
|
|
346
|
+
const interaction = buildInteractionBlock(preRouteDecision);
|
|
281
347
|
|
|
282
348
|
// Extract client CWD from request body or header
|
|
283
349
|
const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']);
|
|
@@ -424,17 +490,35 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
424
490
|
content_block: { type: "text", text: "" }
|
|
425
491
|
})}\n\n`);
|
|
426
492
|
|
|
427
|
-
// Send text
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
493
|
+
// Send text — one chunk when ANSI rendering is active (splitting
|
|
494
|
+
// ANSI escape sequences across 20-char chunks breaks terminal output).
|
|
495
|
+
// Plain text falls back to line-level chunks for a trickle effect.
|
|
496
|
+
// Never apply ANSI rendering to HTML content (<artifact> blocks):
|
|
497
|
+
// ANSI codes corrupt CSS selectors like `*` and break the browser viewer.
|
|
498
|
+
const rawBlockText = block.text || "";
|
|
499
|
+
const isHtmlContent = rawBlockText.includes("<artifact") || rawBlockText.trimStart().startsWith("<");
|
|
500
|
+
const text = isHtmlContent ? rawBlockText : renderText(rawBlockText);
|
|
501
|
+
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
|
|
502
|
+
if (ansiEnabled && !isHtmlContent) {
|
|
503
|
+
if (text.length > 0) {
|
|
504
|
+
res.write(`event: content_block_delta\n`);
|
|
505
|
+
res.write(`data: ${JSON.stringify({
|
|
506
|
+
type: "content_block_delta",
|
|
507
|
+
index: i,
|
|
508
|
+
delta: { type: "text_delta", text }
|
|
509
|
+
})}\n\n`);
|
|
510
|
+
}
|
|
511
|
+
} else {
|
|
512
|
+
const lines = text.split("\n");
|
|
513
|
+
for (const line of lines) {
|
|
514
|
+
const lineWithNl = line + "\n";
|
|
515
|
+
res.write(`event: content_block_delta\n`);
|
|
516
|
+
res.write(`data: ${JSON.stringify({
|
|
517
|
+
type: "content_block_delta",
|
|
518
|
+
index: i,
|
|
519
|
+
delta: { type: "text_delta", text: lineWithNl }
|
|
520
|
+
})}\n\n`);
|
|
521
|
+
}
|
|
438
522
|
}
|
|
439
523
|
|
|
440
524
|
res.write(`event: content_block_stop\n`);
|
|
@@ -459,22 +543,37 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
459
543
|
res.write(`event: content_block_stop\n`);
|
|
460
544
|
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
461
545
|
} else if (block.type === "tool_use") {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
546
|
+
// Original request had no tools → model hallucinated a tool call.
|
|
547
|
+
// Extract file content from write-style tools and wrap it in an
|
|
548
|
+
// <artifact> block so open-design routes it to the Design panel.
|
|
549
|
+
const toolName = (block.name || "").toLowerCase();
|
|
550
|
+
const writeTools = new Set(["write", "create_file", "write_file", "str_replace_editor"]);
|
|
551
|
+
if (writeTools.has(toolName)) {
|
|
552
|
+
const rawContent = block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
|
|
553
|
+
const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
|
|
554
|
+
const content = String(rawContent);
|
|
555
|
+
if (content) {
|
|
556
|
+
// Wrap in <artifact> so open-design's parser routes it to the file viewer.
|
|
557
|
+
const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
|
|
558
|
+
const title = filePath;
|
|
559
|
+
const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;
|
|
560
|
+
res.write(`event: content_block_start\n`);
|
|
561
|
+
res.write(`data: ${JSON.stringify({
|
|
562
|
+
type: "content_block_start",
|
|
563
|
+
index: i,
|
|
564
|
+
content_block: { type: "text", text: "" }
|
|
565
|
+
})}\n\n`);
|
|
566
|
+
res.write(`event: content_block_delta\n`);
|
|
567
|
+
res.write(`data: ${JSON.stringify({
|
|
568
|
+
type: "content_block_delta",
|
|
569
|
+
index: i,
|
|
570
|
+
delta: { type: "text_delta", text: wrapped }
|
|
571
|
+
})}\n\n`);
|
|
572
|
+
res.write(`event: content_block_stop\n`);
|
|
573
|
+
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
574
|
+
}
|
|
575
|
+
}
|
|
576
|
+
// Non-write tool_use in a tool-less request is silently dropped.
|
|
478
577
|
}
|
|
479
578
|
}
|
|
480
579
|
|
|
@@ -566,16 +665,30 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
566
665
|
content_block: { type: "text", text: "" }
|
|
567
666
|
})}\n\n`);
|
|
568
667
|
|
|
569
|
-
const
|
|
570
|
-
const
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
668
|
+
const rawBlockText2 = block.text || "";
|
|
669
|
+
const isHtmlContent2 = rawBlockText2.includes("<artifact") || rawBlockText2.trimStart().startsWith("<");
|
|
670
|
+
const text = isHtmlContent2 ? rawBlockText2 : renderText(rawBlockText2);
|
|
671
|
+
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
|
|
672
|
+
if (ansiEnabled && !isHtmlContent2) {
|
|
673
|
+
if (text.length > 0) {
|
|
674
|
+
res.write(`event: content_block_delta\n`);
|
|
675
|
+
res.write(`data: ${JSON.stringify({
|
|
676
|
+
type: "content_block_delta",
|
|
677
|
+
index: i,
|
|
678
|
+
delta: { type: "text_delta", text }
|
|
679
|
+
})}\n\n`);
|
|
680
|
+
}
|
|
681
|
+
} else {
|
|
682
|
+
const lines = text.split("\n");
|
|
683
|
+
for (const line of lines) {
|
|
684
|
+
const lineWithNl = line + "\n";
|
|
685
|
+
res.write(`event: content_block_delta\n`);
|
|
686
|
+
res.write(`data: ${JSON.stringify({
|
|
687
|
+
type: "content_block_delta",
|
|
688
|
+
index: i,
|
|
689
|
+
delta: { type: "text_delta", text: lineWithNl }
|
|
690
|
+
})}\n\n`);
|
|
691
|
+
}
|
|
579
692
|
}
|
|
580
693
|
|
|
581
694
|
res.write(`event: content_block_stop\n`);
|
|
@@ -651,8 +764,33 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
651
764
|
});
|
|
652
765
|
}
|
|
653
766
|
|
|
767
|
+
// Inject visible interaction block into the response body when
|
|
768
|
+
// LYNKR_VISIBLE_ROUTING=true. We only mutate JSON bodies — and only
|
|
769
|
+
// when the response looks like a valid Anthropic Message — so this
|
|
770
|
+
// is a no-op for streamed / error / non-message responses.
|
|
771
|
+
let finalBody = result.body;
|
|
772
|
+
if (
|
|
773
|
+
config.routing?.visibleInteraction &&
|
|
774
|
+
interaction &&
|
|
775
|
+
result.status >= 200 && result.status < 300 &&
|
|
776
|
+
result.body
|
|
777
|
+
) {
|
|
778
|
+
try {
|
|
779
|
+
const text = Buffer.isBuffer(result.body) ? result.body.toString('utf8') : result.body;
|
|
780
|
+
if (typeof text === 'string' && text.startsWith('{')) {
|
|
781
|
+
const parsed = JSON.parse(text);
|
|
782
|
+
if (parsed && typeof parsed === 'object' && parsed.type === 'message') {
|
|
783
|
+
parsed.lynkr_interaction = interaction;
|
|
784
|
+
finalBody = JSON.stringify(parsed);
|
|
785
|
+
}
|
|
786
|
+
}
|
|
787
|
+
} catch (err) {
|
|
788
|
+
logger.debug({ err: err.message }, '[Router] Skipped interaction injection (non-JSON body)');
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
|
|
654
792
|
metrics.recordResponse(result.status);
|
|
655
|
-
res.status(result.status).send(
|
|
793
|
+
res.status(result.status).send(finalBody);
|
|
656
794
|
} catch (error) {
|
|
657
795
|
next(error);
|
|
658
796
|
}
|
|
@@ -221,7 +221,7 @@ async function invokeOllama(body) {
|
|
|
221
221
|
const useAnthropicApi = await hasAnthropicEndpoint(config.ollama.endpoint);
|
|
222
222
|
|
|
223
223
|
// Check if model supports tools FIRST (before wasteful injection)
|
|
224
|
-
const supportsTools = await checkOllamaToolSupport(
|
|
224
|
+
const supportsTools = await checkOllamaToolSupport(modelName);
|
|
225
225
|
const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";
|
|
226
226
|
|
|
227
227
|
// Determine tools to send
|
|
@@ -476,13 +476,17 @@ async function invokeAzureOpenAI(body) {
|
|
|
476
476
|
// System prompt injection disabled - breaks model response
|
|
477
477
|
// Tool guidance now provided via tool descriptions instead
|
|
478
478
|
|
|
479
|
+
const azureDeployment = body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment || "";
|
|
480
|
+
const isGpt5 = /gpt-5/i.test(azureDeployment);
|
|
481
|
+
const maxTokensKey = isGpt5 ? "max_completion_tokens" : "max_tokens";
|
|
482
|
+
|
|
479
483
|
const azureBody = {
|
|
480
484
|
messages,
|
|
481
|
-
temperature: body.temperature ?? 0.3,
|
|
482
|
-
|
|
485
|
+
temperature: body.temperature ?? 0.3,
|
|
486
|
+
[maxTokensKey]: Math.min(body.max_tokens ?? 16384, 16384),
|
|
483
487
|
top_p: body.top_p ?? 1.0,
|
|
484
|
-
stream: false,
|
|
485
|
-
model:
|
|
488
|
+
stream: false,
|
|
489
|
+
model: azureDeployment
|
|
486
490
|
};
|
|
487
491
|
|
|
488
492
|
// Add tools - inject standard tools if client didn't send any (passthrough mode)
|
|
@@ -203,24 +203,37 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
203
203
|
|
|
204
204
|
const { id, content, stop_reason, usage } = anthropicResponse;
|
|
205
205
|
|
|
206
|
-
//
|
|
207
|
-
|
|
208
|
-
|
|
206
|
+
// Tolerant fallback: providers sometimes return reasoning-only responses
|
|
207
|
+
// (Minimax/DeepSeek), error envelopes, or empty bodies. Treat missing/invalid
|
|
208
|
+
// content as an empty turn so jcode/Pi/Codex don't crash on the response.
|
|
209
|
+
const safeContent = Array.isArray(content) ? content : [];
|
|
210
|
+
if (safeContent.length === 0) {
|
|
211
|
+
logger.warn({
|
|
212
|
+
hasContent: content !== undefined,
|
|
213
|
+
contentType: typeof content,
|
|
214
|
+
stop_reason,
|
|
215
|
+
responseKeys: Object.keys(anthropicResponse),
|
|
216
|
+
hasError: !!anthropicResponse.error,
|
|
217
|
+
errorMessage: anthropicResponse.error?.message,
|
|
218
|
+
}, "convertAnthropicToOpenAI: empty/missing content, returning empty assistant message");
|
|
209
219
|
}
|
|
210
220
|
|
|
211
221
|
// Convert content blocks to OpenAI format
|
|
212
222
|
let messageContent = "";
|
|
223
|
+
let reasoningContent = "";
|
|
213
224
|
const toolCalls = [];
|
|
214
225
|
let citations = [];
|
|
215
226
|
|
|
216
|
-
for (const block of
|
|
227
|
+
for (const block of safeContent) {
|
|
217
228
|
if (block.type === "text") {
|
|
218
229
|
messageContent += block.text;
|
|
219
230
|
if (Array.isArray(block.citations)) {
|
|
220
231
|
citations.push(...block.citations);
|
|
221
232
|
}
|
|
222
233
|
} else if (block.type === "thinking") {
|
|
223
|
-
//
|
|
234
|
+
// Preserve reasoning text so reasoning-only models (Minimax, DeepSeek-R1)
|
|
235
|
+
// surface visible output to OpenAI clients that don't render thinking blocks
|
|
236
|
+
reasoningContent += (block.thinking || "");
|
|
224
237
|
} else if (block.type === "tool_use") {
|
|
225
238
|
toolCalls.push({
|
|
226
239
|
id: block.id,
|
|
@@ -233,6 +246,12 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
233
246
|
}
|
|
234
247
|
}
|
|
235
248
|
|
|
249
|
+
// Fallback: if the model returned only reasoning (no visible text and no tools),
|
|
250
|
+
// promote reasoning into the visible content so jcode/Pi/Codex see something
|
|
251
|
+
if (!messageContent && !toolCalls.length && reasoningContent) {
|
|
252
|
+
messageContent = reasoningContent;
|
|
253
|
+
}
|
|
254
|
+
|
|
236
255
|
// Build OpenAI response
|
|
237
256
|
// Ensure ID has the chatcmpl- prefix that OpenAI clients expect
|
|
238
257
|
const responseId = id && id.startsWith("chatcmpl-") ? id : `chatcmpl-${Date.now()}`;
|
|
@@ -263,6 +282,13 @@ function convertAnthropicToOpenAI(anthropicResponse, model = "claude-3-5-sonnet-
|
|
|
263
282
|
openaiResponse.citations = citations;
|
|
264
283
|
}
|
|
265
284
|
|
|
285
|
+
// Add reasoning_content as a side-channel field so clients that render
|
|
286
|
+
// thinking (e.g. some jcode / OpenRouter setups) can show it without losing
|
|
287
|
+
// it from the visible content fallback above
|
|
288
|
+
if (reasoningContent && reasoningContent !== messageContent) {
|
|
289
|
+
openaiResponse.choices[0].message.reasoning_content = reasoningContent;
|
|
290
|
+
}
|
|
291
|
+
|
|
266
292
|
// Add tool_calls if present
|
|
267
293
|
if (toolCalls.length > 0) {
|
|
268
294
|
openaiResponse.choices[0].message.tool_calls = toolCalls;
|
package/src/config/index.js
CHANGED
|
@@ -920,6 +920,13 @@ var config = {
|
|
|
920
920
|
weightedScoring: true,
|
|
921
921
|
costOptimization: true,
|
|
922
922
|
agenticDetection: true,
|
|
923
|
+
// Embed an interaction block in the response body so the user can
|
|
924
|
+
// see *why* a particular tier/provider was chosen.
|
|
925
|
+
visibleInteraction: process.env.LYNKR_VISIBLE_ROUTING === 'true',
|
|
926
|
+
// Run user-supplied preflight commands before invoking the model.
|
|
927
|
+
// If all exit 0, short-circuit the request with zero LLM cost.
|
|
928
|
+
preflightEnabled: process.env.LYNKR_PREFLIGHT_ENABLED === 'true',
|
|
929
|
+
preflightTimeoutMs: Number(process.env.LYNKR_PREFLIGHT_TIMEOUT_MS) || 120000,
|
|
923
930
|
},
|
|
924
931
|
|
|
925
932
|
// Model Tier Configuration (REQUIRED)
|