lynkr 9.0.2 → 9.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -10
- package/bin/cli.js +18 -1
- package/bin/lynkr-trajectory.js +136 -0
- package/bin/lynkr-usage.js +219 -0
- package/funding.json +110 -0
- package/package.json +4 -2
- package/public/dashboard.html +665 -0
- package/scripts/build-knn-index.js +130 -0
- package/scripts/calibrate-thresholds.js +197 -0
- package/scripts/compare-policies.js +67 -0
- package/scripts/learn-output-ratios.js +162 -0
- package/scripts/refresh-pricing.js +122 -0
- package/scripts/run-routerarena.js +26 -0
- package/scripts/sample-regret.js +84 -0
- package/scripts/train-risk-classifier.js +191 -0
- package/src/api/files-router.js +6 -6
- package/src/api/middleware/budget-enforcer.js +60 -0
- package/src/api/middleware/budget.js +19 -1
- package/src/api/middleware/load-shedding.js +17 -0
- package/src/api/middleware/tenant.js +21 -0
- package/src/api/openai-router.js +1 -1
- package/src/api/router.js +204 -87
- package/src/budget/hierarchical-budget.js +159 -0
- package/src/cache/semantic.js +28 -2
- package/src/clients/databricks.js +68 -10
- package/src/clients/openai-format.js +31 -5
- package/src/config/index.js +246 -43
- package/src/context/toon.js +5 -4
- package/src/dashboard/api.js +170 -0
- package/src/dashboard/router.js +13 -0
- package/src/headroom/client.js +3 -109
- package/src/headroom/index.js +0 -14
- package/src/memory/search.js +0 -50
- package/src/orchestrator/index.js +106 -11
- package/src/orchestrator/preflight.js +188 -0
- package/src/prompts/system.js +34 -6
- package/src/routing/bandit.js +246 -0
- package/src/routing/cascade.js +106 -0
- package/src/routing/complexity-analyzer.js +7 -15
- package/src/routing/confidence-scorer.js +121 -0
- package/src/routing/context-validator.js +71 -0
- package/src/routing/cost-optimizer.js +5 -2
- package/src/routing/deadline.js +52 -0
- package/src/routing/drift-monitor.js +113 -0
- package/src/routing/embedding-cache.js +77 -0
- package/src/routing/index.js +374 -4
- package/src/routing/interaction.js +183 -0
- package/src/routing/knn-router.js +206 -0
- package/src/routing/latency-tracker.js +113 -71
- package/src/routing/model-tiers.js +156 -6
- package/src/routing/output-ratios.js +57 -0
- package/src/routing/regret-estimator.js +91 -0
- package/src/routing/reward-pipeline.js +62 -0
- package/src/routing/risk-analyzer.js +194 -0
- package/src/routing/risk-classifier.js +130 -0
- package/src/routing/shadow-mode.js +77 -0
- package/src/routing/telemetry.js +7 -0
- package/src/routing/tenant-policy.js +96 -0
- package/src/routing/tokenizer.js +162 -0
- package/src/server.js +12 -0
- package/src/stores/file-store.js +42 -7
- package/src/tools/smart-selection.js +11 -2
- package/src/training/trajectory-compressor.js +266 -0
- package/src/usage/aggregator.js +206 -0
- package/src/utils/markdown-ansi.js +146 -0
package/src/api/router.js
CHANGED
|
@@ -3,11 +3,14 @@ const { processMessage } = require("../orchestrator");
|
|
|
3
3
|
const { getSession } = require("../sessions");
|
|
4
4
|
const metrics = require("../metrics");
|
|
5
5
|
const logger = require("../logger");
|
|
6
|
+
const config = require("../config");
|
|
6
7
|
const { createRateLimiter } = require("./middleware/rate-limiter");
|
|
7
8
|
const openaiRouter = require("./openai-router");
|
|
8
9
|
const providersRouter = require("./providers-handler");
|
|
9
|
-
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
|
|
10
|
+
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector, analyzeRisk } = require("../routing");
|
|
11
|
+
const { buildInteractionBlock } = require("../routing/interaction");
|
|
10
12
|
const { validateCwd } = require("../workspace");
|
|
13
|
+
const { renderText } = require("../utils/markdown-ansi");
|
|
11
14
|
|
|
12
15
|
const router = express.Router();
|
|
13
16
|
|
|
@@ -15,54 +18,48 @@ const router = express.Router();
|
|
|
15
18
|
const rateLimiter = createRateLimiter();
|
|
16
19
|
|
|
17
20
|
/**
|
|
18
|
-
* Estimate token count for messages
|
|
19
|
-
*
|
|
20
|
-
*
|
|
21
|
-
*
|
|
22
|
-
* @returns {number} Estimated input token count
|
|
21
|
+
* Estimate token count for messages.
|
|
22
|
+
*
|
|
23
|
+
* Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
|
|
24
|
+
* if js-tiktoken is unavailable).
|
|
23
25
|
*/
|
|
24
|
-
|
|
25
|
-
let totalChars = 0;
|
|
26
|
-
|
|
27
|
-
// Count system prompt characters
|
|
28
|
-
if (system) {
|
|
29
|
-
if (typeof system === "string") {
|
|
30
|
-
totalChars += system.length;
|
|
31
|
-
} else if (Array.isArray(system)) {
|
|
32
|
-
system.forEach((block) => {
|
|
33
|
-
if (block.type === "text" && block.text) {
|
|
34
|
-
totalChars += block.text.length;
|
|
35
|
-
}
|
|
36
|
-
});
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
// Count message characters
|
|
41
|
-
messages.forEach((msg) => {
|
|
42
|
-
if (msg.content) {
|
|
43
|
-
if (typeof msg.content === "string") {
|
|
44
|
-
totalChars += msg.content.length;
|
|
45
|
-
} else if (Array.isArray(msg.content)) {
|
|
46
|
-
msg.content.forEach((block) => {
|
|
47
|
-
if (block.type === "text" && block.text) {
|
|
48
|
-
totalChars += block.text.length;
|
|
49
|
-
} else if (block.type === "image" && block.source?.data) {
|
|
50
|
-
// Images: rough estimate based on base64 length
|
|
51
|
-
totalChars += Math.floor(block.source.data.length / 6);
|
|
52
|
-
}
|
|
53
|
-
});
|
|
54
|
-
}
|
|
55
|
-
}
|
|
56
|
-
});
|
|
26
|
+
const { countMessagesTokens } = require("../routing/tokenizer");
|
|
57
27
|
|
|
58
|
-
|
|
59
|
-
return
|
|
28
|
+
function estimateTokenCount(messages = [], system = null, model = null) {
  // All counting is delegated to the shared routing tokenizer; this wrapper
  // only supplies the defaults for omitted arguments.
  const tokenTotal = countMessagesTokens(messages, system, model);
  return tokenTotal;
}
|
|
61
31
|
|
|
32
|
+
// Liveness probes. HEAD / answers with an empty 200, GET / returns a small
// JSON body that names the service, and GET /health returns a bare status.
router.head("/", (_req, res) => {
  res.status(200).end();
});

router.get("/", (_req, res) => {
  const rootPayload = { status: "ok", service: "lynkr" };
  res.json(rootPayload);
});

router.get("/health", (_req, res) => {
  const healthPayload = { status: "ok" };
  res.json(healthPayload);
});
|
|
65
44
|
|
|
45
|
+
// Usage report — same data as `lynkr usage` CLI, served as JSON for
// dashboards / agents / scripts that want to surface spend & savings.
//
// Query parameters:
//   window   — passed through to the aggregator as-is when present
//   days     — positive integer, converted to `${days}d` when `window` is absent
//   flagship, provider, model — forwarded unchanged to the aggregator
//
// Responds with the aggregator's result, or 500 + { error } if it throws.
router.get("/v1/usage", (req, res) => {
  try {
    const aggregator = require("../usage/aggregator");
    const DEFAULT_WINDOW = "30d";

    let window = req.query.window;
    if (!window) {
      // A non-numeric, zero or negative `days` used to produce windows such as
      // "NaNd" or "-3d"; fall back to the default window instead.
      const days = Number.parseInt(req.query.days, 10);
      window = Number.isInteger(days) && days > 0 ? `${days}d` : DEFAULT_WINDOW;
    }

    const usage = aggregator.getUsage({
      window,
      flagship: req.query.flagship,
      provider: req.query.provider,
      model: req.query.model,
    });
    res.json(usage);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
|
62
|
+
|
|
66
63
|
// Routing stats endpoint (Phase 3: Metrics)
|
|
67
64
|
router.get("/routing/stats", (req, res) => {
|
|
68
65
|
const stats = getRoutingStats();
|
|
@@ -260,24 +257,70 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
260
257
|
// Analyze complexity for routing headers (Phase 3)
|
|
261
258
|
const complexity = await analyzeComplexity(req.body);
|
|
262
259
|
timer.mark("analyzeComplexity");
|
|
260
|
+
|
|
261
|
+
// Risk axis runs alongside complexity. Cheap pure-string scan, no I/O.
|
|
262
|
+
let preRouteRisk = null;
|
|
263
|
+
try {
|
|
264
|
+
preRouteRisk = analyzeRisk(req.body);
|
|
265
|
+
} catch (err) {
|
|
266
|
+
logger.debug({ err: err.message }, '[Router] Risk analysis failed in pre-route');
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
// Pre-route tier: high-risk forces COMPLEX, otherwise tier is
|
|
270
|
+
// inferred from the complexity recommendation. The actual final
|
|
271
|
+
// tier may differ (invokeModel re-runs determineProviderSmart) —
|
|
272
|
+
// this is best-effort for header surfacing.
|
|
263
273
|
let preRouteProvider = 'cloud';
|
|
264
|
-
|
|
265
|
-
|
|
274
|
+
let preRouteTier = null;
|
|
275
|
+
let preRouteModel = null;
|
|
276
|
+
let preRouteMethod = 'complexity';
|
|
277
|
+
let preRouteReason = complexity.breakdown?.taskType?.reason || complexity.recommendation;
|
|
278
|
+
|
|
279
|
+
if (preRouteRisk?.level === 'high') {
|
|
266
280
|
try {
|
|
267
281
|
const selector = getModelTierSelector();
|
|
268
|
-
const tierResult = selector.selectModel('
|
|
282
|
+
const tierResult = selector.selectModel('COMPLEX', null);
|
|
269
283
|
preRouteProvider = tierResult.provider;
|
|
284
|
+
preRouteTier = 'COMPLEX';
|
|
285
|
+
preRouteModel = tierResult.model;
|
|
286
|
+
preRouteMethod = 'risk';
|
|
287
|
+
preRouteReason = 'high_risk_forced_tier';
|
|
270
288
|
} catch (_) {
|
|
271
|
-
|
|
289
|
+
// Risk-forced tier not configured; fall back to normal flow.
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
if (!preRouteTier) {
|
|
294
|
+
if (complexity.recommendation === 'local') {
|
|
295
|
+
try {
|
|
296
|
+
const selector = getModelTierSelector();
|
|
297
|
+
const tierResult = selector.selectModel('SIMPLE', null);
|
|
298
|
+
preRouteProvider = tierResult.provider;
|
|
299
|
+
preRouteTier = 'SIMPLE';
|
|
300
|
+
preRouteModel = tierResult.model;
|
|
301
|
+
} catch (_) {
|
|
302
|
+
preRouteProvider = 'ollama';
|
|
303
|
+
}
|
|
272
304
|
}
|
|
273
305
|
}
|
|
274
|
-
|
|
306
|
+
|
|
307
|
+
const preRouteDecision = {
|
|
275
308
|
provider: preRouteProvider,
|
|
309
|
+
tier: preRouteTier,
|
|
310
|
+
model: preRouteModel,
|
|
311
|
+
method: preRouteMethod,
|
|
312
|
+
reason: preRouteReason,
|
|
276
313
|
score: complexity.score,
|
|
277
314
|
threshold: complexity.threshold,
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
315
|
+
risk: preRouteRisk,
|
|
316
|
+
};
|
|
317
|
+
|
|
318
|
+
const routingHeaders = getRoutingHeaders(preRouteDecision);
|
|
319
|
+
|
|
320
|
+
// Build the interaction block once. It travels in headers always
|
|
321
|
+
// (X-Lynkr-Interaction-* derived fields) and optionally into the
|
|
322
|
+
// response body when LYNKR_VISIBLE_ROUTING=true.
|
|
323
|
+
const interaction = buildInteractionBlock(preRouteDecision);
|
|
281
324
|
|
|
282
325
|
// Extract client CWD from request body or header
|
|
283
326
|
const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']);
|
|
@@ -305,6 +348,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
305
348
|
options: {
|
|
306
349
|
maxSteps: req.body?.max_steps,
|
|
307
350
|
maxDurationMs: req.body?.max_duration_ms,
|
|
351
|
+
tenantPolicy: res.locals?.tenantPolicy || null,
|
|
308
352
|
},
|
|
309
353
|
});
|
|
310
354
|
|
|
@@ -424,17 +468,35 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
424
468
|
content_block: { type: "text", text: "" }
|
|
425
469
|
})}\n\n`);
|
|
426
470
|
|
|
427
|
-
// Send text
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
471
|
+
// Send text. With ANSI rendering active the whole block goes out as a single
// delta, because splitting ANSI escape sequences across chunks breaks
// terminal output. Otherwise the text is trickled out line by line.
// ANSI rendering is never applied to HTML content (<artifact> blocks or text
// that starts with "<"): ANSI codes corrupt CSS selectors like `*` and break
// the browser viewer.
const rawBlockText = block.text || "";
const isHtmlContent = rawBlockText.includes("<artifact") || rawBlockText.trimStart().startsWith("<");
const text = isHtmlContent ? rawBlockText : renderText(rawBlockText);
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
if (ansiEnabled && !isHtmlContent) {
  if (text.length > 0) {
    res.write(`event: content_block_delta\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_delta",
      index: i,
      delta: { type: "text_delta", text }
    })}\n\n`);
  }
} else {
  const lines = text.split("\n");
  const lastLineIndex = lines.length - 1;
  lines.forEach((line, lineIndex) => {
    // Re-attach only the separators that split() removed. Appending "\n" to
    // the final segment as well would add a newline the text never had, so
    // the streamed deltas would no longer concatenate back to `text`.
    const chunk = lineIndex < lastLineIndex ? `${line}\n` : line;
    if (chunk.length === 0) {
      return; // empty text, or text that already ended with "\n"
    }
    res.write(`event: content_block_delta\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_delta",
      index: i,
      delta: { type: "text_delta", text: chunk }
    })}\n\n`);
  });
}
|
|
439
501
|
|
|
440
502
|
res.write(`event: content_block_stop\n`);
|
|
@@ -459,22 +521,37 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
459
521
|
res.write(`event: content_block_stop\n`);
|
|
460
522
|
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
|
|
461
523
|
} else if (block.type === "tool_use") {
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
524
|
+
// Original request had no tools → model hallucinated a tool call.
// For write-style tools, pull the file content out of the tool input and
// emit it as a text block wrapped in <artifact>, so open-design routes it to
// the Design panel.
const toolName = (block.name || "").toLowerCase();
const writeTools = new Set(["write", "create_file", "write_file", "str_replace_editor"]);
if (writeTools.has(toolName)) {
  // Different write tools name their payload and path fields differently.
  const rawContent = block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
  const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
  const content = String(rawContent);
  if (content) {
    // Wrap in <artifact> so open-design's parser routes it to the file viewer.
    const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
    // filePath comes from model output. Escape it before placing it inside a
    // double-quoted attribute, otherwise a quote or angle bracket in the path
    // terminates the attribute (or the tag) early and corrupts the artifact.
    const title = filePath
      .replace(/&/g, "&amp;")
      .replace(/"/g, "&quot;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;");
    const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;
    res.write(`event: content_block_start\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_start",
      index: i,
      content_block: { type: "text", text: "" }
    })}\n\n`);
    res.write(`event: content_block_delta\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_delta",
      index: i,
      delta: { type: "text_delta", text: wrapped }
    })}\n\n`);
    res.write(`event: content_block_stop\n`);
    res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
  }
}
|
|
554
|
+
// Non-write tool_use in a tool-less request is silently dropped.
|
|
478
555
|
}
|
|
479
556
|
}
|
|
480
557
|
|
|
@@ -505,6 +582,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
505
582
|
options: {
|
|
506
583
|
maxSteps: req.body?.max_steps,
|
|
507
584
|
maxDurationMs: req.body?.max_duration_ms,
|
|
585
|
+
tenantPolicy: res.locals?.tenantPolicy || null,
|
|
508
586
|
},
|
|
509
587
|
});
|
|
510
588
|
timer.mark("processMessage");
|
|
@@ -566,16 +644,30 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
566
644
|
content_block: { type: "text", text: "" }
|
|
567
645
|
})}\n\n`);
|
|
568
646
|
|
|
569
|
-
const
|
|
570
|
-
const
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
647
|
+
// Send text. HTML content (<artifact> blocks or text starting with "<") is
// passed through untouched; everything else goes through renderText first.
// With ANSI rendering active the block is sent as one delta so escape
// sequences are never split; otherwise it is trickled out line by line.
const rawBlockText2 = block.text || "";
const isHtmlContent2 = rawBlockText2.includes("<artifact") || rawBlockText2.trimStart().startsWith("<");
const text = isHtmlContent2 ? rawBlockText2 : renderText(rawBlockText2);
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
if (ansiEnabled && !isHtmlContent2) {
  if (text.length > 0) {
    res.write(`event: content_block_delta\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_delta",
      index: i,
      delta: { type: "text_delta", text }
    })}\n\n`);
  }
} else {
  const lines = text.split("\n");
  const lastLineIndex = lines.length - 1;
  lines.forEach((line, lineIndex) => {
    // Re-attach only the separators that split() removed. Appending "\n" to
    // the final segment as well would add a newline the text never had, so
    // the streamed deltas would no longer concatenate back to `text`.
    const chunk = lineIndex < lastLineIndex ? `${line}\n` : line;
    if (chunk.length === 0) {
      return; // empty text, or text that already ended with "\n"
    }
    res.write(`event: content_block_delta\n`);
    res.write(`data: ${JSON.stringify({
      type: "content_block_delta",
      index: i,
      delta: { type: "text_delta", text: chunk }
    })}\n\n`);
  });
}
|
|
580
672
|
|
|
581
673
|
res.write(`event: content_block_stop\n`);
|
|
@@ -651,8 +743,33 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
|
|
|
651
743
|
});
|
|
652
744
|
}
|
|
653
745
|
|
|
746
|
+
// Inject visible interaction block into the response body when
|
|
747
|
+
// LYNKR_VISIBLE_ROUTING=true. We only mutate JSON bodies — and only
|
|
748
|
+
// when the response looks like a valid Anthropic Message — so this
|
|
749
|
+
// is a no-op for streamed / error / non-message responses.
|
|
750
|
+
let finalBody = result.body;
|
|
751
|
+
if (
|
|
752
|
+
config.routing?.visibleInteraction &&
|
|
753
|
+
interaction &&
|
|
754
|
+
result.status >= 200 && result.status < 300 &&
|
|
755
|
+
result.body
|
|
756
|
+
) {
|
|
757
|
+
try {
|
|
758
|
+
const text = Buffer.isBuffer(result.body) ? result.body.toString('utf8') : result.body;
|
|
759
|
+
if (typeof text === 'string' && text.startsWith('{')) {
|
|
760
|
+
const parsed = JSON.parse(text);
|
|
761
|
+
if (parsed && typeof parsed === 'object' && parsed.type === 'message') {
|
|
762
|
+
parsed.lynkr_interaction = interaction;
|
|
763
|
+
finalBody = JSON.stringify(parsed);
|
|
764
|
+
}
|
|
765
|
+
}
|
|
766
|
+
} catch (err) {
|
|
767
|
+
logger.debug({ err: err.message }, '[Router] Skipped interaction injection (non-JSON body)');
|
|
768
|
+
}
|
|
769
|
+
}
|
|
770
|
+
|
|
654
771
|
metrics.recordResponse(result.status);
|
|
655
|
-
res.status(result.status).send(
|
|
772
|
+
res.status(result.status).send(finalBody);
|
|
656
773
|
} catch (error) {
|
|
657
774
|
next(error);
|
|
658
775
|
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Hierarchical budget controls (Phase 6.2).
|
|
3
|
+
*
|
|
4
|
+
* Tracks spend at four levels: virtual_key → team → customer → org.
|
|
5
|
+
* Each level has a ceiling; a request must pass *every* level it belongs
|
|
6
|
+
* to.
|
|
7
|
+
*
|
|
8
|
+
* Storage: in-process Map by default. Operations are atomic-by-design (single
|
|
9
|
+
* Node event loop), so no locking needed. For multi-process deployments,
|
|
10
|
+
* swap the storage implementation for Redis (the interface is stable; see
|
|
11
|
+
* RedisBudgetStore stub at the bottom of the file).
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const fs = require('fs');
|
|
15
|
+
const path = require('path');
|
|
16
|
+
const logger = require('../logger');
|
|
17
|
+
|
|
18
|
+
const CONFIG_PATH = path.join(__dirname, '../../data/budgets.json');
|
|
19
|
+
const RELOAD_INTERVAL_MS = 60_000;
|
|
20
|
+
|
|
21
|
+
const LEVELS = ['virtual_key', 'team', 'customer', 'org'];
|
|
22
|
+
|
|
23
|
+
/**
 * In-memory spend ledger backed by a Map.
 *
 * Entries are keyed by `${level}:${id}` and hold `{ spent, periodStart }`.
 * Entry objects are handed out by reference and mutated in place.
 */
class MapBudgetStore {
  constructor() {
    // `${level}:${id}` → { spent, periodStart }
    this._spend = new Map();
  }

  /** Build the Map key for a level/id pair. */
  _key(level, id) {
    return `${level}:${id}`;
  }

  /**
   * Look up the entry for level/id. When nothing is stored, a fresh
   * zero-spend entry stamped with the current time is returned; that
   * fallback entry is NOT written to the Map.
   */
  get(level, id) {
    const stored = this._spend.get(this._key(level, id));
    if (stored) {
      return stored;
    }
    return { spent: 0, periodStart: Date.now() };
  }

  /** Store `value` as the entry for level/id. */
  set(level, id, value) {
    const mapKey = this._key(level, id);
    this._spend.set(mapKey, value);
  }

  /** Add `amount` to the entry's spend, persist the entry and return it. */
  incr(level, id, amount) {
    const entry = this.get(level, id);
    entry.spent += amount;
    this.set(level, id, entry);
    return entry;
  }

  /**
   * Zero the entry and restart its period when it is older than `periodMs`.
   * Returns the (possibly reset) entry either way.
   */
  resetIfStale(level, id, periodMs) {
    const entry = this.get(level, id);
    const ageMs = Date.now() - entry.periodStart;
    if (!(ageMs > periodMs)) {
      return entry;
    }
    entry.spent = 0;
    entry.periodStart = Date.now();
    this.set(level, id, entry);
    return entry;
  }
}
|
|
57
|
+
|
|
58
|
+
// Cached budget configuration and the time of the last load attempt.
let _config = null;
let _configLoadedAt = 0;

/**
 * Return the budget configuration read from CONFIG_PATH.
 *
 * The result is cached and re-read at most once per RELOAD_INTERVAL_MS.
 * - File absent: an empty configuration (default period, no limits) is used.
 * - File unreadable, not valid JSON, or not a JSON object: the failure is
 *   logged and the last successfully loaded configuration is kept, so a
 *   half-written or corrupted file does not silently drop every ceiling.
 *   If nothing was loaded before, the empty configuration is used.
 *
 * @returns {object} configuration object with optional `defaults` / `limits`
 */
function _loadConfig() {
  const now = Date.now();
  if (_config && now - _configLoadedAt < RELOAD_INTERVAL_MS) return _config;

  // Stamp the attempt first so a broken file is retried once per interval
  // rather than on every call.
  _configLoadedAt = now;
  try {
    if (fs.existsSync(CONFIG_PATH)) {
      const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
      // `null`, arrays and primitives are valid JSON but would break the
      // `config.defaults` / `config.limits` lookups done by callers.
      if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
        throw new TypeError('budget config must be a JSON object');
      }
      _config = parsed;
      return _config;
    }
  } catch (err) {
    logger.warn({ err: err.message }, '[HierarchicalBudget] Config load failed');
    if (_config) return _config; // keep the last known-good configuration
  }
  _config = { defaults: { periodMs: 86400000 }, limits: {} };
  return _config;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Applies spend ceilings at every level listed in LEVELS.
 *
 * A context maps level names to ids; levels without an id are skipped.
 * The limit for a level/id pair is `config.limits[level][id]` when present,
 * otherwise `config.defaults[level]`. Levels whose limit is not a number are
 * treated as unlimited.
 */
class HierarchicalBudget {
  /**
   * @param {object} store — spend store exposing get/set/incr/resetIfStale
   */
  constructor(store = new MapBudgetStore()) {
    this.store = store;
  }

  /**
   * Resolve the configured limit for a level/id pair.
   * Only own properties of the per-level table are honored, so ids such as
   * "constructor" or "toString" cannot pick up inherited Object members and
   * thereby bypass the default ceiling.
   */
  static _limitFor(config, level, id) {
    const perLevel = config.limits?.[level];
    if (perLevel && Object.prototype.hasOwnProperty.call(perLevel, id)) {
      const explicit = perLevel[id];
      if (explicit !== null && explicit !== undefined) return explicit;
    }
    return config.defaults?.[level];
  }

  /**
   * Check whether all relevant ceilings still allow `amount` of spend.
   * A missing, non-finite or negative amount is treated as 0, so a level
   * that is already over its limit is still reported as exceeded.
   * @param {object} context — { virtual_key, team, customer, org }
   * @param {number} amount — dollars
   * @returns {{ ok: boolean, exceeded?: { level, id, limit, spent } }}
   */
  check(context, amount) {
    const config = _loadConfig();
    const periodMs = config.defaults?.periodMs || 86400000;
    const requested = Number.isFinite(amount) && amount > 0 ? amount : 0;
    const scope = context || {};
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      const limit = HierarchicalBudget._limitFor(config, level, id);
      if (typeof limit !== 'number') continue;
      const current = this.store.resetIfStale(level, id, periodMs);
      if (current.spent + requested > limit) {
        return {
          ok: false,
          exceeded: { level, id, limit, spent: current.spent },
        };
      }
    }
    return { ok: true };
  }

  /**
   * Record spend after a request completes. Increments all relevant levels.
   * Non-finite or non-positive amounts are ignored so a bad cost estimate
   * cannot turn a counter into NaN/Infinity. An expired period is rolled
   * over before the amount is added, so the spend lands in the new period.
   * @param {object} context — { virtual_key, team, customer, org }
   * @param {number} amount — dollars
   */
  record(context, amount) {
    if (!Number.isFinite(amount) || amount <= 0) return;
    const config = _loadConfig();
    const periodMs = config.defaults?.periodMs || 86400000;
    const scope = context || {};
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      this.store.resetIfStale(level, id, periodMs);
      this.store.incr(level, id, amount);
    }
  }

  /**
   * Summary for the dashboard.
   * @param {object} context — { virtual_key, team, customer, org }
   * @returns {object} per-level `{ id, spent, limit, periodStart }` for every
   *   level that has an id in `context`
   */
  status(context) {
    const config = _loadConfig();
    const periodMs = config.defaults?.periodMs || 86400000;
    const scope = context || {};
    const out = {};
    for (const level of LEVELS) {
      const id = scope[level];
      if (!id) continue;
      const limit = HierarchicalBudget._limitFor(config, level, id);
      const current = this.store.resetIfStale(level, id, periodMs);
      out[level] = { id, spent: current.spent, limit, periodStart: current.periodStart };
    }
    return out;
  }
}
|
|
135
|
+
|
|
136
|
+
// Module-level singleton, created on the first getHierarchicalBudget() call.
let _instance = null;

/**
 * Return the shared HierarchicalBudget, constructing it with the default
 * store the first time it is requested.
 */
function getHierarchicalBudget() {
  if (_instance) {
    return _instance;
  }
  _instance = new HierarchicalBudget();
  return _instance;
}
|
|
141
|
+
|
|
142
|
+
/**
 * Placeholder for a Redis-backed store, intended for deployments that run
 * more than one Node process. It is meant to expose the same interface as
 * MapBudgetStore so HierarchicalBudget can use either. Not implemented yet:
 * constructing it always throws.
 */
class RedisBudgetStore {
  constructor(_redisClient) {
    const notImplemented = new Error('RedisBudgetStore not implemented. Stub — wire your Redis client and use INCRBY with periodic TTL.');
    throw notImplemented;
  }
}
|
|
152
|
+
|
|
153
|
+
module.exports = {
|
|
154
|
+
HierarchicalBudget,
|
|
155
|
+
MapBudgetStore,
|
|
156
|
+
RedisBudgetStore,
|
|
157
|
+
getHierarchicalBudget,
|
|
158
|
+
LEVELS,
|
|
159
|
+
};
|
package/src/cache/semantic.js
CHANGED
|
@@ -14,16 +14,29 @@ const logger = require('../logger');
|
|
|
14
14
|
const config = require('../config');
|
|
15
15
|
|
|
16
16
|
// Default configuration (can be overridden via config.semanticCache)
|
|
17
|
+
//
|
|
18
|
+
// Phase 2.1 of the routing overhaul: defaults aligned with the plan
|
|
19
|
+
// (10K entries). Note: the similarity threshold default below is still 0.92, not the 0.95 reported in the GPT Semantic Cache research.
|
|
20
|
+
// Short-TTL keywords trigger a reduced TTL rather than blocking caching.
|
|
17
21
|
function getDefaultConfig() {
|
|
18
22
|
const configOverrides = config.semanticCache || {};
|
|
19
23
|
return {
|
|
20
24
|
enabled: configOverrides.enabled ?? true,
|
|
21
25
|
similarityThreshold: configOverrides.similarityThreshold ?? 0.92,
|
|
22
|
-
maxEntries: configOverrides.maxEntries ??
|
|
26
|
+
maxEntries: configOverrides.maxEntries ?? 10000,
|
|
23
27
|
ttlMs: configOverrides.ttlMs ?? 3600000, // 1 hour
|
|
28
|
+
shortTtlMs: configOverrides.shortTtlMs ?? 300000, // 5 min for time-sensitive queries
|
|
29
|
+
shortTtlPatterns: [
|
|
30
|
+
/\bnow\b/i,
|
|
31
|
+
/\btoday\b/i,
|
|
32
|
+
/\bcurrent\b/i,
|
|
33
|
+
/\blatest\b/i,
|
|
34
|
+
/\brecent\b/i,
|
|
35
|
+
/\bjust\s+now\b/i,
|
|
36
|
+
],
|
|
24
37
|
minPromptLength: 20, // Don't cache very short prompts
|
|
25
38
|
maxPromptLength: 5000, // Don't cache very long prompts (too specific)
|
|
26
|
-
excludePatterns: [ // Patterns to exclude from caching
|
|
39
|
+
excludePatterns: [ // Patterns to fully exclude from caching
|
|
27
40
|
/current time/i,
|
|
28
41
|
/today's date/i,
|
|
29
42
|
/right now/i,
|
|
@@ -33,6 +46,19 @@ function getDefaultConfig() {
|
|
|
33
46
|
};
|
|
34
47
|
}
|
|
35
48
|
|
|
49
|
+
/**
 * Phase 2.1 helper: pick the cache TTL for a prompt.
 *
 * Returns `cfg.shortTtlMs` as soon as one of `cfg.shortTtlPatterns` matches
 * the prompt (time-sensitive wording such as "now", "today", "current"), so
 * stale answers are not kept for the full TTL. Returns `cfg.ttlMs` when the
 * prompt is empty, when no pattern list is configured, or when nothing
 * matches.
 */
function _ttlForPrompt(promptText, cfg) {
  const patterns = cfg.shortTtlPatterns;
  const canMatch = Boolean(promptText) && Array.isArray(patterns);
  const isTimeSensitive = canMatch && patterns.some((pattern) => pattern.test(promptText));
  return isTimeSensitive ? cfg.shortTtlMs : cfg.ttlMs;
}
|
|
61
|
+
|
|
36
62
|
class SemanticCache {
|
|
37
63
|
constructor(options = {}) {
|
|
38
64
|
this.config = { ...getDefaultConfig(), ...options };
|