lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/README.md +21 -10
  2. package/bin/cli.js +18 -1
  3. package/bin/lynkr-trajectory.js +136 -0
  4. package/bin/lynkr-usage.js +219 -0
  5. package/funding.json +110 -0
  6. package/package.json +4 -2
  7. package/public/dashboard.html +665 -0
  8. package/scripts/build-knn-index.js +130 -0
  9. package/scripts/calibrate-thresholds.js +197 -0
  10. package/scripts/compare-policies.js +67 -0
  11. package/scripts/learn-output-ratios.js +162 -0
  12. package/scripts/refresh-pricing.js +122 -0
  13. package/scripts/run-routerarena.js +26 -0
  14. package/scripts/sample-regret.js +84 -0
  15. package/scripts/train-risk-classifier.js +191 -0
  16. package/src/api/files-router.js +6 -6
  17. package/src/api/middleware/budget-enforcer.js +60 -0
  18. package/src/api/middleware/budget.js +19 -1
  19. package/src/api/middleware/load-shedding.js +17 -0
  20. package/src/api/middleware/tenant.js +21 -0
  21. package/src/api/openai-router.js +1 -1
  22. package/src/api/router.js +204 -87
  23. package/src/budget/hierarchical-budget.js +159 -0
  24. package/src/cache/semantic.js +28 -2
  25. package/src/clients/databricks.js +68 -10
  26. package/src/clients/openai-format.js +31 -5
  27. package/src/config/index.js +246 -43
  28. package/src/context/toon.js +5 -4
  29. package/src/dashboard/api.js +170 -0
  30. package/src/dashboard/router.js +13 -0
  31. package/src/headroom/client.js +3 -109
  32. package/src/headroom/index.js +0 -14
  33. package/src/memory/search.js +0 -50
  34. package/src/orchestrator/index.js +106 -11
  35. package/src/orchestrator/preflight.js +188 -0
  36. package/src/prompts/system.js +34 -6
  37. package/src/routing/bandit.js +246 -0
  38. package/src/routing/cascade.js +106 -0
  39. package/src/routing/complexity-analyzer.js +7 -15
  40. package/src/routing/confidence-scorer.js +121 -0
  41. package/src/routing/context-validator.js +71 -0
  42. package/src/routing/cost-optimizer.js +5 -2
  43. package/src/routing/deadline.js +52 -0
  44. package/src/routing/drift-monitor.js +113 -0
  45. package/src/routing/embedding-cache.js +77 -0
  46. package/src/routing/index.js +374 -4
  47. package/src/routing/interaction.js +183 -0
  48. package/src/routing/knn-router.js +206 -0
  49. package/src/routing/latency-tracker.js +113 -71
  50. package/src/routing/model-tiers.js +156 -6
  51. package/src/routing/output-ratios.js +57 -0
  52. package/src/routing/regret-estimator.js +91 -0
  53. package/src/routing/reward-pipeline.js +62 -0
  54. package/src/routing/risk-analyzer.js +194 -0
  55. package/src/routing/risk-classifier.js +130 -0
  56. package/src/routing/shadow-mode.js +77 -0
  57. package/src/routing/telemetry.js +7 -0
  58. package/src/routing/tenant-policy.js +96 -0
  59. package/src/routing/tokenizer.js +162 -0
  60. package/src/server.js +12 -0
  61. package/src/stores/file-store.js +42 -7
  62. package/src/tools/smart-selection.js +11 -2
  63. package/src/training/trajectory-compressor.js +266 -0
  64. package/src/usage/aggregator.js +206 -0
  65. package/src/utils/markdown-ansi.js +146 -0
package/src/api/router.js CHANGED
@@ -3,11 +3,14 @@ const { processMessage } = require("../orchestrator");
3
3
  const { getSession } = require("../sessions");
4
4
  const metrics = require("../metrics");
5
5
  const logger = require("../logger");
6
+ const config = require("../config");
6
7
  const { createRateLimiter } = require("./middleware/rate-limiter");
7
8
  const openaiRouter = require("./openai-router");
8
9
  const providersRouter = require("./providers-handler");
9
- const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
10
+ const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector, analyzeRisk } = require("../routing");
11
+ const { buildInteractionBlock } = require("../routing/interaction");
10
12
  const { validateCwd } = require("../workspace");
13
+ const { renderText } = require("../utils/markdown-ansi");
11
14
 
12
15
  const router = express.Router();
13
16
 
@@ -15,54 +18,48 @@ const router = express.Router();
15
18
  const rateLimiter = createRateLimiter();
16
19
 
17
20
  /**
18
- * Estimate token count for messages
19
- * Uses rough approximation of ~4 characters per token
20
- * @param {Array} messages - Array of message objects with role and content
21
- * @param {string|Array} system - System prompt (string or array of content blocks)
22
- * @returns {number} Estimated input token count
21
+ * Estimate token count for messages.
22
+ *
23
+ * Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
24
+ * if js-tiktoken is unavailable).
23
25
  */
24
- function estimateTokenCount(messages = [], system = null) {
25
- let totalChars = 0;
26
-
27
- // Count system prompt characters
28
- if (system) {
29
- if (typeof system === "string") {
30
- totalChars += system.length;
31
- } else if (Array.isArray(system)) {
32
- system.forEach((block) => {
33
- if (block.type === "text" && block.text) {
34
- totalChars += block.text.length;
35
- }
36
- });
37
- }
38
- }
39
-
40
- // Count message characters
41
- messages.forEach((msg) => {
42
- if (msg.content) {
43
- if (typeof msg.content === "string") {
44
- totalChars += msg.content.length;
45
- } else if (Array.isArray(msg.content)) {
46
- msg.content.forEach((block) => {
47
- if (block.type === "text" && block.text) {
48
- totalChars += block.text.length;
49
- } else if (block.type === "image" && block.source?.data) {
50
- // Images: rough estimate based on base64 length
51
- totalChars += Math.floor(block.source.data.length / 6);
52
- }
53
- });
54
- }
55
- }
56
- });
26
+ const { countMessagesTokens } = require("../routing/tokenizer");
57
27
 
58
- // Estimate tokens: ~4 characters per token
59
- return Math.ceil(totalChars / 4);
28
+ function estimateTokenCount(messages = [], system = null, model = null) {
29
+ return countMessagesTokens(messages, system, model);
60
30
  }
61
31
 
// Liveness probes.
//   HEAD /       -> 200 with an empty body
//   GET  /       -> { status: "ok", service: "lynkr" }
//   GET  /health -> { status: "ok" }
router.head("/", (_req, res) => {
  res.status(200);
  res.end();
});

router.get("/", (_req, res) => {
  const payload = { status: "ok", service: "lynkr" };
  res.json(payload);
});

router.get("/health", (_req, res) => {
  const payload = { status: "ok" };
  res.json(payload);
});
65
44
 
/**
 * Resolve the reporting window for GET /v1/usage from the parsed query string.
 *
 * Precedence: a non-empty string `window` wins; otherwise a positive integer
 * `days` is turned into `"<days>d"`; otherwise the 30-day default is used.
 * Non-numeric, zero or negative `days` values fall back to the default instead
 * of producing strings such as "NaNd", and a repeated `window` parameter
 * (which arrives as an array) is ignored rather than passed through.
 *
 * @param {object} [query] - Parsed query parameters (req.query).
 * @returns {string} Window specifier handed to the usage aggregator.
 */
function resolveUsageWindow(query) {
  const requested = query?.window;
  if (typeof requested === "string" && requested !== "") {
    return requested;
  }
  const days = Number.parseInt(query?.days, 10);
  return Number.isInteger(days) && days > 0 ? `${days}d` : "30d";
}

// Usage report — same data as `lynkr usage` CLI, served as JSON for
// dashboards / agents / scripts that want to surface spend & savings.
router.get("/v1/usage", (req, res) => {
  try {
    // Required inside the handler, as in the original code, so the aggregator
    // module is only loaded when this endpoint is actually hit.
    const aggregator = require("../usage/aggregator");
    const usage = aggregator.getUsage({
      window: resolveUsageWindow(req.query),
      flagship: req.query.flagship,
      provider: req.query.provider,
      model: req.query.model,
    });
    res.json(usage);
  } catch (err) {
    // Record the failure server-side; the client still gets the same 500 body.
    logger.error({ err: err.message }, "[Router] Usage report failed");
    res.status(500).json({ error: err.message });
  }
});
62
+
66
63
  // Routing stats endpoint (Phase 3: Metrics)
67
64
  router.get("/routing/stats", (req, res) => {
68
65
  const stats = getRoutingStats();
@@ -260,24 +257,70 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
260
257
  // Analyze complexity for routing headers (Phase 3)
261
258
  const complexity = await analyzeComplexity(req.body);
262
259
  timer.mark("analyzeComplexity");
260
+
261
+ // Risk axis runs alongside complexity. Cheap pure-string scan, no I/O.
262
+ let preRouteRisk = null;
263
+ try {
264
+ preRouteRisk = analyzeRisk(req.body);
265
+ } catch (err) {
266
+ logger.debug({ err: err.message }, '[Router] Risk analysis failed in pre-route');
267
+ }
268
+
269
+ // Pre-route tier: high-risk forces COMPLEX, otherwise tier is
270
+ // inferred from the complexity recommendation. The actual final
271
+ // tier may differ (invokeModel re-runs determineProviderSmart) —
272
+ // this is best-effort for header surfacing.
263
273
  let preRouteProvider = 'cloud';
264
- if (complexity.recommendation === 'local') {
265
- // Use tier config to determine actual provider instead of hardcoding 'ollama'
274
+ let preRouteTier = null;
275
+ let preRouteModel = null;
276
+ let preRouteMethod = 'complexity';
277
+ let preRouteReason = complexity.breakdown?.taskType?.reason || complexity.recommendation;
278
+
279
+ if (preRouteRisk?.level === 'high') {
266
280
  try {
267
281
  const selector = getModelTierSelector();
268
- const tierResult = selector.selectModel('SIMPLE', null);
282
+ const tierResult = selector.selectModel('COMPLEX', null);
269
283
  preRouteProvider = tierResult.provider;
284
+ preRouteTier = 'COMPLEX';
285
+ preRouteModel = tierResult.model;
286
+ preRouteMethod = 'risk';
287
+ preRouteReason = 'high_risk_forced_tier';
270
288
  } catch (_) {
271
- preRouteProvider = 'ollama';
289
+ // Risk-forced tier not configured; fall back to normal flow.
290
+ }
291
+ }
292
+
293
+ if (!preRouteTier) {
294
+ if (complexity.recommendation === 'local') {
295
+ try {
296
+ const selector = getModelTierSelector();
297
+ const tierResult = selector.selectModel('SIMPLE', null);
298
+ preRouteProvider = tierResult.provider;
299
+ preRouteTier = 'SIMPLE';
300
+ preRouteModel = tierResult.model;
301
+ } catch (_) {
302
+ preRouteProvider = 'ollama';
303
+ }
272
304
  }
273
305
  }
274
- const routingHeaders = getRoutingHeaders({
306
+
307
+ const preRouteDecision = {
275
308
  provider: preRouteProvider,
309
+ tier: preRouteTier,
310
+ model: preRouteModel,
311
+ method: preRouteMethod,
312
+ reason: preRouteReason,
276
313
  score: complexity.score,
277
314
  threshold: complexity.threshold,
278
- method: 'complexity',
279
- reason: complexity.breakdown?.taskType?.reason || complexity.recommendation,
280
- });
315
+ risk: preRouteRisk,
316
+ };
317
+
318
+ const routingHeaders = getRoutingHeaders(preRouteDecision);
319
+
320
+ // Build the interaction block once. It travels in headers always
321
+ // (X-Lynkr-Interaction-* derived fields) and optionally into the
322
+ // response body when LYNKR_VISIBLE_ROUTING=true.
323
+ const interaction = buildInteractionBlock(preRouteDecision);
281
324
 
282
325
  // Extract client CWD from request body or header
283
326
  const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']);
@@ -305,6 +348,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
305
348
  options: {
306
349
  maxSteps: req.body?.max_steps,
307
350
  maxDurationMs: req.body?.max_duration_ms,
351
+ tenantPolicy: res.locals?.tenantPolicy || null,
308
352
  },
309
353
  });
310
354
 
@@ -424,17 +468,35 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
424
468
  content_block: { type: "text", text: "" }
425
469
  })}\n\n`);
426
470
 
427
- // Send text in chunks
428
- const text = block.text || "";
429
- const chunkSize = 20;
430
- for (let j = 0; j < text.length; j += chunkSize) {
431
- const chunk = text.slice(j, j + chunkSize);
432
- res.write(`event: content_block_delta\n`);
433
- res.write(`data: ${JSON.stringify({
434
- type: "content_block_delta",
435
- index: i,
436
- delta: { type: "text_delta", text: chunk }
437
- })}\n\n`);
471
+ // Send text one chunk when ANSI rendering is active (splitting
472
+ // ANSI escape sequences across 20-char chunks breaks terminal output).
473
+ // Plain text falls back to line-level chunks for a trickle effect.
474
+ // Never apply ANSI rendering to HTML content (<artifact> blocks):
475
+ // ANSI codes corrupt CSS selectors like `*` and break the browser viewer.
476
+ const rawBlockText = block.text || "";
477
+ const isHtmlContent = rawBlockText.includes("<artifact") || rawBlockText.trimStart().startsWith("<");
478
+ const text = isHtmlContent ? rawBlockText : renderText(rawBlockText);
479
+ const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
480
+ if (ansiEnabled && !isHtmlContent) {
481
+ if (text.length > 0) {
482
+ res.write(`event: content_block_delta\n`);
483
+ res.write(`data: ${JSON.stringify({
484
+ type: "content_block_delta",
485
+ index: i,
486
+ delta: { type: "text_delta", text }
487
+ })}\n\n`);
488
+ }
489
+ } else {
490
+ const lines = text.split("\n");
491
+ for (const line of lines) {
492
+ const lineWithNl = line + "\n";
493
+ res.write(`event: content_block_delta\n`);
494
+ res.write(`data: ${JSON.stringify({
495
+ type: "content_block_delta",
496
+ index: i,
497
+ delta: { type: "text_delta", text: lineWithNl }
498
+ })}\n\n`);
499
+ }
438
500
  }
439
501
 
440
502
  res.write(`event: content_block_stop\n`);
@@ -459,22 +521,37 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
459
521
  res.write(`event: content_block_stop\n`);
460
522
  res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
461
523
  } else if (block.type === "tool_use") {
462
- res.write(`event: content_block_start\n`);
463
- res.write(`data: ${JSON.stringify({
464
- type: "content_block_start",
465
- index: i,
466
- content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
467
- })}\n\n`);
468
-
469
- res.write(`event: content_block_delta\n`);
470
- res.write(`data: ${JSON.stringify({
471
- type: "content_block_delta",
472
- index: i,
473
- delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
474
- })}\n\n`);
475
-
476
- res.write(`event: content_block_stop\n`);
477
- res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
524
+ // Original request had no tools → model hallucinated a tool call.
525
+ // Extract file content from write-style tools and wrap it in an
526
+ // <artifact> block so open-design routes it to the Design panel.
527
+ const toolName = (block.name || "").toLowerCase();
528
+ const writeTools = new Set(["write", "create_file", "write_file", "str_replace_editor"]);
529
+ if (writeTools.has(toolName)) {
530
+ const rawContent = block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
531
+ const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
532
+ const content = String(rawContent);
533
+ if (content) {
534
+ // Wrap in <artifact> so open-design's parser routes it to the file viewer.
535
+ const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
536
+ const title = filePath;
537
+ const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;
538
+ res.write(`event: content_block_start\n`);
539
+ res.write(`data: ${JSON.stringify({
540
+ type: "content_block_start",
541
+ index: i,
542
+ content_block: { type: "text", text: "" }
543
+ })}\n\n`);
544
+ res.write(`event: content_block_delta\n`);
545
+ res.write(`data: ${JSON.stringify({
546
+ type: "content_block_delta",
547
+ index: i,
548
+ delta: { type: "text_delta", text: wrapped }
549
+ })}\n\n`);
550
+ res.write(`event: content_block_stop\n`);
551
+ res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
552
+ }
553
+ }
554
+ // Non-write tool_use in a tool-less request is silently dropped.
478
555
  }
479
556
  }
480
557
 
@@ -505,6 +582,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
505
582
  options: {
506
583
  maxSteps: req.body?.max_steps,
507
584
  maxDurationMs: req.body?.max_duration_ms,
585
+ tenantPolicy: res.locals?.tenantPolicy || null,
508
586
  },
509
587
  });
510
588
  timer.mark("processMessage");
@@ -566,16 +644,30 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
566
644
  content_block: { type: "text", text: "" }
567
645
  })}\n\n`);
568
646
 
569
- const text = block.text || "";
570
- const chunkSize = 20;
571
- for (let j = 0; j < text.length; j += chunkSize) {
572
- const chunk = text.slice(j, j + chunkSize);
573
- res.write(`event: content_block_delta\n`);
574
- res.write(`data: ${JSON.stringify({
575
- type: "content_block_delta",
576
- index: i,
577
- delta: { type: "text_delta", text: chunk }
578
- })}\n\n`);
647
+ const rawBlockText2 = block.text || "";
648
+ const isHtmlContent2 = rawBlockText2.includes("<artifact") || rawBlockText2.trimStart().startsWith("<");
649
+ const text = isHtmlContent2 ? rawBlockText2 : renderText(rawBlockText2);
650
+ const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
651
+ if (ansiEnabled && !isHtmlContent2) {
652
+ if (text.length > 0) {
653
+ res.write(`event: content_block_delta\n`);
654
+ res.write(`data: ${JSON.stringify({
655
+ type: "content_block_delta",
656
+ index: i,
657
+ delta: { type: "text_delta", text }
658
+ })}\n\n`);
659
+ }
660
+ } else {
661
+ const lines = text.split("\n");
662
+ for (const line of lines) {
663
+ const lineWithNl = line + "\n";
664
+ res.write(`event: content_block_delta\n`);
665
+ res.write(`data: ${JSON.stringify({
666
+ type: "content_block_delta",
667
+ index: i,
668
+ delta: { type: "text_delta", text: lineWithNl }
669
+ })}\n\n`);
670
+ }
579
671
  }
580
672
 
581
673
  res.write(`event: content_block_stop\n`);
@@ -651,8 +743,33 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
651
743
  });
652
744
  }
653
745
 
746
+ // Inject visible interaction block into the response body when
747
+ // LYNKR_VISIBLE_ROUTING=true. We only mutate JSON bodies — and only
748
+ // when the response looks like a valid Anthropic Message — so this
749
+ // is a no-op for streamed / error / non-message responses.
750
+ let finalBody = result.body;
751
+ if (
752
+ config.routing?.visibleInteraction &&
753
+ interaction &&
754
+ result.status >= 200 && result.status < 300 &&
755
+ result.body
756
+ ) {
757
+ try {
758
+ const text = Buffer.isBuffer(result.body) ? result.body.toString('utf8') : result.body;
759
+ if (typeof text === 'string' && text.startsWith('{')) {
760
+ const parsed = JSON.parse(text);
761
+ if (parsed && typeof parsed === 'object' && parsed.type === 'message') {
762
+ parsed.lynkr_interaction = interaction;
763
+ finalBody = JSON.stringify(parsed);
764
+ }
765
+ }
766
+ } catch (err) {
767
+ logger.debug({ err: err.message }, '[Router] Skipped interaction injection (non-JSON body)');
768
+ }
769
+ }
770
+
654
771
  metrics.recordResponse(result.status);
655
- res.status(result.status).send(result.body);
772
+ res.status(result.status).send(finalBody);
656
773
  } catch (error) {
657
774
  next(error);
658
775
  }
@@ -0,0 +1,159 @@
1
+ /**
2
+ * Hierarchical budget controls (Phase 6.2).
3
+ *
4
+ * Tracks spend at four levels: virtual_key → team → customer → org.
5
+ * Each level has a ceiling; a request must pass *every* level it belongs
6
+ * to.
7
+ *
8
+ * Storage: in-process Map by default. Operations are atomic-by-design (single
9
+ * Node event loop), so no locking needed. For multi-process deployments,
10
+ * swap the storage implementation for Redis (the interface is stable; see
11
+ * RedisBudgetStore stub at the bottom of the file).
12
+ */
13
+
14
+ const fs = require('fs');
15
+ const path = require('path');
16
+ const logger = require('../logger');
17
+
18
+ const CONFIG_PATH = path.join(__dirname, '../../data/budgets.json');
19
+ const RELOAD_INTERVAL_MS = 60_000;
20
+
21
+ const LEVELS = ['virtual_key', 'team', 'customer', 'org'];
22
+
/**
 * In-process spend ledger backed by a Map.
 *
 * Entries are keyed by `${level}:${id}` and hold `{ spent, periodStart }`,
 * where `periodStart` is a millisecond timestamp from Date.now().
 */
class MapBudgetStore {
  constructor() {
    this._spend = new Map(); // `${level}:${id}` → { spent, periodStart }
  }

  /** Build the Map key for a (level, id) pair. */
  _key(level, id) {
    return `${level}:${id}`;
  }

  /**
   * Fetch the ledger entry for (level, id).
   * When nothing is stored yet, a fresh zero-spend record is returned; that
   * record is NOT inserted into the Map until set() or incr() stores it.
   * @returns {{ spent: number, periodStart: number }}
   */
  get(level, id) {
    return this._spend.get(this._key(level, id)) || { spent: 0, periodStart: Date.now() };
  }

  /** Store `value` as the ledger entry for (level, id). */
  set(level, id, value) {
    this._spend.set(this._key(level, id), value);
  }

  /**
   * Add `amount` to the entry's spend and persist it.
   *
   * Non-finite amounts (NaN, ±Infinity, non-numbers) are ignored: adding NaN
   * would turn `spent` into NaN for good, after which every
   * `spent + amount > limit` comparison is false and the ceiling stops working.
   *
   * @returns {{ spent: number, periodStart: number }} The entry after the update.
   */
  incr(level, id, amount) {
    const current = this.get(level, id);
    if (!Number.isFinite(amount)) {
      return current;
    }
    current.spent += amount;
    this.set(level, id, current);
    return current;
  }

  /**
   * Zero the entry and restart its period when more than `periodMs`
   * milliseconds have elapsed since `periodStart`.
   * @returns {{ spent: number, periodStart: number }} The (possibly reset) entry.
   */
  resetIfStale(level, id, periodMs) {
    const current = this.get(level, id);
    if (Date.now() - current.periodStart > periodMs) {
      current.spent = 0;
      current.periodStart = Date.now();
      this.set(level, id, current);
    }
    return current;
  }
}
57
+
let _config = null;
let _configLoadedAt = 0;

/**
 * Return the budget configuration, re-reading CONFIG_PATH at most once per
 * RELOAD_INTERVAL_MS.
 *
 * Outcomes of a reload:
 *  - file parses to a JSON object      → that object becomes the config;
 *  - file does not exist               → built-in defaults (no limits);
 *  - file is unreadable, malformed, or
 *    not a JSON object                 → the previously loaded config is kept
 *                                        (built-in defaults if there is none),
 *                                        so a bad edit does not silently drop
 *                                        every configured ceiling.
 *
 * @returns {object} Config object; callers read `defaults` and `limits` from it.
 */
function _loadConfig() {
  if (_config && Date.now() - _configLoadedAt < RELOAD_INTERVAL_MS) return _config;

  let next = null;
  try {
    // Read directly instead of existsSync + read: avoids the check/read race,
    // and a missing file simply surfaces as ENOENT below.
    const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      next = parsed;
    } else {
      logger.debug({ path: CONFIG_PATH }, '[HierarchicalBudget] Config ignored (not a JSON object)');
      next = _config;
    }
  } catch (err) {
    if (err.code !== 'ENOENT') {
      logger.debug({ err: err.message }, '[HierarchicalBudget] Config load failed');
      next = _config; // keep the last good config
    }
  }

  _config = next || { defaults: { periodMs: 86400000 }, limits: {} };
  _configLoadedAt = Date.now();
  return _config;
}
75
+
/**
 * Enforces spend ceilings across the levels listed in LEVELS.
 *
 * A context object maps level names to ids (e.g. `{ team: 'alpha' }`); levels
 * with no id in the context are skipped. Spend is kept in `store`, which must
 * provide `resetIfStale(level, id, periodMs)` and `incr(level, id, amount)`.
 */
class HierarchicalBudget {
  constructor(store = new MapBudgetStore()) {
    this.store = store;
  }

  /** Budget period in milliseconds: `defaults.periodMs`, or 24h when unset. */
  _periodMs(config) {
    return config.defaults?.periodMs || 86400000;
  }

  /**
   * Ceiling for (level, id): the per-id entry in `config.limits[level]`, else
   * the level default in `config.defaults[level]`.
   *
   * Only OWN properties of the per-level table count. Ids come from the
   * request context, and an id such as "constructor" would otherwise resolve
   * to an inherited function, fail the number check in check(), and skip the
   * level entirely — including its default ceiling.
   */
  _limitFor(config, level, id) {
    const table = config.limits?.[level];
    const hasEntry = table != null && Object.prototype.hasOwnProperty.call(table, id);
    const specific = hasEntry ? table[id] : undefined;
    return specific ?? config.defaults?.[level];
  }

  /**
   * Check whether all relevant ceilings still allow `amount` of spend.
   * @param {object} context — { virtual_key, team, customer, org }; a missing
   *   context is treated as empty (nothing to check).
   * @param {number} amount — dollars. A non-finite amount is treated as 0 so
   *   the comparison cannot be defeated by NaN; an already-exceeded ceiling
   *   still fails.
   * @returns {{ ok: boolean, exceeded?: { level, id, limit, spent } }}
   */
  check(context, amount) {
    const config = _loadConfig();
    const periodMs = this._periodMs(config);
    const ctx = context || {};
    const requested = Number.isFinite(amount) ? amount : 0;
    for (const level of LEVELS) {
      const id = ctx[level];
      if (!id) continue;
      const limit = this._limitFor(config, level, id);
      if (typeof limit !== 'number') continue; // no numeric ceiling configured
      const current = this.store.resetIfStale(level, id, periodMs);
      if (current.spent + requested > limit) {
        return {
          ok: false,
          exceeded: { level, id, limit, spent: current.spent },
        };
      }
    }
    return { ok: true };
  }

  /**
   * Record spend after a request completes. Increments all relevant levels.
   * Amounts that are not finite positive numbers are ignored. Each level's
   * period is rolled over first, so spend that lands after the period ended
   * is counted in the new period instead of being added to an expired one
   * (which the next check() would then wipe).
   */
  record(context, amount) {
    if (!Number.isFinite(amount) || amount <= 0) return;
    const periodMs = this._periodMs(_loadConfig());
    const ctx = context || {};
    for (const level of LEVELS) {
      const id = ctx[level];
      if (!id) continue;
      this.store.resetIfStale(level, id, periodMs);
      this.store.incr(level, id, amount);
    }
  }

  /**
   * Summary for the dashboard.
   * @param {object} context — same shape as for check().
   * @returns {object} Map of level → { id, spent, limit, periodStart } for
   *   every level present in the context; `limit` is undefined when no
   *   ceiling is configured.
   */
  status(context) {
    const config = _loadConfig();
    const periodMs = this._periodMs(config);
    const ctx = context || {};
    const out = {};
    for (const level of LEVELS) {
      const id = ctx[level];
      if (!id) continue;
      const limit = this._limitFor(config, level, id);
      const current = this.store.resetIfStale(level, id, periodMs);
      out[level] = { id, spent: current.spent, limit, periodStart: current.periodStart };
    }
    return out;
  }
}
135
+
let _instance = null;

/**
 * Accessor for the shared HierarchicalBudget.
 * The instance is built on the first call and the same object is returned
 * on every call after that.
 */
function getHierarchicalBudget() {
  if (_instance) {
    return _instance;
  }
  _instance = new HierarchicalBudget();
  return _instance;
}
141
+
/**
 * Placeholder for a Redis-backed budget store, intended for deployments that
 * run more than one Node process. It is meant to expose the same interface as
 * MapBudgetStore so HierarchicalBudget can use either one.
 *
 * Not implemented: constructing it always throws.
 */
class RedisBudgetStore {
  constructor(_redisClient) {
    const message = 'RedisBudgetStore not implemented. Stub — wire your Redis client and use INCRBY with periodic TTL.';
    throw new Error(message);
  }
}
152
+
153
+ module.exports = {
154
+ HierarchicalBudget,
155
+ MapBudgetStore,
156
+ RedisBudgetStore,
157
+ getHierarchicalBudget,
158
+ LEVELS,
159
+ };
@@ -14,16 +14,29 @@ const logger = require('../logger');
14
14
  const config = require('../config');
15
15
 
16
16
  // Default configuration (can be overridden via config.semanticCache)
17
+ //
18
+ // Phase 2.1 of the routing overhaul: defaults aligned with the plan
19
+ // (10K entries; the plan cites a 0.95 threshold from research on GPT Semantic Cache — note the similarityThreshold default below is still 0.92).
20
+ // Short-TTL keywords trigger a reduced TTL rather than blocking caching.
17
21
  function getDefaultConfig() {
18
22
  const configOverrides = config.semanticCache || {};
19
23
  return {
20
24
  enabled: configOverrides.enabled ?? true,
21
25
  similarityThreshold: configOverrides.similarityThreshold ?? 0.92,
22
- maxEntries: configOverrides.maxEntries ?? 500,
26
+ maxEntries: configOverrides.maxEntries ?? 10000,
23
27
  ttlMs: configOverrides.ttlMs ?? 3600000, // 1 hour
28
+ shortTtlMs: configOverrides.shortTtlMs ?? 300000, // 5 min for time-sensitive queries
29
+ shortTtlPatterns: [
30
+ /\bnow\b/i,
31
+ /\btoday\b/i,
32
+ /\bcurrent\b/i,
33
+ /\blatest\b/i,
34
+ /\brecent\b/i,
35
+ /\bjust\s+now\b/i,
36
+ ],
24
37
  minPromptLength: 20, // Don't cache very short prompts
25
38
  maxPromptLength: 5000, // Don't cache very long prompts (too specific)
26
- excludePatterns: [ // Patterns to exclude from caching
39
+ excludePatterns: [ // Patterns to fully exclude from caching
27
40
  /current time/i,
28
41
  /today's date/i,
29
42
  /right now/i,
@@ -33,6 +46,19 @@ function getDefaultConfig() {
33
46
  };
34
47
  }
35
48
 
/**
 * Phase 2.1 helper: determine the TTL to apply to a given prompt.
 * Time-sensitive keywords ("now", "today", "current") get a short TTL so
 * stale answers don't persist for an hour.
 *
 * @param {string} promptText - Prompt to inspect; an empty or missing prompt gets the normal TTL.
 * @param {object} cfg - Cache config; reads `ttlMs`, `shortTtlMs` and `shortTtlPatterns`.
 * @returns {number} `cfg.shortTtlMs` when any pattern matches (falling back to
 *   `cfg.ttlMs` if no short TTL is set), otherwise `cfg.ttlMs`.
 */
function _ttlForPrompt(promptText, cfg) {
  if (!promptText || !Array.isArray(cfg.shortTtlPatterns)) return cfg.ttlMs;

  const text = String(promptText);
  const isTimeSensitive = cfg.shortTtlPatterns.some((pattern) => {
    // Patterns can be overridden by callers; skip anything that is not a RegExp
    // rather than throwing on `.test`.
    if (!(pattern instanceof RegExp)) return false;
    // A /g or /y pattern keeps `lastIndex` between calls, which would make
    // matches depend on the previous prompt. Start every test from index 0.
    pattern.lastIndex = 0;
    return pattern.test(text);
  });

  return isTimeSensitive ? cfg.shortTtlMs ?? cfg.ttlMs : cfg.ttlMs;
}
61
+
36
62
  class SemanticCache {
37
63
  constructor(options = {}) {
38
64
  this.config = { ...getDefaultConfig(), ...options };