npm - lynkr - Versions diffs - 9.0.2 → 9.1.3 - Mend

lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/README.md +21 -10
package/bin/cli.js +18 -1
package/bin/lynkr-trajectory.js +136 -0
package/bin/lynkr-usage.js +219 -0
package/funding.json +110 -0
package/package.json +4 -2
package/public/dashboard.html +665 -0
package/scripts/build-knn-index.js +130 -0
package/scripts/calibrate-thresholds.js +197 -0
package/scripts/compare-policies.js +67 -0
package/scripts/learn-output-ratios.js +162 -0
package/scripts/refresh-pricing.js +122 -0
package/scripts/run-routerarena.js +26 -0
package/scripts/sample-regret.js +84 -0
package/scripts/train-risk-classifier.js +191 -0
package/src/api/files-router.js +6 -6
package/src/api/middleware/budget-enforcer.js +60 -0
package/src/api/middleware/budget.js +19 -1
package/src/api/middleware/load-shedding.js +17 -0
package/src/api/middleware/tenant.js +21 -0
package/src/api/openai-router.js +1 -1
package/src/api/router.js +204 -87
package/src/budget/hierarchical-budget.js +159 -0
package/src/cache/semantic.js +28 -2
package/src/clients/databricks.js +68 -10
package/src/clients/openai-format.js +31 -5
package/src/config/index.js +246 -43
package/src/context/toon.js +5 -4
package/src/dashboard/api.js +170 -0
package/src/dashboard/router.js +13 -0
package/src/headroom/client.js +3 -109
package/src/headroom/index.js +0 -14
package/src/memory/search.js +0 -50
package/src/orchestrator/index.js +106 -11
package/src/orchestrator/preflight.js +188 -0
package/src/prompts/system.js +34 -6
package/src/routing/bandit.js +246 -0
package/src/routing/cascade.js +106 -0
package/src/routing/complexity-analyzer.js +7 -15
package/src/routing/confidence-scorer.js +121 -0
package/src/routing/context-validator.js +71 -0
package/src/routing/cost-optimizer.js +5 -2
package/src/routing/deadline.js +52 -0
package/src/routing/drift-monitor.js +113 -0
package/src/routing/embedding-cache.js +77 -0
package/src/routing/index.js +374 -4
package/src/routing/interaction.js +183 -0
package/src/routing/knn-router.js +206 -0
package/src/routing/latency-tracker.js +113 -71
package/src/routing/model-tiers.js +156 -6
package/src/routing/output-ratios.js +57 -0
package/src/routing/regret-estimator.js +91 -0
package/src/routing/reward-pipeline.js +62 -0
package/src/routing/risk-analyzer.js +194 -0
package/src/routing/risk-classifier.js +130 -0
package/src/routing/shadow-mode.js +77 -0
package/src/routing/telemetry.js +7 -0
package/src/routing/tenant-policy.js +96 -0
package/src/routing/tokenizer.js +162 -0
package/src/server.js +12 -0
package/src/stores/file-store.js +42 -7
package/src/tools/smart-selection.js +11 -2
package/src/training/trajectory-compressor.js +266 -0
package/src/usage/aggregator.js +206 -0
package/src/utils/markdown-ansi.js +146 -0

package/src/api/router.js CHANGED Viewed

@@ -3,11 +3,14 @@ const { processMessage } = require("../orchestrator");
 const { getSession } = require("../sessions");
 const metrics = require("../metrics");
 const logger = require("../logger");
+const config = require("../config");
 const { createRateLimiter } = require("./middleware/rate-limiter");
 const openaiRouter = require("./openai-router");
 const providersRouter = require("./providers-handler");
-const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector } = require("../routing");
+const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector, analyzeRisk } = require("../routing");
+const { buildInteractionBlock } = require("../routing/interaction");
 const { validateCwd } = require("../workspace");
+const { renderText } = require("../utils/markdown-ansi");
 const router = express.Router();
@@ -15,54 +18,48 @@ const router = express.Router();
 const rateLimiter = createRateLimiter();
 /**
- * Estimate token count for messages
- * Uses rough approximation of ~4 characters per token
- * @param {Array} messages - Array of message objects with role and content
- * @param {string|Array} system - System prompt (string or array of content blocks)
- * @returns {number} Estimated input token count
+ * Estimate token count for messages.
+ *
+ * Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
+ * if js-tiktoken is unavailable).
  */
-function estimateTokenCount(messages = [], system = null) {
-  let totalChars = 0;
-  // Count system prompt characters
-  if (system) {
-    if (typeof system === "string") {
-      totalChars += system.length;
-    } else if (Array.isArray(system)) {
-      system.forEach((block) => {
-        if (block.type === "text" && block.text) {
-          totalChars += block.text.length;
-        }
-      });
-    }
-  }
-  // Count message characters
-  messages.forEach((msg) => {
-    if (msg.content) {
-      if (typeof msg.content === "string") {
-        totalChars += msg.content.length;
-      } else if (Array.isArray(msg.content)) {
-        msg.content.forEach((block) => {
-          if (block.type === "text" && block.text) {
-            totalChars += block.text.length;
-          } else if (block.type === "image" && block.source?.data) {
-            // Images: rough estimate based on base64 length
-            totalChars += Math.floor(block.source.data.length / 6);
-          }
-        });
-      }
-    }
-  });
+const { countMessagesTokens } = require("../routing/tokenizer");
-  // Estimate tokens: ~4 characters per token
-  return Math.ceil(totalChars / 4);
+function estimateTokenCount(messages = [], system = null, model = null) {
+  return countMessagesTokens(messages, system, model);
 }
+// Root health check (for HEAD / and GET /)
+router.head("/", (req, res) => {
+  res.status(200).end();
+});
+router.get("/", (req, res) => {
+  res.json({ status: "ok", service: "lynkr" });
+});
 router.get("/health", (req, res) => {
   res.json({ status: "ok" });
 });
+// Usage report — same data as `lynkr usage` CLI, served as JSON for
+// dashboards / agents / scripts that want to surface spend & savings.
+//
+// Query parameters (all optional):
+//   window                    — forwarded as-is; takes precedence over `days`
+//   days                      — non-negative integer, turned into `${days}d`
+//   flagship, provider, model — forwarded unchanged to aggregator.getUsage
+// When neither `window` nor a usable `days` is supplied the window is "30d".
+// Any error thrown while loading or querying the aggregator is returned as
+// HTTP 500 with the error message in the JSON body.
+router.get("/v1/usage", (req, res) => {
+  try {
+    const aggregator = require("../usage/aggregator");
+    // `days` comes straight from the query string. Without validation a value
+    // such as ?days=abc produced the window "NaNd" (and ?days=-3 gave "-3d"),
+    // so anything that is not a non-negative integer falls back to "30d".
+    const days = Number.parseInt(req.query.days, 10);
+    const daysWindow = Number.isInteger(days) && days >= 0 ? `${days}d` : "30d";
+    const window = req.query.window || daysWindow;
+    const usage = aggregator.getUsage({
+      window,
+      flagship: req.query.flagship,
+      provider: req.query.provider,
+      model: req.query.model,
+    });
+    res.json(usage);
+  } catch (err) {
+    res.status(500).json({ error: err.message });
+  }
+});
 // Routing stats endpoint (Phase 3: Metrics)
 router.get("/routing/stats", (req, res) => {
   const stats = getRoutingStats();
@@ -260,24 +257,70 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
     // Analyze complexity for routing headers (Phase 3)
     const complexity = await analyzeComplexity(req.body);
     timer.mark("analyzeComplexity");
+    // Risk axis runs alongside complexity. Cheap pure-string scan, no I/O.
+    let preRouteRisk = null;
+    try {
+      preRouteRisk = analyzeRisk(req.body);
+    } catch (err) {
+      logger.debug({ err: err.message }, '[Router] Risk analysis failed in pre-route');
+    }
+    // Pre-route tier: high-risk forces COMPLEX, otherwise tier is
+    // inferred from the complexity recommendation. The actual final
+    // tier may differ (invokeModel re-runs determineProviderSmart) —
+    // this is best-effort for header surfacing.
     let preRouteProvider = 'cloud';
-    if (complexity.recommendation === 'local') {
-      // Use tier config to determine actual provider instead of hardcoding 'ollama'
+    let preRouteTier = null;
+    let preRouteModel = null;
+    let preRouteMethod = 'complexity';
+    let preRouteReason = complexity.breakdown?.taskType?.reason || complexity.recommendation;
+    if (preRouteRisk?.level === 'high') {
       try {
         const selector = getModelTierSelector();
-        const tierResult = selector.selectModel('SIMPLE', null);
+        const tierResult = selector.selectModel('COMPLEX', null);
         preRouteProvider = tierResult.provider;
+        preRouteTier = 'COMPLEX';
+        preRouteModel = tierResult.model;
+        preRouteMethod = 'risk';
+        preRouteReason = 'high_risk_forced_tier';
       } catch (_) {
-        preRouteProvider = 'ollama';
+        // Risk-forced tier not configured; fall back to normal flow.
+      }
+    }
+    if (!preRouteTier) {
+      if (complexity.recommendation === 'local') {
+        try {
+          const selector = getModelTierSelector();
+          const tierResult = selector.selectModel('SIMPLE', null);
+          preRouteProvider = tierResult.provider;
+          preRouteTier = 'SIMPLE';
+          preRouteModel = tierResult.model;
+        } catch (_) {
+          preRouteProvider = 'ollama';
+        }
       }
     }
-    const routingHeaders = getRoutingHeaders({
+    const preRouteDecision = {
       provider: preRouteProvider,
+      tier: preRouteTier,
+      model: preRouteModel,
+      method: preRouteMethod,
+      reason: preRouteReason,
       score: complexity.score,
       threshold: complexity.threshold,
-      method: 'complexity',
-      reason: complexity.breakdown?.taskType?.reason || complexity.recommendation,
-    });
+      risk: preRouteRisk,
+    };
+    const routingHeaders = getRoutingHeaders(preRouteDecision);
+    // Build the interaction block once. It travels in headers always
+    // (X-Lynkr-Interaction-* derived fields) and optionally into the
+    // response body when LYNKR_VISIBLE_ROUTING=true.
+    const interaction = buildInteractionBlock(preRouteDecision);
     // Extract client CWD from request body or header
     const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']);
@@ -305,6 +348,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
         options: {
           maxSteps: req.body?.max_steps,
           maxDurationMs: req.body?.max_duration_ms,
+          tenantPolicy: res.locals?.tenantPolicy || null,
         },
       });
@@ -424,17 +468,35 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
             content_block: { type: "text", text: "" }
           })}\n\n`);
-          // Send text in chunks
-          const text = block.text || "";
-          const chunkSize = 20;
-          for (let j = 0; j < text.length; j += chunkSize) {
-            const chunk = text.slice(j, j + chunkSize);
-            res.write(`event: content_block_delta\n`);
-            res.write(`data: ${JSON.stringify({
-              type: "content_block_delta",
-              index: i,
-              delta: { type: "text_delta", text: chunk }
-            })}\n\n`);
+          // Send text — one chunk when ANSI rendering is active (splitting
+          // ANSI escape sequences across 20-char chunks breaks terminal output).
+          // Plain text falls back to line-level chunks for a trickle effect.
+          // Never apply ANSI rendering to HTML content (<artifact> blocks):
+          // ANSI codes corrupt CSS selectors like `*` and break the browser viewer.
+          const rawBlockText = block.text || "";
+          const isHtmlContent = rawBlockText.includes("<artifact") || rawBlockText.trimStart().startsWith("<");
+          const text = isHtmlContent ? rawBlockText : renderText(rawBlockText);
+          const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
+          if (ansiEnabled && !isHtmlContent) {
+            if (text.length > 0) {
+              res.write(`event: content_block_delta\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_delta",
+                index: i,
+                delta: { type: "text_delta", text }
+              })}\n\n`);
+            }
+          } else {
+            const lines = text.split("\n");
+            for (const line of lines) {
+              const lineWithNl = line + "\n";
+              res.write(`event: content_block_delta\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_delta",
+                index: i,
+                delta: { type: "text_delta", text: lineWithNl }
+              })}\n\n`);
+            }
           }
           res.write(`event: content_block_stop\n`);
@@ -459,22 +521,37 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
           res.write(`event: content_block_stop\n`);
           res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
         } else if (block.type === "tool_use") {
-          res.write(`event: content_block_start\n`);
-          res.write(`data: ${JSON.stringify({
-            type: "content_block_start",
-            index: i,
-            content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
-          })}\n\n`);
-          res.write(`event: content_block_delta\n`);
-          res.write(`data: ${JSON.stringify({
-            type: "content_block_delta",
-            index: i,
-            delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
-          })}\n\n`);
-          res.write(`event: content_block_stop\n`);
-          res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
+          // Original request had no tools → model hallucinated a tool call.
+          // Extract file content from write-style tools and wrap it in an
+          // <artifact> block so open-design routes it to the Design panel.
+          const toolName = (block.name || "").toLowerCase();
+          const writeTools = new Set(["write", "create_file", "write_file", "str_replace_editor"]);
+          if (writeTools.has(toolName)) {
+            const rawContent = block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
+            const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
+            const content = String(rawContent);
+            if (content) {
+              // Wrap in <artifact> so open-design's parser routes it to the file viewer.
+              const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
+              const title = filePath;
+              const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;
+              res.write(`event: content_block_start\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_start",
+                index: i,
+                content_block: { type: "text", text: "" }
+              })}\n\n`);
+              res.write(`event: content_block_delta\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_delta",
+                index: i,
+                delta: { type: "text_delta", text: wrapped }
+              })}\n\n`);
+              res.write(`event: content_block_stop\n`);
+              res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
+            }
+          }
+          // Non-write tool_use in a tool-less request is silently dropped.
         }
       }
@@ -505,6 +582,7 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
       options: {
         maxSteps: req.body?.max_steps,
         maxDurationMs: req.body?.max_duration_ms,
+        tenantPolicy: res.locals?.tenantPolicy || null,
       },
     });
     timer.mark("processMessage");
@@ -566,16 +644,30 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
             content_block: { type: "text", text: "" }
           })}\n\n`);
-          const text = block.text || "";
-          const chunkSize = 20;
-          for (let j = 0; j < text.length; j += chunkSize) {
-            const chunk = text.slice(j, j + chunkSize);
-            res.write(`event: content_block_delta\n`);
-            res.write(`data: ${JSON.stringify({
-              type: "content_block_delta",
-              index: i,
-              delta: { type: "text_delta", text: chunk }
-            })}\n\n`);
+          const rawBlockText2 = block.text || "";
+          const isHtmlContent2 = rawBlockText2.includes("<artifact") || rawBlockText2.trimStart().startsWith("<");
+          const text = isHtmlContent2 ? rawBlockText2 : renderText(rawBlockText2);
+          const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
+          if (ansiEnabled && !isHtmlContent2) {
+            if (text.length > 0) {
+              res.write(`event: content_block_delta\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_delta",
+                index: i,
+                delta: { type: "text_delta", text }
+              })}\n\n`);
+            }
+          } else {
+            const lines = text.split("\n");
+            for (const line of lines) {
+              const lineWithNl = line + "\n";
+              res.write(`event: content_block_delta\n`);
+              res.write(`data: ${JSON.stringify({
+                type: "content_block_delta",
+                index: i,
+                delta: { type: "text_delta", text: lineWithNl }
+              })}\n\n`);
+            }
           }
           res.write(`event: content_block_stop\n`);
@@ -651,8 +743,33 @@ router.post("/v1/messages", rateLimiter, async (req, res, next) => {
       });
     }
+    // Inject visible interaction block into the response body when
+    // LYNKR_VISIBLE_ROUTING=true. We only mutate JSON bodies — and only
+    // when the response looks like a valid Anthropic Message — so this
+    // is a no-op for streamed / error / non-message responses.
+    let finalBody = result.body;
+    if (
+      config.routing?.visibleInteraction &&
+      interaction &&
+      result.status >= 200 && result.status < 300 &&
+      result.body
+    ) {
+      try {
+        const text = Buffer.isBuffer(result.body) ? result.body.toString('utf8') : result.body;
+        if (typeof text === 'string' && text.startsWith('{')) {
+          const parsed = JSON.parse(text);
+          if (parsed && typeof parsed === 'object' && parsed.type === 'message') {
+            parsed.lynkr_interaction = interaction;
+            finalBody = JSON.stringify(parsed);
+          }
+        }
+      } catch (err) {
+        logger.debug({ err: err.message }, '[Router] Skipped interaction injection (non-JSON body)');
+      }
+    }
     metrics.recordResponse(result.status);
-    res.status(result.status).send(result.body);
+    res.status(result.status).send(finalBody);
   } catch (error) {
     next(error);
   }

package/src/budget/hierarchical-budget.js ADDED Viewed

@@ -0,0 +1,159 @@
+/**
+ * Hierarchical budget controls (Phase 6.2).
+ *
+ * Tracks spend at four levels: virtual_key → team → customer → org.
+ * Each level has a ceiling; a request must pass *every* level it belongs
+ * to.
+ *
+ * Storage: in-process Map by default. Operations are atomic-by-design (single
+ * Node event loop), so no locking needed. For multi-process deployments,
+ * swap the storage implementation for Redis (the interface is stable; see
+ * RedisBudgetStore stub at the bottom of the file).
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+const CONFIG_PATH = path.join(__dirname, '../../data/budgets.json');
+const RELOAD_INTERVAL_MS = 60_000;
+const LEVELS = ['virtual_key', 'team', 'customer', 'org'];
+/**
+ * In-process spend ledger backed by a Map.
+ *
+ * Entries are keyed by `${level}:${id}` and hold `{ spent, periodStart }`.
+ * Reading an unknown key yields a fresh zero-spend entry that is NOT stored
+ * until something writes it back (incr, or resetIfStale when it resets).
+ */
+class MapBudgetStore {
+  constructor() {
+    // `${level}:${id}` → { spent, periodStart }
+    this._spend = new Map();
+  }
+  /** Build the composite Map key for a level/id pair. */
+  _key(level, id) {
+    return `${level}:${id}`;
+  }
+  /** Return the stored entry, or a new unsaved zero-spend entry. */
+  get(level, id) {
+    const entry = this._spend.get(this._key(level, id));
+    if (entry) return entry;
+    return { spent: 0, periodStart: Date.now() };
+  }
+  /** Store `value` as the entry for the level/id pair. */
+  set(level, id, value) {
+    const mapKey = this._key(level, id);
+    this._spend.set(mapKey, value);
+  }
+  /** Add `amount` to the entry's spend, persist it, and return the entry. */
+  incr(level, id, amount) {
+    const entry = this.get(level, id);
+    entry.spent += amount;
+    this.set(level, id, entry);
+    return entry;
+  }
+  /**
+   * Zero the entry and restart its period when the period is older than
+   * `periodMs`; otherwise leave it untouched. Returns the entry either way.
+   */
+  resetIfStale(level, id, periodMs) {
+    const entry = this.get(level, id);
+    const age = Date.now() - entry.periodStart;
+    if (!(age > periodMs)) return entry;
+    entry.spent = 0;
+    entry.periodStart = Date.now();
+    this.set(level, id, entry);
+    return entry;
+  }
+}
+let _config = null;
+let _configLoadedAt = 0;
+/**
+ * Return the budget configuration, re-reading CONFIG_PATH at most once per
+ * RELOAD_INTERVAL_MS.
+ *
+ * - File present and holding a JSON object: that object becomes the config.
+ * - File absent: the built-in defaults (24h period, no limits) are used.
+ * - File present but unreadable, malformed, or not a JSON object: the last
+ *   successfully loaded config is kept so existing limits stay enforced;
+ *   the built-in defaults are used only when nothing was loaded before.
+ *
+ * @returns {object} always a non-null object
+ */
+function _loadConfig() {
+  if (_config && Date.now() - _configLoadedAt < RELOAD_INTERVAL_MS) return _config;
+  let next = null;
+  let keepPrevious = false;
+  try {
+    if (fs.existsSync(CONFIG_PATH)) {
+      const parsed = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8'));
+      // `null`, arrays and primitives are valid JSON but not a usable config;
+      // returning them would make callers throw on `config.defaults`.
+      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
+        next = parsed;
+      } else {
+        keepPrevious = true;
+        logger.debug({ path: CONFIG_PATH }, '[HierarchicalBudget] Config ignored: top-level JSON value is not an object');
+      }
+    }
+  } catch (err) {
+    keepPrevious = true;
+    logger.debug({ err: err.message }, '[HierarchicalBudget] Config load failed');
+  }
+  if (next === null) {
+    next = keepPrevious && _config
+      ? _config
+      : { defaults: { periodMs: 86400000 }, limits: {} };
+  }
+  _config = next;
+  _configLoadedAt = Date.now();
+  return _config;
+}
+/**
+ * Enforces spend ceilings across every level in LEVELS that a request
+ * context names. Limits come from _loadConfig(); spend lives in `store`.
+ */
+class HierarchicalBudget {
+  /**
+   * @param {object} [store] — spend ledger exposing incr() and resetIfStale();
+   *   defaults to a new MapBudgetStore.
+   */
+  constructor(store = new MapBudgetStore()) {
+    this.store = store;
+  }
+  /**
+   * Resolve the ceiling for one level/id pair: the per-id entry in
+   * config.limits when present, otherwise the level default.
+   * Only own properties are consulted. Ids typically originate from the
+   * request, and a plain `limits[level][id]` lookup would resolve ids such
+   * as "constructor" or "toString" to inherited functions, which are not
+   * numbers and therefore silently skipped the level default.
+   */
+  _limitFor(config, level, id) {
+    const perId = config.limits?.[level];
+    if (perId !== null && perId !== undefined && Object.prototype.hasOwnProperty.call(perId, id)) {
+      const specific = perId[id];
+      if (specific !== null && specific !== undefined) return specific;
+    }
+    return config.defaults?.[level];
+  }
+  /** Budget period length from config, defaulting to 24 hours. */
+  _periodMs(config) {
+    return config.defaults?.periodMs || 86400000;
+  }
+  /**
+   * Check whether all relevant ceilings still allow `amount` of spend.
+   * A missing, non-finite or negative amount is treated as 0, so a level
+   * that is already over its ceiling is still rejected (previously a NaN or
+   * undefined amount made every comparison false and the check always passed).
+   * @param {object} context — { virtual_key, team, customer, org }
+   * @param {number} amount — dollars
+   * @returns {{ ok: boolean, exceeded?: { level, id, limit, spent } }}
+   */
+  check(context, amount) {
+    const config = _loadConfig();
+    const periodMs = this._periodMs(config);
+    const scope = context || {};
+    const requested = Number.isFinite(amount) && amount > 0 ? amount : 0;
+    for (const level of LEVELS) {
+      const id = scope[level];
+      if (!id) continue;
+      const limit = this._limitFor(config, level, id);
+      // Levels without a numeric ceiling are unconstrained.
+      if (typeof limit !== 'number') continue;
+      const current = this.store.resetIfStale(level, id, periodMs);
+      if (current.spent + requested > limit) {
+        return {
+          ok: false,
+          exceeded: { level, id, limit, spent: current.spent },
+        };
+      }
+    }
+    return { ok: true };
+  }
+  /**
+   * Record spend after a request completes. Increments all relevant levels.
+   * Amounts that are not finite positive numbers are ignored: adding NaN or
+   * Infinity would corrupt the stored total for the rest of the period.
+   * An expired period is rolled over before the increment so the spend is
+   * counted in the new period instead of being wiped by the next check().
+   */
+  record(context, amount) {
+    if (typeof amount !== 'number' || !Number.isFinite(amount) || amount <= 0) return;
+    const periodMs = this._periodMs(_loadConfig());
+    const scope = context || {};
+    for (const level of LEVELS) {
+      const id = scope[level];
+      if (!id) continue;
+      this.store.resetIfStale(level, id, periodMs);
+      this.store.incr(level, id, amount);
+    }
+  }
+  /**
+   * Summary for the dashboard: for each level named in `context`, the id,
+   * current spend, resolved limit (may be undefined) and period start.
+   */
+  status(context) {
+    const config = _loadConfig();
+    const periodMs = this._periodMs(config);
+    const scope = context || {};
+    const out = {};
+    for (const level of LEVELS) {
+      const id = scope[level];
+      if (!id) continue;
+      const limit = this._limitFor(config, level, id);
+      const current = this.store.resetIfStale(level, id, periodMs);
+      out[level] = { id, spent: current.spent, limit, periodStart: current.periodStart };
+    }
+    return out;
+  }
+}
+// Process-wide HierarchicalBudget, created on first use.
+let _instance = null;
+/**
+ * Return the shared HierarchicalBudget, constructing it (with its default
+ * store) the first time this is called.
+ */
+function getHierarchicalBudget() {
+  if (_instance) return _instance;
+  _instance = new HierarchicalBudget();
+  return _instance;
+}
+/**
+ * Placeholder for a Redis-backed budget store, intended for deployments
+ * that run more than one Node process. It is meant to expose the same
+ * interface as MapBudgetStore so HierarchicalBudget can take either one.
+ * Not implemented: constructing it always throws.
+ */
+class RedisBudgetStore {
+  constructor(_redisClient) {
+    const message = 'RedisBudgetStore not implemented. Stub — wire your Redis client and use INCRBY with periodic TTL.';
+    throw new Error(message);
+  }
+}
+module.exports = {
+  HierarchicalBudget,
+  MapBudgetStore,
+  RedisBudgetStore,
+  getHierarchicalBudget,
+  LEVELS,
+};

package/src/cache/semantic.js CHANGED Viewed

@@ -14,16 +14,29 @@ const logger = require('../logger');
 const config = require('../config');
 // Default configuration (can be overridden via config.semanticCache)
+//
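+// Phase 2.1 of the routing overhaul: the maxEntries default is raised to
+// 10K to align with the plan. The plan also cites a 0.95 similarity
+// threshold (research on GPT Semantic Cache), but note that the
+// similarityThreshold default below is still 0.92.
+// Short-TTL keywords trigger a reduced TTL rather than blocking caching.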
 function getDefaultConfig() {
   const configOverrides = config.semanticCache || {};
   return {
     enabled: configOverrides.enabled ?? true,
     similarityThreshold: configOverrides.similarityThreshold ?? 0.92,
-    maxEntries: configOverrides.maxEntries ?? 500,
+    maxEntries: configOverrides.maxEntries ?? 10000,
     ttlMs: configOverrides.ttlMs ?? 3600000,  // 1 hour
+    shortTtlMs: configOverrides.shortTtlMs ?? 300000, // 5 min for time-sensitive queries
+    shortTtlPatterns: [
+      /\bnow\b/i,
+      /\btoday\b/i,
+      /\bcurrent\b/i,
+      /\blatest\b/i,
+      /\brecent\b/i,
+      /\bjust\s+now\b/i,
+    ],
     minPromptLength: 20,        // Don't cache very short prompts
     maxPromptLength: 5000,      // Don't cache very long prompts (too specific)
-    excludePatterns: [          // Patterns to exclude from caching
+    excludePatterns: [          // Patterns to fully exclude from caching
       /current time/i,
       /today's date/i,
       /right now/i,
@@ -33,6 +46,19 @@ function getDefaultConfig() {
   };
 }
+/**
+ * Phase 2.1 helper: choose the cache TTL for a prompt.
+ * Returns cfg.shortTtlMs when any regex in cfg.shortTtlPatterns matches the
+ * prompt (time-sensitive wording such as "now", "today", "current"), so
+ * stale answers don't persist for the full TTL. Returns cfg.ttlMs when
+ * nothing matches, the prompt is empty, or shortTtlPatterns is not an array.
+ */
+function _ttlForPrompt(promptText, cfg) {
+  const patterns = cfg.shortTtlPatterns;
+  const isTimeSensitive =
+    Boolean(promptText) &&
+    Array.isArray(patterns) &&
+    patterns.some((pattern) => pattern.test(promptText));
+  return isTimeSensitive ? cfg.shortTtlMs : cfg.ttlMs;
+}
 class SemanticCache {
   constructor(options = {}) {
     this.config = { ...getDefaultConfig(), ...options };