npm - lynkr - Versions diffs - 9.0.2 → 9.1.3 - Mend

lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65)

package/README.md +21 -10
package/bin/cli.js +18 -1
package/bin/lynkr-trajectory.js +136 -0
package/bin/lynkr-usage.js +219 -0
package/funding.json +110 -0
package/package.json +4 -2
package/public/dashboard.html +665 -0
package/scripts/build-knn-index.js +130 -0
package/scripts/calibrate-thresholds.js +197 -0
package/scripts/compare-policies.js +67 -0
package/scripts/learn-output-ratios.js +162 -0
package/scripts/refresh-pricing.js +122 -0
package/scripts/run-routerarena.js +26 -0
package/scripts/sample-regret.js +84 -0
package/scripts/train-risk-classifier.js +191 -0
package/src/api/files-router.js +6 -6
package/src/api/middleware/budget-enforcer.js +60 -0
package/src/api/middleware/budget.js +19 -1
package/src/api/middleware/load-shedding.js +17 -0
package/src/api/middleware/tenant.js +21 -0
package/src/api/openai-router.js +1 -1
package/src/api/router.js +204 -87
package/src/budget/hierarchical-budget.js +159 -0
package/src/cache/semantic.js +28 -2
package/src/clients/databricks.js +68 -10
package/src/clients/openai-format.js +31 -5
package/src/config/index.js +246 -43
package/src/context/toon.js +5 -4
package/src/dashboard/api.js +170 -0
package/src/dashboard/router.js +13 -0
package/src/headroom/client.js +3 -109
package/src/headroom/index.js +0 -14
package/src/memory/search.js +0 -50
package/src/orchestrator/index.js +106 -11
package/src/orchestrator/preflight.js +188 -0
package/src/prompts/system.js +34 -6
package/src/routing/bandit.js +246 -0
package/src/routing/cascade.js +106 -0
package/src/routing/complexity-analyzer.js +7 -15
package/src/routing/confidence-scorer.js +121 -0
package/src/routing/context-validator.js +71 -0
package/src/routing/cost-optimizer.js +5 -2
package/src/routing/deadline.js +52 -0
package/src/routing/drift-monitor.js +113 -0
package/src/routing/embedding-cache.js +77 -0
package/src/routing/index.js +374 -4
package/src/routing/interaction.js +183 -0
package/src/routing/knn-router.js +206 -0
package/src/routing/latency-tracker.js +113 -71
package/src/routing/model-tiers.js +156 -6
package/src/routing/output-ratios.js +57 -0
package/src/routing/regret-estimator.js +91 -0
package/src/routing/reward-pipeline.js +62 -0
package/src/routing/risk-analyzer.js +194 -0
package/src/routing/risk-classifier.js +130 -0
package/src/routing/shadow-mode.js +77 -0
package/src/routing/telemetry.js +7 -0
package/src/routing/tenant-policy.js +96 -0
package/src/routing/tokenizer.js +162 -0
package/src/server.js +12 -0
package/src/stores/file-store.js +42 -7
package/src/tools/smart-selection.js +11 -2
package/src/training/trajectory-compressor.js +266 -0
package/src/usage/aggregator.js +206 -0
package/src/utils/markdown-ansi.js +146 -0

package/README.md CHANGED Viewed

@@ -225,14 +225,15 @@ Routes requests to the right model based on 5-phase complexity analysis. Simple
 - **Graphify integration** — AST-based knowledge graph detects god nodes, community cohesion, blast radius across 19 languages
 - **Routing telemetry** — every decision recorded with quality scoring (0-100) and latency tracking (P50/P95/P99)
-### Token Optimization (7 Phases)
-- **Smart tool selection** — only sends tools relevant to the current task
-- **Code Mode** — replaces 100+ MCP tools with 4 meta-tools (~96% token reduction)
-- **Distill compression** — structural similarity, delta rendering, smart dedup of repetitive tool outputs
-- **Prompt caching** — SHA-256 keyed LRU cache
-- **Memory deduplication** — eliminates repeated information across turns
-- **History compression** — sliding window with Distill-powered structural dedup
-- **Headroom sidecar** — optional 47-92% ML-based compression (Smart Crusher, CCR, LLMLingua)
+### Token Optimization (8 Phases)
+- **MCP Code Mode** — replaces 100+ MCP tool schemas with 4 meta-tools (~96% reduction, lazy tool discovery)
+- **Smart tool selection** — only sends tools relevant to the current task (50-70% reduction)
+- **Prompt caching** — SHA-256 keyed LRU cache (30-45% reduction on repeated prompts)
+- **Memory deduplication** — eliminates repeated information across turns (20-30% reduction)
+- **Tool response truncation** — intelligent truncation of long outputs (15-25% reduction)
+- **Dynamic system prompts** — adapt complexity to request type (10-20% reduction)
+- **Distill compression** — structural similarity, delta rendering, smart dedup of repetitive tool outputs (20-40% reduction)
+- **Headroom sidecar** — optional ML-based compression: Smart Crusher, CCR, LLMLingua (47-92% reduction)
 ### Enterprise Resilience
 - **Circuit breakers** — automatic failover with half-open probe recovery
@@ -254,12 +255,22 @@ SEMANTIC_CACHE_THRESHOLD=0.95
 ```
 ### MCP Integration + Code Mode
-Automatic Model Context Protocol server discovery and orchestration. Your MCP tools work through Lynkr without configuration. Enable Code Mode to replace 100+ MCP tool definitions with 4 lightweight meta-tools:
+Automatic Model Context Protocol server discovery and orchestration. Your MCP tools work through Lynkr without configuration.
+**MCP Code Mode** — Token optimization for heavy MCP setups:
+- Replaces 100+ individual MCP tool schemas with 4 meta-tools
+- Reduces tool catalog from ~17,500 tokens to ~700 tokens (**96% reduction**)
+- Enables lazy tool discovery: model queries `mcp_list_tools`, then `mcp_tool_info`, then `mcp_execute`
+- Best for: 50+ MCP tools, long conversations, context-constrained setups
+- Trade-off: 3 sequential calls instead of 1 (adds ~2-3s latency)
 ```bash
-CODE_MODE_ENABLED=true  # ~96% reduction in tool-catalog tokens
+CODE_MODE_ENABLED=true          # Enable Code Mode
+CODE_MODE_CACHE_TTL=60000       # Tool list cache TTL (ms)
 ```
+See [Token Optimization Guide](documentation/token-optimization.md#phase-0-mcp-code-mode-96-reduction-for-mcp-tools) and [Tools Documentation](documentation/tools.md#mcp-code-mode-token-optimization) for details.
 ---
 ## Deployment Options

package/bin/cli.js CHANGED Viewed

@@ -1,7 +1,22 @@
 #!/usr/bin/env node
+const path = require("path");
 const pkg = require('../package.json');
+// Subcommands. Dispatched before server boot so `lynkr usage` / `lynkr trajectory`
+// don't start the proxy. Add new subcommands here, not in scattered binaries.
+const SUBCOMMANDS = {
+  usage:      path.join(__dirname, "lynkr-usage.js"),
+  trajectory: path.join(__dirname, "lynkr-trajectory.js"),
+};
+const sub = process.argv[2];
+if (sub && Object.prototype.hasOwnProperty.call(SUBCOMMANDS, sub)) { // own-property check: inherited keys such as "constructor" never match
+  process.argv.splice(2, 1); // drop the subcommand token so the script's own arg parser is happy
+  require(SUBCOMMANDS[sub]); // loading the script runs it: each subcommand file calls its own main() at load time
+  return; // legal at top level because CommonJS wraps the module in a function; skips the rest of this file
+}
 if (process.argv.includes('--version') || process.argv.includes('-v')) {
   console.log(pkg.version);
   process.exit(0);
@@ -14,7 +29,9 @@ ${pkg.name} v${pkg.version}
 ${pkg.description}
 Usage:
-  lynkr [options]
+  lynkr [options]                  Start the proxy server (default)
+  lynkr usage [options]            Show AI spend report and tier-routing savings
+  lynkr trajectory [options]       Export agent trajectories as JSONL training data
 Options:
   -h, --help      Show this help message

package/bin/lynkr-trajectory.js ADDED Viewed

@@ -0,0 +1,136 @@
+#!/usr/bin/env node
+/* eslint-disable no-console */
+/**
+ * lynkr trajectory — export agent trajectories from the session DB
+ * as JSONL training data.
+ *
+ * Usage:
+ *   lynkr trajectory                                     # stdout, last 30 days
+ *   lynkr trajectory --since 7d                          # last 7 days
+ *   lynkr trajectory --output trajectories.jsonl        # write to file
+ *   lynkr trajectory --tier COMPLEX                      # only complex sessions
+ *   lynkr trajectory --anonymize                         # strip PII / paths / secrets
+ *   lynkr trajectory --count                             # just print the row count
+ */
+const path = require("path");
+process.env.WORKSPACE_ROOT = process.env.WORKSPACE_ROOT || path.resolve(__dirname, "..");
+const compressor = require("../src/training/trajectory-compressor");
+function parseArgs(argv) {
+  const opts = { since: "30d", anonymize: false, output: "-", count: false };
+  for (let i = 2; i < argv.length; i++) {
+    const a = argv[i];
+    const next = argv[i + 1];
+    if (a === "--since" && next) {
+      opts.since = next;
+      i++;
+    } else if (a === "--days" && next) {
+      opts.since = `${parseInt(next, 10)}d`;
+      i++;
+    } else if (a === "--tier" && next) {
+      opts.tier = next.toUpperCase();
+      i++;
+    } else if (a === "--output" && next) {
+      opts.output = next;
+      i++;
+    } else if (a === "-o" && next) {
+      opts.output = next;
+      i++;
+    } else if (a === "--anonymize" || a === "--anonymise") {
+      opts.anonymize = true;
+    } else if (a === "--count") {
+      opts.count = true;
+    } else if (a === "--help" || a === "-h") {
+      printHelp();
+      process.exit(0);
+    } else if (a === "--format" && next) {
+      // Reserved for future formats. Only "jsonl" is supported today.
+      if (next !== "jsonl") {
+        console.error(`Unsupported --format: ${next}. Only 'jsonl' is supported.`);
+        process.exit(2);
+      }
+      i++;
+    }
+  }
+  return opts;
+}
+function printHelp() { // Print the `lynkr trajectory` help text (options, examples, output schema) to stdout.
+  console.log(`Lynkr trajectory exporter — emit JSONL training samples from session history.
+Usage:
+  lynkr trajectory [options]
+Options:
+  --since <window>     "7d", "30d", ISO date, or epoch ms (default: 30d)
+  --days N             Shorthand for --since Nd
+  --tier <tier>        Filter to one tier: SIMPLE, MEDIUM, COMPLEX, REASONING
+  --output, -o <path>  Output file (default: stdout, "-")
+  --anonymize          Strip PII, file paths, API keys, hostnames
+  --count              Print only the row count, no output
+  --format jsonl       Output format (only jsonl supported)
+  -h, --help           Show this help
+Examples:
+  lynkr trajectory --days 7 --output last-week.jsonl
+  lynkr trajectory --tier COMPLEX --anonymize -o complex-anon.jsonl
+  lynkr trajectory --count
+Output format (one JSON object per line):
+  {
+    "session_id": "...",
+    "messages": [{"role": "user", "content": "..."}, ...],
+    "tool_calls": [...],
+    "outcome": "success" | "error",
+    "tier": "MEDIUM",
+    "complexity_score": 38,
+    "model_used": "gpt-4o",
+    "provider_used": "azure-openai",
+    "tokens_in": 1234,
+    "tokens_out": 456,
+    "latency_ms": 2400,
+    "started_at": "...",
+    "ended_at": "..."
+  }
+`);
+}
+function fmtInt(n) { // Format a number with en-US grouping separators (e.g. 1,234); falsy input renders as "0".
+  return new Intl.NumberFormat("en-US").format(n || 0);
+}
+function main() { // Entry point: parse flags, then either count trajectories or export them as JSONL.
+  const opts = parseArgs(process.argv);
+  if (opts.count) {
+    // Quick path — stream-walk the sessions and just count valid trajectories.
+    let count = 0;
+    compressor.exportJsonl({ // NOTE(review): count is read right after this call — assumes exportJsonl is synchronous; confirm
+      ...opts, // forwards every parsed option, including count itself
+      output: { write: () => count++, end: () => {} }, // stub sink: each write() bumps the counter — presumably one write per trajectory; confirm
+    });
+    console.log(`${fmtInt(count)} trajectories`);
+    return;
+  }
+  const isStdout = opts.output === "-"; // "-" is the parseArgs default for output
+  const start = Date.now();
+  const result = compressor.exportJsonl({ // NOTE(review): result.count / result.output are read synchronously below; confirm the return shape
+    since: opts.since,
+    tier: opts.tier,
+    anonymize: opts.anonymize,
+    output: opts.output,
+  });
+  if (!isStdout) { // the summary is written to stderr, and only when exporting to a file
+    const elapsed = ((Date.now() - start) / 1000).toFixed(1); // elapsed seconds, one decimal
+    process.stderr.write(
+      `Exported ${fmtInt(result.count)} trajectories to ${result.output} in ${elapsed}s\n`
+    );
+  }
+}
+main();

package/bin/lynkr-usage.js ADDED Viewed

@@ -0,0 +1,219 @@
+#!/usr/bin/env node
+/* eslint-disable no-console */
+/**
+ * lynkr usage — print AI spend report from routing telemetry.
+ *
+ * Usage:
+ *   lynkr usage                          # last 30 days
+ *   lynkr usage --days 7
+ *   lynkr usage --window 1d
+ *   lynkr usage --window all
+ *   lynkr usage --json                   # machine-readable
+ *   lynkr usage --flagship gpt-5         # alternative comparison model
+ *   lynkr usage --provider moonshot      # filter to one provider
+ */
+const path = require("path");
+// Make sure config/logger pick up the workspace root
+process.env.WORKSPACE_ROOT = process.env.WORKSPACE_ROOT || path.resolve(__dirname, "..");
+const aggregator = require("../src/usage/aggregator");
+function parseArgs(argv) {
+  const opts = { window: "30d", json: false };
+  for (let i = 2; i < argv.length; i++) {
+    const a = argv[i];
+    const next = argv[i + 1];
+    if (a === "--json") opts.json = true;
+    else if (a === "--days" && next) {
+      opts.window = `${parseInt(next, 10)}d`;
+      i++;
+    } else if (a === "--window" && next) {
+      opts.window = next;
+      i++;
+    } else if (a === "--since" && next) {
+      opts.window = next;
+      i++;
+    } else if (a === "--flagship" && next) {
+      opts.flagship = next;
+      i++;
+    } else if (a === "--provider" && next) {
+      opts.provider = next;
+      i++;
+    } else if (a === "--model" && next) {
+      opts.model = next;
+      i++;
+    } else if (a === "--help" || a === "-h") {
+      printHelp();
+      process.exit(0);
+    }
+  }
+  return opts;
+}
+function printHelp() { // Print the `lynkr usage` help text (options and examples) to stdout.
+  console.log(`Lynkr usage report — show AI spend and tier-routing savings.
+Usage:
+  lynkr usage [options]
+Options:
+  --days N            Window in days (e.g. --days 7)
+  --window <preset>   Window preset: 1d, 7d, 30d, all (default: 30d)
+  --since <iso>       Custom start time (ISO 8601 or epoch ms)
+  --flagship <model>  Comparison model for "savings" math (default: claude-sonnet-4-5-20250929)
+  --provider <name>   Filter to a single provider
+  --model <id>        Filter to a single model
+  --json              Print as JSON instead of a formatted table
+  -h, --help          Show this help
+Examples:
+  lynkr usage
+  lynkr usage --days 7
+  lynkr usage --window all --json
+`);
+}
+const C = { // ANSI SGR escape sequences passed to colour() as its `code` argument
+  reset: "\x1b[0m", // appended by colour() after every coloured span
+  dim: "\x1b[2m",
+  bold: "\x1b[1m",
+  green: "\x1b[32m",
+  yellow: "\x1b[33m",
+  cyan: "\x1b[36m",
+  red: "\x1b[31m",
+  gray: "\x1b[90m",
+};
+function colour(text, code) { // Wrap text in the given escape code followed by C.reset.
+  if (!process.stdout.isTTY) return text; // text is returned unchanged when stdout is not a TTY
+  return `${code}${text}${C.reset}`;
+}
+function fmtUSD(n) { // Format a dollar amount: 2 decimals normally, 4 decimals below one cent.
+  if (!n) return "$0.00"; // 0, null, undefined and NaN all render as "$0.00"
+  if (n < 0.01) return `$${n.toFixed(4)}`; // NOTE(review): negative amounts also land here (-5 -> "$-5.0000") — confirm inputs are never negative
+  return `$${n.toFixed(2)}`;
+}
+function fmtTokens(n) { // Abbreviate a token count: "1.23M" from one million up, "1.2K" from one thousand up, else the plain number.
+  if (!n) return "0"; // falsy input (0, null, undefined, NaN) renders as "0"
+  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(2)}M`;
+  if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K`;
+  return String(n);
+}
+function fmtInt(n) { // Format a number with en-US grouping separators (e.g. 1,234); falsy input renders as "0".
+  return new Intl.NumberFormat("en-US").format(n || 0);
+}
+function pad(s, width, align = "left") {
+  s = String(s);
+  if (s.length >= width) return s;
+  const filler = " ".repeat(width - visibleLength(s));
+  return align === "right" ? filler + s : s + filler;
+}
+function visibleLength(s) { // Length of s after removing ANSI SGR sequences (ESC "[" digits/semicolons "m").
+  // strip ANSI for column-width math
+  return String(s).replace(/\x1b\[[0-9;]*m/g, "").length;
+}
+function tableRow(cells, widths, aligns) { // Render one table line: pad every cell to its column, then join the cells with two spaces.
+  return cells
+    .map((c, i) => pad(c, widths[i], aligns[i] || "left")) // a missing alignment entry falls back to "left"
+    .join("  ");
+}
+function printTable(rows, header, widths, aligns) { // Print a bold header line, a dim rule, then one line per row.
+  console.log(colour(tableRow(header, widths, aligns), C.bold));
+  console.log(colour(widths.map((w) => "─".repeat(w)).join("  "), C.dim)); // rule segments match the column widths and the two-space gap
+  for (const row of rows) {
+    console.log(tableRow(row, widths, aligns));
+  }
+}
+function bucketRows(bucket, widths) { // Turn a { key: stats } bucket into table rows. NOTE(review): `widths` is accepted but never used.
+  return Object.entries(bucket)
+    .sort((a, b) => b[1].actualCost - a[1].actualCost) // descending by actualCost
+    .map(([key, b]) => [
+      key,
+      fmtInt(b.requests),
+      fmtTokens(b.totalTokens),
+      colour(fmtUSD(b.actualCost), C.cyan),
+      colour(fmtUSD(b.flagshipCost), C.gray),
+      colour(fmtUSD(b.saved), C.green),
+      colour(`${b.savedPercent.toFixed(1)}%`, C.green),
+    ]);
+}
+function printReport(usage) { // Print the human-readable report: banner, totals headline, then BY TIER / BY PROVIDER / BY MODEL tables.
+  const { window, since, flagship, totals, byTier, byProvider, byModel } = usage;
+  const banner = `Lynkr — Usage Report`;
+  console.log("");
+  console.log(colour(banner, C.bold));
+  console.log(
+    colour(
+      `window: ${window}${since ? `  since: ${since}` : ""}  flagship-comparison: ${flagship}`,
+      C.dim
+    )
+  );
+  console.log("");
+  // Summary line
+  const headline =
+    `${fmtInt(totals.requests)} requests   ` +
+    `${fmtTokens(totals.totalTokens)} tokens   ` +
+    `actual ${colour(fmtUSD(totals.actualCost), C.cyan)}   ` +
+    `flagship-only ${colour(fmtUSD(totals.flagshipCost), C.gray)}   ` +
+    `saved ${colour(fmtUSD(totals.saved), C.green)} ` +
+    colour(`(${totals.savedPercent.toFixed(1)}%)`, C.green);
+  console.log(headline);
+  if (totals.fallbacks || totals.errors) { // the fallback/error line is printed only when at least one count is non-zero
+    console.log(
+      colour(
+        `   ${totals.fallbacks} fallback${totals.fallbacks !== 1 ? "s" : ""}, ` +
+          `${totals.errors} error${totals.errors !== 1 ? "s" : ""}`,
+        C.yellow
+      )
+    );
+  }
+  console.log("");
+  if (totals.requests === 0) { // no requests in the window: print a hint and skip the tables
+    console.log(colour("No telemetry yet for this window. Send some requests through Lynkr first.", C.yellow));
+    return;
+  }
+  const headers = ["", "REQUESTS", "TOKENS", "ACTUAL", "FLAGSHIP", "SAVED", "PCT"]; // first entry is a placeholder; each table supplies its own label
+  const widths = [22, 9, 9, 10, 10, 10, 7]; // column widths in characters, same order as headers
+  const aligns = ["left", "right", "right", "right", "right", "right", "right"];
+  console.log(colour("BY TIER", C.bold));
+  printTable(bucketRows(byTier, widths), ["TIER", ...headers.slice(1)], widths, aligns);
+  console.log("");
+  console.log(colour("BY PROVIDER", C.bold));
+  printTable(bucketRows(byProvider, widths), ["PROVIDER", ...headers.slice(1)], widths, aligns);
+  console.log("");
+  console.log(colour("BY MODEL", C.bold));
+  printTable(bucketRows(byModel, widths), ["MODEL", ...headers.slice(1)], widths, aligns);
+  console.log("");
+}
+function main() { // Entry point: parse flags, fetch the aggregated usage, print it as JSON or as a formatted report.
+  const opts = parseArgs(process.argv);
+  const usage = aggregator.getUsage(opts); // NOTE(review): used synchronously below — assumes getUsage is not async; confirm
+  if (opts.json) {
+    process.stdout.write(JSON.stringify(usage, null, 2) + "\n"); // pretty-printed with 2-space indentation
+    return;
+  }
+  printReport(usage);
+}
+main();

package/funding.json ADDED Viewed

@@ -0,0 +1,110 @@
+{
+  "$schema": "https://fundingjson.org/schema/v1.1.0.json",
+  "version": "v1.1.0",
+  "entity": {
+    "type": "individual",
+    "role": "maintainer",
+    "name": "Vishal Veera Reddy",
+    "email": "veerareddyvishal56@gmail.com",
+    "description": "Indian software engineer building open-source AI infrastructure. Sole maintainer of Lynkr, a self-hosted AI gateway that lets developers run any AI coding tool on any LLM provider.",
+    "webpageUrl": {
+      "url": "https://github.com/vishalveerareddy123"
+    }
+  },
+  "projects": [
+    {
+      "guid": "lynkr",
+      "name": "Lynkr",
+      "description": "A self-hosted AI gateway that decouples AI coding tools (Claude Code, Cursor, Codex, Cline, jcode, Pi) from their default LLM providers. Lynkr auto-detects the connecting tool, translates between Anthropic and OpenAI request formats, and routes to any of 12+ backends (Ollama, AWS Bedrock, Azure OpenAI, OpenRouter, Databricks, Moonshot, Google Vertex, llama.cpp, LM Studio, and more). A request-complexity classifier sends simple turns to free local models and complex ones to flagship cloud models, cutting per-developer AI bills 60-80% while removing vendor lock-in. Includes tool-result compression, MCP Code Mode (96% token reduction on tool definitions), persistent memory, and tier-based routing — all configured through a single .env file.",
+      "webpageUrl": {
+        "url": "https://fast-editor.github.io/Lynkr/"
+      },
+      "repositoryUrl": {
+        "url": "https://github.com/Fast-Editor/Lynkr"
+      },
+      "licenses": ["spdx:Apache-2.0"],
+      "tags": [
+        "ai",
+        "ai-gateway",
+        "llm",
+        "llm-router",
+        "developer-tools",
+        "proxy",
+        "claude-code",
+        "ollama",
+        "anthropic",
+        "openai"
+      ]
+    }
+  ],
+  "funding": {
+    "channels": [
+      {
+        "guid": "github-sponsors",
+        "type": "payment-provider",
+        "address": "https://github.com/sponsors/vishalveerareddy123",
+        "description": "Support Lynkr development via GitHub Sponsors."
+      },
+      {
+        "guid": "fossunited-grant",
+        "type": "other",
+        "address": "grants@fossunited.org",
+        "description": "FOSS United Foundation grant channel for institutional FOSS funding."
+      },
+      {
+        "guid": "bank-transfer",
+        "type": "bank",
+        "address": "Available on request via the project email.",
+        "description": "Direct bank transfer for organisations or grant disbursements."
+      }
+    ],
+    "plans": [
+      {
+        "guid": "core-maintenance-2026",
+        "status": "active",
+        "name": "Core maintenance + roadmap (12 months)",
+        "description": "Funds full-time work on Lynkr's core gateway: provider-format conversions, tier routing, tool-call translation across 10+ model formats (Minimax, Qwen, GLM, Llama, DeepSeek, Mistral), tool-result compression, persistent memory, MCP Code Mode, observability, tests, and docs. Estimated cost reflects one Indian maintainer working full-time for a year.",
+        "amount": 500000,
+        "currency": "INR",
+        "frequency": "yearly",
+        "channels": ["fossunited-grant", "bank-transfer"]
+      },
+      {
+        "guid": "infra-2026",
+        "status": "active",
+        "name": "Infrastructure + benchmarks",
+        "description": "Funds CI runners, benchmark harness for cost/quality/latency comparisons across providers, public dashboard at lynkr.dev, and self-hosted SearXNG + telemetry mirrors used by Lynkr's web search and routing layers.",
+        "amount": 150000,
+        "currency": "INR",
+        "frequency": "yearly",
+        "channels": ["fossunited-grant", "bank-transfer"]
+      },
+      {
+        "guid": "community-sponsor",
+        "status": "active",
+        "name": "Community sponsorship",
+        "description": "Recurring small-amount sponsorship from individual developers and small teams who use Lynkr.",
+        "amount": 0,
+        "currency": "USD",
+        "frequency": "monthly",
+        "channels": ["github-sponsors"]
+      },
+      {
+        "guid": "one-time",
+        "status": "active",
+        "name": "One-time contribution",
+        "description": "Any-amount one-time contribution from users or supporters.",
+        "amount": 0,
+        "currency": "USD",
+        "frequency": "one-time",
+        "channels": ["github-sponsors", "bank-transfer"]
+      }
+    ],
+    "history": []
+  }
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "lynkr",
-  "version": "9.0.2",
+  "version": "9.1.3",
   "description": "Self-hosted Claude Code & Cursor proxy with Databricks,AWS BedRock,Azure  adapters, openrouter, Ollama,llamacpp,LM Studio, workspace tooling, and MCP integration.",
   "main": "index.js",
   "bin": {
@@ -14,7 +14,7 @@
     "dev": "nodemon index.js",
     "lint": "eslint src index.js",
     "test": "npm run test:unit && npm run test:performance",
-    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js",
+    "test:unit": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/routing.test.js test/hybrid-routing-integration.test.js test/web-tools.test.js test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js test/azure-openai-config.test.js test/azure-openai-format-conversion.test.js test/azure-openai-routing.test.js test/azure-openai-streaming.test.js test/azure-openai-error-resilience.test.js test/azure-openai-integration.test.js test/openai-integration.test.js test/toon-compression.test.js test/llamacpp-integration.test.js test/resilience.test.js test/telemetry-routing.test.js test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js test/distill.test.js test/large-payload.test.js test/code-mode.test.js test/prompt-cache-injection.test.js test/risk-analyzer.test.js test/interaction-block.test.js test/preflight.test.js",
     "test:memory": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/memory/store.test.js test/memory/surprise.test.js test/memory/extractor.test.js test/memory/search.test.js test/memory/retriever.test.js",
     "test:new-features": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node --test test/passthrough-mode.test.js test/openrouter-error-resilience.test.js test/format-conversion.test.js",
     "test:performance": "DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/hybrid-routing-performance.test.js && DATABRICKS_API_KEY=test-key DATABRICKS_API_BASE=http://test.com node test/performance-tests.js",
@@ -55,6 +55,8 @@
     "express": "^5.1.0",
     "express-rate-limit": "^8.2.1",
     "fast-glob": "^3.3.2",
+    "hnswlib-node": "^3.0.0",
+    "js-tiktoken": "^1.0.20",
     "js-yaml": "^4.1.1",
     "openai": "^6.14.0",
     "pino": "^8.17.2",