npm - lynkr - Versions diffs - 9.1.2 → 9.1.4 - Mend

lynkr 9.1.2 → 9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/README.md +21 -10
package/package.json +3 -1
package/scripts/build-knn-index.js +130 -0
package/scripts/calibrate-thresholds.js +197 -0
package/scripts/compare-policies.js +67 -0
package/scripts/learn-output-ratios.js +162 -0
package/scripts/refresh-pricing.js +122 -0
package/scripts/run-routerarena.js +26 -0
package/scripts/sample-regret.js +84 -0
package/scripts/train-risk-classifier.js +191 -0
package/src/api/middleware/budget-enforcer.js +60 -0
package/src/api/middleware/load-shedding.js +11 -1
package/src/api/middleware/tenant.js +21 -0
package/src/api/router.js +19 -40
package/src/budget/hierarchical-budget.js +159 -0
package/src/cache/semantic.js +28 -2
package/src/clients/databricks.js +59 -5
package/src/config/index.js +239 -43
package/src/context/toon.js +5 -4
package/src/orchestrator/index.js +44 -6
package/src/prompts/system.js +34 -6
package/src/routing/bandit.js +246 -0
package/src/routing/cascade.js +106 -0
package/src/routing/complexity-analyzer.js +7 -15
package/src/routing/confidence-scorer.js +121 -0
package/src/routing/context-validator.js +71 -0
package/src/routing/cost-optimizer.js +5 -2
package/src/routing/deadline.js +52 -0
package/src/routing/drift-monitor.js +113 -0
package/src/routing/embedding-cache.js +77 -0
package/src/routing/index.js +314 -5
package/src/routing/knn-router.js +206 -0
package/src/routing/latency-tracker.js +113 -71
package/src/routing/model-tiers.js +156 -6
package/src/routing/output-ratios.js +57 -0
package/src/routing/regret-estimator.js +91 -0
package/src/routing/reward-pipeline.js +62 -0
package/src/routing/risk-classifier.js +130 -0
package/src/routing/shadow-mode.js +77 -0
package/src/routing/tenant-policy.js +96 -0
package/src/routing/tokenizer.js +162 -0
package/src/server.js +9 -0

package/src/routing/risk-classifier.js ADDED Viewed

@@ -0,0 +1,130 @@
+/**
+ * Risk classifier (Phase 3.4).
+ *
+ * Replaces the regex-based risk-analyzer with a small logistic-regression
+ * model trained on TF-IDF of unigrams + bigrams. Bootstrap labels come from
+ * the existing regex matcher; subsequent training uses telemetry-flagged
+ * outcomes (set the request header `x-lynkr-risk-confirmed: true` to mark a
+ * request as truly risky for training).
+ *
+ * Falls back to the existing regex analyzer when no model artifact is present
+ * at data/risk-classifier.json. Model weights are JSON-serializable so they
+ * load fast and can be diffed in PRs.
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+const { analyzeRisk: regexAnalyzeRisk } = require('./risk-analyzer');
+// Location of the optional trained model artifact (JSON with `weights` and `bias`).
+const MODEL_PATH = path.join(__dirname, '../../data/risk-classifier.json');
+// Probabilities at or above this value are reported as at least 'medium' risk.
+const DECISION_THRESHOLD = 0.5;
+// Cached model object; stays null when no usable artifact was found.
+let _model = null;
+// True once a load has been attempted, so the file is not re-read until reloadModel().
+let _modelLoaded = false;
+function _tokenize(text) {
+  if (!text || typeof text !== 'string') return [];
+  return text.toLowerCase().split(/[^a-z0-9_\-/.]+/).filter(Boolean);
+}
+function _features(text) {
+  const tokens = _tokenize(text);
+  const out = new Map();
+  for (let i = 0; i < tokens.length; i++) {
+    out.set(tokens[i], (out.get(tokens[i]) || 0) + 1);
+    if (i + 1 < tokens.length) {
+      const bigram = `${tokens[i]} ${tokens[i + 1]}`;
+      out.set(bigram, (out.get(bigram) || 0) + 1);
+    }
+  }
+  return out;
+}
+function _loadModel() {
+  if (_modelLoaded) return _model;
+  _modelLoaded = true;
+  try {
+    if (!fs.existsSync(MODEL_PATH)) return null;
+    const raw = JSON.parse(fs.readFileSync(MODEL_PATH, 'utf8'));
+    if (!raw?.weights || !raw?.bias) return null;
+    _model = raw;
+    return _model;
+  } catch (err) {
+    logger.debug({ err: err.message }, '[RiskClassifier] Model load failed');
+    return null;
+  }
+}
+function _sigmoid(z) {
+  if (z >= 0) return 1 / (1 + Math.exp(-z));
+  const ez = Math.exp(z);
+  return ez / (1 + ez);
+}
+function _predict(text, model) {
+  const feats = _features(text);
+  let z = model.bias;
+  for (const [tok, count] of feats) {
+    const w = model.weights[tok];
+    if (typeof w === 'number') z += w * count;
+  }
+  return _sigmoid(z);
+}
+/**
+ * Drop-in replacement for analyzeRisk(payload).
+ * Returns { level: 'low'|'medium'|'high', score, ...regexHits } so it's
+ * compatible with the existing telemetry pipeline.
+ */
+function analyzeRisk(payload) {
+  // Always run the regex analyzer for hit details (kept for telemetry).
+  const regexResult = regexAnalyzeRisk(payload);
+  const model = _loadModel();
+  if (!model) return regexResult;
+  // Build the text we feed to the classifier: latest user message + tool defs + system fingerprint
+  let text = '';
+  if (Array.isArray(payload?.messages)) {
+    for (let i = payload.messages.length - 1; i >= 0; i--) {
+      const msg = payload.messages[i];
+      if (msg?.role === 'user') {
+        if (typeof msg.content === 'string') text = msg.content;
+        else if (Array.isArray(msg.content)) {
+          text = msg.content.filter(b => b?.type === 'text').map(b => b.text).join(' ');
+        }
+        break;
+      }
+    }
+  }
+  if (typeof payload?.system === 'string') text += ' ' + payload.system;
+  const prob = _predict(text, model);
+  let level;
+  if (prob >= 0.75) level = 'high';
+  else if (prob >= DECISION_THRESHOLD) level = 'medium';
+  else level = 'low';
+  // Reconcile with regex: if classifier disagrees with regex by a lot, prefer the stricter signal.
+  // (We never want to *downgrade* a regex-flagged high-risk request silently.)
+  if (regexResult?.level === 'high' && level !== 'high') level = 'high';
+  return {
+    ...regexResult,
+    level,
+    score: prob,
+    classifierUsed: true,
+  };
+}
+function reloadModel() {
+  _modelLoaded = false;
+  _model = null;
+}
+module.exports = {
+  analyzeRisk,
+  reloadModel,
+  // Internal helpers exposed under `_internal` (presumably for tests or the
+  // training script — confirm against callers).
+  _internal: { _features, _predict },
+};

package/src/routing/shadow-mode.js ADDED Viewed

@@ -0,0 +1,77 @@
+/**
+ * Shadow-mode policy A/B testing (Phase 4.4).
+ *
+ * Lets us test a new routing policy against production without serving its
+ * decisions. The shadow policy runs alongside the active policy, makes its
+ * decision, and that decision is logged. A weekly comparison job
+ * (scripts/compare-policies.js) summarises agreement, cost delta, and (via
+ * the regret estimator) projected quality delta on the disagreed-on subset.
+ *
+ * Activation:
+ *   - Set LYNKR_SHADOW_POLICY=<name> to enable
+ *   - Implement and register policies via registerPolicy()
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+// JSONL file that _appendLog() appends one comparison record per line to.
+const LOG_PATH = path.join(__dirname, '../../data/shadow-decisions.jsonl');
+// Policy name -> policy function, populated via registerPolicy().
+const _registry = new Map();
+function registerPolicy(name, fn) {
+  if (typeof fn !== 'function') throw new Error('Policy must be a function');
+  _registry.set(name, fn);
+}
+function isEnabled() {
+  return !!process.env.LYNKR_SHADOW_POLICY && _registry.has(process.env.LYNKR_SHADOW_POLICY);
+}
+function getShadowPolicy() {
+  if (!isEnabled()) return null;
+  return _registry.get(process.env.LYNKR_SHADOW_POLICY);
+}
+function _appendLog(entry) {
+  try {
+    fs.mkdirSync(path.dirname(LOG_PATH), { recursive: true });
+    fs.appendFileSync(LOG_PATH, JSON.stringify(entry) + '\n');
+  } catch (err) {
+    logger.debug({ err: err.message }, '[ShadowMode] Log append failed');
+  }
+}
+/**
+ * Compare active and shadow decisions on the same payload, log the result.
+ * Does NOT change which decision is served — the caller uses activeDecision.
+ */
+async function compareAndLog({ payload, activeDecision, shadowFn }) {
+  if (!shadowFn) return null;
+  let shadowDecision;
+  try {
+    shadowDecision = await shadowFn(payload);
+  } catch (err) {
+    logger.debug({ err: err.message }, '[ShadowMode] Shadow policy failed');
+    return null;
+  }
+  const agree = activeDecision.provider === shadowDecision?.provider
+    && activeDecision.model === shadowDecision?.model;
+  _appendLog({
+    timestamp: Date.now(),
+    policy: process.env.LYNKR_SHADOW_POLICY,
+    agree,
+    active: { provider: activeDecision.provider, model: activeDecision.model, tier: activeDecision.tier, score: activeDecision.score },
+    shadow: shadowDecision ? { provider: shadowDecision.provider, model: shadowDecision.model, tier: shadowDecision.tier, score: shadowDecision.score } : null,
+  });
+  return { agree, shadow: shadowDecision };
+}
+module.exports = {
+  registerPolicy,
+  isEnabled,
+  getShadowPolicy,
+  compareAndLog,
+  // Exported so other code can locate the decision log (presumably
+  // scripts/compare-policies.js — confirm).
+  LOG_PATH,
+};

package/src/routing/tenant-policy.js ADDED Viewed

@@ -0,0 +1,96 @@
+/**
+ * Per-tenant routing policy (Phase 6.1).
+ *
+ * Each tenant can override:
+ *   - tier thresholds (which complexity scores map to which tiers)
+ *   - reward weights (λ for cost, μ for latency in the bandit)
+ *   - max acceptable latency
+ *   - blocked models (never route to these)
+ *
+ * Tenant id is read from the `lynkr-tenant-id` request header (falling back to
+ * `x-tenant-id`). Per-tenant configs live in data/tenants/<id>.json. Falls back
+ * to global config when the id is absent or the file doesn't exist.
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+// Directory holding one <id>.json policy file per tenant.
+const TENANTS_DIR = path.join(__dirname, '../../data/tenants');
+// Cache of { config, loadedAt } entries; a null config records "no policy file".
+const _cache = new Map();
+// Cached entries older than this (60 s) are re-read from disk on next access.
+const RELOAD_INTERVAL_MS = 60_000;
+function _loadTenant(tenantId) {
+  if (!tenantId) return null;
+  const cached = _cache.get(tenantId);
+  if (cached && Date.now() - cached.loadedAt < RELOAD_INTERVAL_MS) return cached.config;
+  const file = path.join(TENANTS_DIR, `${tenantId.replace(/[^a-zA-Z0-9_-]/g, '_')}.json`);
+  if (!fs.existsSync(file)) {
+    _cache.set(tenantId, { config: null, loadedAt: Date.now() });
+    return null;
+  }
+  try {
+    const data = JSON.parse(fs.readFileSync(file, 'utf8'));
+    _cache.set(tenantId, { config: data, loadedAt: Date.now() });
+    return data;
+  } catch (err) {
+    logger.warn({ tenantId, err: err.message }, '[TenantPolicy] Load failed');
+    return null;
+  }
+}
+function getPolicy(tenantId) {
+  const t = _loadTenant(tenantId);
+  if (!t) return null;
+  return {
+    tenantId,
+    tierRanges: t.tierRanges || null,
+    rewardWeights: t.rewardWeights || null,
+    maxLatencyMs: t.maxLatencyMs ?? null,
+    blockedModels: Array.isArray(t.blockedModels) ? new Set(t.blockedModels) : null,
+    preferredProviders: Array.isArray(t.preferredProviders) ? t.preferredProviders : null,
+  };
+}
+/**
+ * Apply tenant overrides to a routing decision after the main algorithm has
+ * produced one. Returns either the decision unchanged or a new decision
+ * respecting the tenant constraints.
+ */
+function applyTenantOverrides(decision, tenantPolicy) {
+  if (!tenantPolicy || !decision) return decision;
+  // Blocked model → fall back to next-cheapest qualifying model in same tier
+  if (tenantPolicy.blockedModels && decision.model && tenantPolicy.blockedModels.has(decision.model)) {
+    const { getCostOptimizer } = require('./cost-optimizer');
+    const optimizer = getCostOptimizer();
+    const cheapest = optimizer.findCheapestForTier(decision.tier, tenantPolicy.preferredProviders || []);
+    if (cheapest && !tenantPolicy.blockedModels.has(cheapest.model)) {
+      return {
+        ...decision,
+        provider: cheapest.provider,
+        model: cheapest.model,
+        method: (decision.method || '') + '+tenant_override',
+        tenantOverride: { reason: 'blocked_model', tenantId: tenantPolicy.tenantId },
+      };
+    }
+  }
+  return decision;
+}
+function getTenantId(req) {
+  if (!req) return null;
+  const h = req.headers || req;
+  return (h['lynkr-tenant-id'] || h['LYNKR-Tenant-Id'] || h['x-tenant-id'] || null);
+}
+/**
+ * Drop every cached tenant entry so the next _loadTenant() call goes back to
+ * disk instead of waiting for RELOAD_INTERVAL_MS to elapse.
+ */
+function reloadCache() {
+  _cache.clear();
+}
+// Public API; _loadTenant and the cache itself stay module-private.
+module.exports = {
+  getPolicy,
+  getTenantId,
+  applyTenantOverrides,
+  reloadCache,
+};

package/src/routing/tokenizer.js ADDED Viewed

@@ -0,0 +1,162 @@
+/**
+ * Accurate token estimation using js-tiktoken.
+ *
+ * Replaces the chars/4 approximation across the routing path. Falls back to
+ * chars/4 if js-tiktoken is unavailable (graceful degradation — never throws).
+ *
+ * Phase 1.1 of the routing overhaul.
+ *
+ * @module routing/tokenizer
+ */
+const logger = require('../logger');
+// The js-tiktoken module once loaded; null when it is unavailable.
+let _tiktoken = null;
+// True once a load has been attempted, so require() is tried only once.
+let _tiktokenLoaded = false;
+// Encoding name (e.g. 'cl100k_base') -> encoder instance.
+const _encoderCache = new Map();
+function _loadTiktoken() {
+  if (_tiktokenLoaded) return _tiktoken;
+  _tiktokenLoaded = true;
+  try {
+    _tiktoken = require('js-tiktoken');
+  } catch (err) {
+    logger.debug(
+      { err: err.message },
+      '[Tokenizer] js-tiktoken not available, falling back to chars/4'
+    );
+    _tiktoken = null;
+  }
+  return _tiktoken;
+}
+function _encodingForModel(model) {
+  if (!model || typeof model !== 'string') return 'cl100k_base';
+  const lower = model.toLowerCase();
+  // GPT-4o family + o-series use o200k_base
+  if (
+    lower.includes('gpt-4o') ||
+    lower.includes('gpt-4.1') ||
+    lower.includes('gpt-5') ||
+    lower.includes('o1') ||
+    lower.includes('o3') ||
+    lower.includes('o4')
+  ) {
+    return 'o200k_base';
+  }
+  // GPT-4 / GPT-3.5 / Anthropic / most others approximate well with cl100k_base
+  return 'cl100k_base';
+}
+function _getEncoder(model) {
+  const tiktoken = _loadTiktoken();
+  if (!tiktoken) return null;
+  const encName = _encodingForModel(model);
+  let cached = _encoderCache.get(encName);
+  if (cached) return cached;
+  try {
+    cached = tiktoken.getEncoding(encName);
+    _encoderCache.set(encName, cached);
+    return cached;
+  } catch (err) {
+    logger.debug(
+      { err: err.message, encoding: encName },
+      '[Tokenizer] Encoder load failed, using fallback'
+    );
+    return null;
+  }
+}
+/**
+ * Count tokens in a single string.
+ * @param {string} text
+ * @param {string|null} model - optional model name for encoding selection
+ * @returns {number}
+ */
+function countTokens(text, model = null) {
+  if (!text || typeof text !== 'string') return 0;
+  const encoder = _getEncoder(model);
+  if (!encoder) return Math.ceil(text.length / 4);
+  try {
+    return encoder.encode(text).length;
+  } catch (err) {
+    return Math.ceil(text.length / 4);
+  }
+}
+function _extractText(content) {
+  if (!content) return '';
+  if (typeof content === 'string') return content;
+  if (Array.isArray(content)) {
+    let combined = '';
+    for (const block of content) {
+      if (!block) continue;
+      if (typeof block === 'string') {
+        combined += block + ' ';
+      } else if (block.type === 'text' && block.text) {
+        combined += block.text + ' ';
+      } else if (typeof block.text === 'string') {
+        combined += block.text + ' ';
+      } else if (block.type === 'tool_use' && block.input) {
+        try {
+          combined += JSON.stringify(block.input) + ' ';
+        } catch {
+          // ignore non-serializable input
+        }
+      } else if (block.type === 'tool_result' && block.content) {
+        combined += _extractText(block.content) + ' ';
+      }
+    }
+    return combined;
+  }
+  return '';
+}
+function _imageTokenEstimate(content) {
+  if (!Array.isArray(content)) return 0;
+  let imageBase64Bytes = 0;
+  for (const block of content) {
+    if (block?.type === 'image' && block.source?.data) {
+      imageBase64Bytes += block.source.data.length;
+    }
+  }
+  // Rough heuristic mirroring previous behavior: ~1 token per 6 base64 chars
+  return Math.floor(imageBase64Bytes / 6);
+}
+/**
+ * Count tokens across a full Anthropic-format message array + optional system.
+ * @param {Array} messages
+ * @param {string|Array|null} system
+ * @param {string|null} model
+ * @returns {number}
+ */
+function countMessagesTokens(messages = [], system = null, model = null) {
+  let total = 0;
+  if (system) {
+    total += countTokens(_extractText(system), model);
+  }
+  if (Array.isArray(messages)) {
+    for (const msg of messages) {
+      total += countTokens(_extractText(msg?.content), model);
+      total += _imageTokenEstimate(msg?.content);
+    }
+    // Per-message structural overhead (~4 tokens per message in both Anthropic and OpenAI)
+    total += messages.length * 4;
+  }
+  return total;
+}
+/**
+ * Count tokens from a full payload object (Anthropic-style with .messages, .system, .model).
+ */
+function countPayloadTokens(payload, model = null) {
+  if (!payload) return 0;
+  return countMessagesTokens(payload.messages, payload.system, model || payload.model);
+}
+// Public API; the underscore-prefixed helpers above are not exported.
+module.exports = {
+  countTokens,
+  countMessagesTokens,
+  countPayloadTokens,
+};

package/src/server.js CHANGED Viewed

@@ -9,6 +9,8 @@ const { metricsMiddleware } = require("./api/middleware/metrics");
 const { requestLoggingMiddleware } = require("./api/middleware/request-logging");
 const { errorHandlingMiddleware, notFoundHandler } = require("./api/middleware/error-handling");
 const { loadSheddingMiddleware, initializeLoadShedder } = require("./api/middleware/load-shedding");
+const { tenantMiddleware } = require("./api/middleware/tenant");
+const { budgetEnforcer } = require("./api/middleware/budget-enforcer");
 const { livenessCheck, readinessCheck } = require("./api/health");
 const { getMetricsCollector } = require("./observability/metrics");
 const { getShutdownManager } = require("./server/shutdown");
@@ -90,6 +92,13 @@ function createApp() {
     app.use('/v1/messages', budgetMiddleware);
   }
+  // Phase 6.1 — per-tenant routing policies (LYNKR-Tenant-Id header).
+  // Runs before message handling so res.locals.tenantPolicy is populated.
+  app.use('/v1/messages', tenantMiddleware);
+  // Phase 6.2 — hierarchical budget enforcement (LYNKR_BUDGET_ENFORCER=false to disable).
+  app.use('/v1/messages', budgetEnforcer);
   // Health check endpoints
   app.get("/health/live", livenessCheck);
   app.get("/health/ready", readinessCheck);