npm - lynkr - Versions diffs - 9.1.2 → 9.1.4 - Mend

lynkr 9.1.2 → 9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42)

package/README.md +21 -10
package/package.json +3 -1
package/scripts/build-knn-index.js +130 -0
package/scripts/calibrate-thresholds.js +197 -0
package/scripts/compare-policies.js +67 -0
package/scripts/learn-output-ratios.js +162 -0
package/scripts/refresh-pricing.js +122 -0
package/scripts/run-routerarena.js +26 -0
package/scripts/sample-regret.js +84 -0
package/scripts/train-risk-classifier.js +191 -0
package/src/api/middleware/budget-enforcer.js +60 -0
package/src/api/middleware/load-shedding.js +11 -1
package/src/api/middleware/tenant.js +21 -0
package/src/api/router.js +19 -40
package/src/budget/hierarchical-budget.js +159 -0
package/src/cache/semantic.js +28 -2
package/src/clients/databricks.js +59 -5
package/src/config/index.js +239 -43
package/src/context/toon.js +5 -4
package/src/orchestrator/index.js +44 -6
package/src/prompts/system.js +34 -6
package/src/routing/bandit.js +246 -0
package/src/routing/cascade.js +106 -0
package/src/routing/complexity-analyzer.js +7 -15
package/src/routing/confidence-scorer.js +121 -0
package/src/routing/context-validator.js +71 -0
package/src/routing/cost-optimizer.js +5 -2
package/src/routing/deadline.js +52 -0
package/src/routing/drift-monitor.js +113 -0
package/src/routing/embedding-cache.js +77 -0
package/src/routing/index.js +314 -5
package/src/routing/knn-router.js +206 -0
package/src/routing/latency-tracker.js +113 -71
package/src/routing/model-tiers.js +156 -6
package/src/routing/output-ratios.js +57 -0
package/src/routing/regret-estimator.js +91 -0
package/src/routing/reward-pipeline.js +62 -0
package/src/routing/risk-classifier.js +130 -0
package/src/routing/shadow-mode.js +77 -0
package/src/routing/tenant-policy.js +96 -0
package/src/routing/tokenizer.js +162 -0
package/src/server.js +9 -0

package/src/routing/latency-tracker.js CHANGED

@@ -1,80 +1,78 @@
 /**
- * Rolling Latency Tracker
+ * Rolling Latency Tracker (per provider:model)
  *
- * Tracks per-provider latency using circular buffers to provide
- * P50/P95/P99 percentile statistics for routing decisions.
+ * Tracks latency keyed by `${provider}:${model}` so models within a provider
+ * (Opus vs Haiku) get separate stats. Backward-compatible: callers that pass
+ * only a provider still work — they're tracked under `${provider}:*`.
+ *
+ * Phase 1.5 of the routing overhaul: previous version keyed by provider only.
  *
  * @module routing/latency-tracker
  */
 const logger = require("../logger");
-/** Size of the circular buffer per provider */
 const BUFFER_SIZE = 200;
-/** Minimum sample count before penalizeScore returns a meaningful value */
 const MIN_SAMPLES = 10;
-/**
- * @typedef {Object} LatencyStats
- * @property {number} p50 - 50th percentile latency (ms)
- * @property {number} p95 - 95th percentile latency (ms)
- * @property {number} p99 - 99th percentile latency (ms)
- * @property {number} avg - Average latency (ms)
- * @property {number} count - Total measurements recorded
- * @property {number} lastUpdated - Timestamp of the last recorded measurement
- */
+/** Wildcard model used when caller doesn't specify one. */
+const ANY_MODEL = '*';
+function _key(provider, model) {
+  return `${provider}:${model || ANY_MODEL}`;
+}
 class LatencyTracker {
   constructor() {
-    /** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number }>} */
-    this._providers = new Map();
+    /** @type {Map<string, { buffer: number[], index: number, count: number, lastUpdated: number, provider: string, model: string }>} */
+    this._entries = new Map();
   }
   /**
-   * Record a latency measurement for a provider.
-   * @param {string} provider - Provider name (e.g. "databricks", "ollama")
-   * @param {number} latencyMs - Measured latency in milliseconds
+   * Record a latency measurement.
+   *
+   * Signatures:
+   *   record(provider, latencyMs)              // legacy
+   *   record(provider, model, latencyMs)       // preferred
    */
-  record(provider, latencyMs) {
-    if (!provider || typeof latencyMs !== "number" || latencyMs < 0) {
-      return;
+  record(provider, modelOrLatency, maybeLatency) {
+    let model;
+    let latencyMs;
+    if (typeof modelOrLatency === 'number') {
+      model = ANY_MODEL;
+      latencyMs = modelOrLatency;
+    } else {
+      model = modelOrLatency || ANY_MODEL;
+      latencyMs = maybeLatency;
     }
-    let entry = this._providers.get(provider);
+    if (!provider || typeof latencyMs !== "number" || latencyMs < 0) return;
+    const k = _key(provider, model);
+    let entry = this._entries.get(k);
     if (!entry) {
       entry = {
         buffer: new Array(BUFFER_SIZE).fill(0),
         index: 0,
         count: 0,
         lastUpdated: 0,
+        provider,
+        model,
       };
-      this._providers.set(provider, entry);
+      this._entries.set(k, entry);
     }
     entry.buffer[entry.index] = latencyMs;
     entry.index = (entry.index + 1) % BUFFER_SIZE;
     entry.count += 1;
     entry.lastUpdated = Date.now();
   }
-  /**
-   * Get latency statistics for a specific provider.
-   * @param {string} provider - Provider name
-   * @returns {LatencyStats|null} Statistics or null if no data
-   */
-  getStats(provider) {
-    const entry = this._providers.get(provider);
-    if (!entry || entry.count === 0) {
-      return null;
-    }
+  _computeStats(entry) {
+    if (!entry || entry.count === 0) return null;
     const sampleCount = Math.min(entry.count, BUFFER_SIZE);
     const samples = entry.buffer.slice(0, sampleCount);
     const sorted = samples.slice().sort((a, b) => a - b);
     const sum = sorted.reduce((acc, v) => acc + v, 0);
     return {
       p50: sorted[Math.floor(sampleCount * 0.5)],
       p95: sorted[Math.floor(sampleCount * 0.95)],
@@ -82,61 +80,105 @@ class LatencyTracker {
       avg: Math.round(sum / sampleCount),
       count: entry.count,
       lastUpdated: entry.lastUpdated,
+      provider: entry.provider,
+      model: entry.model,
     };
   }
   /**
-   * Calculate a routing score penalty/bonus based on provider latency.
-   *
-   * Returns a value from -5 to +10 that can be added to a routing score:
-   *   +10 if P95 > 10000ms (very slow, penalise by boosting complexity toward cloud)
-   *   +5  if P95 > 5000ms
-   *   -5  if P50 < 1000ms (fast, reward)
-   *    0  otherwise or if insufficient data
-   *
-   * @param {string} provider - Provider name
-   * @returns {number} Score adjustment (-5 to +10)
+   * Get stats for a specific (provider, model) pair, or aggregated for a provider
+   * if model is omitted.
    */
-  penalizeScore(provider) {
-    const stats = this.getStats(provider);
-    if (!stats || stats.count < MIN_SAMPLES) {
-      return 0;
+  getStats(provider, model = null) {
+    if (model) {
+      return this._computeStats(this._entries.get(_key(provider, model)));
+    }
+    // Aggregate across all models for this provider
+    const provEntries = [];
+    for (const [k, entry] of this._entries) {
+      if (entry.provider === provider) provEntries.push(entry);
     }
+    if (provEntries.length === 0) return null;
+    if (provEntries.length === 1) return this._computeStats(provEntries[0]);
+    // Pool samples across model entries to compute combined percentiles
+    const pooled = [];
+    let total = 0;
+    let lastUpdated = 0;
+    for (const e of provEntries) {
+      const n = Math.min(e.count, BUFFER_SIZE);
+      for (let i = 0; i < n; i++) pooled.push(e.buffer[i]);
+      total += e.count;
+      if (e.lastUpdated > lastUpdated) lastUpdated = e.lastUpdated;
+    }
+    if (pooled.length === 0) return null;
+    pooled.sort((a, b) => a - b);
+    const sum = pooled.reduce((acc, v) => acc + v, 0);
+    return {
+      p50: pooled[Math.floor(pooled.length * 0.5)],
+      p95: pooled[Math.floor(pooled.length * 0.95)],
+      p99: pooled[Math.floor(pooled.length * 0.99)],
+      avg: Math.round(sum / pooled.length),
+      count: total,
+      lastUpdated,
+      provider,
+      model: ANY_MODEL,
+    };
+  }
+  /** Latency penalty/bonus used by complexity-analyzer. */
+  penalizeScore(provider, model = null) {
+    const stats = this.getStats(provider, model);
+    if (!stats || stats.count < MIN_SAMPLES) return 0;
     if (stats.p95 > 10000) return 10;
     if (stats.p95 > 5000) return 5;
     if (stats.p50 < 1000) return -5;
     return 0;
   }
   /**
-   * Get statistics for all tracked providers.
-   * @returns {Map<string, LatencyStats>}
+   * Phase 1.5: per-model P95 lookup for deadline-aware routing (Phase 6.3).
+   * Returns null if insufficient samples.
+   */
+  getModelP95(provider, model) {
+    const stats = this.getStats(provider, model);
+    if (!stats || stats.count < MIN_SAMPLES) return null;
+    return stats.p95;
+  }
+  /**
+   * Whether a model is currently degraded (P95 > 2x its historical median).
+   * Currently uses a simple absolute threshold — better signal will come in
+   * Phase 4.3 (drift detection).
+   */
+  isDegraded(provider, model) {
+    const stats = this.getStats(provider, model);
+    if (!stats || stats.count < MIN_SAMPLES) return false;
+    return stats.p95 > stats.p50 * 2 && stats.p95 > 5000;
+  }
+  /**
+   * Get stats for every tracked entry.
+   *
+   * Backward-compat: when an entry was recorded via the legacy 2-arg
+   * `record(provider, latency)` signature, the model is the wildcard `*`
+   * and we return it keyed by provider name only. Entries with explicit
+   * models use the `provider:model` key.
    */
   getAllStats() {
     const result = new Map();
-    for (const provider of this._providers.keys()) {
-      const stats = this.getStats(provider);
-      if (stats) {
-        result.set(provider, stats);
-      }
+    for (const [k, entry] of this._entries) {
+      const stats = this._computeStats(entry);
+      if (!stats) continue;
+      const outKey = entry.model === ANY_MODEL ? entry.provider : k;
+      result.set(outKey, stats);
     }
     return result;
   }
 }
-// ---------------------------------------------------------------------------
-// Singleton
-// ---------------------------------------------------------------------------
-/** @type {LatencyTracker|null} */
 let instance = null;
-/**
- * Get the singleton LatencyTracker instance.
- * @returns {LatencyTracker}
- */
 function getLatencyTracker() {
   if (!instance) {
     instance = new LatencyTracker();
@@ -145,4 +187,4 @@ function getLatencyTracker() {
   return instance;
 }
-module.exports = { LatencyTracker, getLatencyTracker };
+module.exports = { LatencyTracker, getLatencyTracker, ANY_MODEL };

package/src/routing/model-tiers.js CHANGED

@@ -12,7 +12,10 @@ const config = require('../config');
 // Load tier config
 const TIER_CONFIG_PATH = path.join(__dirname, '../../config/model-tiers.json');
-// Tier definitions with complexity ranges
+// Phase 1.4: calibrated thresholds (written by scripts/calibrate-thresholds.js)
+const CALIBRATED_PATH = path.join(__dirname, '../../data/calibrated-thresholds.json');
+// Tier definitions with complexity ranges (defaults; may be overridden by calibration)
 const TIER_DEFINITIONS = {
   SIMPLE: {
     description: 'Greetings, simple Q&A, confirmations',
@@ -41,13 +44,30 @@ class ModelTierSelector {
     this.tierConfig = null;
     this.localProviders = {};
     this.providerAliases = {};
+    /** Per-tier ranges, possibly overridden by calibration. */
+    this.ranges = null;
     this._loadConfig();
+    this._loadCalibrated();
   }
   /**
    * Load tier configuration from JSON file
    */
   _loadConfig() {
+    // Check if tier routing mode is active (all 4 TIER_* env vars set)
+    const tierRoutingMode = !!(
+      config.modelTiers?.SIMPLE?.trim() &&
+      config.modelTiers?.MEDIUM?.trim() &&
+      config.modelTiers?.COMPLEX?.trim() &&
+      config.modelTiers?.REASONING?.trim()
+    );
+    if (tierRoutingMode) {
+      logger.debug('[ModelTiers] Tier routing mode active, building config from TIER_* env vars');
+      this._buildFromEnvVars();
+      return;
+    }
     try {
       if (fs.existsSync(TIER_CONFIG_PATH)) {
         const data = JSON.parse(fs.readFileSync(TIER_CONFIG_PATH, 'utf8'));
@@ -65,9 +85,86 @@ class ModelTierSelector {
     }
   }
+  /**
+   * Phase 1.4: load calibrated tier thresholds if the nightly job has produced them.
+   * Falls back silently to TIER_DEFINITIONS when absent or malformed.
+   */
+  _loadCalibrated() {
+    this.ranges = this._defaultRanges();
+    try {
+      if (!fs.existsSync(CALIBRATED_PATH)) return;
+      const data = JSON.parse(fs.readFileSync(CALIBRATED_PATH, 'utf8'));
+      if (!data?.ranges) return;
+      const calibrated = {};
+      for (const tier of Object.keys(TIER_DEFINITIONS)) {
+        const r = data.ranges[tier];
+        if (Array.isArray(r) && r.length === 2 && r[0] <= r[1]) {
+          calibrated[tier] = r;
+        } else {
+          calibrated[tier] = TIER_DEFINITIONS[tier].range;
+        }
+      }
+      this.ranges = calibrated;
+      logger.info({ ranges: this.ranges, calibratedAt: data.calibratedAt }, '[ModelTiers] Using calibrated thresholds');
+    } catch (err) {
+      logger.debug({ err: err.message }, '[ModelTiers] Calibrated thresholds load failed; using defaults');
+    }
+  }
+  _defaultRanges() {
+    const ranges = {};
+    for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
+      ranges[tier] = def.range.slice();
+    }
+    return ranges;
+  }
   /**
    * Load default tier config
    */
+  /**
+   * Build tier config from TIER_* environment variables
+   * Format: TIER_SIMPLE=provider:model
+   */
+  _buildFromEnvVars() {
+    this.tierConfig = {};
+    this.localProviders = {
+      ollama: { free: true, defaultTier: 'SIMPLE' },
+      llamacpp: { free: true, defaultTier: 'SIMPLE' },
+      lmstudio: { free: true, defaultTier: 'SIMPLE' },
+      mlx: { free: true, defaultTier: 'SIMPLE' },
+    };
+    const tiers = ['SIMPLE', 'MEDIUM', 'COMPLEX', 'REASONING'];
+    for (const tier of tiers) {
+      const envValue = config.modelTiers?.[tier]?.trim();
+      if (!envValue) continue;
+      // Parse provider:model format
+      const match = envValue.match(/^([a-z-]+):(.+)$/);
+      if (!match) {
+        logger.warn({ tier, value: envValue }, '[ModelTiers] Invalid TIER format, expected provider:model');
+        continue;
+      }
+      const [, provider, model] = match;
+      // Initialize tier config if not exists
+      if (!this.tierConfig[tier]) {
+        this.tierConfig[tier] = { preferred: {} };
+      }
+      // Set this as the ONLY preferred model for this tier+provider
+      this.tierConfig[tier].preferred[provider] = [model];
+      logger.debug({
+        tier,
+        provider,
+        model
+      }, '[ModelTiers] Tier configured from env');
+    }
+  }
   _loadDefaults() {
     this.tierConfig = {
       SIMPLE: { preferred: { ollama: ['llama3.2'], openai: ['gpt-4o-mini'] } },
@@ -92,20 +189,73 @@ class ModelTierSelector {
   }
   /**
-   * Get tier from complexity score
+   * Get tier from complexity score.
+   * Phase 1.4: honors calibrated ranges when present.
    * @param {number} complexityScore - Score from 0-100
    * @returns {string} Tier name (SIMPLE, MEDIUM, COMPLEX, REASONING)
    */
   getTier(complexityScore) {
     const score = Math.max(0, Math.min(100, complexityScore || 0));
+    const ranges = this.ranges || this._defaultRanges();
+    for (const tier of Object.keys(TIER_DEFINITIONS)) {
+      const [lo, hi] = ranges[tier];
+      if (score >= lo && score <= hi) return tier;
+    }
+    return score > 75 ? 'REASONING' : 'SIMPLE';
+  }
-    for (const [tier, def] of Object.entries(TIER_DEFINITIONS)) {
-      if (score >= def.range[0] && score <= def.range[1]) {
-        return tier;
+  /**
+   * Phase 1.3: find a model with at least `minContext` context window.
+   * Returns null when no qualifying model is available.
+   */
+  findContextCapable(minContext, preferredTier = null) {
+    const { getModelRegistrySync } = require('./model-registry');
+    const registry = getModelRegistrySync();
+    const tierOrder = preferredTier
+      ? [preferredTier, 'REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE']
+      : ['REASONING', 'COMPLEX', 'MEDIUM', 'SIMPLE'];
+    const seen = new Set();
+    for (const tier of tierOrder) {
+      if (seen.has(tier)) continue;
+      seen.add(tier);
+      const tierConfig = this.tierConfig[tier];
+      if (!tierConfig?.preferred) continue;
+      for (const [provider, models] of Object.entries(tierConfig.preferred)) {
+        for (const model of models) {
+          const cost = registry.getCost(model);
+          if (cost?.context && cost.context >= minContext) {
+            return { provider, model, tier, context: cost.context };
+          }
+        }
       }
     }
+    return null;
+  }
-    return score > 75 ? 'REASONING' : 'SIMPLE';
+  /**
+   * Find a vision-capable model at or above `preferredTier`.
+   * Walks tier order from preferred upward; returns null when none available.
+   */
+  findVisionCapable(preferredTier = null) {
+    const { getModelRegistrySync } = require('./model-registry');
+    const registry = getModelRegistrySync();
+    const tierOrder = preferredTier
+      ? [preferredTier, 'COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE']
+      : ['COMPLEX', 'REASONING', 'MEDIUM', 'SIMPLE'];
+    const seen = new Set();
+    for (const t of tierOrder) {
+      if (seen.has(t)) continue;
+      seen.add(t);
+      const tierConfig = this.tierConfig[t];
+      if (!tierConfig?.preferred) continue;
+      for (const [provider, models] of Object.entries(tierConfig.preferred)) {
+        for (const model of models) {
+          const info = registry.getCost(model);
+          if (info?.vision) return { provider, model, tier: t };
+        }
+      }
+    }
+    return null;
   }
   /**

package/src/routing/output-ratios.js ADDED

@@ -0,0 +1,57 @@
+/**
+ * Output-token ratio lookup (Phase 2.3).
+ *
+ * Reads data/output-ratios.json (built by scripts/learn-output-ratios.js).
+ * Falls back to hardcoded defaults when the file is absent.
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+const FILE_PATH = path.join(__dirname, '../../data/output-ratios.json');
+const DEFAULT_RATIOS = {
+  simple_qa: 0.30,
+  code_gen: 2.10,
+  code_edit: 1.40,
+  summarization: 0.15,
+  reasoning: 1.50,
+  tool_use: 0.80,
+  default: 0.50,
+};
+let _cached = null;
+let _cacheLoadedAt = 0;
+const RELOAD_INTERVAL_MS = 60_000;
+function _load() {
+  if (_cached && Date.now() - _cacheLoadedAt < RELOAD_INTERVAL_MS) return _cached;
+  try {
+    if (fs.existsSync(FILE_PATH)) {
+      const data = JSON.parse(fs.readFileSync(FILE_PATH, 'utf8'));
+      if (data?.ratios && typeof data.ratios === 'object') {
+        _cached = { ...DEFAULT_RATIOS, ...data.ratios };
+        _cacheLoadedAt = Date.now();
+        return _cached;
+      }
+    }
+  } catch (err) {
+    logger.debug({ err: err.message }, '[OutputRatios] Load failed, using defaults');
+  }
+  _cached = DEFAULT_RATIOS;
+  _cacheLoadedAt = Date.now();
+  return _cached;
+}
+function ratioFor(taskType) {
+  const ratios = _load();
+  const key = (taskType || 'default').toLowerCase();
+  return ratios[key] ?? ratios.default ?? 0.5;
+}
+function reload() {
+  _cached = null;
+}
+module.exports = { ratioFor, reload, DEFAULT_RATIOS };

package/src/routing/regret-estimator.js ADDED

@@ -0,0 +1,91 @@
+/**
+ * Regret estimator (Phase 4.2).
+ *
+ * Periodically samples a fraction of yesterday's requests, re-runs them
+ * through a strictly-better model (Opus), and compares quality. If the
+ * routed model consistently underperforms vs Opus by >10%, this writes an
+ * alert to data/regret-alerts.json.
+ *
+ * Off by default (costs real money). Enable with LYNKR_REGRET_ESTIMATOR=true
+ * and run via cron: `node scripts/sample-regret.js`.
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+const ALERTS_PATH = path.join(__dirname, '../../data/regret-alerts.json');
+/**
+ * @param {object} args
+ * @param {Array<{request: object, response: object, model: string, quality: number}>} args.samples
+ * @param {function} args.runOpus — async (request) → { response, quality }
+ * @param {number} args.threshold — fractional underperformance threshold (default 0.10)
+ * @returns {Promise<{ regret, sampledCount, alerts }>}
+ */
+async function estimate(args) {
+  const threshold = args.threshold ?? 0.10;
+  const results = [];
+  for (const s of args.samples) {
+    try {
+      const opus = await args.runOpus(s.request);
+      const delta = (opus.quality - s.quality) / Math.max(1, opus.quality);
+      results.push({
+        model: s.model,
+        routedQuality: s.quality,
+        opusQuality: opus.quality,
+        regret: Math.max(0, delta),
+        underperforming: delta > threshold,
+      });
+    } catch (err) {
+      logger.debug({ err: err.message }, '[RegretEstimator] Opus re-run failed');
+    }
+  }
+  const byModel = new Map();
+  for (const r of results) {
+    if (!byModel.has(r.model)) byModel.set(r.model, []);
+    byModel.get(r.model).push(r);
+  }
+  const alerts = [];
+  for (const [model, runs] of byModel) {
+    const underperforming = runs.filter(r => r.underperforming).length;
+    const rate = underperforming / runs.length;
+    if (rate > 0.5 && runs.length >= 5) {
+      alerts.push({
+        model,
+        underperformingRate: rate,
+        sampleSize: runs.length,
+        avgRegret: runs.reduce((s, r) => s + r.regret, 0) / runs.length,
+        timestamp: Date.now(),
+      });
+    }
+  }
+  if (alerts.length > 0) {
+    try {
+      fs.mkdirSync(path.dirname(ALERTS_PATH), { recursive: true });
+      let existing = [];
+      if (fs.existsSync(ALERTS_PATH)) {
+        try { existing = JSON.parse(fs.readFileSync(ALERTS_PATH, 'utf8')); } catch {}
+      }
+      const out = Array.isArray(existing) ? existing : [];
+      out.push(...alerts);
+      // Keep last 100 alerts
+      const trimmed = out.slice(-100);
+      fs.writeFileSync(ALERTS_PATH, JSON.stringify(trimmed, null, 2));
+    } catch (err) {
+      logger.warn({ err: err.message }, '[RegretEstimator] Alert write failed');
+    }
+  }
+  const totalRegret = results.reduce((s, r) => s + r.regret, 0) / Math.max(1, results.length);
+  return { regret: totalRegret, sampledCount: results.length, alerts };
+}
+function isEnabled() {
+  return process.env.LYNKR_REGRET_ESTIMATOR === 'true';
+}
+module.exports = { estimate, isEnabled };

package/src/routing/reward-pipeline.js ADDED

@@ -0,0 +1,62 @@
+/**
+ * Reward pipeline for the LinUCB bandit (Phase 4.1).
+ *
+ * Combines quality score, normalised cost, and normalised latency into a
+ * single scalar reward in [0, 100]. The bandit then rescales to [0, 1].
+ *
+ *   reward = quality - λ·norm_cost·100 - μ·norm_latency·100
+ *
+ * Normalisation uses running min/max so we don't need to pre-compute global
+ * scales.
+ */
+const logger = require('../logger');
+const DEFAULT_LAMBDA = 0.3;
+const DEFAULT_MU = 0.1;
+class RewardPipeline {
+  constructor({ lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU } = {}) {
+    this.lambda = lambda;
+    this.mu = mu;
+    this.costRange = { min: Infinity, max: -Infinity };
+    this.latencyRange = { min: Infinity, max: -Infinity };
+  }
+  observe({ cost, latency }) {
+    if (typeof cost === 'number' && cost >= 0) {
+      this.costRange.min = Math.min(this.costRange.min, cost);
+      this.costRange.max = Math.max(this.costRange.max, cost);
+    }
+    if (typeof latency === 'number' && latency >= 0) {
+      this.latencyRange.min = Math.min(this.latencyRange.min, latency);
+      this.latencyRange.max = Math.max(this.latencyRange.max, latency);
+    }
+  }
+  _normalize(value, range) {
+    if (!isFinite(range.min) || !isFinite(range.max) || range.max <= range.min) return 0;
+    const v = Math.max(range.min, Math.min(range.max, value));
+    return (v - range.min) / (range.max - range.min);
+  }
+  /**
+   * @param {object} obs - { quality: 0-100, cost: dollars, latency: ms }
+   * @returns {number} reward in [0, 100]
+   */
+  reward(obs) {
+    this.observe(obs);
+    const q = typeof obs.quality === 'number' ? obs.quality : 50;
+    const cn = this._normalize(obs.cost ?? 0, this.costRange);
+    const ln = this._normalize(obs.latency ?? 0, this.latencyRange);
+    return Math.max(0, Math.min(100, q - this.lambda * cn * 100 - this.mu * ln * 100));
+  }
+}
+let _instance = null;
+function getRewardPipeline() {
+  if (!_instance) _instance = new RewardPipeline();
+  return _instance;
+}
+module.exports = { RewardPipeline, getRewardPipeline };