npm - lynkr - Versions diffs - 9.0.2 → 9.1.3 - Mend

lynkr 9.0.2 → 9.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (65) hide show

package/README.md +21 -10
package/bin/cli.js +18 -1
package/bin/lynkr-trajectory.js +136 -0
package/bin/lynkr-usage.js +219 -0
package/funding.json +110 -0
package/package.json +4 -2
package/public/dashboard.html +665 -0
package/scripts/build-knn-index.js +130 -0
package/scripts/calibrate-thresholds.js +197 -0
package/scripts/compare-policies.js +67 -0
package/scripts/learn-output-ratios.js +162 -0
package/scripts/refresh-pricing.js +122 -0
package/scripts/run-routerarena.js +26 -0
package/scripts/sample-regret.js +84 -0
package/scripts/train-risk-classifier.js +191 -0
package/src/api/files-router.js +6 -6
package/src/api/middleware/budget-enforcer.js +60 -0
package/src/api/middleware/budget.js +19 -1
package/src/api/middleware/load-shedding.js +17 -0
package/src/api/middleware/tenant.js +21 -0
package/src/api/openai-router.js +1 -1
package/src/api/router.js +204 -87
package/src/budget/hierarchical-budget.js +159 -0
package/src/cache/semantic.js +28 -2
package/src/clients/databricks.js +68 -10
package/src/clients/openai-format.js +31 -5
package/src/config/index.js +246 -43
package/src/context/toon.js +5 -4
package/src/dashboard/api.js +170 -0
package/src/dashboard/router.js +13 -0
package/src/headroom/client.js +3 -109
package/src/headroom/index.js +0 -14
package/src/memory/search.js +0 -50
package/src/orchestrator/index.js +106 -11
package/src/orchestrator/preflight.js +188 -0
package/src/prompts/system.js +34 -6
package/src/routing/bandit.js +246 -0
package/src/routing/cascade.js +106 -0
package/src/routing/complexity-analyzer.js +7 -15
package/src/routing/confidence-scorer.js +121 -0
package/src/routing/context-validator.js +71 -0
package/src/routing/cost-optimizer.js +5 -2
package/src/routing/deadline.js +52 -0
package/src/routing/drift-monitor.js +113 -0
package/src/routing/embedding-cache.js +77 -0
package/src/routing/index.js +374 -4
package/src/routing/interaction.js +183 -0
package/src/routing/knn-router.js +206 -0
package/src/routing/latency-tracker.js +113 -71
package/src/routing/model-tiers.js +156 -6
package/src/routing/output-ratios.js +57 -0
package/src/routing/regret-estimator.js +91 -0
package/src/routing/reward-pipeline.js +62 -0
package/src/routing/risk-analyzer.js +194 -0
package/src/routing/risk-classifier.js +130 -0
package/src/routing/shadow-mode.js +77 -0
package/src/routing/telemetry.js +7 -0
package/src/routing/tenant-policy.js +96 -0
package/src/routing/tokenizer.js +162 -0
package/src/server.js +12 -0
package/src/stores/file-store.js +42 -7
package/src/tools/smart-selection.js +11 -2
package/src/training/trajectory-compressor.js +266 -0
package/src/usage/aggregator.js +206 -0
package/src/utils/markdown-ansi.js +146 -0

package/src/orchestrator/preflight.js ADDED Viewed

@@ -0,0 +1,188 @@
+/**
+ * Preflight Checks
+ *
+ * Runs user-supplied commands before invoking the model. If they all
+ * exit 0, the work is already done — we skip the LLM call entirely
+ * and return a synthetic "preflight_satisfied" response at zero cost.
+ *
+ * Typical use case: a fix-the-failing-test request that arrives after
+ * the test already passes (CI lag, retry-after-fix, idempotent agent
+ * retries).
+ *
+ * The request opts in by including a top-level `preflight_commands`
+ * array on the Anthropic-format payload, e.g.:
+ *
+ *   {
+ *     "model": "...",
+ *     "messages": [...],
+ *     "preflight_commands": ["pnpm test -- user-service"]
+ *   }
+ *
+ * Disabled by default — gated on LYNKR_PREFLIGHT_ENABLED=true. The
+ * commands run with the same permissions as the Lynkr server, so
+ * operators should only enable this on workspaces where that is OK.
+ *
+ * @module orchestrator/preflight
+ */
+const { spawnSync } = require('child_process');
+const path = require('path');
+const config = require('../config');
+const logger = require('../logger');
+const MAX_COMMANDS = 10; // Upper bound on the number of commands accepted from a single request
+const MAX_OUTPUT_BYTES = 4000; // Tail kept per output stream; applied with String#slice, so it counts UTF-16 code units rather than bytes
+/**
+ * Extract the preflight command list from a request payload.
+ * Accepts either `preflight_commands` (Lynkr-specific) or
+ * `metadata.lynkr_preflight_commands` (for clients that strip unknown
+ * top-level fields).
+ *
+ * The two sources are chained with `||`, so the metadata field is only
+ * consulted when the top-level field is falsy; an empty top-level array
+ * is truthy and therefore wins. Anything that is not an array yields [].
+ * Entries that are not strings, or that are empty/whitespace-only, are
+ * dropped; surviving strings are returned untrimmed. The MAX_COMMANDS
+ * cap is applied after filtering.
+ *
+ * @param {object} payload - Request payload; may be null/undefined
+ * @returns {string[]} At most MAX_COMMANDS command strings (possibly empty)
+ */
+function extractCommands(payload) {
+  if (!payload) return [];
+  const raw =
+    payload.preflight_commands ||
+    payload.metadata?.lynkr_preflight_commands ||
+    [];
+  if (!Array.isArray(raw)) return [];
+  return raw
+    .filter(cmd => typeof cmd === 'string' && cmd.trim().length > 0)
+    .slice(0, MAX_COMMANDS);
+}
+/**
+ * Validate the workspace path used for command execution.
+ *
+ * No fallback is applied here: a missing, non-string, or relative `cwd`
+ * yields null (there is no process.cwd() default), and an absolute path
+ * is returned unchanged. The path is not normalized and its existence
+ * on disk is not checked.
+ *
+ * @param {string|null|undefined} cwd
+ * @returns {string|null} the absolute path as given, or null if invalid
+ */
+function resolveCwd(cwd) {
+  if (!cwd || typeof cwd !== 'string') return null;
+  if (!path.isAbsolute(cwd)) return null;
+  return cwd;
+}
+/**
+ * Run a single command, returning a structured result.
+ *
+ * @param {string} command
+ * @param {string} cwd
+ * @param {number} timeoutMs
+ * @returns {{ command: string, exit_code: number|null, stdout: string, stderr: string, timed_out: boolean }}
+ */
+function runCommand(command, cwd, timeoutMs) {
+  const result = spawnSync(command, {
+    cwd,
+    shell: true,
+    encoding: 'utf8',
+    timeout: timeoutMs,
+    maxBuffer: 10 * 1024 * 1024,
+  });
+  return {
+    command,
+    exit_code: result.status,
+    stdout: (result.stdout || '').slice(-MAX_OUTPUT_BYTES),
+    stderr: (result.stderr || '').slice(-MAX_OUTPUT_BYTES),
+    timed_out: result.signal === 'SIGTERM',
+  };
+}
+/**
+ * Try the preflight pass. Returns null when preflight should be
+ * skipped (disabled, no commands, missing cwd). Returns a result
+ * object otherwise.
+ *
+ * Commands run one at a time, in order, each through runCommand (which
+ * uses spawnSync, so this call is synchronous). The loop stops at the
+ * first command whose exit code is not exactly 0; later commands are
+ * not run and `results` then holds only the commands executed so far.
+ *
+ * @param {object} args
+ * @param {object} args.payload - Anthropic-format request payload
+ * @param {string} [args.cwd] - Workspace cwd (absolute path)
+ * @returns {null | {
+ *   satisfied: boolean,
+ *   results: object[],
+ *   failedCommand: string|null,
+ *   reason: string,
+ * }}
+ */
+function tryPreflight({ payload, cwd }) {
+  if (!config.routing?.preflightEnabled) return null;
+  const commands = extractCommands(payload);
+  if (commands.length === 0) return null;
+  const workspaceCwd = resolveCwd(cwd);
+  if (!workspaceCwd) {
+    logger.debug({ cwd }, '[Preflight] No valid cwd, skipping');
+    return null;
+  }
+  const timeoutMs = config.routing?.preflightTimeoutMs || 120000; // per-command timeout; any falsy config value (including 0) falls back to 120000 ms
+  const results = [];
+  for (const command of commands) {
+    const r = runCommand(command, workspaceCwd, timeoutMs);
+    results.push(r);
+    if (r.exit_code !== 0) { // a null exit code (no normal exit) also counts as failure
+      return {
+        satisfied: false,
+        results,
+        failedCommand: command,
+        reason: r.timed_out
+          ? `Preflight command timed out: ${command}`
+          : `Preflight command exited ${r.exit_code}: ${command}`,
+      };
+    }
+  }
+  return {
+    satisfied: true,
+    results,
+    failedCommand: null,
+    reason: 'All preflight commands passed.',
+  };
+}
+/**
+ * Build a synthetic "preflight satisfied" Anthropic Message response
+ * that processMessage can return without hitting the model.
+ *
+ * The response always reports `satisfied: true` and zero token usage;
+ * `preflightResult.satisfied` is not consulted, so callers are expected
+ * to invoke this only for a passing result. The message id is derived
+ * from Date.now(), so two responses built in the same millisecond share
+ * an id.
+ *
+ * @param {object} args
+ * @param {string} args.model - Echoed back in the `model` field
+ * @param {object} args.preflightResult - Must provide `results` (array) and `reason`
+ * @returns {object} The full processMessage return value.
+ */
+function buildSatisfiedResponse({ model, preflightResult }) {
+  const summary = `Preflight satisfied — work appears already complete (${preflightResult.results.length} command${preflightResult.results.length === 1 ? '' : 's'} passed).`;
+  return {
+    response: {
+      json: {
+        id: `msg_preflight_${Date.now()}`,
+        type: 'message',
+        role: 'assistant',
+        content: [{ type: 'text', text: summary }],
+        model,
+        stop_reason: 'end_turn',
+        stop_sequence: null,
+        usage: { input_tokens: 0, output_tokens: 0 }, // no model call was made
+        lynkr_preflight: {
+          satisfied: true,
+          reason: preflightResult.reason,
+          results: preflightResult.results,
+        },
+      },
+      ok: true,
+      status: 200,
+    },
+    steps: 0,
+    durationMs: 0,
+    terminationReason: 'preflight_satisfied',
+  };
+}
+module.exports = {
+  tryPreflight,
+  buildSatisfiedResponse,
+  extractCommands,
+  // Exposed for tests
+  resolveCwd,
+};

package/src/prompts/system.js CHANGED Viewed

@@ -70,13 +70,41 @@ function compressToolDescriptions(tools, mode = null) {
     return tools; // Return unmodified if not in minimal mode
   }
-  return tools.map(tool => {
+  const validTools = tools.filter(tool => {
+    // Handle both Anthropic format (name + input_schema) and OpenAI format (function.name)
+    const hasAnthropicFormat = tool && tool.name && tool.input_schema;
+    const hasOpenAIFormat = tool && tool.function && tool.function.name;
+    const isValid = hasAnthropicFormat || hasOpenAIFormat;
+    if (!isValid) {
+      logger.debug({
+        hasName: !!tool?.name,
+        hasSchema: !!tool?.input_schema,
+        hasFunctionName: !!tool?.function?.name,
+        toolType: typeof tool
+      }, 'Filtered out malformed tool');
+    }
+    return isValid;
+  });
+  if (validTools.length === 0 && tools.length > 0) {
+    logger.warn({ originalCount: tools.length }, 'All tools filtered out as malformed - returning original');
+    return tools;
+  }
+  return validTools.map(tool => {
+    // If already in OpenAI format, return as-is (no compression for OpenAI format)
+    if (tool.function && !tool.input_schema) {
+      return tool;
+    }
+    // Compress Anthropic format
     const compressed = {
       name: tool.name,
       input_schema: {
-        type: tool.input_schema.type,
+        type: tool.input_schema?.type || "object",
         properties: {},
-        required: tool.input_schema.required || [],
+        required: tool.input_schema?.required || [],
       }
     };
@@ -190,7 +218,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
   // 2. Remove file operation guidelines if no file tools
   const hasFileTools = context.tools?.some(t =>
-    ['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
+    t?.name && ['Read', 'Write', 'Edit', 'Glob', 'Grep'].includes(t.name)
   );
   if (!hasFileTools) {
     text = removeSection(text, /# File Operations?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'file operations');
@@ -198,7 +226,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
   // 3. Remove git guidelines if no git tools
   const hasGitTools = context.tools?.some(t =>
-    t.name.toLowerCase().includes('git')
+    t?.name && t.name.toLowerCase().includes('git')
   );
   if (!hasGitTools) {
     text = removeSection(text, /# Git.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'git guidelines');
@@ -207,7 +235,7 @@ function optimizeSystemPrompt(system, context = {}, mode = null) {
   // 4. Remove web search guidelines if no web tools
   const hasWebTools = context.tools?.some(t =>
-    ['WebSearch', 'WebFetch'].includes(t.name)
+    t?.name && ['WebSearch', 'WebFetch'].includes(t.name)
   );
   if (!hasWebTools) {
     text = removeSection(text, /# Web.*?[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, optimizations, 'web guidelines');

package/src/routing/bandit.js ADDED Viewed

@@ -0,0 +1,246 @@
+/**
+ * LinUCB contextual bandit for intra-tier model selection (Phase 4.1).
+ *
+ * Standard LinUCB-with-disjoint-models algorithm (Li et al. 2010).
+ *   - One arm per (provider, model) pair in a tier
+ *   - Context = numerical feature vector for the request
+ *   - Reward = quality_score - λ·norm_cost - μ·norm_latency
+ *   - Per-arm A (d×d ridge-regression matrix) and b (d-vector) stored to disk
+ *
+ * State persists to data/bandit-state.json. Loaded when a bandit is
+ * constructed; saved every 25th `update()` call to limit disk IO. No
+ * graceful-shutdown save is wired up in this module itself.
+ */
+const fs = require('fs');
+const path = require('path');
+const logger = require('../logger');
+const STATE_PATH = path.join(__dirname, '../../data/bandit-state.json'); // JSON file read by _load() and written by _save()
+const DEFAULT_ALPHA = 1.5; // multiplier on sqrt(variance) in the UCB score
+const DEFAULT_LAMBDA = 0.3; // cost penalty weight (stored on the bandit; not read by pick()/update() in this module)
+const DEFAULT_MU = 0.1;     // latency penalty weight (stored on the bandit; not read by pick()/update() in this module)
+const FEATURE_DIM = 12; // default context vector length; shorter/longer contexts are zero-padded/truncated
+const EXPLORATION_RATE = 0.05; // probability that pick() returns a uniformly random candidate
+function _identity(d) { // Build a fresh d×d identity matrix as an array of row arrays
+  const m = new Array(d);
+  for (let i = 0; i < d; i++) {
+    m[i] = new Array(d).fill(0);
+    m[i][i] = 1;
+  }
+  return m;
+}
+function _zeros(d) { // Build a fresh length-d vector of zeros
+  return new Array(d).fill(0);
+}
+function _matVec(M, v) { // Matrix-vector product M·v; both loop bounds come from v.length, so M is read as a square v.length × v.length matrix
+  const d = v.length;
+  const out = new Array(d).fill(0);
+  for (let i = 0; i < d; i++) {
+    for (let j = 0; j < d; j++) out[i] += M[i][j] * v[j];
+  }
+  return out;
+}
+function _dot(a, b) { // Dot product of a and b; the iteration length is taken from a
+  let s = 0;
+  for (let i = 0; i < a.length; i++) s += a[i] * b[i];
+  return s;
+}
+function _outer(a, b) { // Outer product a·bᵀ as a new a.length × b.length matrix
+  const out = new Array(a.length);
+  for (let i = 0; i < a.length; i++) {
+    out[i] = new Array(b.length);
+    for (let j = 0; j < b.length; j++) out[i][j] = a[i] * b[j];
+  }
+  return out;
+}
+function _addMat(A, B) { // In-place A += B (element-wise); mutates A and returns nothing
+  for (let i = 0; i < A.length; i++) {
+    for (let j = 0; j < A[i].length; j++) A[i][j] += B[i][j];
+  }
+}
+function _addVec(a, b) { // In-place a += b (element-wise); mutates a and returns nothing
+  for (let i = 0; i < a.length; i++) a[i] += b[i];
+}
+/**
+ * Invert a small dense matrix via Gauss-Jordan. For d=12 this is plenty fast
+ * and saves us a dependency on a linear algebra library.
+ *
+ * Works on an augmented copy [M | I], so M itself is not modified. Rows
+ * are swapped only when the current pivot is below 1e-12 in magnitude,
+ * using the first lower row whose entry in that column exceeds 1e-12;
+ * no other pivoting strategy is applied.
+ *
+ * @param {number[][]} M - Square matrix (array of row arrays)
+ * @returns {number[][]} A new matrix holding the inverse of M
+ * @throws {Error} 'matrix singular' when no usable pivot row exists
+ */
+function _inv(M) {
+  const d = M.length;
+  const aug = M.map((row, i) => {
+    const r = row.slice();
+    for (let j = 0; j < d; j++) r.push(i === j ? 1 : 0);
+    return r;
+  });
+  for (let i = 0; i < d; i++) {
+    let pivot = aug[i][i];
+    if (Math.abs(pivot) < 1e-12) {
+      let swap = -1;
+      for (let k = i + 1; k < d; k++) {
+        if (Math.abs(aug[k][i]) > 1e-12) { swap = k; break; }
+      }
+      if (swap < 0) throw new Error('matrix singular');
+      [aug[i], aug[swap]] = [aug[swap], aug[i]];
+      pivot = aug[i][i];
+    }
+    for (let j = 0; j < 2 * d; j++) aug[i][j] /= pivot; // scale the pivot row so the pivot becomes 1
+    for (let k = 0; k < d; k++) {
+      if (k === i) continue;
+      const factor = aug[k][i];
+      for (let j = 0; j < 2 * d; j++) aug[k][j] -= factor * aug[i][j]; // clear column i in every other row
+    }
+  }
+  return aug.map(row => row.slice(d)); // right half of the augmented matrix is the inverse
+}
+class LinUCBBandit { // Per-arm LinUCB state (A, b, count) keyed by "tier|provider:model", with JSON persistence to STATE_PATH
+  constructor({ alpha = DEFAULT_ALPHA, lambda = DEFAULT_LAMBDA, mu = DEFAULT_MU, dim = FEATURE_DIM } = {}) {
+    this.alpha = alpha; // multiplies sqrt(variance) in pick()
+    this.lambda = lambda; // stored and persisted; not read by pick() or update()
+    this.mu = mu; // stored and persisted; not read by pick() or update()
+    this.dim = dim;
+    /** arms: Map<armKey, { A: number[][], b: number[], count: number }> */
+    this.arms = new Map();
+    this.steps = 0;
+    this._load(); // synchronous disk read; restores arms and steps when a compatible state file exists
+  }
+  _armKey(tier, provider, model) { // Map key for one (tier, provider, model) arm
+    return `${tier}|${provider}:${model}`;
+  }
+  _ensureArm(armKey) { // Return the arm for armKey, creating it with A = identity and b = 0 on first use
+    if (!this.arms.has(armKey)) {
+      this.arms.set(armKey, { A: _identity(this.dim), b: _zeros(this.dim), count: 0 });
+    }
+    return this.arms.get(armKey);
+  }
+  /**
+   * Pick an arm for a given tier and context.
+   *
+   * With probability EXPLORATION_RATE a uniformly random candidate is
+   * returned (ucb: null, explored: true) without creating or scoring any
+   * arm. Otherwise each candidate is scored as
+   * theta·x + alpha·sqrt(xᵀ·A⁻¹·x) and the highest score wins; on ties
+   * the earlier candidate is kept. Candidates whose A cannot be inverted
+   * are skipped. A context of the wrong length is padded/truncated on a
+   * copy, so the caller's array is left untouched.
+   *
+   * @param {string} tier
+   * @param {Array<{ provider: string, model: string }>} candidates — qualifying arms
+   * @param {number[]} context — feature vector
+   * @returns {{ provider, model, ucb, explored }|null} chosen arm, or null when there are
+   *   no candidates or every candidate was skipped
+   */
+  pick(tier, candidates, context) {
+    if (!candidates || candidates.length === 0) return null;
+    if (context.length !== this.dim) {
+      // Pad or truncate to dim
+      context = context.slice(0, this.dim);
+      while (context.length < this.dim) context.push(0);
+    }
+    // ε-greedy: 5% pure exploration
+    if (Math.random() < EXPLORATION_RATE) {
+      const random = candidates[Math.floor(Math.random() * candidates.length)];
+      return { ...random, ucb: null, explored: true };
+    }
+    let best = null;
+    let bestUcb = -Infinity;
+    for (const c of candidates) {
+      const key = this._armKey(tier, c.provider, c.model);
+      const arm = this._ensureArm(key);
+      let Ainv;
+      try {
+        Ainv = _inv(arm.A);
+      } catch (err) {
+        continue; // singular A: this candidate is skipped without logging
+      }
+      const theta = _matVec(Ainv, arm.b);
+      const mean = _dot(theta, context);
+      const variance = _dot(context, _matVec(Ainv, context));
+      const ucb = mean + this.alpha * Math.sqrt(Math.max(0, variance)); // clamp guards against a slightly negative variance
+      if (ucb > bestUcb) {
+        bestUcb = ucb;
+        best = { ...c, ucb, explored: false };
+      }
+    }
+    return best;
+  }
+  /**
+   * Update the chosen arm with the observed reward.
+   *
+   * Applies A += x·xᵀ and b += r·x, where r is reward / 100 clamped to
+   * [0, 1]. State is written to disk on every 25th step; `steps` is
+   * restored by _load(), so the count continues across restarts.
+   *
+   * NOTE(review): a NaN reward is not caught by the clamp (Math.min and
+   * Math.max return NaN), so it would be added into the arm's b vector.
+   *
+   * @param {string} tier
+   * @param {string} provider
+   * @param {string} model
+   * @param {number[]} context
+   * @param {number} reward — typically in [0, 100]; will be rescaled to [0, 1] internally
+   */
+  update(tier, provider, model, context, reward) {
+    const key = this._armKey(tier, provider, model);
+    const arm = this._ensureArm(key);
+    let ctx = context;
+    if (ctx.length !== this.dim) {
+      ctx = ctx.slice(0, this.dim); // work on a copy so the caller's array is not padded
+      while (ctx.length < this.dim) ctx.push(0);
+    }
+    const r = Math.max(0, Math.min(1, reward / 100));
+    _addMat(arm.A, _outer(ctx, ctx));
+    _addVec(arm.b, ctx.map(x => x * r));
+    arm.count++;
+    this.steps++;
+    // Save periodically (not every step to limit IO)
+    if (this.steps % 25 === 0) this._save();
+  }
+  _save() { // Synchronously write hyperparameters, step count and all arms to STATE_PATH; failures are logged at debug level and otherwise ignored
+    try {
+      fs.mkdirSync(path.dirname(STATE_PATH), { recursive: true });
+      const arms = {};
+      for (const [k, v] of this.arms) arms[k] = v;
+      fs.writeFileSync(STATE_PATH, JSON.stringify({
+        savedAt: Date.now(),
+        steps: this.steps,
+        alpha: this.alpha,
+        lambda: this.lambda,
+        mu: this.mu,
+        dim: this.dim,
+        arms,
+      }, null, 0));
+    } catch (err) {
+      logger.debug({ err: err.message }, '[Bandit] State save failed');
+    }
+  }
+  _load() { // Restore arms and steps from STATE_PATH; alpha/lambda/mu in the file are not applied, and arm entries are not shape-checked
+    try {
+      if (!fs.existsSync(STATE_PATH)) return;
+      const raw = JSON.parse(fs.readFileSync(STATE_PATH, 'utf8'));
+      if (raw.dim && raw.dim === this.dim) { // a state file saved with a different dim is ignored without logging
+        for (const [k, v] of Object.entries(raw.arms || {})) {
+          this.arms.set(k, v);
+        }
+        this.steps = raw.steps || 0;
+        logger.info({ arms: this.arms.size, steps: this.steps }, '[Bandit] State loaded');
+      }
+    } catch (err) {
+      logger.debug({ err: err.message }, '[Bandit] State load failed');
+    }
+  }
+  getStats() { // Summary for diagnostics: total steps, alpha, and the update count of each arm
+    const armStats = {};
+    for (const [k, v] of this.arms) {
+      armStats[k] = { count: v.count };
+    }
+    return { steps: this.steps, arms: armStats, alpha: this.alpha };
+  }
+}
+let _instance = null; // module-level shared bandit, created on the first getBandit() call
+function getBandit() { // Lazily construct (with default hyperparameters) and return the shared LinUCBBandit
+  if (!_instance) _instance = new LinUCBBandit();
+  return _instance;
+}
+module.exports = { LinUCBBandit, getBandit, FEATURE_DIM };

package/src/routing/cascade.js ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * Small-first cascade with confidence-based deferral (Phase 3.3).
+ *
+ * For tier-MEDIUM/COMPLEX requests, optionally try a smaller model first.
+ * If the response confidence (from confidence-scorer) ≥ threshold, accept it.
+ * Otherwise, escalate to the originally-routed tier model.
+ *
+ * Never applied to streaming requests (can't retry mid-stream cleanly) or
+ * to requests that carry tools (tool calls have side effects).
+ * Opt-in via LYNKR_CASCADE_ENABLED=true.
+ */
+const logger = require('../logger');
+const confidenceScorer = require('./confidence-scorer');
+const DEFAULT_THRESHOLD = 0.85; // confidence at or above which run() accepts the small model's response
+const TIERS_ELIGIBLE = ['MEDIUM', 'COMPLEX']; // only requests routed to these tiers may cascade
+function isEnabled() { // True only when LYNKR_CASCADE_ENABLED is exactly the string 'true'; read from the environment on every call
+  return process.env.LYNKR_CASCADE_ENABLED === 'true';
+}
+/**
+ * Decide whether a request is eligible for the small-first cascade.
+ *
+ * All of the following must hold: the feature flag is on, the request
+ * is not streaming, it carries no tools, and its tier is listed in
+ * TIERS_ELIGIBLE.
+ *
+ * @param {object} args
+ * @param {string} args.tier — the originally selected tier
+ * @param {boolean} args.streaming — true if the request is streaming
+ * @param {boolean} args.hasTools — true if tools are present
+ * @returns {boolean}
+ */
+function shouldCascade(args) {
+  if (!isEnabled()) return false;
+  if (args.streaming) return false; // streaming responses can't be retried cleanly
+  if (args.hasTools) return false; // tool calls have side effects; don't double-run
+  if (!TIERS_ELIGIBLE.includes(args.tier)) return false;
+  return true;
+}
+/**
+ * Run a small-first cascade.
+ *
+ * Invokes the small model first. If that call throws, the error is
+ * logged at debug level and the big model is invoked instead. If it
+ * succeeds, the response is scored by the confidence scorer and accepted
+ * when the score is >= threshold; otherwise the big model is invoked.
+ * Errors from the big model or from the confidence scorer are not caught
+ * here and propagate to the caller.
+ *
+ * The shape of `cascadeStats` depends on the path taken: `reason` is set
+ * only when the small model failed, `confidence` only when it was
+ * scored, and `threshold` only on escalation after scoring.
+ *
+ * @param {object} args
+ * @param {object} args.payload — the request payload
+ * @param {object} args.smallModel — { provider, model }
+ * @param {object} args.bigModel — { provider, model }
+ * @param {function} args.invoke — async (provider, model, payload) → response
+ * @param {string} args.taskType — used by confidence scorer
+ * @param {number} args.threshold — confidence threshold, defaults to 0.85
+ * @param {function} args.judge — optional judge LLM for reasoning tasks
+ * @returns {Promise<{ response, usedModel, cascadeStats }>}
+ */
+async function run(args) {
+  const threshold = args.threshold ?? DEFAULT_THRESHOLD; // ?? keeps an explicit threshold of 0
+  const start = Date.now();
+  let smallLatency = 0;
+  let bigLatency = 0;
+  // Try small model
+  let smallResponse;
+  try {
+    const t0 = Date.now();
+    smallResponse = await args.invoke(args.smallModel.provider, args.smallModel.model, args.payload);
+    smallLatency = Date.now() - t0;
+  } catch (err) {
+    logger.debug({ err: err.message }, '[Cascade] Small model failed, escalating');
+    const t0 = Date.now();
+    const bigResponse = await args.invoke(args.bigModel.provider, args.bigModel.model, args.payload);
+    bigLatency = Date.now() - t0;
+    return {
+      response: bigResponse,
+      usedModel: args.bigModel,
+      cascadeStats: { accepted: false, reason: 'small_failed', smallLatency, bigLatency, totalLatency: Date.now() - start }, // smallLatency is still 0 on this path
+    };
+  }
+  const confidence = await confidenceScorer.score(smallResponse, {
+    taskType: args.taskType,
+    question: args.payload?.messages?.[args.payload.messages.length - 1]?.content, // content of the last message, passed through as-is
+    judge: args.judge,
+  });
+  if (confidence >= threshold) {
+    return {
+      response: smallResponse,
+      usedModel: args.smallModel,
+      cascadeStats: { accepted: true, confidence, smallLatency, bigLatency: 0, totalLatency: Date.now() - start },
+    };
+  }
+  // Escalate
+  const t0 = Date.now();
+  const bigResponse = await args.invoke(args.bigModel.provider, args.bigModel.model, args.payload);
+  bigLatency = Date.now() - t0;
+  return {
+    response: bigResponse,
+    usedModel: args.bigModel,
+    cascadeStats: {
+      accepted: false,
+      confidence,
+      threshold,
+      smallLatency,
+      bigLatency,
+      totalLatency: Date.now() - start,
+    },
+  };
+}
+module.exports = { run, shouldCascade, isEnabled, DEFAULT_THRESHOLD };

package/src/routing/complexity-analyzer.js CHANGED Viewed

@@ -395,24 +395,16 @@ function extractContent(payload) {
 }
 /**
- * Estimate token count (rough approximation)
+ * Estimate token count.
+ *
+ * Phase 1.1: delegates to the tiktoken-backed tokenizer (graceful fallback to
+ * chars/4 if js-tiktoken is unavailable).
  */
+const { countPayloadTokens } = require('./tokenizer');
 function estimateTokens(payload) {
   if (!payload?.messages) return 0;
-  let totalChars = 0;
-  for (const msg of payload.messages) {
-    if (typeof msg.content === 'string') {
-      totalChars += msg.content.length;
-    } else if (Array.isArray(msg.content)) {
-      for (const block of msg.content) {
-        if (block?.text) totalChars += block.text.length;
-      }
-    }
-  }
-  // Rough approximation: 4 chars per token
-  return Math.ceil(totalChars / 4);
+  return countPayloadTokens(payload, payload?.model);
 }
 /**