npm - lynkr - Versions diffs - 9.4.6 → 9.5.0 - Mend

lynkr 9.4.6 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/README.md +46 -14
package/install.sh +21 -5
package/package.json +4 -2
package/public/dashboard.html +13 -1
package/scripts/check-native.js +97 -0
package/src/clients/databricks.js +80 -3
package/src/clients/openrouter-utils.js +15 -0
package/src/config/index.js +9 -0
package/src/context/caveman.js +94 -0
package/src/context/tool-dedup.js +95 -0
package/src/context/tool-result-compressor.js +106 -0
package/src/dashboard/api.js +69 -18
package/src/orchestrator/bypass.js +135 -0
package/src/orchestrator/index.js +33 -2
package/src/routing/index.js +39 -0
package/src/routing/model-registry.js +89 -26
package/src/routing/risk-analyzer.js +7 -2
package/src/routing/session-affinity.js +96 -0
package/src/routing/telemetry.js +16 -3
package/.impeccable/live/config.json +0 -8

package/src/context/tool-dedup.js ADDED Viewed

@@ -0,0 +1,95 @@
+/**
+ * MCP-aware Tool Dedup
+ *
+ * Strips built-in tool definitions when an equivalent MCP tool is present in
+ * the request. Sending both wastes tool-schema tokens and gives the model
+ * redundant choices. Rule-based and deterministic.
+ *
+ * Example: if the Exa or Tavily MCP search tools are present, the built-in
+ * WebSearch/WebFetch tools are redundant and dropped.
+ *
+ * Ported from 9router's toolDeduper. Always on — purely removes redundant
+ * tool definitions, never adds.
+ *
+ * @module context/tool-dedup
+ */
+const logger = require("../logger");
// Built-in web search/fetch tools that any search-capable MCP server supersedes.
const BUILTIN_WEB_TOOLS = ["WebSearch", "WebFetch", "web_search", "web_fetch", "mcp__workspace__web_fetch"];
// Rule table. A rule fires when at least one tool name matches one of its
// `triggers`; every tool matching one of its `strip` patterns is then removed.
// A pattern is either an exact name (string) or a RegExp tested against the name.
const DEDUP_RULES = [
  // Exa MCP present → built-in web tools are redundant (Exa is preferred).
  {
    triggers: ["mcp__exa__web_search_exa", "mcp__exa__web_fetch_exa"],
    strip: [...BUILTIN_WEB_TOOLS],
  },
  // Tavily MCP present → built-in web tools are redundant.
  {
    triggers: ["mcp__tavily__tavily_search", "mcp__tavily__tavily_extract"],
    strip: [...BUILTIN_WEB_TOOLS],
  },
  // Browser MCP present → the Chrome-connector tool family is a duplicate.
  {
    triggers: [/^mcp__browsermcp__/],
    strip: [/^mcp__Claude_in_Chrome__/],
  },
];
/**
 * Read a tool's name from either supported shape: a top-level `name`, or a
 * nested `function.name`. Yields "" when neither holds a truthy value.
 */
function getToolName(t) {
  const topLevelName = t?.name;
  if (topLevelName) return topLevelName;
  const nestedName = t?.function?.name;
  return nestedName ? nestedName : "";
}
/**
 * Test a tool name against one rule pattern: RegExp patterns are tested
 * against the name, string patterns must equal it exactly, and any other
 * pattern type never matches.
 */
function matches(name, pattern) {
  if (pattern instanceof RegExp) return pattern.test(name);
  return typeof pattern === "string" && name === pattern;
}
/**
 * Drop tool definitions made redundant by other tools in the same list.
 *
 * For every rule in DEDUP_RULES that has at least one trigger among the tool
 * names, each tool matching that rule's `strip` patterns is removed, except a
 * tool that is itself one of that rule's triggers.
 *
 * @param {Array} tools - Tool definitions (Anthropic or OpenAI shape).
 * @returns {{tools: Array, stripped: string[]}} The surviving tools and the
 *   names removed. When nothing is removed (or the input is not a non-empty
 *   array) `tools` is the input value itself, untouched.
 */
function dedupeTools(tools) {
  if (!Array.isArray(tools) || tools.length === 0) {
    return { tools, stripped: [] };
  }

  const toolNames = tools.map(getToolName);
  const isTriggerOf = (rule, name) => rule.triggers.some((pattern) => matches(name, pattern));
  const isStrippedBy = (rule, name) => rule.strip.some((pattern) => matches(name, pattern));

  const redundant = new Set();
  DEDUP_RULES
    // Only rules whose trigger tool is actually present take effect.
    .filter((rule) => toolNames.some((name) => isTriggerOf(rule, name)))
    .forEach((rule) => {
      toolNames
        // A rule never removes its own trigger tools.
        .filter((name) => !isTriggerOf(rule, name))
        .filter((name) => isStrippedBy(rule, name))
        .forEach((name) => redundant.add(name));
    });

  if (redundant.size === 0) {
    return { tools, stripped: [] };
  }
  return {
    tools: tools.filter((tool) => !redundant.has(getToolName(tool))),
    stripped: [...redundant],
  };
}
/**
 * Run tool dedup against a request payload, mutating it only when needed.
 *
 * `payload.tools` is reassigned to the filtered list (and a debug line is
 * logged) only if at least one tool was removed; otherwise the payload is
 * left exactly as it was.
 *
 * @param {object} payload - Request body with a `tools` array.
 * @returns {string[]} Names of the tools that were stripped (empty when none,
 *   or when the payload has no `tools` array).
 */
function applyToolDedup(payload) {
  if (!Array.isArray(payload?.tools)) return [];

  const result = dedupeTools(payload.tools);
  const removed = result.stripped;
  if (removed.length === 0) return removed;

  payload.tools = result.tools;
  logger.debug({ stripped: removed }, "[ToolDedup] Stripped redundant built-in tools (MCP equivalents present)");
  return removed;
}
+module.exports = {
+  dedupeTools,
+  applyToolDedup,
+};

package/src/context/tool-result-compressor.js CHANGED Viewed

@@ -455,6 +455,107 @@ function compressContainerOutput(text) {
   return `${header}\n${dataLines.slice(0, 10).join("\n")}\n... +${dataLines.length - 10} more (${dataLines.length} total)`;
 }
// 11. Grep / ripgrep output ("file:lineno:content"), per-file match cap.
// Ported from 9router RTK grep filter (rtk/src/cmds/system/pipe_cmd.rs).
const GREP_PER_FILE_MAX = 10;
// Fewer parsed matches than this and the text is not treated as grep output.
const GREP_MIN_MATCHES = 5;

/**
 * True when the "file:lineno:content" split actually landed on an HH:MM:SS
 * clock time (e.g. "2024-01-01T12:30:45 INFO ..." or "[12:30:45] ..."), where
 * the hour would be misread as the file and the minute as the line number.
 */
function isGrepClockTime(file, lineNumStr, content) {
  return (
    /(^|\D)([01]?\d|2[0-3])$/.test(file) &&
    /^[0-5]\d$/.test(lineNumStr) &&
    /^[0-5]\d(\D|$)/.test(content)
  );
}

/**
 * Parse one "file:lineno:content" line.
 *
 * @param {string} line
 * @returns {{file: string, lineNumStr: string, content: string}|null} parsed
 *   parts, or null when the line is not a grep match line.
 */
function parseGrepLine(line) {
  // splitn(3, ':') — only split on the first two colons.
  const first = line.indexOf(":");
  if (first === -1) return null;
  const second = line.indexOf(":", first + 1);
  if (second === -1) return null;
  const file = line.slice(0, first);
  const lineNumStr = line.slice(first + 1, second);
  const content = line.slice(second + 1);
  if (!/^\d+$/.test(lineNumStr)) return null;
  // Timestamped log lines have the same "x:digits:rest" shape; reject them so
  // logs are not regrouped by hour and capped as if they were grep matches.
  if (isGrepClockTime(file, lineNumStr, content)) return null;
  return { file, lineNumStr, content };
}

/**
 * Compress grep-style output by grouping matches per file (files sorted) and
 * keeping at most GREP_PER_FILE_MAX matches for each, followed by a "+N" line
 * for the overflow.
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} The grouped summary, or null when the text does not
 *   look like grep output: fewer than GREP_MIN_MATCHES match lines, or more
 *   non-blank lines that fail to parse than lines that parse.
 */
function compressGrep(text) {
  const byFile = new Map();
  let total = 0;
  let unparsed = 0;
  for (const line of text.split("\n")) {
    const hit = parseGrepLine(line);
    if (!hit) {
      if (line.trim() !== "") unparsed++;
      continue;
    }
    total++;
    if (!byFile.has(hit.file)) byFile.set(hit.file, []);
    byFile.get(hit.file).push([hit.lineNumStr, hit.content]);
  }
  // Require a meaningful number of matches so we don't mangle prose that
  // happens to contain a "word:123:..." line.
  if (total < GREP_MIN_MATCHES) return null;
  // Unparsed lines are not carried into the summary, so when they outnumber
  // the matches, compressing would throw away most of the text. Decline.
  if (unparsed > total) return null;
  const files = Array.from(byFile.keys()).sort();
  let out = `${total} matches in ${files.length}F:\n\n`;
  for (const file of files) {
    const matches = byFile.get(file);
    out += `[file] ${file} (${matches.length}):\n`;
    for (const [lineNum, content] of matches.slice(0, GREP_PER_FILE_MAX)) {
      out += `  ${lineNum.padStart(4)}: ${content.trim()}\n`;
    }
    if (matches.length > GREP_PER_FILE_MAX) {
      out += `  +${matches.length - GREP_PER_FILE_MAX}\n`;
    }
    out += "\n";
  }
  return out;
}
// 12. Generic log de-duplication: collapse consecutive duplicate lines and
// runs of blank lines, with a hard line cap. Ported from 9router RTK dedupLog.
const DEDUP_LINE_MAX = 2000;
/**
 * Collapse consecutive identical lines into one line plus a
 * "... (N duplicate lines)" marker, squeeze runs of blank lines down to one,
 * and stop after DEDUP_LINE_MAX output lines.
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} The de-duplicated text, or null when there was
 *   nothing to collapse or truncate. Null mirrors the "not applicable" result
 *   of the neighbouring compressors.
 *   NOTE(review): presumably the pipeline moves on to the next compressor on
 *   null — confirm against the caller.
 */
function compressDedupLog(text) {
  const lines = text.split("\n");
  const out = [];
  let prev = null;
  let runCount = 0;
  let blankStreak = 0;
  let changed = false;
  // Close the current run: emit its marker (if it repeated) and reset state.
  const flushRun = () => {
    if (prev !== null && runCount > 1) {
      out.push(`  ... (${runCount - 1} duplicate lines)`);
    }
    prev = null;
    runCount = 0;
  };
  for (const line of lines) {
    if (line.trim() === "") {
      // Flush BEFORE emitting the blank line so the duplicate marker sits
      // directly under the line it describes, not after the blank separator.
      flushRun();
      if (blankStreak < 1) {
        out.push(line);
      } else {
        changed = true;
      }
      blankStreak += 1;
      continue;
    }
    blankStreak = 0;
    if (line === prev) {
      runCount += 1;
      changed = true;
      continue;
    }
    flushRun();
    out.push(line);
    prev = line;
    runCount = 1;
    if (out.length >= DEDUP_LINE_MAX) {
      out.push(`... (truncated at ${DEDUP_LINE_MAX} lines)`);
      return out.join("\n");
    }
  }
  flushRun();
  return changed ? out.join("\n") : null;
}
// 13. Last-resort generic truncation: keep head + tail lines, drop the middle.
// Only kicks in for very long output no specific compressor matched.
// Ported from 9router RTK smartTruncate.
const SMART_TRUNCATE_HEAD = 120;
const SMART_TRUNCATE_TAIL = 60;
const SMART_TRUNCATE_MIN_LINES = 250;
/**
 * Keep the first SMART_TRUNCATE_HEAD and last SMART_TRUNCATE_TAIL lines of a
 * long text, replacing everything in between with a one-line count of what
 * was dropped.
 *
 * @param {string} text - Raw tool output.
 * @returns {string|null} The shortened text, or null when the input has fewer
 *   than SMART_TRUNCATE_MIN_LINES lines.
 */
function compressSmartTruncate(text) {
  const allLines = text.split("\n");
  const lineCount = allLines.length;
  if (lineCount < SMART_TRUNCATE_MIN_LINES) {
    return null;
  }
  const kept = allLines.slice(0, SMART_TRUNCATE_HEAD);
  const omitted = lineCount - SMART_TRUNCATE_HEAD - SMART_TRUNCATE_TAIL;
  kept.push(`... +${omitted} lines truncated`);
  kept.push(...allLines.slice(-SMART_TRUNCATE_TAIL));
  return kept.join("\n");
}
 // ── Compression Pipeline ─────────────────────────────────────────────
 const COMPRESSORS = [
@@ -466,8 +567,13 @@ const COMPRESSORS = [
   { name: "build_output", fn: compressBuildOutput },
   { name: "container_output", fn: compressContainerOutput },
   { name: "json_response", fn: compressJSON },
+  { name: "grep_output", fn: compressGrep },
   { name: "directory_listing", fn: compressDirectoryListing },
   { name: "large_file", fn: compressLargeFile },
+  // Generic fallbacks last: dedup exact-duplicate spam, then hard head/tail
+  // truncation only if nothing more specific applied.
+  { name: "dedup_log", fn: compressDedupLog },
+  { name: "smart_truncate", fn: compressSmartTruncate },
 ];
 // Compression levels tied to routing tiers

package/src/dashboard/api.js CHANGED Viewed

@@ -5,24 +5,74 @@ const metrics = require('../metrics');
 const { getMetricsCollector } = require('../observability/metrics');
 const { TIER_DEFINITIONS } = require('../routing/model-tiers');
-function getConfiguredProviders() {
+// Per-provider type + whether its credentials/endpoint are actually present.
+function providerMeta() {
   const c = config;
-  const providers = [];
-  const add = (name, type, ok) => ok && providers.push({ name, type });
-  add('databricks',     'cloud', c.databricks?.url && c.databricks?.apiKey);
-  add('azure-anthropic','cloud', c.azureAnthropic?.endpoint && c.azureAnthropic?.apiKey);
-  add('bedrock',        'cloud', c.bedrock?.apiKey);
-  add('openrouter',     'cloud', c.openrouter?.apiKey);
-  add('openai',         'cloud', c.openai?.apiKey);
-  add('azure-openai',   'cloud', c.azureOpenAI?.endpoint && c.azureOpenAI?.apiKey);
-  add('vertex',         'cloud', c.vertex?.projectId);
-  add('moonshot',       'cloud', c.moonshot?.apiKey);
-  add('ollama',         'local', c.ollama?.endpoint);
-  add('llamacpp',       'local', c.llamacpp?.endpoint);
-  add('lmstudio',       'local', c.lmstudio?.endpoint);
-  return providers;
+  return {
+    databricks:        { type: 'cloud', configured: !!(c.databricks?.url && c.databricks?.apiKey) },
+    'azure-anthropic': { type: 'cloud', configured: !!(c.azureAnthropic?.endpoint && c.azureAnthropic?.apiKey) },
+    bedrock:           { type: 'cloud', configured: !!c.bedrock?.apiKey },
+    openrouter:        { type: 'cloud', configured: !!c.openrouter?.apiKey },
+    openai:            { type: 'cloud', configured: !!c.openai?.apiKey },
+    'azure-openai':    { type: 'cloud', configured: !!(c.azureOpenAI?.endpoint && c.azureOpenAI?.apiKey) },
+    vertex:            { type: 'cloud', configured: !!c.vertex?.projectId },
+    moonshot:          { type: 'cloud', configured: !!c.moonshot?.apiKey },
+    ollama:            { type: 'local', configured: !!c.ollama?.endpoint },
+    llamacpp:          { type: 'local', configured: !!c.llamacpp?.endpoint },
+    lmstudio:          { type: 'local', configured: !!c.lmstudio?.endpoint },
+  };
+}
// Collects every provider the routing config refers to. Each string entry of
// `config.modelTiers` contributes the text before its first ':' (values look
// like `provider:model[:variant]`), labelled with the tier key; the base
// `config.modelProvider.type` is added under the label 'default'. Provider
// names are trimmed and lower-cased. Returns Map<providerName, labels[]>.
function getReferencedProviders() {
  const refs = new Map();

  const record = (provider, label) => {
    const key = String(provider || '').trim().toLowerCase();
    if (key === '') return;
    let labels = refs.get(key);
    if (!labels) {
      labels = [];
      refs.set(key, labels);
    }
    if (label && !labels.includes(label)) labels.push(label);
  };

  Object.entries(config.modelTiers || {}).forEach(([tier, val]) => {
    // Non-string and blank tier values carry no provider.
    if (typeof val !== 'string' || !val.trim()) return;
    record(val.split(':')[0], tier);
  });
  record(config.modelProvider?.type, 'default');

  return refs;
}
// Providers used by the routing config that have credentials/endpoints set.
// Unknown providers (no metadata) are included optimistically since we can't
// verify their credentials. Each entry is { name, type, tiers }; unknown
// providers are reported with type 'cloud'.
function getConfiguredProviders() {
  const meta = providerMeta();
  const out = [];
  for (const [name, tiers] of getReferencedProviders()) {
    // Own-property lookup only: a provider named e.g. "constructor" must not
    // resolve to something inherited from Object.prototype and be mistaken
    // for a known-but-unconfigured provider.
    const m = Object.prototype.hasOwnProperty.call(meta, name) ? meta[name] : undefined;
    if (!m || m.configured) {
      out.push({ name, type: m?.type || 'cloud', tiers });
    }
  }
  return out;
}
// Tiers pointing at a known provider whose credentials/endpoint are missing —
// surfaced as a warning so a misconfigured tier is visible. Each entry is
// { name, type, tiers }. Providers without metadata are never warned about.
function getProviderWarnings() {
  const meta = providerMeta();
  const out = [];
  for (const [name, tiers] of getReferencedProviders()) {
    // Own-property lookup only: names such as "constructor" or "toString"
    // would otherwise resolve to Object.prototype members and produce a bogus
    // warning with an undefined type.
    const m = Object.prototype.hasOwnProperty.call(meta, name) ? meta[name] : undefined;
    if (m && !m.configured) {
      out.push({ name, type: m.type, tiers });
    }
  }
  return out;
}
 // Noise provider names injected by unit tests — filter them out of UI
@@ -92,7 +142,8 @@ function overview(req, res) {
     port:          config.port,
     version:       process.env.npm_package_version || '9.0.2',
     modelProvider: config.modelProvider?.type || 'unknown',
-    providers:     getConfiguredProviders(),
+    providers:        getConfiguredProviders(),
+    providerWarnings: getProviderWarnings(),
     statsWindow:   win.label,
     metrics: {
       requestsTotal:    snap.requestsTotal,

package/src/orchestrator/bypass.js ADDED Viewed

@@ -0,0 +1,135 @@
+/**
+ * Request Bypass
+ *
+ * Short-circuits Claude Code CLI housekeeping requests that don't need a real
+ * model call:
+ *   - "Warmup" pings the CLI sends to prime a connection
+ *   - Topic/title extraction (the CLI asks for {"isNewTopic":..,"title":..})
+ *   - Single-word "count" / "Warmup" probes
+ *
+ * Returning a canned response here saves a full provider round-trip (latency
+ * and tokens) on every session. Inspired by 9router's bypassHandler.
+ *
+ * Always on — only ever returns a canned response for unambiguous Claude CLI
+ * housekeeping traffic, never for real work.
+ *
+ * @module orchestrator/bypass
+ */
+const logger = require("../logger");
/**
 * Reduce message content to plain text. A string is returned unchanged; an
 * array contributes the `text` of each `type: "text"` block that has a string
 * `text`, joined with single spaces; anything else yields "".
 */
function getText(content) {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = [];
  for (const block of content) {
    if (block && block.type === "text" && typeof block.text === "string") {
      pieces.push(block.text);
    }
  }
  return pieces.join(" ");
}
/**
 * Reduce the request's top-level `system` value to plain text. An array
 * contributes the `text` of each `type: "text"` entry that has a string
 * `text`, joined with single spaces; a string is returned unchanged; any
 * other value yields "".
 */
function getSystemText(system) {
  if (Array.isArray(system)) {
    const isTextBlock = (entry) =>
      Boolean(entry) && entry.type === "text" && typeof entry.text === "string";
    return system
      .filter(isTextBlock)
      .map(({ text }) => text)
      .join(" ");
  }
  return typeof system === "string" ? system : "";
}
/**
 * Decide whether a request is a bypassable Claude CLI housekeeping call.
 *
 * Checks, in order: a trailing assistant message whose text is just "{"
 * (title prefill), a system prompt mentioning `isNewTopic` (title
 * extraction), and a single user message reading exactly "Warmup" or "count".
 * Malformed input (null headers, null message entries, text blocks without a
 * string `text`) never throws; it simply fails to match.
 *
 * @param {object} args
 * @param {object} args.payload - The Anthropic request body.
 * @param {object} [args.headers] - Lowercased request headers.
 * @returns {{kind: string, text: string}|null} bypass descriptor or null.
 */
function detectBypass({ payload, headers = {} }) {
  if (!payload || !Array.isArray(payload.messages) || payload.messages.length === 0) {
    return null;
  }
  // Only bypass Claude CLI traffic — other clients use these endpoints for
  // real work and must never receive a canned response.
  // `?.` because the parameter default only covers undefined, not null.
  const ua = String(headers?.["user-agent"] || "").toLowerCase();
  if (!ua.includes("claude-cli")) return null;
  const messages = payload.messages;
  const lastMsg = messages[messages.length - 1];
  // Pattern 1: Title prefill — the CLI seeds an assistant turn with just "{"
  // to coax a JSON object out of the model.
  if (lastMsg?.role === "assistant") {
    let prefillText = "";
    if (Array.isArray(lastMsg.content)) {
      const firstBlock = lastMsg.content[0];
      // A text block may arrive without a string `text`; treat it as empty
      // rather than calling .trim() on undefined.
      if (firstBlock?.type === "text" && typeof firstBlock.text === "string") {
        prefillText = firstBlock.text;
      }
    } else if (typeof lastMsg.content === "string") {
      prefillText = lastMsg.content;
    }
    if (prefillText.trim() === "{") {
      return { kind: "title_prefill", text: "{}" };
    }
  }
  // Pattern 2: Topic/title extraction — system prompt asks for isNewTopic.
  // Synthesize a title from the first user message instead of calling a model.
  const systemText = getSystemText(payload.system);
  if (systemText.includes("isNewTopic")) {
    // `m?.role` so a null/undefined entry in the array is skipped, not fatal.
    const userMsg = messages.find((m) => m?.role === "user");
    const userText = getText(userMsg?.content).trim();
    const title = userText.split(/\s+/).filter(Boolean).slice(0, 3).join(" ");
    return {
      kind: "title_extraction",
      text: JSON.stringify({ isNewTopic: true, title }),
    };
  }
  // Pattern 3: Warmup / count probes — a single short user message.
  if (messages.length === 1 && messages[0]?.role === "user") {
    const firstText = getText(messages[0].content).trim();
    if (firstText === "Warmup" || firstText === "count") {
      return { kind: firstText.toLowerCase(), text: "OK" };
    }
  }
  return null;
}
/**
 * Wrap a bypass descriptor in the `{ status, body, terminationReason }`
 * result shape, with an Anthropic-style assistant message as the body.
 * Logs one info line per call.
 *
 * @param {{kind: string, text: string}} bypass - Descriptor to answer with.
 * @param {string} model - Model id to echo back; "claude-3-unknown" is used
 *   when it is falsy.
 * @returns {{status: number, body: object, terminationReason: string}}
 */
function buildBypassResponse(bypass, model) {
  const { kind, text } = bypass;
  logger.info({ kind }, "[Bypass] Short-circuiting CLI housekeeping request");

  const body = {
    id: `msg_bypass_${Date.now()}`,
    type: "message",
    role: "assistant",
    content: [{ type: "text", text }],
    model: model || "claude-3-unknown",
    stop_reason: "end_turn",
    stop_sequence: null,
    // Fixed token counts: no model was called for this response.
    usage: { input_tokens: 1, output_tokens: 1 },
    lynkr_bypass: { kind },
  };

  return { status: 200, body, terminationReason: `bypass_${kind}` };
}
+module.exports = {
+  detectBypass,
+  buildBypassResponse,
+};

package/src/orchestrator/index.js CHANGED Viewed

@@ -18,6 +18,7 @@ const { createAuditLogger } = require("../logger/audit-logger");
 const { getResolvedIp, runWithDnsContext } = require("../clients/dns-logger");
 const { getShuttingDown } = require("../api/health");
 const { tryPreflight, buildSatisfiedResponse: buildPreflightResponse } = require("./preflight");
+const { detectBypass, buildBypassResponse } = require("./bypass");
 const crypto = require("crypto");
 const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers");
 const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic");
@@ -1362,8 +1363,12 @@ function sanitizePayload(payload) {
     delete clean.tool_choice;
   }
-  // Smart tool selection (universal, applies to all providers)
-  if (config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) {
+  // Smart tool selection (server mode only). In client/passthrough mode the
+  // client (e.g. Claude Code) owns tool execution, so stripping its tools would
+  // make the model emit calls for tools we removed — they then get dropped as
+  // "hallucinated" and the session makes no progress. Pass tools through intact.
+  const inClientMode = config.toolExecutionMode === "client" || config.toolExecutionMode === "passthrough";
+  if (!inClientMode && config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) {
     const classification = classifyRequestType(clean);
     const selectedTools = selectToolsSmartly(clean.tools, classification, {
       provider: providerType,
@@ -1977,6 +1982,12 @@ IMPORTANT TOOL USAGE RULES:
     cleanPayload._tenantPolicy = options.tenantPolicy;
   }
+  // Thread session id for provider affinity — keeps a tool-bearing
+  // conversation on one provider so tool_call_id linkage doesn't break.
+  if (session?.id) {
+    cleanPayload._sessionId = session.id;
+  }
   // RTK-inspired tool result compression: compress large tool_results
   // before they reach the model (saves 60-90% on test/git/lint output)
   if (config.toolResultCompression?.enabled !== false) {
@@ -1985,6 +1996,18 @@ IMPORTANT TOOL USAGE RULES:
     compressToolResults(cleanPayload.messages, { tier });
   }
+  // MCP-aware tool dedup: drop built-in tools superseded by present MCP tools
+  // (e.g. WebSearch/WebFetch when Exa/Tavily MCP is available). Always on.
+  const { applyToolDedup } = require("../context/tool-dedup");
+  applyToolDedup(cleanPayload);
+  // Caveman terse-output injection (opt-in): nudge the model toward shorter
+  // responses to reduce output tokens.
+  if (config.caveman?.enabled === true) {
+    const { injectCaveman } = require("../context/caveman");
+    cleanPayload.system = injectCaveman(cleanPayload.system);
+  }
   if (agentTimer) agentTimer.mark("preInvokeModel");
   let databricksResponse;
   try {
@@ -3735,6 +3758,14 @@ async function processMessage({ payload, headers, session, cwd, options = {} })
     };
   }
+  // === REQUEST BYPASS ===
+  // Claude CLI housekeeping (Warmup pings, topic/title extraction) doesn't
+  // need a model call — return a canned response and skip the provider.
+  const bypass = detectBypass({ payload, headers });
+  if (bypass) {
+    return buildBypassResponse(bypass, requestedModel);
+  }
   // === PREFLIGHT CHECK ===
   // If the request supplied preflight_commands and they all pass in
   // the workspace, the work is already done — short-circuit with a

package/src/routing/index.js CHANGED Viewed

@@ -138,7 +138,46 @@ function getBestLocalProvider() {
  * @param {Object} options - Routing options
  * @returns {Object} Routing decision with provider and metadata
  */
+const sessionAffinity = require('./session-affinity');
/**
 * Provider routing with session affinity.
 *
 * A turn that carries tool history and belongs to a session with a pinned
 * provider is answered straight from the pin, so tool-call IDs keep pointing
 * at the provider that issued them. Every other turn goes through the normal
 * routing logic, and its decision becomes the session's new pin. Passing
 * `options.forceProvider` disables both the reuse and the re-pinning.
 */
async function determineProviderSmart(payload, options = {}) {
  const sessionId = payload?._sessionId || null;

  // Enforce affinity only for in-flight tool exchanges — the turns that 400
  // if the provider changes. Fresh turns keep full per-turn tier routing.
  const pinned =
    sessionId && !options.forceProvider && sessionAffinity.payloadHasToolHistory(payload)
      ? sessionAffinity.getPinned(sessionId)
      : null;
  if (pinned) {
    const { provider, model, tier } = pinned;
    logger.debug({ sessionId, provider, tier },
      '[Routing] Session affinity — reusing provider for tool-bearing turn');
    return {
      provider,
      model,
      tier,
      method: 'session_affinity',
      reason: 'tool_history_provider_pin',
    };
  }

  const decision = await _determineProviderSmartInner(payload, options);

  // Remember the chosen provider so later tool-bearing turns stay consistent.
  const shouldPin = sessionId && decision?.provider && !options.forceProvider;
  if (shouldPin) {
    sessionAffinity.setPinned(sessionId, decision);
  }
  return decision;
}
+async function _determineProviderSmartInner(payload, options = {}) {
   const primaryProvider = config.modelProvider?.type ?? 'databricks';
   // Risk analysis runs orthogonally to complexity. We compute it once