npm - lynkr - Versions diffs - 3.0.0 → 3.2.0 - Mend

lynkr 3.0.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

package/LICENSE +201 -21
package/README.md +626 -145
package/docs/index.md +150 -18
package/install.sh +63 -16
package/package.json +2 -2
package/scripts/setup.js +117 -43
package/src/api/router.js +78 -0
package/src/clients/openrouter-utils.js +51 -7
package/src/config/index.js +51 -0
package/src/context/budget.js +326 -0
package/src/context/compression.js +397 -0
package/src/memory/format.js +156 -0
package/src/memory/retriever.js +55 -14
package/src/memory/search.js +36 -12
package/src/memory/store.js +61 -13
package/src/memory/surprise.js +56 -15
package/src/orchestrator/index.js +189 -2
package/src/prompts/system.js +320 -0
package/src/tools/index.js +9 -0
package/src/tools/smart-selection.js +356 -0
package/src/tools/truncate.js +105 -0
package/src/utils/tokens.js +217 -0
package/test/llamacpp-integration.test.js +198 -0
package/test/memory/extractor.test.js +34 -6
package/test/memory/retriever.test.js +45 -15
package/test/memory/retriever.test.js.bak +585 -0
package/test/memory/search.test.js +160 -12
package/test/memory/search.test.js.bak +389 -0
package/test/memory/store.test.js +57 -25
package/test/memory/store.test.js.bak +312 -0
package/test/memory/surprise.test.js +1 -1

package/src/api/router.js CHANGED Viewed

@@ -305,4 +305,82 @@ router.get("/v1/agents/:executionId", (req, res) => {
   }
 });
+/**
+ * GET /api/sessions/:sessionId/tokens - token usage statistics for a session.
+ *
+ * Looks the session up with getSession(sessionId) and answers 404 with
+ * { error: "Session not found" } when the result is falsy. Otherwise it
+ * responds with the sessionId, an aggregate `stats` object and a per-turn
+ * `breakdown`, both derived from tokens.getSessionTokenStats(session).
+ * Anything thrown inside the handler is answered with a 500 whose body
+ * carries error.message.
+ */
+router.get("/api/sessions/:sessionId/tokens", (req, res) => {
+  try {
+    const tokens = require("../utils/tokens"); // required inside the try, so a load failure also ends up as a 500
+    const { sessionId } = req.params;
+    const session = getSession(sessionId);
+    if (!session) {
+      return res.status(404).json({ error: "Session not found" });
+    }
+    const stats = tokens.getSessionTokenStats(session);
+    res.json({
+      sessionId,
+      stats: {
+        turns: stats.turns,
+        totalTokens: stats.totalTokens,
+        totalCost: parseFloat(stats.totalCost.toFixed(4)), // rounded to 4 decimals, sent as a number
+        averageTokensPerTurn: stats.averageTokensPerTurn,
+        cacheHitRate: parseFloat(stats.cacheHitRate) + '%' // sent as a string with a trailing "%"; NOTE(review): becomes "NaN%" if cacheHitRate does not parse as a number
+      },
+      breakdown: stats.breakdown.map(turn => ({
+        turn: turn.turn,
+        timestamp: turn.timestamp,
+        model: turn.model,
+        estimated: turn.estimated.total, // NOTE(review): a turn lacking `estimated`, `actual` or `cost` throws here and turns the whole response into a 500
+        actual: {
+          input: turn.actual.inputTokens,
+          output: turn.actual.outputTokens,
+          cached: turn.actual.cacheReadTokens,
+          total: turn.actual.totalTokens
+        },
+        cost: parseFloat(turn.cost.total.toFixed(6)) // rounded to 6 decimals, sent as a number
+      }))
+    });
+  } catch (error) {
+    res.status(500).json({ error: error.message });
+  }
+});
+/**
+ * GET /api/tokens/stats - global token usage statistics (all sessions).
+ *
+ * Iterates over getAllSessions() from ../sessions, asks
+ * tokens.getSessionTokenStats() for each session and sums totalTokens,
+ * totalCost and turns over the sessions that report at least one turn.
+ * The totals and two rounded averages are returned under the `global` key.
+ * Anything thrown inside the handler is answered with a 500 whose body
+ * carries error.message.
+ */
+router.get("/api/tokens/stats", (req, res) => {
+  try {
+    const tokens = require("../utils/tokens"); // both requires sit inside the try, so a load failure also ends up as a 500
+    const { getAllSessions } = require("../sessions");
+    const allSessions = getAllSessions();
+    let totalTokens = 0;
+    let totalCost = 0;
+    let totalTurns = 0;
+    let totalSessions = 0;
+    for (const session of allSessions) {
+      const stats = tokens.getSessionTokenStats(session);
+      if (stats.turns > 0) { // sessions without recorded turns are left out of every total, including the session count
+        totalTokens += stats.totalTokens;
+        totalCost += stats.totalCost;
+        totalTurns += stats.turns;
+        totalSessions++;
+      }
+    }
+    res.json({
+      global: {
+        sessions: totalSessions,
+        turns: totalTurns,
+        totalTokens,
+        totalCost: parseFloat(totalCost.toFixed(4)), // rounded to 4 decimals, sent as a number
+        averageTokensPerTurn: totalTurns > 0 ? Math.round(totalTokens / totalTurns) : 0, // 0 instead of dividing by zero
+        averageTokensPerSession: totalSessions > 0 ? Math.round(totalTokens / totalSessions) : 0 // 0 instead of dividing by zero
+      }
+    });
+  } catch (error) {
+    res.status(500).json({ error: error.message });
+  }
+});
 module.exports = router;

package/src/clients/openrouter-utils.js CHANGED Viewed

@@ -244,16 +244,60 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
   const message = choice.message || {};
   const contentBlocks = [];
-  // Add text content if present
+  // Check if there are tool calls present
+  const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
+  // Helper function to detect if content is a JSON representation of a tool call
+  // Some models (like llama.cpp) may output tool calls in both content AND tool_calls
+  const isToolCallJson = (text) => {
+    if (!text) return false;
+    const trimmed = text.trim();
+    // Check if it looks like a JSON object containing tool/function call
+    // Matches various formats:
+    // - {"type": "function", "function": {"name": "X", "parameters": {...}}}
+    // - {"function": "X", "parameters": {...}}
+    // - {"tool": "X", "input": {...}}
+    return (trimmed.startsWith('{') || trimmed.startsWith('[')) &&
+           (trimmed.includes('"function"') || trimmed.includes('"tool"') ||
+            (trimmed.includes('"type"') && trimmed.includes('"parameters"'))) &&
+           (trimmed.includes('"parameters"') || trimmed.includes('"input"') ||
+            trimmed.includes('"arguments"'));
+  };
+  // Add text content if present, but skip if it's a duplicate/malformed tool call JSON
   if (message.content && message.content.trim()) {
-    contentBlocks.push({
-      type: "text",
-      text: message.content
-    });
+    const looksLikeToolJson = isToolCallJson(message.content);
+    // Skip content in two cases:
+    // 1. We have proper tool_calls AND content duplicates them (original fix)
+    // 2. Content looks like tool call JSON but we DON'T have tool_calls
+    //    (model incorrectly output JSON instead of structured tool_calls)
+    if (looksLikeToolJson) {
+      if (hasToolCalls) {
+        // Case 1: Duplicate - model provided both content and tool_calls
+        logger.debug({
+          contentPreview: message.content.substring(0, 100),
+          toolCallCount: message.tool_calls.length
+        }, "Skipping text content that duplicates tool_calls (llama.cpp quirk)");
+      } else {
+        // Case 2: Malformed - model only provided JSON in content, not structured tool_calls
+        // This is a model error - it should have used tool_calls, not raw JSON
+        logger.warn({
+          contentPreview: message.content.substring(0, 200)
+        }, "Model output tool call as JSON text instead of structured tool_calls - filtering out malformed output");
+      }
+      // Skip this content block in both cases
+    } else {
+      // Normal text content - include it
+      contentBlocks.push({
+        type: "text",
+        text: message.content
+      });
+    }
   }
   // Add tool calls if present
-  if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) {
+  if (hasToolCalls) {
     for (const toolCall of message.tool_calls) {
       const func = toolCall.function || {};
       let input = {};
@@ -288,7 +332,7 @@ function convertOpenRouterResponseToAnthropic(openRouterResponse, requestedModel
   // Determine stop reason
   let stopReason = "end_turn";
-  if (message.tool_calls && message.tool_calls.length > 0) {
+  if (hasToolCalls) {
     stopReason = "tool_use";
   } else if (choice.finish_reason === "length") {
     stopReason = "max_tokens";

package/src/config/index.js CHANGED Viewed

@@ -135,6 +135,28 @@ const memoryExtractionEnabled = process.env.MEMORY_EXTRACTION_ENABLED !== "false
 const memoryDecayEnabled = process.env.MEMORY_DECAY_ENABLED !== "false"; // default true
 const memoryDecayHalfLifeDays = Number.parseInt(process.env.MEMORY_DECAY_HALF_LIFE ?? "30", 10);
+/*
+ * Token optimization settings, read from the environment.
+ * - Boolean flags are true unless the variable is exactly the string "false".
+ * - String options are lower-cased; the default applies when the variable is unset.
+ * - Integer options are parsed base-10 from the variable, or from the default when unset.
+ * NOTE(review): nothing in these declarations validates the string options or
+ * guards the parsed integers against NaN (e.g. TOKEN_BUDGET_MAX=abc) - confirm
+ * whether consumers rely on valid values.
+ */
+const tokenTrackingEnabled = process.env.TOKEN_TRACKING_ENABLED !== "false"; // default true
+const toolTruncationEnabled = process.env.TOOL_TRUNCATION_ENABLED !== "false"; // default true
+const memoryFormat = (process.env.MEMORY_FORMAT ?? "compact").toLowerCase(); // default "compact"
+const memoryDedupEnabled = process.env.MEMORY_DEDUP_ENABLED !== "false"; // default true
+const memoryDedupLookback = Number.parseInt(process.env.MEMORY_DEDUP_LOOKBACK ?? "5", 10); // default 5
+const systemPromptMode = (process.env.SYSTEM_PROMPT_MODE ?? "dynamic").toLowerCase(); // default "dynamic"
+const toolDescriptions = (process.env.TOOL_DESCRIPTIONS ?? "minimal").toLowerCase(); // default "minimal"
+const historyCompressionEnabled = process.env.HISTORY_COMPRESSION_ENABLED !== "false"; // default true
+const historyKeepRecentTurns = Number.parseInt(process.env.HISTORY_KEEP_RECENT_TURNS ?? "10", 10); // default 10
+const historySummarizeOlder = process.env.HISTORY_SUMMARIZE_OLDER !== "false"; // default true
+const tokenBudgetWarning = Number.parseInt(process.env.TOKEN_BUDGET_WARNING ?? "100000", 10); // default 100000 tokens
+const tokenBudgetMax = Number.parseInt(process.env.TOKEN_BUDGET_MAX ?? "180000", 10); // default 180000 tokens
+const tokenBudgetEnforcement = process.env.TOKEN_BUDGET_ENFORCEMENT !== "false"; // default true
+// Smart tool selection configuration (always enabled)
+const smartToolSelectionMode = (process.env.SMART_TOOL_SELECTION_MODE ?? "heuristic").toLowerCase(); // default "heuristic"
+const smartToolSelectionTokenBudget = Number.parseInt(
+  process.env.SMART_TOOL_SELECTION_TOKEN_BUDGET ?? "2500", // default 2500 tokens
+  10
+);
 // Only require Databricks credentials if it's the primary provider or used as fallback
 if (modelProvider === "databricks" && (!rawBaseUrl || !apiKey)) {
   throw new Error("Set DATABRICKS_API_BASE and DATABRICKS_API_KEY before starting the proxy.");
@@ -531,6 +553,9 @@ const config = {
     injectionFormat: ["system", "assistant_preamble"].includes(memoryInjectionFormat)
       ? memoryInjectionFormat
       : "system",
+    format: memoryFormat,
+    dedupEnabled: memoryDedupEnabled,
+    dedupLookback: memoryDedupLookback,
     extraction: {
       enabled: memoryExtractionEnabled,
     },
@@ -539,6 +564,32 @@ const config = {
       halfLifeDays: Number.isNaN(memoryDecayHalfLifeDays) ? 30 : memoryDecayHalfLifeDays,
     },
   },
+  tokenTracking: {
+    enabled: tokenTrackingEnabled,
+  },
+  toolTruncation: {
+    enabled: toolTruncationEnabled,
+  },
+  systemPrompt: {
+    mode: systemPromptMode,
+    toolDescriptions: toolDescriptions,
+  },
+  historyCompression: {
+    enabled: historyCompressionEnabled,
+    keepRecentTurns: historyKeepRecentTurns,
+    summarizeOlder: historySummarizeOlder,
+  },
+  tokenBudget: {
+    warning: tokenBudgetWarning,
+    max: tokenBudgetMax,
+    enforcement: tokenBudgetEnforcement,
+  },
+  smartToolSelection: {
+    enabled: true,  // HARDCODED - always enabled
+    mode: smartToolSelectionMode,
+    tokenBudget: smartToolSelectionTokenBudget,
+    minimalMode: false,  // HARDCODED - disabled
+  },
 };
 module.exports = config;

package/src/context/budget.js ADDED Viewed

@@ -0,0 +1,326 @@
+/**
+ * Token Budget Management
+ *
+ * Enforces token budgets and applies adaptive compression
+ * when payloads approach or exceed limits.
+ *
+ */
+const logger = require('../logger');
+const config = require('../config');
+const tokens = require('../utils/tokens');
+const historyCompression = require('./compression');
+const systemPrompt = require('../prompts/system');
+/**
+ * Check if payload exceeds token budget
+ *
+ * Estimates the payload size with tokens.countPayloadTokens() and compares
+ * the estimate's `total` against both thresholds. A threshold passed as null
+ * or undefined falls back to config.tokenBudget.warning or
+ * config.tokenBudget.max, and then to 100000 or 180000 respectively.
+ *
+ * @param {Object} payload - Request payload
+ * @param {number} warningThreshold - Warning threshold (tokens)
+ * @param {number} maxThreshold - Maximum threshold (tokens)
+ * @returns {Object} Budget check result: `estimated` (the full estimate),
+ *   `totalTokens`, the two thresholds actually used, the booleans
+ *   `atWarning`, `overMax` and `needsCompression`, and the ratios
+ *   `warningLevel` and `maxLevel` formatted as strings with two decimals.
+ */
+function checkBudget(payload, warningThreshold = null, maxThreshold = null) {
+  warningThreshold = warningThreshold ?? config.tokenBudget?.warning ?? 100000;
+  maxThreshold = maxThreshold ?? config.tokenBudget?.max ?? 180000;
+  const estimated = tokens.countPayloadTokens(payload);
+  const totalTokens = estimated.total;
+  const warningLevel = totalTokens / warningThreshold; // NOTE(review): a threshold of 0 makes this Infinity or NaN
+  const maxLevel = totalTokens / maxThreshold;
+  return {
+    estimated,
+    totalTokens,
+    warningThreshold,
+    maxThreshold,
+    atWarning: totalTokens >= warningThreshold, // inclusive comparison
+    overMax: totalTokens >= maxThreshold, // inclusive: exactly at the limit already counts as over
+    warningLevel: warningLevel.toFixed(2), // string, not a number
+    maxLevel: maxLevel.toFixed(2), // string, not a number
+    needsCompression: totalTokens >= warningThreshold // same condition as atWarning
+  };
+}
+/**
+ * Enforce token budget with adaptive compression
+ *
+ * Applies progressively aggressive compression strategies
+ * until payload fits within budget.
+ *
+ * Flow, as implemented below:
+ * - With enforcement off the payload is returned untouched and unmeasured.
+ * - Work starts once the estimate reaches the WARNING threshold
+ *   (checkBudget().needsCompression), but each strategy's early return only
+ *   tests the MAX threshold (overMax). A payload between the two limits
+ *   therefore receives the first strategy whose precondition holds and is
+ *   returned right after it.
+ * - Strategies run in order on a JSON deep copy, so the caller's payload is
+ *   not modified: (1) compressHistory with keepRecentTurns 5, (2)
+ *   compressHistory with keepRecentTurns 3, (3) system prompt clean-up,
+ *   (4) tools reduced to name + input_schema, (5) tools filtered to a fixed
+ *   list of names, (6) system prompt cut to 5000 characters.
+ * - If the result is still over the max an error is logged and the
+ *   best-effort payload is returned anyway; this function does not throw
+ *   for that case.
+ *
+ * @param {Object} payload - Request payload
+ * @param {Object} options - Budget options: `warningThreshold`,
+ *   `maxThreshold` and `enforcement`; each falls back to config.tokenBudget
+ *   and then to 100000, 180000 and true respectively.
+ * @returns {Object} Optimized payload and statistics. The shape depends on
+ *   the path taken: enforcement off gives
+ *   { payload, compressed: false, strategy: 'none' }; below the warning
+ *   threshold the same object also carries `budget`; every other path
+ *   returns the object built by finalizeBudgetEnforcement().
+ */
+function enforceBudget(payload, options = {}) {
+  const opts = {
+    warningThreshold: options.warningThreshold ?? config.tokenBudget?.warning ?? 100000,
+    maxThreshold: options.maxThreshold ?? config.tokenBudget?.max ?? 180000,
+    enforcement: options.enforcement ?? config.tokenBudget?.enforcement ?? true,
+  };
+  if (!opts.enforcement) {
+    return { payload, compressed: false, strategy: 'none' };
+  }
+  const initialCheck = checkBudget(payload, opts.warningThreshold, opts.maxThreshold);
+  if (!initialCheck.needsCompression) {
+    return {
+      payload,
+      compressed: false,
+      strategy: 'none',
+      budget: initialCheck
+    };
+  }
+  // Clone payload to avoid modifying original (JSON round-trip: only JSON-serializable data survives)
+  let optimized = JSON.parse(JSON.stringify(payload));
+  let strategy = []; // names of the strategies applied, in order
+  logger.info({
+    initialTokens: initialCheck.totalTokens,
+    warningThreshold: opts.warningThreshold,
+    maxThreshold: opts.maxThreshold,
+    overBudget: initialCheck.totalTokens - opts.maxThreshold // negative when only the warning threshold was reached
+  }, 'Token budget exceeded, applying adaptive compression'); // NOTE(review): also logged when the payload is between warning and max
+  // Strategy 1: Compress history more aggressively
+  if (optimized.messages && optimized.messages.length > 10) {
+    const originalMessages = optimized.messages;
+    optimized.messages = historyCompression.compressHistory(originalMessages, {
+      keepRecentTurns: 5, // More aggressive: keep only 5 recent
+      summarizeOlder: true,
+      enabled: true
+    });
+    strategy.push('aggressive_history_compression');
+    const afterHistory = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterHistory.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterHistory);
+    }
+  }
+  // Strategy 2: Further compress history (keep only 3 turns); operates on the output of strategy 1 when that ran
+  if (optimized.messages && optimized.messages.length > 5) {
+    const originalMessages = optimized.messages;
+    optimized.messages = historyCompression.compressHistory(originalMessages, {
+      keepRecentTurns: 3, // Very aggressive: keep only 3
+      summarizeOlder: true,
+      enabled: true
+    });
+    strategy.push('extreme_history_compression');
+    const afterExtreme = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterExtreme.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterExtreme);
+    }
+  }
+  // Strategy 3: Compress system prompt aggressively
+  if (optimized.system) {
+    const originalSystem = optimized.system;
+    optimized.system = compressSystemPromptAggressively(originalSystem, optimized); // system is replaced by the cleaned-up text
+    strategy.push('aggressive_system_compression');
+    const afterSystem = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterSystem.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterSystem);
+    }
+  }
+  // Strategy 4: Remove tool descriptions entirely (keep only names/schemas)
+  if (optimized.tools && optimized.tools.length > 0) {
+    optimized.tools = optimized.tools.map(tool => ({
+      name: tool.name,
+      input_schema: tool.input_schema
+      // Remove description entirely (NOTE(review): every other field on the tool object is dropped as well)
+    }));
+    strategy.push('remove_tool_descriptions');
+    const afterTools = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterTools.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterTools);
+    }
+  }
+  // Strategy 5: Reduce tools to essential only
+  if (optimized.tools && optimized.tools.length > 5) {
+    const essentialTools = ['Read', 'Write', 'Edit', 'Bash', 'Grep']; // exact, case-sensitive name match
+    optimized.tools = optimized.tools.filter(t => essentialTools.includes(t.name)); // may leave an empty array if no tool carries one of these names
+    strategy.push('reduce_to_essential_tools');
+    const afterToolReduction = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterToolReduction.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterToolReduction);
+    }
+  }
+  // Strategy 6: Last resort - truncate system prompt
+  if (optimized.system) {
+    const systemText = typeof optimized.system === 'string'
+      ? optimized.system
+      : systemPrompt.flattenBlocks(optimized.system);
+    optimized.system = systemText.substring(0, 5000) + '\n\n[System prompt truncated due to token budget]'; // NOTE(review): the marker is appended even when the text is shorter than 5000 characters
+    strategy.push('truncate_system_prompt');
+    const afterTruncate = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+    if (!afterTruncate.overMax) {
+      return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, afterTruncate);
+    }
+  }
+  // Final check (also reached when no strategy's precondition held, in which case `strategy` is empty)
+  const finalCheck = checkBudget(optimized, opts.warningThreshold, opts.maxThreshold);
+  if (finalCheck.overMax) {
+    logger.error({
+      initialTokens: initialCheck.totalTokens,
+      finalTokens: finalCheck.totalTokens,
+      maxThreshold: opts.maxThreshold,
+      strategiesApplied: strategy
+    }, 'Failed to compress payload within token budget');
+  }
+  return finalizeBudgetEnforcement(payload, optimized, strategy, initialCheck, finalCheck);
+}
+/**
+ * Finalize budget enforcement and return results
+ *
+ * Computes how many estimated tokens were saved between the two budget
+ * checks, logs a summary at info level and packages the result object that
+ * enforceBudget() hands back to its caller.
+ *
+ * @param {Object} original - Payload before compression (not read by this function)
+ * @param {Object} optimized - Payload after the applied strategies
+ * @param {string[]} strategy - Names of the strategies applied, in order
+ * @param {Object} initialCheck - checkBudget() result for the original payload
+ * @param {Object} finalCheck - checkBudget() result for the optimized payload
+ * @returns {Object} { payload, compressed, strategy, initialBudget,
+ *   finalBudget, stats }. `compressed` is always true here, `strategy` is
+ *   the names joined with ' -> ' (an empty string for an empty list), and
+ *   stats.percentage is a string with one decimal.
+ */
+function finalizeBudgetEnforcement(original, optimized, strategy, initialCheck, finalCheck) {
+  const saved = initialCheck.totalTokens - finalCheck.totalTokens;
+  const percentage = initialCheck.totalTokens > 0
+    ? ((saved / initialCheck.totalTokens) * 100).toFixed(1)
+    : '0.0'; // avoids dividing by zero when the initial estimate is 0
+  logger.info({
+    strategiesApplied: strategy,
+    initialTokens: initialCheck.totalTokens,
+    finalTokens: finalCheck.totalTokens,
+    saved,
+    percentage,
+    nowWithinBudget: !finalCheck.overMax
+  }, 'Budget enforcement completed');
+  return {
+    payload: optimized,
+    compressed: true, // set regardless of whether any strategy actually ran
+    strategy: strategy.join(' -> '),
+    initialBudget: initialCheck,
+    finalBudget: finalCheck,
+    stats: {
+      initialTokens: initialCheck.totalTokens,
+      finalTokens: finalCheck.totalTokens,
+      saved,
+      percentage
+    }
+  };
+}
+/**
+ * Compress system prompt aggressively for budget enforcement
+ *
+ * Input that is not a string is first flattened with
+ * systemPrompt.flattenBlocks(). The text then loses, in this order:
+ * <example>...</example> blocks; sections opened by "# Background",
+ * "# Context", "# Examples", "# Notes", "# Tips" or "# Guidelines"
+ * (case-insensitive); runs of three or more newlines, collapsed to two;
+ * spaces and tabs directly before a newline; and <!-- ... --> comments.
+ *
+ * NOTE(review): the section pattern is not anchored to the start of a line
+ * and has no word boundary after the heading name, so it can also fire
+ * inside "## Notes" or on "# Contextual ...". Because of the `i` flag, the
+ * `[A-Z]` in its lookahead accepts lowercase letters too. Confirm this is
+ * the intended reach.
+ *
+ * @param {*} systemPromptContent - System prompt, either a string or a value
+ *   that systemPrompt.flattenBlocks() accepts
+ * @param {Object} payload - Not read by this function
+ * @returns {string} The reduced prompt text
+ */
+function compressSystemPromptAggressively(systemPromptContent, payload) {
+  let text = typeof systemPromptContent === 'string'
+    ? systemPromptContent
+    : systemPrompt.flattenBlocks(systemPromptContent);
+  // Remove all examples
+  text = text.replace(/<example>[\s\S]*?<\/example>/g, '');
+  // Remove verbose sections (each match runs lazily up to the next "\n#", a blank line followed by a letter, or the end of the text)
+  text = text.replace(/# (Background|Context|Examples|Notes|Tips|Guidelines)[\s\S]*?(?=\n#|\n\n[A-Z]|$)/gi, '');
+  // Remove excessive whitespace
+  text = text.replace(/\n{3,}/g, '\n\n');
+  text = text.replace(/[ \t]+\n/g, '\n');
+  // Remove comments (HTML-style)
+  text = text.replace(/<!--[\s\S]*?-->/g, '');
+  return text;
+}
+/**
+ * Get budget allocation for different sections
+ *
+ * Helps prioritize token allocation across system/tools/messages
+ *
+ * Splits the budget into fixed shares of 15% / 10% / 60% / 15%. Each share
+ * is rounded down with Math.floor, so the four values can add up to slightly
+ * less than totalBudget.
+ *
+ * @param {number} totalBudget - Total token budget (defaults to 180000)
+ * @returns {Object} Allocation breakdown with the numeric keys `system`,
+ *   `tools`, `messages` and `output`
+ */
+function getAllocation(totalBudget = 180000) {
+  return {
+    system: Math.floor(totalBudget * 0.15), // 15% for system prompt
+    tools: Math.floor(totalBudget * 0.10),  // 10% for tool definitions
+    messages: Math.floor(totalBudget * 0.60), // 60% for message history
+    output: Math.floor(totalBudget * 0.15),   // 15% reserved for output
+  };
+}
+/**
+ * Analyze budget usage breakdown
+ *
+ * Counts the payload's tokens with tokens.countPayloadTokens(), derives the
+ * recommended allocation from config.tokenBudget.max (180000 when that is
+ * not configured) and compares the two.
+ *
+ * @param {Object} payload - Request payload
+ * @returns {Object} Budget breakdown analysis: `usage` (the token count
+ *   result), `allocation` (from getAllocation), `percentages` (share of the
+ *   counted total for system, tools and messages, each a string with one
+ *   decimal) and `recommendations` (from generateRecommendations)
+ */
+function analyzeBudgetUsage(payload) {
+  const breakdown = tokens.countPayloadTokens(payload);
+  const total = breakdown.total;
+  const allocation = getAllocation(config.tokenBudget?.max ?? 180000);
+  return {
+    usage: breakdown,
+    allocation,
+    percentages: { // relative to the payload's own total, not to the budget; '0.0' when the total is 0
+      system: total > 0 ? ((breakdown.system / total) * 100).toFixed(1) : '0.0',
+      tools: total > 0 ? ((breakdown.tools / total) * 100).toFixed(1) : '0.0',
+      messages: total > 0 ? ((breakdown.messages / total) * 100).toFixed(1) : '0.0',
+    },
+    recommendations: generateRecommendations(breakdown, allocation)
+  };
+}
+/**
+ * Generate recommendations based on budget usage
+ *
+ * Emits one entry for each of system, tools and messages (in that order)
+ * whose counted tokens are strictly greater than the allocated amount.
+ * The suggestion text is fixed per section and does not check whether the
+ * named setting is already active.
+ *
+ * @param {Object} breakdown - Token counts with numeric `system`, `tools` and `messages`
+ * @param {Object} allocation - Allocated tokens with the same three keys
+ * @returns {Object[]} Entries of the form { section, issue, suggestion };
+ *   empty when nothing exceeds its allocation
+ */
+function generateRecommendations(breakdown, allocation) {
+  const recommendations = [];
+  if (breakdown.system > allocation.system) {
+    recommendations.push({
+      section: 'system',
+      issue: 'System prompt exceeds recommended allocation',
+      suggestion: 'Enable dynamic system prompts (SYSTEM_PROMPT_MODE=dynamic)'
+    });
+  }
+  if (breakdown.tools > allocation.tools) {
+    recommendations.push({
+      section: 'tools',
+      issue: 'Tool definitions exceed recommended allocation',
+      suggestion: 'Enable minimal tool descriptions (TOOL_DESCRIPTIONS=minimal)'
+    });
+  }
+  if (breakdown.messages > allocation.messages) {
+    recommendations.push({
+      section: 'messages',
+      issue: 'Message history exceeds recommended allocation',
+      suggestion: 'Enable history compression (HISTORY_COMPRESSION_ENABLED=true)'
+    });
+  }
+  return recommendations;
+}
+module.exports = { // public surface of the budget module; the other functions in this file are not exported
+  checkBudget,
+  enforceBudget,
+  getAllocation,
+  analyzeBudgetUsage,
+};