npm - converse-mcp-server - Versions diffs - 1.3.2 → 1.3.4 - Mend

converse-mcp-server 1.3.2 → 1.3.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/docs/API.md +39 -0
package/package.json +1 -1
package/src/providers/anthropic.js +78 -20
package/src/providers/mistral.js +1 -1

package/docs/API.md CHANGED Viewed

@@ -291,6 +291,45 @@ MCP_TRANSPORT=stdio npm start
 | `grok-3` | `grok3` | 131K | 131K | Previous gen | Stable reasoning |
 | `grok-3-fast` | - | 131K | 131K | High perf | Faster processing |
+### Anthropic Models
+| Model | Alias | Context | Tokens | Features | Use Cases |
+|-------|-------|---------|--------|----------|-----------|
+| `claude-opus-4-20250514` | `opus-4`, `opus` | 200K | 32K | Extended thinking, images, caching | Complex reasoning tasks |
+| `claude-sonnet-4-20250514` | `sonnet-4`, `sonnet` | 200K | 64K | Extended thinking, images, caching | High performance, balanced |
+| `claude-3-7-sonnet-20250219` | `sonnet-3.7` | 200K | 64K | Extended thinking, images, caching | Enhanced 3.x generation |
+| `claude-3-5-sonnet-20241022` | `claude-3.5-sonnet` | 200K | 8K | Images, caching | Fast and intelligent |
+| `claude-3-5-haiku-20241022` | `haiku` | 200K | 8K | Caching | Fastest, simple queries |
+**Prompt Caching (Always Enabled):**
+- System prompts are automatically cached for 1 hour using Anthropic's prompt caching
+- Reduces latency and costs for repeated requests with the same system prompt
+- Minimum 1024 tokens required for caching (2048 for Haiku models)
+- Cache information available in response metadata: `cache_creation_input_tokens` and `cache_read_input_tokens`
+### DeepSeek Models
+| Model | Alias | Context | Tokens | Features | Use Cases |
+|-------|-------|---------|--------|----------|-----------|
+| `deepseek-v3` | `deepseek-chat`, `deepseek` | 128K | 64K | Latest model | General purpose AI |
+| `deepseek-coder-v2.5` | `deepseek-coder` | 128K | 16K | Code optimization | Programming tasks |
+### Mistral Models
+| Model | Alias | Context | Tokens | Features | Use Cases |
+|-------|-------|---------|--------|----------|-----------|
+| `magistral-medium-2506` | `magistral`, `magistral-medium` | 40K | 8K | Reasoning model | Complex reasoning |
+| `magistral-small-2506` | `magistral-small` | 40K | 8K | Small reasoning | Fast reasoning |
+| `mistral-medium-2505` | `mistral-medium`, `mistral` | 128K | 32K | Multimodal | General + images |
+### OpenRouter Models
+| Model | Alias | Context | Tokens | Features | Use Cases |
+|-------|-------|---------|--------|----------|-----------|
+| `kimi/k2` | `k2`, `kimi-k2` | 256K | 128K | Latest Kimi | Large context tasks |
+| `qwen/qwen-2.5-coder-32b-instruct` | `qwen-coder` | 32K | 32K | Code focus | Programming |
+| `qwen/qwq-32b-preview` | `qwen-thinking`, `qwq` | 32K | 32K | Reasoning | Step-by-step thinking |
 ### Model Selection
 Use `"auto"` for automatic selection or specify exact models:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "converse-mcp-server",
-  "version": "1.3.2",
+  "version": "1.3.4",
   "description": "Converse MCP Server - Converse with other LLMs with chat and consensus tools",
   "type": "module",
   "main": "src/index.js",

package/src/providers/anthropic.js CHANGED Viewed

@@ -168,13 +168,20 @@ function validateApiKey(apiKey) {
  * - System messages must be passed separately
  * - Messages must alternate between user and assistant
  * - First message must be from user
+ * - System can now be an array with cache control blocks
  */
-function convertMessagesToAnthropic(messages) {
+function convertMessagesToAnthropic(messages, options = {}) {
   if (!Array.isArray(messages)) {
     throw new AnthropicProviderError('Messages must be an array', ErrorCodes.INVALID_MESSAGES);
   }
-  let systemPrompt = '';
+  const {
+    enableSystemCache = true, // Always cache system messages by default
+    cacheUserMessages = false,
+    cacheMessageThreshold = 5 // Cache messages after this many turns
+  } = options;
+  let systemContent = [];
+  let systemText = '';
   const anthropicMessages = [];
   for (const [index, msg] of messages.entries()) {
@@ -193,8 +200,8 @@ function convertMessagesToAnthropic(messages) {
     }
     if (role === 'system') {
-      // Anthropic expects system messages to be concatenated
-      systemPrompt += (systemPrompt ? '\n\n' : '') + content;
+      // Collect system messages
+      systemText += (systemText ? '\n\n' : '') + content;
     } else {
       // Handle complex content structure (array with text and images)
       if (Array.isArray(content)) {
@@ -252,7 +259,27 @@ function convertMessagesToAnthropic(messages) {
     }
   }
-  return { systemPrompt, messages: anthropicMessages };
+  // Build system content based on cache enablement
+  let systemResult = null;
+  if (systemText) {
+    if (enableSystemCache) {
+      // Use array format with cache control for system prompt
+      systemResult = [{
+        type: 'text',
+        text: systemText,
+        cache_control: {
+          type: 'ephemeral',
+          ttl: '1h' // 1 hour cache duration
+        }
+      }];
+      debugLog(`[Anthropic] System prompt caching enabled (ephemeral with ttl-extender for 1 hour) - ${systemText.length} chars`);
+    } else {
+      // Use simple string format without caching
+      systemResult = systemText;
+    }
+  }
+  return { systemPrompt: systemResult, messages: anthropicMessages };
 }
 /**
@@ -324,16 +351,20 @@ export const anthropicProvider = {
     // Get Anthropic SDK
     const Anthropic = await getAnthropicSDK();
-    // Initialize Anthropic client
+    // Initialize Anthropic client with default headers
+    // Use both prompt caching and extended cache duration headers for 1-hour caching
     const anthropic = new Anthropic({
       apiKey: config.apiKeys.anthropic,
+      defaultHeaders: {
+        'anthropic-beta': 'prompt-caching-2024-07-31,extended-cache-ttl-2025-04-11'
+      }
     });
     // Resolve model name
     const resolvedModel = resolveModelName(model);
     const modelConfig = SUPPORTED_MODELS[resolvedModel] || {};
-    // Convert messages to Anthropic format
+    // Convert messages to Anthropic format (system messages are always cached)
     const { systemPrompt, messages: anthropicMessages } = convertMessagesToAnthropic(messages);
     // Build request payload
@@ -350,24 +381,49 @@ export const anthropicProvider = {
     }
     // Add max tokens (required by Anthropic)
-    requestPayload.max_tokens = maxTokens
-      ? Math.min(maxTokens, modelConfig.maxOutputTokens || 8192)
-      : modelConfig.maxOutputTokens || 8192;
-    // Add temperature if specified
-    if (temperature !== undefined) {
-      requestPayload.temperature = Math.max(0, Math.min(1, temperature));
+    const defaultMaxTokens = modelConfig.maxOutputTokens || 8192;
+    // If thinking is supported and enabled, we need to reduce max_tokens to leave room for thinking
+    let effectiveMaxTokens = defaultMaxTokens;
+    if (modelConfig.supportsThinking && reasoning_effort) {
+      // Reserve some tokens for thinking - use a more conservative approach
+      effectiveMaxTokens = Math.min(defaultMaxTokens, 16000); // Cap at 16k for models with thinking
     }
+    requestPayload.max_tokens = maxTokens
+      ? Math.min(maxTokens, effectiveMaxTokens)
+      : effectiveMaxTokens;
     // Add thinking configuration for models that support it
     if (modelConfig.supportsThinking && reasoning_effort) {
       const thinkingBudget = calculateThinkingBudget(modelConfig, reasoning_effort);
       if (thinkingBudget > 0) {
-        requestPayload.thinking = {
-          type: 'enabled',
-          budget_tokens: thinkingBudget
-        };
-        debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens (${reasoning_effort} effort)`);
+        // Anthropic docs: thinking budget counts towards total token limit
+        // So we need to ensure max_tokens + budget_tokens <= model's actual limit
+        // Reduce max_tokens to make room for thinking
+        const reducedMaxTokens = requestPayload.max_tokens - thinkingBudget;
+        if (reducedMaxTokens >= 1000 && thinkingBudget >= 1024) { // Ensure we have reasonable space for both
+          requestPayload.max_tokens = reducedMaxTokens;
+          requestPayload.thinking = {
+            type: 'enabled',
+            budget_tokens: thinkingBudget
+          };
+          debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens, max_tokens reduced to: ${reducedMaxTokens} (${reasoning_effort} effort)`);
+        } else {
+          debugLog(`[Anthropic] Not enough token budget for thinking. Would need ${thinkingBudget} thinking + ${reducedMaxTokens} output tokens`);
+        }
+      }
+    }
+    // Add temperature if specified
+    // When thinking is enabled, temperature must be 1
+    if (temperature !== undefined) {
+      if (requestPayload.thinking) {
+        requestPayload.temperature = 1;
+        debugLog('[Anthropic] Temperature forced to 1 for thinking mode');
+      } else {
+        requestPayload.temperature = Math.max(0, Math.min(1, temperature));
       }
     }
@@ -421,7 +477,9 @@ export const anthropicProvider = {
             input_tokens: usage.input_tokens || 0,
             output_tokens: usage.output_tokens || 0,
             total_tokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
-            thinking_tokens: usage.thinking_input_tokens || 0
+            thinking_tokens: usage.thinking_input_tokens || 0,
+            cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
+            cache_read_input_tokens: usage.cache_read_input_tokens || 0
           },
           response_time_ms: responseTime,
           finish_reason: response.stop_reason,

package/src/providers/mistral.js CHANGED Viewed

@@ -172,7 +172,7 @@ async function getMistralSDK() {
   if (!MistralSDK) {
     try {
       const module = await import('@mistralai/mistralai');
-      MistralSDK = module.default || module.Mistral;
+      MistralSDK = module.Mistral || module.default;
     } catch (error) {
       throw new MistralProviderError(
         'Failed to load Mistral SDK. Please install @mistralai/mistralai',