converse-mcp-server 1.3.2 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/API.md CHANGED
@@ -291,6 +291,45 @@ MCP_TRANSPORT=stdio npm start
291
291
  | `grok-3` | `grok3` | 131K | 131K | Previous gen | Stable reasoning |
292
292
  | `grok-3-fast` | - | 131K | 131K | High perf | Faster processing |
293
293
 
294
+ ### Anthropic Models
295
+
296
+ | Model | Alias | Context | Tokens | Features | Use Cases |
297
+ |-------|-------|---------|--------|----------|-----------|
298
+ | `claude-opus-4-20250514` | `opus-4`, `opus` | 200K | 32K | Extended thinking, images, caching | Complex reasoning tasks |
299
+ | `claude-sonnet-4-20250514` | `sonnet-4`, `sonnet` | 200K | 64K | Extended thinking, images, caching | High performance, balanced |
300
+ | `claude-3-7-sonnet-20250219` | `sonnet-3.7` | 200K | 64K | Extended thinking, images, caching | Enhanced 3.x generation |
301
+ | `claude-3-5-sonnet-20241022` | `claude-3.5-sonnet` | 200K | 8K | Images, caching | Fast and intelligent |
302
+ | `claude-3-5-haiku-20241022` | `haiku` | 200K | 8K | Caching | Fastest, simple queries |
303
+
304
+ **Prompt Caching (Always Enabled):**
305
+ - System prompts are automatically cached for 1 hour via Anthropic's prompt caching feature
306
+ - Reduces latency and costs for repeated requests with the same system prompt
307
+ - A minimum of 1024 tokens is required for caching (2048 for Haiku models)
308
+ - Cache information is available in the response metadata as `cache_creation_input_tokens` and `cache_read_input_tokens`
309
+
310
+ ### DeepSeek Models
311
+
312
+ | Model | Alias | Context | Tokens | Features | Use Cases |
313
+ |-------|-------|---------|--------|----------|-----------|
314
+ | `deepseek-v3` | `deepseek-chat`, `deepseek` | 128K | 64K | Latest model | General purpose AI |
315
+ | `deepseek-coder-v2.5` | `deepseek-coder` | 128K | 16K | Code optimization | Programming tasks |
316
+
317
+ ### Mistral Models
318
+
319
+ | Model | Alias | Context | Tokens | Features | Use Cases |
320
+ |-------|-------|---------|--------|----------|-----------|
321
+ | `magistral-medium-2506` | `magistral`, `magistral-medium` | 40K | 8K | Reasoning model | Complex reasoning |
322
+ | `magistral-small-2506` | `magistral-small` | 40K | 8K | Small reasoning | Fast reasoning |
323
+ | `mistral-medium-2505` | `mistral-medium`, `mistral` | 128K | 32K | Multimodal | General + images |
324
+
325
+ ### OpenRouter Models
326
+
327
+ | Model | Alias | Context | Tokens | Features | Use Cases |
328
+ |-------|-------|---------|--------|----------|-----------|
329
+ | `kimi/k2` | `k2`, `kimi-k2` | 256K | 128K | Latest Kimi | Large context tasks |
330
+ | `qwen/qwen-2.5-coder-32b-instruct` | `qwen-coder` | 32K | 32K | Code focus | Programming |
331
+ | `qwen/qwq-32b-preview` | `qwen-thinking`, `qwq` | 32K | 32K | Reasoning | Step-by-step thinking |
332
+
294
333
  ### Model Selection
295
334
 
296
335
  Use `"auto"` for automatic selection or specify exact models:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "converse-mcp-server",
3
- "version": "1.3.2",
3
+ "version": "1.3.4",
4
4
  "description": "Converse MCP Server - Converse with other LLMs with chat and consensus tools",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -168,13 +168,20 @@ function validateApiKey(apiKey) {
168
168
  * - System messages must be passed separately
169
169
  * - Messages must alternate between user and assistant
170
170
  * - First message must be from user
171
+ * - System may be a plain string or an array of text blocks carrying cache_control
171
172
  */
172
- function convertMessagesToAnthropic(messages) {
173
+ function convertMessagesToAnthropic(messages, options = {}) {
173
174
  if (!Array.isArray(messages)) {
174
175
  throw new AnthropicProviderError('Messages must be an array', ErrorCodes.INVALID_MESSAGES);
175
176
  }
176
177
 
177
- let systemPrompt = '';
178
+ const {
179
+ enableSystemCache = true, // Always cache system messages by default
180
+ cacheUserMessages = false,
181
+ cacheMessageThreshold = 5 // Cache messages after this many turns
182
+ } = options;
183
+ let systemContent = [];
184
+ let systemText = '';
178
185
  const anthropicMessages = [];
179
186
 
180
187
  for (const [index, msg] of messages.entries()) {
@@ -193,8 +200,8 @@ function convertMessagesToAnthropic(messages) {
193
200
  }
194
201
 
195
202
  if (role === 'system') {
196
- // Anthropic expects system messages to be concatenated
197
- systemPrompt += (systemPrompt ? '\n\n' : '') + content;
203
+ // Collect system messages
204
+ systemText += (systemText ? '\n\n' : '') + content;
198
205
  } else {
199
206
  // Handle complex content structure (array with text and images)
200
207
  if (Array.isArray(content)) {
@@ -252,7 +259,27 @@ function convertMessagesToAnthropic(messages) {
252
259
  }
253
260
  }
254
261
 
255
- return { systemPrompt, messages: anthropicMessages };
262
+ // Build system content based on cache enablement
263
+ let systemResult = null;
264
+ if (systemText) {
265
+ if (enableSystemCache) {
266
+ // Use array format with cache control for system prompt
267
+ systemResult = [{
268
+ type: 'text',
269
+ text: systemText,
270
+ cache_control: {
271
+ type: 'ephemeral',
272
+ ttl: '1h' // 1 hour cache duration
273
+ }
274
+ }];
275
+ debugLog(`[Anthropic] System prompt caching enabled (ephemeral with ttl-extender for 1 hour) - ${systemText.length} chars`);
276
+ } else {
277
+ // Use simple string format without caching
278
+ systemResult = systemText;
279
+ }
280
+ }
281
+
282
+ return { systemPrompt: systemResult, messages: anthropicMessages };
256
283
  }
257
284
 
258
285
  /**
@@ -324,16 +351,20 @@ export const anthropicProvider = {
324
351
  // Get Anthropic SDK
325
352
  const Anthropic = await getAnthropicSDK();
326
353
 
327
- // Initialize Anthropic client
354
+ // Initialize Anthropic client with default headers
355
+ // Send both the prompt-caching and extended-cache-TTL beta flags in one anthropic-beta header for 1-hour caching
328
356
  const anthropic = new Anthropic({
329
357
  apiKey: config.apiKeys.anthropic,
358
+ defaultHeaders: {
359
+ 'anthropic-beta': 'prompt-caching-2024-07-31,extended-cache-ttl-2025-04-11'
360
+ }
330
361
  });
331
362
 
332
363
  // Resolve model name
333
364
  const resolvedModel = resolveModelName(model);
334
365
  const modelConfig = SUPPORTED_MODELS[resolvedModel] || {};
335
366
 
336
- // Convert messages to Anthropic format
367
+ // Convert messages to Anthropic format (system messages are always cached)
337
368
  const { systemPrompt, messages: anthropicMessages } = convertMessagesToAnthropic(messages);
338
369
 
339
370
  // Build request payload
@@ -350,24 +381,49 @@ export const anthropicProvider = {
350
381
  }
351
382
 
352
383
  // Add max tokens (required by Anthropic)
353
- requestPayload.max_tokens = maxTokens
354
- ? Math.min(maxTokens, modelConfig.maxOutputTokens || 8192)
355
- : modelConfig.maxOutputTokens || 8192;
356
-
357
- // Add temperature if specified
358
- if (temperature !== undefined) {
359
- requestPayload.temperature = Math.max(0, Math.min(1, temperature));
384
+ const defaultMaxTokens = modelConfig.maxOutputTokens || 8192;
385
+
386
+ // If thinking is supported and enabled, we need to reduce max_tokens to leave room for thinking
387
+ let effectiveMaxTokens = defaultMaxTokens;
388
+ if (modelConfig.supportsThinking && reasoning_effort) {
389
+ // Reserve some tokens for thinking - use a more conservative approach
390
+ effectiveMaxTokens = Math.min(defaultMaxTokens, 16000); // Cap at 16k for models with thinking
360
391
  }
392
+
393
+ requestPayload.max_tokens = maxTokens
394
+ ? Math.min(maxTokens, effectiveMaxTokens)
395
+ : effectiveMaxTokens;
361
396
 
362
397
  // Add thinking configuration for models that support it
363
398
  if (modelConfig.supportsThinking && reasoning_effort) {
364
399
  const thinkingBudget = calculateThinkingBudget(modelConfig, reasoning_effort);
365
400
  if (thinkingBudget > 0) {
366
- requestPayload.thinking = {
367
- type: 'enabled',
368
- budget_tokens: thinkingBudget
369
- };
370
- debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens (${reasoning_effort} effort)`);
401
+ // Anthropic docs: thinking budget counts towards total token limit
402
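+ // So we need to ensure max_tokens + budget_tokens <= model's actual limit (NOTE(review): the API also appears to require budget_tokens < max_tokens - confirm the reduced value still satisfies that)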
403
+ // Reduce max_tokens to make room for thinking
404
+ const reducedMaxTokens = requestPayload.max_tokens - thinkingBudget;
405
+
406
+ if (reducedMaxTokens >= 1000 && thinkingBudget >= 1024) { // Ensure we have reasonable space for both
407
+ requestPayload.max_tokens = reducedMaxTokens;
408
+ requestPayload.thinking = {
409
+ type: 'enabled',
410
+ budget_tokens: thinkingBudget
411
+ };
412
+ debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens, max_tokens reduced to: ${reducedMaxTokens} (${reasoning_effort} effort)`);
413
+ } else {
414
+ debugLog(`[Anthropic] Not enough token budget for thinking. Would need ${thinkingBudget} thinking + ${reducedMaxTokens} output tokens`);
415
+ }
416
+ }
417
+ }
418
+
419
+ // Add temperature if specified
420
+ // When thinking is enabled, temperature must be 1
421
+ if (temperature !== undefined) {
422
+ if (requestPayload.thinking) {
423
+ requestPayload.temperature = 1;
424
+ debugLog('[Anthropic] Temperature forced to 1 for thinking mode');
425
+ } else {
426
+ requestPayload.temperature = Math.max(0, Math.min(1, temperature));
371
427
  }
372
428
  }
373
429
 
@@ -421,7 +477,9 @@ export const anthropicProvider = {
421
477
  input_tokens: usage.input_tokens || 0,
422
478
  output_tokens: usage.output_tokens || 0,
423
479
  total_tokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
424
- thinking_tokens: usage.thinking_input_tokens || 0
480
+ thinking_tokens: usage.thinking_input_tokens || 0,
481
+ cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
482
+ cache_read_input_tokens: usage.cache_read_input_tokens || 0
425
483
  },
426
484
  response_time_ms: responseTime,
427
485
  finish_reason: response.stop_reason,
@@ -172,7 +172,7 @@ async function getMistralSDK() {
172
172
  if (!MistralSDK) {
173
173
  try {
174
174
  const module = await import('@mistralai/mistralai');
175
- MistralSDK = module.default || module.Mistral;
175
+ MistralSDK = module.Mistral || module.default;
176
176
  } catch (error) {
177
177
  throw new MistralProviderError(
178
178
  'Failed to load Mistral SDK. Please install @mistralai/mistralai',