converse-mcp-server 1.5.2 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -622,7 +622,7 @@ git push origin feature/your-feature
  
  ## 🙏 Acknowledgments
  
- This MCP Server was inspired by and builds upon the excellent work from [BeehiveInnovations/zen-mcp-server](https://github.com/BeehiveInnovations/zen-mcp-server). We're grateful for their pioneering implementation and innovative approach to MCP server development.
+ This MCP Server was inspired by and builds upon the excellent work from [BeehiveInnovations/zen-mcp-server](https://github.com/BeehiveInnovations/zen-mcp-server).
  
  ## 📄 License
  
@@ -632,8 +632,4 @@ MIT License - see [LICENSE](LICENSE) file for details.
  
  - **GitHub**: https://github.com/FallDownTheSystem/converse
  - **Issues**: https://github.com/FallDownTheSystem/converse/issues
- - **NPM Package**: https://www.npmjs.com/package/converse-mcp-server
- 
- ---
- 
- **Built with ❤️ using Node.js and modern AI APIs**
+ - **NPM Package**: https://www.npmjs.com/package/converse-mcp-server
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "converse-mcp-server",
-   "version": "1.5.2",
+   "version": "1.5.4",
    "description": "Converse MCP Server - Converse with other LLMs with chat and consensus tools",
    "type": "module",
    "main": "src/index.js",
@@ -351,19 +351,22 @@ export const anthropicProvider = {
  // Get Anthropic SDK
  const Anthropic = await getAnthropicSDK();
  
+ // Resolve model name first
+ const resolvedModel = resolveModelName(model);
+ const modelConfig = SUPPORTED_MODELS[resolvedModel] || {};
+ 
  // Initialize Anthropic client with default headers
  // Use both prompt caching and extended cache duration headers for 1-hour caching
+ // Set beta headers for caching
+ const betaHeaders = ['prompt-caching-2024-07-31', 'extended-cache-ttl-2025-04-11'];
+ 
  const anthropic = new Anthropic({
    apiKey: config.apiKeys.anthropic,
    defaultHeaders: {
-     'anthropic-beta': 'prompt-caching-2024-07-31,extended-cache-ttl-2025-04-11'
+     'anthropic-beta': betaHeaders.join(',')
    }
  });
  
- // Resolve model name
- const resolvedModel = resolveModelName(model);
- const modelConfig = SUPPORTED_MODELS[resolvedModel] || {};
- 
  // Convert messages to Anthropic format (system messages are always cached)
  const { systemPrompt, messages: anthropicMessages } = convertMessagesToAnthropic(messages);
  
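Two things are worth noting in the hunk above. First, model resolution is hoisted ahead of client construction because the rewritten max_tokens logic in the next hunk consults `resolvedModel` before the request payload is built. Second, the header refactor is behavior-preserving: joining the `betaHeaders` array yields exactly the string the old literal contained. A minimal check (illustrative only, not part of the package):

```js
// Illustrative: the array-based assembly reproduces the old hard-coded header.
const betaHeaders = ['prompt-caching-2024-07-31', 'extended-cache-ttl-2025-04-11'];
console.log(betaHeaders.join(','));
// -> prompt-caching-2024-07-31,extended-cache-ttl-2025-04-11
```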
@@ -380,39 +383,38 @@
    requestPayload.system = systemPrompt;
  }
  
- // Add max tokens (required by Anthropic)
- const defaultMaxTokens = modelConfig.maxOutputTokens || 8192;
- 
- // If thinking is supported and enabled, we need to reduce max_tokens to leave room for thinking
- let effectiveMaxTokens = defaultMaxTokens;
- if (modelConfig.supportsThinking && reasoning_effort) {
-   // Reserve some tokens for thinking - use a more conservative approach
-   effectiveMaxTokens = Math.min(defaultMaxTokens, 16000); // Cap at 16k for models with thinking
+ // Add max tokens only if explicitly requested
+ // For Claude 4 series models, let the SDK use its defaults (32k for opus, 64k for sonnet)
+ if (maxTokens) {
+   requestPayload.max_tokens = Math.min(maxTokens, modelConfig.maxOutputTokens || 8192);
+ } else if (!resolvedModel.includes('claude-opus-4') && !resolvedModel.includes('claude-sonnet-4')) {
+   // For non-4 series models, we still need to set max_tokens
+   requestPayload.max_tokens = modelConfig.maxOutputTokens || 8192;
  }
- 
- requestPayload.max_tokens = maxTokens
-   ? Math.min(maxTokens, effectiveMaxTokens)
-   : effectiveMaxTokens;
+ // For 4 series models without explicit maxTokens, don't set max_tokens - let SDK use defaults
  
  // Add thinking configuration for models that support it
  if (modelConfig.supportsThinking && reasoning_effort) {
    const thinkingBudget = calculateThinkingBudget(modelConfig, reasoning_effort);
-   if (thinkingBudget > 0) {
-     // Anthropic docs: thinking budget counts towards total token limit
-     // So we need to ensure max_tokens + budget_tokens <= model's actual limit
-     // Reduce max_tokens to make room for thinking
-     const reducedMaxTokens = requestPayload.max_tokens - thinkingBudget;
- 
-     if (reducedMaxTokens >= 1000 && thinkingBudget >= 1024) { // Ensure we have reasonable space for both
-       requestPayload.max_tokens = reducedMaxTokens;
-       requestPayload.thinking = {
-         type: 'enabled',
-         budget_tokens: thinkingBudget
-       };
-       debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens, max_tokens reduced to: ${reducedMaxTokens} (${reasoning_effort} effort)`);
-     } else {
-       debugLog(`[Anthropic] Not enough token budget for thinking. Would need ${thinkingBudget} thinking + ${reducedMaxTokens} output tokens`);
-     }
+   debugLog(`[Anthropic] Model ${resolvedModel}: maxOutputTokens=${modelConfig.maxOutputTokens}, maxThinkingTokens=${modelConfig.maxThinkingTokens}, thinkingBudget=${thinkingBudget}`);
+ 
+   // For 4 series models, we trust the SDK defaults work with thinking
+   // For other models, check against max_tokens if set
+   const maxTokensLimit = requestPayload.max_tokens ||
+     (resolvedModel.includes('claude-opus-4') ? 32000 :
+      resolvedModel.includes('claude-sonnet-4') ? 64000 :
+      modelConfig.maxOutputTokens);
+ 
+   if (thinkingBudget > 0 && thinkingBudget < maxTokensLimit) {
+     // According to Anthropic docs: thinking tokens count towards max_tokens limit
+     // thinking.budget_tokens must be >= 1024 and < max_tokens
+     requestPayload.thinking = {
+       type: 'enabled',
+       budget_tokens: thinkingBudget
+     };
+     debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens (${reasoning_effort} effort)`);
+   } else {
+     debugLog(`[Anthropic] Thinking not enabled: budget ${thinkingBudget} must be < max_tokens limit ${maxTokensLimit}`);
    }
  }
  
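The two rewritten blocks above change the token policy in tandem: `max_tokens` is now set only when the caller asks for it or the model is not a Claude 4 series model, and the thinking budget is validated against a limit instead of being subtracted from `max_tokens`. A condensed sketch of the new selection logic; the wrapper function is hypothetical, while the constants and model-name checks come straight from the diff:

```js
// Hypothetical helper distilling the new max_tokens policy shown above.
// Returns undefined when max_tokens should stay unset (Claude 4 series,
// no explicit request), so the SDK defaults apply.
function pickMaxTokens(resolvedModel, modelConfig, maxTokens) {
  const ceiling = modelConfig.maxOutputTokens || 8192;
  if (maxTokens) return Math.min(maxTokens, ceiling); // explicit request: clamp
  const isClaude4 =
    resolvedModel.includes('claude-opus-4') ||
    resolvedModel.includes('claude-sonnet-4');
  return isClaude4 ? undefined : ceiling; // 4 series: defer to SDK defaults
}
```

When `max_tokens` stays unset, the thinking check falls back to the hard-coded 32000/64000 limits, preserving the documented constraint that `budget_tokens` must be at least 1024 and strictly less than `max_tokens`.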
@@ -429,6 +431,14 @@ export const anthropicProvider = {
  
  try {
    debugLog(`[Anthropic] Calling ${resolvedModel} with ${anthropicMessages.length} messages`);
+   debugLog(`[Anthropic] Request payload:`, JSON.stringify({
+     model: requestPayload.model,
+     max_tokens: requestPayload.max_tokens,
+     thinking: requestPayload.thinking,
+     temperature: requestPayload.temperature,
+     message_count: requestPayload.messages?.length,
+     system_length: Array.isArray(requestPayload.system) ? requestPayload.system[0]?.text?.length : requestPayload.system?.length
+   }, null, 2));
    if (systemPrompt) {
      debugLog(`[Anthropic] System prompt length: ${systemPrompt.length} characters`);
    }
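A side effect worth knowing when reading these logs: `JSON.stringify` drops `undefined` properties, so for Claude 4 series requests that deliberately leave `max_tokens` unset, the key simply never appears in the dump. A sketch (the model name and values are made up):

```js
// Sketch only: unset keys vanish from the serialized debug view.
const view = {
  model: 'claude-sonnet-4-x', // hypothetical resolved model name
  max_tokens: undefined,      // intentionally unset for 4 series models
  thinking: { type: 'enabled', budget_tokens: 4096 },
  temperature: 0.7,
  message_count: 3,
  system_length: 1200
};
console.log(JSON.stringify(view, null, 2)); // "max_tokens" is absent from the output
```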
@@ -507,8 +517,21 @@ export const anthropicProvider = {
    throw new AnthropicProviderError(`Invalid request: ${error.error.message}`, ErrorCodes.INVALID_REQUEST, error);
  } else if (error.error?.type === 'not_found_error') {
    throw new AnthropicProviderError(`Model ${resolvedModel} not found`, ErrorCodes.MODEL_NOT_FOUND, error);
- } else if (error.message?.includes('context length') || error.message?.includes('token')) {
-   throw new AnthropicProviderError('Context length exceeded for model', ErrorCodes.CONTEXT_LENGTH_EXCEEDED, error);
+ } else if (error.message?.includes('context length') || error.message?.includes('context_length') ||
+            (error.message?.includes('token') && error.message?.includes('limit'))) {
+   debugError(`[Anthropic] Context length error - Full error:`, error);
+   debugError(`[Anthropic] Error message:`, error.message);
+   debugError(`[Anthropic] Error response:`, error.response);
+   throw new AnthropicProviderError(`Context length exceeded for model: ${error.message}`, ErrorCodes.CONTEXT_LENGTH_EXCEEDED, error);
+ } else if (error.message?.includes('Streaming is strongly recommended')) {
+   // This is just a warning from the SDK about long requests
+   debugLog(`[Anthropic] SDK streaming recommendation warning`);
+   debugError(`[Anthropic] Full error object:`, error);
+   // Check if there's an actual error response
+   if (error.response || error.status) {
+     debugError(`[Anthropic] Error response status:`, error.status);
+     debugError(`[Anthropic] Error response data:`, error.response);
+   }
  }
  
  // Generic error handling
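The rewritten predicate stops misclassifying any error that merely mentions "token" (for example an authentication message): that word now only counts when "limit" appears alongside it, and the snake_case `context_length` variant is matched too. Extracted as a standalone function for clarity (the function name and sample messages are hypothetical):

```js
// Hypothetical extraction of the tightened context-length test above.
function isContextLengthError(message = '') {
  return message.includes('context length') ||
         message.includes('context_length') ||
         (message.includes('token') && message.includes('limit'));
}

isContextLengthError('input exceeds the 200000 token limit'); // true
isContextLengthError('invalid auth token provided');          // false (the old check returned true)
```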
@@ -18,11 +18,11 @@ const SUPPORTED_MODELS = {
      supportsStreaming: true,
      supportsImages: true,
      supportsTemperature: true,
-     supportsThinking: true,
+     supportsThinking: false,
      supportsWebSearch: true,
-     maxThinkingTokens: 24576,
+     maxThinkingTokens: 0,
      timeout: 300000,
-     description: 'Gemini 2.0 Flash (1M context) - Latest fast model with experimental thinking, supports audio/video input and grounding',
+     description: 'Gemini 2.0 Flash (1M context) - Latest fast model, supports audio/video input and grounding',
      aliases: ['flash-2.0', 'flash2', 'flash 2.0', 'gemini flash 2.0', 'gemini-2.0-flash-latest']
    },
    'gemini-2.0-flash-lite': {
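Finally, flipping `supportsThinking` to `false` and zeroing `maxThinkingTokens` for gemini-2.0-flash makes any thinking guard of the shape used in the Anthropic hunk above short-circuit, so no thinking configuration is attached for this model regardless of the requested reasoning effort. A minimal illustration (the config literal mirrors the diff; the rest is a sketch):

```js
// With the updated entry, the standard guard is now unreachable for this model.
const modelConfig = { supportsThinking: false, maxThinkingTokens: 0 };
const reasoning_effort = 'high'; // hypothetical caller request
if (modelConfig.supportsThinking && reasoning_effort) {
  // never reached for gemini-2.0-flash after this change
}
```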