converse-mcp-server 1.3.2 → 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/API.md +39 -0
- package/package.json +1 -1
- package/src/providers/anthropic.js +78 -20
- package/src/providers/mistral.js +1 -1
package/docs/API.md
CHANGED
|
@@ -291,6 +291,45 @@ MCP_TRANSPORT=stdio npm start
|
|
|
291
291
|
| `grok-3` | `grok3` | 131K | 131K | Previous gen | Stable reasoning |
|
|
292
292
|
| `grok-3-fast` | - | 131K | 131K | High perf | Faster processing |
|
|
293
293
|
|
|
294
|
+
### Anthropic Models
|
|
295
|
+
|
|
296
|
+
| Model | Alias | Context | Tokens | Features | Use Cases |
|
|
297
|
+
|-------|-------|---------|--------|----------|-----------|
|
|
298
|
+
| `claude-opus-4-20250514` | `opus-4`, `opus` | 200K | 32K | Extended thinking, images, caching | Complex reasoning tasks |
|
|
299
|
+
| `claude-sonnet-4-20250514` | `sonnet-4`, `sonnet` | 200K | 64K | Extended thinking, images, caching | High performance, balanced |
|
|
300
|
+
| `claude-3-7-sonnet-20250219` | `sonnet-3.7` | 200K | 64K | Extended thinking, images, caching | Enhanced 3.x generation |
|
|
301
|
+
| `claude-3-5-sonnet-20241022` | `claude-3.5-sonnet` | 200K | 8K | Images, caching | Fast and intelligent |
|
|
302
|
+
| `claude-3-5-haiku-20241022` | `haiku` | 200K | 8K | Caching | Fastest, simple queries |
|
|
303
|
+
|
|
304
|
+
**Prompt Caching (Always Enabled):**
|
|
305
|
+
- System prompts are automatically cached for 1 hour using Anthropic's prompt caching
|
|
306
|
+
- Reduces latency and costs for repeated requests with the same system prompt
|
|
307
|
+
- Minimum 1024 tokens required for caching (2048 for Haiku models)
|
|
308
|
+
- Cache information available in response metadata: `cache_creation_input_tokens` and `cache_read_input_tokens`
|
|
309
|
+
|
|
310
|
+
### DeepSeek Models
|
|
311
|
+
|
|
312
|
+
| Model | Alias | Context | Tokens | Features | Use Cases |
|
|
313
|
+
|-------|-------|---------|--------|----------|-----------|
|
|
314
|
+
| `deepseek-v3` | `deepseek-chat`, `deepseek` | 128K | 64K | Latest model | General purpose AI |
|
|
315
|
+
| `deepseek-coder-v2.5` | `deepseek-coder` | 128K | 16K | Code optimization | Programming tasks |
|
|
316
|
+
|
|
317
|
+
### Mistral Models
|
|
318
|
+
|
|
319
|
+
| Model | Alias | Context | Tokens | Features | Use Cases |
|
|
320
|
+
|-------|-------|---------|--------|----------|-----------|
|
|
321
|
+
| `magistral-medium-2506` | `magistral`, `magistral-medium` | 40K | 8K | Reasoning model | Complex reasoning |
|
|
322
|
+
| `magistral-small-2506` | `magistral-small` | 40K | 8K | Small reasoning | Fast reasoning |
|
|
323
|
+
| `mistral-medium-2505` | `mistral-medium`, `mistral` | 128K | 32K | Multimodal | General + images |
|
|
324
|
+
|
|
325
|
+
### OpenRouter Models
|
|
326
|
+
|
|
327
|
+
| Model | Alias | Context | Tokens | Features | Use Cases |
|
|
328
|
+
|-------|-------|---------|--------|----------|-----------|
|
|
329
|
+
| `kimi/k2` | `k2`, `kimi-k2` | 256K | 128K | Latest Kimi | Large context tasks |
|
|
330
|
+
| `qwen/qwen-2.5-coder-32b-instruct` | `qwen-coder` | 32K | 32K | Code focus | Programming |
|
|
331
|
+
| `qwen/qwq-32b-preview` | `qwen-thinking`, `qwq` | 32K | 32K | Reasoning | Step-by-step thinking |
|
|
332
|
+
|
|
294
333
|
### Model Selection
|
|
295
334
|
|
|
296
335
|
Use `"auto"` for automatic selection or specify exact models:
|
package/package.json
CHANGED
package/src/providers/anthropic.js
CHANGED
|
@@ -168,13 +168,20 @@ function validateApiKey(apiKey) {
|
|
|
168
168
|
* - System messages must be passed separately
|
|
169
169
|
* - Messages must alternate between user and assistant
|
|
170
170
|
* - First message must be from user
|
|
171
|
+
* - System can now be an array with cache control blocks
|
|
171
172
|
*/
|
|
172
|
-
function convertMessagesToAnthropic(messages) {
|
|
173
|
+
function convertMessagesToAnthropic(messages, options = {}) {
|
|
173
174
|
if (!Array.isArray(messages)) {
|
|
174
175
|
throw new AnthropicProviderError('Messages must be an array', ErrorCodes.INVALID_MESSAGES);
|
|
175
176
|
}
|
|
176
177
|
|
|
177
|
-
|
|
178
|
+
const {
|
|
179
|
+
enableSystemCache = true, // Always cache system messages by default
|
|
180
|
+
cacheUserMessages = false,
|
|
181
|
+
cacheMessageThreshold = 5 // Cache messages after this many turns
|
|
182
|
+
} = options;
|
|
183
|
+
let systemContent = [];
|
|
184
|
+
let systemText = '';
|
|
178
185
|
const anthropicMessages = [];
|
|
179
186
|
|
|
180
187
|
for (const [index, msg] of messages.entries()) {
|
|
@@ -193,8 +200,8 @@ function convertMessagesToAnthropic(messages) {
|
|
|
193
200
|
}
|
|
194
201
|
|
|
195
202
|
if (role === 'system') {
|
|
196
|
-
//
|
|
197
|
-
|
|
203
|
+
// Collect system messages
|
|
204
|
+
systemText += (systemText ? '\n\n' : '') + content;
|
|
198
205
|
} else {
|
|
199
206
|
// Handle complex content structure (array with text and images)
|
|
200
207
|
if (Array.isArray(content)) {
|
|
@@ -252,7 +259,27 @@ function convertMessagesToAnthropic(messages) {
|
|
|
252
259
|
}
|
|
253
260
|
}
|
|
254
261
|
|
|
255
|
-
|
|
262
|
+
// Build system content based on cache enablement
|
|
263
|
+
let systemResult = null;
|
|
264
|
+
if (systemText) {
|
|
265
|
+
if (enableSystemCache) {
|
|
266
|
+
// Use array format with cache control for system prompt
|
|
267
|
+
systemResult = [{
|
|
268
|
+
type: 'text',
|
|
269
|
+
text: systemText,
|
|
270
|
+
cache_control: {
|
|
271
|
+
type: 'ephemeral',
|
|
272
|
+
ttl: '1h' // 1 hour cache duration
|
|
273
|
+
}
|
|
274
|
+
}];
|
|
275
|
+
debugLog(`[Anthropic] System prompt caching enabled (ephemeral with ttl-extender for 1 hour) - ${systemText.length} chars`);
|
|
276
|
+
} else {
|
|
277
|
+
// Use simple string format without caching
|
|
278
|
+
systemResult = systemText;
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
return { systemPrompt: systemResult, messages: anthropicMessages };
|
|
256
283
|
}
|
|
257
284
|
|
|
258
285
|
/**
|
|
@@ -324,16 +351,20 @@ export const anthropicProvider = {
|
|
|
324
351
|
// Get Anthropic SDK
|
|
325
352
|
const Anthropic = await getAnthropicSDK();
|
|
326
353
|
|
|
327
|
-
// Initialize Anthropic client
|
|
354
|
+
// Initialize Anthropic client with default headers
|
|
355
|
+
// Use both prompt caching and extended cache duration headers for 1-hour caching
|
|
328
356
|
const anthropic = new Anthropic({
|
|
329
357
|
apiKey: config.apiKeys.anthropic,
|
|
358
|
+
defaultHeaders: {
|
|
359
|
+
'anthropic-beta': 'prompt-caching-2024-07-31,extended-cache-ttl-2025-04-11'
|
|
360
|
+
}
|
|
330
361
|
});
|
|
331
362
|
|
|
332
363
|
// Resolve model name
|
|
333
364
|
const resolvedModel = resolveModelName(model);
|
|
334
365
|
const modelConfig = SUPPORTED_MODELS[resolvedModel] || {};
|
|
335
366
|
|
|
336
|
-
// Convert messages to Anthropic format
|
|
367
|
+
// Convert messages to Anthropic format (system messages are always cached)
|
|
337
368
|
const { systemPrompt, messages: anthropicMessages } = convertMessagesToAnthropic(messages);
|
|
338
369
|
|
|
339
370
|
// Build request payload
|
|
@@ -350,24 +381,49 @@ export const anthropicProvider = {
|
|
|
350
381
|
}
|
|
351
382
|
|
|
352
383
|
// Add max tokens (required by Anthropic)
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
384
|
+
const defaultMaxTokens = modelConfig.maxOutputTokens || 8192;
|
|
385
|
+
|
|
386
|
+
// If thinking is supported and enabled, we need to reduce max_tokens to leave room for thinking
|
|
387
|
+
let effectiveMaxTokens = defaultMaxTokens;
|
|
388
|
+
if (modelConfig.supportsThinking && reasoning_effort) {
|
|
389
|
+
// Reserve some tokens for thinking - use a more conservative approach
|
|
390
|
+
effectiveMaxTokens = Math.min(defaultMaxTokens, 16000); // Cap at 16k for models with thinking
|
|
360
391
|
}
|
|
392
|
+
|
|
393
|
+
requestPayload.max_tokens = maxTokens
|
|
394
|
+
? Math.min(maxTokens, effectiveMaxTokens)
|
|
395
|
+
: effectiveMaxTokens;
|
|
361
396
|
|
|
362
397
|
// Add thinking configuration for models that support it
|
|
363
398
|
if (modelConfig.supportsThinking && reasoning_effort) {
|
|
364
399
|
const thinkingBudget = calculateThinkingBudget(modelConfig, reasoning_effort);
|
|
365
400
|
if (thinkingBudget > 0) {
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
401
|
+
// Anthropic docs: thinking budget counts towards total token limit
|
|
402
|
+
// So we need to ensure max_tokens + budget_tokens <= model's actual limit
|
|
403
|
+
// Reduce max_tokens to make room for thinking
|
|
404
|
+
const reducedMaxTokens = requestPayload.max_tokens - thinkingBudget;
|
|
405
|
+
|
|
406
|
+
if (reducedMaxTokens >= 1000 && thinkingBudget >= 1024) { // Ensure we have reasonable space for both
|
|
407
|
+
requestPayload.max_tokens = reducedMaxTokens;
|
|
408
|
+
requestPayload.thinking = {
|
|
409
|
+
type: 'enabled',
|
|
410
|
+
budget_tokens: thinkingBudget
|
|
411
|
+
};
|
|
412
|
+
debugLog(`[Anthropic] Thinking enabled with budget: ${thinkingBudget} tokens, max_tokens reduced to: ${reducedMaxTokens} (${reasoning_effort} effort)`);
|
|
413
|
+
} else {
|
|
414
|
+
debugLog(`[Anthropic] Not enough token budget for thinking. Would need ${thinkingBudget} thinking + ${reducedMaxTokens} output tokens`);
|
|
415
|
+
}
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
|
|
419
|
+
// Add temperature if specified
|
|
420
|
+
// When thinking is enabled, temperature must be 1
|
|
421
|
+
if (temperature !== undefined) {
|
|
422
|
+
if (requestPayload.thinking) {
|
|
423
|
+
requestPayload.temperature = 1;
|
|
424
|
+
debugLog('[Anthropic] Temperature forced to 1 for thinking mode');
|
|
425
|
+
} else {
|
|
426
|
+
requestPayload.temperature = Math.max(0, Math.min(1, temperature));
|
|
371
427
|
}
|
|
372
428
|
}
|
|
373
429
|
|
|
@@ -421,7 +477,9 @@ export const anthropicProvider = {
|
|
|
421
477
|
input_tokens: usage.input_tokens || 0,
|
|
422
478
|
output_tokens: usage.output_tokens || 0,
|
|
423
479
|
total_tokens: (usage.input_tokens || 0) + (usage.output_tokens || 0),
|
|
424
|
-
thinking_tokens: usage.thinking_input_tokens || 0
|
|
480
|
+
thinking_tokens: usage.thinking_input_tokens || 0,
|
|
481
|
+
cache_creation_input_tokens: usage.cache_creation_input_tokens || 0,
|
|
482
|
+
cache_read_input_tokens: usage.cache_read_input_tokens || 0
|
|
425
483
|
},
|
|
426
484
|
response_time_ms: responseTime,
|
|
427
485
|
finish_reason: response.stop_reason,
|
package/src/providers/mistral.js
CHANGED
|
@@ -172,7 +172,7 @@ async function getMistralSDK() {
|
|
|
172
172
|
if (!MistralSDK) {
|
|
173
173
|
try {
|
|
174
174
|
const module = await import('@mistralai/mistralai');
|
|
175
|
-
MistralSDK = module.
|
|
175
|
+
MistralSDK = module.Mistral || module.default;
|
|
176
176
|
} catch (error) {
|
|
177
177
|
throw new MistralProviderError(
|
|
178
178
|
'Failed to load Mistral SDK. Please install @mistralai/mistralai',
|