hanzo-mcp 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

This version of hanzo-mcp has been flagged as a potentially problematic release.

@@ -0,0 +1,911 @@
1
+ """Unified LLM tool with multiple actions including consensus mode."""
2
+
3
+ import os
4
+ import json
5
+ import asyncio
6
+ from typing import (
7
+ Any,
8
+ Dict,
9
+ List,
10
+ Unpack,
11
+ Optional,
12
+ Annotated,
13
+ TypedDict,
14
+ final,
15
+ override,
16
+ )
17
+ from pathlib import Path
18
+
19
+ from pydantic import Field
20
+ from mcp.server.fastmcp import Context as MCPContext
21
+
22
+ from hanzo_mcp.tools.common.base import BaseTool
23
+ from hanzo_mcp.tools.common.context import create_tool_context
24
+
25
+ # Check if litellm is available
26
+ try:
27
+ import litellm
28
+
29
+ LITELLM_AVAILABLE = True
30
+ except ImportError:
31
+ LITELLM_AVAILABLE = False
32
+
33
+
34
+ # Parameter types
35
+ Action = Annotated[
36
+ str,
37
+ Field(
38
+ description="Action to perform: query, consensus, list, models, enable, disable, test",
39
+ default="query",
40
+ ),
41
+ ]
42
+
43
+ Model = Annotated[
44
+ Optional[str],
45
+ Field(
46
+ description="Model name (e.g., gpt-4, claude-3-opus-20240229)",
47
+ default=None,
48
+ ),
49
+ ]
50
+
51
+ Models = Annotated[
52
+ Optional[List[str]],
53
+ Field(
54
+ description="List of models for consensus mode",
55
+ default=None,
56
+ ),
57
+ ]
58
+
59
+ Prompt = Annotated[
60
+ Optional[str],
61
+ Field(
62
+ description="The prompt to send to the LLM",
63
+ default=None,
64
+ ),
65
+ ]
66
+
67
+ SystemPrompt = Annotated[
68
+ Optional[str],
69
+ Field(
70
+ description="System prompt to set context",
71
+ default=None,
72
+ ),
73
+ ]
74
+
75
+ Temperature = Annotated[
76
+ float,
77
+ Field(
78
+ description="Temperature for response randomness (0-2)",
79
+ default=0.7,
80
+ ),
81
+ ]
82
+
83
+ MaxTokens = Annotated[
84
+ Optional[int],
85
+ Field(
86
+ description="Maximum tokens in response",
87
+ default=None,
88
+ ),
89
+ ]
90
+
91
+ JsonMode = Annotated[
92
+ bool,
93
+ Field(
94
+ description="Request JSON formatted response",
95
+ default=False,
96
+ ),
97
+ ]
98
+
99
+ Stream = Annotated[
100
+ bool,
101
+ Field(
102
+ description="Stream the response",
103
+ default=False,
104
+ ),
105
+ ]
106
+
107
+ Provider = Annotated[
108
+ Optional[str],
109
+ Field(
110
+ description="Provider name for list/enable/disable actions",
111
+ default=None,
112
+ ),
113
+ ]
114
+
115
+ IncludeRaw = Annotated[
116
+ bool,
117
+ Field(
118
+ description="Include raw responses in consensus mode",
119
+ default=False,
120
+ ),
121
+ ]
122
+
123
+ JudgeModel = Annotated[
124
+ Optional[str],
125
+ Field(
126
+ description="Model to use as judge/aggregator in consensus",
127
+ default=None,
128
+ ),
129
+ ]
130
+
131
+ DevilsAdvocate = Annotated[
132
+ bool,
133
+ Field(
134
+ description="Enable devil's advocate mode (10th model critiques others)",
135
+ default=False,
136
+ ),
137
+ ]
138
+
139
+ ConsensusSize = Annotated[
140
+ Optional[int],
141
+ Field(
142
+ description="Number of models to use in consensus (default: 3)",
143
+ default=None,
144
+ ),
145
+ ]
146
+
147
+
148
+ class LLMParams(TypedDict, total=False):
149
+ """Parameters for LLM tool."""
150
+
151
+ action: str
152
+ model: Optional[str]
153
+ models: Optional[List[str]]
154
+ prompt: Optional[str]
155
+ system_prompt: Optional[str]
156
+ temperature: float
157
+ max_tokens: Optional[int]
158
+ json_mode: bool
159
+ stream: bool
160
+ provider: Optional[str]
161
+ include_raw: bool
162
+ judge_model: Optional[str]
163
+ devils_advocate: bool
164
+ consensus_size: Optional[int]
165
+
166
+
167
+ @final
168
+ class UnifiedLLMTool(BaseTool):
169
+ """Unified LLM tool with multiple actions."""
170
+
171
+ # Config file for settings
172
+ CONFIG_FILE = Path.home() / ".hanzo" / "mcp" / "llm_config.json"
173
+
174
+ # Default consensus models in order of preference
175
+ DEFAULT_CONSENSUS_MODELS = [
176
+ "gpt-4o", # OpenAI's latest
177
+ "claude-3-opus-20240229", # Claude's most capable
178
+ "gemini/gemini-1.5-pro", # Google's best
179
+ "groq/llama3-70b-8192", # Fast Groq
180
+ "mistral/mistral-large-latest", # Mistral's best
181
+ "perplexity/llama-3.1-sonar-large-128k-chat", # Perplexity with search
182
+ ]
183
+
184
+ # API key environment variables
185
+ API_KEY_ENV_VARS = {
186
+ "openai": ["OPENAI_API_KEY"],
187
+ "anthropic": ["ANTHROPIC_API_KEY", "CLAUDE_API_KEY"],
188
+ "google": ["GOOGLE_API_KEY", "GEMINI_API_KEY"],
189
+ "groq": ["GROQ_API_KEY"],
190
+ "mistral": ["MISTRAL_API_KEY"],
191
+ "perplexity": ["PERPLEXITY_API_KEY", "PERPLEXITYAI_API_KEY"],
192
+ "together": ["TOGETHER_API_KEY", "TOGETHERAI_API_KEY"],
193
+ "cohere": ["COHERE_API_KEY"],
194
+ "replicate": ["REPLICATE_API_KEY"],
195
+ "huggingface": ["HUGGINGFACE_API_KEY", "HF_TOKEN"],
196
+ "bedrock": ["AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"],
197
+ "vertex": ["GOOGLE_APPLICATION_CREDENTIALS"],
198
+ "azure": ["AZURE_API_KEY"],
199
+ "voyage": ["VOYAGE_API_KEY"],
200
+ "deepseek": ["DEEPSEEK_API_KEY"],
201
+ }
202
+
203
+ def __init__(self):
204
+ """Initialize the unified LLM tool."""
205
+ self.available_providers = self._detect_available_providers()
206
+ self.config = self._load_config()
207
+
208
+ def _detect_available_providers(self) -> Dict[str, List[str]]:
209
+ """Detect which providers have API keys configured."""
210
+ available = {}
211
+
212
+ for provider, env_vars in self.API_KEY_ENV_VARS.items():
213
+ for var in env_vars:
214
+ if os.getenv(var):
215
+ available[provider] = env_vars
216
+ break
217
+
218
+ return available
219
+
220
+ def _load_config(self) -> Dict[str, Any]:
221
+ """Load configuration from file."""
222
+ if self.CONFIG_FILE.exists():
223
+ try:
224
+ with open(self.CONFIG_FILE, "r") as f:
225
+ return json.load(f)
226
+ except Exception:
227
+ pass  # Fall back to the default config below if the file is unreadable or invalid
228
+
229
+ # Default config
230
+ return {
231
+ "disabled_providers": [],
232
+ "consensus_models": None, # Use defaults if None
233
+ "default_judge_model": "gpt-4o",
234
+ "consensus_size": 3,
235
+ }
236
+
237
+ def _save_config(self):
238
+ """Save configuration to file."""
239
+ self.CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
240
+ with open(self.CONFIG_FILE, "w") as f:
241
+ json.dump(self.config, f, indent=2)
242
+
243
+ @property
244
+ @override
245
+ def name(self) -> str:
246
+ """Get the tool name."""
247
+ return "llm"
248
+
249
+ @property
250
+ @override
251
+ def description(self) -> str:
252
+ """Get the tool description."""
253
+ available = list(self.available_providers.keys())
254
+
255
+ return f"""Query LLMs. Default: single query. Actions: consensus, list, models, test.
256
+
257
+ Usage:
258
+ llm "What is the capital of France?"
259
+ llm "Explain this code" --model gpt-4o
260
+ llm --action consensus "Is this approach correct?" --devils-advocate
261
+ llm --action models --provider openai
262
+
263
+ Available: {', '.join(available) if available else 'None'}"""
264
+
265
+ @override
266
+ async def call(
267
+ self,
268
+ ctx: MCPContext,
269
+ **params: Unpack[LLMParams],
270
+ ) -> str:
271
+ """Execute LLM action."""
272
+ # Create tool context only if we have a proper MCP context
273
+ tool_ctx = None
274
+ try:
275
+ if hasattr(ctx, "client") and ctx.client and hasattr(ctx.client, "server"):
276
+ tool_ctx = create_tool_context(ctx)
277
+ if tool_ctx:
278
+ await tool_ctx.set_tool_info(self.name)
279
+ except Exception:
280
+ # Running in test mode without MCP context
281
+ pass
282
+
283
+ if not LITELLM_AVAILABLE:
284
+ return (
285
+ "Error: LiteLLM is not installed. Install it with: pip install litellm"
286
+ )
287
+
288
+ # Extract action
289
+ action = params.get("action", "query")
290
+
291
+ # Route to appropriate handler
292
+ if action == "query":
293
+ return await self._handle_query(tool_ctx, params)
294
+ elif action == "consensus":
295
+ return await self._handle_consensus(tool_ctx, params)
296
+ elif action == "list":
297
+ return self._handle_list()
298
+ elif action == "models":
299
+ return self._handle_models(params.get("provider"))
300
+ elif action == "enable":
301
+ return self._handle_enable(params.get("provider"))
302
+ elif action == "disable":
303
+ return self._handle_disable(params.get("provider"))
304
+ elif action == "test":
305
+ return await self._handle_test(
306
+ tool_ctx, params.get("model"), params.get("provider")
307
+ )
308
+ else:
309
+ return f"Error: Unknown action '{action}'. Valid actions: query, consensus, list, models, enable, disable, test"
310
+
311
+ async def _handle_query(self, tool_ctx, params: Dict[str, Any]) -> str:
312
+ """Handle single model query."""
313
+ model = params.get("model")
314
+ prompt = params.get("prompt")
315
+
316
+ if not prompt:
317
+ return "Error: prompt is required for query action"
318
+
319
+ # Auto-select model if not specified
320
+ if not model:
321
+ if self.available_providers:
322
+ # Use first available model
323
+ if "openai" in self.available_providers:
324
+ model = "gpt-4o-mini"
325
+ elif "anthropic" in self.available_providers:
326
+ model = "claude-3-haiku-20240307"
327
+ elif "google" in self.available_providers:
328
+ model = "gemini/gemini-1.5-flash"
329
+ else:
330
+ # Use first provider's default
331
+ provider = list(self.available_providers.keys())[0]
332
+ model = f"{provider}/default"
333
+ else:
334
+ return "Error: No model specified and no API keys found"
335
+
336
+ # Check if we have API key for this model
337
+ provider = self._get_provider_for_model(model)
338
+ if provider and provider not in self.available_providers:
339
+ env_vars = self.API_KEY_ENV_VARS.get(provider, [])
340
+ return f"Error: No API key found for {provider}. Set one of: {', '.join(env_vars)}"
341
+
342
+ # Build messages
343
+ messages = []
344
+ if params.get("system_prompt"):
345
+ messages.append({"role": "system", "content": params["system_prompt"]})
346
+ messages.append({"role": "user", "content": prompt})
347
+
348
+ # Build kwargs
349
+ kwargs = {
350
+ "model": model,
351
+ "messages": messages,
352
+ "temperature": params.get("temperature", 0.7),
353
+ }
354
+
355
+ if params.get("max_tokens"):
356
+ kwargs["max_tokens"] = params["max_tokens"]
357
+
358
+ if params.get("json_mode"):
359
+ kwargs["response_format"] = {"type": "json_object"}
360
+
361
+ if params.get("stream"):
362
+ kwargs["stream"] = True
363
+
364
+ try:
365
+ if tool_ctx:
366
+ await tool_ctx.info(f"Querying {model}...")
367
+
368
+ if kwargs.get("stream"):
369
+ # Handle streaming response
370
+ response_text = ""
371
+ async for chunk in await litellm.acompletion(**kwargs):
372
+ if chunk.choices[0].delta.content:
373
+ response_text += chunk.choices[0].delta.content
374
+ return response_text
375
+ else:
376
+ # Regular response
377
+ response = await litellm.acompletion(**kwargs)
378
+ return response.choices[0].message.content
379
+
380
+ except Exception as e:
381
+ error_msg = str(e)
382
+ if "model_not_found" in error_msg or "does not exist" in error_msg:
383
+ return f"Error: Model '{model}' not found. Use 'llm --action models' to see available models."
384
+ else:
385
+ return f"Error calling LLM: {error_msg}"
386
+
387
+ async def _handle_consensus(self, tool_ctx, params: Dict[str, Any]) -> str:
388
+ """Handle consensus mode with multiple models."""
389
+ prompt = params.get("prompt")
390
+ if not prompt:
391
+ return "Error: prompt is required for consensus action"
392
+
393
+ # Determine models to use
394
+ models = params.get("models")
395
+ if not models:
396
+ # Use configured or default models
397
+ consensus_size = params.get("consensus_size") or self.config.get(
398
+ "consensus_size", 3
399
+ )
400
+ models = self._get_consensus_models(consensus_size)
401
+
402
+ if not models:
403
+ return "Error: No models available for consensus. Set API keys for at least 2 providers."
404
+
405
+ if len(models) < 2:
406
+ return "Error: Consensus requires at least 2 models"
407
+
408
+ # Check for devil's advocate mode
409
+ devils_advocate = params.get("devils_advocate", False)
410
+ if devils_advocate and len(models) < 3:
411
+ return "Error: Devil's advocate mode requires at least 3 models"
412
+
413
+ if tool_ctx:
414
+ await tool_ctx.info(f"Running consensus with {len(models)} models...")
415
+
416
+ # Query models in parallel
417
+ system_prompt = params.get("system_prompt")
418
+ temperature = params.get("temperature", 0.7)
419
+ max_tokens = params.get("max_tokens")
420
+
421
+ # Split models if using devil's advocate
422
+ if devils_advocate:
423
+ consensus_models = models[:-1]
424
+ devil_model = models[-1]
425
+ else:
426
+ consensus_models = models
427
+ devil_model = None
428
+
429
+ # Query consensus models
430
+ responses = await self._query_models_parallel(
431
+ consensus_models, prompt, system_prompt, temperature, max_tokens, tool_ctx
432
+ )
433
+
434
+ # Get devil's advocate response if enabled
435
+ devil_response = None
436
+ if devil_model:
437
+ # Create devil's advocate prompt
438
+ responses_text = "\n\n".join(
439
+ [
440
+ f"Model {i+1}: {resp['response']}"
441
+ for i, resp in enumerate(responses)
442
+ if resp["response"]
443
+ ]
444
+ )
445
+
446
+ devil_prompt = f"""You are a critical analyst. Review these responses to the question below and provide a devil's advocate perspective. Challenge assumptions, point out weaknesses, and suggest alternative viewpoints.
447
+
448
+ Original Question: {prompt}
449
+
450
+ Responses from other models:
451
+ {responses_text}
452
+
453
+ Provide your critical analysis:"""
454
+
455
+ devil_result = await self._query_single_model(
456
+ devil_model, devil_prompt, system_prompt, temperature, max_tokens
457
+ )
458
+
459
+ if devil_result["success"]:
460
+ devil_response = {
461
+ "model": devil_model,
462
+ "response": devil_result["response"],
463
+ "time_ms": devil_result["time_ms"],
464
+ }
465
+
466
+ # Aggregate responses
467
+ judge_model = params.get("judge_model") or self.config.get(
468
+ "default_judge_model", "gpt-4o"
469
+ )
470
+ include_raw = params.get("include_raw", False)
471
+
472
+ return await self._aggregate_consensus(
473
+ responses, prompt, judge_model, include_raw, devil_response, tool_ctx
474
+ )
475
+
476
+ def _handle_list(self) -> str:
477
+ """List available providers."""
478
+ output = ["=== LLM Providers ==="]
479
+
480
+ # Get all possible providers
481
+ all_providers = sorted(self.API_KEY_ENV_VARS.keys())
482
+ disabled = self.config.get("disabled_providers", [])
483
+
484
+ output.append(f"Total providers: {len(all_providers)}")
485
+ output.append(f"Available: {len(self.available_providers)}")
486
+ output.append(f"Disabled: {len(disabled)}\n")
487
+
488
+ for provider in all_providers:
489
+ status_parts = []
490
+
491
+ # Check if API key exists
492
+ if provider in self.available_providers:
493
+ status_parts.append("✅ API key found")
494
+ else:
495
+ status_parts.append("❌ No API key")
496
+
497
+ # Check if disabled
498
+ if provider in disabled:
499
+ status_parts.append("🚫 Disabled")
500
+
501
+ # Show environment variables
502
+ env_vars = self.API_KEY_ENV_VARS.get(provider, [])
503
+ status = " | ".join(status_parts)
504
+
505
+ output.append(f"{provider}: {status}")
506
+ output.append(f" Environment variables: {', '.join(env_vars)}")
507
+
508
+ output.append(
509
+ "\nUse 'llm --action enable/disable --provider <name>' to manage providers"
510
+ )
511
+
512
+ return "\n".join(output)
513
+
514
+ def _handle_models(self, provider: Optional[str] = None) -> str:
515
+ """List available models."""
516
+ try:
517
+ all_models = self._get_all_models()
518
+
519
+ if not all_models:
520
+ return "No models available or LiteLLM not properly initialized"
521
+
522
+ output = ["=== Available LLM Models ==="]
523
+
524
+ if provider:
525
+ # Show models for specific provider
526
+ provider_lower = provider.lower()
527
+ models = all_models.get(provider_lower, [])
528
+
529
+ if not models:
530
+ return f"No models found for provider '{provider}'"
531
+
532
+ output.append(f"\n{provider.upper()} ({len(models)} models):")
533
+ output.append("-" * 40)
534
+
535
+ # Show first 50 models
536
+ for model in models[:50]:
537
+ output.append(f" {model}")
538
+
539
+ if len(models) > 50:
540
+ output.append(f" ... and {len(models) - 50} more")
541
+ else:
542
+ # Show summary of all providers
543
+ total_models = sum(len(models) for models in all_models.values())
544
+ output.append(f"Total models available: {total_models}")
545
+ output.append("")
546
+
547
+ # Show providers with counts
548
+ for provider_name, models in sorted(all_models.items()):
549
+ if models:
550
+ available = (
551
+ "✅" if provider_name in self.available_providers else "❌"
552
+ )
553
+ output.append(
554
+ f"{available} {provider_name}: {len(models)} models"
555
+ )
556
+
557
+ output.append(
558
+ "\nUse 'llm --action models --provider <name>' to see specific models"
559
+ )
560
+
561
+ return "\n".join(output)
562
+
563
+ except Exception as e:
564
+ return f"Error listing models: {str(e)}"
565
+
566
+ def _handle_enable(self, provider: Optional[str]) -> str:
567
+ """Enable a provider."""
568
+ if not provider:
569
+ return "Error: provider is required for enable action"
570
+
571
+ provider = provider.lower()
572
+ disabled = self.config.get("disabled_providers", [])
573
+
574
+ if provider in disabled:
575
+ disabled.remove(provider)
576
+ self.config["disabled_providers"] = disabled
577
+ self._save_config()
578
+ return f"Successfully enabled {provider}"
579
+ else:
580
+ return f"{provider} is already enabled"
581
+
582
+ def _handle_disable(self, provider: Optional[str]) -> str:
583
+ """Disable a provider."""
584
+ if not provider:
585
+ return "Error: provider is required for disable action"
586
+
587
+ provider = provider.lower()
588
+ disabled = self.config.get("disabled_providers", [])
589
+
590
+ if provider not in disabled:
591
+ disabled.append(provider)
592
+ self.config["disabled_providers"] = disabled
593
+ self._save_config()
594
+ return f"Successfully disabled {provider}"
595
+ else:
596
+ return f"{provider} is already disabled"
597
+
598
+ async def _handle_test(
599
+ self, tool_ctx, model: Optional[str], provider: Optional[str]
600
+ ) -> str:
601
+ """Test a model or provider."""
602
+ if not model and not provider:
603
+ return "Error: Either model or provider is required for test action"
604
+
605
+ # If provider specified, test its default model
606
+ if provider and not model:
607
+ provider = provider.lower()
608
+ if provider == "openai":
609
+ model = "gpt-3.5-turbo"
610
+ elif provider == "anthropic":
611
+ model = "claude-3-haiku-20240307"
612
+ elif provider == "google":
613
+ model = "gemini/gemini-1.5-flash"
614
+ elif provider == "groq":
615
+ model = "groq/llama3-8b-8192"
616
+ else:
617
+ model = f"{provider}/default"
618
+
619
+ # Test the model
620
+ test_prompt = "Say 'Hello from Hanzo MCP!' in exactly 5 words."
621
+
622
+ try:
623
+ if tool_ctx:
624
+ await tool_ctx.info(f"Testing {model}...")
625
+
626
+ response = await litellm.acompletion(
627
+ model=model,
628
+ messages=[{"role": "user", "content": test_prompt}],
629
+ temperature=0,
630
+ max_tokens=20,
631
+ )
632
+
633
+ result = response.choices[0].message.content
634
+ return f"✅ {model} is working!\nResponse: {result}"
635
+
636
+ except Exception as e:
637
+ return f"❌ {model} failed: {str(e)}"
638
+
639
+ def _get_consensus_models(self, size: int) -> List[str]:
640
+ """Get models for consensus based on availability."""
641
+ # Use configured models if set
642
+ configured = self.config.get("consensus_models")
643
+ if configured:
644
+ return configured[:size]
645
+
646
+ # Otherwise, build list from available providers
647
+ models = []
648
+ disabled = self.config.get("disabled_providers", [])
649
+
650
+ # Try default models first
651
+ for model in self.DEFAULT_CONSENSUS_MODELS:
652
+ if len(models) >= size:
653
+ break
654
+
655
+ provider = self._get_provider_for_model(model)
656
+ if (
657
+ provider
658
+ and provider in self.available_providers
659
+ and provider not in disabled
660
+ ):
661
+ models.append(model)
662
+
663
+ # If still need more, add from available providers
664
+ if len(models) < size:
665
+ for provider in self.available_providers:
666
+ if provider in disabled:
667
+ continue
668
+
669
+ if provider == "openai" and "gpt-4o" not in models:
670
+ models.append("gpt-4o")
671
+ elif provider == "anthropic" and "claude-3-opus-20240229" not in models:
672
+ models.append("claude-3-opus-20240229")
673
+ elif provider == "google" and "gemini/gemini-1.5-pro" not in models:
674
+ models.append("gemini/gemini-1.5-pro")
675
+
676
+ if len(models) >= size:
677
+ break
678
+
679
+ return models
680
+
681
+ async def _query_models_parallel(
682
+ self,
683
+ models: List[str],
684
+ prompt: str,
685
+ system_prompt: Optional[str],
686
+ temperature: float,
687
+ max_tokens: Optional[int],
688
+ tool_ctx,
689
+ ) -> List[Dict[str, Any]]:
690
+ """Query multiple models in parallel."""
691
+
692
+ async def query_with_info(model: str) -> Dict[str, Any]:
693
+ result = await self._query_single_model(
694
+ model, prompt, system_prompt, temperature, max_tokens
695
+ )
696
+ return {
697
+ "model": model,
698
+ "response": result.get("response"),
699
+ "success": result.get("success", False),
700
+ "error": result.get("error"),
701
+ "time_ms": result.get("time_ms", 0),
702
+ }
703
+
704
+ # Run all queries in parallel
705
+ tasks = [query_with_info(model) for model in models]
706
+ results = await asyncio.gather(*tasks)
707
+
708
+ # Report results
709
+ successful = sum(1 for r in results if r["success"])
710
+ if tool_ctx:
711
+ await tool_ctx.info(f"Completed {successful}/{len(models)} model queries")
712
+
713
+ return results
714
+
715
+ async def _query_single_model(
716
+ self,
717
+ model: str,
718
+ prompt: str,
719
+ system_prompt: Optional[str],
720
+ temperature: float,
721
+ max_tokens: Optional[int],
722
+ ) -> Dict[str, Any]:
723
+ """Query a single model and return result with metadata."""
724
+ import time
725
+
726
+ start_time = time.time()
727
+
728
+ try:
729
+ messages = []
730
+ if system_prompt:
731
+ messages.append({"role": "system", "content": system_prompt})
732
+ messages.append({"role": "user", "content": prompt})
733
+
734
+ kwargs = {
735
+ "model": model,
736
+ "messages": messages,
737
+ "temperature": temperature,
738
+ }
739
+ if max_tokens:
740
+ kwargs["max_tokens"] = max_tokens
741
+
742
+ response = await litellm.acompletion(**kwargs)
743
+
744
+ return {
745
+ "success": True,
746
+ "response": response.choices[0].message.content,
747
+ "time_ms": int((time.time() - start_time) * 1000),
748
+ }
749
+
750
+ except Exception as e:
751
+ return {
752
+ "success": False,
753
+ "error": str(e),
754
+ "time_ms": int((time.time() - start_time) * 1000),
755
+ }
756
+
757
+ async def _aggregate_consensus(
758
+ self,
759
+ responses: List[Dict[str, Any]],
760
+ original_prompt: str,
761
+ judge_model: str,
762
+ include_raw: bool,
763
+ devil_response: Optional[Dict[str, Any]],
764
+ tool_ctx,
765
+ ) -> str:
766
+ """Aggregate consensus responses using a judge model."""
767
+ # Prepare response data
768
+ successful_responses = [r for r in responses if r["success"]]
769
+
770
+ if not successful_responses:
771
+ return "Error: All models failed to respond"
772
+
773
+ # Format responses for aggregation
774
+ responses_text = "\n\n".join(
775
+ [
776
+ f"Model: {r['model']}\nResponse: {r['response']}"
777
+ for r in successful_responses
778
+ ]
779
+ )
780
+
781
+ if devil_response:
782
+ responses_text += f"\n\nDevil's Advocate ({devil_response['model']}):\n{devil_response['response']}"
783
+
784
+ # Create aggregation prompt
785
+ aggregation_prompt = f"""Analyze the following responses from multiple AI models to this question:
786
+
787
+ <original_question>
788
+ {original_prompt}
789
+ </original_question>
790
+
791
+ <model_responses>
792
+ {responses_text}
793
+ </model_responses>
794
+
795
+ Please provide:
796
+ 1. A synthesis of the key points where models agree
797
+ 2. Notable differences or disagreements between responses
798
+ 3. A balanced conclusion incorporating the best insights
799
+ {f"4. Evaluation of the devil's advocate critique" if devil_response else ""}
800
+
801
+ Be concise and highlight the most important findings."""
802
+
803
+ # Get aggregation
804
+ try:
805
+ if tool_ctx:
806
+ await tool_ctx.info(f"Aggregating responses with {judge_model}...")
807
+
808
+ judge_result = await self._query_single_model(
809
+ judge_model, aggregation_prompt, None, 0.3, None
810
+ )
811
+
812
+ if not judge_result["success"]:
813
+ return f"Error: Judge model failed: {judge_result.get('error', 'Unknown error')}"
814
+
815
+ # Format output
816
+ output = [
817
+ f"=== Consensus Analysis ({len(successful_responses)} models) ===\n"
818
+ ]
819
+ output.append(judge_result["response"])
820
+
821
+ # Add model list
822
+ output.append(
823
+ f"\nModels consulted: {', '.join([r['model'] for r in successful_responses])}"
824
+ )
825
+ if devil_response:
826
+ output.append(f"Devil's Advocate: {devil_response['model']}")
827
+
828
+ # Add timing info
829
+ avg_time = sum(r["time_ms"] for r in responses) / len(responses)
830
+ output.append(f"\nAverage response time: {avg_time:.0f}ms")
831
+
832
+ # Include raw responses if requested
833
+ if include_raw:
834
+ output.append("\n\n=== Raw Responses ===")
835
+ for r in successful_responses:
836
+ output.append(f"\n{r['model']}:")
837
+ output.append("-" * 40)
838
+ output.append(r["response"])
839
+
840
+ if devil_response:
841
+ output.append(f"\nDevil's Advocate ({devil_response['model']}):")
842
+ output.append("-" * 40)
843
+ output.append(devil_response["response"])
844
+
845
+ return "\n".join(output)
846
+
847
+ except Exception as e:
848
+ return f"Error during aggregation: {str(e)}"
849
+
850
+ def _get_provider_for_model(self, model: str) -> Optional[str]:
851
+ """Determine the provider for a given model."""
852
+ model_lower = model.lower()
853
+
854
+ # Check explicit provider prefix
855
+ if "/" in model:
856
+ return model.split("/")[0]
857
+
858
+ # Check model prefixes
859
+ if model_lower.startswith("gpt"):
860
+ return "openai"
861
+ elif model_lower.startswith("claude"):
862
+ return "anthropic"
863
+ elif model_lower.startswith("gemini"):
864
+ return "google"
865
+ elif model_lower.startswith("command"):
866
+ return "cohere"
867
+
868
+ # Default to OpenAI
869
+ return "openai"
870
+
871
+ def _get_all_models(self) -> Dict[str, List[str]]:
872
+ """Get all available models from LiteLLM."""
873
+ try:
874
+ import litellm
875
+
876
+ # Get all models
877
+ all_models = litellm.model_list
878
+
879
+ # Organize by provider
880
+ providers = {}
881
+
882
+ for model in all_models:
883
+ # Extract provider
884
+ if "/" in model:
885
+ provider = model.split("/")[0]
886
+ elif model.startswith("gpt"):
887
+ provider = "openai"
888
+ elif model.startswith("claude"):
889
+ provider = "anthropic"
890
+ elif model.startswith("gemini"):
891
+ provider = "google"
892
+ elif model.startswith("command"):
893
+ provider = "cohere"
894
+ else:
895
+ provider = "other"
896
+
897
+ if provider not in providers:
898
+ providers[provider] = []
899
+ providers[provider].append(model)
900
+
901
+ # Sort models within each provider
902
+ for provider in providers:
903
+ providers[provider] = sorted(providers[provider])
904
+
905
+ return providers
906
+ except Exception:
907
+ return {}
908
+
909
+ def register(self, mcp_server) -> None:
910
+ """Register this tool with the MCP server."""
911
+ pass
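
For a quick sanity check of the new module, here is a minimal usage sketch. It is illustrative only and not part of the package: the import path hanzo_mcp.tools.llm.llm_tool is a guess (the diff does not show where the file lives), it assumes litellm is installed and at least one provider API key is exported, and it passes None as the MCP context, relying on the tool's own test-mode fallback shown in call().

import asyncio

from hanzo_mcp.tools.llm.llm_tool import UnifiedLLMTool  # module path is an assumption

async def main() -> None:
    tool = UnifiedLLMTool()

    # Single-model query; the model is auto-selected from whichever API keys are set.
    print(await tool.call(None, action="query", prompt="What is the capital of France?"))

    # Consensus across the default model set, with the last model acting as devil's advocate.
    print(await tool.call(
        None,
        action="consensus",
        prompt="Is this approach correct?",
        devils_advocate=True,
    ))

asyncio.run(main())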