hanzo-mcp 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/config/settings.py +61 -0
- hanzo_mcp/tools/__init__.py +158 -12
- hanzo_mcp/tools/common/base.py +7 -2
- hanzo_mcp/tools/common/config_tool.py +396 -0
- hanzo_mcp/tools/common/stats.py +261 -0
- hanzo_mcp/tools/common/tool_disable.py +144 -0
- hanzo_mcp/tools/common/tool_enable.py +182 -0
- hanzo_mcp/tools/common/tool_list.py +263 -0
- hanzo_mcp/tools/database/__init__.py +71 -0
- hanzo_mcp/tools/database/database_manager.py +246 -0
- hanzo_mcp/tools/database/graph_add.py +257 -0
- hanzo_mcp/tools/database/graph_query.py +536 -0
- hanzo_mcp/tools/database/graph_remove.py +267 -0
- hanzo_mcp/tools/database/graph_search.py +348 -0
- hanzo_mcp/tools/database/graph_stats.py +345 -0
- hanzo_mcp/tools/database/sql_query.py +229 -0
- hanzo_mcp/tools/database/sql_search.py +296 -0
- hanzo_mcp/tools/database/sql_stats.py +254 -0
- hanzo_mcp/tools/editor/__init__.py +11 -0
- hanzo_mcp/tools/editor/neovim_command.py +272 -0
- hanzo_mcp/tools/editor/neovim_edit.py +290 -0
- hanzo_mcp/tools/editor/neovim_session.py +356 -0
- hanzo_mcp/tools/filesystem/__init__.py +20 -1
- hanzo_mcp/tools/filesystem/batch_search.py +812 -0
- hanzo_mcp/tools/filesystem/find_files.py +348 -0
- hanzo_mcp/tools/filesystem/git_search.py +505 -0
- hanzo_mcp/tools/llm/__init__.py +27 -0
- hanzo_mcp/tools/llm/consensus_tool.py +351 -0
- hanzo_mcp/tools/llm/llm_manage.py +413 -0
- hanzo_mcp/tools/llm/llm_tool.py +346 -0
- hanzo_mcp/tools/llm/provider_tools.py +412 -0
- hanzo_mcp/tools/mcp/__init__.py +11 -0
- hanzo_mcp/tools/mcp/mcp_add.py +263 -0
- hanzo_mcp/tools/mcp/mcp_remove.py +127 -0
- hanzo_mcp/tools/mcp/mcp_stats.py +165 -0
- hanzo_mcp/tools/shell/__init__.py +27 -7
- hanzo_mcp/tools/shell/logs.py +265 -0
- hanzo_mcp/tools/shell/npx.py +194 -0
- hanzo_mcp/tools/shell/npx_background.py +254 -0
- hanzo_mcp/tools/shell/pkill.py +262 -0
- hanzo_mcp/tools/shell/processes.py +279 -0
- hanzo_mcp/tools/shell/run_background.py +326 -0
- hanzo_mcp/tools/shell/uvx.py +187 -0
- hanzo_mcp/tools/shell/uvx_background.py +249 -0
- hanzo_mcp/tools/vector/__init__.py +21 -12
- hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
- hanzo_mcp/tools/vector/git_ingester.py +485 -0
- hanzo_mcp/tools/vector/index_tool.py +358 -0
- hanzo_mcp/tools/vector/infinity_store.py +465 -1
- hanzo_mcp/tools/vector/mock_infinity.py +162 -0
- hanzo_mcp/tools/vector/vector_index.py +7 -6
- hanzo_mcp/tools/vector/vector_search.py +22 -7
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/METADATA +68 -20
- hanzo_mcp-0.5.2.dist-info/RECORD +106 -0
- hanzo_mcp-0.5.0.dist-info/RECORD +0 -63
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,351 @@
+"""Consensus tool for querying multiple LLMs in parallel."""
+
+import asyncio
+import json
+from typing import Annotated, Optional, TypedDict, Unpack, final, override, List, Dict, Any
+from datetime import datetime
+
+from fastmcp import Context as MCPContext
+from pydantic import Field
+
+from hanzo_mcp.tools.common.base import BaseTool
+from hanzo_mcp.tools.common.context import create_tool_context
+from hanzo_mcp.tools.llm.llm_tool import LLMTool
+
+
+Prompt = Annotated[
+    str,
+    Field(
+        description="The prompt to send to all models",
+        min_length=1,
+    ),
+]
+
+Models = Annotated[
+    Optional[List[str]],
+    Field(
+        description="List of models to query (defaults to a diverse set)",
+        default=None,
+    ),
+]
+
+SystemPrompt = Annotated[
+    Optional[str],
+    Field(
+        description="System prompt for all models",
+        default=None,
+    ),
+]
+
+Temperature = Annotated[
+    float,
+    Field(
+        description="Temperature for all models",
+        default=0.7,
+    ),
+]
+
+MaxTokens = Annotated[
+    Optional[int],
+    Field(
+        description="Maximum tokens per response",
+        default=None,
+    ),
+]
+
+AggregationModel = Annotated[
+    Optional[str],
+    Field(
+        description="Model to use for aggregating responses (defaults to gpt-4)",
+        default="gpt-4",
+    ),
+]
+
+IncludeRaw = Annotated[
+    bool,
+    Field(
+        description="Include raw responses from each model",
+        default=False,
+    ),
+]
+
+Timeout = Annotated[
+    int,
+    Field(
+        description="Timeout in seconds for each model",
+        default=30,
+    ),
+]
+
+
+class ConsensusToolParams(TypedDict, total=False):
+    """Parameters for consensus tool."""
+
+    prompt: str
+    models: Optional[List[str]]
+    system_prompt: Optional[str]
+    temperature: float
+    max_tokens: Optional[int]
+    aggregation_model: Optional[str]
+    include_raw: bool
+    timeout: int
+
+
+@final
+class ConsensusTool(BaseTool):
+    """Tool for getting consensus from multiple LLMs."""
+
+    # Default models to use if none specified - mix of fast and powerful models
+    DEFAULT_MODELS = [
+        "gpt-4o-mini",  # OpenAI's fast model
+        "claude-3-opus-20240229",  # Claude's most capable model
+        "gemini/gemini-1.5-pro",  # Google's largest model
+        "groq/llama3-70b-8192",  # Fast inference via Groq
+        "mistral/mistral-large-latest",  # Mistral's best model
+    ]
+
+    def __init__(self):
+        """Initialize the consensus tool."""
+        self.llm_tool = LLMTool()
+
+    @property
+    @override
+    def name(self) -> str:
+        """Get the tool name."""
+        return "consensus"
+
+    @property
+    @override
+    def description(self) -> str:
+        """Get the tool description."""
+        available_providers = list(self.llm_tool.available_providers.keys())
+
+        return f"""Query multiple LLMs in parallel and get a consensus response.
+
+Queries multiple models simultaneously, then uses another model to
+synthesize and analyze the responses for consensus, disagreements, and insights.
+
+Available providers: {', '.join(available_providers)}
+
+Default models (if available):
+- GPT-4 (OpenAI)
+- Claude 3 Sonnet (Anthropic)
+- Gemini Pro (Google)
+- Mixtral 8x7B (Groq)
+- Mistral Medium (Mistral)
+
+Examples:
+- consensus --prompt "What are the key principles of good software design?"
+- consensus --prompt "Analyze this code for security issues" --models '["gpt-4", "claude-3-opus-20240229"]'
+- consensus --prompt "Is this implementation correct?" --include-raw
+- consensus --prompt "What's the best approach?" --aggregation-model "claude-3-opus-20240229"
+
+The tool will:
+1. Query all specified models in parallel
+2. Collect and analyze responses
+3. Use the aggregation model to synthesize findings
+4. Highlight areas of agreement and disagreement
+5. Provide a balanced consensus view
+"""
+
+    @override
+    async def call(
+        self,
+        ctx: MCPContext,
+        **params: Unpack[ConsensusToolParams],
+    ) -> str:
+        """Get consensus from multiple LLMs.
+
+        Args:
+            ctx: MCP context
+            **params: Tool parameters
+
+        Returns:
+            Consensus analysis
+        """
+        tool_ctx = create_tool_context(ctx)
+        await tool_ctx.set_tool_info(self.name)
+
+        # Extract parameters
+        prompt = params.get("prompt")
+        if not prompt:
+            return "Error: prompt is required"
+
+        models = params.get("models") or self.DEFAULT_MODELS
+        system_prompt = params.get("system_prompt")
+        temperature = params.get("temperature", 0.7)
+        max_tokens = params.get("max_tokens")
+        aggregation_model = params.get("aggregation_model", "gpt-4")
+        include_raw = params.get("include_raw", False)
+        timeout = params.get("timeout", 30)
+
+        # Filter models to only those with available API keys
+        available_models = []
+        skipped_models = []
+
+        for model in models:
+            provider = self.llm_tool._get_provider_for_model(model)
+            if provider in self.llm_tool.available_providers:
+                available_models.append(model)
+            else:
+                skipped_models.append((model, provider))
+
+        if not available_models:
+            return "Error: No models available with configured API keys. Please set API keys for at least one provider."
+
+        await tool_ctx.info(f"Querying {len(available_models)} models in parallel...")
+
+        if skipped_models:
+            skipped_info = ", ".join([f"{m[0]} ({m[1]})" for m in skipped_models])
+            await tool_ctx.info(f"Skipping models without API keys: {skipped_info}")
+
+        # Query all models in parallel
+        results = await self._query_models_parallel(
+            available_models, prompt, system_prompt,
+            temperature, max_tokens, timeout
+        )
+
+        # Prepare summary of results
+        successful_responses = [(m, r) for m, r in results.items() if not r.startswith("Error:")]
+        failed_responses = [(m, r) for m, r in results.items() if r.startswith("Error:")]
+
+        if not successful_responses:
+            return "Error: All model queries failed:\n\n" + "\n".join([f"{m}: {r}" for m, r in failed_responses])
+
+        # Use aggregation model to synthesize responses
+        consensus = await self._aggregate_responses(
+            successful_responses, prompt, aggregation_model
+        )
+
+        # Format output
+        output = ["=== LLM Consensus Analysis ==="]
+        output.append(f"Query: {prompt}")
+        output.append(f"Models queried: {len(available_models)}")
+        output.append(f"Successful responses: {len(successful_responses)}")
+
+        if failed_responses:
+            output.append(f"Failed responses: {len(failed_responses)}")
+
+        output.append("")
+        output.append("=== Consensus Summary ===")
+        output.append(consensus)
+
+        if include_raw:
+            output.append("\n=== Individual Responses ===")
+            for model, response in successful_responses:
+                output.append(f"\n--- {model} ---")
+                output.append(response[:500] + "..." if len(response) > 500 else response)
+
+        if failed_responses:
+            output.append("\n=== Failed Queries ===")
+            for model, error in failed_responses:
+                output.append(f"{model}: {error}")
+
+        return "\n".join(output)
+
+    async def _query_models_parallel(
+        self, models: List[str], prompt: str,
+        system_prompt: Optional[str], temperature: float,
+        max_tokens: Optional[int], timeout: int
+    ) -> Dict[str, str]:
+        """Query multiple models in parallel."""
+        async def query_with_timeout(model: str) -> tuple[str, str]:
+            try:
+                params = {
+                    "model": model,
+                    "prompt": prompt,
+                    "temperature": temperature,
+                }
+                if system_prompt:
+                    params["system_prompt"] = system_prompt
+                if max_tokens:
+                    params["max_tokens"] = max_tokens
+
+                # Create a mock context for the LLM tool
+                mock_ctx = type('MockContext', (), {'client': None})()
+
+                result = await asyncio.wait_for(
+                    self.llm_tool.call(mock_ctx, **params),
+                    timeout=timeout
+                )
+                return (model, result)
+            except asyncio.TimeoutError:
+                return (model, f"Error: Timeout after {timeout} seconds")
+            except Exception as e:
+                return (model, f"Error: {str(e)}")
+
+        # Run all queries in parallel
+        tasks = [query_with_timeout(model) for model in models]
+        results = await asyncio.gather(*tasks)
+
+        return dict(results)
+
+    async def _aggregate_responses(
+        self, responses: List[tuple[str, str]],
+        original_prompt: str, aggregation_model: str
+    ) -> str:
+        """Use an LLM to aggregate and analyze responses."""
+        # Prepare the aggregation prompt
+        response_summary = "\n\n".join([
+            f"Model: {model}\nResponse: {response}"
+            for model, response in responses
+        ])
+
+        aggregation_prompt = f"""You are analyzing responses from multiple AI models to the following prompt:
+
+<original_prompt>
+{original_prompt}
+</original_prompt>
+
+<model_responses>
+{response_summary}
+</model_responses>
+
+Please provide a comprehensive analysis that includes:
+
+1. **Consensus Points**: What do most or all models agree on?
+2. **Divergent Views**: Where do the models disagree or offer different perspectives?
+3. **Key Insights**: What are the most valuable insights across all responses?
+4. **Unique Contributions**: Did any model provide unique valuable information?
+5. **Synthesis**: Provide a balanced, synthesized answer that incorporates the best elements from all responses.
+
+Be concise but thorough. Focus on providing actionable insights."""
+
+        try:
+            # Use the LLM tool to get the aggregation
+            mock_ctx = type('MockContext', (), {'client': None})()
+
+            aggregation_params = {
+                "model": aggregation_model,
+                "prompt": aggregation_prompt,
+                "temperature": 0.3,  # Lower temperature for more consistent analysis
+                "system_prompt": "You are an expert at analyzing and synthesizing multiple AI responses to provide balanced, insightful consensus."
+            }
+
+            result = await self.llm_tool.call(mock_ctx, **aggregation_params)
+            return result
+
+        except Exception as e:
+            # Fallback to simple aggregation if LLM fails
+            return self._simple_aggregate(responses)
+
+    def _simple_aggregate(self, responses: List[tuple[str, str]]) -> str:
+        """Simple fallback aggregation without LLM."""
+        output = []
+        output.append("Summary of responses:")
+        output.append("")
+
+        # Find common themes (very basic)
+        all_text = " ".join([r[1] for r in responses]).lower()
+
+        output.append("Response lengths:")
+        for model, response in responses:
+            output.append(f"- {model}: {len(response)} characters")
+
+        output.append("\nNote: Advanced consensus analysis unavailable. Showing basic summary only.")
+
+        return "\n".join(output)
+
+    def register(self, mcp_server) -> None:
+        """Register this tool with the MCP server."""
+        pass
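The core pattern behind the new ConsensusTool -- fanning out to several models with a per-model timeout via asyncio.wait_for and asyncio.gather, then folding failures into the result set as "Error: ..." strings -- can be illustrated with a small self-contained sketch. The fake_model_call coroutine below is a hypothetical stand-in for LLMTool.call and is not part of the package:

import asyncio


async def fake_model_call(model: str, prompt: str) -> str:
    # Hypothetical stand-in for LLMTool.call; simulates provider-dependent latency.
    await asyncio.sleep(0.1 if "mini" in model else 0.3)
    return f"{model} answer to: {prompt}"


async def query_models_parallel(models: list[str], prompt: str, timeout: float) -> dict[str, str]:
    # Mirrors ConsensusTool._query_models_parallel: each model gets its own timeout,
    # and any failure becomes an "Error: ..." entry instead of raising.
    async def query_with_timeout(model: str) -> tuple[str, str]:
        try:
            result = await asyncio.wait_for(fake_model_call(model, prompt), timeout=timeout)
            return (model, result)
        except asyncio.TimeoutError:
            return (model, f"Error: Timeout after {timeout} seconds")
        except Exception as exc:
            return (model, f"Error: {exc}")

    results = await asyncio.gather(*(query_with_timeout(m) for m in models))
    return dict(results)


if __name__ == "__main__":
    answers = asyncio.run(
        query_models_parallel(["gpt-4o-mini", "claude-3-opus-20240229"], "ping", timeout=5)
    )
    for model, answer in answers.items():
        print(f"{model}: {answer}")

In the released tool, the "Error: ..." entries are split off from the successful responses before the aggregation model synthesizes a consensus, and they are listed separately in the final report.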