hanzo-mcp 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/config/settings.py +61 -0
- hanzo_mcp/tools/__init__.py +26 -11
- hanzo_mcp/tools/common/config_tool.py +396 -0
- hanzo_mcp/tools/filesystem/__init__.py +10 -1
- hanzo_mcp/tools/filesystem/unified_search.py +689 -0
- hanzo_mcp/tools/vector/__init__.py +16 -12
- hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
- hanzo_mcp/tools/vector/git_ingester.py +482 -0
- hanzo_mcp/tools/vector/infinity_store.py +367 -1
- hanzo_mcp/tools/vector/mock_infinity.py +162 -0
- hanzo_mcp/tools/vector/vector_index.py +7 -6
- hanzo_mcp/tools/vector/vector_search.py +11 -1
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/METADATA +68 -20
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/RECORD +19 -14
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.1.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/filesystem/unified_search.py (new file)

@@ -0,0 +1,689 @@
"""Unified search tool that combines grep, vector, AST, and semantic search.

This tool provides an intelligent multi-search approach that:
1. Always starts with fast grep/regex search
2. Enhances with vector similarity, AST context, and symbol search
3. Returns comprehensive results with function/method context
4. Optimizes performance through intelligent caching and batching
"""

import asyncio
import json
import re
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple, Any, Union
from enum import Enum

from fastmcp import Context as MCPContext
from fastmcp import FastMCP
from pydantic import Field
from typing_extensions import Annotated, TypedDict, Unpack, final, override

from hanzo_mcp.tools.filesystem.base import FilesystemBaseTool
from hanzo_mcp.tools.filesystem.grep import Grep
from hanzo_mcp.tools.filesystem.grep_ast_tool import GrepAstTool
from hanzo_mcp.tools.vector.vector_search import VectorSearchTool
from hanzo_mcp.tools.vector.ast_analyzer import ASTAnalyzer, Symbol
from hanzo_mcp.tools.common.permissions import PermissionManager
from hanzo_mcp.tools.vector.project_manager import ProjectVectorManager


class SearchType(Enum):
    """Types of searches that can be performed."""
    GREP = "grep"
    VECTOR = "vector"
    AST = "ast"
    SYMBOL = "symbol"


@dataclass
class SearchResult:
    """Unified search result combining different search types."""
    file_path: str
    line_number: Optional[int]
    content: str
    search_type: SearchType
    score: float  # Relevance score (0-1)
    context: Optional[str] = None  # AST/function context
    symbol_info: Optional[Symbol] = None
    project: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        result = asdict(self)
        result['search_type'] = self.search_type.value
        if self.symbol_info:
            result['symbol_info'] = asdict(self.symbol_info)
        return result


@dataclass
class UnifiedSearchResults:
    """Container for all unified search results."""
    query: str
    total_results: int
    results_by_type: Dict[SearchType, List[SearchResult]]
    combined_results: List[SearchResult]
    search_time_ms: float

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        return {
            'query': self.query,
            'total_results': self.total_results,
            'results_by_type': {k.value: [r.to_dict() for r in v] for k, v in self.results_by_type.items()},
            'combined_results': [r.to_dict() for r in self.combined_results],
            'search_time_ms': self.search_time_ms,
        }


Pattern = Annotated[str, Field(description="The search pattern/query", min_length=1)]
SearchPath = Annotated[str, Field(description="Path to search in", default=".")]
Include = Annotated[str, Field(description="File pattern to include", default="*")]
MaxResults = Annotated[int, Field(description="Maximum results per search type", default=20)]
EnableVector = Annotated[bool, Field(description="Enable vector/semantic search", default=True)]
EnableAST = Annotated[bool, Field(description="Enable AST context search", default=True)]
EnableSymbol = Annotated[bool, Field(description="Enable symbol search", default=True)]
IncludeContext = Annotated[bool, Field(description="Include function/method context", default=True)]


class UnifiedSearchParams(TypedDict):
    """Parameters for unified search."""
    pattern: Pattern
    path: SearchPath
    include: Include
    max_results: MaxResults
    enable_vector: EnableVector
    enable_ast: EnableAST
    enable_symbol: EnableSymbol
    include_context: IncludeContext


@final
class UnifiedSearchTool(FilesystemBaseTool):
    """Unified search tool combining multiple search strategies."""

    def __init__(self, permission_manager: PermissionManager,
                 project_manager: Optional[ProjectVectorManager] = None):
        """Initialize the unified search tool."""
        super().__init__(permission_manager)
        self.project_manager = project_manager

        # Initialize component search tools
        self.grep_tool = Grep(permission_manager)
        self.grep_ast_tool = GrepAstTool(permission_manager)
        self.ast_analyzer = ASTAnalyzer()

        # Vector search is optional
        self.vector_tool = None
        if project_manager:
            self.vector_tool = VectorSearchTool(permission_manager, project_manager)

        # Cache for AST analysis results
        self._ast_cache: Dict[str, Any] = {}
        self._symbol_cache: Dict[str, List[Symbol]] = {}

    @property
    @override
    def name(self) -> str:
        """Get the tool name."""
        return "unified_search"

    @property
    @override
    def description(self) -> str:
        """Get the tool description."""
        return """Intelligent unified search combining grep, vector similarity, AST context, and symbol search.

This tool provides the most comprehensive search experience by:
1. Starting with fast grep/regex search for immediate results
2. Enhancing with vector similarity for semantic matches
3. Adding AST context to show structural information
4. Including symbol search for code definitions
5. Providing function/method body context when relevant

The tool intelligently combines results and provides relevance scoring across all search types.
Use this when you need comprehensive search results or aren't sure which search type is best."""

    def _detect_search_intent(self, pattern: str) -> Tuple[bool, bool, bool]:
        """Analyze pattern to determine which search types to enable.

        Returns:
            Tuple of (should_use_vector, should_use_ast, should_use_symbol)
        """
        # Default to all enabled
        use_vector = True
        use_ast = True
        use_symbol = True

        # If pattern looks like regex, focus on text search
        regex_indicators = ['.*', '\\w', '\\d', '\\s', '[', ']', '(', ')', '|', '^', '$']
        if any(indicator in pattern for indicator in regex_indicators):
            use_vector = False  # Regex patterns don't work well with vector search

        # If pattern looks like a function/class name, prioritize symbol search
        if re.match(r'^[a-zA-Z_][a-zA-Z0-9_]*$', pattern):
            use_symbol = True
            use_ast = True

        # If pattern contains natural language, prioritize vector search
        words = pattern.split()
        if len(words) > 2 and not any(c in pattern for c in ['(', ')', '{', '}', '[', ']']):
            use_vector = True

        return use_vector, use_ast, use_symbol

    async def _run_grep_search(self, pattern: str, path: str, include: str,
                               tool_ctx, max_results: int) -> List[SearchResult]:
        """Run grep search and convert results."""
        await tool_ctx.info(f"Running grep search for: {pattern}")

        try:
            # Use the existing grep tool
            grep_result = await self.grep_tool.call(
                tool_ctx.mcp_context,
                pattern=pattern,
                path=path,
                include=include
            )

            results = []
            if "Found" in grep_result and "matches" in grep_result:
                # Parse grep results
                lines = grep_result.split('\n')
                for line in lines[2:]:  # Skip header lines
                    if ':' in line and len(line.strip()) > 0:
                        try:
                            parts = line.split(':', 2)
                            if len(parts) >= 3:
                                file_path = parts[0]
                                line_num = int(parts[1])
                                content = parts[2].strip()

                                result = SearchResult(
                                    file_path=file_path,
                                    line_number=line_num,
                                    content=content,
                                    search_type=SearchType.GREP,
                                    score=1.0,  # Grep results are exact matches
                                )
                                results.append(result)

                                if len(results) >= max_results:
                                    break
                        except (ValueError, IndexError):
                            continue

            await tool_ctx.info(f"Grep search found {len(results)} results")
            return results

        except Exception as e:
            await tool_ctx.error(f"Grep search failed: {str(e)}")
            return []

    async def _run_vector_search(self, pattern: str, path: str, tool_ctx,
                                 max_results: int) -> List[SearchResult]:
        """Run vector search and convert results."""
        if not self.vector_tool:
            return []

        await tool_ctx.info(f"Running vector search for: {pattern}")

        try:
            # Determine search scope based on path
            if path == ".":
                search_scope = "current"
            else:
                search_scope = "all"  # Could be enhanced to detect project

            vector_result = await self.vector_tool.call(
                tool_ctx.mcp_context,
                query=pattern,
                limit=max_results,
                score_threshold=0.3,
                search_scope=search_scope,
                include_content=True
            )

            results = []
            # Parse vector search results - this would need to be enhanced
            # based on the actual format returned by vector_tool
            if "Found" in vector_result:
                # This is a simplified parser - would need to match actual format
                lines = vector_result.split('\n')
                current_file = None
                current_score = 0.0

                for line in lines:
                    if "Result" in line and "Score:" in line:
                        # Extract score
                        score_match = re.search(r'Score: ([\d.]+)%', line)
                        if score_match:
                            current_score = float(score_match.group(1)) / 100.0

                        # Extract file path
                        if " - " in line:
                            parts = line.split(" - ")
                            if len(parts) > 1:
                                current_file = parts[-1].strip()

                    elif current_file and line.strip() and not line.startswith('-'):
                        # This is content
                        result = SearchResult(
                            file_path=current_file,
                            line_number=None,
                            content=line.strip(),
                            search_type=SearchType.VECTOR,
                            score=current_score,
                        )
                        results.append(result)

                        if len(results) >= max_results:
                            break

            await tool_ctx.info(f"Vector search found {len(results)} results")
            return results

        except Exception as e:
            await tool_ctx.error(f"Vector search failed: {str(e)}")
            return []

    async def _run_ast_search(self, pattern: str, path: str, include: str,
                              tool_ctx, max_results: int) -> List[SearchResult]:
        """Run AST-aware search and convert results."""
        await tool_ctx.info(f"Running AST search for: {pattern}")

        try:
            ast_result = await self.grep_ast_tool.call(
                tool_ctx.mcp_context,
                pattern=pattern,
                path=path,
                ignore_case=False,
                line_number=True
            )

            results = []
            if ast_result and not ast_result.startswith("No matches"):
                # Parse AST results - they include structural context
                current_file = None
                context_lines = []

                for line in ast_result.split('\n'):
                    if line.endswith(':') and '/' in line:
                        # This is a file header
                        current_file = line[:-1]
                        context_lines = []
                    elif current_file and line.strip():
                        if ':' in line and line.strip()[0].isdigit():
                            # This looks like a line with number
                            try:
                                parts = line.split(':', 1)
                                line_num = int(parts[0].strip())
                                content = parts[1].strip() if len(parts) > 1 else ""

                                result = SearchResult(
                                    file_path=current_file,
                                    line_number=line_num,
                                    content=content,
                                    search_type=SearchType.AST,
                                    score=0.9,  # High score for AST matches
                                    context='\n'.join(context_lines) if context_lines else None
                                )
                                results.append(result)

                                if len(results) >= max_results:
                                    break

                            except ValueError:
                                context_lines.append(line)
                        else:
                            context_lines.append(line)

            await tool_ctx.info(f"AST search found {len(results)} results")
            return results

        except Exception as e:
            await tool_ctx.error(f"AST search failed: {str(e)}")
            return []

    async def _run_symbol_search(self, pattern: str, path: str, tool_ctx,
                                 max_results: int) -> List[SearchResult]:
        """Run symbol search using AST analysis."""
        await tool_ctx.info(f"Running symbol search for: {pattern}")

        try:
            results = []
            path_obj = Path(path)

            # Find files to analyze
            files_to_check = []
            if path_obj.is_file():
                files_to_check.append(str(path_obj))
            elif path_obj.is_dir():
                # Look for source files
                for ext in ['.py', '.js', '.ts', '.java', '.cpp', '.c']:
                    files_to_check.extend(path_obj.rglob(f'*{ext}'))
                files_to_check = [str(f) for f in files_to_check[:50]]  # Limit for performance

            # Analyze files for symbols
            for file_path in files_to_check:
                if not self.is_path_allowed(file_path):
                    continue

                # Check cache first
                if file_path in self._symbol_cache:
                    symbols = self._symbol_cache[file_path]
                else:
                    # Analyze file
                    file_ast = self.ast_analyzer.analyze_file(file_path)
                    symbols = file_ast.symbols if file_ast else []
                    self._symbol_cache[file_path] = symbols

                # Search symbols
                for symbol in symbols:
                    if re.search(pattern, symbol.name, re.IGNORECASE):
                        result = SearchResult(
                            file_path=symbol.file_path,
                            line_number=symbol.line_start,
                            content=f"{symbol.type} {symbol.name}" + (f" - {symbol.docstring[:100]}..." if symbol.docstring else ""),
                            search_type=SearchType.SYMBOL,
                            score=0.95,  # Very high score for symbol matches
                            symbol_info=symbol,
                            context=symbol.signature
                        )
                        results.append(result)

                        if len(results) >= max_results:
                            break

                if len(results) >= max_results:
                    break

            await tool_ctx.info(f"Symbol search found {len(results)} results")
            return results

        except Exception as e:
            await tool_ctx.error(f"Symbol search failed: {str(e)}")
            return []

    async def _add_function_context(self, results: List[SearchResult], tool_ctx) -> List[SearchResult]:
        """Add function/method context to results where relevant."""
        enhanced_results = []

        for result in results:
            enhanced_result = result

            if result.line_number and not result.context:
                try:
                    # Read the file and find surrounding function
                    file_path = Path(result.file_path)
                    if file_path.exists() and self.is_path_allowed(str(file_path)):

                        # Check if we have AST analysis cached
                        if str(file_path) not in self._ast_cache:
                            file_ast = self.ast_analyzer.analyze_file(str(file_path))
                            self._ast_cache[str(file_path)] = file_ast
                        else:
                            file_ast = self._ast_cache[str(file_path)]

                        if file_ast:
                            # Find symbol containing this line
                            for symbol in file_ast.symbols:
                                if (symbol.line_start <= result.line_number <= symbol.line_end and
                                        symbol.type in ['function', 'method']):
                                    enhanced_result = SearchResult(
                                        file_path=result.file_path,
                                        line_number=result.line_number,
                                        content=result.content,
                                        search_type=result.search_type,
                                        score=result.score,
                                        context=f"In {symbol.type} {symbol.name}(): {symbol.signature or ''}",
                                        symbol_info=symbol,
                                        project=result.project
                                    )
                                    break
                except Exception as e:
                    await tool_ctx.warning(f"Could not add context for {result.file_path}: {str(e)}")

            enhanced_results.append(enhanced_result)

        return enhanced_results

    def _combine_and_rank_results(self, results_by_type: Dict[SearchType, List[SearchResult]]) -> List[SearchResult]:
        """Combine results from different search types and rank by relevance."""
        all_results = []
        seen_combinations = set()

        # Combine all results, avoiding duplicates
        for search_type, results in results_by_type.items():
            for result in results:
                # Create a key to identify duplicates
                key = (result.file_path, result.line_number)

                if key not in seen_combinations:
                    seen_combinations.add(key)
                    all_results.append(result)
                else:
                    # Merge with existing result based on score and type priority
                    type_priority = {
                        SearchType.SYMBOL: 4,
                        SearchType.GREP: 3,
                        SearchType.AST: 2,
                        SearchType.VECTOR: 1
                    }

                    for existing in all_results:
                        existing_key = (existing.file_path, existing.line_number)
                        if existing_key == key:
                            # Update if the new result has higher priority or better score
                            result_priority = type_priority[result.search_type]
                            existing_priority = type_priority[existing.search_type]

                            # Replace existing if: higher priority type, or same priority but higher score
                            if (result_priority > existing_priority or
                                    (result_priority == existing_priority and result.score > existing.score)):
                                # Replace the entire result to preserve type
                                idx = all_results.index(existing)
                                all_results[idx] = result
                            else:
                                # Still merge useful information
                                existing.context = existing.context or result.context
                                existing.symbol_info = existing.symbol_info or result.symbol_info
                            break

        # Sort by score (descending) then by search type priority
        type_priority = {
            SearchType.SYMBOL: 4,
            SearchType.GREP: 3,
            SearchType.AST: 2,
            SearchType.VECTOR: 1
        }

        all_results.sort(key=lambda r: (r.score, type_priority[r.search_type]), reverse=True)

        return all_results

    @override
    async def call(self, ctx: MCPContext, **params: Unpack[UnifiedSearchParams]) -> str:
        """Execute unified search with all enabled search types."""
        import time
        start_time = time.time()

        tool_ctx = self.create_tool_context(ctx)

        # Extract parameters
        pattern = params["pattern"]
        path = params.get("path", ".")
        include = params.get("include", "*")
        max_results = params.get("max_results", 20)
        enable_vector = params.get("enable_vector", True)
        enable_ast = params.get("enable_ast", True)
        enable_symbol = params.get("enable_symbol", True)
        include_context = params.get("include_context", True)

        # Validate path
        path_validation = self.validate_path(path)
        if path_validation.is_error:
            await tool_ctx.error(path_validation.error_message)
            return f"Error: {path_validation.error_message}"

        # Check path permissions and existence
        allowed, error_msg = await self.check_path_allowed(path, tool_ctx)
        if not allowed:
            return error_msg

        exists, error_msg = await self.check_path_exists(path, tool_ctx)
        if not exists:
            return error_msg

        # Analyze search intent to optimize which searches to run
        should_vector, should_ast, should_symbol = self._detect_search_intent(pattern)
        enable_vector = enable_vector and should_vector
        enable_ast = enable_ast and should_ast
        enable_symbol = enable_symbol and should_symbol

        await tool_ctx.info(f"Starting unified search for '{pattern}' in {path}")
        await tool_ctx.info(f"Enabled searches: grep=True vector={enable_vector} ast={enable_ast} symbol={enable_symbol}")

        # Run searches in parallel for maximum efficiency
        search_tasks = []

        # Always run grep first (fastest, most reliable)
        search_tasks.append(
            self._run_grep_search(pattern, path, include, tool_ctx, max_results)
        )

        if enable_vector and self.vector_tool:
            search_tasks.append(
                self._run_vector_search(pattern, path, tool_ctx, max_results)
            )

        if enable_ast:
            search_tasks.append(
                self._run_ast_search(pattern, path, include, tool_ctx, max_results)
            )

        if enable_symbol:
            search_tasks.append(
                self._run_symbol_search(pattern, path, tool_ctx, max_results)
            )

        # Execute all searches in parallel
        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

        # Organize results by type
        results_by_type = {}
        search_types = [SearchType.GREP]
        if enable_vector and self.vector_tool:
            search_types.append(SearchType.VECTOR)
        if enable_ast:
            search_types.append(SearchType.AST)
        if enable_symbol:
            search_types.append(SearchType.SYMBOL)

        for i, result in enumerate(search_results):
            if isinstance(result, Exception):
                await tool_ctx.error(f"Search failed: {str(result)}")
                continue

            search_type = search_types[i]
            results_by_type[search_type] = result

        # Add function context if requested
        if include_context:
            for search_type, results in results_by_type.items():
                if results:
                    results_by_type[search_type] = await self._add_function_context(results, tool_ctx)

        # Combine and rank all results
        combined_results = self._combine_and_rank_results(results_by_type)

        end_time = time.time()
        search_time_ms = (end_time - start_time) * 1000

        # Create unified results object
        unified_results = UnifiedSearchResults(
            query=pattern,
            total_results=len(combined_results),
            results_by_type=results_by_type,
            combined_results=combined_results[:max_results * 2],  # Allow some extra for variety
            search_time_ms=search_time_ms
        )

        # Format output
        return self._format_unified_results(unified_results)

    def _format_unified_results(self, results: UnifiedSearchResults) -> str:
        """Format unified search results for display."""
        if results.total_results == 0:
            return f"No results found for query: '{results.query}'"

        lines = [
            f"Unified Search Results for '{results.query}' ({results.search_time_ms:.1f}ms)",
            f"Found {results.total_results} total results across {len(results.results_by_type)} search types",
            ""
        ]

        # Show summary by type
        for search_type, type_results in results.results_by_type.items():
            if type_results:
                lines.append(f"• {search_type.value.title()}: {len(type_results)} results")
        lines.append("")

        # Show top combined results
        lines.append("=== Top Results (Combined & Ranked) ===")
        for i, result in enumerate(results.combined_results[:20], 1):
            score_display = f"{result.score:.2f}" if result.score < 1.0 else "1.00"

            header = f"Result {i} [{result.search_type.value}] (Score: {score_display})"
            if result.line_number:
                header += f" - {result.file_path}:{result.line_number}"
            else:
                header += f" - {result.file_path}"

            lines.append(header)
            lines.append("-" * len(header))

            if result.context:
                lines.append(f"Context: {result.context}")

            lines.append(f"Content: {result.content}")

            if result.symbol_info:
                lines.append(f"Symbol: {result.symbol_info.type} {result.symbol_info.name}")
                if result.symbol_info.signature:
                    lines.append(f"Signature: {result.symbol_info.signature}")

            lines.append("")

        return "\n".join(lines)

    @override
    def register(self, mcp_server: FastMCP) -> None:
        """Register the unified search tool with the MCP server."""
        tool_self = self

        @mcp_server.tool(name=self.name, description=self.description)
        async def unified_search(
            ctx: MCPContext,
            pattern: Pattern,
            path: SearchPath = ".",
            include: Include = "*",
            max_results: MaxResults = 20,
            enable_vector: EnableVector = True,
            enable_ast: EnableAST = True,
            enable_symbol: EnableSymbol = True,
            include_context: IncludeContext = True,
        ) -> str:
            return await tool_self.call(
                ctx,
                pattern=pattern,
                path=path,
                include=include,
                max_results=max_results,
                enable_vector=enable_vector,
                enable_ast=enable_ast,
                enable_symbol=enable_symbol,
                include_context=include_context,
            )
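Usage note (not part of the diff): below is a minimal sketch of how the new tool might be wired into a FastMCP server. It assumes PermissionManager can be constructed with default arguments (check hanzo-mcp's own documentation for how allowed paths are configured) and omits the optional ProjectVectorManager, so vector search stays disabled per the constructor above; treat these details as assumptions rather than documented API.

from fastmcp import FastMCP

from hanzo_mcp.tools.common.permissions import PermissionManager
from hanzo_mcp.tools.filesystem.unified_search import UnifiedSearchTool

# Assumption: default construction is acceptable; configure allowed paths
# according to hanzo-mcp's documentation before exposing the tool.
permissions = PermissionManager()

server = FastMCP("hanzo-mcp-example")

# No ProjectVectorManager is passed, so only grep, AST, and symbol search run.
tool = UnifiedSearchTool(permissions)
tool.register(server)  # exposes the "unified_search" MCP tool shown in this diff

if __name__ == "__main__":
    server.run()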