hanzo-mcp 0.9.0__py3-none-any.whl → 0.9.2__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.
Potentially problematic release: this version of hanzo-mcp might be problematic.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/analytics/posthog_analytics.py +14 -1
- hanzo_mcp/cli.py +108 -4
- hanzo_mcp/server.py +11 -0
- hanzo_mcp/tools/__init__.py +3 -16
- hanzo_mcp/tools/agent/__init__.py +5 -0
- hanzo_mcp/tools/agent/agent.py +5 -0
- hanzo_mcp/tools/agent/agent_tool.py +3 -17
- hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +623 -0
- hanzo_mcp/tools/agent/clarification_tool.py +7 -1
- hanzo_mcp/tools/agent/claude_desktop_auth.py +16 -6
- hanzo_mcp/tools/agent/cli_agent_base.py +5 -0
- hanzo_mcp/tools/agent/cli_tools.py +26 -0
- hanzo_mcp/tools/agent/code_auth_tool.py +5 -0
- hanzo_mcp/tools/agent/critic_tool.py +7 -1
- hanzo_mcp/tools/agent/iching_tool.py +5 -0
- hanzo_mcp/tools/agent/network_tool.py +5 -0
- hanzo_mcp/tools/agent/review_tool.py +7 -1
- hanzo_mcp/tools/agent/swarm_alias.py +5 -0
- hanzo_mcp/tools/agent/swarm_tool.py +701 -0
- hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +554 -0
- hanzo_mcp/tools/agent/unified_cli_tools.py +5 -0
- hanzo_mcp/tools/common/auto_timeout.py +254 -0
- hanzo_mcp/tools/common/base.py +4 -0
- hanzo_mcp/tools/common/batch_tool.py +5 -0
- hanzo_mcp/tools/common/config_tool.py +5 -0
- hanzo_mcp/tools/common/critic_tool.py +5 -0
- hanzo_mcp/tools/common/paginated_base.py +4 -0
- hanzo_mcp/tools/common/permissions.py +38 -12
- hanzo_mcp/tools/common/personality.py +673 -980
- hanzo_mcp/tools/common/stats.py +5 -0
- hanzo_mcp/tools/common/thinking_tool.py +5 -0
- hanzo_mcp/tools/common/timeout_parser.py +103 -0
- hanzo_mcp/tools/common/tool_disable.py +5 -0
- hanzo_mcp/tools/common/tool_enable.py +5 -0
- hanzo_mcp/tools/common/tool_list.py +5 -0
- hanzo_mcp/tools/config/config_tool.py +5 -0
- hanzo_mcp/tools/config/mode_tool.py +5 -0
- hanzo_mcp/tools/database/graph.py +5 -0
- hanzo_mcp/tools/database/graph_add.py +5 -0
- hanzo_mcp/tools/database/graph_query.py +5 -0
- hanzo_mcp/tools/database/graph_remove.py +5 -0
- hanzo_mcp/tools/database/graph_search.py +5 -0
- hanzo_mcp/tools/database/graph_stats.py +5 -0
- hanzo_mcp/tools/database/sql.py +5 -0
- hanzo_mcp/tools/database/sql_query.py +2 -0
- hanzo_mcp/tools/database/sql_search.py +5 -0
- hanzo_mcp/tools/database/sql_stats.py +5 -0
- hanzo_mcp/tools/editor/neovim_command.py +5 -0
- hanzo_mcp/tools/editor/neovim_edit.py +7 -2
- hanzo_mcp/tools/editor/neovim_session.py +5 -0
- hanzo_mcp/tools/filesystem/__init__.py +23 -26
- hanzo_mcp/tools/filesystem/ast_tool.py +3 -4
- hanzo_mcp/tools/filesystem/base.py +2 -18
- hanzo_mcp/tools/filesystem/batch_search.py +825 -0
- hanzo_mcp/tools/filesystem/content_replace.py +5 -3
- hanzo_mcp/tools/filesystem/diff.py +5 -0
- hanzo_mcp/tools/filesystem/directory_tree.py +34 -281
- hanzo_mcp/tools/filesystem/directory_tree_paginated.py +345 -0
- hanzo_mcp/tools/filesystem/edit.py +6 -5
- hanzo_mcp/tools/filesystem/find.py +177 -311
- hanzo_mcp/tools/filesystem/find_files.py +370 -0
- hanzo_mcp/tools/filesystem/git_search.py +5 -3
- hanzo_mcp/tools/filesystem/grep.py +454 -0
- hanzo_mcp/tools/filesystem/multi_edit.py +6 -5
- hanzo_mcp/tools/filesystem/read.py +10 -9
- hanzo_mcp/tools/filesystem/rules_tool.py +6 -4
- hanzo_mcp/tools/filesystem/search_tool.py +728 -0
- hanzo_mcp/tools/filesystem/symbols_tool.py +510 -0
- hanzo_mcp/tools/filesystem/tree.py +273 -0
- hanzo_mcp/tools/filesystem/watch.py +6 -1
- hanzo_mcp/tools/filesystem/write.py +13 -7
- hanzo_mcp/tools/jupyter/jupyter.py +30 -2
- hanzo_mcp/tools/jupyter/notebook_edit.py +298 -0
- hanzo_mcp/tools/jupyter/notebook_read.py +148 -0
- hanzo_mcp/tools/llm/consensus_tool.py +8 -6
- hanzo_mcp/tools/llm/llm_manage.py +5 -0
- hanzo_mcp/tools/llm/llm_tool.py +2 -0
- hanzo_mcp/tools/llm/llm_unified.py +5 -0
- hanzo_mcp/tools/llm/provider_tools.py +5 -0
- hanzo_mcp/tools/lsp/lsp_tool.py +475 -622
- hanzo_mcp/tools/mcp/mcp_add.py +7 -2
- hanzo_mcp/tools/mcp/mcp_remove.py +15 -2
- hanzo_mcp/tools/mcp/mcp_stats.py +5 -0
- hanzo_mcp/tools/mcp/mcp_tool.py +5 -0
- hanzo_mcp/tools/memory/knowledge_tools.py +14 -0
- hanzo_mcp/tools/memory/memory_tools.py +17 -0
- hanzo_mcp/tools/search/find_tool.py +5 -3
- hanzo_mcp/tools/search/unified_search.py +3 -1
- hanzo_mcp/tools/shell/__init__.py +2 -14
- hanzo_mcp/tools/shell/base_process.py +4 -2
- hanzo_mcp/tools/shell/bash_tool.py +2 -0
- hanzo_mcp/tools/shell/command_executor.py +7 -7
- hanzo_mcp/tools/shell/logs.py +5 -0
- hanzo_mcp/tools/shell/npx.py +5 -0
- hanzo_mcp/tools/shell/npx_background.py +5 -0
- hanzo_mcp/tools/shell/npx_tool.py +5 -0
- hanzo_mcp/tools/shell/open.py +5 -0
- hanzo_mcp/tools/shell/pkill.py +5 -0
- hanzo_mcp/tools/shell/process_tool.py +5 -0
- hanzo_mcp/tools/shell/processes.py +5 -0
- hanzo_mcp/tools/shell/run_background.py +5 -0
- hanzo_mcp/tools/shell/run_command.py +2 -0
- hanzo_mcp/tools/shell/run_command_windows.py +5 -0
- hanzo_mcp/tools/shell/streaming_command.py +5 -0
- hanzo_mcp/tools/shell/uvx.py +5 -0
- hanzo_mcp/tools/shell/uvx_background.py +5 -0
- hanzo_mcp/tools/shell/uvx_tool.py +5 -0
- hanzo_mcp/tools/shell/zsh_tool.py +3 -0
- hanzo_mcp/tools/todo/todo.py +5 -0
- hanzo_mcp/tools/todo/todo_read.py +142 -0
- hanzo_mcp/tools/todo/todo_write.py +367 -0
- hanzo_mcp/tools/vector/__init__.py +42 -95
- hanzo_mcp/tools/vector/index_tool.py +5 -0
- hanzo_mcp/tools/vector/vector.py +5 -0
- hanzo_mcp/tools/vector/vector_index.py +5 -0
- hanzo_mcp/tools/vector/vector_search.py +5 -0
- {hanzo_mcp-0.9.0.dist-info → hanzo_mcp-0.9.2.dist-info}/METADATA +1 -1
- hanzo_mcp-0.9.2.dist-info/RECORD +195 -0
- hanzo_mcp/tools/common/path_utils.py +0 -34
- hanzo_mcp/tools/compiler/__init__.py +0 -8
- hanzo_mcp/tools/compiler/sandboxed_compiler.py +0 -681
- hanzo_mcp/tools/environment/__init__.py +0 -8
- hanzo_mcp/tools/environment/environment_detector.py +0 -594
- hanzo_mcp/tools/filesystem/search.py +0 -1160
- hanzo_mcp/tools/framework/__init__.py +0 -8
- hanzo_mcp/tools/framework/framework_modes.py +0 -714
- hanzo_mcp/tools/memory/conversation_memory.py +0 -636
- hanzo_mcp/tools/shell/run_tool.py +0 -56
- hanzo_mcp/tools/vector/node_tool.py +0 -538
- hanzo_mcp/tools/vector/unified_vector.py +0 -384
- hanzo_mcp-0.9.0.dist-info/RECORD +0 -191
- {hanzo_mcp-0.9.0.dist-info → hanzo_mcp-0.9.2.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.9.0.dist-info → hanzo_mcp-0.9.2.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.9.0.dist-info → hanzo_mcp-0.9.2.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/filesystem/search.py (deleted)
@@ -1,1160 +0,0 @@
-"""Unified search tool implementation.
-
-This module provides the unified search tool that combines multiple search strategies:
-- Pattern search (regex/text) using ripgrep or fallback
-- AST-aware code search with structural context
-- Semantic similarity search using vector embeddings
-- Git history search through commits and content
-- Symbol search for function/class definitions
-
-The tool can run single queries or batch multiple queries in parallel for comprehensive
-code analysis and refactoring tasks.
-"""
-
-import re
-import json
-import shlex
-import shutil
-import asyncio
-import fnmatch
-from enum import Enum
-from typing import (
-    Dict,
-    List,
-    Tuple,
-    Union,
-    Unpack,
-    Optional,
-    Annotated,
-    TypedDict,
-    final,
-    override,
-    Literal,
-)
-from pathlib import Path
-from dataclasses import dataclass
-
-from pydantic import Field
-from mcp.server import FastMCP
-from mcp.server.fastmcp import Context as MCPContext
-
-from hanzo_mcp.tools.common.context import ToolContext
-from hanzo_mcp.tools.common.truncate import truncate_response
-from hanzo_mcp.tools.filesystem.base import FilesystemBaseTool
-
-# For optional dependencies
-try:
-    from hanzo_mcp.tools.vector.vector_search import VectorSearchTool
-    from hanzo_mcp.tools.vector.project_manager import ProjectVectorManager
-    VECTOR_SEARCH_AVAILABLE = True
-except ImportError:
-    VectorSearchTool = None
-    ProjectVectorManager = None
-    VECTOR_SEARCH_AVAILABLE = False
-
-try:
-    from hanzo_mcp.tools.filesystem.git_search import GitSearchTool
-    GIT_SEARCH_AVAILABLE = True
-except ImportError:
-    GitSearchTool = None
-    GIT_SEARCH_AVAILABLE = False
-
-try:
-    from hanzo_mcp.tools.filesystem.ast_tool import ASTTool
-    AST_SEARCH_AVAILABLE = True
-except ImportError:
-    ASTTool = None
-    AST_SEARCH_AVAILABLE = False
-
-
-class SearchStrategy(Enum):
-    """Search strategies available."""
-    PATTERN = "pattern"
-    AST = "ast"
-    SEMANTIC = "semantic"
-    GIT = "git"
-    ALL = "all"
-
-
-class SearchType(Enum):
-    """Types of searches that can be performed."""
-    GREP = "grep"
-    GREP_AST = "grep_ast"
-    VECTOR = "vector"
-    GIT = "git"
-    SYMBOL = "symbol"
-
-
-@dataclass
-class SearchResult:
-    """Search result from any search type."""
-    file_path: str
-    line_number: Optional[int]
-    content: str
-    search_type: SearchType
-    score: float  # Relevance score (0-1)
-    context: Optional[str] = None  # Function/class context
-    match_count: int = 1  # Number of matches in this location
-
-
-# Type annotations for parameters
-Query = Annotated[
-    str,
-    Field(
-        description="The search pattern (supports regex for pattern search, natural language for semantic search)",
-        min_length=1,
-    ),
-]
-
-SearchPath = Annotated[
-    str,
-    Field(
-        description="The directory to search in. Defaults to current directory.",
-        default=".",
-    ),
-]
-
-Strategy = Annotated[
-    Literal["pattern", "ast", "semantic", "git", "all"],
-    Field(
-        description="Search strategy: pattern (regex/text), ast (code structure), semantic (vector), git (history), all (combined)",
-        default="pattern",
-    ),
-]
-
-Batch = Annotated[
-    Optional[List[str]],
-    Field(
-        description="List of additional queries to search in parallel",
-        default=None,
-    ),
-]
-
-Include = Annotated[
-    str,
-    Field(
-        description='File pattern to include (e.g. "*.js", "*.{ts,tsx}")',
-        default="*",
-    ),
-]
-
-ContextLines = Annotated[
-    int,
-    Field(
-        description="Number of context lines around matches",
-        default=2,
-        ge=0,
-        le=10,
-    ),
-]
-
-Parallel = Annotated[
-    bool,
-    Field(
-        description="Run searches in parallel for faster results",
-        default=False,
-    ),
-]
-
-MaxResults = Annotated[
-    Optional[int],
-    Field(
-        description="Maximum number of results to return",
-        default=None,
-        gt=0,
-    ),
-]
-
-
-class SearchParams(TypedDict):
-    """Parameters for the unified search tool."""
-    query: Query
-    path: SearchPath
-    strategy: Strategy
-    batch: Batch
-    include: Include
-    context_lines: ContextLines
-    parallel: Parallel
-    max_results: MaxResults
-
-
-# Legacy grep parameters for backward compatibility
-Pattern = Annotated[
-    str,
-    Field(
-        description="The regular expression pattern to search for in file contents",
-        min_length=1,
-    ),
-]
-
-
-class GrepToolParams(TypedDict):
-    """Legacy parameters for grep tool compatibility."""
-    pattern: Pattern
-    path: SearchPath
-    include: Include
-
-
-@final
-class UnifiedSearchTool(FilesystemBaseTool):
-    """Unified search tool that combines multiple search strategies."""
-
-    def __init__(
-        self,
-        permission_manager,
-        project_manager: Optional[ProjectVectorManager] = None,
-    ):
-        """Initialize the unified search tool.
-
-        Args:
-            permission_manager: Permission manager for access control
-            project_manager: Optional project manager for vector search
-        """
-        super().__init__(permission_manager)
-        self.project_manager = project_manager
-
-        # Initialize component tools
-        self.grep_ast_tool = None
-        self.git_search_tool = None
-        self.vector_tool = None
-
-        if AST_SEARCH_AVAILABLE:
-            self.grep_ast_tool = ASTTool(permission_manager)
-
-        if GIT_SEARCH_AVAILABLE:
-            self.git_search_tool = GitSearchTool(permission_manager)
-
-        if VECTOR_SEARCH_AVAILABLE and project_manager:
-            self.vector_tool = VectorSearchTool(permission_manager, project_manager)
-
-    @property
-    @override
-    def name(self) -> str:
-        """Get the tool name."""
-        return "search"
-
-    @property
-    @override
-    def description(self) -> str:
-        """Get the tool description."""
-        return """Unified search tool that combines multiple search strategies.
-
-Supports different search strategies:
-- pattern: Fast regex/text search using ripgrep
-- ast: AST-aware code structure search
-- semantic: Vector-based semantic similarity search
-- git: Search through git history and commits
-- all: Run all available strategies and combine results
-
-Can batch multiple queries for comprehensive analysis.
-Results are combined, deduplicated, and ranked by relevance.
-
-Examples:
-- search(query="TODO", strategy="pattern") - Find TODO comments
-- search(query="error handling", strategy="semantic") - Find error handling code
-- search(query="processPayment", strategy="ast") - Find function definitions
-- search(query="bug fix", strategy="git") - Search git history
-- search(query="auth", batch=["authentication", "authorize"], strategy="all") - Multi-query search"""
-
-    def is_ripgrep_installed(self) -> bool:
-        """Check if ripgrep (rg) is installed."""
-        return shutil.which("rg") is not None
-
-    async def run_ripgrep(
-        self,
-        pattern: str,
-        path: str,
-        tool_ctx: ToolContext,
-        include_pattern: str | None = None,
-    ) -> str:
-        """Run ripgrep with the given parameters and return the results."""
-        # Special case for tests: direct file path with include pattern that doesn't match
-        if Path(path).is_file() and include_pattern and include_pattern != "*":
-            if not fnmatch.fnmatch(Path(path).name, include_pattern):
-                await tool_ctx.info(f"File does not match pattern '{include_pattern}': {path}")
-                return f"File does not match pattern '{include_pattern}': {path}"
-
-        cmd = ["rg", "--json", pattern]
-
-        # Add path
-        cmd.append(path)
-
-        # Add include pattern if provided
-        if include_pattern and include_pattern != "*":
-            cmd.extend(["-g", include_pattern])
-
-        await tool_ctx.info(f"Running ripgrep command: {shlex.join(cmd)}")
-
-        try:
-            # Execute ripgrep process
-            process = await asyncio.create_subprocess_exec(
-                *cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
-            )
-
-            stdout, stderr = await process.communicate()
-
-            if process.returncode != 0 and process.returncode != 1:
-                # rg returns 1 when no matches are found, which is not an error
-                await tool_ctx.error(f"ripgrep failed with exit code {process.returncode}: {stderr.decode()}")
-                return f"Error executing ripgrep: {stderr.decode()}"
-
-            # Parse the JSON output
-            results = self.parse_ripgrep_json_output(stdout.decode())
-            return results
-
-        except Exception as e:
-            await tool_ctx.error(f"Error running ripgrep: {str(e)}")
-            return f"Error running ripgrep: {str(e)}"
-
-    def parse_ripgrep_json_output(self, output: str) -> str:
-        """Parse ripgrep JSON output and format it for human readability."""
-        if not output.strip():
-            return "No matches found."
-
-        formatted_results = []
-        file_results = {}
-
-        for line in output.splitlines():
-            if not line.strip():
-                continue
-
-            try:
-                data = json.loads(line)
-
-                if data.get("type") == "match":
-                    path = data.get("data", {}).get("path", {}).get("text", "")
-                    line_number = data.get("data", {}).get("line_number", 0)
-                    line_text = data.get("data", {}).get("lines", {}).get("text", "").rstrip()
-
-                    if path not in file_results:
-                        file_results[path] = []
-
-                    file_results[path].append((line_number, line_text))
-
-            except json.JSONDecodeError as e:
-                formatted_results.append(f"Error parsing JSON: {str(e)}")
-
-        # Count total matches
-        total_matches = sum(len(matches) for matches in file_results.values())
-        total_files = len(file_results)
-
-        if total_matches == 0:
-            return "No matches found."
-
-        formatted_results.append(
-            f"Found {total_matches} matches in {total_files} file{'s' if total_files > 1 else ''}:"
-        )
-        formatted_results.append("")  # Empty line for readability
-
-        # Format the results by file
-        for file_path, matches in file_results.items():
-            for line_number, line_text in matches:
-                formatted_results.append(f"{file_path}:{line_number}: {line_text}")
-
-        return "\n".join(formatted_results)
-
-    async def fallback_grep(
-        self,
-        pattern: str,
-        path: str,
-        tool_ctx: ToolContext,
-        include_pattern: str | None = None,
-    ) -> str:
-        """Fallback Python implementation when ripgrep is not available."""
-        await tool_ctx.info("Using fallback Python implementation for grep")
-
-        try:
-            input_path = Path(path)
-
-            # Find matching files
-            matching_files: list[Path] = []
-
-            # Process based on whether path is a file or directory
-            if input_path.is_file():
-                # Single file search - check file pattern match first
-                if (
-                    include_pattern is None
-                    or include_pattern == "*"
-                    or fnmatch.fnmatch(input_path.name, include_pattern)
-                ):
-                    matching_files.append(input_path)
-                    await tool_ctx.info(f"Searching single file: {path}")
-                else:
-                    # File doesn't match the pattern, return immediately
-                    await tool_ctx.info(f"File does not match pattern '{include_pattern}': {path}")
-                    return f"File does not match pattern '{include_pattern}': {path}"
-            elif input_path.is_dir():
-                # Directory search - find all files
-                await tool_ctx.info(f"Finding files in directory: {path}")
-
-                # Keep track of allowed paths for filtering
-                allowed_paths: set[str] = set()
-
-                # Collect all allowed paths first for faster filtering
-                for entry in input_path.rglob("*"):
-                    entry_path = str(entry)
-                    if self.is_path_allowed(entry_path):
-                        allowed_paths.add(entry_path)
-
-                # Find matching files efficiently
-                for entry in input_path.rglob("*"):
-                    entry_path = str(entry)
-                    if entry_path in allowed_paths and entry.is_file():
-                        if (
-                            include_pattern is None
-                            or include_pattern == "*"
-                            or fnmatch.fnmatch(entry.name, include_pattern)
-                        ):
-                            matching_files.append(entry)
-
-                await tool_ctx.info(f"Found {len(matching_files)} matching files")
-            else:
-                # This shouldn't happen if path exists
-                await tool_ctx.error(f"Path is neither a file nor a directory: {path}")
-                return f"Error: Path is neither a file nor a directory: {path}"
-
-            # Report progress
-            total_files = len(matching_files)
-            if input_path.is_file():
-                await tool_ctx.info(f"Searching file: {path}")
-            else:
-                await tool_ctx.info(f"Searching through {total_files} files in directory")
-
-            # Set up for parallel processing
-            results: list[str] = []
-            files_processed = 0
-            matches_found = 0
-            batch_size = 20  # Process files in batches to avoid overwhelming the system
-
-            # Use a semaphore to limit concurrent file operations
-            semaphore = asyncio.Semaphore(10)
-
-            # Create an async function to search a single file
-            async def search_file(file_path: Path) -> list[str]:
-                nonlocal files_processed, matches_found
-                file_results: list[str] = []
-
-                try:
-                    async with semaphore:  # Limit concurrent operations
-                        try:
-                            with open(file_path, "r", encoding="utf-8") as f:
-                                for line_num, line in enumerate(f, 1):
-                                    if re.search(pattern, line):
-                                        file_results.append(f"{file_path}:{line_num}: {line.rstrip()}")
-                                        matches_found += 1
-                            files_processed += 1
-                        except UnicodeDecodeError:
-                            # Skip binary files
-                            files_processed += 1
-                        except Exception as e:
-                            await tool_ctx.warning(f"Error reading {file_path}: {str(e)}")
-                except Exception as e:
-                    await tool_ctx.warning(f"Error processing {file_path}: {str(e)}")
-
-                return file_results
-
-            # Process files in parallel batches
-            for i in range(0, len(matching_files), batch_size):
-                batch = matching_files[i : i + batch_size]
-                batch_tasks = [search_file(file_path) for file_path in batch]
-
-                # Report progress
-                await tool_ctx.report_progress(i, total_files)
-
-                # Wait for the batch to complete
-                batch_results = await asyncio.gather(*batch_tasks)
-
-                # Flatten and collect results
-                for file_result in batch_results:
-                    results.extend(file_result)
-
-            # Final progress report
-            await tool_ctx.report_progress(total_files, total_files)
-
-            if not results:
-                if input_path.is_file():
-                    return f"No matches found for pattern '{pattern}' in file: {path}"
-                else:
-                    return f"No matches found for pattern '{pattern}' in files matching '{include_pattern or '*'}' in directory: {path}"
-
-            await tool_ctx.info(
-                f"Found {matches_found} matches in {files_processed} file{'s' if files_processed > 1 else ''}"
-            )
-            return (
-                f"Found {matches_found} matches in {files_processed} file{'s' if files_processed > 1 else ''}:\n\n"
-                + "\n".join(results)
-            )
-        except Exception as e:
-            await tool_ctx.error(f"Error searching file contents: {str(e)}")
-            return f"Error searching file contents: {str(e)}"
-
-    def _analyze_pattern(self, pattern: str) -> Dict[str, bool]:
-        """Analyze the pattern to determine optimal search strategies."""
-        # Check if pattern looks like regex
-        regex_chars = r"[.*+?^${}()|[\]\\]"
-        has_regex = bool(re.search(regex_chars, pattern))
-
-        # Check if pattern looks like a symbol name
-        is_symbol = bool(re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", pattern))
-
-        # Check if pattern is natural language
-        words = pattern.split()
-        is_natural_language = len(words) > 2 and not has_regex
-
-        return {
-            "use_grep": True,  # Always useful
-            "use_grep_ast": not has_regex,  # AST doesn't handle regex well
-            "use_vector": is_natural_language or len(pattern) > 10,
-            "use_git": True,  # Always check history
-            "use_symbol": is_symbol or "def" in pattern or "class" in pattern,
-        }
-
-    async def _run_pattern_search(
-        self, pattern: str, path: str, include: str, tool_ctx: ToolContext, max_results: Optional[int]
-    ) -> List[SearchResult]:
-        """Run pattern search using ripgrep or fallback."""
-        try:
-            if self.is_ripgrep_installed():
-                await tool_ctx.info("Using ripgrep for pattern search")
-                result = await self.run_ripgrep(pattern, path, tool_ctx, include)
-            else:
-                await tool_ctx.info("Using fallback implementation for pattern search")
-                result = await self.fallback_grep(pattern, path, tool_ctx, include)
-
-            results = []
-            if "Found" in result and "matches" in result:
-                lines = result.split("\n")
-                for line in lines[2:]:  # Skip header
-                    if ":" in line and line.strip():
-                        try:
-                            parts = line.split(":", 2)
-                            if len(parts) >= 3:
-                                results.append(
-                                    SearchResult(
-                                        file_path=parts[0],
-                                        line_number=int(parts[1]),
-                                        content=parts[2].strip(),
-                                        search_type=SearchType.GREP,
-                                        score=1.0,  # Exact matches get perfect score
-                                    )
-                                )
-                                if max_results and len(results) >= max_results:
-                                    break
-                        except ValueError:
-                            continue
-
-            await tool_ctx.info(f"Pattern search found {len(results)} results")
-            return results
-
-        except Exception as e:
-            await tool_ctx.error(f"Pattern search failed: {e}")
-            return []
-
-    async def _run_ast_search(
-        self, pattern: str, path: str, tool_ctx: ToolContext, max_results: Optional[int]
-    ) -> List[SearchResult]:
-        """Run AST-aware search."""
-        if not self.grep_ast_tool:
-            return []
-
-        try:
-            result = await self.grep_ast_tool.call(
-                tool_ctx.mcp_context,
-                pattern=pattern,
-                path=path,
-                ignore_case=True,
-                line_number=True,
-            )
-
-            results = []
-            if result and not result.startswith("No matches"):
-                current_file = None
-                current_context = []
-
-                for line in result.split("\n"):
-                    if line.endswith(":") and "/" in line:
-                        current_file = line[:-1]
-                        current_context = []
-                    elif current_file and ":" in line:
-                        try:
-                            # Try to parse line with number
-                            parts = line.split(":", 1)
-                            line_num = int(parts[0].strip())
-                            content = parts[1].strip() if len(parts) > 1 else ""
-
-                            results.append(
-                                SearchResult(
-                                    file_path=current_file,
-                                    line_number=line_num,
-                                    content=content,
-                                    search_type=SearchType.GREP_AST,
-                                    score=0.95,  # High score for AST matches
-                                    context=(" > ".join(current_context) if current_context else None),
-                                )
-                            )
-
-                            if max_results and len(results) >= max_results:
-                                break
-                        except ValueError:
-                            # This might be context info
-                            if line.strip():
-                                current_context.append(line.strip())
-
-            await tool_ctx.info(f"AST search found {len(results)} results")
-            return results
-
-        except Exception as e:
-            await tool_ctx.error(f"AST search failed: {e}")
-            return []
-
-    async def _run_semantic_search(
-        self, pattern: str, path: str, tool_ctx: ToolContext, max_results: Optional[int]
-    ) -> List[SearchResult]:
-        """Run semantic vector search."""
-        if not self.vector_tool:
-            return []
-
-        try:
-            # Determine search scope
-            search_scope = "current" if path == "." else "all"
-
-            result = await self.vector_tool.call(
-                tool_ctx.mcp_context,
-                query=pattern,
-                limit=max_results or 50,
-                score_threshold=0.3,
-                search_scope=search_scope,
-                include_content=True,
-            )
-
-            results = []
-            if "Found" in result:
-                # Parse vector search results
-                lines = result.split("\n")
-                current_file = None
-                current_score = 0.0
-
-                for line in lines:
-                    if "Result" in line and "Score:" in line:
-                        # Extract score and file
-                        score_match = re.search(r"Score: ([\d.]+)%", line)
-                        if score_match:
-                            current_score = float(score_match.group(1)) / 100.0
-
-                        file_match = re.search(r" - ([^\s]+)$", line)
-                        if file_match:
-                            current_file = file_match.group(1)
-
-                    elif current_file and line.strip() and not line.startswith("-"):
-                        # Content line
-                        results.append(
-                            SearchResult(
-                                file_path=current_file,
-                                line_number=None,
-                                content=line.strip()[:200],  # Limit content length
-                                search_type=SearchType.VECTOR,
-                                score=current_score,
-                            )
-                        )
-
-                        if max_results and len(results) >= max_results:
-                            break
-
-            await tool_ctx.info(f"Semantic search found {len(results)} results")
-            return results
-
-        except Exception as e:
-            await tool_ctx.error(f"Semantic search failed: {e}")
-            return []
-
-    async def _run_git_search(
-        self, pattern: str, path: str, tool_ctx: ToolContext, max_results: Optional[int]
-    ) -> List[SearchResult]:
-        """Run git history search."""
-        if not self.git_search_tool:
-            return []
-
-        try:
-            # Search in both content and commits
-            max_per_type = (max_results or 50) // 2
-            tasks = [
-                self.git_search_tool.call(
-                    tool_ctx.mcp_context,
-                    pattern=pattern,
-                    path=path,
-                    search_type="content",
-                    max_count=max_per_type,
-                ),
-                self.git_search_tool.call(
-                    tool_ctx.mcp_context,
-                    pattern=pattern,
-                    path=path,
-                    search_type="commits",
-                    max_count=max_per_type,
-                ),
-            ]
-
-            git_results = await asyncio.gather(*tasks, return_exceptions=True)
-
-            results = []
-            for _i, result in enumerate(git_results):
-                if isinstance(result, Exception):
-                    continue
-
-                if "Found" in result:
-                    # Parse git results
-                    lines = result.split("\n")
-                    for line in lines:
-                        if ":" in line and line.strip():
-                            parts = line.split(":", 2)
-                            if len(parts) >= 2:
-                                results.append(
-                                    SearchResult(
-                                        file_path=parts[0].strip(),
-                                        line_number=None,
-                                        content=(parts[-1].strip() if len(parts) > 2 else line),
-                                        search_type=SearchType.GIT,
-                                        score=0.8,  # Good score for git matches
-                                    )
-                                )
-
-                                if max_results and len(results) >= max_results:
-                                    break
-
-            await tool_ctx.info(f"Git search found {len(results)} results")
-            return results
-
-        except Exception as e:
-            await tool_ctx.error(f"Git search failed: {e}")
-            return []
-
-    async def _run_symbol_search(
-        self, pattern: str, path: str, tool_ctx: ToolContext, max_results: Optional[int]
-    ) -> List[SearchResult]:
-        """Search for symbol definitions using grep with specific patterns."""
-        try:
-            # Create patterns for common symbol definitions
-            symbol_patterns = [
-                f"(def|class|function|func|fn)\\s+{pattern}",  # Python, JS, various
-                f"(public|private|protected)?\\s*(static)?\\s*\\w+\\s+{pattern}\\s*\\(",  # Java/C++
-                f"const\\s+{pattern}\\s*=",  # JS/TS const
-                f"let\\s+{pattern}\\s*=",  # JS/TS let
-                f"var\\s+{pattern}\\s*=",  # JS/TS var
-            ]
-
-            # Run pattern searches for each symbol pattern
-            all_results = []
-            max_per_pattern = (max_results or 50) // len(symbol_patterns)
-
-            for sp in symbol_patterns:
-                pattern_results = await self._run_pattern_search(
-                    sp, path, "*", tool_ctx, max_per_pattern
-                )
-                # Convert to symbol type
-                for result in pattern_results:
-                    result.search_type = SearchType.SYMBOL
-                    result.score = 0.98  # Very high score for symbol definitions
-                all_results.extend(pattern_results)
-
-            await tool_ctx.info(f"Symbol search found {len(all_results)} results")
-            return all_results
-
-        except Exception as e:
-            await tool_ctx.error(f"Symbol search failed: {e}")
-            return []
-
-    def _deduplicate_results(self, all_results: List[SearchResult]) -> List[SearchResult]:
-        """Deduplicate results, keeping the highest scoring version."""
-        seen = {}
-
-        for result in all_results:
-            key = (result.file_path, result.line_number)
-
-            if key not in seen or result.score > seen[key].score:
-                seen[key] = result
-            elif key in seen and result.context and not seen[key].context:
-                # Add context if missing
-                seen[key].context = result.context
-
-        return list(seen.values())
-
-    def _rank_results(self, results: List[SearchResult]) -> List[SearchResult]:
-        """Rank results by relevance score and search type priority."""
-        # Define search type priorities
-        type_priority = {
-            SearchType.SYMBOL: 5,
-            SearchType.GREP: 4,
-            SearchType.GREP_AST: 3,
-            SearchType.GIT: 2,
-            SearchType.VECTOR: 1,
-        }
-
-        # Sort by score (descending) and then by type priority
-        results.sort(key=lambda r: (r.score, type_priority.get(r.search_type, 0)), reverse=True)
-
-        return results
-
-    def _format_results(
-        self,
-        query: str,
-        results: List[SearchResult],
-        results_by_type: Dict[SearchType, List[SearchResult]],
-        search_time_ms: float,
-        strategy: str,
-    ) -> str:
-        """Format search results for display."""
-        output = []
-
-        # Header
-        output.append(f"=== Search Results ===")
-        output.append(f"Query: '{query}'")
-        output.append(f"Strategy: {strategy}")
-        output.append(f"Total results: {len(results)}")
-        output.append(f"Search time: {search_time_ms:.1f}ms")
-
-        # Summary by type
-        output.append("\nResults by type:")
-        for search_type, type_results in results_by_type.items():
-            if type_results:
-                output.append(f"  {search_type.value}: {len(type_results)} matches")
-
-        if not results:
-            output.append("\nNo results found.")
-            return "\n".join(output)
-
-        # Group results by file
-        results_by_file = {}
-        for result in results:
-            if result.file_path not in results_by_file:
-                results_by_file[result.file_path] = []
-            results_by_file[result.file_path].append(result)
-
-        # Display results
-        output.append(f"\n=== Results ({len(results)} total) ===\n")
-
-        for file_path, file_results in results_by_file.items():
-            output.append(f"{file_path}")
-            output.append("-" * len(file_path))
-
-            # Sort by line number
-            file_results.sort(key=lambda r: r.line_number or 0)
-
-            for result in file_results:
-                # Format result line
-                score_str = f"[{result.search_type.value} {result.score:.2f}]"
-
-                if result.line_number:
-                    output.append(f"  {result.line_number:>4}: {score_str} {result.content}")
-                else:
-                    output.append(f"  {score_str} {result.content}")
-
-                # Add context if available
-                if result.context:
-                    output.append(f"    Context: {result.context}")
-
-            output.append("")  # Empty line between files
-
-        return "\n".join(output)
-
-    async def run_unified_search(
-        self,
-        query: str,
-        path: str,
-        strategy: str,
-        include: str,
-        max_results: Optional[int],
-        tool_ctx: ToolContext,
-    ) -> str:
-        """Run unified search with specified strategy."""
-        import time
-        start_time = time.time()
-
-        await tool_ctx.info(f"Starting {strategy} search for '{query}' in {path}")
-
-        # Determine which searches to run based on strategy
-        search_tasks = []
-        search_names = []
-
-        if strategy == "pattern":
-            search_tasks.append(self._run_pattern_search(query, path, include, tool_ctx, max_results))
-            search_names.append("pattern")
-        elif strategy == "ast":
-            if self.grep_ast_tool:
-                search_tasks.append(self._run_ast_search(query, path, tool_ctx, max_results))
-                search_names.append("ast")
-            else:
-                await tool_ctx.warning("AST search not available, falling back to pattern search")
-                search_tasks.append(self._run_pattern_search(query, path, include, tool_ctx, max_results))
-                search_names.append("pattern")
-        elif strategy == "semantic":
-            if self.vector_tool:
-                search_tasks.append(self._run_semantic_search(query, path, tool_ctx, max_results))
-                search_names.append("semantic")
-            else:
-                await tool_ctx.warning("Semantic search not available, falling back to pattern search")
-                search_tasks.append(self._run_pattern_search(query, path, include, tool_ctx, max_results))
-                search_names.append("pattern")
-        elif strategy == "git":
-            if self.git_search_tool:
-                search_tasks.append(self._run_git_search(query, path, tool_ctx, max_results))
-                search_names.append("git")
-            else:
-                await tool_ctx.warning("Git search not available, falling back to pattern search")
-                search_tasks.append(self._run_pattern_search(query, path, include, tool_ctx, max_results))
-                search_names.append("pattern")
-        elif strategy == "all":
-            # Analyze pattern to determine best strategies
-            pattern_analysis = self._analyze_pattern(query)
-
-            if pattern_analysis["use_grep"]:
-                search_tasks.append(self._run_pattern_search(query, path, include, tool_ctx, max_results))
-                search_names.append("pattern")
-
-            if pattern_analysis["use_grep_ast"] and self.grep_ast_tool:
-                search_tasks.append(self._run_ast_search(query, path, tool_ctx, max_results))
-                search_names.append("ast")
-
-            if pattern_analysis["use_vector"] and self.vector_tool:
-                search_tasks.append(self._run_semantic_search(query, path, tool_ctx, max_results))
-                search_names.append("semantic")
-
-            if pattern_analysis["use_git"] and self.git_search_tool:
-                search_tasks.append(self._run_git_search(query, path, tool_ctx, max_results))
-                search_names.append("git")
-
-            if pattern_analysis["use_symbol"]:
-                search_tasks.append(self._run_symbol_search(query, path, tool_ctx, max_results))
-                search_names.append("symbol")
-
-        await tool_ctx.info(f"Running {len(search_tasks)} search types: {', '.join(search_names)}")
-
-        # Run all searches
-        search_results = await asyncio.gather(*search_tasks, return_exceptions=True)
-
-        # Collect all results
-        all_results = []
-        results_by_type = {}
-
-        for search_type, results in zip(search_names, search_results):
-            if isinstance(results, Exception):
-                await tool_ctx.error(f"{search_type} search failed: {results}")
-                results_by_type[SearchType(search_type)] = []
-            else:
-                # Map search names to SearchType enum
-                search_type_enum = {
-                    "pattern": SearchType.GREP,
-                    "ast": SearchType.GREP_AST,
-                    "semantic": SearchType.VECTOR,
-                    "git": SearchType.GIT,
-                    "symbol": SearchType.SYMBOL,
-                }.get(search_type, SearchType.GREP)
-
-                results_by_type[search_type_enum] = results
-                all_results.extend(results)
-
-        # Deduplicate and rank results
-        unique_results = self._deduplicate_results(all_results)
-        ranked_results = self._rank_results(unique_results)
-
-        # Limit total results
-        if max_results:
-            final_results = ranked_results[:max_results]
-        else:
-            final_results = ranked_results
-
-        # Calculate search time
-        search_time = (time.time() - start_time) * 1000
-
-        # Format output
-        return self._format_results(
-            query=query,
-            results=final_results,
-            results_by_type=results_by_type,
-            search_time_ms=search_time,
-            strategy=strategy,
-        )
-
-    @override
-    async def call(
-        self,
-        ctx: MCPContext,
-        **params: Unpack[Union[SearchParams, GrepToolParams]],
-    ) -> str:
-        """Execute the search tool with the given parameters."""
-        tool_ctx = self.create_tool_context(ctx)
-
-        # Handle both new and legacy parameter formats
-        if "query" in params:
-            # New unified search parameters
-            query = params["query"]
-            path = params.get("path", ".")
-            strategy = params.get("strategy", "pattern")
-            batch = params.get("batch")
-            include = params.get("include", "*")
-            context_lines = params.get("context_lines", 2)
-            parallel = params.get("parallel", False)
-            max_results = params.get("max_results")
-        else:
-            # Legacy grep parameters
-            query = params.get("pattern")
-            path = params.get("path", ".")
-            strategy = "pattern"
-            batch = None
-            include = params.get("include", "*")
-            context_lines = 2
-            parallel = False
-            max_results = None
-
-        # Expand path (handles ~, $HOME, etc.)
-        path = self.expand_path(path)
-
-        # Validate required parameters
-        if query is None:
-            await tool_ctx.error("Parameter 'query' or 'pattern' is required but was None")
-            return "Error: Parameter 'query' or 'pattern' is required but was None"
-
-        # Validate path
-        path_validation = self.validate_path(path)
-        if path_validation.is_error:
-            await tool_ctx.error(path_validation.error_message)
-            return f"Error: {path_validation.error_message}"
-
-        # Check if path is allowed
-        allowed, error_msg = await self.check_path_allowed(path, tool_ctx)
-        if not allowed:
-            return error_msg
-
-        # Check if path exists
-        exists, error_msg = await self.check_path_exists(path, tool_ctx)
-        if not exists:
-            return error_msg
-
-        # Handle batch queries
-        if batch:
-            # Run all queries including the main one
-            all_queries = [query] + batch
-
-            if parallel:
-                # Run all queries in parallel
-                await tool_ctx.info(f"Running {len(all_queries)} queries in parallel")
-                tasks = [
-                    self.run_unified_search(q, path, strategy, include, max_results, tool_ctx)
-                    for q in all_queries
-                ]
-                batch_results = await asyncio.gather(*tasks, return_exceptions=True)
-
-                # Combine results
-                output = [f"=== Batch Search Results ({len(all_queries)} queries) ===\n"]
-                for i, (q, result) in enumerate(zip(all_queries, batch_results)):
-                    if isinstance(result, Exception):
-                        output.append(f"Query {i+1} '{q}' failed: {result}\n")
-                    else:
-                        output.append(f"Query {i+1}: {result}\n")
-                    output.append("="*80 + "\n")
-
-                return "\n".join(output)
-            else:
-                # Run queries sequentially
-                await tool_ctx.info(f"Running {len(all_queries)} queries sequentially")
-                output = [f"=== Batch Search Results ({len(all_queries)} queries) ===\n"]
-
-                for i, q in enumerate(all_queries):
-                    result = await self.run_unified_search(q, path, strategy, include, max_results, tool_ctx)
-                    output.append(f"Query {i+1}: {result}\n")
-                    output.append("="*80 + "\n")
-
-                return "\n".join(output)
-        else:
-            # Single query
-            result = await self.run_unified_search(query, path, strategy, include, max_results, tool_ctx)
-            return truncate_response(
-                result,
-                max_tokens=25000,
-                truncation_message="\n\n[Search results truncated due to token limit. Use more specific patterns or limit max_results.]",
-            )
-
-    @override
-    def register(self, mcp_server: FastMCP) -> None:
-        """Register this search tool with the MCP server."""
-        tool_self = self
-
-        @mcp_server.tool(name=self.name, description=self.description)
-        async def search(
-            ctx: MCPContext,
-            query: Query,
-            path: SearchPath = ".",
-            strategy: Strategy = "pattern",
-            batch: Batch = None,
-            include: Include = "*",
-            context_lines: ContextLines = 2,
-            parallel: Parallel = False,
-            max_results: MaxResults = None,
-        ) -> str:
-            return await tool_self.call(
-                ctx,
-                query=query,
-                path=path,
-                strategy=strategy,
-                batch=batch,
-                include=include,
-                context_lines=context_lines,
-                parallel=parallel,
-                max_results=max_results,
-            )
-
-
-# Legacy alias for backward compatibility
-class Grep(UnifiedSearchTool):
-    """Legacy grep tool - alias for unified search with pattern strategy."""
-
-    @property
-    @override
-    def name(self) -> str:
-        """Get the tool name."""
-        return "grep"
-
-    @property
-    @override
-    def description(self) -> str:
-        """Get the tool description."""
-        return """Fast content search tool that works with any codebase size.
-Searches file contents using regular expressions.
-Supports full regex syntax (eg. "log.*Error", "function\\s+\\w+", etc.).
-Filter files by pattern with the include parameter (eg. "*.js", "*.{ts,tsx}").
-Returns matching file paths sorted by modification time.
-Use this tool when you need to find files containing specific patterns.
-When you are doing an open ended search that may require multiple rounds of globbing and grepping, use the Agent tool instead."""
-
-    @override
-    def register(self, mcp_server: FastMCP) -> None:
-        """Register this grep tool with the MCP server."""
-        tool_self = self
-
-        @mcp_server.tool(name=self.name, description=self.description)
-        async def grep(
-            ctx: MCPContext,
-            pattern: Pattern,
-            path: SearchPath = ".",
-            include: Include = "*",
-        ) -> str:
-            # Map legacy parameters to new format
-            return await tool_self.call(ctx, pattern=pattern, path=path, include=include)
-
-
-# Factory functions for easy instantiation
-def create_unified_search_tool(permission_manager=None, project_manager=None):
-    """Create a unified search tool instance."""
-    if permission_manager is None:
-        from hanzo_mcp.tools.common.permissions import PermissionManager
-        permission_manager = PermissionManager()
-
-    return UnifiedSearchTool(permission_manager, project_manager)
-
-
-def create_grep_tool(permission_manager=None):
-    """Create a legacy grep tool instance."""
-    if permission_manager is None:
-        from hanzo_mcp.tools.common.permissions import PermissionManager
-        permission_manager = PermissionManager()
-
-    return Grep(permission_manager)