hanzo-mcp 0.7.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic.
- hanzo_mcp/__init__.py +7 -1
- hanzo_mcp/__main__.py +1 -1
- hanzo_mcp/analytics/__init__.py +2 -2
- hanzo_mcp/analytics/posthog_analytics.py +76 -82
- hanzo_mcp/cli.py +31 -36
- hanzo_mcp/cli_enhanced.py +94 -72
- hanzo_mcp/cli_plugin.py +27 -17
- hanzo_mcp/config/__init__.py +2 -2
- hanzo_mcp/config/settings.py +112 -88
- hanzo_mcp/config/tool_config.py +32 -34
- hanzo_mcp/dev_server.py +66 -67
- hanzo_mcp/prompts/__init__.py +94 -12
- hanzo_mcp/prompts/enhanced_prompts.py +809 -0
- hanzo_mcp/prompts/example_custom_prompt.py +6 -5
- hanzo_mcp/prompts/project_todo_reminder.py +0 -1
- hanzo_mcp/prompts/tool_explorer.py +10 -7
- hanzo_mcp/server.py +17 -21
- hanzo_mcp/server_enhanced.py +15 -22
- hanzo_mcp/tools/__init__.py +56 -28
- hanzo_mcp/tools/agent/__init__.py +16 -19
- hanzo_mcp/tools/agent/agent.py +82 -65
- hanzo_mcp/tools/agent/agent_tool.py +152 -122
- hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +66 -62
- hanzo_mcp/tools/agent/clarification_protocol.py +55 -50
- hanzo_mcp/tools/agent/clarification_tool.py +11 -10
- hanzo_mcp/tools/agent/claude_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/claude_desktop_auth.py +130 -144
- hanzo_mcp/tools/agent/cli_agent_base.py +59 -53
- hanzo_mcp/tools/agent/code_auth.py +102 -107
- hanzo_mcp/tools/agent/code_auth_tool.py +28 -27
- hanzo_mcp/tools/agent/codex_cli_tool.py +20 -19
- hanzo_mcp/tools/agent/critic_tool.py +86 -73
- hanzo_mcp/tools/agent/gemini_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/grok_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/iching_tool.py +404 -139
- hanzo_mcp/tools/agent/network_tool.py +89 -73
- hanzo_mcp/tools/agent/prompt.py +2 -1
- hanzo_mcp/tools/agent/review_tool.py +101 -98
- hanzo_mcp/tools/agent/swarm_alias.py +87 -0
- hanzo_mcp/tools/agent/swarm_tool.py +246 -161
- hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +134 -92
- hanzo_mcp/tools/agent/tool_adapter.py +21 -11
- hanzo_mcp/tools/common/__init__.py +1 -1
- hanzo_mcp/tools/common/base.py +3 -5
- hanzo_mcp/tools/common/batch_tool.py +46 -39
- hanzo_mcp/tools/common/config_tool.py +120 -84
- hanzo_mcp/tools/common/context.py +1 -5
- hanzo_mcp/tools/common/context_fix.py +5 -3
- hanzo_mcp/tools/common/critic_tool.py +4 -8
- hanzo_mcp/tools/common/decorators.py +58 -56
- hanzo_mcp/tools/common/enhanced_base.py +29 -32
- hanzo_mcp/tools/common/fastmcp_pagination.py +91 -94
- hanzo_mcp/tools/common/forgiving_edit.py +91 -87
- hanzo_mcp/tools/common/mode.py +15 -17
- hanzo_mcp/tools/common/mode_loader.py +27 -24
- hanzo_mcp/tools/common/paginated_base.py +61 -53
- hanzo_mcp/tools/common/paginated_response.py +72 -79
- hanzo_mcp/tools/common/pagination.py +50 -53
- hanzo_mcp/tools/common/permissions.py +4 -4
- hanzo_mcp/tools/common/personality.py +186 -138
- hanzo_mcp/tools/common/plugin_loader.py +54 -54
- hanzo_mcp/tools/common/stats.py +65 -47
- hanzo_mcp/tools/common/test_helpers.py +31 -0
- hanzo_mcp/tools/common/thinking_tool.py +4 -8
- hanzo_mcp/tools/common/tool_disable.py +17 -12
- hanzo_mcp/tools/common/tool_enable.py +13 -14
- hanzo_mcp/tools/common/tool_list.py +36 -28
- hanzo_mcp/tools/common/truncate.py +23 -23
- hanzo_mcp/tools/config/__init__.py +4 -4
- hanzo_mcp/tools/config/config_tool.py +42 -29
- hanzo_mcp/tools/config/index_config.py +37 -34
- hanzo_mcp/tools/config/mode_tool.py +175 -55
- hanzo_mcp/tools/database/__init__.py +15 -12
- hanzo_mcp/tools/database/database_manager.py +77 -75
- hanzo_mcp/tools/database/graph.py +137 -91
- hanzo_mcp/tools/database/graph_add.py +30 -18
- hanzo_mcp/tools/database/graph_query.py +178 -102
- hanzo_mcp/tools/database/graph_remove.py +33 -28
- hanzo_mcp/tools/database/graph_search.py +97 -75
- hanzo_mcp/tools/database/graph_stats.py +91 -59
- hanzo_mcp/tools/database/sql.py +107 -79
- hanzo_mcp/tools/database/sql_query.py +30 -24
- hanzo_mcp/tools/database/sql_search.py +29 -25
- hanzo_mcp/tools/database/sql_stats.py +47 -35
- hanzo_mcp/tools/editor/neovim_command.py +25 -28
- hanzo_mcp/tools/editor/neovim_edit.py +21 -23
- hanzo_mcp/tools/editor/neovim_session.py +60 -54
- hanzo_mcp/tools/filesystem/__init__.py +31 -30
- hanzo_mcp/tools/filesystem/ast_multi_edit.py +329 -249
- hanzo_mcp/tools/filesystem/ast_tool.py +4 -4
- hanzo_mcp/tools/filesystem/base.py +1 -1
- hanzo_mcp/tools/filesystem/batch_search.py +316 -224
- hanzo_mcp/tools/filesystem/content_replace.py +4 -4
- hanzo_mcp/tools/filesystem/diff.py +71 -59
- hanzo_mcp/tools/filesystem/directory_tree.py +7 -7
- hanzo_mcp/tools/filesystem/directory_tree_paginated.py +49 -37
- hanzo_mcp/tools/filesystem/edit.py +4 -4
- hanzo_mcp/tools/filesystem/find.py +173 -80
- hanzo_mcp/tools/filesystem/find_files.py +73 -52
- hanzo_mcp/tools/filesystem/git_search.py +157 -104
- hanzo_mcp/tools/filesystem/grep.py +8 -8
- hanzo_mcp/tools/filesystem/multi_edit.py +4 -8
- hanzo_mcp/tools/filesystem/read.py +12 -10
- hanzo_mcp/tools/filesystem/rules_tool.py +59 -43
- hanzo_mcp/tools/filesystem/search_tool.py +263 -207
- hanzo_mcp/tools/filesystem/symbols_tool.py +94 -54
- hanzo_mcp/tools/filesystem/tree.py +35 -33
- hanzo_mcp/tools/filesystem/unix_aliases.py +13 -18
- hanzo_mcp/tools/filesystem/watch.py +37 -36
- hanzo_mcp/tools/filesystem/write.py +4 -8
- hanzo_mcp/tools/jupyter/__init__.py +4 -4
- hanzo_mcp/tools/jupyter/base.py +4 -5
- hanzo_mcp/tools/jupyter/jupyter.py +67 -47
- hanzo_mcp/tools/jupyter/notebook_edit.py +4 -4
- hanzo_mcp/tools/jupyter/notebook_read.py +4 -7
- hanzo_mcp/tools/llm/__init__.py +5 -7
- hanzo_mcp/tools/llm/consensus_tool.py +72 -52
- hanzo_mcp/tools/llm/llm_manage.py +101 -60
- hanzo_mcp/tools/llm/llm_tool.py +226 -166
- hanzo_mcp/tools/llm/provider_tools.py +25 -26
- hanzo_mcp/tools/lsp/__init__.py +1 -1
- hanzo_mcp/tools/lsp/lsp_tool.py +228 -143
- hanzo_mcp/tools/mcp/__init__.py +2 -3
- hanzo_mcp/tools/mcp/mcp_add.py +27 -25
- hanzo_mcp/tools/mcp/mcp_remove.py +7 -8
- hanzo_mcp/tools/mcp/mcp_stats.py +23 -22
- hanzo_mcp/tools/mcp/mcp_tool.py +129 -98
- hanzo_mcp/tools/memory/__init__.py +39 -21
- hanzo_mcp/tools/memory/knowledge_tools.py +124 -99
- hanzo_mcp/tools/memory/memory_tools.py +90 -108
- hanzo_mcp/tools/search/__init__.py +7 -2
- hanzo_mcp/tools/search/find_tool.py +297 -212
- hanzo_mcp/tools/search/unified_search.py +366 -314
- hanzo_mcp/tools/shell/__init__.py +8 -7
- hanzo_mcp/tools/shell/auto_background.py +56 -49
- hanzo_mcp/tools/shell/base.py +1 -1
- hanzo_mcp/tools/shell/base_process.py +75 -75
- hanzo_mcp/tools/shell/bash_session.py +2 -2
- hanzo_mcp/tools/shell/bash_session_executor.py +4 -4
- hanzo_mcp/tools/shell/bash_tool.py +24 -31
- hanzo_mcp/tools/shell/command_executor.py +12 -12
- hanzo_mcp/tools/shell/logs.py +43 -33
- hanzo_mcp/tools/shell/npx.py +13 -13
- hanzo_mcp/tools/shell/npx_background.py +24 -21
- hanzo_mcp/tools/shell/npx_tool.py +18 -22
- hanzo_mcp/tools/shell/open.py +19 -21
- hanzo_mcp/tools/shell/pkill.py +31 -26
- hanzo_mcp/tools/shell/process_tool.py +32 -32
- hanzo_mcp/tools/shell/processes.py +57 -58
- hanzo_mcp/tools/shell/run_background.py +24 -25
- hanzo_mcp/tools/shell/run_command.py +5 -5
- hanzo_mcp/tools/shell/run_command_windows.py +5 -5
- hanzo_mcp/tools/shell/session_storage.py +3 -3
- hanzo_mcp/tools/shell/streaming_command.py +141 -126
- hanzo_mcp/tools/shell/uvx.py +24 -25
- hanzo_mcp/tools/shell/uvx_background.py +35 -33
- hanzo_mcp/tools/shell/uvx_tool.py +18 -22
- hanzo_mcp/tools/todo/__init__.py +6 -2
- hanzo_mcp/tools/todo/todo.py +50 -37
- hanzo_mcp/tools/todo/todo_read.py +5 -8
- hanzo_mcp/tools/todo/todo_write.py +5 -7
- hanzo_mcp/tools/vector/__init__.py +40 -28
- hanzo_mcp/tools/vector/ast_analyzer.py +176 -143
- hanzo_mcp/tools/vector/git_ingester.py +170 -179
- hanzo_mcp/tools/vector/index_tool.py +96 -44
- hanzo_mcp/tools/vector/infinity_store.py +283 -228
- hanzo_mcp/tools/vector/mock_infinity.py +39 -40
- hanzo_mcp/tools/vector/project_manager.py +88 -78
- hanzo_mcp/tools/vector/vector.py +59 -42
- hanzo_mcp/tools/vector/vector_index.py +30 -27
- hanzo_mcp/tools/vector/vector_search.py +64 -45
- hanzo_mcp/types.py +6 -4
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/METADATA +1 -1
- hanzo_mcp-0.8.0.dist-info/RECORD +185 -0
- hanzo_mcp-0.7.6.dist-info/RECORD +0 -182
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/search/unified_search.py

@@ -4,30 +4,28 @@ This is your main search interface that intelligently combines all available
 search capabilities including text, AST, symbols, memory, and semantic search.
 """

-import os
-import time
 import json
+import time
+import hashlib
 import subprocess
-from typing import
+from typing import Any, Dict, List, Optional
 from pathlib import Path
 from dataclasses import dataclass
-from collections import defaultdict
-import hashlib

-from hanzo_mcp.tools.common.base import BaseTool
-from hanzo_mcp.tools.common.paginated_response import AutoPaginatedResponse
-from hanzo_mcp.tools.common.decorators import with_context_normalization
 from hanzo_mcp.types import MCPResourceDocument
+from hanzo_mcp.tools.common.base import BaseTool

 # Import memory tools if available
 try:
     from hanzo_mcp.tools.memory.memory_tools import KnowledgeRetrieval
+
     MEMORY_AVAILABLE = True
 except ImportError:
     MEMORY_AVAILABLE = False

 try:
     import tree_sitter
+
     TREESITTER_AVAILABLE = True
 except ImportError:
     TREESITTER_AVAILABLE = False

The remaining hunks in this file are mostly formatter-style cleanups (double-quoted strings, trailing commas, wrapped call arguments, and whitespace-only blank lines). The substantive changes they carry:

- _check_ripgrep runs subprocess.run(["rg", "--version"], capture_output=True, check=True) and catches Exception instead of using a bare except.
- _init_vector_search loads SentenceTransformer("all-MiniLM-L6-v2") and creates the "code_search" collection with metadata={"description": "Code semantic search"}.
- _should_use_vector_search now scores four indicators (multi-word query, no code-syntax brackets, contains spaces, not anchored with ^ or $) and triggers when at least two hold.
- _should_use_ast_search checks for "class "/"function "/"def ", "import "/"from ", the keywords method/function/class/interface/struct, and member-access tokens ("::", "->", ".").
- _should_use_symbol_search requires a short query (at most two words), identifier-like characters once "_" and "-" are stripped, and no embedded spaces.
- run() spells out its full signature: pattern, path=".", include=None, exclude=None, max_results_per_type=20, context_lines=3, search_files=False, search_memory=None, enable_text=None, enable_ast=None, enable_vector=None, enable_symbol=None, page_size=50, page=1, **kwargs -> MCPResourceDocument.
- The search_memory auto-detection is now gated on MEMORY_AVAILABLE in addition to the natural-language heuristics, and enable_vector is computed as _should_use_vector_search(pattern) and VECTOR_SEARCH_AVAILABLE.
- _text_search builds the ripgrep command as ["rg", "--json", "--max-count", str(max_results)] plus ["-C", str(context_lines)] and ["--glob", include] / ["--glob", f"!{exclude}"], and reads match records from the JSON stream via data["data"]["path"]["text"], ["line_number"], ["submatches"][0]["start"], and ["lines"]["text"].
- _ast_search fills match_text from the matched line with bounds checking, slices context_before/context_after around it, and tags results with match_type="ast" and node_type="ast_match".
- _symbol_search calls _text_search with max_results // len(symbol_patterns) per pattern and retags results as match_type="symbol".
- _vector_search scores results as 1.0 - search_results["distances"][0][i] (distance converted to similarity) and carries metadata.get("context", "") as semantic_context.
- _memory_search(query, max_results, context_lines) queries KnowledgeRetrieval with threshold=0.5 and truncates match_text to 200 characters.
- _format_preview returns "\n".join(lines); _deduplicate_results and _rank_results keep their logic, with the deduplication tuple comparison and the any(...) path-ranking conditions expanded across lines.
- CodeIndexer.index_directory defaults file_patterns to ["*.py", "*.js", "*.ts", "*.go", "*.java", "*.cpp", "*.c"]; _split_code_intelligently splits on def/function/class/interface keywords or every 50 lines.
- All helper coroutines (_text_search, _ast_search, _symbol_search, _vector_search, _file_search, _memory_search, _python_text_search) get fully written-out parameter lists and -> List[SearchResult] return annotations.

create_unified_search_tool() still just returns UnifiedSearch().