hanzo-mcp 0.7.6__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hanzo-mcp might be problematic.

Files changed (178)
  1. hanzo_mcp/__init__.py +7 -1
  2. hanzo_mcp/__main__.py +1 -1
  3. hanzo_mcp/analytics/__init__.py +2 -2
  4. hanzo_mcp/analytics/posthog_analytics.py +76 -82
  5. hanzo_mcp/cli.py +31 -36
  6. hanzo_mcp/cli_enhanced.py +94 -72
  7. hanzo_mcp/cli_plugin.py +27 -17
  8. hanzo_mcp/config/__init__.py +2 -2
  9. hanzo_mcp/config/settings.py +112 -88
  10. hanzo_mcp/config/tool_config.py +32 -34
  11. hanzo_mcp/dev_server.py +66 -67
  12. hanzo_mcp/prompts/__init__.py +94 -12
  13. hanzo_mcp/prompts/enhanced_prompts.py +809 -0
  14. hanzo_mcp/prompts/example_custom_prompt.py +6 -5
  15. hanzo_mcp/prompts/project_todo_reminder.py +0 -1
  16. hanzo_mcp/prompts/tool_explorer.py +10 -7
  17. hanzo_mcp/server.py +17 -21
  18. hanzo_mcp/server_enhanced.py +15 -22
  19. hanzo_mcp/tools/__init__.py +56 -28
  20. hanzo_mcp/tools/agent/__init__.py +16 -19
  21. hanzo_mcp/tools/agent/agent.py +82 -65
  22. hanzo_mcp/tools/agent/agent_tool.py +152 -122
  23. hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +66 -62
  24. hanzo_mcp/tools/agent/clarification_protocol.py +55 -50
  25. hanzo_mcp/tools/agent/clarification_tool.py +11 -10
  26. hanzo_mcp/tools/agent/claude_cli_tool.py +21 -20
  27. hanzo_mcp/tools/agent/claude_desktop_auth.py +130 -144
  28. hanzo_mcp/tools/agent/cli_agent_base.py +59 -53
  29. hanzo_mcp/tools/agent/code_auth.py +102 -107
  30. hanzo_mcp/tools/agent/code_auth_tool.py +28 -27
  31. hanzo_mcp/tools/agent/codex_cli_tool.py +20 -19
  32. hanzo_mcp/tools/agent/critic_tool.py +86 -73
  33. hanzo_mcp/tools/agent/gemini_cli_tool.py +21 -20
  34. hanzo_mcp/tools/agent/grok_cli_tool.py +21 -20
  35. hanzo_mcp/tools/agent/iching_tool.py +404 -139
  36. hanzo_mcp/tools/agent/network_tool.py +89 -73
  37. hanzo_mcp/tools/agent/prompt.py +2 -1
  38. hanzo_mcp/tools/agent/review_tool.py +101 -98
  39. hanzo_mcp/tools/agent/swarm_alias.py +87 -0
  40. hanzo_mcp/tools/agent/swarm_tool.py +246 -161
  41. hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +134 -92
  42. hanzo_mcp/tools/agent/tool_adapter.py +21 -11
  43. hanzo_mcp/tools/common/__init__.py +1 -1
  44. hanzo_mcp/tools/common/base.py +3 -5
  45. hanzo_mcp/tools/common/batch_tool.py +46 -39
  46. hanzo_mcp/tools/common/config_tool.py +120 -84
  47. hanzo_mcp/tools/common/context.py +1 -5
  48. hanzo_mcp/tools/common/context_fix.py +5 -3
  49. hanzo_mcp/tools/common/critic_tool.py +4 -8
  50. hanzo_mcp/tools/common/decorators.py +58 -56
  51. hanzo_mcp/tools/common/enhanced_base.py +29 -32
  52. hanzo_mcp/tools/common/fastmcp_pagination.py +91 -94
  53. hanzo_mcp/tools/common/forgiving_edit.py +91 -87
  54. hanzo_mcp/tools/common/mode.py +15 -17
  55. hanzo_mcp/tools/common/mode_loader.py +27 -24
  56. hanzo_mcp/tools/common/paginated_base.py +61 -53
  57. hanzo_mcp/tools/common/paginated_response.py +72 -79
  58. hanzo_mcp/tools/common/pagination.py +50 -53
  59. hanzo_mcp/tools/common/permissions.py +4 -4
  60. hanzo_mcp/tools/common/personality.py +186 -138
  61. hanzo_mcp/tools/common/plugin_loader.py +54 -54
  62. hanzo_mcp/tools/common/stats.py +65 -47
  63. hanzo_mcp/tools/common/test_helpers.py +31 -0
  64. hanzo_mcp/tools/common/thinking_tool.py +4 -8
  65. hanzo_mcp/tools/common/tool_disable.py +17 -12
  66. hanzo_mcp/tools/common/tool_enable.py +13 -14
  67. hanzo_mcp/tools/common/tool_list.py +36 -28
  68. hanzo_mcp/tools/common/truncate.py +23 -23
  69. hanzo_mcp/tools/config/__init__.py +4 -4
  70. hanzo_mcp/tools/config/config_tool.py +42 -29
  71. hanzo_mcp/tools/config/index_config.py +37 -34
  72. hanzo_mcp/tools/config/mode_tool.py +175 -55
  73. hanzo_mcp/tools/database/__init__.py +15 -12
  74. hanzo_mcp/tools/database/database_manager.py +77 -75
  75. hanzo_mcp/tools/database/graph.py +137 -91
  76. hanzo_mcp/tools/database/graph_add.py +30 -18
  77. hanzo_mcp/tools/database/graph_query.py +178 -102
  78. hanzo_mcp/tools/database/graph_remove.py +33 -28
  79. hanzo_mcp/tools/database/graph_search.py +97 -75
  80. hanzo_mcp/tools/database/graph_stats.py +91 -59
  81. hanzo_mcp/tools/database/sql.py +107 -79
  82. hanzo_mcp/tools/database/sql_query.py +30 -24
  83. hanzo_mcp/tools/database/sql_search.py +29 -25
  84. hanzo_mcp/tools/database/sql_stats.py +47 -35
  85. hanzo_mcp/tools/editor/neovim_command.py +25 -28
  86. hanzo_mcp/tools/editor/neovim_edit.py +21 -23
  87. hanzo_mcp/tools/editor/neovim_session.py +60 -54
  88. hanzo_mcp/tools/filesystem/__init__.py +31 -30
  89. hanzo_mcp/tools/filesystem/ast_multi_edit.py +329 -249
  90. hanzo_mcp/tools/filesystem/ast_tool.py +4 -4
  91. hanzo_mcp/tools/filesystem/base.py +1 -1
  92. hanzo_mcp/tools/filesystem/batch_search.py +316 -224
  93. hanzo_mcp/tools/filesystem/content_replace.py +4 -4
  94. hanzo_mcp/tools/filesystem/diff.py +71 -59
  95. hanzo_mcp/tools/filesystem/directory_tree.py +7 -7
  96. hanzo_mcp/tools/filesystem/directory_tree_paginated.py +49 -37
  97. hanzo_mcp/tools/filesystem/edit.py +4 -4
  98. hanzo_mcp/tools/filesystem/find.py +173 -80
  99. hanzo_mcp/tools/filesystem/find_files.py +73 -52
  100. hanzo_mcp/tools/filesystem/git_search.py +157 -104
  101. hanzo_mcp/tools/filesystem/grep.py +8 -8
  102. hanzo_mcp/tools/filesystem/multi_edit.py +4 -8
  103. hanzo_mcp/tools/filesystem/read.py +12 -10
  104. hanzo_mcp/tools/filesystem/rules_tool.py +59 -43
  105. hanzo_mcp/tools/filesystem/search_tool.py +263 -207
  106. hanzo_mcp/tools/filesystem/symbols_tool.py +94 -54
  107. hanzo_mcp/tools/filesystem/tree.py +35 -33
  108. hanzo_mcp/tools/filesystem/unix_aliases.py +13 -18
  109. hanzo_mcp/tools/filesystem/watch.py +37 -36
  110. hanzo_mcp/tools/filesystem/write.py +4 -8
  111. hanzo_mcp/tools/jupyter/__init__.py +4 -4
  112. hanzo_mcp/tools/jupyter/base.py +4 -5
  113. hanzo_mcp/tools/jupyter/jupyter.py +67 -47
  114. hanzo_mcp/tools/jupyter/notebook_edit.py +4 -4
  115. hanzo_mcp/tools/jupyter/notebook_read.py +4 -7
  116. hanzo_mcp/tools/llm/__init__.py +5 -7
  117. hanzo_mcp/tools/llm/consensus_tool.py +72 -52
  118. hanzo_mcp/tools/llm/llm_manage.py +101 -60
  119. hanzo_mcp/tools/llm/llm_tool.py +226 -166
  120. hanzo_mcp/tools/llm/provider_tools.py +25 -26
  121. hanzo_mcp/tools/lsp/__init__.py +1 -1
  122. hanzo_mcp/tools/lsp/lsp_tool.py +228 -143
  123. hanzo_mcp/tools/mcp/__init__.py +2 -3
  124. hanzo_mcp/tools/mcp/mcp_add.py +27 -25
  125. hanzo_mcp/tools/mcp/mcp_remove.py +7 -8
  126. hanzo_mcp/tools/mcp/mcp_stats.py +23 -22
  127. hanzo_mcp/tools/mcp/mcp_tool.py +129 -98
  128. hanzo_mcp/tools/memory/__init__.py +39 -21
  129. hanzo_mcp/tools/memory/knowledge_tools.py +124 -99
  130. hanzo_mcp/tools/memory/memory_tools.py +90 -108
  131. hanzo_mcp/tools/search/__init__.py +7 -2
  132. hanzo_mcp/tools/search/find_tool.py +297 -212
  133. hanzo_mcp/tools/search/unified_search.py +366 -314
  134. hanzo_mcp/tools/shell/__init__.py +8 -7
  135. hanzo_mcp/tools/shell/auto_background.py +56 -49
  136. hanzo_mcp/tools/shell/base.py +1 -1
  137. hanzo_mcp/tools/shell/base_process.py +75 -75
  138. hanzo_mcp/tools/shell/bash_session.py +2 -2
  139. hanzo_mcp/tools/shell/bash_session_executor.py +4 -4
  140. hanzo_mcp/tools/shell/bash_tool.py +24 -31
  141. hanzo_mcp/tools/shell/command_executor.py +12 -12
  142. hanzo_mcp/tools/shell/logs.py +43 -33
  143. hanzo_mcp/tools/shell/npx.py +13 -13
  144. hanzo_mcp/tools/shell/npx_background.py +24 -21
  145. hanzo_mcp/tools/shell/npx_tool.py +18 -22
  146. hanzo_mcp/tools/shell/open.py +19 -21
  147. hanzo_mcp/tools/shell/pkill.py +31 -26
  148. hanzo_mcp/tools/shell/process_tool.py +32 -32
  149. hanzo_mcp/tools/shell/processes.py +57 -58
  150. hanzo_mcp/tools/shell/run_background.py +24 -25
  151. hanzo_mcp/tools/shell/run_command.py +5 -5
  152. hanzo_mcp/tools/shell/run_command_windows.py +5 -5
  153. hanzo_mcp/tools/shell/session_storage.py +3 -3
  154. hanzo_mcp/tools/shell/streaming_command.py +141 -126
  155. hanzo_mcp/tools/shell/uvx.py +24 -25
  156. hanzo_mcp/tools/shell/uvx_background.py +35 -33
  157. hanzo_mcp/tools/shell/uvx_tool.py +18 -22
  158. hanzo_mcp/tools/todo/__init__.py +6 -2
  159. hanzo_mcp/tools/todo/todo.py +50 -37
  160. hanzo_mcp/tools/todo/todo_read.py +5 -8
  161. hanzo_mcp/tools/todo/todo_write.py +5 -7
  162. hanzo_mcp/tools/vector/__init__.py +40 -28
  163. hanzo_mcp/tools/vector/ast_analyzer.py +176 -143
  164. hanzo_mcp/tools/vector/git_ingester.py +170 -179
  165. hanzo_mcp/tools/vector/index_tool.py +96 -44
  166. hanzo_mcp/tools/vector/infinity_store.py +283 -228
  167. hanzo_mcp/tools/vector/mock_infinity.py +39 -40
  168. hanzo_mcp/tools/vector/project_manager.py +88 -78
  169. hanzo_mcp/tools/vector/vector.py +59 -42
  170. hanzo_mcp/tools/vector/vector_index.py +30 -27
  171. hanzo_mcp/tools/vector/vector_search.py +64 -45
  172. hanzo_mcp/types.py +6 -4
  173. {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/METADATA +1 -1
  174. hanzo_mcp-0.8.0.dist-info/RECORD +185 -0
  175. hanzo_mcp-0.7.6.dist-info/RECORD +0 -182
  176. {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/WHEEL +0 -0
  177. {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/entry_points.txt +0 -0
  178. {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/search/unified_search.py

@@ -4,30 +4,28 @@ This is your main search interface that intelligently combines all available
 search capabilities including text, AST, symbols, memory, and semantic search.
 """

-import os
-import time
 import json
+import time
+import hashlib
 import subprocess
-from typing import List, Dict, Any, Optional, Set, Tuple
+from typing import Any, Dict, List, Optional
 from pathlib import Path
 from dataclasses import dataclass
-from collections import defaultdict
-import hashlib

-from hanzo_mcp.tools.common.base import BaseTool
-from hanzo_mcp.tools.common.paginated_response import AutoPaginatedResponse
-from hanzo_mcp.tools.common.decorators import with_context_normalization
 from hanzo_mcp.types import MCPResourceDocument
+from hanzo_mcp.tools.common.base import BaseTool

 # Import memory tools if available
 try:
     from hanzo_mcp.tools.memory.memory_tools import KnowledgeRetrieval
+
     MEMORY_AVAILABLE = True
 except ImportError:
     MEMORY_AVAILABLE = False

 try:
     import tree_sitter
+
     TREESITTER_AVAILABLE = True
 except ImportError:
     TREESITTER_AVAILABLE = False
@@ -35,6 +33,7 @@ except ImportError:
 try:
     import chromadb
     from sentence_transformers import SentenceTransformer
+
     VECTOR_SEARCH_AVAILABLE = True
 except ImportError:
     VECTOR_SEARCH_AVAILABLE = False
@@ -43,6 +42,7 @@ except ImportError:
 @dataclass
 class SearchResult:
     """Unified search result."""
+
     file_path: str
     line_number: int
     column: int
@@ -53,7 +53,7 @@ class SearchResult:
     score: float = 1.0
     node_type: Optional[str] = None
     semantic_context: Optional[str] = None
-
+
     def to_dict(self) -> Dict[str, Any]:
         return {
             "file": self.file_path,
@@ -66,10 +66,10 @@
                 "before": self.context_before,
                 "after": self.context_after,
                 "node_type": self.node_type,
-                "semantic": self.semantic_context
-            }
+                "semantic": self.semantic_context,
+            },
         }
-
+
     def __hash__(self):
         """Make result hashable for deduplication."""
         return hash((self.file_path, self.line_number, self.column, self.match_text))
@@ -77,7 +77,7 @@ class SearchResult:

 class UnifiedSearch(BaseTool):
     """THE primary search tool - your universal interface for finding anything.
-
+
     This is the main search tool you should use for finding:
     - Code patterns and text matches (using ripgrep)
     - AST nodes and code structure (using treesitter)
@@ -85,34 +85,34 @@ class UnifiedSearch(BaseTool):
     - Files and directories (using find tool)
     - Memory and knowledge base entries
     - Semantic/conceptual matches (using vector search)
-
+
     The tool automatically determines the best search strategy based on your query
     and runs multiple search types in parallel for comprehensive results.
-
+
     USAGE EXAMPLES:
-
+
     1. Find code patterns:
        search("error handling") # Finds all error handling code
        search("TODO|FIXME") # Regex search for TODOs
       search("async function") # Find async functions
-
+
     2. Find symbols/definitions:
        search("class UserService") # Find class definition
       search("handleRequest") # Find function/method
       search("MAX_RETRIES") # Find constant
-
+
     3. Find files:
        search("test_*.py", search_files=True) # Find test files
       search("config", search_files=True) # Find config files
-
+
     4. Semantic search:
        search("how authentication works") # Natural language query
       search("database connection logic") # Conceptual search
-
+
     5. Memory search:
        search("previous discussion about API design") # Search memories
       search("that bug we fixed last week") # Search knowledge
-
+
     The tool automatically:
     - Detects query intent and chooses appropriate search methods
     - Runs searches in parallel for speed
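
Note: the search(...) calls in the docstring above refer to the tool as exposed over MCP; internally they map onto UnifiedSearch.run(). A minimal sketch of driving the class directly, assuming hanzo-mcp 0.8.0 is installed (the import path and run() signature are taken from this diff; result dicts are shaped by SearchResult.to_dict() plus the "preview" field added in run()):

import asyncio

from hanzo_mcp.tools.search.unified_search import UnifiedSearch

async def main():
    tool = UnifiedSearch()
    # A natural-language pattern should auto-enable vector search when
    # chromadb/sentence-transformers are importable; text search always runs.
    doc = await tool.run(pattern="error handling", path=".", page=1, page_size=10)
    for hit in doc.data["results"]:
        print(hit)

asyncio.run(main())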
@@ -120,7 +120,7 @@ class UnifiedSearch(BaseTool):
     - Provides context around matches
     - Paginates results to stay within token limits
     - Respects .gitignore and other exclusions
-
+
     PRO TIPS:
     - Use natural language for conceptual searches
     - Use code syntax for exact matches
@@ -128,94 +128,102 @@ class UnifiedSearch(BaseTool):
     - Results are ranked by relevance and type
     - Use page parameter to get more results
     """
-
+
     name = "search"
     description = """THE primary unified search tool for rapid parallel search across all modalities.

 Find anything in your codebase using text, AST, symbols, files, memory, and semantic search.
 Automatically detects query intent and runs appropriate searches in parallel.
     """
-
+
     def __init__(self):
         super().__init__()
         self.ripgrep_available = self._check_ripgrep()
         self.vector_db = None
         self.embedder = None
-
+
         if VECTOR_SEARCH_AVAILABLE:
             self._init_vector_search()
-
+
     def _check_ripgrep(self) -> bool:
         """Check if ripgrep is available."""
         try:
-            subprocess.run(['rg', '--version'], capture_output=True, check=True)
+            subprocess.run(["rg", "--version"], capture_output=True, check=True)
             return True
-        except:
+        except Exception:
             return False
-
+
     def _init_vector_search(self):
         """Initialize vector search components."""
         try:
-            self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
+            self.embedder = SentenceTransformer("all-MiniLM-L6-v2")
             self.vector_db = chromadb.Client()
             # Create or get collection
             self.collection = self.vector_db.get_or_create_collection(
-                name="code_search",
-                metadata={"description": "Code semantic search"}
+                name="code_search", metadata={"description": "Code semantic search"}
             )
         except Exception as e:
             print(f"Failed to initialize vector search: {e}")
             self.vector_db = None
-
+
     def _should_use_vector_search(self, query: str) -> bool:
         """Determine if vector search would be helpful."""
         # Use vector search for natural language queries
         indicators = [
             len(query.split()) > 2, # Multi-word queries
-            not any(c in query for c in ['(', ')', '{', '}', '[', ']']), # Not code syntax
-            ' ' in query, # Has spaces (natural language)
-            not query.startswith('^') and not query.endswith('$'), # Not regex anchors
+            not any(
+                c in query for c in ["(", ")", "{", "}", "[", "]"]
+            ), # Not code syntax
+            " " in query, # Has spaces (natural language)
+            not query.startswith("^") and not query.endswith("$"), # Not regex anchors
         ]
         return sum(indicators) >= 2
-
+
     def _should_use_ast_search(self, query: str) -> bool:
         """Determine if AST search would be helpful."""
         # Use AST search for code patterns
         indicators = [
-            'class ' in query or 'function ' in query or 'def ' in query,
-            'import ' in query or 'from ' in query,
-            any(kw in query.lower() for kw in ['method', 'function', 'class', 'interface', 'struct']),
-            '::' in query or '->' in query or '.' in query, # Member access
+            "class " in query or "function " in query or "def " in query,
+            "import " in query or "from " in query,
+            any(
+                kw in query.lower()
+                for kw in ["method", "function", "class", "interface", "struct"]
+            ),
+            "::" in query or "->" in query or "." in query, # Member access
         ]
         return any(indicators)
-
+
     def _should_use_symbol_search(self, query: str) -> bool:
         """Determine if symbol search would be helpful."""
         # Use symbol search for identifiers
         return (
-            len(query.split()) <= 2 and # Short queries
-            query.replace('_', '').replace('-', '').isalnum() and # Looks like identifier
-            not ' ' in query.strip() # Single token
+            len(query.split()) <= 2 # Short queries
+            and query.replace("_", "")
+            .replace("-", "")
+            .isalnum() # Looks like identifier
+            and not " " in query.strip() # Single token
         )
-
-    async def run(self,
-                  pattern: str,
-                  path: str = ".",
-                  include: Optional[str] = None,
-                  exclude: Optional[str] = None,
-                  max_results_per_type: int = 20,
-                  context_lines: int = 3,
-                  search_files: bool = False,
-                  search_memory: bool = None,
-                  enable_text: bool = None,
-                  enable_ast: bool = None,
-                  enable_vector: bool = None,
-                  enable_symbol: bool = None,
-                  page_size: int = 50,
-                  page: int = 1,
-                  **kwargs) -> MCPResourceDocument:
+
+    async def run(
+        self,
+        pattern: str,
+        path: str = ".",
+        include: Optional[str] = None,
+        exclude: Optional[str] = None,
+        max_results_per_type: int = 20,
+        context_lines: int = 3,
+        search_files: bool = False,
+        search_memory: bool = None,
+        enable_text: bool = None,
+        enable_ast: bool = None,
+        enable_vector: bool = None,
+        enable_symbol: bool = None,
+        page_size: int = 50,
+        page: int = 1,
+        **kwargs,
+    ) -> MCPResourceDocument:
         """Execute unified search across all available search modalities.
-
+
         Args:
             pattern: Search query (text, regex, natural language, or glob for files)
             path: Directory to search in (default: current directory)
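
Note: the three _should_use_* predicates above drive the auto-detection defaults in run(). A standalone sketch of the vector-search heuristic (logic mirrors _should_use_vector_search; the function name is local to this example) shows how permissive the threshold is: any query with no brackets and no regex anchors already scores 2 of 4 indicators, so even a bare identifier qualifies.

def looks_like_natural_language(query: str) -> bool:
    # Two of four indicators must hold, as in _should_use_vector_search.
    indicators = [
        len(query.split()) > 2,                                 # multi-word
        not any(c in query for c in "(){}[]"),                  # no code syntax
        " " in query,                                           # has spaces
        not query.startswith("^") and not query.endswith("$"),  # no regex anchors
    ]
    return sum(indicators) >= 2

assert looks_like_natural_language("how authentication works")  # 4 of 4
assert looks_like_natural_language("MAX_RETRIES")                # 2 of 4: no brackets, no anchors
assert not looks_like_natural_language("foo(bar)")               # 1 of 4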
@@ -229,28 +237,32 @@ class UnifiedSearch(BaseTool):
             page_size: Results per page (default: 50)
             page: Page number to retrieve (default: 1)
         """
-
+
         # Auto-detect search types based on query
         if search_memory is None:
             # Search memory for natural language queries or specific references
-            search_memory = (
-                MEMORY_AVAILABLE and
-                (self._should_use_vector_search(pattern) or
-                any(word in pattern.lower() for word in ['previous', 'discussion', 'remember', 'last']))
+            search_memory = MEMORY_AVAILABLE and (
+                self._should_use_vector_search(pattern)
+                or any(
+                    word in pattern.lower()
+                    for word in ["previous", "discussion", "remember", "last"]
+                )
             )
-
+
         if enable_text is None:
             enable_text = True # Always use text search as baseline
-
+
         if enable_vector is None:
-            enable_vector = self._should_use_vector_search(pattern) and VECTOR_SEARCH_AVAILABLE
-
+            enable_vector = (
+                self._should_use_vector_search(pattern) and VECTOR_SEARCH_AVAILABLE
+            )
+
         if enable_ast is None:
             enable_ast = self._should_use_ast_search(pattern) and TREESITTER_AVAILABLE
-
+
         if enable_symbol is None:
             enable_symbol = self._should_use_symbol_search(pattern)
-
+
         # Collect results from all enabled search types
         all_results = []
         search_stats = {
@@ -259,9 +271,9 @@
             "search_types_used": [],
             "total_matches": 0,
             "unique_matches": 0,
-            "time_ms": {}
+            "time_ms": {},
         }
-
+
         # 1. Text search (ripgrep) - always fast, do first
         if enable_text:
             start = time.time()
@@ -271,7 +283,7 @@
             search_stats["time_ms"]["text"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("text")
             all_results.extend(text_results)
-
+
         # 2. AST search - for code structure
         if enable_ast and TREESITTER_AVAILABLE:
             start = time.time()
@@ -281,7 +293,7 @@
             search_stats["time_ms"]["ast"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("ast")
             all_results.extend(ast_results)
-
+
         # 3. Symbol search - for definitions
         if enable_symbol:
             start = time.time()
@@ -291,7 +303,7 @@
             search_stats["time_ms"]["symbol"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("symbol")
             all_results.extend(symbol_results)
-
+
         # 4. Vector search - for semantic similarity
         if enable_vector and self.vector_db:
             start = time.time()
@@ -301,7 +313,7 @@
             search_stats["time_ms"]["vector"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("vector")
             all_results.extend(vector_results)
-
+
         # 5. File search - for finding files by name/pattern
         if search_files:
             start = time.time()
@@ -311,7 +323,7 @@
             search_stats["time_ms"]["files"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("files")
             all_results.extend(file_results)
-
+
         # 6. Memory search - for knowledge base and previous discussions
         if search_memory:
             start = time.time()
@@ -321,20 +333,20 @@
             search_stats["time_ms"]["memory"] = int((time.time() - start) * 1000)
             search_stats["search_types_used"].append("memory")
             all_results.extend(memory_results)
-
+
         # Deduplicate and rank results
         unique_results = self._deduplicate_results(all_results)
         ranked_results = self._rank_results(unique_results, pattern)
-
+
         search_stats["total_matches"] = len(all_results)
         search_stats["unique_matches"] = len(ranked_results)
-
+
         # Paginate results
         total_results = len(ranked_results)
         start_idx = (page - 1) * page_size
         end_idx = start_idx + page_size
         page_results = ranked_results[start_idx:end_idx]
-
+
         # Format results for output
         formatted_results = []
         for result in page_results:
@@ -342,7 +354,7 @@
             # Add match preview with context
             formatted["preview"] = self._format_preview(result)
             formatted_results.append(formatted)
-
+
         # Create paginated response
         response_data = {
             "results": formatted_results,
@@ -353,21 +365,20 @@
                 "total_results": total_results,
                 "total_pages": (total_results + page_size - 1) // page_size,
                 "has_next": end_idx < total_results,
-                "has_prev": page > 1
-            }
+                "has_prev": page > 1,
+            },
         }
-
+
         return MCPResourceDocument(data=response_data)
-
+
     async def call(self, **kwargs) -> str:
         """Tool interface for MCP - converts result to JSON string."""
         result = await self.run(**kwargs)
         return result.to_json_string()
-
+
     def register(self, mcp_server) -> None:
         """Register tool with MCP server."""
-        from mcp.server import FastMCP
-
+
         @mcp_server.tool(name=self.name, description=self.description)
         async def search_handler(
             pattern: str,
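
Note: the pagination block in run() is plain ceiling-division arithmetic over the ranked list. A minimal sketch with local names, mirroring the start_idx/end_idx/total_pages lines above:

def paginate(items: list, page: int, page_size: int) -> dict:
    start_idx = (page - 1) * page_size  # pages are 1-indexed
    end_idx = start_idx + page_size
    return {
        "slice": items[start_idx:end_idx],
        "total_pages": (len(items) + page_size - 1) // page_size,  # ceiling division
        "has_next": end_idx < len(items),
        "has_prev": page > 1,
    }

info = paginate(list(range(125)), page=3, page_size=50)
assert len(info["slice"]) == 25                          # pages of 50, 50, 25
assert info["total_pages"] == 3 and not info["has_next"]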
@@ -402,77 +413,83 @@ class UnifiedSearch(BaseTool):
             search_files=search_files,
             search_memory=search_memory,
         )
-
-    async def _text_search(self,
-                           pattern: str,
-                           path: str,
-                           include: Optional[str],
-                           exclude: Optional[str],
-                           max_results: int,
-                           context_lines: int) -> List[SearchResult]:
+
+    async def _text_search(
+        self,
+        pattern: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+        context_lines: int,
+    ) -> List[SearchResult]:
         """Perform text search using ripgrep."""
         results = []
-
+
         if not self.ripgrep_available:
             # Fallback to Python implementation
-            return await self._python_text_search(pattern, path, include, exclude, max_results, context_lines)
-
+            return await self._python_text_search(
+                pattern, path, include, exclude, max_results, context_lines
+            )
+
         # Build ripgrep command
-        cmd = ['rg', '--json', '--max-count', str(max_results)]
-
+        cmd = ["rg", "--json", "--max-count", str(max_results)]
+
         if context_lines > 0:
-            cmd.extend(['-C', str(context_lines)])
-
+            cmd.extend(["-C", str(context_lines)])
+
         if include:
-            cmd.extend(['--glob', include])
-
+            cmd.extend(["--glob", include])
+
         if exclude:
-            cmd.extend(['--glob', f'!{exclude}'])
-
+            cmd.extend(["--glob", f"!{exclude}"])
+
         cmd.extend([pattern, path])
-
+
         try:
             proc = subprocess.run(cmd, capture_output=True, text=True)
-
+
             for line in proc.stdout.splitlines():
                 try:
                     data = json.loads(line)
-                    if data.get('type') == 'match':
-                        match_data = data['data']
-
+                    if data.get("type") == "match":
+                        match_data = data["data"]
+
                         result = SearchResult(
-                            file_path=match_data['path']['text'],
-                            line_number=match_data['line_number'],
-                            column=match_data['submatches'][0]['start'],
-                            match_text=match_data['lines']['text'].strip(),
+                            file_path=match_data["path"]["text"],
+                            line_number=match_data["line_number"],
+                            column=match_data["submatches"][0]["start"],
+                            match_text=match_data["lines"]["text"].strip(),
                             context_before=[],
                             context_after=[],
-                            match_type='text',
-                            score=1.0
+                            match_type="text",
+                            score=1.0,
                         )
-
+
                         # Extract context if available
-                        if 'context' in data:
+                        if "context" in data:
                             # Parse context lines
                             pass
-
+
                         results.append(result)
-
+
                 except json.JSONDecodeError:
                     continue
-
+
         except subprocess.CalledProcessError:
             pass
-
+
         return results
-
-    async def _ast_search(self,
-                          pattern: str,
-                          path: str,
-                          include: Optional[str],
-                          exclude: Optional[str],
-                          max_results: int,
-                          context_lines: int) -> List[SearchResult]:
+
+    async def _ast_search(
+        self,
+        pattern: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+        context_lines: int,
+    ) -> List[SearchResult]:
         """Perform AST-based search using treesitter."""
         # Try to use grep-ast if available
         try:
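
Note: _text_search shells out to rg --json, which emits one JSON event per output line; only events with "type": "match" carry hits, and the field names used above follow ripgrep's documented JSON schema. A standalone sketch of the same parsing loop (requires ripgrep on PATH; the helper name is local to this example):

import json
import subprocess

def rg_matches(pattern: str, path: str = "."):
    proc = subprocess.run(
        ["rg", "--json", pattern, path], capture_output=True, text=True
    )
    for line in proc.stdout.splitlines():
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip non-JSON output, as the tool does
        if event.get("type") == "match":
            data = event["data"]
            yield (
                data["path"]["text"],            # file containing the match
                data["line_number"],             # 1-indexed line number
                data["submatches"][0]["start"],  # offset of the first submatch
                data["lines"]["text"].rstrip(),  # the matching line
            )

for hit in rg_matches("TODO|FIXME"):
    print(hit)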
@@ -480,15 +497,14 @@ class UnifiedSearch(BaseTool):
         except ImportError:
             # grep-ast not installed, skip AST search
             return []
-
+
         results = []
-
+
         try:
-
             # Get files to search
             search_path = Path(path or ".")
             files_to_search = []
-
+
             if search_path.is_file():
                 files_to_search = [search_path]
             else:
@@ -500,16 +516,16 @@ class UnifiedSearch(BaseTool):
                     files_to_search.extend(search_path.rglob(ext))
                     if len(files_to_search) >= max_results:
                         break
-
+
             # Search each file
             for file_path in files_to_search[:max_results]:
                 if not file_path.is_file():
                     continue
-
+
                 try:
                     with open(file_path, "r", encoding="utf-8") as f:
                         code = f.read()
-
+
                     # Process with grep-ast
                     tc = TreeContext(
                         str(file_path),
@@ -518,47 +534,57 @@ class UnifiedSearch(BaseTool):
                         verbose=False,
                         line_number=True,
                     )
-
+
                     # Find matches
                     matches = tc.grep(pattern, ignore_case=False)
-
+
                     for match in matches:
                         # Extract context
-                        lines = code.split('\n')
+                        lines = code.split("\n")
                         line_num = match # This might need adjustment based on actual return type
-
+
                         result = SearchResult(
                             file_path=str(file_path),
                             line_number=line_num,
                             column=0,
-                            match_text=lines[line_num - 1] if 0 < line_num <= len(lines) else "",
-                            context_before=lines[max(0, line_num - context_lines - 1):line_num - 1],
-                            context_after=lines[line_num:min(len(lines), line_num + context_lines)],
-                            match_type='ast',
+                            match_text=(
+                                lines[line_num - 1]
+                                if 0 < line_num <= len(lines)
+                                else ""
+                            ),
+                            context_before=lines[
+                                max(0, line_num - context_lines - 1) : line_num - 1
+                            ],
+                            context_after=lines[
+                                line_num : min(len(lines), line_num + context_lines)
+                            ],
+                            match_type="ast",
                             score=0.9,
-                            node_type='ast_match',
-                            semantic_context=None
+                            node_type="ast_match",
+                            semantic_context=None,
                         )
                         results.append(result)
-
+
                 except Exception:
                     # Skip files that can't be parsed
                     continue
-
+
         except Exception as e:
             print(f"AST search error: {e}")
-
+
         return results
-
-    async def _symbol_search(self,
-                             pattern: str,
-                             path: str,
-                             include: Optional[str],
-                             exclude: Optional[str],
-                             max_results: int) -> List[SearchResult]:
+
+    async def _symbol_search(
+        self,
+        pattern: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+    ) -> List[SearchResult]:
         """Search for symbol definitions."""
         results = []
-
+
         # Use ctags or similar for symbol search
         # For now, use specialized ripgrep patterns
         symbol_patterns = [
@@ -568,80 +594,92 @@ class UnifiedSearch(BaseTool):
             f"^\\s*type\\s+{pattern}", # Type definitions
             f"interface\\s+{pattern}", # Interface definitions
         ]
-
+
         for symbol_pattern in symbol_patterns:
             symbol_results = await self._text_search(
-                symbol_pattern, path, include, exclude,
-                max_results // len(symbol_patterns), 0
+                symbol_pattern,
+                path,
+                include,
+                exclude,
+                max_results // len(symbol_patterns),
+                0,
             )
-
+
             for res in symbol_results:
-                res.match_type = 'symbol'
+                res.match_type = "symbol"
                 res.score = 1.1 # Boost symbol definitions
                 results.append(res)
-
+
         return results
-
-    async def _vector_search(self,
-                             query: str,
-                             path: str,
-                             include: Optional[str],
-                             exclude: Optional[str],
-                             max_results: int,
-                             context_lines: int) -> List[SearchResult]:
+
+    async def _vector_search(
+        self,
+        query: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+        context_lines: int,
+    ) -> List[SearchResult]:
         """Perform semantic vector search."""
         if not self.vector_db or not self.embedder:
             return []
-
+
         results = []
-
+
         try:
             # Embed the query
             query_embedding = self.embedder.encode(query).tolist()
-
+
             # Search in vector database
             search_results = self.collection.query(
                 query_embeddings=[query_embedding],
                 n_results=max_results,
-                where={"path": {"$contains": path}} if path != "." else None
+                where={"path": {"$contains": path}} if path != "." else None,
             )
-
-            if search_results['ids'][0]:
-                for i, doc_id in enumerate(search_results['ids'][0]):
-                    metadata = search_results['metadatas'][0][i]
-
+
+            if search_results["ids"][0]:
+                for i, _doc_id in enumerate(search_results["ids"][0]):
+                    metadata = search_results["metadatas"][0][i]
+
                     result = SearchResult(
-                        file_path=metadata['file_path'],
-                        line_number=metadata['line_number'],
+                        file_path=metadata["file_path"],
+                        line_number=metadata["line_number"],
                         column=0,
-                        match_text=search_results['documents'][0][i],
+                        match_text=search_results["documents"][0][i],
                         context_before=[],
                         context_after=[],
-                        match_type='vector',
-                        score=1.0 - search_results['distances'][0][i], # Convert distance to similarity
-                        semantic_context=metadata.get('context', '')
+                        match_type="vector",
+                        score=1.0
+                        - search_results["distances"][0][
+                            i
+                        ], # Convert distance to similarity
+                        semantic_context=metadata.get("context", ""),
                     )
                     results.append(result)
-
+
         except Exception as e:
             print(f"Vector search error: {e}")
-
+
         return results
-
-    async def _file_search(self,
-                           pattern: str,
-                           path: str,
-                           include: Optional[str],
-                           exclude: Optional[str],
-                           max_results: int) -> List[SearchResult]:
+
+    async def _file_search(
+        self,
+        pattern: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+    ) -> List[SearchResult]:
         """Search for files by name/pattern using find tool."""
         results = []
-
+
         try:
             # Import and use find tool
             from hanzo_mcp.tools.search.find_tool import FindTool
+
             find_tool = FindTool()
-
+
             # Call find tool with pattern
             find_result = await find_tool.run(
                 pattern=pattern,
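
Note: _vector_search converts chromadb distances into scores with 1.0 - distance. A minimal sketch of that embed-and-query flow, assuming chromadb and sentence-transformers are installed and reusing the collection name and model from _init_vector_search. One caveat: chromadb collections default to an L2 metric, so this "similarity" is not bounded to [0, 1] unless the collection is configured for cosine distance.

import chromadb
from sentence_transformers import SentenceTransformer

embedder = SentenceTransformer("all-MiniLM-L6-v2")
client = chromadb.Client()
collection = client.get_or_create_collection("code_search")

query_embedding = embedder.encode("database connection logic").tolist()
res = collection.query(query_embeddings=[query_embedding], n_results=5)

# chromadb returns parallel lists per query; an empty collection yields empty lists.
for doc, dist in zip(res["documents"][0], res["distances"][0]):
    print(round(1.0 - dist, 3), doc[:60])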
@@ -650,9 +688,9 @@ class UnifiedSearch(BaseTool):
                 max_results=max_results,
                 regex=False, # Use glob patterns by default
                 fuzzy=False,
-                case_sensitive=False
+                case_sensitive=False,
             )
-
+
             # Convert find results to SearchResult format
             if find_result.data and "results" in find_result.data:
                 for file_match in find_result.data["results"]:
@@ -663,73 +701,74 @@ class UnifiedSearch(BaseTool):
                         match_text=file_match["name"],
                         context_before=[],
                         context_after=[],
-                        match_type='file',
+                        match_type="file",
                         score=1.0,
-                        semantic_context=f"File: {file_match['extension']} ({file_match['size']} bytes)"
+                        semantic_context=f"File: {file_match['extension']} ({file_match['size']} bytes)",
                     )
                     results.append(result)
-
+
         except Exception as e:
             print(f"File search error: {e}")
-
+
         return results
-
-    async def _memory_search(self,
-                             query: str,
-                             max_results: int,
-                             context_lines: int) -> List[SearchResult]:
+
+    async def _memory_search(
+        self, query: str, max_results: int, context_lines: int
+    ) -> List[SearchResult]:
         """Search in memory/knowledge base."""
         results = []
-
+
         if not MEMORY_AVAILABLE:
             return results
-
+
         try:
             # Create memory retrieval tool
             retrieval_tool = KnowledgeRetrieval()
-
+
             # Search memories
             memory_result = await retrieval_tool.run(
                 query=query,
                 top_k=max_results,
-                threshold=0.5 # Minimum relevance threshold
+                threshold=0.5, # Minimum relevance threshold
             )
-
+
             # Convert memory results to SearchResult format
             if memory_result.data and "results" in memory_result.data:
                 for mem in memory_result.data["results"]:
                     # Extract content and metadata
                     content = mem.get("content", "")
                     metadata = mem.get("metadata", {})
-
+
                     # Create a virtual file path for memories
                     memory_type = metadata.get("type", "memory")
                     memory_id = metadata.get("id", "unknown")
                     virtual_path = f"memory://{memory_type}/{memory_id}"
-
+
                     result = SearchResult(
                         file_path=virtual_path,
                         line_number=1,
                         column=0,
-                        match_text=content[:200] + "..." if len(content) > 200 else content,
+                        match_text=(
+                            content[:200] + "..." if len(content) > 200 else content
+                        ),
                         context_before=[],
                         context_after=[],
-                        match_type='memory',
+                        match_type="memory",
                         score=mem.get("score", 0.8),
-                        semantic_context=f"Memory type: {memory_type}, Created: {metadata.get('created_at', 'unknown')}"
+                        semantic_context=f"Memory type: {memory_type}, Created: {metadata.get('created_at', 'unknown')}",
                     )
                     results.append(result)
-
+
         except Exception as e:
             print(f"Memory search error: {e}")
-
+
         return results
-
+
     def _deduplicate_results(self, results: List[SearchResult]) -> List[SearchResult]:
         """Remove duplicate results across search types."""
         seen = set()
         unique = []
-
+
         for result in results:
             key = (result.file_path, result.line_number, result.match_text.strip())
             if key not in seen:
@@ -738,7 +777,11 @@ class UnifiedSearch(BaseTool):
             else:
                 # Merge information from duplicate
                 for existing in unique:
-                    if (existing.file_path, existing.line_number, existing.match_text.strip()) == key:
+                    if (
+                        existing.file_path,
+                        existing.line_number,
+                        existing.match_text.strip(),
+                    ) == key:
                         # Update with better context or node type
                         if result.node_type and not existing.node_type:
                             existing.node_type = result.node_type
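
Note: _deduplicate_results keeps the first occurrence of each (file, line, stripped text) key and lets later duplicates donate only a better score or missing metadata. A sketch of the same strategy on plain dicts (the dict keys here are hypothetical):

def dedupe(results: list) -> list:
    seen, unique = set(), []
    for r in results:
        key = (r["file"], r["line"], r["text"].strip())
        if key not in seen:
            seen.add(key)
            unique.append(r)
        else:
            for kept in unique:  # linear rescan, as in the diff
                if (kept["file"], kept["line"], kept["text"].strip()) == key:
                    kept["score"] = max(kept["score"], r["score"])
                    break
    return unique

hits = [
    {"file": "a.py", "line": 3, "text": "def main():", "score": 1.0},  # text hit
    {"file": "a.py", "line": 3, "text": "def main():", "score": 1.1},  # symbol duplicate
]
assert dedupe(hits)[0]["score"] == 1.1

The inner rescan of unique makes the merge quadratic in the worst case; a dict keyed by the same tuple would merge each duplicate in constant time.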
@@ -747,42 +790,49 @@ class UnifiedSearch(BaseTool):
                         # Take best score
                         existing.score = max(existing.score, result.score)
                         break
-
+
         return unique
-
-    def _rank_results(self, results: List[SearchResult], query: str) -> List[SearchResult]:
+
+    def _rank_results(
+        self, results: List[SearchResult], query: str
+    ) -> List[SearchResult]:
         """Rank results by relevance."""
         # Simple ranking based on:
         # 1. Match type score
         # 2. Exact match bonus
         # 3. File path relevance
-
+
         for result in results:
             # Exact match bonus
             if query.lower() in result.match_text.lower():
                 result.score *= 1.2
-
+
             # Path relevance (prefer non-test, non-vendor files)
-            if any(skip in result.file_path for skip in ['test', 'vendor', 'node_modules']):
+            if any(
+                skip in result.file_path for skip in ["test", "vendor", "node_modules"]
+            ):
                 result.score *= 0.8
-
+
             # Prefer definition files
-            if any(pattern in result.file_path for pattern in ['index.', 'main.', 'api.', 'types.']):
+            if any(
+                pattern in result.file_path
+                for pattern in ["index.", "main.", "api.", "types."]
+            ):
                 result.score *= 1.1
-
+
         # Sort by score descending, then by file path
         results.sort(key=lambda r: (-r.score, r.file_path, r.line_number))
-
+
         return results
@@ -791,48 +841,49 @@ class UnifiedSearch(BaseTool):
             lines.append(f" {' ' * result.column}^")
         else:
             lines.append(f"> {match_line}")
-
+
         # Add context after
         for line in result.context_after[:2]:
             lines.append(f" {line}")
-
-        return '\n'.join(lines)
-
-    async def _python_text_search(self,
-                                  pattern: str,
-                                  path: str,
-                                  include: Optional[str],
-                                  exclude: Optional[str],
-                                  max_results: int,
-                                  context_lines: int) -> List[SearchResult]:
+
+        return "\n".join(lines)
+
+    async def _python_text_search(
+        self,
+        pattern: str,
+        path: str,
+        include: Optional[str],
+        exclude: Optional[str],
+        max_results: int,
+        context_lines: int,
+    ) -> List[SearchResult]:
         """Fallback Python text search when ripgrep not available."""
         results = []
         count = 0
-
+
         import re
-        import glob
-
+
         # Compile pattern
         try:
             regex = re.compile(pattern)
         except re.error:
             # Treat as literal string
             regex = re.compile(re.escape(pattern))
-
+
         # Find files
-        for file_path in Path(path).rglob(include or '*'):
+        for file_path in Path(path).rglob(include or "*"):
             if count >= max_results:
                 break
-
+
             if file_path.is_file():
                 try:
-                    with open(file_path, 'r', encoding='utf-8') as f:
+                    with open(file_path, "r", encoding="utf-8") as f:
                         lines = f.readlines()
-
+
                     for i, line in enumerate(lines):
                         if count >= max_results:
                             break
-
+
                         match = regex.search(line)
                         if match:
                             result = SearchResult(
@@ -840,114 +891,115 @@ class UnifiedSearch(BaseTool):
                                 line_number=i + 1,
                                 column=match.start(),
                                 match_text=line.strip(),
-                                context_before=lines[max(0, i-context_lines):i],
-                                context_after=lines[i+1:i+1+context_lines],
-                                match_type='text',
-                                score=1.0
+                                context_before=lines[max(0, i - context_lines) : i],
+                                context_after=lines[i + 1 : i + 1 + context_lines],
+                                match_type="text",
+                                score=1.0,
                             )
                             results.append(result)
                             count += 1
-
+
                 except Exception:
                     continue
-
+
         return results


 # Index builder for vector search
 class CodeIndexer:
     """Build and maintain vector search index."""
-
+
     def __init__(self, vector_db, embedder):
         self.vector_db = vector_db
         self.embedder = embedder
         self.collection = vector_db.get_or_create_collection("code_search")
-
+
     async def index_directory(self, path: str, file_patterns: List[str] = None):
         """Index a directory for vector search."""
         if file_patterns is None:
-            file_patterns = ['*.py', '*.js', '*.ts', '*.go', '*.java', '*.cpp', '*.c']
-
+            file_patterns = ["*.py", "*.js", "*.ts", "*.go", "*.java", "*.cpp", "*.c"]
+
         documents = []
         metadatas = []
         ids = []
-
+
         for pattern in file_patterns:
             for file_path in Path(path).rglob(pattern):
                 if file_path.is_file():
                     try:
-                        with open(file_path, 'r', encoding='utf-8') as f:
+                        with open(file_path, "r", encoding="utf-8") as f:
                             content = f.read()
-
+
                         # Split into chunks (functions, classes, etc.)
                         chunks = self._split_code_intelligently(content, file_path)
-
+
                         for chunk in chunks:
                             doc_id = hashlib.md5(
                                 f"{file_path}:{chunk['line']}:{chunk['text'][:50]}".encode()
                             ).hexdigest()
-
-                            documents.append(chunk['text'])
-                            metadatas.append({
-                                'file_path': str(file_path),
-                                'line_number': chunk['line'],
-                                'context': chunk.get('context', ''),
-                                'type': chunk.get('type', 'code')
-                            })
+
+                            documents.append(chunk["text"])
+                            metadatas.append(
+                                {
+                                    "file_path": str(file_path),
+                                    "line_number": chunk["line"],
+                                    "context": chunk.get("context", ""),
+                                    "type": chunk.get("type", "code"),
+                                }
+                            )
                             ids.append(doc_id)
-
+
                     except Exception as e:
                         print(f"Error indexing {file_path}: {e}")
-
+
         # Batch embed and store
         if documents:
             embeddings = self.embedder.encode(documents).tolist()
             self.collection.add(
-                embeddings=embeddings,
-                documents=documents,
-                metadatas=metadatas,
-                ids=ids
+                embeddings=embeddings, documents=documents, metadatas=metadatas, ids=ids
            )
-
-    def _split_code_intelligently(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
+
+    def _split_code_intelligently(
+        self, content: str, file_path: Path
+    ) -> List[Dict[str, Any]]:
         """Split code into meaningful chunks."""
         # Simple line-based splitting for now
         # TODO: Use AST for better splitting
         chunks = []
-        lines = content.split('\n')
-
+        lines = content.split("\n")
+
         # Group into function-sized chunks
         current_chunk = []
         current_line = 1
-
+
         for i, line in enumerate(lines):
             current_chunk.append(line)
-
+
             # Split on function/class definitions or every 50 lines
-            if (len(current_chunk) >= 50 or
-                any(kw in line for kw in ['def ', 'function ', 'class ', 'interface '])):
-
+            if len(current_chunk) >= 50 or any(
+                kw in line for kw in ["def ", "function ", "class ", "interface "]
+            ):
                 if current_chunk:
-                    chunks.append({
-                        'text': '\n'.join(current_chunk),
-                        'line': current_line,
-                        'type': 'code'
-                    })
+                    chunks.append(
+                        {
+                            "text": "\n".join(current_chunk),
+                            "line": current_line,
+                            "type": "code",
+                        }
+                    )
                     current_chunk = []
                     current_line = i + 2
-
+
         # Add remaining
         if current_chunk:
-            chunks.append({
-                'text': '\n'.join(current_chunk),
-                'line': current_line,
-                'type': 'code'
-            })
-
+            chunks.append(
+                {"text": "\n".join(current_chunk), "line": current_line, "type": "code"}
+            )
+
         return chunks


 # Tool registration
 def create_unified_search_tool():
     """Factory function to create unified search tool."""
-    return UnifiedSearch()
+    return UnifiedSearch()
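
Note: CodeIndexer derives each chunk's id from an MD5 over "path:line:first-50-chars", so re-indexing an unchanged chunk reproduces the same id across runs. A sketch of that scheme (the function name is local to this example):

import hashlib

def chunk_id(file_path: str, line: int, text: str) -> str:
    # Same recipe as index_directory above: path, starting line, and the
    # first 50 characters of the chunk text feed the hash.
    return hashlib.md5(f"{file_path}:{line}:{text[:50]}".encode()).hexdigest()

a = chunk_id("pkg/mod.py", 1, "def handler():\n    ...")
b = chunk_id("pkg/mod.py", 1, "def handler():\n    ...")
assert a == b  # stable across runs for unchanged chunks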