hanzo-mcp 0.6.12__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

This version of hanzo-mcp has been marked as a potentially problematic release.

Files changed (117)
  1. hanzo_mcp/__init__.py +2 -2
  2. hanzo_mcp/analytics/__init__.py +5 -0
  3. hanzo_mcp/analytics/posthog_analytics.py +364 -0
  4. hanzo_mcp/cli.py +5 -5
  5. hanzo_mcp/cli_enhanced.py +7 -7
  6. hanzo_mcp/cli_plugin.py +91 -0
  7. hanzo_mcp/config/__init__.py +1 -1
  8. hanzo_mcp/config/settings.py +70 -7
  9. hanzo_mcp/config/tool_config.py +20 -6
  10. hanzo_mcp/dev_server.py +3 -3
  11. hanzo_mcp/prompts/project_system.py +1 -1
  12. hanzo_mcp/server.py +40 -3
  13. hanzo_mcp/server_enhanced.py +69 -0
  14. hanzo_mcp/tools/__init__.py +140 -31
  15. hanzo_mcp/tools/agent/__init__.py +85 -4
  16. hanzo_mcp/tools/agent/agent_tool.py +104 -6
  17. hanzo_mcp/tools/agent/agent_tool_v2.py +459 -0
  18. hanzo_mcp/tools/agent/clarification_protocol.py +220 -0
  19. hanzo_mcp/tools/agent/clarification_tool.py +68 -0
  20. hanzo_mcp/tools/agent/claude_cli_tool.py +125 -0
  21. hanzo_mcp/tools/agent/claude_desktop_auth.py +508 -0
  22. hanzo_mcp/tools/agent/cli_agent_base.py +191 -0
  23. hanzo_mcp/tools/agent/code_auth.py +436 -0
  24. hanzo_mcp/tools/agent/code_auth_tool.py +194 -0
  25. hanzo_mcp/tools/agent/codex_cli_tool.py +123 -0
  26. hanzo_mcp/tools/agent/critic_tool.py +376 -0
  27. hanzo_mcp/tools/agent/gemini_cli_tool.py +128 -0
  28. hanzo_mcp/tools/agent/grok_cli_tool.py +128 -0
  29. hanzo_mcp/tools/agent/iching_tool.py +380 -0
  30. hanzo_mcp/tools/agent/network_tool.py +273 -0
  31. hanzo_mcp/tools/agent/prompt.py +62 -20
  32. hanzo_mcp/tools/agent/review_tool.py +433 -0
  33. hanzo_mcp/tools/agent/swarm_tool.py +535 -0
  34. hanzo_mcp/tools/agent/swarm_tool_v2.py +594 -0
  35. hanzo_mcp/tools/common/__init__.py +15 -1
  36. hanzo_mcp/tools/common/base.py +5 -4
  37. hanzo_mcp/tools/common/batch_tool.py +103 -11
  38. hanzo_mcp/tools/common/config_tool.py +2 -2
  39. hanzo_mcp/tools/common/context.py +2 -2
  40. hanzo_mcp/tools/common/context_fix.py +26 -0
  41. hanzo_mcp/tools/common/critic_tool.py +196 -0
  42. hanzo_mcp/tools/common/decorators.py +208 -0
  43. hanzo_mcp/tools/common/enhanced_base.py +106 -0
  44. hanzo_mcp/tools/common/fastmcp_pagination.py +369 -0
  45. hanzo_mcp/tools/common/forgiving_edit.py +243 -0
  46. hanzo_mcp/tools/common/mode.py +116 -0
  47. hanzo_mcp/tools/common/mode_loader.py +105 -0
  48. hanzo_mcp/tools/common/paginated_base.py +230 -0
  49. hanzo_mcp/tools/common/paginated_response.py +307 -0
  50. hanzo_mcp/tools/common/pagination.py +226 -0
  51. hanzo_mcp/tools/common/permissions.py +1 -1
  52. hanzo_mcp/tools/common/personality.py +936 -0
  53. hanzo_mcp/tools/common/plugin_loader.py +287 -0
  54. hanzo_mcp/tools/common/stats.py +4 -4
  55. hanzo_mcp/tools/common/tool_list.py +4 -1
  56. hanzo_mcp/tools/common/truncate.py +101 -0
  57. hanzo_mcp/tools/common/validation.py +1 -1
  58. hanzo_mcp/tools/config/__init__.py +3 -1
  59. hanzo_mcp/tools/config/config_tool.py +1 -1
  60. hanzo_mcp/tools/config/mode_tool.py +209 -0
  61. hanzo_mcp/tools/database/__init__.py +1 -1
  62. hanzo_mcp/tools/editor/__init__.py +1 -1
  63. hanzo_mcp/tools/filesystem/__init__.py +48 -14
  64. hanzo_mcp/tools/filesystem/ast_multi_edit.py +562 -0
  65. hanzo_mcp/tools/filesystem/batch_search.py +3 -3
  66. hanzo_mcp/tools/filesystem/diff.py +2 -2
  67. hanzo_mcp/tools/filesystem/directory_tree_paginated.py +338 -0
  68. hanzo_mcp/tools/filesystem/rules_tool.py +235 -0
  69. hanzo_mcp/tools/filesystem/{unified_search.py → search_tool.py} +12 -12
  70. hanzo_mcp/tools/filesystem/{symbols_unified.py → symbols_tool.py} +104 -5
  71. hanzo_mcp/tools/filesystem/watch.py +3 -2
  72. hanzo_mcp/tools/jupyter/__init__.py +2 -2
  73. hanzo_mcp/tools/jupyter/jupyter.py +1 -1
  74. hanzo_mcp/tools/llm/__init__.py +3 -3
  75. hanzo_mcp/tools/llm/llm_tool.py +648 -143
  76. hanzo_mcp/tools/lsp/__init__.py +5 -0
  77. hanzo_mcp/tools/lsp/lsp_tool.py +512 -0
  78. hanzo_mcp/tools/mcp/__init__.py +2 -2
  79. hanzo_mcp/tools/mcp/{mcp_unified.py → mcp_tool.py} +3 -3
  80. hanzo_mcp/tools/memory/__init__.py +76 -0
  81. hanzo_mcp/tools/memory/knowledge_tools.py +518 -0
  82. hanzo_mcp/tools/memory/memory_tools.py +456 -0
  83. hanzo_mcp/tools/search/__init__.py +6 -0
  84. hanzo_mcp/tools/search/find_tool.py +581 -0
  85. hanzo_mcp/tools/search/unified_search.py +953 -0
  86. hanzo_mcp/tools/shell/__init__.py +11 -6
  87. hanzo_mcp/tools/shell/auto_background.py +203 -0
  88. hanzo_mcp/tools/shell/base_process.py +57 -29
  89. hanzo_mcp/tools/shell/bash_session_executor.py +1 -1
  90. hanzo_mcp/tools/shell/{bash_unified.py → bash_tool.py} +18 -34
  91. hanzo_mcp/tools/shell/command_executor.py +2 -2
  92. hanzo_mcp/tools/shell/{npx_unified.py → npx_tool.py} +16 -33
  93. hanzo_mcp/tools/shell/open.py +2 -2
  94. hanzo_mcp/tools/shell/{process_unified.py → process_tool.py} +1 -1
  95. hanzo_mcp/tools/shell/run_command_windows.py +1 -1
  96. hanzo_mcp/tools/shell/streaming_command.py +594 -0
  97. hanzo_mcp/tools/shell/uvx.py +47 -2
  98. hanzo_mcp/tools/shell/uvx_background.py +47 -2
  99. hanzo_mcp/tools/shell/{uvx_unified.py → uvx_tool.py} +16 -33
  100. hanzo_mcp/tools/todo/__init__.py +14 -19
  101. hanzo_mcp/tools/todo/todo.py +22 -1
  102. hanzo_mcp/tools/vector/__init__.py +1 -1
  103. hanzo_mcp/tools/vector/infinity_store.py +2 -2
  104. hanzo_mcp/tools/vector/project_manager.py +1 -1
  105. hanzo_mcp/types.py +23 -0
  106. hanzo_mcp-0.7.0.dist-info/METADATA +516 -0
  107. hanzo_mcp-0.7.0.dist-info/RECORD +180 -0
  108. {hanzo_mcp-0.6.12.dist-info → hanzo_mcp-0.7.0.dist-info}/entry_points.txt +1 -0
  109. hanzo_mcp/tools/common/palette.py +0 -344
  110. hanzo_mcp/tools/common/palette_loader.py +0 -108
  111. hanzo_mcp/tools/config/palette_tool.py +0 -179
  112. hanzo_mcp/tools/llm/llm_unified.py +0 -851
  113. hanzo_mcp-0.6.12.dist-info/METADATA +0 -339
  114. hanzo_mcp-0.6.12.dist-info/RECORD +0 -135
  115. hanzo_mcp-0.6.12.dist-info/licenses/LICENSE +0 -21
  116. {hanzo_mcp-0.6.12.dist-info → hanzo_mcp-0.7.0.dist-info}/WHEEL +0 -0
  117. {hanzo_mcp-0.6.12.dist-info → hanzo_mcp-0.7.0.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/search/unified_search.py
@@ -0,0 +1,953 @@
+ """Primary unified search tool - THE search tool for finding anything in code.
+
+ This is your main search interface that intelligently combines all available
+ search capabilities including text, AST, symbols, memory, and semantic search.
+ """
+
+ import os
+ import time
+ import json
+ import subprocess
+ from typing import List, Dict, Any, Optional, Set, Tuple
+ from pathlib import Path
+ from dataclasses import dataclass
+ from collections import defaultdict
+ import hashlib
+
+ from hanzo_mcp.tools.common.base import BaseTool
+ from hanzo_mcp.tools.common.paginated_response import AutoPaginatedResponse
+ from hanzo_mcp.tools.common.decorators import with_context_normalization
+ from hanzo_mcp.types import MCPResourceDocument
+
+ # Import memory tools if available
+ try:
+     from hanzo_mcp.tools.memory.memory_tools import KnowledgeRetrieval
+     MEMORY_AVAILABLE = True
+ except ImportError:
+     MEMORY_AVAILABLE = False
+
+ try:
+     import tree_sitter
+     TREESITTER_AVAILABLE = True
+ except ImportError:
+     TREESITTER_AVAILABLE = False
+
+ try:
+     import chromadb
+     from sentence_transformers import SentenceTransformer
+     VECTOR_SEARCH_AVAILABLE = True
+ except ImportError:
+     VECTOR_SEARCH_AVAILABLE = False
+
+
+ @dataclass
+ class SearchResult:
+     """Unified search result."""
+     file_path: str
+     line_number: int
+     column: int
+     match_text: str
+     context_before: List[str]
+     context_after: List[str]
+     match_type: str  # 'text', 'ast', 'vector', 'symbol', 'memory', 'file'
+     score: float = 1.0
+     node_type: Optional[str] = None
+     semantic_context: Optional[str] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         return {
+             "file": self.file_path,
+             "line": self.line_number,
+             "column": self.column,
+             "match": self.match_text,
+             "type": self.match_type,
+             "score": self.score,
+             "context": {
+                 "before": self.context_before,
+                 "after": self.context_after,
+                 "node_type": self.node_type,
+                 "semantic": self.semantic_context
+             }
+         }
+
+     def __hash__(self):
+         """Make result hashable for deduplication."""
+         return hash((self.file_path, self.line_number, self.column, self.match_text))
+
+
+ class UnifiedSearch(BaseTool):
+     """THE primary search tool - your universal interface for finding anything.
+
+     This is the main search tool you should use for finding:
+     - Code patterns and text matches (using ripgrep)
+     - AST nodes and code structure (using treesitter)
+     - Symbol definitions and references (using ctags/LSP)
+     - Files and directories (using find tool)
+     - Memory and knowledge base entries
+     - Semantic/conceptual matches (using vector search)
+
+     The tool automatically determines the best search strategy based on your query
+     and runs multiple search types in parallel for comprehensive results.
+
+     USAGE EXAMPLES:
+
+     1. Find code patterns:
+        search("error handling")  # Finds all error handling code
+        search("TODO|FIXME")  # Regex search for TODOs
+        search("async function")  # Find async functions
+
+     2. Find symbols/definitions:
+        search("class UserService")  # Find class definition
+        search("handleRequest")  # Find function/method
+        search("MAX_RETRIES")  # Find constant
+
+     3. Find files:
+        search("test_*.py", search_files=True)  # Find test files
+        search("config", search_files=True)  # Find config files
+
+     4. Semantic search:
+        search("how authentication works")  # Natural language query
+        search("database connection logic")  # Conceptual search
+
+     5. Memory search:
+        search("previous discussion about API design")  # Search memories
+        search("that bug we fixed last week")  # Search knowledge
+
+     The tool automatically:
+     - Detects query intent and chooses appropriate search methods
+     - Runs searches in parallel for speed
+     - Deduplicates and ranks results by relevance
+     - Provides context around matches
+     - Paginates results to stay within token limits
+     - Respects .gitignore and other exclusions
+
+     PRO TIPS:
+     - Use natural language for conceptual searches
+     - Use code syntax for exact matches
+     - Add search_files=True to also find filenames
+     - Results are ranked by relevance and type
+     - Use page parameter to get more results
+     """
+
+     name = "search"
+     description = """THE primary unified search tool for rapid parallel search across all modalities.
+
+     Find anything in your codebase using text, AST, symbols, files, memory, and semantic search.
+     Automatically detects query intent and runs appropriate searches in parallel.
+     """
+
+     def __init__(self):
+         super().__init__()
+         self.ripgrep_available = self._check_ripgrep()
+         self.vector_db = None
+         self.embedder = None
+
+         if VECTOR_SEARCH_AVAILABLE:
+             self._init_vector_search()
+
+     def _check_ripgrep(self) -> bool:
+         """Check if ripgrep is available."""
+         try:
+             subprocess.run(['rg', '--version'], capture_output=True, check=True)
+             return True
+         except Exception:
+             return False
+
+     def _init_vector_search(self):
+         """Initialize vector search components."""
+         try:
+             self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
+             self.vector_db = chromadb.Client()
+             # Create or get collection
+             self.collection = self.vector_db.get_or_create_collection(
+                 name="code_search",
+                 metadata={"description": "Code semantic search"}
+             )
+         except Exception as e:
+             print(f"Failed to initialize vector search: {e}")
+             self.vector_db = None
+
+     def _should_use_vector_search(self, query: str) -> bool:
+         """Determine if vector search would be helpful."""
+         # Use vector search for natural language queries
+         indicators = [
+             len(query.split()) > 2,  # Multi-word queries
+             not any(c in query for c in ['(', ')', '{', '}', '[', ']']),  # Not code syntax
+             ' ' in query,  # Has spaces (natural language)
+             not query.startswith('^') and not query.endswith('$'),  # Not regex anchors
+         ]
+         return sum(indicators) >= 2
+
+     def _should_use_ast_search(self, query: str) -> bool:
+         """Determine if AST search would be helpful."""
+         # Use AST search for code patterns
+         indicators = [
+             'class ' in query or 'function ' in query or 'def ' in query,
+             'import ' in query or 'from ' in query,
+             any(kw in query.lower() for kw in ['method', 'function', 'class', 'interface', 'struct']),
+             '::' in query or '->' in query or '.' in query,  # Member access
+         ]
+         return any(indicators)
+
+     def _should_use_symbol_search(self, query: str) -> bool:
+         """Determine if symbol search would be helpful."""
+         # Use symbol search for identifiers
+         return (
+             len(query.split()) <= 2 and  # Short queries
+             query.replace('_', '').replace('-', '').isalnum() and  # Looks like identifier
+             ' ' not in query.strip()  # Single token
+         )
+
+     async def run(self,
+                   pattern: str,
+                   path: str = ".",
+                   include: Optional[str] = None,
+                   exclude: Optional[str] = None,
+                   max_results_per_type: int = 20,
+                   context_lines: int = 3,
+                   search_files: bool = False,
+                   search_memory: Optional[bool] = None,
+                   enable_text: Optional[bool] = None,
+                   enable_ast: Optional[bool] = None,
+                   enable_vector: Optional[bool] = None,
+                   enable_symbol: Optional[bool] = None,
+                   page_size: int = 50,
+                   page: int = 1,
+                   **kwargs) -> MCPResourceDocument:
+         """Execute unified search across all available search modalities.
+
+         Args:
+             pattern: Search query (text, regex, natural language, or glob for files)
+             path: Directory to search in (default: current directory)
+             include: File pattern to include (e.g., "*.py", "*.js")
+             exclude: File pattern to exclude (e.g., "*.test.py")
+             max_results_per_type: Max results from each search type
+             context_lines: Lines of context around text matches
+             search_files: Also search for matching filenames
+             search_memory: Search in memory/knowledge base (auto-detected if None)
+             enable_*: Force enable/disable specific search types (auto if None)
+             page_size: Results per page (default: 50)
+             page: Page number to retrieve (default: 1)
+         """
+
+         # Auto-detect search types based on query
+         if search_memory is None:
+             # Search memory for natural language queries or specific references
+             search_memory = (
+                 MEMORY_AVAILABLE and
+                 (self._should_use_vector_search(pattern) or
+                  any(word in pattern.lower() for word in ['previous', 'discussion', 'remember', 'last']))
+             )
+
+         if enable_text is None:
+             enable_text = True  # Always use text search as baseline
+
+         if enable_vector is None:
+             enable_vector = self._should_use_vector_search(pattern) and VECTOR_SEARCH_AVAILABLE
+
+         if enable_ast is None:
+             enable_ast = self._should_use_ast_search(pattern) and TREESITTER_AVAILABLE
+
+         if enable_symbol is None:
+             enable_symbol = self._should_use_symbol_search(pattern)
+
+         # Collect results from all enabled search types
+         all_results = []
+         search_stats = {
+             "query": pattern,
+             "path": path,
+             "search_types_used": [],
+             "total_matches": 0,
+             "unique_matches": 0,
+             "time_ms": {}
+         }
+
+         # 1. Text search (ripgrep) - always fast, do first
+         if enable_text:
+             start = time.time()
+             text_results = await self._text_search(
+                 pattern, path, include, exclude, max_results_per_type, context_lines
+             )
+             search_stats["time_ms"]["text"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("text")
+             all_results.extend(text_results)
+
+         # 2. AST search - for code structure
+         if enable_ast and TREESITTER_AVAILABLE:
+             start = time.time()
+             ast_results = await self._ast_search(
+                 pattern, path, include, exclude, max_results_per_type, context_lines
+             )
+             search_stats["time_ms"]["ast"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("ast")
+             all_results.extend(ast_results)
+
+         # 3. Symbol search - for definitions
+         if enable_symbol:
+             start = time.time()
+             symbol_results = await self._symbol_search(
+                 pattern, path, include, exclude, max_results_per_type
+             )
+             search_stats["time_ms"]["symbol"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("symbol")
+             all_results.extend(symbol_results)
+
+         # 4. Vector search - for semantic similarity
+         if enable_vector and self.vector_db:
+             start = time.time()
+             vector_results = await self._vector_search(
+                 pattern, path, include, exclude, max_results_per_type, context_lines
+             )
+             search_stats["time_ms"]["vector"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("vector")
+             all_results.extend(vector_results)
+
+         # 5. File search - for finding files by name/pattern
+         if search_files:
+             start = time.time()
+             file_results = await self._file_search(
+                 pattern, path, include, exclude, max_results_per_type
+             )
+             search_stats["time_ms"]["files"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("files")
+             all_results.extend(file_results)
+
+         # 6. Memory search - for knowledge base and previous discussions
+         if search_memory:
+             start = time.time()
+             memory_results = await self._memory_search(
+                 pattern, max_results_per_type, context_lines
+             )
+             search_stats["time_ms"]["memory"] = int((time.time() - start) * 1000)
+             search_stats["search_types_used"].append("memory")
+             all_results.extend(memory_results)
+
+         # Deduplicate and rank results
+         unique_results = self._deduplicate_results(all_results)
+         ranked_results = self._rank_results(unique_results, pattern)
+
+         search_stats["total_matches"] = len(all_results)
+         search_stats["unique_matches"] = len(ranked_results)
+
+         # Paginate results
+         total_results = len(ranked_results)
+         start_idx = (page - 1) * page_size
+         end_idx = start_idx + page_size
+         page_results = ranked_results[start_idx:end_idx]
+
+         # Format results for output
+         formatted_results = []
+         for result in page_results:
+             formatted = result.to_dict()
+             # Add match preview with context
+             formatted["preview"] = self._format_preview(result)
+             formatted_results.append(formatted)
+
+         # Create paginated response
+         response_data = {
+             "results": formatted_results,
+             "statistics": search_stats,
+             "pagination": {
+                 "page": page,
+                 "page_size": page_size,
+                 "total_results": total_results,
+                 "total_pages": (total_results + page_size - 1) // page_size,
+                 "has_next": end_idx < total_results,
+                 "has_prev": page > 1
+             }
+         }
+
+         return MCPResourceDocument(data=response_data)
+
+     async def call(self, **kwargs) -> str:
+         """Tool interface for MCP - converts result to JSON string."""
+         result = await self.run(**kwargs)
+         return result.to_json_string()
+
+     def register(self, mcp_server) -> None:
+         """Register tool with MCP server."""
+         from mcp.server import FastMCP
+
+         @mcp_server.tool(name=self.name, description=self.description)
+         async def search_handler(
+             pattern: str,
+             path: str = ".",
+             include: Optional[str] = None,
+             exclude: Optional[str] = None,
+             max_results_per_type: int = 20,
+             context_lines: int = 2,
+             page_size: int = 50,
+             page: int = 1,
+             enable_text: bool = True,
+             enable_ast: bool = True,
+             enable_vector: bool = True,
+             enable_symbol: bool = True,
+             search_files: bool = False,
+             search_memory: bool = False,
+         ) -> str:
+             """Execute unified search."""
+             return await self.call(
+                 pattern=pattern,
+                 path=path,
+                 include=include,
+                 exclude=exclude,
+                 max_results_per_type=max_results_per_type,
+                 context_lines=context_lines,
+                 page_size=page_size,
+                 page=page,
+                 enable_text=enable_text,
+                 enable_ast=enable_ast,
+                 enable_vector=enable_vector,
+                 enable_symbol=enable_symbol,
+                 search_files=search_files,
+                 search_memory=search_memory,
+             )
+
+     async def _text_search(self,
+                            pattern: str,
+                            path: str,
+                            include: Optional[str],
+                            exclude: Optional[str],
+                            max_results: int,
+                            context_lines: int) -> List[SearchResult]:
+         """Perform text search using ripgrep."""
+         results = []
+
+         if not self.ripgrep_available:
+             # Fallback to Python implementation
+             return await self._python_text_search(pattern, path, include, exclude, max_results, context_lines)
+
+         # Build ripgrep command
+         cmd = ['rg', '--json', '--max-count', str(max_results)]
+
+         if context_lines > 0:
+             cmd.extend(['-C', str(context_lines)])
+
+         if include:
+             cmd.extend(['--glob', include])
+
+         if exclude:
+             cmd.extend(['--glob', f'!{exclude}'])
+
+         cmd.extend([pattern, path])
+
+         try:
+             proc = subprocess.run(cmd, capture_output=True, text=True)
+
+             for line in proc.stdout.splitlines():
+                 try:
+                     data = json.loads(line)
+                     if data.get('type') == 'match':
+                         match_data = data['data']
+
+                         result = SearchResult(
+                             file_path=match_data['path']['text'],
+                             line_number=match_data['line_number'],
+                             column=match_data['submatches'][0]['start'],
+                             match_text=match_data['lines']['text'].strip(),
+                             context_before=[],
+                             context_after=[],
+                             match_type='text',
+                             score=1.0
+                         )
+
+                         # Extract context if available
+                         if 'context' in data:
+                             # Parse context lines
+                             pass
+
+                         results.append(result)
+
+                 except json.JSONDecodeError:
+                     continue
+
+         except subprocess.CalledProcessError:
+             pass
+
+         return results
+
+     async def _ast_search(self,
+                           pattern: str,
+                           path: str,
+                           include: Optional[str],
+                           exclude: Optional[str],
+                           max_results: int,
+                           context_lines: int) -> List[SearchResult]:
+         """Perform AST-based search using treesitter."""
+         # Try to use grep-ast if available
+         try:
+             from grep_ast.grep_ast import TreeContext
+         except ImportError:
+             # grep-ast not installed, skip AST search
+             return []
+
+         results = []
+
+         try:
+             # Get files to search
+             search_path = Path(path or ".")
+             files_to_search = []
+
+             if search_path.is_file():
+                 files_to_search = [search_path]
+             else:
+                 # Find files matching include pattern
+                 pattern_to_use = include or "*.py"
+                 for ext in ["*.py", "*.js", "*.ts", "*.go", "*.java", "*.cpp", "*.c"]:
+                     if include and include != ext:
+                         continue
+                     files_to_search.extend(search_path.rglob(ext))
+                     if len(files_to_search) >= max_results:
+                         break
+
+             # Search each file
+             for file_path in files_to_search[:max_results]:
+                 if not file_path.is_file():
+                     continue
+
+                 try:
+                     with open(file_path, "r", encoding="utf-8") as f:
+                         code = f.read()
+
+                     # Process with grep-ast
+                     tc = TreeContext(
+                         str(file_path),
+                         code,
+                         color=False,
+                         verbose=False,
+                         line_number=True,
+                     )
+
+                     # Find matches
+                     matches = tc.grep(pattern, ignore_case=False)
+
+                     for match in matches:
+                         # Extract context
+                         lines = code.split('\n')
+                         line_num = match  # This might need adjustment based on actual return type
+
+                         result = SearchResult(
+                             file_path=str(file_path),
+                             line_number=line_num,
+                             column=0,
+                             match_text=lines[line_num - 1] if 0 < line_num <= len(lines) else "",
+                             context_before=lines[max(0, line_num - context_lines - 1):line_num - 1],
+                             context_after=lines[line_num:min(len(lines), line_num + context_lines)],
+                             match_type='ast',
+                             score=0.9,
+                             node_type='ast_match',
+                             semantic_context=None
+                         )
+                         results.append(result)
+
+                 except Exception:
+                     # Skip files that can't be parsed
+                     continue
+
+         except Exception as e:
+             print(f"AST search error: {e}")
+
+         return results
+
+     async def _symbol_search(self,
+                              pattern: str,
+                              path: str,
+                              include: Optional[str],
+                              exclude: Optional[str],
+                              max_results: int) -> List[SearchResult]:
+         """Search for symbol definitions."""
+         results = []
+
+         # Use ctags or similar for symbol search
+         # For now, use specialized ripgrep patterns
+         symbol_patterns = [
+             f"^\\s*(def|function|func)\\s+{pattern}",  # Function definitions
+             f"^\\s*class\\s+{pattern}",  # Class definitions
+             f"^\\s*(const|let|var)\\s+{pattern}",  # Variable declarations
+             f"^\\s*type\\s+{pattern}",  # Type definitions
+             f"interface\\s+{pattern}",  # Interface definitions
+         ]
+
+         for symbol_pattern in symbol_patterns:
+             symbol_results = await self._text_search(
+                 symbol_pattern, path, include, exclude,
+                 max_results // len(symbol_patterns), 0
+             )
+
+             for res in symbol_results:
+                 res.match_type = 'symbol'
+                 res.score = 1.1  # Boost symbol definitions
+                 results.append(res)
+
+         return results
+
+     async def _vector_search(self,
+                              query: str,
+                              path: str,
+                              include: Optional[str],
+                              exclude: Optional[str],
+                              max_results: int,
+                              context_lines: int) -> List[SearchResult]:
+         """Perform semantic vector search."""
+         if not self.vector_db or not self.embedder:
+             return []
+
+         results = []
+
+         try:
+             # Embed the query
+             query_embedding = self.embedder.encode(query).tolist()
+
+             # Search in vector database
+             search_results = self.collection.query(
+                 query_embeddings=[query_embedding],
+                 n_results=max_results,
+                 where={"path": {"$contains": path}} if path != "." else None
+             )
+
+             if search_results['ids'][0]:
+                 for i, doc_id in enumerate(search_results['ids'][0]):
+                     metadata = search_results['metadatas'][0][i]
+
+                     result = SearchResult(
+                         file_path=metadata['file_path'],
+                         line_number=metadata['line_number'],
+                         column=0,
+                         match_text=search_results['documents'][0][i],
+                         context_before=[],
+                         context_after=[],
+                         match_type='vector',
+                         score=1.0 - search_results['distances'][0][i],  # Convert distance to similarity
+                         semantic_context=metadata.get('context', '')
+                     )
+                     results.append(result)
+
+         except Exception as e:
+             print(f"Vector search error: {e}")
+
+         return results
+
+     async def _file_search(self,
+                            pattern: str,
+                            path: str,
+                            include: Optional[str],
+                            exclude: Optional[str],
+                            max_results: int) -> List[SearchResult]:
+         """Search for files by name/pattern using find tool."""
+         results = []
+
+         try:
+             # Import and use find tool
+             from hanzo_mcp.tools.search.find_tool import FindTool
+             find_tool = FindTool()
+
+             # Call find tool with pattern
+             find_result = await find_tool.run(
+                 pattern=pattern,
+                 path=path,
+                 type="file",  # Only files for now
+                 max_results=max_results,
+                 regex=False,  # Use glob patterns by default
+                 fuzzy=False,
+                 case_sensitive=False
+             )
+
+             # Convert find results to SearchResult format
+             if find_result.data and "results" in find_result.data:
+                 for file_match in find_result.data["results"]:
+                     result = SearchResult(
+                         file_path=file_match["path"],
+                         line_number=1,  # File matches don't have line numbers
+                         column=0,
+                         match_text=file_match["name"],
+                         context_before=[],
+                         context_after=[],
+                         match_type='file',
+                         score=1.0,
+                         semantic_context=f"File: {file_match['extension']} ({file_match['size']} bytes)"
+                     )
+                     results.append(result)
+
+         except Exception as e:
+             print(f"File search error: {e}")
+
+         return results
+
+     async def _memory_search(self,
+                              query: str,
+                              max_results: int,
+                              context_lines: int) -> List[SearchResult]:
+         """Search in memory/knowledge base."""
+         results = []
+
+         if not MEMORY_AVAILABLE:
+             return results
+
+         try:
+             # Create memory retrieval tool
+             retrieval_tool = KnowledgeRetrieval()
+
+             # Search memories
+             memory_result = await retrieval_tool.run(
+                 query=query,
+                 top_k=max_results,
+                 threshold=0.5  # Minimum relevance threshold
+             )
+
+             # Convert memory results to SearchResult format
+             if memory_result.data and "results" in memory_result.data:
+                 for mem in memory_result.data["results"]:
+                     # Extract content and metadata
+                     content = mem.get("content", "")
+                     metadata = mem.get("metadata", {})
+
+                     # Create a virtual file path for memories
+                     memory_type = metadata.get("type", "memory")
+                     memory_id = metadata.get("id", "unknown")
+                     virtual_path = f"memory://{memory_type}/{memory_id}"
+
+                     result = SearchResult(
+                         file_path=virtual_path,
+                         line_number=1,
+                         column=0,
+                         match_text=content[:200] + "..." if len(content) > 200 else content,
+                         context_before=[],
+                         context_after=[],
+                         match_type='memory',
+                         score=mem.get("score", 0.8),
+                         semantic_context=f"Memory type: {memory_type}, Created: {metadata.get('created_at', 'unknown')}"
+                     )
+                     results.append(result)
+
+         except Exception as e:
+             print(f"Memory search error: {e}")
+
+         return results
+
+     def _deduplicate_results(self, results: List[SearchResult]) -> List[SearchResult]:
+         """Remove duplicate results across search types."""
+         seen = set()
+         unique = []
+
+         for result in results:
+             key = (result.file_path, result.line_number, result.match_text.strip())
+             if key not in seen:
+                 seen.add(key)
+                 unique.append(result)
+             else:
+                 # Merge information from duplicate
+                 for existing in unique:
+                     if (existing.file_path, existing.line_number, existing.match_text.strip()) == key:
+                         # Update with better context or node type
+                         if result.node_type and not existing.node_type:
+                             existing.node_type = result.node_type
+                         if result.semantic_context and not existing.semantic_context:
+                             existing.semantic_context = result.semantic_context
+                         # Take best score
+                         existing.score = max(existing.score, result.score)
+                         break
+
+         return unique
+
+     def _rank_results(self, results: List[SearchResult], query: str) -> List[SearchResult]:
+         """Rank results by relevance."""
+         # Simple ranking based on:
+         # 1. Match type score
+         # 2. Exact match bonus
+         # 3. File path relevance
+
+         for result in results:
+             # Exact match bonus
+             if query.lower() in result.match_text.lower():
+                 result.score *= 1.2
+
+             # Path relevance (prefer non-test, non-vendor files)
+             if any(skip in result.file_path for skip in ['test', 'vendor', 'node_modules']):
+                 result.score *= 0.8
+
+             # Prefer definition files
+             if any(pattern in result.file_path for pattern in ['index.', 'main.', 'api.', 'types.']):
+                 result.score *= 1.1
+
+         # Sort by score descending, then by file path
+         results.sort(key=lambda r: (-r.score, r.file_path, r.line_number))
+
+         return results
+
+     def _format_preview(self, result: SearchResult) -> str:
+         """Format result preview with context."""
+         lines = []
+
+         # Add context before
+         for line in result.context_before[-2:]:
+             lines.append(f"  {line}")
+
+         # Add match line with highlighting
+         match_line = result.match_text
+         if result.column > 0:
+             # Add column indicator
+             lines.append(f"> {match_line}")
+             lines.append(f"  {' ' * result.column}^")
+         else:
+             lines.append(f"> {match_line}")
+
+         # Add context after
+         for line in result.context_after[:2]:
+             lines.append(f"  {line}")
+
+         return '\n'.join(lines)
+
+     async def _python_text_search(self,
+                                   pattern: str,
+                                   path: str,
+                                   include: Optional[str],
+                                   exclude: Optional[str],
+                                   max_results: int,
+                                   context_lines: int) -> List[SearchResult]:
+         """Fallback Python text search when ripgrep not available."""
+         results = []
+         count = 0
+
+         import re
+         import glob
+
+         # Compile pattern
+         try:
+             regex = re.compile(pattern)
+         except re.error:
+             # Treat as literal string
+             regex = re.compile(re.escape(pattern))
+
+         # Find files
+         for file_path in Path(path).rglob(include or '*'):
+             if count >= max_results:
+                 break
+
+             if file_path.is_file():
+                 try:
+                     with open(file_path, 'r', encoding='utf-8') as f:
+                         lines = f.readlines()
+
+                     for i, line in enumerate(lines):
+                         if count >= max_results:
+                             break
+
+                         match = regex.search(line)
+                         if match:
+                             result = SearchResult(
+                                 file_path=str(file_path),
+                                 line_number=i + 1,
+                                 column=match.start(),
+                                 match_text=line.strip(),
+                                 context_before=lines[max(0, i-context_lines):i],
+                                 context_after=lines[i+1:i+1+context_lines],
+                                 match_type='text',
+                                 score=1.0
+                             )
+                             results.append(result)
+                             count += 1
+
+                 except Exception:
+                     continue
+
+         return results
+
+
+ # Index builder for vector search
+ class CodeIndexer:
+     """Build and maintain vector search index."""
+
+     def __init__(self, vector_db, embedder):
+         self.vector_db = vector_db
+         self.embedder = embedder
+         self.collection = vector_db.get_or_create_collection("code_search")
+
+     async def index_directory(self, path: str, file_patterns: List[str] = None):
+         """Index a directory for vector search."""
+         if file_patterns is None:
+             file_patterns = ['*.py', '*.js', '*.ts', '*.go', '*.java', '*.cpp', '*.c']
+
+         documents = []
+         metadatas = []
+         ids = []
+
+         for pattern in file_patterns:
+             for file_path in Path(path).rglob(pattern):
+                 if file_path.is_file():
+                     try:
+                         with open(file_path, 'r', encoding='utf-8') as f:
+                             content = f.read()
+
+                         # Split into chunks (functions, classes, etc.)
+                         chunks = self._split_code_intelligently(content, file_path)
+
+                         for chunk in chunks:
+                             doc_id = hashlib.md5(
+                                 f"{file_path}:{chunk['line']}:{chunk['text'][:50]}".encode()
+                             ).hexdigest()
+
+                             documents.append(chunk['text'])
+                             metadatas.append({
+                                 'file_path': str(file_path),
+                                 'line_number': chunk['line'],
+                                 'context': chunk.get('context', ''),
+                                 'type': chunk.get('type', 'code')
+                             })
+                             ids.append(doc_id)
+
+                     except Exception as e:
+                         print(f"Error indexing {file_path}: {e}")
+
+         # Batch embed and store
+         if documents:
+             embeddings = self.embedder.encode(documents).tolist()
+             self.collection.add(
+                 embeddings=embeddings,
+                 documents=documents,
+                 metadatas=metadatas,
+                 ids=ids
+             )
+
+     def _split_code_intelligently(self, content: str, file_path: Path) -> List[Dict[str, Any]]:
+         """Split code into meaningful chunks."""
+         # Simple line-based splitting for now
+         # TODO: Use AST for better splitting
+         chunks = []
+         lines = content.split('\n')
+
+         # Group into function-sized chunks
+         current_chunk = []
+         current_line = 1
+
+         for i, line in enumerate(lines):
+             current_chunk.append(line)
+
+             # Split on function/class definitions or every 50 lines
+             if (len(current_chunk) >= 50 or
+                     any(kw in line for kw in ['def ', 'function ', 'class ', 'interface '])):
+
+                 if current_chunk:
+                     chunks.append({
+                         'text': '\n'.join(current_chunk),
+                         'line': current_line,
+                         'type': 'code'
+                     })
+                     current_chunk = []
+                     current_line = i + 2
+
+         # Add remaining
+         if current_chunk:
+             chunks.append({
+                 'text': '\n'.join(current_chunk),
+                 'line': current_line,
+                 'type': 'code'
+             })
+
+         return chunks
+
+
+ # Tool registration
+ def create_unified_search_tool():
+     """Factory function to create unified search tool."""
+     return UnifiedSearch()
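
For orientation, here is a minimal usage sketch of the new tool. This is hypothetical driver code, not part of the release; it assumes BaseTool takes no constructor arguments and that MCPResourceDocument.data is a plain dict, as the diff above suggests.

import asyncio

from hanzo_mcp.tools.search.unified_search import create_unified_search_tool

async def main() -> None:
    # Factory from the end of the diff; returns a UnifiedSearch instance.
    search = create_unified_search_tool()
    # A short identifier-like query auto-enables text and symbol search;
    # run() returns an MCPResourceDocument with results, statistics, pagination.
    doc = await search.run(pattern="UnifiedSearch", path=".", page_size=10)
    for hit in doc.data["results"]:
        print(f"{hit['file']}:{hit['line']} [{hit['type']}] {hit['match']}")
    print(doc.data["pagination"])

asyncio.run(main())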