hanzo-mcp 0.3.8__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic. Click here for more details.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/cli.py +118 -170
- hanzo_mcp/cli_enhanced.py +438 -0
- hanzo_mcp/config/__init__.py +19 -0
- hanzo_mcp/config/settings.py +388 -0
- hanzo_mcp/config/tool_config.py +197 -0
- hanzo_mcp/prompts/__init__.py +117 -0
- hanzo_mcp/prompts/compact_conversation.py +77 -0
- hanzo_mcp/prompts/create_release.py +38 -0
- hanzo_mcp/prompts/project_system.py +120 -0
- hanzo_mcp/prompts/project_todo_reminder.py +111 -0
- hanzo_mcp/prompts/utils.py +286 -0
- hanzo_mcp/server.py +117 -99
- hanzo_mcp/tools/__init__.py +105 -32
- hanzo_mcp/tools/agent/__init__.py +8 -11
- hanzo_mcp/tools/agent/agent_tool.py +290 -224
- hanzo_mcp/tools/agent/prompt.py +16 -13
- hanzo_mcp/tools/agent/tool_adapter.py +9 -9
- hanzo_mcp/tools/common/__init__.py +17 -16
- hanzo_mcp/tools/common/base.py +79 -110
- hanzo_mcp/tools/common/batch_tool.py +330 -0
- hanzo_mcp/tools/common/context.py +26 -292
- hanzo_mcp/tools/common/permissions.py +12 -12
- hanzo_mcp/tools/common/thinking_tool.py +153 -0
- hanzo_mcp/tools/common/validation.py +1 -63
- hanzo_mcp/tools/filesystem/__init__.py +88 -57
- hanzo_mcp/tools/filesystem/base.py +32 -24
- hanzo_mcp/tools/filesystem/content_replace.py +114 -107
- hanzo_mcp/tools/filesystem/directory_tree.py +129 -105
- hanzo_mcp/tools/filesystem/edit.py +279 -0
- hanzo_mcp/tools/filesystem/grep.py +458 -0
- hanzo_mcp/tools/filesystem/grep_ast_tool.py +250 -0
- hanzo_mcp/tools/filesystem/multi_edit.py +362 -0
- hanzo_mcp/tools/filesystem/read.py +255 -0
- hanzo_mcp/tools/filesystem/write.py +156 -0
- hanzo_mcp/tools/jupyter/__init__.py +41 -29
- hanzo_mcp/tools/jupyter/base.py +66 -57
- hanzo_mcp/tools/jupyter/{edit_notebook.py → notebook_edit.py} +162 -139
- hanzo_mcp/tools/jupyter/notebook_read.py +152 -0
- hanzo_mcp/tools/shell/__init__.py +29 -20
- hanzo_mcp/tools/shell/base.py +87 -45
- hanzo_mcp/tools/shell/bash_session.py +731 -0
- hanzo_mcp/tools/shell/bash_session_executor.py +295 -0
- hanzo_mcp/tools/shell/command_executor.py +435 -384
- hanzo_mcp/tools/shell/run_command.py +284 -131
- hanzo_mcp/tools/shell/run_command_windows.py +328 -0
- hanzo_mcp/tools/shell/session_manager.py +196 -0
- hanzo_mcp/tools/shell/session_storage.py +325 -0
- hanzo_mcp/tools/todo/__init__.py +66 -0
- hanzo_mcp/tools/todo/base.py +319 -0
- hanzo_mcp/tools/todo/todo_read.py +148 -0
- hanzo_mcp/tools/todo/todo_write.py +378 -0
- hanzo_mcp/tools/vector/__init__.py +95 -0
- hanzo_mcp/tools/vector/infinity_store.py +365 -0
- hanzo_mcp/tools/vector/project_manager.py +361 -0
- hanzo_mcp/tools/vector/vector_index.py +115 -0
- hanzo_mcp/tools/vector/vector_search.py +215 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/METADATA +33 -1
- hanzo_mcp-0.5.0.dist-info/RECORD +63 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/WHEEL +1 -1
- hanzo_mcp/tools/agent/base_provider.py +0 -73
- hanzo_mcp/tools/agent/litellm_provider.py +0 -45
- hanzo_mcp/tools/agent/lmstudio_agent.py +0 -385
- hanzo_mcp/tools/agent/lmstudio_provider.py +0 -219
- hanzo_mcp/tools/agent/provider_registry.py +0 -120
- hanzo_mcp/tools/common/error_handling.py +0 -86
- hanzo_mcp/tools/common/logging_config.py +0 -115
- hanzo_mcp/tools/common/session.py +0 -91
- hanzo_mcp/tools/common/think_tool.py +0 -123
- hanzo_mcp/tools/common/version_tool.py +0 -120
- hanzo_mcp/tools/filesystem/edit_file.py +0 -287
- hanzo_mcp/tools/filesystem/get_file_info.py +0 -170
- hanzo_mcp/tools/filesystem/read_files.py +0 -199
- hanzo_mcp/tools/filesystem/search_content.py +0 -275
- hanzo_mcp/tools/filesystem/write_file.py +0 -162
- hanzo_mcp/tools/jupyter/notebook_operations.py +0 -514
- hanzo_mcp/tools/jupyter/read_notebook.py +0 -165
- hanzo_mcp/tools/project/__init__.py +0 -64
- hanzo_mcp/tools/project/analysis.py +0 -886
- hanzo_mcp/tools/project/base.py +0 -66
- hanzo_mcp/tools/project/project_analyze.py +0 -173
- hanzo_mcp/tools/shell/run_script.py +0 -215
- hanzo_mcp/tools/shell/script_tool.py +0 -244
- hanzo_mcp-0.3.8.dist-info/RECORD +0 -53
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Project-aware vector database management for Hanzo MCP."""
|
|
2
|
+
|
|
3
|
+
import asyncio
import logging
import os
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

from .infinity_store import InfinityVectorStore, SearchResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ProjectInfo:
    """Information about a detected project.

    A project is any directory that contains an ``LLM.md`` file; its
    vector database lives under ``<root_path>/.hanzo/db``.
    """

    root_path: Path    # project root directory (the parent of LLM.md)
    llm_md_path: Path  # path to the project's LLM.md marker file
    db_path: Path      # per-project vector database directory (.hanzo/db)
    name: str          # project name (basename of root_path)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ProjectVectorManager:
    """Manages project-aware vector databases.

    Each detected project (a directory containing an ``LLM.md`` file) gets
    its own vector store under ``<project>/.hanzo/db``; files outside any
    known project are indexed in a shared global store.
    """

    def __init__(
        self,
        global_db_path: Optional[str] = None,
        embedding_model: str = "text-embedding-3-small",
        dimension: int = 1536,
    ):
        """Initialize the project vector manager.

        Args:
            global_db_path: Path for global vector store (default: ~/.config/hanzo/db)
            embedding_model: Embedding model to use
            dimension: Vector dimension
        """
        self.embedding_model = embedding_model
        self.dimension = dimension

        # Set up global database path
        if global_db_path:
            self.global_db_path = Path(global_db_path)
        else:
            # Imported lazily so an explicit path avoids the settings machinery.
            from hanzo_mcp.config.settings import get_config_dir

            self.global_db_path = get_config_dir() / "db"

        self.global_db_path.mkdir(parents=True, exist_ok=True)

        # Caches: detected project metadata and per-project store handles.
        self.projects: Dict[str, ProjectInfo] = {}
        self.vector_stores: Dict[str, InfinityVectorStore] = {}
        self._global_store: Optional[InfinityVectorStore] = None

        # Thread pool used to run blocking store searches in parallel.
        self.executor = ThreadPoolExecutor(max_workers=4)

    def _get_global_store(self) -> InfinityVectorStore:
        """Get or lazily create the global vector store."""
        if self._global_store is None:
            self._global_store = InfinityVectorStore(
                data_path=str(self.global_db_path),
                embedding_model=self.embedding_model,
                dimension=self.dimension,
            )
        return self._global_store

    def detect_projects(self, search_paths: List[str]) -> List[ProjectInfo]:
        """Detect projects by finding LLM.md files.

        Args:
            search_paths: List of paths to search for projects

        Returns:
            List of detected project information
        """
        projects = []

        for search_path in search_paths:
            path = Path(search_path).resolve()

            # Every LLM.md file marks the root of a project.
            for llm_md_path in path.rglob("LLM.md"):
                project_root = llm_md_path.parent

                # Ensure the per-project database directory exists.
                db_path = project_root / ".hanzo" / "db"
                db_path.mkdir(parents=True, exist_ok=True)

                project_info = ProjectInfo(
                    root_path=project_root,
                    llm_md_path=llm_md_path,
                    db_path=db_path,
                    name=project_root.name,
                )

                projects.append(project_info)
                self.projects[str(project_root)] = project_info

        return projects

    def get_project_for_path(self, file_path: str) -> Optional[ProjectInfo]:
        """Find the project that contains a given file path.

        Args:
            file_path: File path to check

        Returns:
            Project info if found, None otherwise
        """
        path = Path(file_path).resolve()

        # Fast path: check the already-detected projects.
        for project_info in self.projects.values():
            try:
                # relative_to raises ValueError when path is outside the root.
                path.relative_to(project_info.root_path)
                return project_info
            except ValueError:
                continue

        # Slow path: walk up the directory tree looking for an LLM.md marker.
        current_path = path.parent if path.is_file() else path

        while current_path != current_path.parent:  # stop at filesystem root
            llm_md_path = current_path / "LLM.md"
            if llm_md_path.exists():
                # Found a project; create its db directory and cache it.
                db_path = current_path / ".hanzo" / "db"
                db_path.mkdir(parents=True, exist_ok=True)

                project_info = ProjectInfo(
                    root_path=current_path,
                    llm_md_path=llm_md_path,
                    db_path=db_path,
                    name=current_path.name,
                )

                self.projects[str(current_path)] = project_info
                return project_info

            current_path = current_path.parent

        return None

    def get_vector_store(self, project_info: Optional[ProjectInfo] = None) -> InfinityVectorStore:
        """Get vector store for a project or the global store.

        Args:
            project_info: Project to get store for, None for global store

        Returns:
            Vector store instance
        """
        if project_info is None:
            return self._get_global_store()

        project_key = str(project_info.root_path)

        # Lazily open one store per project and reuse it afterwards.
        if project_key not in self.vector_stores:
            self.vector_stores[project_key] = InfinityVectorStore(
                data_path=str(project_info.db_path),
                embedding_model=self.embedding_model,
                dimension=self.dimension,
            )

        return self.vector_stores[project_key]

    def add_file_to_appropriate_store(
        self,
        file_path: str,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Tuple[List[str], Optional[ProjectInfo]]:
        """Add a file to the appropriate vector store (project or global).

        Args:
            file_path: Path to file to add
            chunk_size: Chunk size for text splitting
            chunk_overlap: Overlap between chunks
            metadata: Additional metadata

        Returns:
            Tuple of (document IDs, project info or None for global)
        """
        # Route the file to its project's store, or the global store.
        project_info = self.get_project_for_path(file_path)
        vector_store = self.get_vector_store(project_info)

        # Annotate chunks with their project of origin.
        file_metadata = metadata or {}
        if project_info:
            file_metadata["project_name"] = project_info.name
            file_metadata["project_root"] = str(project_info.root_path)
        else:
            file_metadata["project_name"] = "global"

        doc_ids = vector_store.add_file(
            file_path=file_path,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            metadata=file_metadata,
        )

        return doc_ids, project_info

    async def search_all_projects(
        self,
        query: str,
        limit_per_project: int = 5,
        score_threshold: float = 0.0,
        include_global: bool = True,
        project_filter: Optional[List[str]] = None,
    ) -> Dict[str, List[SearchResult]]:
        """Search across all projects in parallel.

        Args:
            query: Search query
            limit_per_project: Maximum results per project
            score_threshold: Minimum similarity score
            include_global: Whether to include global store
            project_filter: List of project names to search (None for all)

        Returns:
            Dictionary mapping project names to search results
        """
        loop = asyncio.get_running_loop()
        search_tasks = []
        project_names = []

        # Add global store if requested.
        if include_global:
            global_store = self._get_global_store()
            search_tasks.append(
                loop.run_in_executor(
                    self.executor,
                    # Bind the store as a default argument so the lambda does
                    # not capture a late-bound local variable.
                    lambda store=global_store: store.search(
                        query, limit_per_project, score_threshold
                    ),
                )
            )
            project_names.append("global")

        # Add one task per (optionally filtered) project store.
        for project_info in self.projects.values():
            if project_filter and project_info.name not in project_filter:
                continue

            vector_store = self.get_vector_store(project_info)
            search_tasks.append(
                loop.run_in_executor(
                    self.executor,
                    lambda store=vector_store: store.search(
                        query, limit_per_project, score_threshold
                    ),
                )
            )
            project_names.append(project_info.name)

        # Execute all searches in parallel; failures come back as exceptions.
        results = await asyncio.gather(*search_tasks, return_exceptions=True)

        combined_results: Dict[str, List[SearchResult]] = {}
        for project_name, result in zip(project_names, results):
            if isinstance(result, Exception):
                # One failing store must not sink the whole search.
                logging.getLogger(__name__).warning(
                    "Error searching project %s: %s", project_name, result
                )
                combined_results[project_name] = []
            else:
                combined_results[project_name] = result

        return combined_results

    def search_project_by_path(
        self,
        file_path: str,
        query: str,
        limit: int = 10,
        score_threshold: float = 0.0,
    ) -> List[SearchResult]:
        """Search the project containing a specific file path.

        Args:
            file_path: File path to determine project
            query: Search query
            limit: Maximum results
            score_threshold: Minimum similarity score

        Returns:
            Search results from the appropriate project store
        """
        project_info = self.get_project_for_path(file_path)
        vector_store = self.get_vector_store(project_info)

        return vector_store.search(
            query=query,
            limit=limit,
            score_threshold=score_threshold,
        )

    def get_project_stats(self) -> Dict[str, Dict[str, Any]]:
        """Get statistics for all projects.

        Returns:
            Dictionary mapping project names to stats
        """
        stats: Dict[str, Dict[str, Any]] = {}

        # Global store stats.
        try:
            global_store = self._get_global_store()
            global_files = global_store.list_files()
            stats["global"] = {
                "file_count": len(global_files),
                "db_path": str(self.global_db_path),
            }
        except Exception as e:
            stats["global"] = {"error": str(e)}

        # Per-project store stats.
        for project_info in self.projects.values():
            try:
                vector_store = self.get_vector_store(project_info)
                project_files = vector_store.list_files()
                stats[project_info.name] = {
                    "file_count": len(project_files),
                    "db_path": str(project_info.db_path),
                    "root_path": str(project_info.root_path),
                    "llm_md_exists": project_info.llm_md_path.exists(),
                }
            except Exception as e:
                stats[project_info.name] = {"error": str(e)}

        return stats

    def cleanup(self):
        """Close all vector stores and cleanup resources."""
        # Best-effort shutdown: a store that fails to close should not
        # prevent the remaining ones from being released.
        for vector_store in self.vector_stores.values():
            try:
                vector_store.close()
            except Exception:
                pass

        if self._global_store:
            try:
                self._global_store.close()
            except Exception:
                pass

        self.executor.shutdown(wait=False)
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Vector indexing tool for adding documents to vector database."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Optional, TypedDict, Unpack, final
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from fastmcp import Context as MCPContext
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from hanzo_mcp.tools.common.base import BaseTool
|
|
10
|
+
from hanzo_mcp.tools.common.permissions import PermissionManager
|
|
11
|
+
from hanzo_mcp.tools.common.validation import validate_path_access
|
|
12
|
+
|
|
13
|
+
from .infinity_store import InfinityVectorStore
|
|
14
|
+
from .project_manager import ProjectVectorManager
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VectorIndexParams(TypedDict, total=False):
    """Parameters for vector indexing operations.

    All keys are optional at the type level (``total=False``); the tool
    requires at least one of ``file_path`` or ``content`` at call time.
    """

    file_path: str                     # file to chunk and index
    content: Optional[str]             # raw text to index (global store)
    chunk_size: Optional[int]          # characters per chunk (default 1000)
    chunk_overlap: Optional[int]       # overlap between chunks (default 200)
    metadata: Optional[Dict[str, str]]  # extra metadata stored with each chunk
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@final
class VectorIndexTool(BaseTool):
    """Tool for indexing documents in the vector database."""

    def __init__(self, permission_manager: PermissionManager, project_manager: ProjectVectorManager):
        """Initialize the vector index tool.

        Args:
            permission_manager: Permission manager for access control
            project_manager: Project-aware vector store manager
        """
        self.permission_manager = permission_manager
        self.project_manager = project_manager

    @property
    def name(self) -> str:
        """Get the tool name."""
        return "vector_index"

    @property
    def description(self) -> str:
        """Get the tool description."""
        return """Index documents in project-aware vector databases for semantic search.

Can index individual text content or entire files. Files are automatically assigned
to the appropriate project database based on LLM.md detection or stored in the global
database. Files are chunked for optimal search performance.

Projects are detected by finding LLM.md files, with databases stored in .hanzo/db
directories alongside them. Use this to build searchable knowledge bases per project."""

    async def call(
        self,
        ctx: MCPContext,
        **params: Unpack[VectorIndexParams],
    ) -> str:
        """Index content or files in the vector database.

        Args:
            ctx: MCP context
            **params: Tool parameters

        Returns:
            Indexing result message
        """
        file_path = params.get("file_path")
        content = params.get("content")
        # The params are Optional, so a caller may pass an explicit None;
        # treat that the same as "not provided" instead of propagating None.
        chunk_size = params.get("chunk_size")
        chunk_size = 1000 if chunk_size is None else chunk_size
        chunk_overlap = params.get("chunk_overlap")
        chunk_overlap = 200 if chunk_overlap is None else chunk_overlap
        metadata = params.get("metadata") or {}

        if not file_path and not content:
            return "Error: Either file_path or content must be provided"

        try:
            if file_path:
                # Validate file access before touching the store.
                file_access_result = validate_path_access(
                    self.permission_manager, file_path, require_existence=True
                )
                if not file_access_result.allowed:
                    return f"Error: {file_access_result.reason}"

                # The manager routes the file to its project store or global.
                doc_ids, project_info = self.project_manager.add_file_to_appropriate_store(
                    file_path=file_path,
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    metadata=metadata,
                )

                file_name = Path(file_path).name
                if project_info:
                    return f"Successfully indexed {file_name} with {len(doc_ids)} chunks in project '{project_info.name}'"
                return f"Successfully indexed {file_name} with {len(doc_ids)} chunks in global database"

            # Raw content has no file path and therefore no project context:
            # index it in the global store via the manager's public API.
            global_store = self.project_manager.get_vector_store(None)
            doc_id = global_store.add_document(
                content=content,
                metadata=metadata,
            )

            return f"Successfully indexed content as document {doc_id} in global database"

        except Exception as e:
            return f"Error indexing content: {str(e)}"
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
"""Vector search tool for semantic document retrieval."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Optional, TypedDict, Unpack, final
|
|
4
|
+
import json
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
from fastmcp import Context as MCPContext
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
|
|
10
|
+
from hanzo_mcp.tools.common.base import BaseTool
|
|
11
|
+
from hanzo_mcp.tools.common.permissions import PermissionManager
|
|
12
|
+
|
|
13
|
+
from .infinity_store import InfinityVectorStore
|
|
14
|
+
from .project_manager import ProjectVectorManager
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VectorSearchParams(TypedDict, total=False):
    """Parameters for vector search operations.

    Only ``query`` is required at call time; the remaining keys tune
    result count, filtering, and which databases are searched.
    """

    query: str                          # natural-language search query (required)
    limit: Optional[int]                # maximum results to return (default 10)
    score_threshold: Optional[float]    # minimum similarity score (default 0.0)
    include_content: Optional[bool]     # include document text in output (default True)
    file_filter: Optional[str]          # substring filter on result file paths
    project_filter: Optional[List[str]]  # restrict "all" scope to these project names
    search_scope: Optional[str]  # "all", "global", "current", or specific project name
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@final
class VectorSearchTool(BaseTool):
    """Tool for semantic search in the vector database."""

    def __init__(self, permission_manager: PermissionManager, project_manager: ProjectVectorManager):
        """Initialize the vector search tool.

        Args:
            permission_manager: Permission manager for access control
            project_manager: Project-aware vector store manager
        """
        self.permission_manager = permission_manager
        self.project_manager = project_manager

    @property
    def name(self) -> str:
        """Get the tool name."""
        return "vector_search"

    @property
    def description(self) -> str:
        """Get the tool description."""
        return """Search for documents using semantic similarity across project-aware vector databases.

Performs intelligent text search that understands meaning and context, not just keywords.
Can search across all projects, specific projects, or just the global database. Projects are
automatically detected based on LLM.md files.

Returns ranked results with similarity scores, project context, and document metadata."""

    async def call(
        self,
        ctx: MCPContext,
        **params: Unpack[VectorSearchParams],
    ) -> str:
        """Search for similar documents in the vector database.

        Args:
            ctx: MCP context
            **params: Tool parameters

        Returns:
            Search results formatted as text
        """
        query = params.get("query")
        if not query:
            return "Error: query parameter is required"

        # The params are Optional, so a caller may pass an explicit None;
        # treat that the same as "not provided" instead of propagating None.
        limit = params.get("limit")
        limit = 10 if limit is None else limit
        score_threshold = params.get("score_threshold")
        score_threshold = 0.0 if score_threshold is None else score_threshold
        include_content = params.get("include_content")
        include_content = True if include_content is None else include_content
        file_filter = params.get("file_filter")
        project_filter = params.get("project_filter")
        search_scope = params.get("search_scope") or "all"

        try:
            # Resolve which store(s) to search based on the requested scope.
            if search_scope == "all":
                results = await self._search_all(query, limit, score_threshold, project_filter)
            elif search_scope == "global":
                global_store = self.project_manager.get_vector_store(None)
                results = global_store.search(
                    query=query,
                    limit=limit,
                    score_threshold=score_threshold,
                )
                self._tag_results(results, "global")
            elif search_scope == "current":
                results = self._search_current(query, limit, score_threshold)
            else:
                # Any other value names a specific project.
                project_info = self._find_project_by_name(search_scope)
                if project_info is None:
                    return f"Project '{search_scope}' not found"
                vector_store = self.project_manager.get_vector_store(project_info)
                results = vector_store.search(
                    query=query,
                    limit=limit,
                    score_threshold=score_threshold,
                )
                self._tag_results(results, project_info.name)

            if not results:
                return f"No results found for query: '{query}'"

            # Optional substring filter on the result file paths.
            if file_filter:
                results = [r for r in results if file_filter in (r.document.file_path or "")]

            return self._format_results(query, results, include_content)

        except Exception as e:
            return f"Error searching vector database: {str(e)}"

    async def _search_all(self, query, limit, score_threshold, project_filter):
        """Search the global store plus every known project, merged by score."""
        project_results = await self.project_manager.search_all_projects(
            query=query,
            limit_per_project=limit,
            score_threshold=score_threshold,
            include_global=True,
            project_filter=project_filter,
        )

        all_results = []
        for project_name, results in project_results.items():
            self._tag_results(results, project_name)
            all_results.extend(results)

        # Best matches first, trimmed to the overall limit.
        all_results.sort(key=lambda r: r.score, reverse=True)
        return all_results[:limit]

    def _search_current(self, query, limit, score_threshold):
        """Search the project containing the current working directory,
        falling back to the global store when none is found."""
        import os

        project_info = self.project_manager.get_project_for_path(os.getcwd())
        if project_info is not None:
            store = self.project_manager.get_vector_store(project_info)
            tag = project_info.name
        else:
            store = self.project_manager.get_vector_store(None)
            tag = "global"

        results = store.search(
            query=query,
            limit=limit,
            score_threshold=score_threshold,
        )
        self._tag_results(results, tag)
        return results

    def _find_project_by_name(self, name: str):
        """Return the known ProjectInfo with the given name, or None."""
        for project_info in self.project_manager.projects.values():
            if project_info.name == name:
                return project_info
        return None

    @staticmethod
    def _tag_results(results, project_name: str) -> None:
        """Record the originating project in each result's metadata."""
        for result in results:
            result.document.metadata = result.document.metadata or {}
            result.document.metadata["search_project"] = project_name

    @staticmethod
    def _format_results(query: str, results, include_content: bool) -> str:
        """Render search results as human-readable text."""
        output_lines = [f"Found {len(results)} results for query: '{query}'\n"]

        for i, result in enumerate(results, 1):
            doc = result.document
            score_percent = result.score * 100

            # Header with score, project, file, and chunk position.
            project_name = doc.metadata.get("search_project", "unknown")
            header = f"Result {i} (Score: {score_percent:.1f}%) - Project: {project_name}"
            if doc.file_path:
                header += f" - {doc.file_path}"
            if doc.chunk_index is not None:
                header += f" [Chunk {doc.chunk_index}]"

            output_lines.append(header)
            output_lines.append("-" * len(header))

            # Show user-supplied metadata, hiding internal bookkeeping keys.
            if doc.metadata:
                relevant_metadata = {
                    k: v
                    for k, v in doc.metadata.items()
                    if k not in ('chunk_number', 'total_chunks', 'search_project')
                }
                if relevant_metadata:
                    output_lines.append(f"Metadata: {json.dumps(relevant_metadata, indent=2)}")

            # Truncate long content so one result cannot flood the output.
            if include_content:
                content = doc.content
                if len(content) > 500:
                    content = content[:500] + "..."
                output_lines.append(f"Content:\n{content}")

            output_lines.append("")  # Empty line between results

        return "\n".join(output_lines)
|