hanzo-mcp 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic. Click here for more details.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/config/settings.py +61 -0
- hanzo_mcp/tools/__init__.py +158 -12
- hanzo_mcp/tools/common/base.py +7 -2
- hanzo_mcp/tools/common/config_tool.py +396 -0
- hanzo_mcp/tools/common/stats.py +261 -0
- hanzo_mcp/tools/common/tool_disable.py +144 -0
- hanzo_mcp/tools/common/tool_enable.py +182 -0
- hanzo_mcp/tools/common/tool_list.py +263 -0
- hanzo_mcp/tools/database/__init__.py +71 -0
- hanzo_mcp/tools/database/database_manager.py +246 -0
- hanzo_mcp/tools/database/graph_add.py +257 -0
- hanzo_mcp/tools/database/graph_query.py +536 -0
- hanzo_mcp/tools/database/graph_remove.py +267 -0
- hanzo_mcp/tools/database/graph_search.py +348 -0
- hanzo_mcp/tools/database/graph_stats.py +345 -0
- hanzo_mcp/tools/database/sql_query.py +229 -0
- hanzo_mcp/tools/database/sql_search.py +296 -0
- hanzo_mcp/tools/database/sql_stats.py +254 -0
- hanzo_mcp/tools/editor/__init__.py +11 -0
- hanzo_mcp/tools/editor/neovim_command.py +272 -0
- hanzo_mcp/tools/editor/neovim_edit.py +290 -0
- hanzo_mcp/tools/editor/neovim_session.py +356 -0
- hanzo_mcp/tools/filesystem/__init__.py +20 -1
- hanzo_mcp/tools/filesystem/batch_search.py +812 -0
- hanzo_mcp/tools/filesystem/find_files.py +348 -0
- hanzo_mcp/tools/filesystem/git_search.py +505 -0
- hanzo_mcp/tools/llm/__init__.py +27 -0
- hanzo_mcp/tools/llm/consensus_tool.py +351 -0
- hanzo_mcp/tools/llm/llm_manage.py +413 -0
- hanzo_mcp/tools/llm/llm_tool.py +346 -0
- hanzo_mcp/tools/llm/provider_tools.py +412 -0
- hanzo_mcp/tools/mcp/__init__.py +11 -0
- hanzo_mcp/tools/mcp/mcp_add.py +263 -0
- hanzo_mcp/tools/mcp/mcp_remove.py +127 -0
- hanzo_mcp/tools/mcp/mcp_stats.py +165 -0
- hanzo_mcp/tools/shell/__init__.py +27 -7
- hanzo_mcp/tools/shell/logs.py +265 -0
- hanzo_mcp/tools/shell/npx.py +194 -0
- hanzo_mcp/tools/shell/npx_background.py +254 -0
- hanzo_mcp/tools/shell/pkill.py +262 -0
- hanzo_mcp/tools/shell/processes.py +279 -0
- hanzo_mcp/tools/shell/run_background.py +326 -0
- hanzo_mcp/tools/shell/uvx.py +187 -0
- hanzo_mcp/tools/shell/uvx_background.py +249 -0
- hanzo_mcp/tools/vector/__init__.py +21 -12
- hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
- hanzo_mcp/tools/vector/git_ingester.py +485 -0
- hanzo_mcp/tools/vector/index_tool.py +358 -0
- hanzo_mcp/tools/vector/infinity_store.py +465 -1
- hanzo_mcp/tools/vector/mock_infinity.py +162 -0
- hanzo_mcp/tools/vector/vector_index.py +7 -6
- hanzo_mcp/tools/vector/vector_search.py +22 -7
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/METADATA +68 -20
- hanzo_mcp-0.5.2.dist-info/RECORD +106 -0
- hanzo_mcp-0.5.0.dist-info/RECORD +0 -63
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
"""Index tool for managing vector store indexing."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import time
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated, TypedDict, Unpack, final, override
|
|
8
|
+
|
|
9
|
+
from fastmcp import Context as MCPContext
|
|
10
|
+
from pydantic import Field
|
|
11
|
+
|
|
12
|
+
from hanzo_mcp.tools.common.base import BaseTool
|
|
13
|
+
from hanzo_mcp.tools.common.context import create_tool_context
|
|
14
|
+
from hanzo_mcp.tools.common.permissions import PermissionManager
|
|
15
|
+
from hanzo_mcp.tools.vector.git_ingester import GitIngester
|
|
16
|
+
from hanzo_mcp.tools.vector.infinity_store import InfinityVectorStore
|
|
17
|
+
from hanzo_mcp.tools.vector.project_manager import ProjectVectorManager
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Pydantic-annotated parameter types describing the index tool's inputs.
# NOTE(review): none of these aliases is referenced by the code visible in
# this file; `IndexTool.call` receives its arguments via `IndexToolParams`.

# Path to index. The description promises a working-directory default, but no
# `default=` is declared here; the fallback to `os.getcwd()` is applied inside
# `IndexTool.call`.
Path_str = Annotated[
    str,
    Field(
        description="Path to index (defaults to current working directory)",
        min_length=1,
    ),
]

# Whether git history should be indexed in addition to the files themselves.
IncludeGitHistory = Annotated[
    bool,
    Field(
        description="Include git history in the index",
        default=True,
    ),
]

# Optional list of file patterns; None means "no explicit selection".
FilePatterns = Annotated[
    list[str] | None,
    Field(
        description="File patterns to include (e.g., ['*.py', '*.js'])",
        default=None,
    ),
]

# Whether the tool's result should include the detailed statistics report.
ShowStats = Annotated[
    bool,
    Field(
        description="Show detailed statistics after indexing",
        default=True,
    ),
]

# Whether to re-index even when the project already has indexed documents.
Force = Annotated[
    bool,
    Field(
        description="Force re-indexing even if already indexed",
        default=False,
    ),
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class IndexToolParams(TypedDict, total=False):
    """Keyword arguments accepted by ``IndexTool.call``.

    Declared with ``total=False``, so every key is optional. The fallbacks
    noted on each field are the ones ``IndexTool.call`` applies when the key
    is missing.
    """

    # Directory (or file) to index; falls back to os.getcwd().
    path: str
    # Also ingest git history when a ".git" entry exists; falls back to True.
    include_git_history: bool
    # Patterns selecting which files to index; None (or absent) selects the
    # built-in default pattern list.
    file_patterns: list[str] | None
    # Return a formatted statistics report; falls back to True.
    show_stats: bool
    # Clear and rebuild the index even if documents exist; falls back to False.
    force: bool
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@final
|
|
72
|
+
class IndexTool(BaseTool):
|
|
73
|
+
"""Tool for indexing files and git history into vector store."""
|
|
74
|
+
|
|
75
|
+
def __init__(self, permission_manager: PermissionManager):
    """Set up the tool with its access-control and project-store helpers.

    Args:
        permission_manager: Object consulted for path access checks. It is
            stored on the instance and also passed to the
            ``ProjectVectorManager`` created here.
    """
    self.permission_manager = permission_manager
    # Built once per tool instance and reused by every call().
    project_manager = ProjectVectorManager(permission_manager)
    self.project_manager = project_manager
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
@override
|
|
86
|
+
def name(self) -> str:
|
|
87
|
+
"""Get the tool name."""
|
|
88
|
+
return "index"
|
|
89
|
+
|
|
90
|
+
@property
|
|
91
|
+
@override
|
|
92
|
+
def description(self) -> str:
|
|
93
|
+
"""Get the tool description."""
|
|
94
|
+
return """Index files and git history into the vector store for semantic search.
|
|
95
|
+
|
|
96
|
+
This tool:
|
|
97
|
+
- Indexes all project files into a vector database
|
|
98
|
+
- Includes git history (commits, diffs, blame) when available
|
|
99
|
+
- Supports incremental updates
|
|
100
|
+
- Shows statistics about indexed content
|
|
101
|
+
- Automatically creates project-specific databases
|
|
102
|
+
|
|
103
|
+
Usage:
|
|
104
|
+
- index: Index the current directory
|
|
105
|
+
- index --path /path/to/project: Index a specific path
|
|
106
|
+
- index --file-patterns "*.py" "*.js": Index only specific file types
|
|
107
|
+
- index --no-git-history: Skip git history indexing
|
|
108
|
+
- index --force: Force re-indexing of all files"""
|
|
109
|
+
|
|
110
|
+
@override
async def call(
    self,
    ctx: MCPContext,
    **params: Unpack[IndexToolParams],
) -> str:
    """Index a path (and optionally its git history) into the project's vector store.

    Steps: resolve and permission-check the path, return early if the
    project already has documents (unless ``force``), index every file
    matched by the patterns, optionally ingest git history, then build the
    result message.

    Args:
        ctx: MCP context, wrapped via ``create_tool_context`` for logging.
        **params: See ``IndexToolParams``. Missing keys fall back to:
            ``path`` -> current working directory, ``include_git_history``
            -> True, ``file_patterns`` -> built-in list, ``show_stats`` ->
            True, ``force`` -> False.

    Returns:
        A human-readable message: a permission / missing-path error, the
        "already indexed" notice, a statistics report, or a short success
        line. Per-file failures are appended (first 10 listed). Any
        exception raised during indexing is caught and returned as an
        "Error during indexing: ..." string rather than propagated.
    """
    start_time = time.time()
    tool_ctx = create_tool_context(ctx)
    await tool_ctx.set_tool_info(self.name)

    # `or` instead of a .get() default so an explicit None / "" path also
    # falls back to the working directory instead of crashing abspath().
    path = params.get("path") or os.getcwd()
    include_git_history = params.get("include_git_history", True)
    file_patterns = params.get("file_patterns")
    show_stats = params.get("show_stats", True)
    force = params.get("force", False)

    abs_path = os.path.abspath(path)

    # NOTE(review): confirm PermissionManager actually exposes
    # `has_permission`; it is not defined in this file.
    if not self.permission_manager.has_permission(abs_path):
        return f"Permission denied: {abs_path}"

    if not os.path.exists(abs_path):
        return f"Path does not exist: {abs_path}"

    await tool_ctx.info(f"Starting indexing of {abs_path}")

    max_file_size = 10 * 1024 * 1024  # 10MB; larger files are skipped
    max_reported_errors = 10

    try:
        # Get or create vector store for this project
        vector_store = self.project_manager.get_project_store(abs_path)

        # Without force, an index that already holds documents is left alone.
        if not force:
            stats = await vector_store.get_stats()
            if stats and stats.get("document_count", 0) > 0:
                await tool_ctx.info("Project already indexed, use --force to re-index")
                if show_stats:
                    return self._format_stats(stats, abs_path, time.time() - start_time)
                return "Project is already indexed. Use --force to re-index."

        if file_patterns is None:
            # Default patterns for code, docs, config and build files
            file_patterns = [
                "*.py", "*.js", "*.ts", "*.jsx", "*.tsx",
                "*.java", "*.cpp", "*.c", "*.h", "*.hpp",
                "*.go", "*.rs", "*.rb", "*.php", "*.swift",
                "*.kt", "*.scala", "*.cs", "*.vb", "*.fs",
                "*.sh", "*.bash", "*.zsh", "*.fish",
                "*.md", "*.rst", "*.txt", "*.json", "*.yaml", "*.yml",
                "*.toml", "*.ini", "*.cfg", "*.conf",
                "*.html", "*.css", "*.scss", "*.sass", "*.less",
                "*.sql", "*.graphql", "*.proto",
                "Dockerfile", "Makefile", "*.mk",
                ".gitignore", ".dockerignore", "requirements.txt",
                "package.json", "Cargo.toml", "go.mod", "pom.xml",
            ]

        if force:
            await tool_ctx.info("Clearing existing index...")
            await vector_store.clear()

        await tool_ctx.info("Indexing files...")
        indexed_files = 0
        total_size = 0
        errors = []
        # Patterns overlap (e.g. "*.txt" and "requirements.txt"), so track
        # visited paths to avoid indexing and counting a file twice.
        seen_files: set[str] = set()

        for pattern in file_patterns:
            for file_path in await self._find_files(abs_path, pattern):
                if file_path in seen_files:
                    continue
                seen_files.add(file_path)

                try:
                    file_size = os.path.getsize(file_path)
                    if file_size > max_file_size:
                        await tool_ctx.warning(f"Skipping large file: {file_path}")
                        continue

                    try:
                        with open(file_path, "r", encoding="utf-8") as f:
                            content = f.read()
                    except UnicodeDecodeError:
                        # Not valid UTF-8 text: treated as binary and skipped
                        continue

                    rel_path = os.path.relpath(file_path, abs_path)
                    await vector_store.index_document(
                        content=content,
                        metadata={
                            "type": "file",
                            "path": rel_path,
                            "absolute_path": file_path,
                            "size": file_size,
                            "extension": Path(file_path).suffix,
                        },
                    )
                    indexed_files += 1
                    total_size += file_size

                    # Periodic progress message for large trees
                    if indexed_files % 100 == 0:
                        await tool_ctx.info(f"Indexed {indexed_files} files...")

                except Exception as e:
                    # Best effort: one bad file must not abort the whole run
                    errors.append(f"{file_path}: {str(e)}")

        await tool_ctx.info(f"Indexed {indexed_files} files ({total_size / 1024 / 1024:.1f} MB)")

        # Git history is only ingested when a ".git" entry sits directly
        # under the indexed path.
        git_stats = {}
        if include_git_history and os.path.exists(os.path.join(abs_path, ".git")):
            await tool_ctx.info("Indexing git history...")

            git_ingester = GitIngester(vector_store)
            git_stats = await git_ingester.ingest_repository(
                repo_path=abs_path,
                include_history=True,
                include_diffs=True,
                include_blame=True,
                file_patterns=file_patterns,
            )

            await tool_ctx.info(
                f"Indexed {git_stats.get('commits_indexed', 0)} commits, "
                f"{git_stats.get('diffs_indexed', 0)} diffs"
            )

        if show_stats:
            # Copy (and tolerate a None result) so the store's own stats
            # object is never mutated by the run-specific counters below.
            stats = dict(await vector_store.get_stats() or {})
            stats.update({
                "files_indexed": indexed_files,
                "total_size_mb": total_size / 1024 / 1024,
                "errors": len(errors),
                **git_stats,
            })
            result = self._format_stats(stats, abs_path, time.time() - start_time)
        else:
            result = f"Successfully indexed {indexed_files} files"

        # Surface per-file failures regardless of show_stats.
        if errors:
            result += f"\n\nErrors ({len(errors)}):\n"
            result += "\n".join(errors[:max_reported_errors])
            if len(errors) > max_reported_errors:
                result += f"\n... and {len(errors) - max_reported_errors} more errors"

        return result

    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        await tool_ctx.error(f"Indexing failed: {str(e)}")
        return f"Error during indexing: {str(e)}"
|
|
275
|
+
|
|
276
|
+
async def _find_files(self, base_path: str, pattern: str) -> list[str]:
|
|
277
|
+
"""Find files matching a pattern.
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
base_path: Base directory to search
|
|
281
|
+
pattern: File pattern to match
|
|
282
|
+
|
|
283
|
+
Returns:
|
|
284
|
+
List of matching file paths
|
|
285
|
+
"""
|
|
286
|
+
import glob
|
|
287
|
+
|
|
288
|
+
# Use glob to find files
|
|
289
|
+
if pattern.startswith("*."):
|
|
290
|
+
# Extension pattern
|
|
291
|
+
files = glob.glob(
|
|
292
|
+
os.path.join(base_path, "**", pattern),
|
|
293
|
+
recursive=True,
|
|
294
|
+
)
|
|
295
|
+
else:
|
|
296
|
+
# Exact filename
|
|
297
|
+
files = glob.glob(
|
|
298
|
+
os.path.join(base_path, "**", pattern),
|
|
299
|
+
recursive=True,
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
# Filter out hidden directories and common ignore patterns
|
|
303
|
+
filtered_files = []
|
|
304
|
+
ignore_dirs = {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"}
|
|
305
|
+
|
|
306
|
+
for file_path in files:
|
|
307
|
+
# Check if any parent directory is in ignore list
|
|
308
|
+
parts = Path(file_path).parts
|
|
309
|
+
if any(part in ignore_dirs for part in parts):
|
|
310
|
+
continue
|
|
311
|
+
if any(part.startswith(".") and part != "." for part in parts[:-1]):
|
|
312
|
+
continue # Skip hidden directories (but allow hidden files like .gitignore)
|
|
313
|
+
filtered_files.append(file_path)
|
|
314
|
+
|
|
315
|
+
return filtered_files
|
|
316
|
+
|
|
317
|
+
def _format_stats(self, stats: dict, path: str, elapsed_time: float) -> str:
|
|
318
|
+
"""Format statistics for display.
|
|
319
|
+
|
|
320
|
+
Args:
|
|
321
|
+
stats: Statistics dictionary
|
|
322
|
+
path: Indexed path
|
|
323
|
+
elapsed_time: Time taken for indexing
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
Formatted statistics string
|
|
327
|
+
"""
|
|
328
|
+
result = f"=== Index Statistics for {path} ===\n\n"
|
|
329
|
+
|
|
330
|
+
# Basic stats
|
|
331
|
+
result += f"Indexing completed in {elapsed_time:.1f} seconds\n\n"
|
|
332
|
+
|
|
333
|
+
result += "Content Statistics:\n"
|
|
334
|
+
result += f" Documents: {stats.get('document_count', 0):,}\n"
|
|
335
|
+
result += f" Files indexed: {stats.get('files_indexed', 0):,}\n"
|
|
336
|
+
result += f" Total size: {stats.get('total_size_mb', 0):.1f} MB\n"
|
|
337
|
+
|
|
338
|
+
if stats.get("commits_indexed", 0) > 0:
|
|
339
|
+
result += f"\nGit History:\n"
|
|
340
|
+
result += f" Commits: {stats.get('commits_indexed', 0):,}\n"
|
|
341
|
+
result += f" Diffs: {stats.get('diffs_indexed', 0):,}\n"
|
|
342
|
+
result += f" Blame entries: {stats.get('blame_entries', 0):,}\n"
|
|
343
|
+
|
|
344
|
+
# Vector store info
|
|
345
|
+
result += f"\nVector Store:\n"
|
|
346
|
+
result += f" Database: {stats.get('database_name', 'default')}\n"
|
|
347
|
+
result += f" Table: {stats.get('table_name', 'documents')}\n"
|
|
348
|
+
result += f" Vectors: {stats.get('vector_count', stats.get('document_count', 0)):,}\n"
|
|
349
|
+
|
|
350
|
+
if stats.get("errors", 0) > 0:
|
|
351
|
+
result += f"\nErrors: {stats.get('errors', 0)}\n"
|
|
352
|
+
|
|
353
|
+
return result
|
|
354
|
+
|
|
355
|
+
def register(self, mcp_server) -> None:
    """No-op registration hook.

    Args:
        mcp_server: Accepted for interface compatibility; not used here.
    """
    # Nothing to do: tool registration is handled by the ToolRegistry.
    return None
|