hanzo-mcp 0.5.1__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of hanzo-mcp has been flagged as possibly problematic.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/cli.py +32 -0
- hanzo_mcp/dev_server.py +246 -0
- hanzo_mcp/prompts/__init__.py +1 -1
- hanzo_mcp/prompts/project_system.py +43 -7
- hanzo_mcp/server.py +5 -1
- hanzo_mcp/tools/__init__.py +168 -6
- hanzo_mcp/tools/agent/__init__.py +1 -1
- hanzo_mcp/tools/agent/agent.py +401 -0
- hanzo_mcp/tools/agent/agent_tool.py +3 -4
- hanzo_mcp/tools/common/__init__.py +1 -1
- hanzo_mcp/tools/common/base.py +9 -4
- hanzo_mcp/tools/common/batch_tool.py +3 -5
- hanzo_mcp/tools/common/config_tool.py +1 -1
- hanzo_mcp/tools/common/context.py +1 -1
- hanzo_mcp/tools/common/palette.py +344 -0
- hanzo_mcp/tools/common/palette_loader.py +108 -0
- hanzo_mcp/tools/common/stats.py +261 -0
- hanzo_mcp/tools/common/thinking_tool.py +3 -5
- hanzo_mcp/tools/common/tool_disable.py +144 -0
- hanzo_mcp/tools/common/tool_enable.py +182 -0
- hanzo_mcp/tools/common/tool_list.py +260 -0
- hanzo_mcp/tools/config/__init__.py +10 -0
- hanzo_mcp/tools/config/config_tool.py +212 -0
- hanzo_mcp/tools/config/index_config.py +176 -0
- hanzo_mcp/tools/config/palette_tool.py +166 -0
- hanzo_mcp/tools/database/__init__.py +71 -0
- hanzo_mcp/tools/database/database_manager.py +246 -0
- hanzo_mcp/tools/database/graph.py +482 -0
- hanzo_mcp/tools/database/graph_add.py +257 -0
- hanzo_mcp/tools/database/graph_query.py +536 -0
- hanzo_mcp/tools/database/graph_remove.py +267 -0
- hanzo_mcp/tools/database/graph_search.py +348 -0
- hanzo_mcp/tools/database/graph_stats.py +345 -0
- hanzo_mcp/tools/database/sql.py +411 -0
- hanzo_mcp/tools/database/sql_query.py +229 -0
- hanzo_mcp/tools/database/sql_search.py +296 -0
- hanzo_mcp/tools/database/sql_stats.py +254 -0
- hanzo_mcp/tools/editor/__init__.py +11 -0
- hanzo_mcp/tools/editor/neovim_command.py +272 -0
- hanzo_mcp/tools/editor/neovim_edit.py +290 -0
- hanzo_mcp/tools/editor/neovim_session.py +356 -0
- hanzo_mcp/tools/filesystem/__init__.py +52 -13
- hanzo_mcp/tools/filesystem/base.py +1 -1
- hanzo_mcp/tools/filesystem/batch_search.py +812 -0
- hanzo_mcp/tools/filesystem/content_replace.py +3 -5
- hanzo_mcp/tools/filesystem/diff.py +193 -0
- hanzo_mcp/tools/filesystem/directory_tree.py +3 -5
- hanzo_mcp/tools/filesystem/edit.py +3 -5
- hanzo_mcp/tools/filesystem/find.py +443 -0
- hanzo_mcp/tools/filesystem/find_files.py +348 -0
- hanzo_mcp/tools/filesystem/git_search.py +505 -0
- hanzo_mcp/tools/filesystem/grep.py +2 -2
- hanzo_mcp/tools/filesystem/multi_edit.py +3 -5
- hanzo_mcp/tools/filesystem/read.py +17 -5
- hanzo_mcp/tools/filesystem/{grep_ast_tool.py → symbols.py} +17 -27
- hanzo_mcp/tools/filesystem/symbols_unified.py +376 -0
- hanzo_mcp/tools/filesystem/tree.py +268 -0
- hanzo_mcp/tools/filesystem/unified_search.py +465 -443
- hanzo_mcp/tools/filesystem/unix_aliases.py +99 -0
- hanzo_mcp/tools/filesystem/watch.py +174 -0
- hanzo_mcp/tools/filesystem/write.py +3 -5
- hanzo_mcp/tools/jupyter/__init__.py +9 -12
- hanzo_mcp/tools/jupyter/base.py +1 -1
- hanzo_mcp/tools/jupyter/jupyter.py +326 -0
- hanzo_mcp/tools/jupyter/notebook_edit.py +3 -4
- hanzo_mcp/tools/jupyter/notebook_read.py +3 -5
- hanzo_mcp/tools/llm/__init__.py +31 -0
- hanzo_mcp/tools/llm/consensus_tool.py +351 -0
- hanzo_mcp/tools/llm/llm_manage.py +413 -0
- hanzo_mcp/tools/llm/llm_tool.py +346 -0
- hanzo_mcp/tools/llm/llm_unified.py +851 -0
- hanzo_mcp/tools/llm/provider_tools.py +412 -0
- hanzo_mcp/tools/mcp/__init__.py +15 -0
- hanzo_mcp/tools/mcp/mcp_add.py +263 -0
- hanzo_mcp/tools/mcp/mcp_remove.py +127 -0
- hanzo_mcp/tools/mcp/mcp_stats.py +165 -0
- hanzo_mcp/tools/mcp/mcp_unified.py +503 -0
- hanzo_mcp/tools/shell/__init__.py +21 -23
- hanzo_mcp/tools/shell/base.py +1 -1
- hanzo_mcp/tools/shell/base_process.py +303 -0
- hanzo_mcp/tools/shell/bash_unified.py +134 -0
- hanzo_mcp/tools/shell/logs.py +265 -0
- hanzo_mcp/tools/shell/npx.py +194 -0
- hanzo_mcp/tools/shell/npx_background.py +254 -0
- hanzo_mcp/tools/shell/npx_unified.py +101 -0
- hanzo_mcp/tools/shell/open.py +107 -0
- hanzo_mcp/tools/shell/pkill.py +262 -0
- hanzo_mcp/tools/shell/process_unified.py +131 -0
- hanzo_mcp/tools/shell/processes.py +279 -0
- hanzo_mcp/tools/shell/run_background.py +326 -0
- hanzo_mcp/tools/shell/run_command.py +3 -4
- hanzo_mcp/tools/shell/run_command_windows.py +3 -4
- hanzo_mcp/tools/shell/uvx.py +187 -0
- hanzo_mcp/tools/shell/uvx_background.py +249 -0
- hanzo_mcp/tools/shell/uvx_unified.py +101 -0
- hanzo_mcp/tools/todo/__init__.py +1 -1
- hanzo_mcp/tools/todo/base.py +1 -1
- hanzo_mcp/tools/todo/todo.py +265 -0
- hanzo_mcp/tools/todo/todo_read.py +3 -5
- hanzo_mcp/tools/todo/todo_write.py +3 -5
- hanzo_mcp/tools/vector/__init__.py +6 -1
- hanzo_mcp/tools/vector/git_ingester.py +3 -0
- hanzo_mcp/tools/vector/index_tool.py +358 -0
- hanzo_mcp/tools/vector/infinity_store.py +98 -0
- hanzo_mcp/tools/vector/project_manager.py +27 -5
- hanzo_mcp/tools/vector/vector.py +311 -0
- hanzo_mcp/tools/vector/vector_index.py +1 -1
- hanzo_mcp/tools/vector/vector_search.py +12 -7
- hanzo_mcp-0.6.1.dist-info/METADATA +336 -0
- hanzo_mcp-0.6.1.dist-info/RECORD +134 -0
- hanzo_mcp-0.6.1.dist-info/entry_points.txt +3 -0
- hanzo_mcp-0.5.1.dist-info/METADATA +0 -276
- hanzo_mcp-0.5.1.dist-info/RECORD +0 -68
- hanzo_mcp-0.5.1.dist-info/entry_points.txt +0 -2
- {hanzo_mcp-0.5.1.dist-info → hanzo_mcp-0.6.1.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.5.1.dist-info → hanzo_mcp-0.6.1.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.5.1.dist-info → hanzo_mcp-0.6.1.dist-info}/top_level.txt +0 -0
@@ -9,7 +9,7 @@ Supported backends:
 
 from hanzo_mcp.tools.common.base import BaseTool
 from hanzo_mcp.tools.common.permissions import PermissionManager
-from
+from mcp.server import FastMCP
 
 # Try to import vector dependencies
 try:
@@ -17,6 +17,7 @@ try:
     from .project_manager import ProjectVectorManager
     from .vector_index import VectorIndexTool
     from .vector_search import VectorSearchTool
+    from .index_tool import IndexTool
 
     VECTOR_AVAILABLE = True
 
@@ -64,6 +65,9 @@ try:
     print(f"Detected {len(detected_projects)} projects with LLM.md files")
 
     # Register individual tools if enabled
+    if tool_enabled.get("index", True):
+        tools.append(IndexTool(permission_manager))
+
     if tool_enabled.get("vector_index", True):
         tools.append(VectorIndexTool(permission_manager, project_manager))
 
@@ -94,6 +98,7 @@ if VECTOR_AVAILABLE:
     __all__.extend([
         "InfinityVectorStore",
         "ProjectVectorManager",
+        "IndexTool",
         "VectorIndexTool",
         "VectorSearchTool",
     ])
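The registration hunks above gate each tool behind a tool_enabled lookup that defaults to True, so the new "index" tool ships enabled unless explicitly switched off. A rough standalone sketch of that pattern follows; the FakeTool class and the config dict are illustrative stand-ins, not hanzo_mcp objects.

# Standalone sketch of the conditional-registration pattern used above.
# All names here are illustrative stand-ins, not hanzo_mcp's real objects.

class FakeTool:
    def __init__(self, name: str):
        self.name = name

def register_tools(tool_enabled: dict[str, bool]) -> list[FakeTool]:
    tools: list[FakeTool] = []
    # Each tool is registered only if its flag is absent or set to True.
    if tool_enabled.get("index", True):
        tools.append(FakeTool("index"))
    if tool_enabled.get("vector_index", True):
        tools.append(FakeTool("vector_index"))
    return tools

if __name__ == "__main__":
    # "index" stays enabled by default; "vector_index" is switched off.
    enabled = register_tools({"vector_index": False})
    print([t.name for t in enabled])  # ['index']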
@@ -89,9 +89,11 @@ class GitIngester:
             "repository": str(repo_path),
             "branch": branch,
             "commits_processed": 0,
+            "commits_indexed": 0,
             "files_indexed": 0,
             "symbols_extracted": 0,
             "diffs_indexed": 0,
+            "blame_entries": 0,
             "errors": []
         }
 
@@ -125,6 +127,7 @@ class GitIngester:
 
         for commit in commits:
             self._index_commit(commit, include_diffs=include_diffs)
+            results["commits_indexed"] = results.get("commits_indexed", 0) + 1
 
             if include_diffs:
                 results["diffs_indexed"] += len(commit.files)
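The GitIngester hunks only extend bookkeeping: the results dict gains commits_indexed and blame_entries counters, and the ingest loop bumps commits_indexed once per commit. A minimal sketch of that accumulation, with plain strings standing in for commit objects:

# Tiny sketch of the stats bookkeeping added above; commit objects are stand-ins.
def ingest(commits: list[str]) -> dict:
    results = {
        "commits_processed": 0, "commits_indexed": 0,
        "files_indexed": 0, "diffs_indexed": 0, "blame_entries": 0, "errors": [],
    }
    for commit in commits:
        # ... index the commit here ...
        results["commits_indexed"] = results.get("commits_indexed", 0) + 1
    return results

if __name__ == "__main__":
    print(ingest(["a1b2c3", "d4e5f6"])["commits_indexed"])  # 2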
@@ -0,0 +1,358 @@
+"""Index tool for managing vector store indexing."""
+
+import asyncio
+import os
+import time
+from pathlib import Path
+from typing import Annotated, TypedDict, Unpack, final, override
+
+from mcp.server.fastmcp import Context as MCPContext
+from pydantic import Field
+
+from hanzo_mcp.tools.common.base import BaseTool
+from hanzo_mcp.tools.common.context import create_tool_context
+from hanzo_mcp.tools.common.permissions import PermissionManager
+from hanzo_mcp.tools.vector.git_ingester import GitIngester
+from hanzo_mcp.tools.vector.infinity_store import InfinityVectorStore
+from hanzo_mcp.tools.vector.project_manager import ProjectVectorManager
+
+
+Path_str = Annotated[
+    str,
+    Field(
+        description="Path to index (defaults to current working directory)",
+        min_length=1,
+    ),
+]
+
+IncludeGitHistory = Annotated[
+    bool,
+    Field(
+        description="Include git history in the index",
+        default=True,
+    ),
+]
+
+FilePatterns = Annotated[
+    list[str] | None,
+    Field(
+        description="File patterns to include (e.g., ['*.py', '*.js'])",
+        default=None,
+    ),
+]
+
+ShowStats = Annotated[
+    bool,
+    Field(
+        description="Show detailed statistics after indexing",
+        default=True,
+    ),
+]
+
+Force = Annotated[
+    bool,
+    Field(
+        description="Force re-indexing even if already indexed",
+        default=False,
+    ),
+]
+
+
+class IndexToolParams(TypedDict, total=False):
+    """Parameters for the index tool."""
+
+    path: str
+    include_git_history: bool
+    file_patterns: list[str] | None
+    show_stats: bool
+    force: bool
+
+
+@final
+class IndexTool(BaseTool):
+    """Tool for indexing files and git history into vector store."""
+
+    def __init__(self, permission_manager: PermissionManager):
+        """Initialize the index tool.
+
+        Args:
+            permission_manager: Permission manager for access control
+        """
+        self.permission_manager = permission_manager
+        self.project_manager = ProjectVectorManager(permission_manager)
+
+    @property
+    @override
+    def name(self) -> str:
+        """Get the tool name."""
+        return "index"
+
+    @property
+    @override
+    def description(self) -> str:
+        """Get the tool description."""
+        return """Index files and git history into the vector store for semantic search.
+
+This tool:
+- Indexes all project files into a vector database
+- Includes git history (commits, diffs, blame) when available
+- Supports incremental updates
+- Shows statistics about indexed content
+- Automatically creates project-specific databases
+
+Usage:
+- index: Index the current directory
+- index --path /path/to/project: Index a specific path
+- index --file-patterns "*.py" "*.js": Index only specific file types
+- index --no-git-history: Skip git history indexing
+- index --force: Force re-indexing of all files"""
+
+    @override
+    async def call(
+        self,
+        ctx: MCPContext,
+        **params: Unpack[IndexToolParams],
+    ) -> str:
+        """Execute the index tool.
+
+        Args:
+            ctx: MCP context
+            **params: Tool parameters
+
+        Returns:
+            Indexing result and statistics
+        """
+        start_time = time.time()
+        tool_ctx = create_tool_context(ctx)
+        await tool_ctx.set_tool_info(self.name)
+
+        # Extract parameters
+        path = params.get("path", os.getcwd())
+        include_git_history = params.get("include_git_history", True)
+        file_patterns = params.get("file_patterns")
+        show_stats = params.get("show_stats", True)
+        force = params.get("force", False)
+
+        # Resolve absolute path
+        abs_path = os.path.abspath(path)
+
+        # Check permissions
+        if not self.permission_manager.has_permission(abs_path):
+            return f"Permission denied: {abs_path}"
+
+        # Check if path exists
+        if not os.path.exists(abs_path):
+            return f"Path does not exist: {abs_path}"
+
+        await tool_ctx.info(f"Starting indexing of {abs_path}")
+
+        try:
+            # Get or create vector store for this project
+            vector_store = self.project_manager.get_project_store(abs_path)
+
+            # Check if already indexed (unless force)
+            if not force:
+                stats = await vector_store.get_stats()
+                if stats and stats.get("document_count", 0) > 0:
+                    await tool_ctx.info("Project already indexed, use --force to re-index")
+                    if show_stats:
+                        return self._format_stats(stats, abs_path, time.time() - start_time)
+                    return "Project is already indexed. Use --force to re-index."
+
+            # Prepare file patterns
+            if file_patterns is None:
+                # Default patterns for code files
+                file_patterns = [
+                    "*.py", "*.js", "*.ts", "*.jsx", "*.tsx",
+                    "*.java", "*.cpp", "*.c", "*.h", "*.hpp",
+                    "*.go", "*.rs", "*.rb", "*.php", "*.swift",
+                    "*.kt", "*.scala", "*.cs", "*.vb", "*.fs",
+                    "*.sh", "*.bash", "*.zsh", "*.fish",
+                    "*.md", "*.rst", "*.txt", "*.json", "*.yaml", "*.yml",
+                    "*.toml", "*.ini", "*.cfg", "*.conf",
+                    "*.html", "*.css", "*.scss", "*.sass", "*.less",
+                    "*.sql", "*.graphql", "*.proto",
+                    "Dockerfile", "Makefile", "*.mk",
+                    ".gitignore", ".dockerignore", "requirements.txt",
+                    "package.json", "Cargo.toml", "go.mod", "pom.xml",
+                ]
+
+            # Clear existing index if force
+            if force:
+                await tool_ctx.info("Clearing existing index...")
+                await vector_store.clear()
+
+            # Index files
+            await tool_ctx.info("Indexing files...")
+            indexed_files = 0
+            total_size = 0
+            errors = []
+
+            for pattern in file_patterns:
+                pattern_files = await self._find_files(abs_path, pattern)
+                for file_path in pattern_files:
+                    try:
+                        # Check file size (skip very large files)
+                        file_size = os.path.getsize(file_path)
+                        if file_size > 10 * 1024 * 1024:  # 10MB
+                            await tool_ctx.warning(f"Skipping large file: {file_path}")
+                            continue
+
+                        # Read file content
+                        try:
+                            with open(file_path, "r", encoding="utf-8") as f:
+                                content = f.read()
+                        except UnicodeDecodeError:
+                            # Skip binary files
+                            continue
+
+                        # Index the file
+                        rel_path = os.path.relpath(file_path, abs_path)
+                        await vector_store.index_document(
+                            content=content,
+                            metadata={
+                                "type": "file",
+                                "path": rel_path,
+                                "absolute_path": file_path,
+                                "size": file_size,
+                                "extension": Path(file_path).suffix,
+                            }
+                        )
+                        indexed_files += 1
+                        total_size += file_size
+
+                        if indexed_files % 100 == 0:
+                            await tool_ctx.info(f"Indexed {indexed_files} files...")
+
+                    except Exception as e:
+                        errors.append(f"{file_path}: {str(e)}")
+
+            await tool_ctx.info(f"Indexed {indexed_files} files ({total_size / 1024 / 1024:.1f} MB)")
+
+            # Index git history if requested
+            git_stats = {}
+            if include_git_history and os.path.exists(os.path.join(abs_path, ".git")):
+                await tool_ctx.info("Indexing git history...")
+
+                git_ingester = GitIngester(vector_store)
+                git_stats = await git_ingester.ingest_repository(
+                    repo_path=abs_path,
+                    include_history=True,
+                    include_diffs=True,
+                    include_blame=True,
+                    file_patterns=file_patterns,
+                )
+
+                await tool_ctx.info(
+                    f"Indexed {git_stats.get('commits_indexed', 0)} commits, "
+                    f"{git_stats.get('diffs_indexed', 0)} diffs"
+                )
+
+            # Get final statistics
+            if show_stats:
+                stats = await vector_store.get_stats()
+                stats.update({
+                    "files_indexed": indexed_files,
+                    "total_size_mb": total_size / 1024 / 1024,
+                    "errors": len(errors),
+                    **git_stats,
+                })
+                result = self._format_stats(stats, abs_path, time.time() - start_time)
+
+                if errors:
+                    result += f"\n\nErrors ({len(errors)}):\n"
+                    result += "\n".join(errors[:10])  # Show first 10 errors
+                    if len(errors) > 10:
+                        result += f"\n... and {len(errors) - 10} more errors"
+
+                return result
+            else:
+                return f"Successfully indexed {indexed_files} files"
+
+        except Exception as e:
+            await tool_ctx.error(f"Indexing failed: {str(e)}")
+            return f"Error during indexing: {str(e)}"
+
+    async def _find_files(self, base_path: str, pattern: str) -> list[str]:
+        """Find files matching a pattern.
+
+        Args:
+            base_path: Base directory to search
+            pattern: File pattern to match
+
+        Returns:
+            List of matching file paths
+        """
+        import glob
+
+        # Use glob to find files
+        if pattern.startswith("*."):
+            # Extension pattern
+            files = glob.glob(
+                os.path.join(base_path, "**", pattern),
+                recursive=True,
+            )
+        else:
+            # Exact filename
+            files = glob.glob(
+                os.path.join(base_path, "**", pattern),
+                recursive=True,
+            )
+
+        # Filter out hidden directories and common ignore patterns
+        filtered_files = []
+        ignore_dirs = {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"}
+
+        for file_path in files:
+            # Check if any parent directory is in ignore list
+            parts = Path(file_path).parts
+            if any(part in ignore_dirs for part in parts):
+                continue
+            if any(part.startswith(".") and part != "." for part in parts[:-1]):
+                continue  # Skip hidden directories (but allow hidden files like .gitignore)
+            filtered_files.append(file_path)
+
+        return filtered_files
+
+    def _format_stats(self, stats: dict, path: str, elapsed_time: float) -> str:
+        """Format statistics for display.
+
+        Args:
+            stats: Statistics dictionary
+            path: Indexed path
+            elapsed_time: Time taken for indexing
+
+        Returns:
+            Formatted statistics string
+        """
+        result = f"=== Index Statistics for {path} ===\n\n"
+
+        # Basic stats
+        result += f"Indexing completed in {elapsed_time:.1f} seconds\n\n"
+
+        result += "Content Statistics:\n"
+        result += f"  Documents: {stats.get('document_count', 0):,}\n"
+        result += f"  Files indexed: {stats.get('files_indexed', 0):,}\n"
+        result += f"  Total size: {stats.get('total_size_mb', 0):.1f} MB\n"
+
+        if stats.get("commits_indexed", 0) > 0:
+            result += f"\nGit History:\n"
+            result += f"  Commits: {stats.get('commits_indexed', 0):,}\n"
+            result += f"  Diffs: {stats.get('diffs_indexed', 0):,}\n"
+            result += f"  Blame entries: {stats.get('blame_entries', 0):,}\n"
+
+        # Vector store info
+        result += f"\nVector Store:\n"
+        result += f"  Database: {stats.get('database_name', 'default')}\n"
+        result += f"  Table: {stats.get('table_name', 'documents')}\n"
+        result += f"  Vectors: {stats.get('vector_count', stats.get('document_count', 0)):,}\n"
+
+        if stats.get("errors", 0) > 0:
+            result += f"\nErrors: {stats.get('errors', 0)}\n"
+
+        return result
+
+    def register(self, mcp_server) -> None:
+        """Register this tool with the MCP server."""
+        # Tool registration is handled by the ToolRegistry
+        pass
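The new index tool discovers candidate files with a recursive glob per pattern and then filters out hits under ignored or hidden directories. Below is a minimal, self-contained sketch of that discovery step only; it mirrors _find_files but is not the tool's code, and the ignore set is copied from the hunk above for illustration.

# Standalone sketch of the file-discovery approach _find_files takes above.
import glob
import os
from pathlib import Path

IGNORE_DIRS = {".git", "__pycache__", "node_modules", ".venv", "venv", "dist", "build"}

def find_files(base_path: str, pattern: str) -> list[str]:
    # Recursive glob for the pattern anywhere under base_path.
    hits = glob.glob(os.path.join(base_path, "**", pattern), recursive=True)
    kept = []
    for file_path in hits:
        parts = Path(file_path).parts
        # Drop anything under an ignored directory.
        if any(part in IGNORE_DIRS for part in parts):
            continue
        # Skip files under hidden directories, but keep hidden files themselves.
        if any(part.startswith(".") and part != "." for part in parts[:-1]):
            continue
        kept.append(file_path)
    return kept

if __name__ == "__main__":
    print(find_files(".", "*.py")[:5])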
@@ -725,6 +725,104 @@ class InfinityVectorStore:
         import random
         return [random.random() for _ in range(self.dimension)]
 
+    async def get_stats(self) -> Dict[str, Any]:
+        """Get statistics about the vector store.
+
+        Returns:
+            Dictionary with statistics
+        """
+        try:
+            # Get document count
+            doc_count_result = self.documents_table.output(["count(*)"]).to_pl()
+            doc_count = doc_count_result.item(0, 0) if len(doc_count_result) > 0 else 0
+
+            # Get unique file count
+            file_result = self.documents_table.output(["file_path"]).to_pl()
+            unique_files = set()
+            for row in file_result.iter_rows():
+                if row[0]:
+                    unique_files.add(row[0])
+
+            # Get symbol count
+            symbol_count = 0
+            try:
+                symbol_result = self.symbols_table.output(["count(*)"]).to_pl()
+                symbol_count = symbol_result.item(0, 0) if len(symbol_result) > 0 else 0
+            except:
+                pass
+
+            # Get AST count
+            ast_count = 0
+            try:
+                ast_result = self.ast_table.output(["count(*)"]).to_pl()
+                ast_count = ast_result.item(0, 0) if len(ast_result) > 0 else 0
+            except:
+                pass
+
+            return {
+                "document_count": doc_count,
+                "vector_count": doc_count,  # Each document has a vector
+                "unique_files": len(unique_files),
+                "symbol_count": symbol_count,
+                "ast_count": ast_count,
+                "database_name": self.db_name,
+                "table_name": "documents",
+                "dimension": self.dimension,
+            }
+        except Exception as e:
+            return {
+                "error": str(e),
+                "document_count": 0,
+                "vector_count": 0,
+            }
+
+    async def clear(self) -> bool:
+        """Clear all data from the vector store.
+
+        Returns:
+            True if successful
+        """
+        try:
+            # Delete all records from all tables
+            self.documents_table.delete()
+
+            try:
+                self.symbols_table.delete()
+            except:
+                pass
+
+            try:
+                self.ast_table.delete()
+            except:
+                pass
+
+            try:
+                self.references_table.delete()
+            except:
+                pass
+
+            return True
+        except Exception as e:
+            print(f"Error clearing vector store: {e}")
+            return False
+
+    async def index_document(
+        self,
+        content: str,
+        metadata: Dict[str, Any] = None,
+    ) -> str:
+        """Async version of add_document for consistency.
+
+        Args:
+            content: Document content
+            metadata: Additional metadata
+
+        Returns:
+            Document ID
+        """
+        file_path = metadata.get("path") if metadata else None
+        return self.add_document(content, metadata, file_path)
+
     def close(self):
         """Close the database connection."""
         if hasattr(self, 'infinity'):
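The index_document method added above is just an async facade over the existing synchronous add_document, so async callers such as IndexTool can await a uniform API. A small sketch of that wrapper pattern using a stub store (not InfinityVectorStore):

# Rough sketch of the async-over-sync wrapper pattern index_document follows.
import asyncio
from typing import Any, Dict, Optional

class StubStore:
    def __init__(self) -> None:
        self.docs: list[tuple[str, Dict[str, Any], Optional[str]]] = []

    def add_document(self, content: str, metadata: Optional[Dict[str, Any]] = None,
                     file_path: Optional[str] = None) -> str:
        # Synchronous insert; returns a generated document ID.
        self.docs.append((content, metadata or {}, file_path))
        return f"doc-{len(self.docs)}"

    async def index_document(self, content: str,
                             metadata: Optional[Dict[str, Any]] = None) -> str:
        # Async facade: pull the path out of the metadata and delegate.
        file_path = metadata.get("path") if metadata else None
        return self.add_document(content, metadata, file_path)

async def main() -> None:
    store = StubStore()
    doc_id = await store.index_document("print('hi')", {"path": "hello.py"})
    print(doc_id)  # doc-1

if __name__ == "__main__":
    asyncio.run(main())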
@@ -8,6 +8,7 @@ import asyncio
 from concurrent.futures import ThreadPoolExecutor
 
 from .infinity_store import InfinityVectorStore, SearchResult
+from hanzo_mcp.tools.config.index_config import IndexConfig, IndexScope
 
 
 @dataclass
@@ -38,12 +39,14 @@ class ProjectVectorManager:
         self.embedding_model = embedding_model
         self.dimension = dimension
 
+        # Set up index configuration
+        self.index_config = IndexConfig()
+
         # Set up global database path
         if global_db_path:
             self.global_db_path = Path(global_db_path)
         else:
-
-            self.global_db_path = get_config_dir() / "db"
+            self.global_db_path = self.index_config.get_index_path("vector")
 
         self.global_db_path.mkdir(parents=True, exist_ok=True)
 
@@ -158,14 +161,25 @@ class ProjectVectorManager:
         Returns:
             Vector store instance
         """
-
+        # Check indexing scope
+        if project_info:
+            scope = self.index_config.get_scope(str(project_info.root_path))
+            if scope == IndexScope.GLOBAL:
+                # Even for project files, use global store if configured
+                return self._get_global_store()
+        else:
             return self._get_global_store()
 
+        # Use project-specific store
         project_key = str(project_info.root_path)
 
         if project_key not in self.vector_stores:
+            # Get index path based on configuration
+            index_path = self.index_config.get_index_path("vector", str(project_info.root_path))
+            index_path.mkdir(parents=True, exist_ok=True)
+
             self.vector_stores[project_key] = InfinityVectorStore(
-                data_path=str(
+                data_path=str(index_path),
                 embedding_model=self.embedding_model,
                 dimension=self.dimension,
             )
@@ -190,10 +204,14 @@ class ProjectVectorManager:
         Returns:
             Tuple of (document IDs, project info or None for global)
         """
+        # Check if indexing is enabled
+        if not self.index_config.is_indexing_enabled("vector"):
+            return [], None
+
         # Find project for this file
         project_info = self.get_project_for_path(file_path)
 
-        # Get appropriate vector store
+        # Get appropriate vector store based on scope configuration
        vector_store = self.get_vector_store(project_info)
 
         # Add file metadata
@@ -201,8 +219,12 @@ class ProjectVectorManager:
         if project_info:
             file_metadata["project_name"] = project_info.name
             file_metadata["project_root"] = str(project_info.root_path)
+            # Check actual scope used
+            scope = self.index_config.get_scope(str(project_info.root_path))
+            file_metadata["index_scope"] = scope.value
         else:
             file_metadata["project_name"] = "global"
+            file_metadata["index_scope"] = "global"
 
         # Add file to store
         doc_ids = vector_store.add_file(