hanzo-mcp 0.3.8__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic. Click here for more details.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/cli.py +118 -170
- hanzo_mcp/cli_enhanced.py +438 -0
- hanzo_mcp/config/__init__.py +19 -0
- hanzo_mcp/config/settings.py +449 -0
- hanzo_mcp/config/tool_config.py +197 -0
- hanzo_mcp/prompts/__init__.py +117 -0
- hanzo_mcp/prompts/compact_conversation.py +77 -0
- hanzo_mcp/prompts/create_release.py +38 -0
- hanzo_mcp/prompts/project_system.py +120 -0
- hanzo_mcp/prompts/project_todo_reminder.py +111 -0
- hanzo_mcp/prompts/utils.py +286 -0
- hanzo_mcp/server.py +117 -99
- hanzo_mcp/tools/__init__.py +121 -33
- hanzo_mcp/tools/agent/__init__.py +8 -11
- hanzo_mcp/tools/agent/agent_tool.py +290 -224
- hanzo_mcp/tools/agent/prompt.py +16 -13
- hanzo_mcp/tools/agent/tool_adapter.py +9 -9
- hanzo_mcp/tools/common/__init__.py +17 -16
- hanzo_mcp/tools/common/base.py +79 -110
- hanzo_mcp/tools/common/batch_tool.py +330 -0
- hanzo_mcp/tools/common/config_tool.py +396 -0
- hanzo_mcp/tools/common/context.py +26 -292
- hanzo_mcp/tools/common/permissions.py +12 -12
- hanzo_mcp/tools/common/thinking_tool.py +153 -0
- hanzo_mcp/tools/common/validation.py +1 -63
- hanzo_mcp/tools/filesystem/__init__.py +97 -57
- hanzo_mcp/tools/filesystem/base.py +32 -24
- hanzo_mcp/tools/filesystem/content_replace.py +114 -107
- hanzo_mcp/tools/filesystem/directory_tree.py +129 -105
- hanzo_mcp/tools/filesystem/edit.py +279 -0
- hanzo_mcp/tools/filesystem/grep.py +458 -0
- hanzo_mcp/tools/filesystem/grep_ast_tool.py +250 -0
- hanzo_mcp/tools/filesystem/multi_edit.py +362 -0
- hanzo_mcp/tools/filesystem/read.py +255 -0
- hanzo_mcp/tools/filesystem/unified_search.py +689 -0
- hanzo_mcp/tools/filesystem/write.py +156 -0
- hanzo_mcp/tools/jupyter/__init__.py +41 -29
- hanzo_mcp/tools/jupyter/base.py +66 -57
- hanzo_mcp/tools/jupyter/{edit_notebook.py → notebook_edit.py} +162 -139
- hanzo_mcp/tools/jupyter/notebook_read.py +152 -0
- hanzo_mcp/tools/shell/__init__.py +29 -20
- hanzo_mcp/tools/shell/base.py +87 -45
- hanzo_mcp/tools/shell/bash_session.py +731 -0
- hanzo_mcp/tools/shell/bash_session_executor.py +295 -0
- hanzo_mcp/tools/shell/command_executor.py +435 -384
- hanzo_mcp/tools/shell/run_command.py +284 -131
- hanzo_mcp/tools/shell/run_command_windows.py +328 -0
- hanzo_mcp/tools/shell/session_manager.py +196 -0
- hanzo_mcp/tools/shell/session_storage.py +325 -0
- hanzo_mcp/tools/todo/__init__.py +66 -0
- hanzo_mcp/tools/todo/base.py +319 -0
- hanzo_mcp/tools/todo/todo_read.py +148 -0
- hanzo_mcp/tools/todo/todo_write.py +378 -0
- hanzo_mcp/tools/vector/__init__.py +99 -0
- hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
- hanzo_mcp/tools/vector/git_ingester.py +482 -0
- hanzo_mcp/tools/vector/infinity_store.py +731 -0
- hanzo_mcp/tools/vector/mock_infinity.py +162 -0
- hanzo_mcp/tools/vector/project_manager.py +361 -0
- hanzo_mcp/tools/vector/vector_index.py +116 -0
- hanzo_mcp/tools/vector/vector_search.py +225 -0
- hanzo_mcp-0.5.1.dist-info/METADATA +276 -0
- hanzo_mcp-0.5.1.dist-info/RECORD +68 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/WHEEL +1 -1
- hanzo_mcp/tools/agent/base_provider.py +0 -73
- hanzo_mcp/tools/agent/litellm_provider.py +0 -45
- hanzo_mcp/tools/agent/lmstudio_agent.py +0 -385
- hanzo_mcp/tools/agent/lmstudio_provider.py +0 -219
- hanzo_mcp/tools/agent/provider_registry.py +0 -120
- hanzo_mcp/tools/common/error_handling.py +0 -86
- hanzo_mcp/tools/common/logging_config.py +0 -115
- hanzo_mcp/tools/common/session.py +0 -91
- hanzo_mcp/tools/common/think_tool.py +0 -123
- hanzo_mcp/tools/common/version_tool.py +0 -120
- hanzo_mcp/tools/filesystem/edit_file.py +0 -287
- hanzo_mcp/tools/filesystem/get_file_info.py +0 -170
- hanzo_mcp/tools/filesystem/read_files.py +0 -199
- hanzo_mcp/tools/filesystem/search_content.py +0 -275
- hanzo_mcp/tools/filesystem/write_file.py +0 -162
- hanzo_mcp/tools/jupyter/notebook_operations.py +0 -514
- hanzo_mcp/tools/jupyter/read_notebook.py +0 -165
- hanzo_mcp/tools/project/__init__.py +0 -64
- hanzo_mcp/tools/project/analysis.py +0 -886
- hanzo_mcp/tools/project/base.py +0 -66
- hanzo_mcp/tools/project/project_analyze.py +0 -173
- hanzo_mcp/tools/shell/run_script.py +0 -215
- hanzo_mcp/tools/shell/script_tool.py +0 -244
- hanzo_mcp-0.3.8.dist-info/METADATA +0 -196
- hanzo_mcp-0.3.8.dist-info/RECORD +0 -53
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Mock implementation of infinity_embedded for testing on unsupported platforms."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import hashlib
|
|
5
|
+
import random
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Any, Optional
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MockTable:
    """Mock implementation of an Infinity table.

    Stores records in an in-memory list and supports the small subset of the
    Infinity table API used by the vector store: insert, delete by a simple
    equality condition, and query construction via output().
    """

    def __init__(self, name: str, schema: Dict[str, Any]):
        """Initialize an empty table.

        Args:
            name: Table name.
            schema: Column schema (kept for reference; not enforced by the mock).
        """
        self.name = name
        self.schema = schema
        self.data: List[Dict[str, Any]] = []
        self._id_counter = 0

    def insert(self, records: List[Dict[str, Any]]):
        """Insert records into the table.

        Records are mutated in place: any record that does not already carry
        an ``_internal_id`` is assigned a monotonically increasing one.
        """
        for record in records:
            # Bug fix: the original checked "'id' not in record" while
            # assigning '_internal_id', so records that already had an 'id'
            # never received an internal ID. Guard on the key we assign.
            if '_internal_id' not in record:
                record['_internal_id'] = self._id_counter
                self._id_counter += 1
            self.data.append(record)

    def delete(self, condition: str):
        """Delete all records matching the given equality condition."""
        self.data = [r for r in self.data if not self._eval_condition(r, condition)]

    def output(self, columns: List[str]):
        """Start a query chain over this table, selecting the given columns."""
        return MockQuery(self, columns)

    def _eval_condition(self, record: Dict[str, Any], condition: str) -> bool:
        """Evaluate a simple ``field = value`` condition against a record.

        Only single equality comparisons are supported; anything else is
        treated as non-matching. Comparison is done on string representations.
        """
        if '=' in condition:
            field, value = condition.split('=', 1)
            field = field.strip()
            value = value.strip().strip("'\"")
            return str(record.get(field, '')) == value
        return False
49
|
+
class MockQuery:
    """Mock query builder mirroring Infinity's fluent (chained) query API."""

    def __init__(self, table: "MockTable", columns: List[str]):
        self.table = table
        self.columns = columns
        self.filters: List[str] = []
        self.vector_search: Optional[Dict[str, Any]] = None
        self.limit_value: Optional[int] = None

    def filter(self, condition: str):
        """Record a filter condition; returns self for chaining."""
        self.filters.append(condition)
        return self

    def match_dense(self, column: str, vector: List[float], dtype: str, metric: str, limit: int):
        """Record a dense-vector search request; returns self for chaining."""
        self.vector_search = {
            'column': column,
            'vector': vector,
            'dtype': dtype,
            'metric': metric,
            'limit': limit,
        }
        self.limit_value = limit
        return self

    def to_pl(self):
        """Execute the accumulated query and return a polars-like frame."""
        # Keep only rows that satisfy every recorded filter.
        rows = [
            row for row in self.table.data
            if all(self.table._eval_condition(row, cond) for cond in self.filters)
        ]

        if self.vector_search:
            # Mock similarity: attach random scores and rank by them.
            for row in rows:
                row['score'] = random.uniform(0.5, 1.0)
            rows.sort(key=lambda row: row.get('score', 0), reverse=True)
            # limit_value is only ever set together with a vector search.
            if self.limit_value:
                rows = rows[:self.limit_value]

        return MockDataFrame(rows)
|
99
|
+
class MockDataFrame:
    """Minimal stand-in for a polars DataFrame backed by a list of row dicts."""

    def __init__(self, data: List[Dict[str, Any]]):
        self.data = data

    def __len__(self):
        return len(self.data)

    def iter_rows(self, named: bool = False):
        """Iterate over rows.

        With ``named=True`` yields the row dicts themselves; otherwise yields
        tuples ordered by the first row's keys (empty iterator for no rows).
        """
        if named:
            return iter(self.data)
        if not self.data:
            return iter([])
        columns = list(self.data[0].keys())
        return iter([tuple(row.get(col) for col in columns) for row in self.data])
|
|
120
|
+
class MockDatabase:
    """Mock implementation of an Infinity database: a named set of tables."""

    def __init__(self, name: str):
        self.name = name
        self.tables: Dict[str, "MockTable"] = {}

    def create_table(self, name: str, schema: Dict[str, Any]) -> "MockTable":
        """Create a new table, register it, and return it."""
        table = MockTable(name, schema)
        self.tables[name] = table
        return table

    def get_table(self, name: str) -> "MockTable":
        """Return an existing table; raise KeyError if it was never created."""
        if name not in self.tables:
            raise KeyError(f"Table {name} not found")
        return self.tables[name]
|
|
140
|
+
class MockInfinity:
    """Mock implementation of an Infinity connection rooted at a directory."""

    def __init__(self, path: str):
        self.path = Path(path)
        self.databases: Dict[str, "MockDatabase"] = {}
        # Mirror the real client: make sure the backing directory exists.
        self.path.mkdir(parents=True, exist_ok=True)

    def get_database(self, name: str) -> "MockDatabase":
        """Return the named database, creating it on first access."""
        if name not in self.databases:
            self.databases[name] = MockDatabase(name)
        return self.databases[name]

    def disconnect(self):
        """Disconnect from Infinity (no-op for the in-memory mock)."""
        pass
|
|
160
|
+
def connect(path: str) -> MockInfinity:
    """Connect to Infinity (mock implementation).

    Args:
        path: Directory for the mock store; created if it does not exist.

    Returns:
        A MockInfinity connection rooted at *path*.
    """
    return MockInfinity(path)
|
|
@@ -0,0 +1,361 @@
|
|
|
1
|
+
"""Project-aware vector database management for Hanzo MCP."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Optional, Set, Tuple, Any
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
import asyncio
|
|
8
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
9
|
+
|
|
10
|
+
from .infinity_store import InfinityVectorStore, SearchResult
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ProjectInfo:
    """Information about a detected project."""

    # Absolute path of the project root (the directory containing LLM.md).
    root_path: Path
    # Path to the project's LLM.md marker file.
    llm_md_path: Path
    # Directory holding the project's vector database (<root>/.hanzo/db).
    db_path: Path
    # Human-readable project name (the root directory's name).
    name: str
|
+
|
|
22
|
+
class ProjectVectorManager:
    """Manages project-aware vector databases.

    Projects are identified by an ``LLM.md`` marker file; each project gets
    its own vector store under ``<root>/.hanzo/db``, while content that does
    not belong to any project lands in a single global store.
    """

    def __init__(
        self,
        global_db_path: Optional[str] = None,
        embedding_model: str = "text-embedding-3-small",
        dimension: int = 1536,
    ):
        """Initialize the project vector manager.

        Args:
            global_db_path: Path for global vector store (default: ~/.config/hanzo/db)
            embedding_model: Embedding model to use
            dimension: Vector dimension
        """
        self.embedding_model = embedding_model
        self.dimension = dimension

        # Set up global database path
        if global_db_path:
            self.global_db_path = Path(global_db_path)
        else:
            # Imported lazily; presumably avoids a circular import at module
            # load time -- TODO confirm against hanzo_mcp.config.settings.
            from hanzo_mcp.config.settings import get_config_dir
            self.global_db_path = get_config_dir() / "db"

        self.global_db_path.mkdir(parents=True, exist_ok=True)

        # Caches: project metadata keyed by root path, and per-project stores.
        self.projects: Dict[str, ProjectInfo] = {}
        self.vector_stores: Dict[str, InfinityVectorStore] = {}
        self._global_store: Optional[InfinityVectorStore] = None

        # Thread pool used to run blocking store operations in parallel.
        self.executor = ThreadPoolExecutor(max_workers=4)

    def _get_global_store(self) -> InfinityVectorStore:
        """Lazily create and return the global vector store."""
        if self._global_store is None:
            self._global_store = InfinityVectorStore(
                data_path=str(self.global_db_path),
                embedding_model=self.embedding_model,
                dimension=self.dimension,
            )
        return self._global_store

    def detect_projects(self, search_paths: List[str]) -> List[ProjectInfo]:
        """Detect projects by finding LLM.md files.

        Args:
            search_paths: List of paths to search for projects

        Returns:
            List of detected project information
        """
        projects = []

        for search_path in search_paths:
            path = Path(search_path).resolve()

            # An LLM.md file marks its parent directory as a project root.
            for llm_md_path in path.rglob("LLM.md"):
                project_root = llm_md_path.parent

                # Each project's database lives in <root>/.hanzo/db.
                db_path = project_root / ".hanzo" / "db"
                db_path.mkdir(parents=True, exist_ok=True)

                project_info = ProjectInfo(
                    root_path=project_root,
                    llm_md_path=llm_md_path,
                    db_path=db_path,
                    name=project_root.name,
                )

                projects.append(project_info)
                # Cache by root path so later lookups are cheap.
                self.projects[str(project_root)] = project_info

        return projects

    def get_project_for_path(self, file_path: str) -> Optional[ProjectInfo]:
        """Find the project that contains a given file path.

        Args:
            file_path: File path to check

        Returns:
            Project info if found, None otherwise
        """
        path = Path(file_path).resolve()

        # Fast path: check the already-cached projects.
        for project_info in self.projects.values():
            try:
                # relative_to raises ValueError when path is outside the root.
                path.relative_to(project_info.root_path)
                return project_info
            except ValueError:
                continue

        # Slow path: walk up the directory tree looking for an LLM.md marker.
        current_path = path.parent if path.is_file() else path

        while current_path != current_path.parent:  # Stop at filesystem root
            llm_md_path = current_path / "LLM.md"
            if llm_md_path.exists():
                # Found a project root; create, cache, and return its info.
                db_path = current_path / ".hanzo" / "db"
                db_path.mkdir(parents=True, exist_ok=True)

                project_info = ProjectInfo(
                    root_path=current_path,
                    llm_md_path=llm_md_path,
                    db_path=db_path,
                    name=current_path.name,
                )
                self.projects[str(current_path)] = project_info
                return project_info

            current_path = current_path.parent

        return None

    def get_vector_store(self, project_info: Optional[ProjectInfo] = None) -> InfinityVectorStore:
        """Get vector store for a project or global store.

        Args:
            project_info: Project to get store for, None for global store

        Returns:
            Vector store instance
        """
        if project_info is None:
            return self._get_global_store()

        project_key = str(project_info.root_path)

        if project_key not in self.vector_stores:
            self.vector_stores[project_key] = InfinityVectorStore(
                data_path=str(project_info.db_path),
                embedding_model=self.embedding_model,
                dimension=self.dimension,
            )

        return self.vector_stores[project_key]

    def add_file_to_appropriate_store(
        self,
        file_path: str,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Tuple[List[str], Optional[ProjectInfo]]:
        """Add a file to the appropriate vector store (project or global).

        Args:
            file_path: Path to file to add
            chunk_size: Chunk size for text splitting
            chunk_overlap: Overlap between chunks
            metadata: Additional metadata

        Returns:
            Tuple of (document IDs, project info or None for global)
        """
        # Route the file by the project that contains it (None -> global).
        project_info = self.get_project_for_path(file_path)
        vector_store = self.get_vector_store(project_info)

        # Tag every chunk with its project of origin.
        file_metadata = metadata or {}
        if project_info:
            file_metadata["project_name"] = project_info.name
            file_metadata["project_root"] = str(project_info.root_path)
        else:
            file_metadata["project_name"] = "global"

        doc_ids = vector_store.add_file(
            file_path=file_path,
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
            metadata=file_metadata,
        )

        return doc_ids, project_info

    async def search_all_projects(
        self,
        query: str,
        limit_per_project: int = 5,
        score_threshold: float = 0.0,
        include_global: bool = True,
        project_filter: Optional[List[str]] = None,
    ) -> Dict[str, List[SearchResult]]:
        """Search across all projects in parallel.

        Args:
            query: Search query
            limit_per_project: Maximum results per project
            score_threshold: Minimum similarity score
            include_global: Whether to include global store
            project_filter: List of project names to search (None for all)

        Returns:
            Dictionary mapping project names to search results
        """
        # Fix: asyncio.get_event_loop() is deprecated inside coroutines;
        # use the loop that is actually running this coroutine.
        loop = asyncio.get_running_loop()

        search_tasks = []
        project_names = []

        # Add global store if requested
        if include_global:
            global_store = self._get_global_store()
            search_tasks.append(
                loop.run_in_executor(
                    self.executor,
                    lambda: global_store.search(query, limit_per_project, score_threshold),
                )
            )
            project_names.append("global")

        # Add project stores
        for project_info in self.projects.values():
            if project_filter and project_info.name not in project_filter:
                continue

            vector_store = self.get_vector_store(project_info)
            search_tasks.append(
                loop.run_in_executor(
                    self.executor,
                    # Default-arg binding gives each lambda its own store
                    # (avoids the classic late-binding closure bug).
                    lambda vs=vector_store: vs.search(query, limit_per_project, score_threshold),
                )
            )
            project_names.append(project_info.name)

        # Execute all searches in parallel; keep exceptions as values so one
        # failing store cannot sink the others.
        results = await asyncio.gather(*search_tasks, return_exceptions=True)

        combined_results = {}
        for project_name, result in zip(project_names, results):
            if isinstance(result, Exception):
                # Log error but continue
                print(f"Error searching project {project_name}: {result}")
                combined_results[project_name] = []
            else:
                combined_results[project_name] = result

        return combined_results

    def search_project_by_path(
        self,
        file_path: str,
        query: str,
        limit: int = 10,
        score_threshold: float = 0.0,
    ) -> List[SearchResult]:
        """Search the project containing a specific file path.

        Args:
            file_path: File path to determine project
            query: Search query
            limit: Maximum results
            score_threshold: Minimum similarity score

        Returns:
            Search results from the appropriate project store
        """
        project_info = self.get_project_for_path(file_path)
        vector_store = self.get_vector_store(project_info)

        return vector_store.search(
            query=query,
            limit=limit,
            score_threshold=score_threshold,
        )

    def get_project_stats(self) -> Dict[str, Dict[str, Any]]:
        """Get statistics for all projects.

        Returns:
            Dictionary mapping project names to stats (or an "error" entry
            when a store could not be queried)
        """
        stats = {}

        # Global store stats
        try:
            global_store = self._get_global_store()
            global_files = global_store.list_files()
            stats["global"] = {
                "file_count": len(global_files),
                "db_path": str(self.global_db_path),
            }
        except Exception as e:
            stats["global"] = {"error": str(e)}

        # Project store stats
        for project_info in self.projects.values():
            try:
                vector_store = self.get_vector_store(project_info)
                project_files = vector_store.list_files()
                stats[project_info.name] = {
                    "file_count": len(project_files),
                    "db_path": str(project_info.db_path),
                    "root_path": str(project_info.root_path),
                    "llm_md_exists": project_info.llm_md_path.exists(),
                }
            except Exception as e:
                stats[project_info.name] = {"error": str(e)}

        return stats

    def cleanup(self):
        """Close all vector stores and cleanup resources."""
        # Close all project stores. Best-effort: a failing close must not
        # prevent the remaining stores from being closed.
        for vector_store in self.vector_stores.values():
            try:
                vector_store.close()
            except Exception:
                # Fix: was a bare `except:` which also swallowed
                # KeyboardInterrupt/SystemExit.
                pass

        # Close global store
        if self._global_store:
            try:
                self._global_store.close()
            except Exception:
                pass

        # Shutdown executor without waiting for in-flight searches.
        self.executor.shutdown(wait=False)
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Vector indexing tool for adding documents to vector database."""
|
|
2
|
+
|
|
3
|
+
from typing import Dict, List, Optional, TypedDict, Unpack, final
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from fastmcp import Context as MCPContext
|
|
7
|
+
from pydantic import Field
|
|
8
|
+
|
|
9
|
+
from hanzo_mcp.tools.common.base import BaseTool
|
|
10
|
+
from hanzo_mcp.tools.common.permissions import PermissionManager
|
|
11
|
+
from hanzo_mcp.tools.common.validation import validate_path_parameter
|
|
12
|
+
|
|
13
|
+
from .infinity_store import InfinityVectorStore
|
|
14
|
+
from .project_manager import ProjectVectorManager
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class VectorIndexParams(TypedDict, total=False):
    """Parameters for vector indexing operations."""

    # Path of the file to index (alternative to `content`).
    file_path: str
    # Raw text to index directly into the global store.
    content: Optional[str]
    # Maximum size of each text chunk (default applied by the tool: 1000).
    chunk_size: Optional[int]
    # Overlap between consecutive chunks (default applied by the tool: 200).
    chunk_overlap: Optional[int]
    # Extra metadata stored alongside every produced document.
    metadata: Optional[Dict[str, str]]
|
+
|
|
27
|
+
@final
class VectorIndexTool(BaseTool):
    """Tool for indexing documents in the vector database."""

    def __init__(self, permission_manager: PermissionManager, project_manager: ProjectVectorManager):
        """Initialize the vector index tool.

        Args:
            permission_manager: Permission manager for access control
            project_manager: Project-aware vector store manager
        """
        self.permission_manager = permission_manager
        self.project_manager = project_manager

    @property
    def name(self) -> str:
        """Get the tool name."""
        return "vector_index"

    @property
    def description(self) -> str:
        """Get the tool description."""
        return """Index documents in project-aware vector databases for semantic search.

Can index individual text content or entire files. Files are automatically assigned
to the appropriate project database based on LLM.md detection or stored in the global
database. Files are chunked for optimal search performance.

Projects are detected by finding LLM.md files, with databases stored in .hanzo/db
directories alongside them. Use this to build searchable knowledge bases per project."""

    async def call(
        self,
        ctx: MCPContext,
        **params: Unpack[VectorIndexParams],
    ) -> str:
        """Index content or files in the vector database.

        Args:
            ctx: MCP context
            **params: Tool parameters (see VectorIndexParams)

        Returns:
            Indexing result message (errors are reported as strings, not raised)
        """
        file_path = params.get("file_path")
        content = params.get("content")
        # Robustness fix: the TypedDict declares these fields Optional, so a
        # caller may pass an explicit None; previously `.get(key, default)`
        # returned that None and forwarded it to the store layer. Coalesce to
        # the documented defaults instead.
        chunk_size = params.get("chunk_size")
        if chunk_size is None:
            chunk_size = 1000
        chunk_overlap = params.get("chunk_overlap")
        if chunk_overlap is None:
            chunk_overlap = 200
        metadata = params.get("metadata") or {}

        if not file_path and not content:
            return "Error: Either file_path or content must be provided"

        try:
            # NOTE: when both file_path and content are given, the file wins
            # and content is ignored (existing behavior, kept for compatibility).
            if file_path:
                # Use permission manager's existing validation
                if not self.permission_manager.is_path_allowed(file_path):
                    return f"Error: Access denied to path {file_path}"

                if not Path(file_path).exists():
                    return f"Error: File does not exist: {file_path}"

                # Index file using project-aware manager
                doc_ids, project_info = self.project_manager.add_file_to_appropriate_store(
                    file_path=file_path,
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap,
                    metadata=metadata,
                )

                file_name = Path(file_path).name
                if project_info:
                    return f"Successfully indexed {file_name} with {len(doc_ids)} chunks in project '{project_info.name}'"
                else:
                    return f"Successfully indexed {file_name} with {len(doc_ids)} chunks in global database"

            else:
                # Index content directly in global store (no project context)
                global_store = self.project_manager._get_global_store()
                doc_id = global_store.add_document(
                    content=content,
                    metadata=metadata,
                )

                return f"Successfully indexed content as document {doc_id} in global database"

        except Exception as e:
            return f"Error indexing content: {str(e)}"