hanzo-mcp 0.3.8__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
Potentially problematic release.
This version of hanzo-mcp might be problematic.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/cli.py +118 -170
- hanzo_mcp/cli_enhanced.py +438 -0
- hanzo_mcp/config/__init__.py +19 -0
- hanzo_mcp/config/settings.py +388 -0
- hanzo_mcp/config/tool_config.py +197 -0
- hanzo_mcp/prompts/__init__.py +117 -0
- hanzo_mcp/prompts/compact_conversation.py +77 -0
- hanzo_mcp/prompts/create_release.py +38 -0
- hanzo_mcp/prompts/project_system.py +120 -0
- hanzo_mcp/prompts/project_todo_reminder.py +111 -0
- hanzo_mcp/prompts/utils.py +286 -0
- hanzo_mcp/server.py +117 -99
- hanzo_mcp/tools/__init__.py +105 -32
- hanzo_mcp/tools/agent/__init__.py +8 -11
- hanzo_mcp/tools/agent/agent_tool.py +290 -224
- hanzo_mcp/tools/agent/prompt.py +16 -13
- hanzo_mcp/tools/agent/tool_adapter.py +9 -9
- hanzo_mcp/tools/common/__init__.py +17 -16
- hanzo_mcp/tools/common/base.py +79 -110
- hanzo_mcp/tools/common/batch_tool.py +330 -0
- hanzo_mcp/tools/common/context.py +26 -292
- hanzo_mcp/tools/common/permissions.py +12 -12
- hanzo_mcp/tools/common/thinking_tool.py +153 -0
- hanzo_mcp/tools/common/validation.py +1 -63
- hanzo_mcp/tools/filesystem/__init__.py +88 -57
- hanzo_mcp/tools/filesystem/base.py +32 -24
- hanzo_mcp/tools/filesystem/content_replace.py +114 -107
- hanzo_mcp/tools/filesystem/directory_tree.py +129 -105
- hanzo_mcp/tools/filesystem/edit.py +279 -0
- hanzo_mcp/tools/filesystem/grep.py +458 -0
- hanzo_mcp/tools/filesystem/grep_ast_tool.py +250 -0
- hanzo_mcp/tools/filesystem/multi_edit.py +362 -0
- hanzo_mcp/tools/filesystem/read.py +255 -0
- hanzo_mcp/tools/filesystem/write.py +156 -0
- hanzo_mcp/tools/jupyter/__init__.py +41 -29
- hanzo_mcp/tools/jupyter/base.py +66 -57
- hanzo_mcp/tools/jupyter/{edit_notebook.py → notebook_edit.py} +162 -139
- hanzo_mcp/tools/jupyter/notebook_read.py +152 -0
- hanzo_mcp/tools/shell/__init__.py +29 -20
- hanzo_mcp/tools/shell/base.py +87 -45
- hanzo_mcp/tools/shell/bash_session.py +731 -0
- hanzo_mcp/tools/shell/bash_session_executor.py +295 -0
- hanzo_mcp/tools/shell/command_executor.py +435 -384
- hanzo_mcp/tools/shell/run_command.py +284 -131
- hanzo_mcp/tools/shell/run_command_windows.py +328 -0
- hanzo_mcp/tools/shell/session_manager.py +196 -0
- hanzo_mcp/tools/shell/session_storage.py +325 -0
- hanzo_mcp/tools/todo/__init__.py +66 -0
- hanzo_mcp/tools/todo/base.py +319 -0
- hanzo_mcp/tools/todo/todo_read.py +148 -0
- hanzo_mcp/tools/todo/todo_write.py +378 -0
- hanzo_mcp/tools/vector/__init__.py +95 -0
- hanzo_mcp/tools/vector/infinity_store.py +365 -0
- hanzo_mcp/tools/vector/project_manager.py +361 -0
- hanzo_mcp/tools/vector/vector_index.py +115 -0
- hanzo_mcp/tools/vector/vector_search.py +215 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/METADATA +33 -1
- hanzo_mcp-0.5.0.dist-info/RECORD +63 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/WHEEL +1 -1
- hanzo_mcp/tools/agent/base_provider.py +0 -73
- hanzo_mcp/tools/agent/litellm_provider.py +0 -45
- hanzo_mcp/tools/agent/lmstudio_agent.py +0 -385
- hanzo_mcp/tools/agent/lmstudio_provider.py +0 -219
- hanzo_mcp/tools/agent/provider_registry.py +0 -120
- hanzo_mcp/tools/common/error_handling.py +0 -86
- hanzo_mcp/tools/common/logging_config.py +0 -115
- hanzo_mcp/tools/common/session.py +0 -91
- hanzo_mcp/tools/common/think_tool.py +0 -123
- hanzo_mcp/tools/common/version_tool.py +0 -120
- hanzo_mcp/tools/filesystem/edit_file.py +0 -287
- hanzo_mcp/tools/filesystem/get_file_info.py +0 -170
- hanzo_mcp/tools/filesystem/read_files.py +0 -199
- hanzo_mcp/tools/filesystem/search_content.py +0 -275
- hanzo_mcp/tools/filesystem/write_file.py +0 -162
- hanzo_mcp/tools/jupyter/notebook_operations.py +0 -514
- hanzo_mcp/tools/jupyter/read_notebook.py +0 -165
- hanzo_mcp/tools/project/__init__.py +0 -64
- hanzo_mcp/tools/project/analysis.py +0 -886
- hanzo_mcp/tools/project/base.py +0 -66
- hanzo_mcp/tools/project/project_analyze.py +0 -173
- hanzo_mcp/tools/shell/run_script.py +0 -215
- hanzo_mcp/tools/shell/script_tool.py +0 -244
- hanzo_mcp-0.3.8.dist-info/RECORD +0 -53
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.0.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/vector/infinity_store.py
@@ -0,0 +1,365 @@
+"""Infinity vector database integration for Hanzo MCP."""
+
+import json
+import hashlib
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from dataclasses import dataclass
+
+try:
+    import infinity_embedded
+    INFINITY_AVAILABLE = True
+except ImportError:
+    INFINITY_AVAILABLE = False
+
+
+@dataclass
+class Document:
+    """Document representation for vector storage."""
+    id: str
+    content: str
+    metadata: Dict[str, Any]
+    file_path: Optional[str] = None
+    chunk_index: Optional[int] = None
+
+
+@dataclass
+class SearchResult:
+    """Search result from vector database."""
+    document: Document
+    score: float
+    distance: float
+
+
+class InfinityVectorStore:
+    """Local vector database using Infinity."""
+
+    def __init__(
+        self,
+        data_path: Optional[str] = None,
+        embedding_model: str = "text-embedding-3-small",
+        dimension: int = 1536,  # Default for OpenAI text-embedding-3-small
+    ):
+        """Initialize the Infinity vector store.
+
+        Args:
+            data_path: Path to store vector database (default: ~/.config/hanzo/vector-store)
+            embedding_model: Embedding model to use
+            dimension: Vector dimension (must match embedding model)
+        """
+        if not INFINITY_AVAILABLE:
+            raise ImportError("infinity_embedded is required for vector store functionality")
+
+        # Set up data path
+        if data_path:
+            self.data_path = Path(data_path)
+        else:
+            from hanzo_mcp.config.settings import get_config_dir
+            self.data_path = get_config_dir() / "vector-store"
+
+        self.data_path.mkdir(parents=True, exist_ok=True)
+
+        self.embedding_model = embedding_model
+        self.dimension = dimension
+
+        # Connect to Infinity
+        self.infinity = infinity_embedded.connect(str(self.data_path))
+        self.db = self.infinity.get_database("hanzo_mcp")
+
+        # Initialize tables
+        self._initialize_tables()
+
+    def _initialize_tables(self):
+        """Initialize database tables if they don't exist."""
+        # Documents table
+        try:
+            self.documents_table = self.db.get_table("documents")
+        except:
+            self.documents_table = self.db.create_table(
+                "documents",
+                {
+                    "id": {"type": "varchar"},
+                    "content": {"type": "varchar"},
+                    "file_path": {"type": "varchar"},
+                    "chunk_index": {"type": "integer"},
+                    "metadata": {"type": "varchar"},  # JSON string
+                    "embedding": {"type": f"vector,{self.dimension},float"},
+                }
+            )
+
+    def _generate_doc_id(self, content: str, file_path: str = "", chunk_index: int = 0) -> str:
+        """Generate a unique document ID."""
+        content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
+        path_hash = hashlib.sha256(file_path.encode()).hexdigest()[:8]
+        return f"doc_{path_hash}_{chunk_index}_{content_hash}"
+
+    def add_document(
+        self,
+        content: str,
+        metadata: Dict[str, Any] = None,
+        file_path: Optional[str] = None,
+        chunk_index: int = 0,
+        embedding: Optional[List[float]] = None,
+    ) -> str:
+        """Add a document to the vector store.
+
+        Args:
+            content: Document content
+            metadata: Additional metadata
+            file_path: Source file path
+            chunk_index: Chunk index if document is part of larger file
+            embedding: Pre-computed embedding (if None, will compute)
+
+        Returns:
+            Document ID
+        """
+        doc_id = self._generate_doc_id(content, file_path or "", chunk_index)
+
+        # Generate embedding if not provided
+        if embedding is None:
+            embedding = self._generate_embedding(content)
+
+        # Prepare metadata
+        metadata = metadata or {}
+        metadata_json = json.dumps(metadata)
+
+        # Insert document
+        self.documents_table.insert([{
+            "id": doc_id,
+            "content": content,
+            "file_path": file_path or "",
+            "chunk_index": chunk_index,
+            "metadata": metadata_json,
+            "embedding": embedding,
+        }])
+
+        return doc_id
+
+    def add_file(
+        self,
+        file_path: str,
+        chunk_size: int = 1000,
+        chunk_overlap: int = 200,
+        metadata: Dict[str, Any] = None,
+    ) -> List[str]:
+        """Add a file to the vector store by chunking it.
+
+        Args:
+            file_path: Path to the file to add
+            chunk_size: Maximum characters per chunk
+            chunk_overlap: Characters to overlap between chunks
+            metadata: Additional metadata for all chunks
+
+        Returns:
+            List of document IDs for all chunks
+        """
+        path = Path(file_path)
+        if not path.exists():
+            raise FileNotFoundError(f"File not found: {file_path}")
+
+        # Read file content
+        try:
+            content = path.read_text(encoding='utf-8')
+        except UnicodeDecodeError:
+            # Try with different encoding
+            content = path.read_text(encoding='latin-1')
+
+        # Chunk the content
+        chunks = self._chunk_text(content, chunk_size, chunk_overlap)
+
+        # Add metadata
+        file_metadata = metadata or {}
+        file_metadata.update({
+            "file_name": path.name,
+            "file_extension": path.suffix,
+            "file_size": path.stat().st_size,
+        })
+
+        # Add each chunk
+        doc_ids = []
+        for i, chunk in enumerate(chunks):
+            chunk_metadata = file_metadata.copy()
+            chunk_metadata["chunk_number"] = i
+            chunk_metadata["total_chunks"] = len(chunks)
+
+            doc_id = self.add_document(
+                content=chunk,
+                metadata=chunk_metadata,
+                file_path=str(path),
+                chunk_index=i,
+            )
+            doc_ids.append(doc_id)
+
+        return doc_ids
+
+    def search(
+        self,
+        query: str,
+        limit: int = 10,
+        score_threshold: float = 0.0,
+        filters: Dict[str, Any] = None,
+    ) -> List[SearchResult]:
+        """Search for similar documents.
+
+        Args:
+            query: Search query
+            limit: Maximum number of results
+            score_threshold: Minimum similarity score
+            filters: Metadata filters (not yet implemented)
+
+        Returns:
+            List of search results
+        """
+        # Generate query embedding
+        query_embedding = self._generate_embedding(query)
+
+        # Perform vector search
+        search_results = self.documents_table.output(["*"]).match_dense(
+            "embedding",
+            query_embedding,
+            "float",
+            "ip",  # Inner product (cosine similarity)
+            limit
+        ).to_pl()
+
+        # Convert to SearchResult objects
+        results = []
+        for row in search_results.iter_rows(named=True):
+            # Parse metadata
+            try:
+                metadata = json.loads(row["metadata"])
+            except:
+                metadata = {}
+
+            # Create document
+            document = Document(
+                id=row["id"],
+                content=row["content"],
+                metadata=metadata,
+                file_path=row["file_path"] if row["file_path"] else None,
+                chunk_index=row["chunk_index"],
+            )
+
+            # Score is the similarity (higher is better)
+            score = row.get("score", 0.0)
+            distance = 1.0 - score  # Convert similarity to distance
+
+            if score >= score_threshold:
+                results.append(SearchResult(
+                    document=document,
+                    score=score,
+                    distance=distance,
+                ))
+
+        return results
+
+    def delete_document(self, doc_id: str) -> bool:
+        """Delete a document by ID.
+
+        Args:
+            doc_id: Document ID to delete
+
+        Returns:
+            True if document was deleted
+        """
+        try:
+            self.documents_table.delete(f"id = '{doc_id}'")
+            return True
+        except:
+            return False
+
+    def delete_file(self, file_path: str) -> int:
+        """Delete all documents from a specific file.
+
+        Args:
+            file_path: File path to delete documents for
+
+        Returns:
+            Number of documents deleted
+        """
+        try:
+            # Get count first
+            results = self.documents_table.output(["id"]).filter(f"file_path = '{file_path}'").to_pl()
+            count = len(results)
+
+            # Delete all documents for this file
+            self.documents_table.delete(f"file_path = '{file_path}'")
+            return count
+        except:
+            return 0
+
+    def list_files(self) -> List[Dict[str, Any]]:
+        """List all indexed files.
+
+        Returns:
+            List of file information
+        """
+        try:
+            results = self.documents_table.output(["file_path", "metadata"]).to_pl()
+
+            files = {}
+            for row in results.iter_rows(named=True):
+                file_path = row["file_path"]
+                if file_path and file_path not in files:
+                    try:
+                        metadata = json.loads(row["metadata"])
+                        files[file_path] = {
+                            "file_path": file_path,
+                            "file_name": metadata.get("file_name", Path(file_path).name),
+                            "file_size": metadata.get("file_size", 0),
+                            "total_chunks": metadata.get("total_chunks", 1),
+                        }
+                    except:
+                        files[file_path] = {
+                            "file_path": file_path,
+                            "file_name": Path(file_path).name,
+                        }
+
+            return list(files.values())
+        except:
+            return []
+
+    def _chunk_text(self, text: str, chunk_size: int, overlap: int) -> List[str]:
+        """Split text into overlapping chunks."""
+        if len(text) <= chunk_size:
+            return [text]
+
+        chunks = []
+        start = 0
+
+        while start < len(text):
+            end = start + chunk_size
+
+            # Try to break at word boundary
+            if end < len(text):
+                # Look back for a good break point
+                break_point = end
+                for i in range(end - 100, start + 100, -1):
+                    if i > 0 and text[i] in '\n\r.!?':
+                        break_point = i + 1
+                        break
+                end = break_point
+
+            chunk = text[start:end].strip()
+            if chunk:
+                chunks.append(chunk)
+
+            start = max(start + chunk_size - overlap, end)
+
+        return chunks
+
+    def _generate_embedding(self, text: str) -> List[float]:
+        """Generate embedding for text.
+
+        For now, this returns a dummy embedding. In a real implementation,
+        you would call an embedding API (OpenAI, Cohere, etc.) or use a local model.
+        """
+        # This is a placeholder - you would implement actual embedding generation here
+        # For now, return a random embedding of the correct dimension
+        import random
+        return [random.random() for _ in range(self.dimension)]
+
+    def close(self):
+        """Close the database connection."""
+        if hasattr(self, 'infinity'):
+            self.infinity.disconnect()