hanzo-mcp 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic. Click here for more details.
- hanzo_mcp/__init__.py +1 -1
- hanzo_mcp/config/settings.py +61 -0
- hanzo_mcp/tools/__init__.py +158 -12
- hanzo_mcp/tools/common/base.py +7 -2
- hanzo_mcp/tools/common/config_tool.py +396 -0
- hanzo_mcp/tools/common/stats.py +261 -0
- hanzo_mcp/tools/common/tool_disable.py +144 -0
- hanzo_mcp/tools/common/tool_enable.py +182 -0
- hanzo_mcp/tools/common/tool_list.py +263 -0
- hanzo_mcp/tools/database/__init__.py +71 -0
- hanzo_mcp/tools/database/database_manager.py +246 -0
- hanzo_mcp/tools/database/graph_add.py +257 -0
- hanzo_mcp/tools/database/graph_query.py +536 -0
- hanzo_mcp/tools/database/graph_remove.py +267 -0
- hanzo_mcp/tools/database/graph_search.py +348 -0
- hanzo_mcp/tools/database/graph_stats.py +345 -0
- hanzo_mcp/tools/database/sql_query.py +229 -0
- hanzo_mcp/tools/database/sql_search.py +296 -0
- hanzo_mcp/tools/database/sql_stats.py +254 -0
- hanzo_mcp/tools/editor/__init__.py +11 -0
- hanzo_mcp/tools/editor/neovim_command.py +272 -0
- hanzo_mcp/tools/editor/neovim_edit.py +290 -0
- hanzo_mcp/tools/editor/neovim_session.py +356 -0
- hanzo_mcp/tools/filesystem/__init__.py +20 -1
- hanzo_mcp/tools/filesystem/batch_search.py +812 -0
- hanzo_mcp/tools/filesystem/find_files.py +348 -0
- hanzo_mcp/tools/filesystem/git_search.py +505 -0
- hanzo_mcp/tools/llm/__init__.py +27 -0
- hanzo_mcp/tools/llm/consensus_tool.py +351 -0
- hanzo_mcp/tools/llm/llm_manage.py +413 -0
- hanzo_mcp/tools/llm/llm_tool.py +346 -0
- hanzo_mcp/tools/llm/provider_tools.py +412 -0
- hanzo_mcp/tools/mcp/__init__.py +11 -0
- hanzo_mcp/tools/mcp/mcp_add.py +263 -0
- hanzo_mcp/tools/mcp/mcp_remove.py +127 -0
- hanzo_mcp/tools/mcp/mcp_stats.py +165 -0
- hanzo_mcp/tools/shell/__init__.py +27 -7
- hanzo_mcp/tools/shell/logs.py +265 -0
- hanzo_mcp/tools/shell/npx.py +194 -0
- hanzo_mcp/tools/shell/npx_background.py +254 -0
- hanzo_mcp/tools/shell/pkill.py +262 -0
- hanzo_mcp/tools/shell/processes.py +279 -0
- hanzo_mcp/tools/shell/run_background.py +326 -0
- hanzo_mcp/tools/shell/uvx.py +187 -0
- hanzo_mcp/tools/shell/uvx_background.py +249 -0
- hanzo_mcp/tools/vector/__init__.py +21 -12
- hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
- hanzo_mcp/tools/vector/git_ingester.py +485 -0
- hanzo_mcp/tools/vector/index_tool.py +358 -0
- hanzo_mcp/tools/vector/infinity_store.py +465 -1
- hanzo_mcp/tools/vector/mock_infinity.py +162 -0
- hanzo_mcp/tools/vector/vector_index.py +7 -6
- hanzo_mcp/tools/vector/vector_search.py +22 -7
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/METADATA +68 -20
- hanzo_mcp-0.5.2.dist-info/RECORD +106 -0
- hanzo_mcp-0.5.0.dist-info/RECORD +0 -63
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/licenses/LICENSE +0 -0
- {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/top_level.txt +0 -0
|
@@ -10,7 +10,11 @@ try:
|
|
|
10
10
|
import infinity_embedded
|
|
11
11
|
INFINITY_AVAILABLE = True
|
|
12
12
|
except ImportError:
|
|
13
|
-
|
|
13
|
+
# Use mock implementation when infinity_embedded is not available
|
|
14
|
+
from . import mock_infinity as infinity_embedded
|
|
15
|
+
INFINITY_AVAILABLE = True # Mock is always available
|
|
16
|
+
|
|
17
|
+
from .ast_analyzer import ASTAnalyzer, FileAST, Symbol, create_symbol_embedding_text
|
|
14
18
|
|
|
15
19
|
|
|
16
20
|
@dataclass
|
|
@@ -31,6 +35,27 @@ class SearchResult:
|
|
|
31
35
|
distance: float
|
|
32
36
|
|
|
33
37
|
|
|
38
|
+
@dataclass
|
|
39
|
+
class SymbolSearchResult:
|
|
40
|
+
"""Search result for symbols."""
|
|
41
|
+
symbol: Symbol
|
|
42
|
+
score: float
|
|
43
|
+
context_document: Optional[Document] = None
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class UnifiedSearchResult:
|
|
48
|
+
"""Unified search result combining text, vector, and symbol search."""
|
|
49
|
+
type: str # 'document', 'symbol', 'reference'
|
|
50
|
+
content: str
|
|
51
|
+
file_path: str
|
|
52
|
+
line_start: int
|
|
53
|
+
line_end: int
|
|
54
|
+
score: float
|
|
55
|
+
search_type: str # 'text', 'vector', 'symbol', 'ast'
|
|
56
|
+
metadata: Dict[str, Any]
|
|
57
|
+
|
|
58
|
+
|
|
34
59
|
class InfinityVectorStore:
|
|
35
60
|
"""Local vector database using Infinity."""
|
|
36
61
|
|
|
@@ -62,6 +87,9 @@ class InfinityVectorStore:
|
|
|
62
87
|
self.embedding_model = embedding_model
|
|
63
88
|
self.dimension = dimension
|
|
64
89
|
|
|
90
|
+
# Initialize AST analyzer
|
|
91
|
+
self.ast_analyzer = ASTAnalyzer()
|
|
92
|
+
|
|
65
93
|
# Connect to Infinity
|
|
66
94
|
self.infinity = infinity_embedded.connect(str(self.data_path))
|
|
67
95
|
self.db = self.infinity.get_database("hanzo_mcp")
|
|
@@ -86,6 +114,60 @@ class InfinityVectorStore:
|
|
|
86
114
|
"embedding": {"type": f"vector,{self.dimension},float"},
|
|
87
115
|
}
|
|
88
116
|
)
|
|
117
|
+
|
|
118
|
+
# Symbols table for code symbols
|
|
119
|
+
try:
|
|
120
|
+
self.symbols_table = self.db.get_table("symbols")
|
|
121
|
+
except:
|
|
122
|
+
self.symbols_table = self.db.create_table(
|
|
123
|
+
"symbols",
|
|
124
|
+
{
|
|
125
|
+
"id": {"type": "varchar"},
|
|
126
|
+
"name": {"type": "varchar"},
|
|
127
|
+
"type": {"type": "varchar"}, # function, class, variable, etc.
|
|
128
|
+
"file_path": {"type": "varchar"},
|
|
129
|
+
"line_start": {"type": "integer"},
|
|
130
|
+
"line_end": {"type": "integer"},
|
|
131
|
+
"scope": {"type": "varchar"},
|
|
132
|
+
"parent": {"type": "varchar"},
|
|
133
|
+
"signature": {"type": "varchar"},
|
|
134
|
+
"docstring": {"type": "varchar"},
|
|
135
|
+
"metadata": {"type": "varchar"}, # JSON string
|
|
136
|
+
"embedding": {"type": f"vector,{self.dimension},float"},
|
|
137
|
+
}
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
# AST table for storing complete file ASTs
|
|
141
|
+
try:
|
|
142
|
+
self.ast_table = self.db.get_table("ast_files")
|
|
143
|
+
except:
|
|
144
|
+
self.ast_table = self.db.create_table(
|
|
145
|
+
"ast_files",
|
|
146
|
+
{
|
|
147
|
+
"file_path": {"type": "varchar"},
|
|
148
|
+
"file_hash": {"type": "varchar"},
|
|
149
|
+
"language": {"type": "varchar"},
|
|
150
|
+
"ast_data": {"type": "varchar"}, # JSON string of complete AST
|
|
151
|
+
"last_updated": {"type": "varchar"}, # ISO timestamp
|
|
152
|
+
}
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# References table for cross-file references
|
|
156
|
+
try:
|
|
157
|
+
self.references_table = self.db.get_table("references")
|
|
158
|
+
except:
|
|
159
|
+
self.references_table = self.db.create_table(
|
|
160
|
+
"references",
|
|
161
|
+
{
|
|
162
|
+
"id": {"type": "varchar"},
|
|
163
|
+
"source_file": {"type": "varchar"},
|
|
164
|
+
"target_file": {"type": "varchar"},
|
|
165
|
+
"symbol_name": {"type": "varchar"},
|
|
166
|
+
"reference_type": {"type": "varchar"}, # import, call, inheritance, etc.
|
|
167
|
+
"line_number": {"type": "integer"},
|
|
168
|
+
"metadata": {"type": "varchar"}, # JSON string
|
|
169
|
+
}
|
|
170
|
+
)
|
|
89
171
|
|
|
90
172
|
def _generate_doc_id(self, content: str, file_path: str = "", chunk_index: int = 0) -> str:
|
|
91
173
|
"""Generate a unique document ID."""
|
|
@@ -192,6 +274,290 @@ class InfinityVectorStore:
|
|
|
192
274
|
|
|
193
275
|
return doc_ids
|
|
194
276
|
|
|
277
|
+
def add_file_with_ast(
|
|
278
|
+
self,
|
|
279
|
+
file_path: str,
|
|
280
|
+
chunk_size: int = 1000,
|
|
281
|
+
chunk_overlap: int = 200,
|
|
282
|
+
metadata: Dict[str, Any] = None,
|
|
283
|
+
) -> Tuple[List[str], Optional[FileAST]]:
|
|
284
|
+
"""Add a file with full AST analysis and symbol extraction.
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
file_path: Path to the file to add
|
|
288
|
+
chunk_size: Maximum characters per chunk for content
|
|
289
|
+
chunk_overlap: Characters to overlap between chunks
|
|
290
|
+
metadata: Additional metadata for all chunks
|
|
291
|
+
|
|
292
|
+
Returns:
|
|
293
|
+
Tuple of (document IDs for content chunks, FileAST object)
|
|
294
|
+
"""
|
|
295
|
+
path = Path(file_path)
|
|
296
|
+
if not path.exists():
|
|
297
|
+
raise FileNotFoundError(f"File not found: {file_path}")
|
|
298
|
+
|
|
299
|
+
# First add file content using existing method
|
|
300
|
+
doc_ids = self.add_file(file_path, chunk_size, chunk_overlap, metadata)
|
|
301
|
+
|
|
302
|
+
# Analyze AST and symbols
|
|
303
|
+
file_ast = self.ast_analyzer.analyze_file(file_path)
|
|
304
|
+
if not file_ast:
|
|
305
|
+
return doc_ids, None
|
|
306
|
+
|
|
307
|
+
# Store complete AST
|
|
308
|
+
self._store_file_ast(file_ast)
|
|
309
|
+
|
|
310
|
+
# Store individual symbols with embeddings
|
|
311
|
+
self._store_symbols(file_ast.symbols)
|
|
312
|
+
|
|
313
|
+
# Store cross-references
|
|
314
|
+
self._store_references(file_ast)
|
|
315
|
+
|
|
316
|
+
return doc_ids, file_ast
|
|
317
|
+
|
|
318
|
+
def _store_file_ast(self, file_ast: FileAST):
|
|
319
|
+
"""Store complete file AST information."""
|
|
320
|
+
from datetime import datetime
|
|
321
|
+
|
|
322
|
+
# Remove existing AST for this file
|
|
323
|
+
try:
|
|
324
|
+
self.ast_table.delete(f"file_path = '{file_ast.file_path}'")
|
|
325
|
+
except:
|
|
326
|
+
pass
|
|
327
|
+
|
|
328
|
+
# Insert new AST
|
|
329
|
+
self.ast_table.insert([{
|
|
330
|
+
"file_path": file_ast.file_path,
|
|
331
|
+
"file_hash": file_ast.file_hash,
|
|
332
|
+
"language": file_ast.language,
|
|
333
|
+
"ast_data": json.dumps(file_ast.to_dict()),
|
|
334
|
+
"last_updated": datetime.now().isoformat(),
|
|
335
|
+
}])
|
|
336
|
+
|
|
337
|
+
def _store_symbols(self, symbols: List[Symbol]):
|
|
338
|
+
"""Store symbols with vector embeddings."""
|
|
339
|
+
if not symbols:
|
|
340
|
+
return
|
|
341
|
+
|
|
342
|
+
# Remove existing symbols for these files
|
|
343
|
+
file_paths = list(set(symbol.file_path for symbol in symbols))
|
|
344
|
+
for file_path in file_paths:
|
|
345
|
+
try:
|
|
346
|
+
self.symbols_table.delete(f"file_path = '{file_path}'")
|
|
347
|
+
except:
|
|
348
|
+
pass
|
|
349
|
+
|
|
350
|
+
# Insert new symbols
|
|
351
|
+
symbol_records = []
|
|
352
|
+
for symbol in symbols:
|
|
353
|
+
# Create embedding text for symbol
|
|
354
|
+
embedding_text = create_symbol_embedding_text(symbol)
|
|
355
|
+
embedding = self._generate_embedding(embedding_text)
|
|
356
|
+
|
|
357
|
+
# Generate symbol ID
|
|
358
|
+
symbol_id = self._generate_symbol_id(symbol)
|
|
359
|
+
|
|
360
|
+
# Prepare metadata
|
|
361
|
+
symbol_metadata = {
|
|
362
|
+
"references": symbol.references,
|
|
363
|
+
"embedding_text": embedding_text,
|
|
364
|
+
}
|
|
365
|
+
|
|
366
|
+
symbol_records.append({
|
|
367
|
+
"id": symbol_id,
|
|
368
|
+
"name": symbol.name,
|
|
369
|
+
"type": symbol.type,
|
|
370
|
+
"file_path": symbol.file_path,
|
|
371
|
+
"line_start": symbol.line_start,
|
|
372
|
+
"line_end": symbol.line_end,
|
|
373
|
+
"scope": symbol.scope or "",
|
|
374
|
+
"parent": symbol.parent or "",
|
|
375
|
+
"signature": symbol.signature or "",
|
|
376
|
+
"docstring": symbol.docstring or "",
|
|
377
|
+
"metadata": json.dumps(symbol_metadata),
|
|
378
|
+
"embedding": embedding,
|
|
379
|
+
})
|
|
380
|
+
|
|
381
|
+
if symbol_records:
|
|
382
|
+
self.symbols_table.insert(symbol_records)
|
|
383
|
+
|
|
384
|
+
def _store_references(self, file_ast: FileAST):
|
|
385
|
+
"""Store cross-file references."""
|
|
386
|
+
if not file_ast.dependencies:
|
|
387
|
+
return
|
|
388
|
+
|
|
389
|
+
# Remove existing references for this file
|
|
390
|
+
try:
|
|
391
|
+
self.references_table.delete(f"source_file = '{file_ast.file_path}'")
|
|
392
|
+
except:
|
|
393
|
+
pass
|
|
394
|
+
|
|
395
|
+
# Insert new references
|
|
396
|
+
reference_records = []
|
|
397
|
+
for i, dependency in enumerate(file_ast.dependencies):
|
|
398
|
+
ref_id = f"{file_ast.file_path}_{dependency}_{i}"
|
|
399
|
+
reference_records.append({
|
|
400
|
+
"id": ref_id,
|
|
401
|
+
"source_file": file_ast.file_path,
|
|
402
|
+
"target_file": dependency,
|
|
403
|
+
"symbol_name": dependency,
|
|
404
|
+
"reference_type": "import",
|
|
405
|
+
"line_number": 0, # Could be enhanced to track actual line numbers
|
|
406
|
+
"metadata": json.dumps({}),
|
|
407
|
+
})
|
|
408
|
+
|
|
409
|
+
if reference_records:
|
|
410
|
+
self.references_table.insert(reference_records)
|
|
411
|
+
|
|
412
|
+
def _generate_symbol_id(self, symbol: Symbol) -> str:
|
|
413
|
+
"""Generate unique symbol ID."""
|
|
414
|
+
text = f"{symbol.file_path}_{symbol.type}_{symbol.name}_{symbol.line_start}"
|
|
415
|
+
return hashlib.sha256(text.encode()).hexdigest()[:16]
|
|
416
|
+
|
|
417
|
+
def search_symbols(
|
|
418
|
+
self,
|
|
419
|
+
query: str,
|
|
420
|
+
symbol_type: Optional[str] = None,
|
|
421
|
+
file_path: Optional[str] = None,
|
|
422
|
+
limit: int = 10,
|
|
423
|
+
score_threshold: float = 0.0,
|
|
424
|
+
) -> List[SymbolSearchResult]:
|
|
425
|
+
"""Search for symbols using vector similarity.
|
|
426
|
+
|
|
427
|
+
Args:
|
|
428
|
+
query: Search query
|
|
429
|
+
symbol_type: Filter by symbol type (function, class, variable, etc.)
|
|
430
|
+
file_path: Filter by file path
|
|
431
|
+
limit: Maximum number of results
|
|
432
|
+
score_threshold: Minimum similarity score
|
|
433
|
+
|
|
434
|
+
Returns:
|
|
435
|
+
List of symbol search results
|
|
436
|
+
"""
|
|
437
|
+
# Generate query embedding
|
|
438
|
+
query_embedding = self._generate_embedding(query)
|
|
439
|
+
|
|
440
|
+
# Build search query
|
|
441
|
+
search_query = self.symbols_table.output(["*"]).match_dense(
|
|
442
|
+
"embedding",
|
|
443
|
+
query_embedding,
|
|
444
|
+
"float",
|
|
445
|
+
"ip", # Inner product
|
|
446
|
+
limit * 2 # Get more results for filtering
|
|
447
|
+
)
|
|
448
|
+
|
|
449
|
+
# Apply filters
|
|
450
|
+
if symbol_type:
|
|
451
|
+
search_query = search_query.filter(f"type = '{symbol_type}'")
|
|
452
|
+
if file_path:
|
|
453
|
+
search_query = search_query.filter(f"file_path = '{file_path}'")
|
|
454
|
+
|
|
455
|
+
search_results = search_query.to_pl()
|
|
456
|
+
|
|
457
|
+
# Convert to SymbolSearchResult objects
|
|
458
|
+
results = []
|
|
459
|
+
for row in search_results.iter_rows(named=True):
|
|
460
|
+
score = row.get("score", 0.0)
|
|
461
|
+
if score >= score_threshold:
|
|
462
|
+
# Parse metadata
|
|
463
|
+
try:
|
|
464
|
+
metadata = json.loads(row["metadata"])
|
|
465
|
+
except:
|
|
466
|
+
metadata = {}
|
|
467
|
+
|
|
468
|
+
# Create Symbol object
|
|
469
|
+
symbol = Symbol(
|
|
470
|
+
name=row["name"],
|
|
471
|
+
type=row["type"],
|
|
472
|
+
file_path=row["file_path"],
|
|
473
|
+
line_start=row["line_start"],
|
|
474
|
+
line_end=row["line_end"],
|
|
475
|
+
column_start=0, # Not stored in table
|
|
476
|
+
column_end=0, # Not stored in table
|
|
477
|
+
scope=row["scope"],
|
|
478
|
+
parent=row["parent"] if row["parent"] else None,
|
|
479
|
+
docstring=row["docstring"] if row["docstring"] else None,
|
|
480
|
+
signature=row["signature"] if row["signature"] else None,
|
|
481
|
+
references=metadata.get("references", []),
|
|
482
|
+
)
|
|
483
|
+
|
|
484
|
+
results.append(SymbolSearchResult(
|
|
485
|
+
symbol=symbol,
|
|
486
|
+
score=score,
|
|
487
|
+
))
|
|
488
|
+
|
|
489
|
+
return results[:limit]
|
|
490
|
+
|
|
491
|
+
def search_ast_nodes(
|
|
492
|
+
self,
|
|
493
|
+
file_path: str,
|
|
494
|
+
node_type: Optional[str] = None,
|
|
495
|
+
node_name: Optional[str] = None,
|
|
496
|
+
) -> Optional[FileAST]:
|
|
497
|
+
"""Search AST nodes within a specific file.
|
|
498
|
+
|
|
499
|
+
Args:
|
|
500
|
+
file_path: File to search in
|
|
501
|
+
node_type: Filter by AST node type
|
|
502
|
+
node_name: Filter by node name
|
|
503
|
+
|
|
504
|
+
Returns:
|
|
505
|
+
FileAST object if file found, None otherwise
|
|
506
|
+
"""
|
|
507
|
+
try:
|
|
508
|
+
results = self.ast_table.output(["*"]).filter(f"file_path = '{file_path}'").to_pl()
|
|
509
|
+
|
|
510
|
+
if len(results) == 0:
|
|
511
|
+
return None
|
|
512
|
+
|
|
513
|
+
row = next(results.iter_rows(named=True))
|
|
514
|
+
ast_data = json.loads(row["ast_data"])
|
|
515
|
+
|
|
516
|
+
# Reconstruct FileAST object
|
|
517
|
+
file_ast = FileAST(
|
|
518
|
+
file_path=ast_data["file_path"],
|
|
519
|
+
file_hash=ast_data["file_hash"],
|
|
520
|
+
language=ast_data["language"],
|
|
521
|
+
symbols=[Symbol(**s) for s in ast_data["symbols"]],
|
|
522
|
+
ast_nodes=[], # Would need custom deserialization for ASTNode
|
|
523
|
+
imports=ast_data["imports"],
|
|
524
|
+
exports=ast_data["exports"],
|
|
525
|
+
dependencies=ast_data["dependencies"],
|
|
526
|
+
)
|
|
527
|
+
|
|
528
|
+
return file_ast
|
|
529
|
+
|
|
530
|
+
except Exception as e:
|
|
531
|
+
print(f"Error searching AST nodes: {e}")
|
|
532
|
+
return None
|
|
533
|
+
|
|
534
|
+
def get_file_references(self, file_path: str) -> List[Dict[str, Any]]:
|
|
535
|
+
"""Get all files that reference the given file.
|
|
536
|
+
|
|
537
|
+
Args:
|
|
538
|
+
file_path: File to find references for
|
|
539
|
+
|
|
540
|
+
Returns:
|
|
541
|
+
List of reference information
|
|
542
|
+
"""
|
|
543
|
+
try:
|
|
544
|
+
results = self.references_table.output(["*"]).filter(f"target_file = '{file_path}'").to_pl()
|
|
545
|
+
|
|
546
|
+
references = []
|
|
547
|
+
for row in results.iter_rows(named=True):
|
|
548
|
+
references.append({
|
|
549
|
+
"source_file": row["source_file"],
|
|
550
|
+
"symbol_name": row["symbol_name"],
|
|
551
|
+
"reference_type": row["reference_type"],
|
|
552
|
+
"line_number": row["line_number"],
|
|
553
|
+
})
|
|
554
|
+
|
|
555
|
+
return references
|
|
556
|
+
|
|
557
|
+
except Exception as e:
|
|
558
|
+
print(f"Error getting file references: {e}")
|
|
559
|
+
return []
|
|
560
|
+
|
|
195
561
|
def search(
|
|
196
562
|
self,
|
|
197
563
|
query: str,
|
|
@@ -359,6 +725,104 @@ class InfinityVectorStore:
|
|
|
359
725
|
import random
|
|
360
726
|
return [random.random() for _ in range(self.dimension)]
|
|
361
727
|
|
|
728
|
+
async def get_stats(self) -> Dict[str, Any]:
|
|
729
|
+
"""Get statistics about the vector store.
|
|
730
|
+
|
|
731
|
+
Returns:
|
|
732
|
+
Dictionary with statistics
|
|
733
|
+
"""
|
|
734
|
+
try:
|
|
735
|
+
# Get document count
|
|
736
|
+
doc_count_result = self.documents_table.output(["count(*)"]).to_pl()
|
|
737
|
+
doc_count = doc_count_result.item(0, 0) if len(doc_count_result) > 0 else 0
|
|
738
|
+
|
|
739
|
+
# Get unique file count
|
|
740
|
+
file_result = self.documents_table.output(["file_path"]).to_pl()
|
|
741
|
+
unique_files = set()
|
|
742
|
+
for row in file_result.iter_rows():
|
|
743
|
+
if row[0]:
|
|
744
|
+
unique_files.add(row[0])
|
|
745
|
+
|
|
746
|
+
# Get symbol count
|
|
747
|
+
symbol_count = 0
|
|
748
|
+
try:
|
|
749
|
+
symbol_result = self.symbols_table.output(["count(*)"]).to_pl()
|
|
750
|
+
symbol_count = symbol_result.item(0, 0) if len(symbol_result) > 0 else 0
|
|
751
|
+
except:
|
|
752
|
+
pass
|
|
753
|
+
|
|
754
|
+
# Get AST count
|
|
755
|
+
ast_count = 0
|
|
756
|
+
try:
|
|
757
|
+
ast_result = self.ast_table.output(["count(*)"]).to_pl()
|
|
758
|
+
ast_count = ast_result.item(0, 0) if len(ast_result) > 0 else 0
|
|
759
|
+
except:
|
|
760
|
+
pass
|
|
761
|
+
|
|
762
|
+
return {
|
|
763
|
+
"document_count": doc_count,
|
|
764
|
+
"vector_count": doc_count, # Each document has a vector
|
|
765
|
+
"unique_files": len(unique_files),
|
|
766
|
+
"symbol_count": symbol_count,
|
|
767
|
+
"ast_count": ast_count,
|
|
768
|
+
"database_name": self.db_name,
|
|
769
|
+
"table_name": "documents",
|
|
770
|
+
"dimension": self.dimension,
|
|
771
|
+
}
|
|
772
|
+
except Exception as e:
|
|
773
|
+
return {
|
|
774
|
+
"error": str(e),
|
|
775
|
+
"document_count": 0,
|
|
776
|
+
"vector_count": 0,
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
async def clear(self) -> bool:
|
|
780
|
+
"""Clear all data from the vector store.
|
|
781
|
+
|
|
782
|
+
Returns:
|
|
783
|
+
True if successful
|
|
784
|
+
"""
|
|
785
|
+
try:
|
|
786
|
+
# Delete all records from all tables
|
|
787
|
+
self.documents_table.delete()
|
|
788
|
+
|
|
789
|
+
try:
|
|
790
|
+
self.symbols_table.delete()
|
|
791
|
+
except:
|
|
792
|
+
pass
|
|
793
|
+
|
|
794
|
+
try:
|
|
795
|
+
self.ast_table.delete()
|
|
796
|
+
except:
|
|
797
|
+
pass
|
|
798
|
+
|
|
799
|
+
try:
|
|
800
|
+
self.references_table.delete()
|
|
801
|
+
except:
|
|
802
|
+
pass
|
|
803
|
+
|
|
804
|
+
return True
|
|
805
|
+
except Exception as e:
|
|
806
|
+
print(f"Error clearing vector store: {e}")
|
|
807
|
+
return False
|
|
808
|
+
|
|
809
|
+
async def index_document(
|
|
810
|
+
self,
|
|
811
|
+
content: str,
|
|
812
|
+
metadata: Dict[str, Any] = None,
|
|
813
|
+
) -> str:
|
|
814
|
+
"""Async version of add_document for consistency.
|
|
815
|
+
|
|
816
|
+
Args:
|
|
817
|
+
content: Document content
|
|
818
|
+
metadata: Additional metadata
|
|
819
|
+
|
|
820
|
+
Returns:
|
|
821
|
+
Document ID
|
|
822
|
+
"""
|
|
823
|
+
file_path = metadata.get("path") if metadata else None
|
|
824
|
+
return self.add_document(content, metadata, file_path)
|
|
825
|
+
|
|
362
826
|
def close(self):
|
|
363
827
|
"""Close the database connection."""
|
|
364
828
|
if hasattr(self, 'infinity'):
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""Mock implementation of infinity_embedded for testing on unsupported platforms."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import hashlib
|
|
5
|
+
import random
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Dict, List, Any, Optional
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MockTable:
|
|
12
|
+
"""Mock implementation of an Infinity table."""
|
|
13
|
+
|
|
14
|
+
def __init__(self, name: str, schema: Dict[str, Any]):
|
|
15
|
+
self.name = name
|
|
16
|
+
self.schema = schema
|
|
17
|
+
self.data = []
|
|
18
|
+
self._id_counter = 0
|
|
19
|
+
|
|
20
|
+
def insert(self, records: List[Dict[str, Any]]):
|
|
21
|
+
"""Insert records into the table."""
|
|
22
|
+
for record in records:
|
|
23
|
+
# Add an internal ID if not present
|
|
24
|
+
if 'id' not in record:
|
|
25
|
+
record['_internal_id'] = self._id_counter
|
|
26
|
+
self._id_counter += 1
|
|
27
|
+
self.data.append(record)
|
|
28
|
+
|
|
29
|
+
def delete(self, condition: str):
|
|
30
|
+
"""Delete records matching condition."""
|
|
31
|
+
# Simple implementation - just clear for now
|
|
32
|
+
self.data = [r for r in self.data if not self._eval_condition(r, condition)]
|
|
33
|
+
|
|
34
|
+
def output(self, columns: List[str]):
|
|
35
|
+
"""Start a query chain."""
|
|
36
|
+
return MockQuery(self, columns)
|
|
37
|
+
|
|
38
|
+
def _eval_condition(self, record: Dict[str, Any], condition: str) -> bool:
|
|
39
|
+
"""Evaluate a simple condition."""
|
|
40
|
+
# Very basic implementation
|
|
41
|
+
if '=' in condition:
|
|
42
|
+
field, value = condition.split('=', 1)
|
|
43
|
+
field = field.strip()
|
|
44
|
+
value = value.strip().strip("'\"")
|
|
45
|
+
return str(record.get(field, '')) == value
|
|
46
|
+
return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class MockQuery:
|
|
50
|
+
"""Mock query builder."""
|
|
51
|
+
|
|
52
|
+
def __init__(self, table: MockTable, columns: List[str]):
|
|
53
|
+
self.table = table
|
|
54
|
+
self.columns = columns
|
|
55
|
+
self.filters = []
|
|
56
|
+
self.vector_search = None
|
|
57
|
+
self.limit_value = None
|
|
58
|
+
|
|
59
|
+
def filter(self, condition: str):
|
|
60
|
+
"""Add a filter condition."""
|
|
61
|
+
self.filters.append(condition)
|
|
62
|
+
return self
|
|
63
|
+
|
|
64
|
+
def match_dense(self, column: str, vector: List[float], dtype: str, metric: str, limit: int):
|
|
65
|
+
"""Add vector search."""
|
|
66
|
+
self.vector_search = {
|
|
67
|
+
'column': column,
|
|
68
|
+
'vector': vector,
|
|
69
|
+
'dtype': dtype,
|
|
70
|
+
'metric': metric,
|
|
71
|
+
'limit': limit
|
|
72
|
+
}
|
|
73
|
+
self.limit_value = limit
|
|
74
|
+
return self
|
|
75
|
+
|
|
76
|
+
def to_pl(self):
|
|
77
|
+
"""Execute query and return polars-like result."""
|
|
78
|
+
results = self.table.data.copy()
|
|
79
|
+
|
|
80
|
+
# Apply filters
|
|
81
|
+
for condition in self.filters:
|
|
82
|
+
results = [r for r in results if self.table._eval_condition(r, condition)]
|
|
83
|
+
|
|
84
|
+
# Apply vector search (mock similarity)
|
|
85
|
+
if self.vector_search:
|
|
86
|
+
# Add mock scores
|
|
87
|
+
for r in results:
|
|
88
|
+
r['score'] = random.uniform(0.5, 1.0)
|
|
89
|
+
# Sort by score
|
|
90
|
+
results.sort(key=lambda x: x.get('score', 0), reverse=True)
|
|
91
|
+
# Limit results
|
|
92
|
+
if self.limit_value:
|
|
93
|
+
results = results[:self.limit_value]
|
|
94
|
+
|
|
95
|
+
# Return mock polars DataFrame
|
|
96
|
+
return MockDataFrame(results)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class MockDataFrame:
|
|
100
|
+
"""Mock polars DataFrame."""
|
|
101
|
+
|
|
102
|
+
def __init__(self, data: List[Dict[str, Any]]):
|
|
103
|
+
self.data = data
|
|
104
|
+
|
|
105
|
+
def __len__(self):
|
|
106
|
+
return len(self.data)
|
|
107
|
+
|
|
108
|
+
def iter_rows(self, named: bool = False):
|
|
109
|
+
"""Iterate over rows."""
|
|
110
|
+
if named:
|
|
111
|
+
return iter(self.data)
|
|
112
|
+
else:
|
|
113
|
+
# Return tuples
|
|
114
|
+
if not self.data:
|
|
115
|
+
return iter([])
|
|
116
|
+
keys = list(self.data[0].keys())
|
|
117
|
+
return iter([tuple(row.get(k) for k in keys) for row in self.data])
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class MockDatabase:
|
|
121
|
+
"""Mock implementation of an Infinity database."""
|
|
122
|
+
|
|
123
|
+
def __init__(self, name: str):
|
|
124
|
+
self.name = name
|
|
125
|
+
self.tables = {}
|
|
126
|
+
|
|
127
|
+
def create_table(self, name: str, schema: Dict[str, Any]) -> MockTable:
|
|
128
|
+
"""Create a new table."""
|
|
129
|
+
table = MockTable(name, schema)
|
|
130
|
+
self.tables[name] = table
|
|
131
|
+
return table
|
|
132
|
+
|
|
133
|
+
def get_table(self, name: str) -> MockTable:
|
|
134
|
+
"""Get an existing table."""
|
|
135
|
+
if name not in self.tables:
|
|
136
|
+
raise KeyError(f"Table {name} not found")
|
|
137
|
+
return self.tables[name]
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class MockInfinity:
|
|
141
|
+
"""Mock implementation of Infinity connection."""
|
|
142
|
+
|
|
143
|
+
def __init__(self, path: str):
|
|
144
|
+
self.path = Path(path)
|
|
145
|
+
self.databases = {}
|
|
146
|
+
# Ensure directory exists
|
|
147
|
+
self.path.mkdir(parents=True, exist_ok=True)
|
|
148
|
+
|
|
149
|
+
def get_database(self, name: str) -> MockDatabase:
|
|
150
|
+
"""Get or create a database."""
|
|
151
|
+
if name not in self.databases:
|
|
152
|
+
self.databases[name] = MockDatabase(name)
|
|
153
|
+
return self.databases[name]
|
|
154
|
+
|
|
155
|
+
def disconnect(self):
|
|
156
|
+
"""Disconnect from Infinity."""
|
|
157
|
+
pass
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def connect(path: str) -> MockInfinity:
|
|
161
|
+
"""Connect to Infinity (mock implementation)."""
|
|
162
|
+
return MockInfinity(path)
|
|
@@ -8,7 +8,7 @@ from pydantic import Field
|
|
|
8
8
|
|
|
9
9
|
from hanzo_mcp.tools.common.base import BaseTool
|
|
10
10
|
from hanzo_mcp.tools.common.permissions import PermissionManager
|
|
11
|
-
from hanzo_mcp.tools.common.validation import
|
|
11
|
+
from hanzo_mcp.tools.common.validation import validate_path_parameter
|
|
12
12
|
|
|
13
13
|
from .infinity_store import InfinityVectorStore
|
|
14
14
|
from .project_manager import ProjectVectorManager
|
|
@@ -81,11 +81,12 @@ directories alongside them. Use this to build searchable knowledge bases per pro
|
|
|
81
81
|
try:
|
|
82
82
|
if file_path:
|
|
83
83
|
# Validate file access
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
84
|
+
# Use permission manager's existing validation
|
|
85
|
+
if not self.permission_manager.is_path_allowed(file_path):
|
|
86
|
+
return f"Error: Access denied to path {file_path}"
|
|
87
|
+
|
|
88
|
+
if not Path(file_path).exists():
|
|
89
|
+
return f"Error: File does not exist: {file_path}"
|
|
89
90
|
|
|
90
91
|
# Index file using project-aware manager
|
|
91
92
|
doc_ids, project_info = self.project_manager.add_file_to_appropriate_store(
|