hanzo-mcp 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hanzo-mcp might be problematic. Click here for more details.

Files changed (60) hide show
  1. hanzo_mcp/__init__.py +1 -1
  2. hanzo_mcp/config/settings.py +61 -0
  3. hanzo_mcp/tools/__init__.py +158 -12
  4. hanzo_mcp/tools/common/base.py +7 -2
  5. hanzo_mcp/tools/common/config_tool.py +396 -0
  6. hanzo_mcp/tools/common/stats.py +261 -0
  7. hanzo_mcp/tools/common/tool_disable.py +144 -0
  8. hanzo_mcp/tools/common/tool_enable.py +182 -0
  9. hanzo_mcp/tools/common/tool_list.py +263 -0
  10. hanzo_mcp/tools/database/__init__.py +71 -0
  11. hanzo_mcp/tools/database/database_manager.py +246 -0
  12. hanzo_mcp/tools/database/graph_add.py +257 -0
  13. hanzo_mcp/tools/database/graph_query.py +536 -0
  14. hanzo_mcp/tools/database/graph_remove.py +267 -0
  15. hanzo_mcp/tools/database/graph_search.py +348 -0
  16. hanzo_mcp/tools/database/graph_stats.py +345 -0
  17. hanzo_mcp/tools/database/sql_query.py +229 -0
  18. hanzo_mcp/tools/database/sql_search.py +296 -0
  19. hanzo_mcp/tools/database/sql_stats.py +254 -0
  20. hanzo_mcp/tools/editor/__init__.py +11 -0
  21. hanzo_mcp/tools/editor/neovim_command.py +272 -0
  22. hanzo_mcp/tools/editor/neovim_edit.py +290 -0
  23. hanzo_mcp/tools/editor/neovim_session.py +356 -0
  24. hanzo_mcp/tools/filesystem/__init__.py +20 -1
  25. hanzo_mcp/tools/filesystem/batch_search.py +812 -0
  26. hanzo_mcp/tools/filesystem/find_files.py +348 -0
  27. hanzo_mcp/tools/filesystem/git_search.py +505 -0
  28. hanzo_mcp/tools/llm/__init__.py +27 -0
  29. hanzo_mcp/tools/llm/consensus_tool.py +351 -0
  30. hanzo_mcp/tools/llm/llm_manage.py +413 -0
  31. hanzo_mcp/tools/llm/llm_tool.py +346 -0
  32. hanzo_mcp/tools/llm/provider_tools.py +412 -0
  33. hanzo_mcp/tools/mcp/__init__.py +11 -0
  34. hanzo_mcp/tools/mcp/mcp_add.py +263 -0
  35. hanzo_mcp/tools/mcp/mcp_remove.py +127 -0
  36. hanzo_mcp/tools/mcp/mcp_stats.py +165 -0
  37. hanzo_mcp/tools/shell/__init__.py +27 -7
  38. hanzo_mcp/tools/shell/logs.py +265 -0
  39. hanzo_mcp/tools/shell/npx.py +194 -0
  40. hanzo_mcp/tools/shell/npx_background.py +254 -0
  41. hanzo_mcp/tools/shell/pkill.py +262 -0
  42. hanzo_mcp/tools/shell/processes.py +279 -0
  43. hanzo_mcp/tools/shell/run_background.py +326 -0
  44. hanzo_mcp/tools/shell/uvx.py +187 -0
  45. hanzo_mcp/tools/shell/uvx_background.py +249 -0
  46. hanzo_mcp/tools/vector/__init__.py +21 -12
  47. hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
  48. hanzo_mcp/tools/vector/git_ingester.py +485 -0
  49. hanzo_mcp/tools/vector/index_tool.py +358 -0
  50. hanzo_mcp/tools/vector/infinity_store.py +465 -1
  51. hanzo_mcp/tools/vector/mock_infinity.py +162 -0
  52. hanzo_mcp/tools/vector/vector_index.py +7 -6
  53. hanzo_mcp/tools/vector/vector_search.py +22 -7
  54. {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/METADATA +68 -20
  55. hanzo_mcp-0.5.2.dist-info/RECORD +106 -0
  56. hanzo_mcp-0.5.0.dist-info/RECORD +0 -63
  57. {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/WHEEL +0 -0
  58. {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/entry_points.txt +0 -0
  59. {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/licenses/LICENSE +0 -0
  60. {hanzo_mcp-0.5.0.dist-info → hanzo_mcp-0.5.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,485 @@
1
+ """Git repository ingester for comprehensive code indexing.
2
+
3
+ This module provides functionality to ingest entire git repositories including:
4
+ - Full git history and commit metadata
5
+ - File contents at different points in time
6
+ - AST analysis via tree-sitter
7
+ - Symbol extraction and cross-references
8
+ - Blame information for line-level attribution
9
+ """
10
+
11
+ import subprocess
12
+ import json
13
+ import hashlib
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple, Any
16
+ from datetime import datetime
17
+ from dataclasses import dataclass
18
+ import logging
19
+
20
+ from .infinity_store import InfinityVectorStore
21
+ from .ast_analyzer import ASTAnalyzer
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@dataclass
class GitCommit:
    """A single git commit as parsed from ``git log`` output.

    Attributes:
        hash: Full commit hash.
        author: Author name.
        author_email: Author e-mail address.
        timestamp: Author time as a Unix timestamp (seconds).
        message: Commit subject line.
        files: Files touched by the commit, one dict per file, e.g.
            ``[{'status': 'M', 'filename': 'main.py'}]``.
        parent_hashes: Hashes of the parent commits (empty for a root commit).
    """

    hash: str
    author: str
    author_email: str
    timestamp: int
    message: str
    files: List[Dict[str, str]]
    parent_hashes: List[str]
36
+
37
+
38
@dataclass
class GitFileHistory:
    """History of a single file.

    Attributes:
        file_path: Path of the file this history describes.
        commits: Commits that touched the file.
        current_content: Current text of the file, or ``None`` when it is
            not available.
        line_blame: Blame information keyed by line number
            (line_number -> blame info).
    """

    file_path: str
    commits: List[GitCommit]
    current_content: Optional[str]
    line_blame: Dict[int, Dict[str, Any]]
45
+
46
+
47
class GitIngester:
    """Ingests git repositories into vector store.

    For every tracked file the ingester indexes the file through the vector
    store (optionally enriched with history and blame metadata) and, for a
    fixed set of source-file suffixes, stores the AST, its symbols and its
    cross-references. Optionally every commit reachable from the requested
    revision is indexed as a document of its own.
    """

    # File suffixes that are handed to the AST analyzer.
    _AST_SUFFIXES = ('.py', '.js', '.ts', '.java', '.cpp', '.c')

    # ASCII "unit separator" used between ``git log --pretty`` fields. Unlike
    # a printable character such as '|', it does not occur in author names
    # or commit subjects, so it cannot shift the parsed fields.
    _FIELD_SEP = "\x1f"

    # Characters allowed in a git object id as printed by ``git blame``.
    _HEX_DIGITS = frozenset("0123456789abcdef")

    def __init__(self, vector_store: "InfinityVectorStore"):
        """Initialize the git ingester.

        Args:
            vector_store: The vector store to ingest into
        """
        self.vector_store = vector_store
        self.ast_analyzer = ASTAnalyzer()
        self._commit_cache: Dict[str, "GitCommit"] = {}

    @staticmethod
    def _run_git(args: List[str], repo_path: Path, check: bool = True):
        """Run ``git <args>`` inside *repo_path* and return the completed process.

        Output is decoded as UTF-8 with undecodable bytes replaced, so a
        single non-UTF-8 file, author name or message cannot abort ingestion
        with a ``UnicodeDecodeError``.

        Args:
            args: Arguments following ``git`` on the command line.
            repo_path: Working directory for the command.
            check: When true, a non-zero exit status raises
                ``subprocess.CalledProcessError``.

        Returns:
            The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.
        """
        return subprocess.run(
            ["git", *args],
            cwd=repo_path,
            capture_output=True,
            text=True,
            encoding="utf-8",
            errors="replace",
            check=check,
        )

    def ingest_repository(
        self,
        repo_path: str,
        branch: str = "HEAD",
        include_history: bool = True,
        include_diffs: bool = True,
        include_blame: bool = True,
        file_patterns: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Ingest an entire git repository.

        Per-file failures are recorded in ``results["errors"]`` and do not
        stop the run; any other failure is recorded as a ``"Fatal error"``
        entry and the partial results are still returned.

        Args:
            repo_path: Path to the git repository
            branch: Branch to ingest (default: HEAD)
            include_history: Whether to include commit history
            include_diffs: Whether to include diff information
            include_blame: Whether to include blame information
            file_patterns: List of file patterns to include (e.g., ["*.py", "*.js"])

        Returns:
            Summary of ingestion results

        Raises:
            ValueError: If *repo_path* has no ``.git`` entry.
        """
        repo_path = Path(repo_path)
        if not (repo_path / ".git").exists():
            raise ValueError(f"Not a git repository: {repo_path}")

        logger.info("Starting ingestion of repository: %s", repo_path)

        results: Dict[str, Any] = {
            "repository": str(repo_path),
            "branch": branch,
            "commits_processed": 0,
            "commits_indexed": 0,
            "files_indexed": 0,
            "symbols_extracted": 0,
            "diffs_indexed": 0,
            "blame_entries": 0,
            "errors": [],
        }

        try:
            results["current_commit"] = self._get_current_commit(repo_path)

            files = self._get_repository_files(repo_path, file_patterns)
            logger.info("Found %d files to process", len(files))

            for file_path in files:
                # One broken file must not prevent the rest from being indexed.
                try:
                    self._process_file(
                        repo_path,
                        file_path,
                        include_history=include_history,
                        include_blame=include_blame,
                        results=results,
                    )
                except Exception as e:
                    logger.error("Error processing %s: %s", file_path, e)
                    results["errors"].append(f"{file_path}: {str(e)}")

            if include_history:
                commits = self._get_commit_history(repo_path, branch)
                results["commits_processed"] = len(commits)

                for commit in commits:
                    self._index_commit(commit, include_diffs=include_diffs)
                    results["commits_indexed"] = results.get("commits_indexed", 0) + 1

                    if include_diffs:
                        results["diffs_indexed"] += len(commit.files)

            self._index_repository_metadata(repo_path, results)

        except Exception as e:
            logger.error("Repository ingestion failed: %s", e)
            results["errors"].append(f"Fatal error: {str(e)}")

        logger.info("Ingestion complete: %s", results)
        return results

    def _get_current_commit(self, repo_path: Path) -> str:
        """Return the hash ``HEAD`` currently points at.

        Raises:
            subprocess.CalledProcessError: If ``git rev-parse`` fails.
        """
        return self._run_git(["rev-parse", "HEAD"], repo_path).stdout.strip()

    def _get_repository_files(
        self,
        repo_path: Path,
        patterns: Optional[List[str]] = None
    ) -> List[Path]:
        """Get list of tracked files in the repository matching patterns.

        Args:
            repo_path: Repository root.
            patterns: Optional glob patterns (matched with ``Path.match``);
                when omitted or empty every tracked file is returned.

        Returns:
            Absolute-or-relative paths (``repo_path / name``) of tracked
            entries that are regular files on disk.

        Raises:
            subprocess.CalledProcessError: If ``git ls-files`` fails.
        """
        # ``git ls-files`` only lists tracked files, so ignored files are
        # excluded. ``-z`` yields NUL-terminated, unquoted names; without it
        # git C-quotes unusual names and the paths would not exist on disk.
        result = self._run_git(["ls-files", "-z"], repo_path)

        files = []
        for name in result.stdout.split("\0"):
            if not name:
                continue
            file_path = repo_path / name
            # is_file() also filters out submodule directories, which git
            # lists as tracked entries but which cannot be indexed as files.
            if not file_path.is_file():
                continue
            if patterns and not any(file_path.match(pattern) for pattern in patterns):
                continue
            files.append(file_path)

        return files

    def _get_commit_history(
        self,
        repo_path: Path,
        branch: str = "HEAD",
        max_commits: int = 1000
    ) -> List["GitCommit"]:
        """Get commit history for the repository.

        Every parsed commit is also stored in ``self._commit_cache``.

        Args:
            repo_path: Repository root.
            branch: Revision whose history is listed.
            max_commits: Upper bound passed to ``git log --max-count``.

        Returns:
            Commits in the order ``git log`` prints them.

        Raises:
            subprocess.CalledProcessError: If ``git log`` or ``git show`` fails.
        """
        sep = self._FIELD_SEP
        pretty = sep.join(("%H", "%P", "%an", "%ae", "%at", "%s"))
        # The trailing "--" makes git treat ``branch`` as a revision even when
        # a file of the same name exists in the working tree.
        result = self._run_git(
            ["log", branch, f"--max-count={max_commits}",
             f"--pretty=format:{pretty}", "--"],
            repo_path,
        )

        commits = []
        for line in result.stdout.split('\n'):
            parts = line.split(sep, 5)
            if len(parts) < 6:
                continue

            commit_hash = parts[0]
            commit = GitCommit(
                hash=commit_hash,
                parent_hashes=parts[1].split() if parts[1] else [],
                author=parts[2],
                author_email=parts[3],
                timestamp=int(parts[4]),
                message=parts[5],
                files=self._get_commit_files(repo_path, commit_hash),
            )
            commits.append(commit)
            self._commit_cache[commit_hash] = commit

        return commits

    @staticmethod
    def _parse_name_status(output: str) -> List[Dict[str, str]]:
        """Parse ``git show --name-status`` output into file entries.

        Each entry has ``status`` and ``filename``. Rename/copy rows carry
        two paths (``R100<TAB>old<TAB>new``); for those ``filename`` is the
        new path and ``old_filename`` holds the previous one.
        """
        files = []
        for line in output.split('\n'):
            fields = line.split('\t')
            if len(fields) < 2 or not fields[0]:
                continue
            entry = {'status': fields[0], 'filename': fields[-1]}
            if len(fields) > 2:
                entry['old_filename'] = fields[1]
            files.append(entry)
        return files

    def _get_commit_files(self, repo_path: Path, commit_hash: str) -> List[Dict[str, str]]:
        """Get list of files changed in a commit.

        Raises:
            subprocess.CalledProcessError: If ``git show`` fails.
        """
        result = self._run_git(
            ["show", "--name-status", "--format=", commit_hash],
            repo_path,
        )
        return self._parse_name_status(result.stdout)

    def _process_file(
        self,
        repo_path: Path,
        file_path: Path,
        include_history: bool,
        include_blame: bool,
        results: Dict[str, Any]
    ):
        """Index one file and, for supported suffixes, its AST data.

        Updates the ``files_indexed``, ``symbols_extracted`` and
        ``blame_entries`` counters in *results* in place.

        Args:
            repo_path: Repository root.
            file_path: Path of the file (must be located under *repo_path*).
            include_history: Add commit-count / first / last commit metadata.
            include_blame: Add the number of distinct blame authors.
            results: Mutable ingestion summary to update.
        """
        relative_path = file_path.relative_to(repo_path)

        metadata = {
            'repository': str(repo_path),
            'relative_path': str(relative_path),
            'file_type': file_path.suffix,
            'size': file_path.stat().st_size,
        }

        if include_history:
            history = self._get_file_history(repo_path, relative_path)
            metadata['commit_count'] = len(history)
            if history:
                # git log lists newest first, so the last row is the oldest.
                metadata['first_commit'] = history[-1]['hash']
                metadata['last_commit'] = history[0]['hash']
                metadata['last_modified'] = datetime.fromtimestamp(
                    history[0]['timestamp']
                ).isoformat()

        if include_blame:
            blame_data = self._get_file_blame(repo_path, relative_path)
            metadata['unique_authors'] = len(
                {entry['author'] for entry in blame_data.values()}
            )
            results["blame_entries"] = results.get("blame_entries", 0) + len(blame_data)

        # The store reads the file itself from the given path.
        self.vector_store.add_file(
            str(file_path),
            chunk_size=1000,
            chunk_overlap=200,
            metadata=metadata
        )
        results["files_indexed"] += 1

        if file_path.suffix in self._AST_SUFFIXES:
            # AST analysis is best-effort: the file content is already indexed.
            try:
                file_ast = self.ast_analyzer.analyze_file(str(file_path))
                if file_ast:
                    self.vector_store._store_file_ast(file_ast)
                    self.vector_store._store_symbols(file_ast.symbols)
                    results["symbols_extracted"] += len(file_ast.symbols)
                    self.vector_store._store_references(file_ast)
            except Exception as e:
                logger.warning("AST analysis failed for %s: %s", file_path, e)

    def _get_file_history(
        self,
        repo_path: Path,
        file_path: Path
    ) -> List[Dict[str, Any]]:
        """Get commit history for a specific file.

        Returns:
            One dict per commit (``hash``, ``timestamp``, ``author``,
            ``message``) in ``git log`` order, or an empty list when the
            git command exits with a non-zero status.
        """
        sep = self._FIELD_SEP
        pretty = sep.join(("%H", "%at", "%an", "%s"))
        result = self._run_git(
            ["log", "--follow", f"--pretty=format:{pretty}", "--", str(file_path)],
            repo_path,
            check=False,
        )

        if result.returncode != 0:
            return []

        history = []
        for line in result.stdout.split('\n'):
            parts = line.split(sep, 3)
            if len(parts) < 4:
                continue
            history.append({
                'hash': parts[0],
                'timestamp': int(parts[1]),
                'author': parts[2],
                'message': parts[3],
            })

        return history

    @classmethod
    def _parse_blame(cls, output: str) -> Dict[int, Dict[str, Any]]:
        """Parse ``git blame --line-porcelain`` output.

        Each entry consists of a header line (``<id> <orig> <final> ...``),
        several ``key value`` lines and finally the source line prefixed with
        a TAB. An entry is recorded once, when its TAB line is reached, so a
        line is never paired with data left over from the previous entry.

        Returns:
            Mapping of final line number to ``commit``, ``author`` and
            ``timestamp`` (author time, or ``None`` if it was not present).
        """
        blame_data: Dict[int, Dict[str, Any]] = {}
        commit = line_no = author = timestamp = None

        # Split on '\n' only: source lines may contain other characters that
        # str.splitlines() would treat as line breaks.
        for line in output.split('\n'):
            if line.startswith('\t'):
                if line_no is not None and author is not None:
                    blame_data[line_no] = {
                        'commit': commit,
                        'author': author,
                        'timestamp': timestamp,
                    }
                commit = line_no = author = timestamp = None
            elif line.startswith('author '):
                author = line[7:]
            elif line.startswith('author-time '):
                timestamp = int(line[12:])
            elif commit is None:
                parts = line.split(' ')
                # Object ids are 40 (SHA-1) or 64 (SHA-256) hex characters.
                if (
                    len(parts) >= 3
                    and len(parts[0]) in (40, 64)
                    and all(ch in cls._HEX_DIGITS for ch in parts[0])
                    and parts[2].isdigit()
                ):
                    commit = parts[0]
                    line_no = int(parts[2])

        return blame_data

    def _get_file_blame(
        self,
        repo_path: Path,
        file_path: Path
    ) -> Dict[int, Dict[str, Any]]:
        """Get blame information for a file.

        Returns:
            Mapping of line number to blame info, or an empty dict when
            ``git blame`` exits with a non-zero status.
        """
        result = self._run_git(
            ["blame", "--line-porcelain", "--", str(file_path)],
            repo_path,
            check=False,
        )

        if result.returncode != 0:
            return {}

        return self._parse_blame(result.stdout)

    def _index_commit(
        self,
        commit: "GitCommit",
        include_diffs: bool = True
    ):
        """Index a single commit.

        Adds one ``git_commit`` document to the vector store and, when
        *include_diffs* is true, one ``git_diff`` document per changed file.
        """
        header = [
            f"Git Commit: {commit.hash}",
            f"Author: {commit.author} <{commit.author_email}>",
            f"Date: {datetime.fromtimestamp(commit.timestamp).isoformat()}",
            f"Message: {commit.message}",
            "",
            f"Files changed: {len(commit.files)}",
            "",
        ]

        file_lines = []
        for file_info in commit.files:
            names = file_info['filename']
            old_name = file_info.get('old_filename')
            if old_name:
                # Keep both paths of a rename/copy visible in the document.
                names = f"{old_name}\t{names}"
            file_lines.append(f"\n{file_info['status']}\t{names}")

        commit_doc = "\n".join(header) + "".join(file_lines)

        metadata = {
            'type': 'git_commit',
            'commit_hash': commit.hash,
            'author': commit.author,
            'timestamp': commit.timestamp,
            'file_count': len(commit.files)
        }

        self.vector_store.add_document(commit_doc, metadata)

        if include_diffs:
            for file_info in commit.files:
                self._index_commit_diff(commit, file_info['filename'])

    def _index_commit_diff(self, commit: "GitCommit", filename: str):
        """Index the change of a specific file in a commit.

        This is a simplified version: only a summary of the change (file,
        commit, author, message) is stored, not the parsed diff content.
        """
        metadata = {
            'type': 'git_diff',
            'commit_hash': commit.hash,
            'filename': filename,
            'author': commit.author,
            'timestamp': commit.timestamp
        }

        diff_doc = "\n".join([
            f"File: {filename}",
            f"Commit: {commit.hash}",
            f"Author: {commit.author}",
            f"Message: {commit.message}",
            "",
        ])

        self.vector_store.add_document(diff_doc, metadata)

    def _index_repository_metadata(
        self,
        repo_path: Path,
        results: Dict[str, Any]
    ):
        """Index overall repository metadata.

        Stores one ``repository`` document summarising the run; every key of
        *results* is copied into the document metadata.
        """
        remote_result = self._run_git(
            ["remote", "get-url", "origin"],
            repo_path,
            check=False,
        )

        # A repository without an "origin" remote is not an error.
        remote_url = remote_result.stdout.strip() if remote_result.returncode == 0 else None

        repo_doc = "\n".join([
            f"Repository: {repo_path.name}",
            f"Path: {repo_path}",
            f"Remote: {remote_url or 'No remote'}",
            f"Current Commit: {results.get('current_commit', 'Unknown')}",
            "",
            "Statistics:",
            f"- Files indexed: {results['files_indexed']}",
            f"- Commits processed: {results['commits_processed']}",
            f"- Symbols extracted: {results['symbols_extracted']}",
            f"- Diffs indexed: {results['diffs_indexed']}",
            "",
        ])

        metadata = {
            'type': 'repository',
            'name': repo_path.name,
            'path': str(repo_path),
            'remote_url': remote_url,
            **results
        }

        self.vector_store.add_document(repo_doc, metadata)