hanzo-mcp 0.3.8__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hanzo-mcp might be problematic. Click here for more details.

Files changed (93) hide show
  1. hanzo_mcp/__init__.py +1 -1
  2. hanzo_mcp/cli.py +118 -170
  3. hanzo_mcp/cli_enhanced.py +438 -0
  4. hanzo_mcp/config/__init__.py +19 -0
  5. hanzo_mcp/config/settings.py +449 -0
  6. hanzo_mcp/config/tool_config.py +197 -0
  7. hanzo_mcp/prompts/__init__.py +117 -0
  8. hanzo_mcp/prompts/compact_conversation.py +77 -0
  9. hanzo_mcp/prompts/create_release.py +38 -0
  10. hanzo_mcp/prompts/project_system.py +120 -0
  11. hanzo_mcp/prompts/project_todo_reminder.py +111 -0
  12. hanzo_mcp/prompts/utils.py +286 -0
  13. hanzo_mcp/server.py +117 -99
  14. hanzo_mcp/tools/__init__.py +121 -33
  15. hanzo_mcp/tools/agent/__init__.py +8 -11
  16. hanzo_mcp/tools/agent/agent_tool.py +290 -224
  17. hanzo_mcp/tools/agent/prompt.py +16 -13
  18. hanzo_mcp/tools/agent/tool_adapter.py +9 -9
  19. hanzo_mcp/tools/common/__init__.py +17 -16
  20. hanzo_mcp/tools/common/base.py +79 -110
  21. hanzo_mcp/tools/common/batch_tool.py +330 -0
  22. hanzo_mcp/tools/common/config_tool.py +396 -0
  23. hanzo_mcp/tools/common/context.py +26 -292
  24. hanzo_mcp/tools/common/permissions.py +12 -12
  25. hanzo_mcp/tools/common/thinking_tool.py +153 -0
  26. hanzo_mcp/tools/common/validation.py +1 -63
  27. hanzo_mcp/tools/filesystem/__init__.py +97 -57
  28. hanzo_mcp/tools/filesystem/base.py +32 -24
  29. hanzo_mcp/tools/filesystem/content_replace.py +114 -107
  30. hanzo_mcp/tools/filesystem/directory_tree.py +129 -105
  31. hanzo_mcp/tools/filesystem/edit.py +279 -0
  32. hanzo_mcp/tools/filesystem/grep.py +458 -0
  33. hanzo_mcp/tools/filesystem/grep_ast_tool.py +250 -0
  34. hanzo_mcp/tools/filesystem/multi_edit.py +362 -0
  35. hanzo_mcp/tools/filesystem/read.py +255 -0
  36. hanzo_mcp/tools/filesystem/unified_search.py +689 -0
  37. hanzo_mcp/tools/filesystem/write.py +156 -0
  38. hanzo_mcp/tools/jupyter/__init__.py +41 -29
  39. hanzo_mcp/tools/jupyter/base.py +66 -57
  40. hanzo_mcp/tools/jupyter/{edit_notebook.py → notebook_edit.py} +162 -139
  41. hanzo_mcp/tools/jupyter/notebook_read.py +152 -0
  42. hanzo_mcp/tools/shell/__init__.py +29 -20
  43. hanzo_mcp/tools/shell/base.py +87 -45
  44. hanzo_mcp/tools/shell/bash_session.py +731 -0
  45. hanzo_mcp/tools/shell/bash_session_executor.py +295 -0
  46. hanzo_mcp/tools/shell/command_executor.py +435 -384
  47. hanzo_mcp/tools/shell/run_command.py +284 -131
  48. hanzo_mcp/tools/shell/run_command_windows.py +328 -0
  49. hanzo_mcp/tools/shell/session_manager.py +196 -0
  50. hanzo_mcp/tools/shell/session_storage.py +325 -0
  51. hanzo_mcp/tools/todo/__init__.py +66 -0
  52. hanzo_mcp/tools/todo/base.py +319 -0
  53. hanzo_mcp/tools/todo/todo_read.py +148 -0
  54. hanzo_mcp/tools/todo/todo_write.py +378 -0
  55. hanzo_mcp/tools/vector/__init__.py +99 -0
  56. hanzo_mcp/tools/vector/ast_analyzer.py +459 -0
  57. hanzo_mcp/tools/vector/git_ingester.py +482 -0
  58. hanzo_mcp/tools/vector/infinity_store.py +731 -0
  59. hanzo_mcp/tools/vector/mock_infinity.py +162 -0
  60. hanzo_mcp/tools/vector/project_manager.py +361 -0
  61. hanzo_mcp/tools/vector/vector_index.py +116 -0
  62. hanzo_mcp/tools/vector/vector_search.py +225 -0
  63. hanzo_mcp-0.5.1.dist-info/METADATA +276 -0
  64. hanzo_mcp-0.5.1.dist-info/RECORD +68 -0
  65. {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/WHEEL +1 -1
  66. hanzo_mcp/tools/agent/base_provider.py +0 -73
  67. hanzo_mcp/tools/agent/litellm_provider.py +0 -45
  68. hanzo_mcp/tools/agent/lmstudio_agent.py +0 -385
  69. hanzo_mcp/tools/agent/lmstudio_provider.py +0 -219
  70. hanzo_mcp/tools/agent/provider_registry.py +0 -120
  71. hanzo_mcp/tools/common/error_handling.py +0 -86
  72. hanzo_mcp/tools/common/logging_config.py +0 -115
  73. hanzo_mcp/tools/common/session.py +0 -91
  74. hanzo_mcp/tools/common/think_tool.py +0 -123
  75. hanzo_mcp/tools/common/version_tool.py +0 -120
  76. hanzo_mcp/tools/filesystem/edit_file.py +0 -287
  77. hanzo_mcp/tools/filesystem/get_file_info.py +0 -170
  78. hanzo_mcp/tools/filesystem/read_files.py +0 -199
  79. hanzo_mcp/tools/filesystem/search_content.py +0 -275
  80. hanzo_mcp/tools/filesystem/write_file.py +0 -162
  81. hanzo_mcp/tools/jupyter/notebook_operations.py +0 -514
  82. hanzo_mcp/tools/jupyter/read_notebook.py +0 -165
  83. hanzo_mcp/tools/project/__init__.py +0 -64
  84. hanzo_mcp/tools/project/analysis.py +0 -886
  85. hanzo_mcp/tools/project/base.py +0 -66
  86. hanzo_mcp/tools/project/project_analyze.py +0 -173
  87. hanzo_mcp/tools/shell/run_script.py +0 -215
  88. hanzo_mcp/tools/shell/script_tool.py +0 -244
  89. hanzo_mcp-0.3.8.dist-info/METADATA +0 -196
  90. hanzo_mcp-0.3.8.dist-info/RECORD +0 -53
  91. {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/entry_points.txt +0 -0
  92. {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/licenses/LICENSE +0 -0
  93. {hanzo_mcp-0.3.8.dist-info → hanzo_mcp-0.5.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,482 @@
1
+ """Git repository ingester for comprehensive code indexing.
2
+
3
+ This module provides functionality to ingest entire git repositories including:
4
+ - Full git history and commit metadata
5
+ - File contents at different points in time
6
+ - AST analysis via tree-sitter
7
+ - Symbol extraction and cross-references
8
+ - Blame information for line-level attribution
9
+ """
10
+
11
+ import subprocess
12
+ import json
13
+ import hashlib
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple, Any
16
+ from datetime import datetime
17
+ from dataclasses import dataclass
18
+ import logging
19
+
20
+ from .infinity_store import InfinityVectorStore
21
+ from .ast_analyzer import ASTAnalyzer
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
@dataclass
class GitCommit:
    """A single git commit together with the files it touched."""

    # Full commit hash as printed by git.
    hash: str
    # Author name.
    author: str
    # Author e-mail address.
    author_email: str
    # Author time as a Unix timestamp (seconds).
    timestamp: int
    # Commit subject line.
    message: str
    # Changed files, e.g. [{'status': 'M', 'filename': 'main.py'}].
    files: List[Dict[str, str]]
    # Hashes of the parent commits (empty for a root commit).
    parent_hashes: List[str]
36
+
37
+
38
@dataclass
class GitFileHistory:
    """Everything recorded about the history of one file."""

    # Path of the file this history belongs to.
    file_path: str
    # Commits associated with the file.
    commits: List[GitCommit]
    # Current text of the file, or None when it is not available.
    current_content: Optional[str]
    # Blame info keyed by line number (line_number -> blame info).
    line_blame: Dict[int, Dict[str, Any]]
45
+
46
+
47
class GitIngester:
    """Ingests git repositories into vector store.

    The ingester shells out to the ``git`` command line tool to list tracked
    files, read commit history and collect blame data, and forwards what it
    finds to the configured vector store and AST analyzer.
    """

    # Suffixes of files that are additionally run through the AST analyzer.
    _AST_SUFFIXES = ('.py', '.js', '.ts', '.java', '.cpp', '.c')

    # Field separator for ``git log`` records (written as ``%x00`` in the
    # format string). A '|' separator breaks as soon as an author name
    # contains '|'; a NUL byte is not expected in names or subjects.
    _LOG_SEP = '\x00'

    def __init__(self, vector_store: "InfinityVectorStore"):
        """Initialize the git ingester.

        Args:
            vector_store: The vector store to ingest into
        """
        self.vector_store = vector_store
        self.ast_analyzer = ASTAnalyzer()
        # Commits seen by _get_commit_history, keyed by commit hash.
        self._commit_cache: Dict[str, "GitCommit"] = {}

    def ingest_repository(
        self,
        repo_path: str,
        branch: str = "HEAD",
        include_history: bool = True,
        include_diffs: bool = True,
        include_blame: bool = True,
        file_patterns: Optional[List[str]] = None
    ) -> Dict[str, Any]:
        """Ingest an entire git repository.

        Per-file failures and fatal errors are recorded in the ``errors``
        list of the returned summary instead of being raised.

        Args:
            repo_path: Path to the git repository
            branch: Branch to ingest (default: HEAD)
            include_history: Whether to include commit history
            include_diffs: Whether to include diff information
            include_blame: Whether to include blame information
            file_patterns: List of file patterns to include (e.g., ["*.py", "*.js"])

        Returns:
            Summary of ingestion results

        Raises:
            ValueError: If ``repo_path`` has no ``.git`` entry.
        """
        repo_path = Path(repo_path)
        if not (repo_path / ".git").exists():
            raise ValueError(f"Not a git repository: {repo_path}")

        logger.info(f"Starting ingestion of repository: {repo_path}")

        results = {
            "repository": str(repo_path),
            "branch": branch,
            "commits_processed": 0,
            "files_indexed": 0,
            "symbols_extracted": 0,
            "diffs_indexed": 0,
            "errors": []
        }

        try:
            # Get current branch/commit
            current_commit = self._get_current_commit(repo_path)
            results["current_commit"] = current_commit

            # Get list of files to process
            files = self._get_repository_files(repo_path, file_patterns)
            logger.info(f"Found {len(files)} files to process")

            # Process each file; one bad file must not abort the whole run
            for file_path in files:
                try:
                    self._process_file(
                        repo_path,
                        file_path,
                        include_history=include_history,
                        include_blame=include_blame,
                        results=results
                    )
                except Exception as e:
                    logger.error(f"Error processing {file_path}: {e}")
                    results["errors"].append(f"{file_path}: {str(e)}")

            # Process commit history if requested
            if include_history:
                commits = self._get_commit_history(repo_path, branch)
                results["commits_processed"] = len(commits)

                for commit in commits:
                    self._index_commit(commit, include_diffs=include_diffs)

                    if include_diffs:
                        results["diffs_indexed"] += len(commit.files)

            # Create repository metadata document
            self._index_repository_metadata(repo_path, results)

        except Exception as e:
            logger.error(f"Repository ingestion failed: {e}")
            results["errors"].append(f"Fatal error: {str(e)}")

        logger.info(f"Ingestion complete: {results}")
        return results

    def _get_current_commit(self, repo_path: Path) -> str:
        """Get the current commit hash (``git rev-parse HEAD``)."""
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True
        )
        return result.stdout.strip()

    def _get_repository_files(
        self,
        repo_path: Path,
        patterns: Optional[List[str]] = None
    ) -> List[Path]:
        """Get list of tracked files in repository matching patterns.

        Args:
            repo_path: Root of the repository
            patterns: Optional ``Path.match`` patterns; a falsy value keeps
                every tracked file

        Returns:
            Paths (``repo_path`` joined with the tracked name) of regular
            files that exist on disk
        """
        # Use git ls-files to respect .gitignore. ``-z`` makes git print the
        # names verbatim and NUL-terminated; without it, names containing
        # non-ASCII or special characters are C-quoted and would never be
        # found on disk.
        result = subprocess.run(
            ["git", "ls-files", "-z"],
            cwd=repo_path,
            capture_output=True,
            text=True,
            # Keep undecodable file-name bytes round-trippable.
            errors="surrogateescape",
            check=True
        )

        files = []
        for name in result.stdout.split('\0'):
            if not name:
                continue
            file_path = repo_path / name
            # is_file() also skips submodule directories listed by git.
            if not file_path.is_file():
                continue
            if patterns and not any(file_path.match(p) for p in patterns):
                continue
            files.append(file_path)

        return files

    def _get_commit_history(
        self,
        repo_path: Path,
        branch: str = "HEAD",
        max_commits: int = 1000
    ) -> List["GitCommit"]:
        """Get commit history for the repository.

        Args:
            repo_path: Root of the repository
            branch: Revision passed to ``git log``
            max_commits: Upper bound on the number of commits read

        Returns:
            Commits in the order printed by ``git log``; every commit is
            also stored in ``self._commit_cache``
        """
        # Get commit list with basic info (NUL-separated fields)
        result = subprocess.run(
            ["git", "log", branch, f"--max-count={max_commits}",
             "--pretty=format:%H%x00%P%x00%an%x00%ae%x00%at%x00%s"],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True
        )

        commits = []
        for line in result.stdout.split('\n'):
            if not line:
                continue
            parts = line.split(self._LOG_SEP, 5)
            if len(parts) < 6:
                continue

            commit_hash = parts[0]
            parent_hashes = parts[1].split() if parts[1] else []

            # Get file changes for this commit
            files = self._get_commit_files(repo_path, commit_hash)

            commit = GitCommit(
                hash=commit_hash,
                parent_hashes=parent_hashes,
                author=parts[2],
                author_email=parts[3],
                timestamp=int(parts[4]),
                message=parts[5],
                files=files
            )
            commits.append(commit)
            self._commit_cache[commit_hash] = commit

        return commits

    @staticmethod
    def _parse_name_status(output: str) -> List[Dict[str, str]]:
        """Parse ``git show --name-status`` output into file records.

        Each record has ``status`` and ``filename``. Rename and copy lines
        carry two paths (``R100<TAB>old<TAB>new``): ``filename`` is the new
        path and the previous one is kept under ``old_filename``.
        """
        files = []
        for line in output.split('\n'):
            fields = line.split('\t')
            if len(fields) < 2:
                continue
            entry = {'status': fields[0], 'filename': fields[-1]}
            if len(fields) > 2:
                entry['old_filename'] = fields[1]
            files.append(entry)
        return files

    def _get_commit_files(self, repo_path: Path, commit_hash: str) -> List[Dict[str, str]]:
        """Get list of files changed in a commit."""
        result = subprocess.run(
            ["git", "show", "--name-status", "--format=", commit_hash],
            cwd=repo_path,
            capture_output=True,
            text=True,
            check=True
        )
        return self._parse_name_status(result.stdout)

    def _process_file(
        self,
        repo_path: Path,
        file_path: Path,
        include_history: bool,
        include_blame: bool,
        results: Dict[str, Any]
    ):
        """Process a single file.

        Builds the file metadata, indexes the file in the vector store and,
        for supported suffixes, stores the AST, symbols and references.
        Updates the ``files_indexed`` and ``symbols_extracted`` counters in
        ``results`` in place.
        """
        relative_path = file_path.relative_to(repo_path)

        # Get file metadata
        metadata = {
            'repository': str(repo_path),
            'relative_path': str(relative_path),
            'file_type': file_path.suffix,
            'size': file_path.stat().st_size,
        }

        # Add git history metadata if requested
        if include_history:
            history = self._get_file_history(repo_path, relative_path)
            metadata['commit_count'] = len(history)
            if history:
                # History is in ``git log`` order, so the first entry is the
                # most recent commit and the last entry is the oldest.
                metadata['first_commit'] = history[-1]['hash']
                metadata['last_commit'] = history[0]['hash']
                metadata['last_modified'] = datetime.fromtimestamp(
                    history[0]['timestamp']
                ).isoformat()

        # Add blame information if requested
        if include_blame:
            blame_data = self._get_file_blame(repo_path, relative_path)
            metadata['unique_authors'] = len(set(
                b['author'] for b in blame_data.values()
            ))

        # Index the file content (the store is given the path, not the text)
        self.vector_store.add_file(
            str(file_path),
            chunk_size=1000,
            chunk_overlap=200,
            metadata=metadata
        )
        results["files_indexed"] += 1

        # Perform AST analysis for supported languages
        if file_path.suffix in self._AST_SUFFIXES:
            try:
                file_ast = self.ast_analyzer.analyze_file(str(file_path))
                if file_ast:
                    # Store complete AST
                    self.vector_store._store_file_ast(file_ast)

                    # Store individual symbols
                    self.vector_store._store_symbols(file_ast.symbols)
                    results["symbols_extracted"] += len(file_ast.symbols)

                    # Store cross-references
                    self.vector_store._store_references(file_ast)
            except Exception as e:
                # AST analysis is best-effort; the file stays indexed.
                logger.warning(f"AST analysis failed for {file_path}: {e}")

    @staticmethod
    def _parse_file_history(output: str) -> List[Dict[str, Any]]:
        """Parse NUL-separated ``hash, author time, author, subject`` records.

        Lines with fewer than four fields are ignored.
        """
        history = []
        for line in output.split('\n'):
            if not line:
                continue
            parts = line.split('\x00', 3)
            if len(parts) < 4:
                continue
            history.append({
                'hash': parts[0],
                'timestamp': int(parts[1]),
                'author': parts[2],
                'message': parts[3]
            })
        return history

    def _get_file_history(
        self,
        repo_path: Path,
        file_path: Path
    ) -> List[Dict[str, Any]]:
        """Get commit history for a specific file.

        Returns:
            One dict per commit (``hash``, ``timestamp``, ``author``,
            ``message``) in ``git log`` order, or an empty list when git
            exits with a non-zero status
        """
        result = subprocess.run(
            ["git", "log", "--follow",
             "--pretty=format:%H%x00%at%x00%an%x00%s", "--", str(file_path)],
            cwd=repo_path,
            capture_output=True,
            text=True
        )

        if result.returncode != 0:
            return []

        return self._parse_file_history(result.stdout)

    @staticmethod
    def _parse_blame_porcelain(output: str) -> Dict[int, Dict[str, Any]]:
        """Parse ``git blame --line-porcelain`` output.

        Returns:
            Mapping of final line number to ``commit``, ``author`` and
            ``timestamp``. Records for which no ``author`` header was seen
            are left out.
        """
        blame_data: Dict[int, Dict[str, Any]] = {}
        entry: Optional[Dict[str, Any]] = None

        for line in output.split('\n'):
            # Content lines start with a tab and carry no metadata.
            if not line or line.startswith('\t'):
                continue

            key, _, value = line.partition(' ')
            is_object_id = (
                len(key) in (40, 64)  # SHA-1 or SHA-256 object id
                and all(c in '0123456789abcdef' for c in key)
            )
            if is_object_id:
                # Record header: "<hash> <orig line> <final line> [<count>]".
                # Start from a clean entry so author/time of the previous
                # record can never leak into this one.
                entry = None
                fields = value.split(' ')
                if len(fields) >= 2 and fields[1].isdigit():
                    entry = {'commit': key, 'author': None, 'timestamp': None}
                    blame_data[int(fields[1])] = entry
            elif entry is not None:
                if key == 'author':
                    entry['author'] = value
                elif key == 'author-time':
                    try:
                        entry['timestamp'] = int(value)
                    except ValueError:
                        pass

        return {
            number: info
            for number, info in blame_data.items()
            if info['author'] is not None
        }

    def _get_file_blame(
        self,
        repo_path: Path,
        file_path: Path
    ) -> Dict[int, Dict[str, Any]]:
        """Get blame information for a file.

        Returns:
            Mapping of line number to blame info, or an empty dict when git
            exits with a non-zero status
        """
        result = subprocess.run(
            ["git", "blame", "--line-porcelain", "--", str(file_path)],
            cwd=repo_path,
            capture_output=True,
            text=True,
            # Blame output embeds the file content; bytes that do not decode
            # must not make the whole file fail.
            errors="replace"
        )

        if result.returncode != 0:
            return {}

        return self._parse_blame_porcelain(result.stdout)

    def _index_commit(
        self,
        commit: "GitCommit",
        include_diffs: bool = True
    ):
        """Index a single commit.

        Adds one summary document for the commit and, when ``include_diffs``
        is true, one document per changed file.
        """
        # Create commit document
        commit_doc = f"""Git Commit: {commit.hash}
Author: {commit.author} <{commit.author_email}>
Date: {datetime.fromtimestamp(commit.timestamp).isoformat()}
Message: {commit.message}

Files changed: {len(commit.files)}
"""

        for file_info in commit.files:
            commit_doc += f"\n{file_info['status']}\t{file_info['filename']}"

        # Index commit
        metadata = {
            'type': 'git_commit',
            'commit_hash': commit.hash,
            'author': commit.author,
            'timestamp': commit.timestamp,
            'file_count': len(commit.files)
        }

        self.vector_store.add_document(commit_doc, metadata)

        # Index diffs if requested
        if include_diffs:
            for file_info in commit.files:
                self._index_commit_diff(commit, file_info['filename'])

    def _index_commit_diff(self, commit: "GitCommit", filename: str):
        """Index the diff for a specific file in a commit."""
        # This is a simplified version - in practice you'd want to
        # parse the actual diff and store meaningful chunks
        metadata = {
            'type': 'git_diff',
            'commit_hash': commit.hash,
            'filename': filename,
            'author': commit.author,
            'timestamp': commit.timestamp
        }

        # Create a document representing this change
        diff_doc = f"""File: {filename}
Commit: {commit.hash}
Author: {commit.author}
Message: {commit.message}
"""

        self.vector_store.add_document(diff_doc, metadata)

    def _index_repository_metadata(
        self,
        repo_path: Path,
        results: Dict[str, Any]
    ):
        """Index overall repository metadata.

        Stores one summary document whose metadata contains the repository
        name, path, ``origin`` URL (None when unavailable) and every entry
        of ``results``.
        """
        # Get repository info
        remote_result = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            cwd=repo_path,
            capture_output=True,
            text=True
        )

        remote_url = remote_result.stdout.strip() if remote_result.returncode == 0 else None

        # Create repository summary document
        repo_doc = f"""Repository: {repo_path.name}
Path: {repo_path}
Remote: {remote_url or 'No remote'}
Current Commit: {results.get('current_commit', 'Unknown')}

Statistics:
- Files indexed: {results['files_indexed']}
- Commits processed: {results['commits_processed']}
- Symbols extracted: {results['symbols_extracted']}
- Diffs indexed: {results['diffs_indexed']}
"""

        metadata = {
            'type': 'repository',
            'name': repo_path.name,
            'path': str(repo_path),
            'remote_url': remote_url,
            **results
        }

        self.vector_store.add_document(repo_doc, metadata)