hanzo-mcp 0.7.6__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hanzo-mcp might be problematic. Click here for more details.
- hanzo_mcp/__init__.py +7 -1
- hanzo_mcp/__main__.py +1 -1
- hanzo_mcp/analytics/__init__.py +2 -2
- hanzo_mcp/analytics/posthog_analytics.py +76 -82
- hanzo_mcp/cli.py +31 -36
- hanzo_mcp/cli_enhanced.py +94 -72
- hanzo_mcp/cli_plugin.py +27 -17
- hanzo_mcp/config/__init__.py +2 -2
- hanzo_mcp/config/settings.py +112 -88
- hanzo_mcp/config/tool_config.py +32 -34
- hanzo_mcp/dev_server.py +66 -67
- hanzo_mcp/prompts/__init__.py +94 -12
- hanzo_mcp/prompts/enhanced_prompts.py +809 -0
- hanzo_mcp/prompts/example_custom_prompt.py +6 -5
- hanzo_mcp/prompts/project_todo_reminder.py +0 -1
- hanzo_mcp/prompts/tool_explorer.py +10 -7
- hanzo_mcp/server.py +17 -21
- hanzo_mcp/server_enhanced.py +15 -22
- hanzo_mcp/tools/__init__.py +56 -28
- hanzo_mcp/tools/agent/__init__.py +16 -19
- hanzo_mcp/tools/agent/agent.py +82 -65
- hanzo_mcp/tools/agent/agent_tool.py +152 -122
- hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +66 -62
- hanzo_mcp/tools/agent/clarification_protocol.py +55 -50
- hanzo_mcp/tools/agent/clarification_tool.py +11 -10
- hanzo_mcp/tools/agent/claude_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/claude_desktop_auth.py +130 -144
- hanzo_mcp/tools/agent/cli_agent_base.py +59 -53
- hanzo_mcp/tools/agent/code_auth.py +102 -107
- hanzo_mcp/tools/agent/code_auth_tool.py +28 -27
- hanzo_mcp/tools/agent/codex_cli_tool.py +20 -19
- hanzo_mcp/tools/agent/critic_tool.py +86 -73
- hanzo_mcp/tools/agent/gemini_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/grok_cli_tool.py +21 -20
- hanzo_mcp/tools/agent/iching_tool.py +404 -139
- hanzo_mcp/tools/agent/network_tool.py +89 -73
- hanzo_mcp/tools/agent/prompt.py +2 -1
- hanzo_mcp/tools/agent/review_tool.py +101 -98
- hanzo_mcp/tools/agent/swarm_alias.py +87 -0
- hanzo_mcp/tools/agent/swarm_tool.py +246 -161
- hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +134 -92
- hanzo_mcp/tools/agent/tool_adapter.py +21 -11
- hanzo_mcp/tools/common/__init__.py +1 -1
- hanzo_mcp/tools/common/base.py +3 -5
- hanzo_mcp/tools/common/batch_tool.py +46 -39
- hanzo_mcp/tools/common/config_tool.py +120 -84
- hanzo_mcp/tools/common/context.py +1 -5
- hanzo_mcp/tools/common/context_fix.py +5 -3
- hanzo_mcp/tools/common/critic_tool.py +4 -8
- hanzo_mcp/tools/common/decorators.py +58 -56
- hanzo_mcp/tools/common/enhanced_base.py +29 -32
- hanzo_mcp/tools/common/fastmcp_pagination.py +91 -94
- hanzo_mcp/tools/common/forgiving_edit.py +91 -87
- hanzo_mcp/tools/common/mode.py +15 -17
- hanzo_mcp/tools/common/mode_loader.py +27 -24
- hanzo_mcp/tools/common/paginated_base.py +61 -53
- hanzo_mcp/tools/common/paginated_response.py +72 -79
- hanzo_mcp/tools/common/pagination.py +50 -53
- hanzo_mcp/tools/common/permissions.py +4 -4
- hanzo_mcp/tools/common/personality.py +186 -138
- hanzo_mcp/tools/common/plugin_loader.py +54 -54
- hanzo_mcp/tools/common/stats.py +65 -47
- hanzo_mcp/tools/common/test_helpers.py +31 -0
- hanzo_mcp/tools/common/thinking_tool.py +4 -8
- hanzo_mcp/tools/common/tool_disable.py +17 -12
- hanzo_mcp/tools/common/tool_enable.py +13 -14
- hanzo_mcp/tools/common/tool_list.py +36 -28
- hanzo_mcp/tools/common/truncate.py +23 -23
- hanzo_mcp/tools/config/__init__.py +4 -4
- hanzo_mcp/tools/config/config_tool.py +42 -29
- hanzo_mcp/tools/config/index_config.py +37 -34
- hanzo_mcp/tools/config/mode_tool.py +175 -55
- hanzo_mcp/tools/database/__init__.py +15 -12
- hanzo_mcp/tools/database/database_manager.py +77 -75
- hanzo_mcp/tools/database/graph.py +137 -91
- hanzo_mcp/tools/database/graph_add.py +30 -18
- hanzo_mcp/tools/database/graph_query.py +178 -102
- hanzo_mcp/tools/database/graph_remove.py +33 -28
- hanzo_mcp/tools/database/graph_search.py +97 -75
- hanzo_mcp/tools/database/graph_stats.py +91 -59
- hanzo_mcp/tools/database/sql.py +107 -79
- hanzo_mcp/tools/database/sql_query.py +30 -24
- hanzo_mcp/tools/database/sql_search.py +29 -25
- hanzo_mcp/tools/database/sql_stats.py +47 -35
- hanzo_mcp/tools/editor/neovim_command.py +25 -28
- hanzo_mcp/tools/editor/neovim_edit.py +21 -23
- hanzo_mcp/tools/editor/neovim_session.py +60 -54
- hanzo_mcp/tools/filesystem/__init__.py +31 -30
- hanzo_mcp/tools/filesystem/ast_multi_edit.py +329 -249
- hanzo_mcp/tools/filesystem/ast_tool.py +4 -4
- hanzo_mcp/tools/filesystem/base.py +1 -1
- hanzo_mcp/tools/filesystem/batch_search.py +316 -224
- hanzo_mcp/tools/filesystem/content_replace.py +4 -4
- hanzo_mcp/tools/filesystem/diff.py +71 -59
- hanzo_mcp/tools/filesystem/directory_tree.py +7 -7
- hanzo_mcp/tools/filesystem/directory_tree_paginated.py +49 -37
- hanzo_mcp/tools/filesystem/edit.py +4 -4
- hanzo_mcp/tools/filesystem/find.py +173 -80
- hanzo_mcp/tools/filesystem/find_files.py +73 -52
- hanzo_mcp/tools/filesystem/git_search.py +157 -104
- hanzo_mcp/tools/filesystem/grep.py +8 -8
- hanzo_mcp/tools/filesystem/multi_edit.py +4 -8
- hanzo_mcp/tools/filesystem/read.py +12 -10
- hanzo_mcp/tools/filesystem/rules_tool.py +59 -43
- hanzo_mcp/tools/filesystem/search_tool.py +263 -207
- hanzo_mcp/tools/filesystem/symbols_tool.py +94 -54
- hanzo_mcp/tools/filesystem/tree.py +35 -33
- hanzo_mcp/tools/filesystem/unix_aliases.py +13 -18
- hanzo_mcp/tools/filesystem/watch.py +37 -36
- hanzo_mcp/tools/filesystem/write.py +4 -8
- hanzo_mcp/tools/jupyter/__init__.py +4 -4
- hanzo_mcp/tools/jupyter/base.py +4 -5
- hanzo_mcp/tools/jupyter/jupyter.py +67 -47
- hanzo_mcp/tools/jupyter/notebook_edit.py +4 -4
- hanzo_mcp/tools/jupyter/notebook_read.py +4 -7
- hanzo_mcp/tools/llm/__init__.py +5 -7
- hanzo_mcp/tools/llm/consensus_tool.py +72 -52
- hanzo_mcp/tools/llm/llm_manage.py +101 -60
- hanzo_mcp/tools/llm/llm_tool.py +226 -166
- hanzo_mcp/tools/llm/provider_tools.py +25 -26
- hanzo_mcp/tools/lsp/__init__.py +1 -1
- hanzo_mcp/tools/lsp/lsp_tool.py +228 -143
- hanzo_mcp/tools/mcp/__init__.py +2 -3
- hanzo_mcp/tools/mcp/mcp_add.py +27 -25
- hanzo_mcp/tools/mcp/mcp_remove.py +7 -8
- hanzo_mcp/tools/mcp/mcp_stats.py +23 -22
- hanzo_mcp/tools/mcp/mcp_tool.py +129 -98
- hanzo_mcp/tools/memory/__init__.py +39 -21
- hanzo_mcp/tools/memory/knowledge_tools.py +124 -99
- hanzo_mcp/tools/memory/memory_tools.py +90 -108
- hanzo_mcp/tools/search/__init__.py +7 -2
- hanzo_mcp/tools/search/find_tool.py +297 -212
- hanzo_mcp/tools/search/unified_search.py +366 -314
- hanzo_mcp/tools/shell/__init__.py +8 -7
- hanzo_mcp/tools/shell/auto_background.py +56 -49
- hanzo_mcp/tools/shell/base.py +1 -1
- hanzo_mcp/tools/shell/base_process.py +75 -75
- hanzo_mcp/tools/shell/bash_session.py +2 -2
- hanzo_mcp/tools/shell/bash_session_executor.py +4 -4
- hanzo_mcp/tools/shell/bash_tool.py +24 -31
- hanzo_mcp/tools/shell/command_executor.py +12 -12
- hanzo_mcp/tools/shell/logs.py +43 -33
- hanzo_mcp/tools/shell/npx.py +13 -13
- hanzo_mcp/tools/shell/npx_background.py +24 -21
- hanzo_mcp/tools/shell/npx_tool.py +18 -22
- hanzo_mcp/tools/shell/open.py +19 -21
- hanzo_mcp/tools/shell/pkill.py +31 -26
- hanzo_mcp/tools/shell/process_tool.py +32 -32
- hanzo_mcp/tools/shell/processes.py +57 -58
- hanzo_mcp/tools/shell/run_background.py +24 -25
- hanzo_mcp/tools/shell/run_command.py +5 -5
- hanzo_mcp/tools/shell/run_command_windows.py +5 -5
- hanzo_mcp/tools/shell/session_storage.py +3 -3
- hanzo_mcp/tools/shell/streaming_command.py +141 -126
- hanzo_mcp/tools/shell/uvx.py +24 -25
- hanzo_mcp/tools/shell/uvx_background.py +35 -33
- hanzo_mcp/tools/shell/uvx_tool.py +18 -22
- hanzo_mcp/tools/todo/__init__.py +6 -2
- hanzo_mcp/tools/todo/todo.py +50 -37
- hanzo_mcp/tools/todo/todo_read.py +5 -8
- hanzo_mcp/tools/todo/todo_write.py +5 -7
- hanzo_mcp/tools/vector/__init__.py +40 -28
- hanzo_mcp/tools/vector/ast_analyzer.py +176 -143
- hanzo_mcp/tools/vector/git_ingester.py +170 -179
- hanzo_mcp/tools/vector/index_tool.py +96 -44
- hanzo_mcp/tools/vector/infinity_store.py +283 -228
- hanzo_mcp/tools/vector/mock_infinity.py +39 -40
- hanzo_mcp/tools/vector/project_manager.py +88 -78
- hanzo_mcp/tools/vector/vector.py +59 -42
- hanzo_mcp/tools/vector/vector_index.py +30 -27
- hanzo_mcp/tools/vector/vector_search.py +64 -45
- hanzo_mcp/types.py +6 -4
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/METADATA +1 -1
- hanzo_mcp-0.8.0.dist-info/RECORD +185 -0
- hanzo_mcp-0.7.6.dist-info/RECORD +0 -182
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/WHEEL +0 -0
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/entry_points.txt +0 -0
- {hanzo_mcp-0.7.6.dist-info → hanzo_mcp-0.8.0.dist-info}/top_level.txt +0 -0
|
@@ -2,23 +2,21 @@
|
|
|
2
2
|
|
|
3
3
|
This module provides functionality to ingest entire git repositories including:
|
|
4
4
|
- Full git history and commit metadata
|
|
5
|
-
- File contents at different points in time
|
|
5
|
+
- File contents at different points in time
|
|
6
6
|
- AST analysis via tree-sitter
|
|
7
7
|
- Symbol extraction and cross-references
|
|
8
8
|
- Blame information for line-level attribution
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
+
import logging
|
|
11
12
|
import subprocess
|
|
12
|
-
import
|
|
13
|
-
import hashlib
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
14
|
from pathlib import Path
|
|
15
|
-
from typing import Dict, List, Optional, Tuple, Any
|
|
16
15
|
from datetime import datetime
|
|
17
16
|
from dataclasses import dataclass
|
|
18
|
-
import logging
|
|
19
17
|
|
|
20
|
-
from .infinity_store import InfinityVectorStore
|
|
21
18
|
from .ast_analyzer import ASTAnalyzer
|
|
19
|
+
from .infinity_store import InfinityVectorStore
|
|
22
20
|
|
|
23
21
|
logger = logging.getLogger(__name__)
|
|
24
22
|
|
|
@@ -26,6 +24,7 @@ logger = logging.getLogger(__name__)
|
|
|
26
24
|
@dataclass
|
|
27
25
|
class GitCommit:
|
|
28
26
|
"""Represents a git commit."""
|
|
27
|
+
|
|
29
28
|
hash: str
|
|
30
29
|
author: str
|
|
31
30
|
author_email: str
|
|
@@ -38,6 +37,7 @@ class GitCommit:
|
|
|
38
37
|
@dataclass
|
|
39
38
|
class GitFileHistory:
|
|
40
39
|
"""History of a single file."""
|
|
40
|
+
|
|
41
41
|
file_path: str
|
|
42
42
|
commits: List[GitCommit]
|
|
43
43
|
current_content: Optional[str]
|
|
@@ -46,17 +46,17 @@ class GitFileHistory:
|
|
|
46
46
|
|
|
47
47
|
class GitIngester:
|
|
48
48
|
"""Ingests git repositories into vector store."""
|
|
49
|
-
|
|
49
|
+
|
|
50
50
|
def __init__(self, vector_store: InfinityVectorStore):
|
|
51
51
|
"""Initialize the git ingester.
|
|
52
|
-
|
|
52
|
+
|
|
53
53
|
Args:
|
|
54
54
|
vector_store: The vector store to ingest into
|
|
55
55
|
"""
|
|
56
56
|
self.vector_store = vector_store
|
|
57
57
|
self.ast_analyzer = ASTAnalyzer()
|
|
58
58
|
self._commit_cache: Dict[str, GitCommit] = {}
|
|
59
|
-
|
|
59
|
+
|
|
60
60
|
def ingest_repository(
|
|
61
61
|
self,
|
|
62
62
|
repo_path: str,
|
|
@@ -64,10 +64,10 @@ class GitIngester:
|
|
|
64
64
|
include_history: bool = True,
|
|
65
65
|
include_diffs: bool = True,
|
|
66
66
|
include_blame: bool = True,
|
|
67
|
-
file_patterns: Optional[List[str]] = None
|
|
67
|
+
file_patterns: Optional[List[str]] = None,
|
|
68
68
|
) -> Dict[str, Any]:
|
|
69
69
|
"""Ingest an entire git repository.
|
|
70
|
-
|
|
70
|
+
|
|
71
71
|
Args:
|
|
72
72
|
repo_path: Path to the git repository
|
|
73
73
|
branch: Branch to ingest (default: HEAD)
|
|
@@ -75,16 +75,16 @@ class GitIngester:
|
|
|
75
75
|
include_diffs: Whether to include diff information
|
|
76
76
|
include_blame: Whether to include blame information
|
|
77
77
|
file_patterns: List of file patterns to include (e.g., ["*.py", "*.js"])
|
|
78
|
-
|
|
78
|
+
|
|
79
79
|
Returns:
|
|
80
80
|
Summary of ingestion results
|
|
81
81
|
"""
|
|
82
82
|
repo_path = Path(repo_path)
|
|
83
83
|
if not (repo_path / ".git").exists():
|
|
84
84
|
raise ValueError(f"Not a git repository: {repo_path}")
|
|
85
|
-
|
|
85
|
+
|
|
86
86
|
logger.info(f"Starting ingestion of repository: {repo_path}")
|
|
87
|
-
|
|
87
|
+
|
|
88
88
|
results = {
|
|
89
89
|
"repository": str(repo_path),
|
|
90
90
|
"branch": branch,
|
|
@@ -94,18 +94,18 @@ class GitIngester:
|
|
|
94
94
|
"symbols_extracted": 0,
|
|
95
95
|
"diffs_indexed": 0,
|
|
96
96
|
"blame_entries": 0,
|
|
97
|
-
"errors": []
|
|
97
|
+
"errors": [],
|
|
98
98
|
}
|
|
99
|
-
|
|
99
|
+
|
|
100
100
|
try:
|
|
101
101
|
# Get current branch/commit
|
|
102
102
|
current_commit = self._get_current_commit(repo_path)
|
|
103
103
|
results["current_commit"] = current_commit
|
|
104
|
-
|
|
104
|
+
|
|
105
105
|
# Get list of files to process
|
|
106
106
|
files = self._get_repository_files(repo_path, file_patterns)
|
|
107
107
|
logger.info(f"Found {len(files)} files to process")
|
|
108
|
-
|
|
108
|
+
|
|
109
109
|
# Process each file
|
|
110
110
|
for file_path in files:
|
|
111
111
|
try:
|
|
@@ -114,34 +114,34 @@ class GitIngester:
|
|
|
114
114
|
file_path,
|
|
115
115
|
include_history=include_history,
|
|
116
116
|
include_blame=include_blame,
|
|
117
|
-
results=results
|
|
117
|
+
results=results,
|
|
118
118
|
)
|
|
119
119
|
except Exception as e:
|
|
120
120
|
logger.error(f"Error processing {file_path}: {e}")
|
|
121
121
|
results["errors"].append(f"{file_path}: {str(e)}")
|
|
122
|
-
|
|
122
|
+
|
|
123
123
|
# Process commit history if requested
|
|
124
124
|
if include_history:
|
|
125
125
|
commits = self._get_commit_history(repo_path, branch)
|
|
126
126
|
results["commits_processed"] = len(commits)
|
|
127
|
-
|
|
127
|
+
|
|
128
128
|
for commit in commits:
|
|
129
129
|
self._index_commit(commit, include_diffs=include_diffs)
|
|
130
130
|
results["commits_indexed"] = results.get("commits_indexed", 0) + 1
|
|
131
|
-
|
|
131
|
+
|
|
132
132
|
if include_diffs:
|
|
133
133
|
results["diffs_indexed"] += len(commit.files)
|
|
134
|
-
|
|
134
|
+
|
|
135
135
|
# Create repository metadata document
|
|
136
136
|
self._index_repository_metadata(repo_path, results)
|
|
137
|
-
|
|
137
|
+
|
|
138
138
|
except Exception as e:
|
|
139
139
|
logger.error(f"Repository ingestion failed: {e}")
|
|
140
140
|
results["errors"].append(f"Fatal error: {str(e)}")
|
|
141
|
-
|
|
141
|
+
|
|
142
142
|
logger.info(f"Ingestion complete: {results}")
|
|
143
143
|
return results
|
|
144
|
-
|
|
144
|
+
|
|
145
145
|
def _get_current_commit(self, repo_path: Path) -> str:
|
|
146
146
|
"""Get the current commit hash."""
|
|
147
147
|
result = subprocess.run(
|
|
@@ -149,29 +149,23 @@ class GitIngester:
|
|
|
149
149
|
cwd=repo_path,
|
|
150
150
|
capture_output=True,
|
|
151
151
|
text=True,
|
|
152
|
-
check=True
|
|
152
|
+
check=True,
|
|
153
153
|
)
|
|
154
154
|
return result.stdout.strip()
|
|
155
|
-
|
|
155
|
+
|
|
156
156
|
def _get_repository_files(
|
|
157
|
-
self,
|
|
158
|
-
repo_path: Path,
|
|
159
|
-
patterns: Optional[List[str]] = None
|
|
157
|
+
self, repo_path: Path, patterns: Optional[List[str]] = None
|
|
160
158
|
) -> List[Path]:
|
|
161
159
|
"""Get list of files in repository matching patterns."""
|
|
162
160
|
# Use git ls-files to respect .gitignore
|
|
163
161
|
cmd = ["git", "ls-files"]
|
|
164
|
-
|
|
162
|
+
|
|
165
163
|
result = subprocess.run(
|
|
166
|
-
cmd,
|
|
167
|
-
cwd=repo_path,
|
|
168
|
-
capture_output=True,
|
|
169
|
-
text=True,
|
|
170
|
-
check=True
|
|
164
|
+
cmd, cwd=repo_path, capture_output=True, text=True, check=True
|
|
171
165
|
)
|
|
172
|
-
|
|
166
|
+
|
|
173
167
|
files = []
|
|
174
|
-
for line in result.stdout.strip().split(
|
|
168
|
+
for line in result.stdout.strip().split("\n"):
|
|
175
169
|
if line:
|
|
176
170
|
file_path = repo_path / line
|
|
177
171
|
if file_path.exists():
|
|
@@ -181,37 +175,39 @@ class GitIngester:
|
|
|
181
175
|
files.append(file_path)
|
|
182
176
|
else:
|
|
183
177
|
files.append(file_path)
|
|
184
|
-
|
|
178
|
+
|
|
185
179
|
return files
|
|
186
|
-
|
|
180
|
+
|
|
187
181
|
def _get_commit_history(
|
|
188
|
-
self,
|
|
189
|
-
repo_path: Path,
|
|
190
|
-
branch: str = "HEAD",
|
|
191
|
-
max_commits: int = 1000
|
|
182
|
+
self, repo_path: Path, branch: str = "HEAD", max_commits: int = 1000
|
|
192
183
|
) -> List[GitCommit]:
|
|
193
184
|
"""Get commit history for the repository."""
|
|
194
185
|
# Get commit list with basic info
|
|
195
186
|
result = subprocess.run(
|
|
196
|
-
[
|
|
197
|
-
|
|
187
|
+
[
|
|
188
|
+
"git",
|
|
189
|
+
"log",
|
|
190
|
+
branch,
|
|
191
|
+
f"--max-count={max_commits}",
|
|
192
|
+
"--pretty=format:%H|%P|%an|%ae|%at|%s",
|
|
193
|
+
],
|
|
198
194
|
cwd=repo_path,
|
|
199
195
|
capture_output=True,
|
|
200
196
|
text=True,
|
|
201
|
-
check=True
|
|
197
|
+
check=True,
|
|
202
198
|
)
|
|
203
|
-
|
|
199
|
+
|
|
204
200
|
commits = []
|
|
205
|
-
for line in result.stdout.strip().split(
|
|
201
|
+
for line in result.stdout.strip().split("\n"):
|
|
206
202
|
if line:
|
|
207
|
-
parts = line.split(
|
|
203
|
+
parts = line.split("|", 5)
|
|
208
204
|
if len(parts) >= 6:
|
|
209
205
|
commit_hash = parts[0]
|
|
210
206
|
parent_hashes = parts[1].split() if parts[1] else []
|
|
211
|
-
|
|
207
|
+
|
|
212
208
|
# Get file changes for this commit
|
|
213
209
|
files = self._get_commit_files(repo_path, commit_hash)
|
|
214
|
-
|
|
210
|
+
|
|
215
211
|
commit = GitCommit(
|
|
216
212
|
hash=commit_hash,
|
|
217
213
|
parent_hashes=parent_hashes,
|
|
@@ -219,182 +215,179 @@ class GitIngester:
|
|
|
219
215
|
author_email=parts[3],
|
|
220
216
|
timestamp=int(parts[4]),
|
|
221
217
|
message=parts[5],
|
|
222
|
-
files=files
|
|
218
|
+
files=files,
|
|
223
219
|
)
|
|
224
220
|
commits.append(commit)
|
|
225
221
|
self._commit_cache[commit_hash] = commit
|
|
226
|
-
|
|
222
|
+
|
|
227
223
|
return commits
|
|
228
|
-
|
|
229
|
-
def _get_commit_files(
|
|
224
|
+
|
|
225
|
+
def _get_commit_files(
|
|
226
|
+
self, repo_path: Path, commit_hash: str
|
|
227
|
+
) -> List[Dict[str, str]]:
|
|
230
228
|
"""Get list of files changed in a commit."""
|
|
231
229
|
result = subprocess.run(
|
|
232
230
|
["git", "show", "--name-status", "--format=", commit_hash],
|
|
233
231
|
cwd=repo_path,
|
|
234
232
|
capture_output=True,
|
|
235
233
|
text=True,
|
|
236
|
-
check=True
|
|
234
|
+
check=True,
|
|
237
235
|
)
|
|
238
|
-
|
|
236
|
+
|
|
239
237
|
files = []
|
|
240
|
-
for line in result.stdout.strip().split(
|
|
241
|
-
if line and
|
|
242
|
-
parts = line.split(
|
|
238
|
+
for line in result.stdout.strip().split("\n"):
|
|
239
|
+
if line and "\t" in line:
|
|
240
|
+
parts = line.split("\t", 1)
|
|
243
241
|
if len(parts) == 2:
|
|
244
|
-
files.append({
|
|
245
|
-
|
|
246
|
-
'filename': parts[1]
|
|
247
|
-
})
|
|
248
|
-
|
|
242
|
+
files.append({"status": parts[0], "filename": parts[1]})
|
|
243
|
+
|
|
249
244
|
return files
|
|
250
|
-
|
|
245
|
+
|
|
251
246
|
def _process_file(
|
|
252
247
|
self,
|
|
253
248
|
repo_path: Path,
|
|
254
249
|
file_path: Path,
|
|
255
250
|
include_history: bool,
|
|
256
251
|
include_blame: bool,
|
|
257
|
-
results: Dict[str, Any]
|
|
252
|
+
results: Dict[str, Any],
|
|
258
253
|
):
|
|
259
254
|
"""Process a single file."""
|
|
260
255
|
relative_path = file_path.relative_to(repo_path)
|
|
261
|
-
|
|
256
|
+
|
|
262
257
|
# Read current content
|
|
263
258
|
try:
|
|
264
|
-
content = file_path.read_text(encoding=
|
|
259
|
+
content = file_path.read_text(encoding="utf-8")
|
|
265
260
|
except UnicodeDecodeError:
|
|
266
|
-
content = file_path.read_text(encoding=
|
|
267
|
-
|
|
261
|
+
content = file_path.read_text(encoding="latin-1")
|
|
262
|
+
|
|
268
263
|
# Get file metadata
|
|
269
264
|
metadata = {
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
265
|
+
"repository": str(repo_path),
|
|
266
|
+
"relative_path": str(relative_path),
|
|
267
|
+
"file_type": file_path.suffix,
|
|
268
|
+
"size": file_path.stat().st_size,
|
|
274
269
|
}
|
|
275
|
-
|
|
270
|
+
|
|
276
271
|
# Add git history metadata if requested
|
|
277
272
|
if include_history:
|
|
278
273
|
history = self._get_file_history(repo_path, relative_path)
|
|
279
|
-
metadata[
|
|
274
|
+
metadata["commit_count"] = len(history)
|
|
280
275
|
if history:
|
|
281
|
-
metadata[
|
|
282
|
-
metadata[
|
|
283
|
-
metadata[
|
|
284
|
-
history[0][
|
|
276
|
+
metadata["first_commit"] = history[-1]["hash"]
|
|
277
|
+
metadata["last_commit"] = history[0]["hash"]
|
|
278
|
+
metadata["last_modified"] = datetime.fromtimestamp(
|
|
279
|
+
history[0]["timestamp"]
|
|
285
280
|
).isoformat()
|
|
286
|
-
|
|
281
|
+
|
|
287
282
|
# Add blame information if requested
|
|
288
283
|
if include_blame:
|
|
289
284
|
blame_data = self._get_file_blame(repo_path, relative_path)
|
|
290
|
-
metadata[
|
|
291
|
-
b[
|
|
292
|
-
)
|
|
293
|
-
|
|
285
|
+
metadata["unique_authors"] = len(
|
|
286
|
+
set(b["author"] for b in blame_data.values())
|
|
287
|
+
)
|
|
288
|
+
|
|
294
289
|
# Index the file content
|
|
295
290
|
doc_ids = self.vector_store.add_file(
|
|
296
|
-
str(file_path),
|
|
297
|
-
chunk_size=1000,
|
|
298
|
-
chunk_overlap=200,
|
|
299
|
-
metadata=metadata
|
|
291
|
+
str(file_path), chunk_size=1000, chunk_overlap=200, metadata=metadata
|
|
300
292
|
)
|
|
301
293
|
results["files_indexed"] += 1
|
|
302
|
-
|
|
294
|
+
|
|
303
295
|
# Perform AST analysis for supported languages
|
|
304
|
-
if file_path.suffix in [
|
|
296
|
+
if file_path.suffix in [".py", ".js", ".ts", ".java", ".cpp", ".c"]:
|
|
305
297
|
try:
|
|
306
298
|
file_ast = self.ast_analyzer.analyze_file(str(file_path))
|
|
307
299
|
if file_ast:
|
|
308
300
|
# Store complete AST
|
|
309
301
|
self.vector_store._store_file_ast(file_ast)
|
|
310
|
-
|
|
302
|
+
|
|
311
303
|
# Store individual symbols
|
|
312
304
|
self.vector_store._store_symbols(file_ast.symbols)
|
|
313
305
|
results["symbols_extracted"] += len(file_ast.symbols)
|
|
314
|
-
|
|
306
|
+
|
|
315
307
|
# Store cross-references
|
|
316
308
|
self.vector_store._store_references(file_ast)
|
|
317
309
|
except Exception as e:
|
|
318
310
|
logger.warning(f"AST analysis failed for {file_path}: {e}")
|
|
319
|
-
|
|
311
|
+
|
|
320
312
|
def _get_file_history(
|
|
321
|
-
self,
|
|
322
|
-
repo_path: Path,
|
|
323
|
-
file_path: Path
|
|
313
|
+
self, repo_path: Path, file_path: Path
|
|
324
314
|
) -> List[Dict[str, Any]]:
|
|
325
315
|
"""Get commit history for a specific file."""
|
|
326
316
|
result = subprocess.run(
|
|
327
|
-
[
|
|
317
|
+
[
|
|
318
|
+
"git",
|
|
319
|
+
"log",
|
|
320
|
+
"--follow",
|
|
321
|
+
"--pretty=format:%H|%at|%an|%s",
|
|
322
|
+
"--",
|
|
323
|
+
str(file_path),
|
|
324
|
+
],
|
|
328
325
|
cwd=repo_path,
|
|
329
326
|
capture_output=True,
|
|
330
|
-
text=True
|
|
327
|
+
text=True,
|
|
331
328
|
)
|
|
332
|
-
|
|
329
|
+
|
|
333
330
|
if result.returncode != 0:
|
|
334
331
|
return []
|
|
335
|
-
|
|
332
|
+
|
|
336
333
|
history = []
|
|
337
|
-
for line in result.stdout.strip().split(
|
|
334
|
+
for line in result.stdout.strip().split("\n"):
|
|
338
335
|
if line:
|
|
339
|
-
parts = line.split(
|
|
336
|
+
parts = line.split("|", 3)
|
|
340
337
|
if len(parts) >= 4:
|
|
341
|
-
history.append(
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
338
|
+
history.append(
|
|
339
|
+
{
|
|
340
|
+
"hash": parts[0],
|
|
341
|
+
"timestamp": int(parts[1]),
|
|
342
|
+
"author": parts[2],
|
|
343
|
+
"message": parts[3],
|
|
344
|
+
}
|
|
345
|
+
)
|
|
346
|
+
|
|
348
347
|
return history
|
|
349
|
-
|
|
348
|
+
|
|
350
349
|
def _get_file_blame(
|
|
351
|
-
self,
|
|
352
|
-
repo_path: Path,
|
|
353
|
-
file_path: Path
|
|
350
|
+
self, repo_path: Path, file_path: Path
|
|
354
351
|
) -> Dict[int, Dict[str, Any]]:
|
|
355
352
|
"""Get blame information for a file."""
|
|
356
353
|
result = subprocess.run(
|
|
357
354
|
["git", "blame", "--line-porcelain", "--", str(file_path)],
|
|
358
355
|
cwd=repo_path,
|
|
359
356
|
capture_output=True,
|
|
360
|
-
text=True
|
|
357
|
+
text=True,
|
|
361
358
|
)
|
|
362
|
-
|
|
359
|
+
|
|
363
360
|
if result.returncode != 0:
|
|
364
361
|
return {}
|
|
365
|
-
|
|
362
|
+
|
|
366
363
|
blame_data = {}
|
|
367
364
|
current_commit = None
|
|
368
365
|
current_line = None
|
|
369
366
|
author = None
|
|
370
367
|
timestamp = None
|
|
371
|
-
|
|
372
|
-
for line in result.stdout.strip().split(
|
|
373
|
-
if line and not line.startswith(
|
|
374
|
-
parts = line.split(
|
|
368
|
+
|
|
369
|
+
for line in result.stdout.strip().split("\n"):
|
|
370
|
+
if line and not line.startswith("\t"):
|
|
371
|
+
parts = line.split(" ")
|
|
375
372
|
if len(parts) >= 3 and len(parts[0]) == 40: # SHA-1 hash
|
|
376
373
|
current_commit = parts[0]
|
|
377
374
|
current_line = int(parts[2])
|
|
378
|
-
elif line.startswith(
|
|
375
|
+
elif line.startswith("author "):
|
|
379
376
|
author = line[7:]
|
|
380
|
-
elif line.startswith(
|
|
377
|
+
elif line.startswith("author-time "):
|
|
381
378
|
timestamp = int(line[12:])
|
|
382
|
-
|
|
379
|
+
|
|
383
380
|
# We have all the data for this line
|
|
384
381
|
if current_line and author:
|
|
385
382
|
blame_data[current_line] = {
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
383
|
+
"commit": current_commit,
|
|
384
|
+
"author": author,
|
|
385
|
+
"timestamp": timestamp,
|
|
389
386
|
}
|
|
390
|
-
|
|
387
|
+
|
|
391
388
|
return blame_data
|
|
392
|
-
|
|
393
|
-
def _index_commit(
|
|
394
|
-
self,
|
|
395
|
-
commit: GitCommit,
|
|
396
|
-
include_diffs: bool = True
|
|
397
|
-
):
|
|
389
|
+
|
|
390
|
+
def _index_commit(self, commit: GitCommit, include_diffs: bool = True):
|
|
398
391
|
"""Index a single commit."""
|
|
399
392
|
# Create commit document
|
|
400
393
|
commit_doc = f"""Git Commit: {commit.hash}
|
|
@@ -404,82 +397,80 @@ Message: {commit.message}
|
|
|
404
397
|
|
|
405
398
|
Files changed: {len(commit.files)}
|
|
406
399
|
"""
|
|
407
|
-
|
|
400
|
+
|
|
408
401
|
for file_info in commit.files:
|
|
409
402
|
commit_doc += f"\n{file_info['status']}\t{file_info['filename']}"
|
|
410
|
-
|
|
403
|
+
|
|
411
404
|
# Index commit
|
|
412
405
|
metadata = {
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
406
|
+
"type": "git_commit",
|
|
407
|
+
"commit_hash": commit.hash,
|
|
408
|
+
"author": commit.author,
|
|
409
|
+
"timestamp": commit.timestamp,
|
|
410
|
+
"file_count": len(commit.files),
|
|
418
411
|
}
|
|
419
|
-
|
|
412
|
+
|
|
420
413
|
self.vector_store.add_document(commit_doc, metadata)
|
|
421
|
-
|
|
414
|
+
|
|
422
415
|
# Index diffs if requested
|
|
423
416
|
if include_diffs:
|
|
424
417
|
for file_info in commit.files:
|
|
425
|
-
self._index_commit_diff(commit, file_info[
|
|
426
|
-
|
|
418
|
+
self._index_commit_diff(commit, file_info["filename"])
|
|
419
|
+
|
|
427
420
|
def _index_commit_diff(self, commit: GitCommit, filename: str):
|
|
428
421
|
"""Index the diff for a specific file in a commit."""
|
|
429
422
|
# This is a simplified version - in practice you'd want to
|
|
430
423
|
# parse the actual diff and store meaningful chunks
|
|
431
424
|
metadata = {
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
425
|
+
"type": "git_diff",
|
|
426
|
+
"commit_hash": commit.hash,
|
|
427
|
+
"filename": filename,
|
|
428
|
+
"author": commit.author,
|
|
429
|
+
"timestamp": commit.timestamp,
|
|
437
430
|
}
|
|
438
|
-
|
|
431
|
+
|
|
439
432
|
# Create a document representing this change
|
|
440
433
|
diff_doc = f"""File: {filename}
|
|
441
434
|
Commit: {commit.hash}
|
|
442
435
|
Author: {commit.author}
|
|
443
436
|
Message: {commit.message}
|
|
444
437
|
"""
|
|
445
|
-
|
|
438
|
+
|
|
446
439
|
self.vector_store.add_document(diff_doc, metadata)
|
|
447
|
-
|
|
448
|
-
def _index_repository_metadata(
|
|
449
|
-
self,
|
|
450
|
-
repo_path: Path,
|
|
451
|
-
results: Dict[str, Any]
|
|
452
|
-
):
|
|
440
|
+
|
|
441
|
+
def _index_repository_metadata(self, repo_path: Path, results: Dict[str, Any]):
|
|
453
442
|
"""Index overall repository metadata."""
|
|
454
443
|
# Get repository info
|
|
455
444
|
remote_result = subprocess.run(
|
|
456
445
|
["git", "remote", "get-url", "origin"],
|
|
457
446
|
cwd=repo_path,
|
|
458
447
|
capture_output=True,
|
|
459
|
-
text=True
|
|
448
|
+
text=True,
|
|
460
449
|
)
|
|
461
|
-
|
|
462
|
-
remote_url =
|
|
463
|
-
|
|
450
|
+
|
|
451
|
+
remote_url = (
|
|
452
|
+
remote_result.stdout.strip() if remote_result.returncode == 0 else None
|
|
453
|
+
)
|
|
454
|
+
|
|
464
455
|
# Create repository summary document
|
|
465
456
|
repo_doc = f"""Repository: {repo_path.name}
|
|
466
457
|
Path: {repo_path}
|
|
467
|
-
Remote: {remote_url or
|
|
468
|
-
Current Commit: {results.get(
|
|
458
|
+
Remote: {remote_url or "No remote"}
|
|
459
|
+
Current Commit: {results.get("current_commit", "Unknown")}
|
|
469
460
|
|
|
470
461
|
Statistics:
|
|
471
|
-
- Files indexed: {results[
|
|
472
|
-
- Commits processed: {results[
|
|
473
|
-
- Symbols extracted: {results[
|
|
474
|
-
- Diffs indexed: {results[
|
|
462
|
+
- Files indexed: {results["files_indexed"]}
|
|
463
|
+
- Commits processed: {results["commits_processed"]}
|
|
464
|
+
- Symbols extracted: {results["symbols_extracted"]}
|
|
465
|
+
- Diffs indexed: {results["diffs_indexed"]}
|
|
475
466
|
"""
|
|
476
|
-
|
|
467
|
+
|
|
477
468
|
metadata = {
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
**results
|
|
469
|
+
"type": "repository",
|
|
470
|
+
"name": repo_path.name,
|
|
471
|
+
"path": str(repo_path),
|
|
472
|
+
"remote_url": remote_url,
|
|
473
|
+
**results,
|
|
483
474
|
}
|
|
484
|
-
|
|
485
|
-
self.vector_store.add_document(repo_doc, metadata)
|
|
475
|
+
|
|
476
|
+
self.vector_store.add_document(repo_doc, metadata)
|