hanzo-mcp 0.7.7__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hanzo-mcp might be problematic. Click here for more details.

Files changed (178)
  1. hanzo_mcp/__init__.py +6 -0
  2. hanzo_mcp/__main__.py +1 -1
  3. hanzo_mcp/analytics/__init__.py +2 -2
  4. hanzo_mcp/analytics/posthog_analytics.py +76 -82
  5. hanzo_mcp/cli.py +31 -36
  6. hanzo_mcp/cli_enhanced.py +94 -72
  7. hanzo_mcp/cli_plugin.py +27 -17
  8. hanzo_mcp/config/__init__.py +2 -2
  9. hanzo_mcp/config/settings.py +112 -88
  10. hanzo_mcp/config/tool_config.py +32 -34
  11. hanzo_mcp/dev_server.py +66 -67
  12. hanzo_mcp/prompts/__init__.py +94 -12
  13. hanzo_mcp/prompts/enhanced_prompts.py +809 -0
  14. hanzo_mcp/prompts/example_custom_prompt.py +6 -5
  15. hanzo_mcp/prompts/project_todo_reminder.py +0 -1
  16. hanzo_mcp/prompts/tool_explorer.py +10 -7
  17. hanzo_mcp/server.py +17 -21
  18. hanzo_mcp/server_enhanced.py +15 -22
  19. hanzo_mcp/tools/__init__.py +56 -28
  20. hanzo_mcp/tools/agent/__init__.py +16 -19
  21. hanzo_mcp/tools/agent/agent.py +82 -65
  22. hanzo_mcp/tools/agent/agent_tool.py +152 -122
  23. hanzo_mcp/tools/agent/agent_tool_v1_deprecated.py +66 -62
  24. hanzo_mcp/tools/agent/clarification_protocol.py +55 -50
  25. hanzo_mcp/tools/agent/clarification_tool.py +11 -10
  26. hanzo_mcp/tools/agent/claude_cli_tool.py +21 -20
  27. hanzo_mcp/tools/agent/claude_desktop_auth.py +130 -144
  28. hanzo_mcp/tools/agent/cli_agent_base.py +59 -53
  29. hanzo_mcp/tools/agent/code_auth.py +102 -107
  30. hanzo_mcp/tools/agent/code_auth_tool.py +28 -27
  31. hanzo_mcp/tools/agent/codex_cli_tool.py +20 -19
  32. hanzo_mcp/tools/agent/critic_tool.py +86 -73
  33. hanzo_mcp/tools/agent/gemini_cli_tool.py +21 -20
  34. hanzo_mcp/tools/agent/grok_cli_tool.py +21 -20
  35. hanzo_mcp/tools/agent/iching_tool.py +404 -139
  36. hanzo_mcp/tools/agent/network_tool.py +89 -73
  37. hanzo_mcp/tools/agent/prompt.py +2 -1
  38. hanzo_mcp/tools/agent/review_tool.py +101 -98
  39. hanzo_mcp/tools/agent/swarm_alias.py +87 -0
  40. hanzo_mcp/tools/agent/swarm_tool.py +246 -161
  41. hanzo_mcp/tools/agent/swarm_tool_v1_deprecated.py +134 -92
  42. hanzo_mcp/tools/agent/tool_adapter.py +21 -11
  43. hanzo_mcp/tools/common/__init__.py +1 -1
  44. hanzo_mcp/tools/common/base.py +3 -5
  45. hanzo_mcp/tools/common/batch_tool.py +46 -39
  46. hanzo_mcp/tools/common/config_tool.py +120 -84
  47. hanzo_mcp/tools/common/context.py +1 -5
  48. hanzo_mcp/tools/common/context_fix.py +5 -3
  49. hanzo_mcp/tools/common/critic_tool.py +4 -8
  50. hanzo_mcp/tools/common/decorators.py +58 -56
  51. hanzo_mcp/tools/common/enhanced_base.py +29 -32
  52. hanzo_mcp/tools/common/fastmcp_pagination.py +91 -94
  53. hanzo_mcp/tools/common/forgiving_edit.py +91 -87
  54. hanzo_mcp/tools/common/mode.py +15 -17
  55. hanzo_mcp/tools/common/mode_loader.py +27 -24
  56. hanzo_mcp/tools/common/paginated_base.py +61 -53
  57. hanzo_mcp/tools/common/paginated_response.py +72 -79
  58. hanzo_mcp/tools/common/pagination.py +50 -53
  59. hanzo_mcp/tools/common/permissions.py +4 -4
  60. hanzo_mcp/tools/common/personality.py +186 -138
  61. hanzo_mcp/tools/common/plugin_loader.py +54 -54
  62. hanzo_mcp/tools/common/stats.py +65 -47
  63. hanzo_mcp/tools/common/test_helpers.py +31 -0
  64. hanzo_mcp/tools/common/thinking_tool.py +4 -8
  65. hanzo_mcp/tools/common/tool_disable.py +17 -12
  66. hanzo_mcp/tools/common/tool_enable.py +13 -14
  67. hanzo_mcp/tools/common/tool_list.py +36 -28
  68. hanzo_mcp/tools/common/truncate.py +23 -23
  69. hanzo_mcp/tools/config/__init__.py +4 -4
  70. hanzo_mcp/tools/config/config_tool.py +42 -29
  71. hanzo_mcp/tools/config/index_config.py +37 -34
  72. hanzo_mcp/tools/config/mode_tool.py +175 -55
  73. hanzo_mcp/tools/database/__init__.py +15 -12
  74. hanzo_mcp/tools/database/database_manager.py +77 -75
  75. hanzo_mcp/tools/database/graph.py +137 -91
  76. hanzo_mcp/tools/database/graph_add.py +30 -18
  77. hanzo_mcp/tools/database/graph_query.py +178 -102
  78. hanzo_mcp/tools/database/graph_remove.py +33 -28
  79. hanzo_mcp/tools/database/graph_search.py +97 -75
  80. hanzo_mcp/tools/database/graph_stats.py +91 -59
  81. hanzo_mcp/tools/database/sql.py +107 -79
  82. hanzo_mcp/tools/database/sql_query.py +30 -24
  83. hanzo_mcp/tools/database/sql_search.py +29 -25
  84. hanzo_mcp/tools/database/sql_stats.py +47 -35
  85. hanzo_mcp/tools/editor/neovim_command.py +25 -28
  86. hanzo_mcp/tools/editor/neovim_edit.py +21 -23
  87. hanzo_mcp/tools/editor/neovim_session.py +60 -54
  88. hanzo_mcp/tools/filesystem/__init__.py +31 -30
  89. hanzo_mcp/tools/filesystem/ast_multi_edit.py +329 -249
  90. hanzo_mcp/tools/filesystem/ast_tool.py +4 -4
  91. hanzo_mcp/tools/filesystem/base.py +1 -1
  92. hanzo_mcp/tools/filesystem/batch_search.py +316 -224
  93. hanzo_mcp/tools/filesystem/content_replace.py +4 -4
  94. hanzo_mcp/tools/filesystem/diff.py +71 -59
  95. hanzo_mcp/tools/filesystem/directory_tree.py +7 -7
  96. hanzo_mcp/tools/filesystem/directory_tree_paginated.py +49 -37
  97. hanzo_mcp/tools/filesystem/edit.py +4 -4
  98. hanzo_mcp/tools/filesystem/find.py +173 -80
  99. hanzo_mcp/tools/filesystem/find_files.py +73 -52
  100. hanzo_mcp/tools/filesystem/git_search.py +157 -104
  101. hanzo_mcp/tools/filesystem/grep.py +8 -8
  102. hanzo_mcp/tools/filesystem/multi_edit.py +4 -8
  103. hanzo_mcp/tools/filesystem/read.py +12 -10
  104. hanzo_mcp/tools/filesystem/rules_tool.py +59 -43
  105. hanzo_mcp/tools/filesystem/search_tool.py +263 -207
  106. hanzo_mcp/tools/filesystem/symbols_tool.py +94 -54
  107. hanzo_mcp/tools/filesystem/tree.py +35 -33
  108. hanzo_mcp/tools/filesystem/unix_aliases.py +13 -18
  109. hanzo_mcp/tools/filesystem/watch.py +37 -36
  110. hanzo_mcp/tools/filesystem/write.py +4 -8
  111. hanzo_mcp/tools/jupyter/__init__.py +4 -4
  112. hanzo_mcp/tools/jupyter/base.py +4 -5
  113. hanzo_mcp/tools/jupyter/jupyter.py +67 -47
  114. hanzo_mcp/tools/jupyter/notebook_edit.py +4 -4
  115. hanzo_mcp/tools/jupyter/notebook_read.py +4 -7
  116. hanzo_mcp/tools/llm/__init__.py +5 -7
  117. hanzo_mcp/tools/llm/consensus_tool.py +72 -52
  118. hanzo_mcp/tools/llm/llm_manage.py +101 -60
  119. hanzo_mcp/tools/llm/llm_tool.py +226 -166
  120. hanzo_mcp/tools/llm/provider_tools.py +25 -26
  121. hanzo_mcp/tools/lsp/__init__.py +1 -1
  122. hanzo_mcp/tools/lsp/lsp_tool.py +228 -143
  123. hanzo_mcp/tools/mcp/__init__.py +2 -3
  124. hanzo_mcp/tools/mcp/mcp_add.py +27 -25
  125. hanzo_mcp/tools/mcp/mcp_remove.py +7 -8
  126. hanzo_mcp/tools/mcp/mcp_stats.py +23 -22
  127. hanzo_mcp/tools/mcp/mcp_tool.py +129 -98
  128. hanzo_mcp/tools/memory/__init__.py +39 -21
  129. hanzo_mcp/tools/memory/knowledge_tools.py +124 -99
  130. hanzo_mcp/tools/memory/memory_tools.py +90 -108
  131. hanzo_mcp/tools/search/__init__.py +7 -2
  132. hanzo_mcp/tools/search/find_tool.py +297 -212
  133. hanzo_mcp/tools/search/unified_search.py +366 -314
  134. hanzo_mcp/tools/shell/__init__.py +8 -7
  135. hanzo_mcp/tools/shell/auto_background.py +56 -49
  136. hanzo_mcp/tools/shell/base.py +1 -1
  137. hanzo_mcp/tools/shell/base_process.py +75 -75
  138. hanzo_mcp/tools/shell/bash_session.py +2 -2
  139. hanzo_mcp/tools/shell/bash_session_executor.py +4 -4
  140. hanzo_mcp/tools/shell/bash_tool.py +24 -31
  141. hanzo_mcp/tools/shell/command_executor.py +12 -12
  142. hanzo_mcp/tools/shell/logs.py +43 -33
  143. hanzo_mcp/tools/shell/npx.py +13 -13
  144. hanzo_mcp/tools/shell/npx_background.py +24 -21
  145. hanzo_mcp/tools/shell/npx_tool.py +18 -22
  146. hanzo_mcp/tools/shell/open.py +19 -21
  147. hanzo_mcp/tools/shell/pkill.py +31 -26
  148. hanzo_mcp/tools/shell/process_tool.py +32 -32
  149. hanzo_mcp/tools/shell/processes.py +57 -58
  150. hanzo_mcp/tools/shell/run_background.py +24 -25
  151. hanzo_mcp/tools/shell/run_command.py +5 -5
  152. hanzo_mcp/tools/shell/run_command_windows.py +5 -5
  153. hanzo_mcp/tools/shell/session_storage.py +3 -3
  154. hanzo_mcp/tools/shell/streaming_command.py +141 -126
  155. hanzo_mcp/tools/shell/uvx.py +24 -25
  156. hanzo_mcp/tools/shell/uvx_background.py +35 -33
  157. hanzo_mcp/tools/shell/uvx_tool.py +18 -22
  158. hanzo_mcp/tools/todo/__init__.py +6 -2
  159. hanzo_mcp/tools/todo/todo.py +50 -37
  160. hanzo_mcp/tools/todo/todo_read.py +5 -8
  161. hanzo_mcp/tools/todo/todo_write.py +5 -7
  162. hanzo_mcp/tools/vector/__init__.py +40 -28
  163. hanzo_mcp/tools/vector/ast_analyzer.py +176 -143
  164. hanzo_mcp/tools/vector/git_ingester.py +170 -179
  165. hanzo_mcp/tools/vector/index_tool.py +96 -44
  166. hanzo_mcp/tools/vector/infinity_store.py +283 -228
  167. hanzo_mcp/tools/vector/mock_infinity.py +39 -40
  168. hanzo_mcp/tools/vector/project_manager.py +88 -78
  169. hanzo_mcp/tools/vector/vector.py +59 -42
  170. hanzo_mcp/tools/vector/vector_index.py +30 -27
  171. hanzo_mcp/tools/vector/vector_search.py +64 -45
  172. hanzo_mcp/types.py +6 -4
  173. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/METADATA +1 -1
  174. hanzo_mcp-0.8.1.dist-info/RECORD +185 -0
  175. hanzo_mcp-0.7.7.dist-info/RECORD +0 -182
  176. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/WHEEL +0 -0
  177. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/entry_points.txt +0 -0
  178. {hanzo_mcp-0.7.7.dist-info → hanzo_mcp-0.8.1.dist-info}/top_level.txt +0 -0
hanzo_mcp/tools/vector/git_ingester.py
@@ -2,23 +2,21 @@
2
2
 
3
3
  This module provides functionality to ingest entire git repositories including:
4
4
  - Full git history and commit metadata
5
- - File contents at different points in time
5
+ - File contents at different points in time
6
6
  - AST analysis via tree-sitter
7
7
  - Symbol extraction and cross-references
8
8
  - Blame information for line-level attribution
9
9
  """
10
10
 
11
+ import logging
11
12
  import subprocess
12
- import json
13
- import hashlib
13
+ from typing import Any, Dict, List, Optional
14
14
  from pathlib import Path
15
- from typing import Dict, List, Optional, Tuple, Any
16
15
  from datetime import datetime
17
16
  from dataclasses import dataclass
18
- import logging
19
17
 
20
- from .infinity_store import InfinityVectorStore
21
18
  from .ast_analyzer import ASTAnalyzer
19
+ from .infinity_store import InfinityVectorStore
22
20
 
23
21
  logger = logging.getLogger(__name__)
24
22
 
@@ -26,6 +24,7 @@ logger = logging.getLogger(__name__)
26
24
  @dataclass
27
25
  class GitCommit:
28
26
  """Represents a git commit."""
27
+
29
28
  hash: str
30
29
  author: str
31
30
  author_email: str
@@ -38,6 +37,7 @@ class GitCommit:
38
37
  @dataclass
39
38
  class GitFileHistory:
40
39
  """History of a single file."""
40
+
41
41
  file_path: str
42
42
  commits: List[GitCommit]
43
43
  current_content: Optional[str]
@@ -46,17 +46,17 @@ class GitFileHistory:
46
46
 
47
47
  class GitIngester:
48
48
  """Ingests git repositories into vector store."""
49
-
49
+
50
50
  def __init__(self, vector_store: InfinityVectorStore):
51
51
  """Initialize the git ingester.
52
-
52
+
53
53
  Args:
54
54
  vector_store: The vector store to ingest into
55
55
  """
56
56
  self.vector_store = vector_store
57
57
  self.ast_analyzer = ASTAnalyzer()
58
58
  self._commit_cache: Dict[str, GitCommit] = {}
59
-
59
+
60
60
  def ingest_repository(
61
61
  self,
62
62
  repo_path: str,
@@ -64,10 +64,10 @@ class GitIngester:
64
64
  include_history: bool = True,
65
65
  include_diffs: bool = True,
66
66
  include_blame: bool = True,
67
- file_patterns: Optional[List[str]] = None
67
+ file_patterns: Optional[List[str]] = None,
68
68
  ) -> Dict[str, Any]:
69
69
  """Ingest an entire git repository.
70
-
70
+
71
71
  Args:
72
72
  repo_path: Path to the git repository
73
73
  branch: Branch to ingest (default: HEAD)
@@ -75,16 +75,16 @@ class GitIngester:
75
75
  include_diffs: Whether to include diff information
76
76
  include_blame: Whether to include blame information
77
77
  file_patterns: List of file patterns to include (e.g., ["*.py", "*.js"])
78
-
78
+
79
79
  Returns:
80
80
  Summary of ingestion results
81
81
  """
82
82
  repo_path = Path(repo_path)
83
83
  if not (repo_path / ".git").exists():
84
84
  raise ValueError(f"Not a git repository: {repo_path}")
85
-
85
+
86
86
  logger.info(f"Starting ingestion of repository: {repo_path}")
87
-
87
+
88
88
  results = {
89
89
  "repository": str(repo_path),
90
90
  "branch": branch,
@@ -94,18 +94,18 @@ class GitIngester:
94
94
  "symbols_extracted": 0,
95
95
  "diffs_indexed": 0,
96
96
  "blame_entries": 0,
97
- "errors": []
97
+ "errors": [],
98
98
  }
99
-
99
+
100
100
  try:
101
101
  # Get current branch/commit
102
102
  current_commit = self._get_current_commit(repo_path)
103
103
  results["current_commit"] = current_commit
104
-
104
+
105
105
  # Get list of files to process
106
106
  files = self._get_repository_files(repo_path, file_patterns)
107
107
  logger.info(f"Found {len(files)} files to process")
108
-
108
+
109
109
  # Process each file
110
110
  for file_path in files:
111
111
  try:
@@ -114,34 +114,34 @@ class GitIngester:
114
114
  file_path,
115
115
  include_history=include_history,
116
116
  include_blame=include_blame,
117
- results=results
117
+ results=results,
118
118
  )
119
119
  except Exception as e:
120
120
  logger.error(f"Error processing {file_path}: {e}")
121
121
  results["errors"].append(f"{file_path}: {str(e)}")
122
-
122
+
123
123
  # Process commit history if requested
124
124
  if include_history:
125
125
  commits = self._get_commit_history(repo_path, branch)
126
126
  results["commits_processed"] = len(commits)
127
-
127
+
128
128
  for commit in commits:
129
129
  self._index_commit(commit, include_diffs=include_diffs)
130
130
  results["commits_indexed"] = results.get("commits_indexed", 0) + 1
131
-
131
+
132
132
  if include_diffs:
133
133
  results["diffs_indexed"] += len(commit.files)
134
-
134
+
135
135
  # Create repository metadata document
136
136
  self._index_repository_metadata(repo_path, results)
137
-
137
+
138
138
  except Exception as e:
139
139
  logger.error(f"Repository ingestion failed: {e}")
140
140
  results["errors"].append(f"Fatal error: {str(e)}")
141
-
141
+
142
142
  logger.info(f"Ingestion complete: {results}")
143
143
  return results
144
-
144
+
145
145
  def _get_current_commit(self, repo_path: Path) -> str:
146
146
  """Get the current commit hash."""
147
147
  result = subprocess.run(
@@ -149,29 +149,23 @@ class GitIngester:
149
149
  cwd=repo_path,
150
150
  capture_output=True,
151
151
  text=True,
152
- check=True
152
+ check=True,
153
153
  )
154
154
  return result.stdout.strip()
155
-
155
+
156
156
  def _get_repository_files(
157
- self,
158
- repo_path: Path,
159
- patterns: Optional[List[str]] = None
157
+ self, repo_path: Path, patterns: Optional[List[str]] = None
160
158
  ) -> List[Path]:
161
159
  """Get list of files in repository matching patterns."""
162
160
  # Use git ls-files to respect .gitignore
163
161
  cmd = ["git", "ls-files"]
164
-
162
+
165
163
  result = subprocess.run(
166
- cmd,
167
- cwd=repo_path,
168
- capture_output=True,
169
- text=True,
170
- check=True
164
+ cmd, cwd=repo_path, capture_output=True, text=True, check=True
171
165
  )
172
-
166
+
173
167
  files = []
174
- for line in result.stdout.strip().split('\n'):
168
+ for line in result.stdout.strip().split("\n"):
175
169
  if line:
176
170
  file_path = repo_path / line
177
171
  if file_path.exists():
@@ -181,37 +175,39 @@ class GitIngester:
181
175
  files.append(file_path)
182
176
  else:
183
177
  files.append(file_path)
184
-
178
+
185
179
  return files
186
-
180
+
187
181
  def _get_commit_history(
188
- self,
189
- repo_path: Path,
190
- branch: str = "HEAD",
191
- max_commits: int = 1000
182
+ self, repo_path: Path, branch: str = "HEAD", max_commits: int = 1000
192
183
  ) -> List[GitCommit]:
193
184
  """Get commit history for the repository."""
194
185
  # Get commit list with basic info
195
186
  result = subprocess.run(
196
- ["git", "log", branch, f"--max-count={max_commits}",
197
- "--pretty=format:%H|%P|%an|%ae|%at|%s"],
187
+ [
188
+ "git",
189
+ "log",
190
+ branch,
191
+ f"--max-count={max_commits}",
192
+ "--pretty=format:%H|%P|%an|%ae|%at|%s",
193
+ ],
198
194
  cwd=repo_path,
199
195
  capture_output=True,
200
196
  text=True,
201
- check=True
197
+ check=True,
202
198
  )
203
-
199
+
204
200
  commits = []
205
- for line in result.stdout.strip().split('\n'):
201
+ for line in result.stdout.strip().split("\n"):
206
202
  if line:
207
- parts = line.split('|', 5)
203
+ parts = line.split("|", 5)
208
204
  if len(parts) >= 6:
209
205
  commit_hash = parts[0]
210
206
  parent_hashes = parts[1].split() if parts[1] else []
211
-
207
+
212
208
  # Get file changes for this commit
213
209
  files = self._get_commit_files(repo_path, commit_hash)
214
-
210
+
215
211
  commit = GitCommit(
216
212
  hash=commit_hash,
217
213
  parent_hashes=parent_hashes,
@@ -219,182 +215,179 @@ class GitIngester:
219
215
  author_email=parts[3],
220
216
  timestamp=int(parts[4]),
221
217
  message=parts[5],
222
- files=files
218
+ files=files,
223
219
  )
224
220
  commits.append(commit)
225
221
  self._commit_cache[commit_hash] = commit
226
-
222
+
227
223
  return commits
228
-
229
- def _get_commit_files(self, repo_path: Path, commit_hash: str) -> List[Dict[str, str]]:
224
+
225
+ def _get_commit_files(
226
+ self, repo_path: Path, commit_hash: str
227
+ ) -> List[Dict[str, str]]:
230
228
  """Get list of files changed in a commit."""
231
229
  result = subprocess.run(
232
230
  ["git", "show", "--name-status", "--format=", commit_hash],
233
231
  cwd=repo_path,
234
232
  capture_output=True,
235
233
  text=True,
236
- check=True
234
+ check=True,
237
235
  )
238
-
236
+
239
237
  files = []
240
- for line in result.stdout.strip().split('\n'):
241
- if line and '\t' in line:
242
- parts = line.split('\t', 1)
238
+ for line in result.stdout.strip().split("\n"):
239
+ if line and "\t" in line:
240
+ parts = line.split("\t", 1)
243
241
  if len(parts) == 2:
244
- files.append({
245
- 'status': parts[0],
246
- 'filename': parts[1]
247
- })
248
-
242
+ files.append({"status": parts[0], "filename": parts[1]})
243
+
249
244
  return files
250
-
245
+
251
246
  def _process_file(
252
247
  self,
253
248
  repo_path: Path,
254
249
  file_path: Path,
255
250
  include_history: bool,
256
251
  include_blame: bool,
257
- results: Dict[str, Any]
252
+ results: Dict[str, Any],
258
253
  ):
259
254
  """Process a single file."""
260
255
  relative_path = file_path.relative_to(repo_path)
261
-
256
+
262
257
  # Read current content
263
258
  try:
264
- content = file_path.read_text(encoding='utf-8')
259
+ content = file_path.read_text(encoding="utf-8")
265
260
  except UnicodeDecodeError:
266
- content = file_path.read_text(encoding='latin-1')
267
-
261
+ content = file_path.read_text(encoding="latin-1")
262
+
268
263
  # Get file metadata
269
264
  metadata = {
270
- 'repository': str(repo_path),
271
- 'relative_path': str(relative_path),
272
- 'file_type': file_path.suffix,
273
- 'size': file_path.stat().st_size,
265
+ "repository": str(repo_path),
266
+ "relative_path": str(relative_path),
267
+ "file_type": file_path.suffix,
268
+ "size": file_path.stat().st_size,
274
269
  }
275
-
270
+
276
271
  # Add git history metadata if requested
277
272
  if include_history:
278
273
  history = self._get_file_history(repo_path, relative_path)
279
- metadata['commit_count'] = len(history)
274
+ metadata["commit_count"] = len(history)
280
275
  if history:
281
- metadata['first_commit'] = history[-1]['hash']
282
- metadata['last_commit'] = history[0]['hash']
283
- metadata['last_modified'] = datetime.fromtimestamp(
284
- history[0]['timestamp']
276
+ metadata["first_commit"] = history[-1]["hash"]
277
+ metadata["last_commit"] = history[0]["hash"]
278
+ metadata["last_modified"] = datetime.fromtimestamp(
279
+ history[0]["timestamp"]
285
280
  ).isoformat()
286
-
281
+
287
282
  # Add blame information if requested
288
283
  if include_blame:
289
284
  blame_data = self._get_file_blame(repo_path, relative_path)
290
- metadata['unique_authors'] = len(set(
291
- b['author'] for b in blame_data.values()
292
- ))
293
-
285
+ metadata["unique_authors"] = len(
286
+ set(b["author"] for b in blame_data.values())
287
+ )
288
+
294
289
  # Index the file content
295
290
  doc_ids = self.vector_store.add_file(
296
- str(file_path),
297
- chunk_size=1000,
298
- chunk_overlap=200,
299
- metadata=metadata
291
+ str(file_path), chunk_size=1000, chunk_overlap=200, metadata=metadata
300
292
  )
301
293
  results["files_indexed"] += 1
302
-
294
+
303
295
  # Perform AST analysis for supported languages
304
- if file_path.suffix in ['.py', '.js', '.ts', '.java', '.cpp', '.c']:
296
+ if file_path.suffix in [".py", ".js", ".ts", ".java", ".cpp", ".c"]:
305
297
  try:
306
298
  file_ast = self.ast_analyzer.analyze_file(str(file_path))
307
299
  if file_ast:
308
300
  # Store complete AST
309
301
  self.vector_store._store_file_ast(file_ast)
310
-
302
+
311
303
  # Store individual symbols
312
304
  self.vector_store._store_symbols(file_ast.symbols)
313
305
  results["symbols_extracted"] += len(file_ast.symbols)
314
-
306
+
315
307
  # Store cross-references
316
308
  self.vector_store._store_references(file_ast)
317
309
  except Exception as e:
318
310
  logger.warning(f"AST analysis failed for {file_path}: {e}")
319
-
311
+
320
312
  def _get_file_history(
321
- self,
322
- repo_path: Path,
323
- file_path: Path
313
+ self, repo_path: Path, file_path: Path
324
314
  ) -> List[Dict[str, Any]]:
325
315
  """Get commit history for a specific file."""
326
316
  result = subprocess.run(
327
- ["git", "log", "--follow", "--pretty=format:%H|%at|%an|%s", "--", str(file_path)],
317
+ [
318
+ "git",
319
+ "log",
320
+ "--follow",
321
+ "--pretty=format:%H|%at|%an|%s",
322
+ "--",
323
+ str(file_path),
324
+ ],
328
325
  cwd=repo_path,
329
326
  capture_output=True,
330
- text=True
327
+ text=True,
331
328
  )
332
-
329
+
333
330
  if result.returncode != 0:
334
331
  return []
335
-
332
+
336
333
  history = []
337
- for line in result.stdout.strip().split('\n'):
334
+ for line in result.stdout.strip().split("\n"):
338
335
  if line:
339
- parts = line.split('|', 3)
336
+ parts = line.split("|", 3)
340
337
  if len(parts) >= 4:
341
- history.append({
342
- 'hash': parts[0],
343
- 'timestamp': int(parts[1]),
344
- 'author': parts[2],
345
- 'message': parts[3]
346
- })
347
-
338
+ history.append(
339
+ {
340
+ "hash": parts[0],
341
+ "timestamp": int(parts[1]),
342
+ "author": parts[2],
343
+ "message": parts[3],
344
+ }
345
+ )
346
+
348
347
  return history
349
-
348
+
350
349
  def _get_file_blame(
351
- self,
352
- repo_path: Path,
353
- file_path: Path
350
+ self, repo_path: Path, file_path: Path
354
351
  ) -> Dict[int, Dict[str, Any]]:
355
352
  """Get blame information for a file."""
356
353
  result = subprocess.run(
357
354
  ["git", "blame", "--line-porcelain", "--", str(file_path)],
358
355
  cwd=repo_path,
359
356
  capture_output=True,
360
- text=True
357
+ text=True,
361
358
  )
362
-
359
+
363
360
  if result.returncode != 0:
364
361
  return {}
365
-
362
+
366
363
  blame_data = {}
367
364
  current_commit = None
368
365
  current_line = None
369
366
  author = None
370
367
  timestamp = None
371
-
372
- for line in result.stdout.strip().split('\n'):
373
- if line and not line.startswith('\t'):
374
- parts = line.split(' ')
368
+
369
+ for line in result.stdout.strip().split("\n"):
370
+ if line and not line.startswith("\t"):
371
+ parts = line.split(" ")
375
372
  if len(parts) >= 3 and len(parts[0]) == 40: # SHA-1 hash
376
373
  current_commit = parts[0]
377
374
  current_line = int(parts[2])
378
- elif line.startswith('author '):
375
+ elif line.startswith("author "):
379
376
  author = line[7:]
380
- elif line.startswith('author-time '):
377
+ elif line.startswith("author-time "):
381
378
  timestamp = int(line[12:])
382
-
379
+
383
380
  # We have all the data for this line
384
381
  if current_line and author:
385
382
  blame_data[current_line] = {
386
- 'commit': current_commit,
387
- 'author': author,
388
- 'timestamp': timestamp
383
+ "commit": current_commit,
384
+ "author": author,
385
+ "timestamp": timestamp,
389
386
  }
390
-
387
+
391
388
  return blame_data
392
-
393
- def _index_commit(
394
- self,
395
- commit: GitCommit,
396
- include_diffs: bool = True
397
- ):
389
+
390
+ def _index_commit(self, commit: GitCommit, include_diffs: bool = True):
398
391
  """Index a single commit."""
399
392
  # Create commit document
400
393
  commit_doc = f"""Git Commit: {commit.hash}
@@ -404,82 +397,80 @@ Message: {commit.message}
404
397
 
405
398
  Files changed: {len(commit.files)}
406
399
  """
407
-
400
+
408
401
  for file_info in commit.files:
409
402
  commit_doc += f"\n{file_info['status']}\t{file_info['filename']}"
410
-
403
+
411
404
  # Index commit
412
405
  metadata = {
413
- 'type': 'git_commit',
414
- 'commit_hash': commit.hash,
415
- 'author': commit.author,
416
- 'timestamp': commit.timestamp,
417
- 'file_count': len(commit.files)
406
+ "type": "git_commit",
407
+ "commit_hash": commit.hash,
408
+ "author": commit.author,
409
+ "timestamp": commit.timestamp,
410
+ "file_count": len(commit.files),
418
411
  }
419
-
412
+
420
413
  self.vector_store.add_document(commit_doc, metadata)
421
-
414
+
422
415
  # Index diffs if requested
423
416
  if include_diffs:
424
417
  for file_info in commit.files:
425
- self._index_commit_diff(commit, file_info['filename'])
426
-
418
+ self._index_commit_diff(commit, file_info["filename"])
419
+
427
420
  def _index_commit_diff(self, commit: GitCommit, filename: str):
428
421
  """Index the diff for a specific file in a commit."""
429
422
  # This is a simplified version - in practice you'd want to
430
423
  # parse the actual diff and store meaningful chunks
431
424
  metadata = {
432
- 'type': 'git_diff',
433
- 'commit_hash': commit.hash,
434
- 'filename': filename,
435
- 'author': commit.author,
436
- 'timestamp': commit.timestamp
425
+ "type": "git_diff",
426
+ "commit_hash": commit.hash,
427
+ "filename": filename,
428
+ "author": commit.author,
429
+ "timestamp": commit.timestamp,
437
430
  }
438
-
431
+
439
432
  # Create a document representing this change
440
433
  diff_doc = f"""File: {filename}
441
434
  Commit: {commit.hash}
442
435
  Author: {commit.author}
443
436
  Message: {commit.message}
444
437
  """
445
-
438
+
446
439
  self.vector_store.add_document(diff_doc, metadata)
447
-
448
- def _index_repository_metadata(
449
- self,
450
- repo_path: Path,
451
- results: Dict[str, Any]
452
- ):
440
+
441
+ def _index_repository_metadata(self, repo_path: Path, results: Dict[str, Any]):
453
442
  """Index overall repository metadata."""
454
443
  # Get repository info
455
444
  remote_result = subprocess.run(
456
445
  ["git", "remote", "get-url", "origin"],
457
446
  cwd=repo_path,
458
447
  capture_output=True,
459
- text=True
448
+ text=True,
460
449
  )
461
-
462
- remote_url = remote_result.stdout.strip() if remote_result.returncode == 0 else None
463
-
450
+
451
+ remote_url = (
452
+ remote_result.stdout.strip() if remote_result.returncode == 0 else None
453
+ )
454
+
464
455
  # Create repository summary document
465
456
  repo_doc = f"""Repository: {repo_path.name}
466
457
  Path: {repo_path}
467
- Remote: {remote_url or 'No remote'}
468
- Current Commit: {results.get('current_commit', 'Unknown')}
458
+ Remote: {remote_url or "No remote"}
459
+ Current Commit: {results.get("current_commit", "Unknown")}
469
460
 
470
461
  Statistics:
471
- - Files indexed: {results['files_indexed']}
472
- - Commits processed: {results['commits_processed']}
473
- - Symbols extracted: {results['symbols_extracted']}
474
- - Diffs indexed: {results['diffs_indexed']}
462
+ - Files indexed: {results["files_indexed"]}
463
+ - Commits processed: {results["commits_processed"]}
464
+ - Symbols extracted: {results["symbols_extracted"]}
465
+ - Diffs indexed: {results["diffs_indexed"]}
475
466
  """
476
-
467
+
477
468
  metadata = {
478
- 'type': 'repository',
479
- 'name': repo_path.name,
480
- 'path': str(repo_path),
481
- 'remote_url': remote_url,
482
- **results
469
+ "type": "repository",
470
+ "name": repo_path.name,
471
+ "path": str(repo_path),
472
+ "remote_url": remote_url,
473
+ **results,
483
474
  }
484
-
485
- self.vector_store.add_document(repo_doc, metadata)
475
+
476
+ self.vector_store.add_document(repo_doc, metadata)