mcp-vector-search 1.0.3__py3-none-any.whl → 1.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mcp_vector_search/__init__.py +3 -3
- mcp_vector_search/analysis/__init__.py +48 -1
- mcp_vector_search/analysis/baseline/__init__.py +68 -0
- mcp_vector_search/analysis/baseline/comparator.py +462 -0
- mcp_vector_search/analysis/baseline/manager.py +621 -0
- mcp_vector_search/analysis/collectors/__init__.py +35 -0
- mcp_vector_search/analysis/collectors/cohesion.py +463 -0
- mcp_vector_search/analysis/collectors/coupling.py +1162 -0
- mcp_vector_search/analysis/collectors/halstead.py +514 -0
- mcp_vector_search/analysis/collectors/smells.py +325 -0
- mcp_vector_search/analysis/debt.py +516 -0
- mcp_vector_search/analysis/interpretation.py +685 -0
- mcp_vector_search/analysis/metrics.py +74 -1
- mcp_vector_search/analysis/reporters/__init__.py +3 -1
- mcp_vector_search/analysis/reporters/console.py +424 -0
- mcp_vector_search/analysis/reporters/markdown.py +480 -0
- mcp_vector_search/analysis/reporters/sarif.py +377 -0
- mcp_vector_search/analysis/storage/__init__.py +93 -0
- mcp_vector_search/analysis/storage/metrics_store.py +762 -0
- mcp_vector_search/analysis/storage/schema.py +245 -0
- mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
- mcp_vector_search/analysis/trends.py +308 -0
- mcp_vector_search/analysis/visualizer/__init__.py +90 -0
- mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
- mcp_vector_search/analysis/visualizer/exporter.py +484 -0
- mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
- mcp_vector_search/analysis/visualizer/schemas.py +525 -0
- mcp_vector_search/cli/commands/analyze.py +665 -11
- mcp_vector_search/cli/commands/chat.py +193 -0
- mcp_vector_search/cli/commands/index.py +600 -2
- mcp_vector_search/cli/commands/index_background.py +467 -0
- mcp_vector_search/cli/commands/search.py +194 -1
- mcp_vector_search/cli/commands/setup.py +64 -13
- mcp_vector_search/cli/commands/status.py +302 -3
- mcp_vector_search/cli/commands/visualize/cli.py +26 -10
- mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +8 -4
- mcp_vector_search/cli/commands/visualize/graph_builder.py +167 -234
- mcp_vector_search/cli/commands/visualize/server.py +304 -15
- mcp_vector_search/cli/commands/visualize/templates/base.py +60 -6
- mcp_vector_search/cli/commands/visualize/templates/scripts.py +2100 -65
- mcp_vector_search/cli/commands/visualize/templates/styles.py +1297 -88
- mcp_vector_search/cli/didyoumean.py +5 -0
- mcp_vector_search/cli/main.py +16 -5
- mcp_vector_search/cli/output.py +134 -5
- mcp_vector_search/config/thresholds.py +89 -1
- mcp_vector_search/core/__init__.py +16 -0
- mcp_vector_search/core/database.py +39 -2
- mcp_vector_search/core/embeddings.py +24 -0
- mcp_vector_search/core/git.py +380 -0
- mcp_vector_search/core/indexer.py +445 -84
- mcp_vector_search/core/llm_client.py +9 -4
- mcp_vector_search/core/models.py +88 -1
- mcp_vector_search/core/relationships.py +473 -0
- mcp_vector_search/core/search.py +1 -1
- mcp_vector_search/mcp/server.py +795 -4
- mcp_vector_search/parsers/python.py +285 -5
- mcp_vector_search/utils/gitignore.py +0 -3
- {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +3 -2
- {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/RECORD +62 -39
- mcp_vector_search/cli/commands/visualize.py.original +0 -2536
- {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +0 -0
- {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +0 -0
- {mcp_vector_search-1.0.3.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,380 @@
|
|
|
1
|
+
"""Git integration for diff-aware analysis.
|
|
2
|
+
|
|
3
|
+
This module provides the GitManager class for detecting changed files in a git
|
|
4
|
+
repository, enabling diff-aware analysis that focuses only on modified code.
|
|
5
|
+
|
|
6
|
+
Design Decisions:
|
|
7
|
+
- Uses subprocess to call git commands (standard approach, no dependencies)
|
|
8
|
+
- Returns absolute Paths for consistency with rest of codebase
|
|
9
|
+
- Robust error handling with custom exceptions
|
|
10
|
+
- Supports both uncommitted changes and baseline comparisons
|
|
11
|
+
|
|
12
|
+
Performance:
|
|
13
|
+
- Git operations are typically fast (<100ms for most repos)
|
|
14
|
+
- File path resolution is O(n) where n is number of changed files
|
|
15
|
+
- Subprocess overhead is minimal compared to parsing/analysis time
|
|
16
|
+
|
|
17
|
+
Error Handling:
|
|
18
|
+
All git operations are wrapped with proper exception handling:
|
|
19
|
+
- GitNotAvailableError: Git binary not found in PATH
|
|
20
|
+
- GitNotRepoError: Not a git repository
|
|
21
|
+
- GitReferenceError: Invalid branch/commit reference
|
|
22
|
+
- GitError: General git operation failures
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import subprocess
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
|
|
28
|
+
from loguru import logger
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GitError(Exception):
    """Root of the git exception hierarchy.

    Every git-specific failure raised by this module derives from this
    class, so callers can catch ``GitError`` to handle all of them at once.
    """
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class GitNotAvailableError(GitError):
    """Raised when the ``git`` executable cannot be run from PATH."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class GitNotRepoError(GitError):
    """Raised when the target directory is not inside a git repository."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class GitReferenceError(GitError):
    """Raised when a git reference (branch, tag, commit) cannot be resolved."""
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class GitManager:
|
|
56
|
+
"""Manage git operations for diff-aware analysis.
|
|
57
|
+
|
|
58
|
+
This class provides methods to detect changed files in a git repository,
|
|
59
|
+
supporting both uncommitted changes and baseline comparisons.
|
|
60
|
+
|
|
61
|
+
Design Pattern: Simple wrapper around git commands with error handling.
|
|
62
|
+
No caching to ensure always-fresh results (git is fast enough).
|
|
63
|
+
|
|
64
|
+
Example:
|
|
65
|
+
>>> manager = GitManager(Path("/path/to/repo"))
|
|
66
|
+
>>> changed = manager.get_changed_files()
|
|
67
|
+
>>> print(f"Found {len(changed)} changed files")
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
def __init__(self, project_root: Path):
|
|
71
|
+
"""Initialize git manager.
|
|
72
|
+
|
|
73
|
+
Args:
|
|
74
|
+
project_root: Root directory of the project
|
|
75
|
+
|
|
76
|
+
Raises:
|
|
77
|
+
GitNotAvailableError: If git binary is not available
|
|
78
|
+
GitNotRepoError: If project_root is not a git repository
|
|
79
|
+
"""
|
|
80
|
+
self.project_root = project_root.resolve()
|
|
81
|
+
|
|
82
|
+
# Check git availability first
|
|
83
|
+
if not self.is_git_available():
|
|
84
|
+
raise GitNotAvailableError(
|
|
85
|
+
"Git binary not found. Install git or run without --changed-only"
|
|
86
|
+
)
|
|
87
|
+
|
|
88
|
+
# Check if this is a git repository
|
|
89
|
+
if not self.is_git_repo():
|
|
90
|
+
raise GitNotRepoError(
|
|
91
|
+
f"Not a git repository: {self.project_root}. "
|
|
92
|
+
"Initialize git with: git init"
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
def is_git_available(self) -> bool:
|
|
96
|
+
"""Check if git command is available in PATH.
|
|
97
|
+
|
|
98
|
+
Returns:
|
|
99
|
+
True if git is available, False otherwise
|
|
100
|
+
|
|
101
|
+
Performance: O(1), cached by OS after first call
|
|
102
|
+
"""
|
|
103
|
+
try:
|
|
104
|
+
subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
105
|
+
["git", "--version"],
|
|
106
|
+
capture_output=True,
|
|
107
|
+
check=True,
|
|
108
|
+
timeout=5,
|
|
109
|
+
)
|
|
110
|
+
return True
|
|
111
|
+
except (
|
|
112
|
+
subprocess.CalledProcessError,
|
|
113
|
+
FileNotFoundError,
|
|
114
|
+
subprocess.TimeoutExpired,
|
|
115
|
+
):
|
|
116
|
+
return False
|
|
117
|
+
|
|
118
|
+
def is_git_repo(self) -> bool:
|
|
119
|
+
"""Check if project directory is a git repository.
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
True if directory is a git repository
|
|
123
|
+
|
|
124
|
+
Performance: O(1), filesystem check
|
|
125
|
+
"""
|
|
126
|
+
try:
|
|
127
|
+
subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
128
|
+
["git", "rev-parse", "--git-dir"],
|
|
129
|
+
cwd=self.project_root,
|
|
130
|
+
capture_output=True,
|
|
131
|
+
check=True,
|
|
132
|
+
timeout=5,
|
|
133
|
+
)
|
|
134
|
+
# Successfully ran, so it's a git repo
|
|
135
|
+
return True
|
|
136
|
+
except (
|
|
137
|
+
subprocess.CalledProcessError,
|
|
138
|
+
FileNotFoundError,
|
|
139
|
+
subprocess.TimeoutExpired,
|
|
140
|
+
):
|
|
141
|
+
return False
|
|
142
|
+
|
|
143
|
+
def get_changed_files(self, include_untracked: bool = True) -> list[Path]:
|
|
144
|
+
"""Get list of changed files in working directory.
|
|
145
|
+
|
|
146
|
+
Detects uncommitted changes using `git status --porcelain`.
|
|
147
|
+
Includes both staged and unstaged modifications.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
include_untracked: Include untracked files (default: True)
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
List of changed file paths (absolute paths)
|
|
154
|
+
|
|
155
|
+
Raises:
|
|
156
|
+
GitError: If git status command fails
|
|
157
|
+
|
|
158
|
+
Performance: O(n) where n is number of files in working tree
|
|
159
|
+
|
|
160
|
+
Git Status Format:
|
|
161
|
+
XY filename
|
|
162
|
+
X = index status (staged)
|
|
163
|
+
Y = working tree status (unstaged)
|
|
164
|
+
?? = untracked
|
|
165
|
+
D = deleted
|
|
166
|
+
R old -> new = renamed
|
|
167
|
+
|
|
168
|
+
Example:
|
|
169
|
+
>>> manager = GitManager(Path.cwd())
|
|
170
|
+
>>> changed = manager.get_changed_files()
|
|
171
|
+
>>> for file in changed:
|
|
172
|
+
... print(f"Modified: {file}")
|
|
173
|
+
"""
|
|
174
|
+
cmd = ["git", "status", "--porcelain"]
|
|
175
|
+
|
|
176
|
+
try:
|
|
177
|
+
result = subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
178
|
+
cmd,
|
|
179
|
+
cwd=self.project_root,
|
|
180
|
+
capture_output=True,
|
|
181
|
+
text=True,
|
|
182
|
+
check=True,
|
|
183
|
+
timeout=10,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
changed_files = []
|
|
187
|
+
for line in result.stdout.splitlines():
|
|
188
|
+
if not line.strip():
|
|
189
|
+
continue
|
|
190
|
+
|
|
191
|
+
# Parse git status porcelain format
|
|
192
|
+
# Format: XY filename (X=index, Y=working tree)
|
|
193
|
+
status = line[:2]
|
|
194
|
+
filename = line[3:].strip()
|
|
195
|
+
|
|
196
|
+
# Handle renamed files: "R old -> new"
|
|
197
|
+
if " -> " in filename:
|
|
198
|
+
filename = filename.split(" -> ")[1]
|
|
199
|
+
|
|
200
|
+
# Skip deleted files (they don't exist to analyze)
|
|
201
|
+
if "D" in status:
|
|
202
|
+
logger.debug(f"Skipping deleted file: {filename}")
|
|
203
|
+
continue
|
|
204
|
+
|
|
205
|
+
# Skip untracked if not requested
|
|
206
|
+
if not include_untracked and status.startswith("??"):
|
|
207
|
+
logger.debug(f"Skipping untracked file: {filename}")
|
|
208
|
+
continue
|
|
209
|
+
|
|
210
|
+
# Convert to absolute path and verify existence
|
|
211
|
+
file_path = self.project_root / filename
|
|
212
|
+
if file_path.exists() and file_path.is_file():
|
|
213
|
+
changed_files.append(file_path)
|
|
214
|
+
else:
|
|
215
|
+
logger.debug(f"Skipping non-existent file: {file_path}")
|
|
216
|
+
|
|
217
|
+
logger.info(
|
|
218
|
+
f"Found {len(changed_files)} changed files "
|
|
219
|
+
f"(untracked={'included' if include_untracked else 'excluded'})"
|
|
220
|
+
)
|
|
221
|
+
return changed_files
|
|
222
|
+
|
|
223
|
+
except subprocess.CalledProcessError as e:
|
|
224
|
+
error_msg = e.stderr.strip() if e.stderr else "Unknown error"
|
|
225
|
+
logger.error(f"Git status failed: {error_msg}")
|
|
226
|
+
raise GitError(f"Failed to get changed files: {error_msg}")
|
|
227
|
+
except subprocess.TimeoutExpired:
|
|
228
|
+
logger.error("Git status command timed out")
|
|
229
|
+
raise GitError("Git status command timed out after 10 seconds")
|
|
230
|
+
|
|
231
|
+
def get_diff_files(self, baseline: str = "main") -> list[Path]:
|
|
232
|
+
"""Get list of files that differ from baseline branch.
|
|
233
|
+
|
|
234
|
+
Compares current branch against baseline using `git diff --name-only`.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
baseline: Baseline branch or commit (default: "main")
|
|
238
|
+
|
|
239
|
+
Returns:
|
|
240
|
+
List of changed file paths (absolute paths)
|
|
241
|
+
|
|
242
|
+
Raises:
|
|
243
|
+
GitReferenceError: If baseline reference doesn't exist
|
|
244
|
+
GitError: If git diff command fails
|
|
245
|
+
|
|
246
|
+
Performance: O(n) where n is number of files in diff
|
|
247
|
+
|
|
248
|
+
Baseline Fallback Strategy:
|
|
249
|
+
1. Try requested baseline (e.g., "main")
|
|
250
|
+
2. If not found, try "master"
|
|
251
|
+
3. If not found, try "develop"
|
|
252
|
+
4. If not found, try "HEAD~1"
|
|
253
|
+
5. If still not found, raise GitReferenceError
|
|
254
|
+
|
|
255
|
+
Example:
|
|
256
|
+
>>> manager = GitManager(Path.cwd())
|
|
257
|
+
>>> diff_files = manager.get_diff_files("main")
|
|
258
|
+
>>> print(f"Changed vs main: {len(diff_files)} files")
|
|
259
|
+
"""
|
|
260
|
+
# First, check if baseline exists
|
|
261
|
+
if not self.ref_exists(baseline):
|
|
262
|
+
# Try common alternatives
|
|
263
|
+
alternatives = ["master", "develop", "HEAD~1"]
|
|
264
|
+
for alt in alternatives:
|
|
265
|
+
if self.ref_exists(alt):
|
|
266
|
+
logger.warning(
|
|
267
|
+
f"Baseline '{baseline}' not found, using '{alt}' instead"
|
|
268
|
+
)
|
|
269
|
+
baseline = alt
|
|
270
|
+
break
|
|
271
|
+
else:
|
|
272
|
+
raise GitReferenceError(
|
|
273
|
+
f"Baseline '{baseline}' not found. "
|
|
274
|
+
f"Try: main, master, develop, or HEAD~1. "
|
|
275
|
+
f"Check available branches with: git branch -a"
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
# Get list of changed files
|
|
279
|
+
cmd = ["git", "diff", "--name-only", baseline]
|
|
280
|
+
|
|
281
|
+
try:
|
|
282
|
+
result = subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
283
|
+
cmd,
|
|
284
|
+
cwd=self.project_root,
|
|
285
|
+
capture_output=True,
|
|
286
|
+
text=True,
|
|
287
|
+
check=True,
|
|
288
|
+
timeout=10,
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
changed_files = []
|
|
292
|
+
for line in result.stdout.splitlines():
|
|
293
|
+
if not line.strip():
|
|
294
|
+
continue
|
|
295
|
+
|
|
296
|
+
# Convert to absolute path and verify existence
|
|
297
|
+
file_path = self.project_root / line.strip()
|
|
298
|
+
if file_path.exists() and file_path.is_file():
|
|
299
|
+
changed_files.append(file_path)
|
|
300
|
+
else:
|
|
301
|
+
# File may have been deleted in current branch
|
|
302
|
+
logger.debug(f"Skipping non-existent diff file: {file_path}")
|
|
303
|
+
|
|
304
|
+
logger.info(f"Found {len(changed_files)} files different from {baseline}")
|
|
305
|
+
return changed_files
|
|
306
|
+
|
|
307
|
+
except subprocess.CalledProcessError as e:
|
|
308
|
+
error_msg = e.stderr.strip() if e.stderr else "Unknown error"
|
|
309
|
+
logger.error(f"Git diff failed: {error_msg}")
|
|
310
|
+
raise GitError(f"Failed to get diff files: {error_msg}")
|
|
311
|
+
except subprocess.TimeoutExpired:
|
|
312
|
+
logger.error("Git diff command timed out")
|
|
313
|
+
raise GitError("Git diff command timed out after 10 seconds")
|
|
314
|
+
|
|
315
|
+
def ref_exists(self, ref: str) -> bool:
|
|
316
|
+
"""Check if a git ref (branch, tag, commit) exists.
|
|
317
|
+
|
|
318
|
+
Uses `git rev-parse --verify` to check reference validity.
|
|
319
|
+
|
|
320
|
+
Args:
|
|
321
|
+
ref: Git reference to check (branch, tag, commit hash)
|
|
322
|
+
|
|
323
|
+
Returns:
|
|
324
|
+
True if ref exists and is valid
|
|
325
|
+
|
|
326
|
+
Performance: O(1), fast git operation
|
|
327
|
+
|
|
328
|
+
Example:
|
|
329
|
+
>>> manager = GitManager(Path.cwd())
|
|
330
|
+
>>> if manager.ref_exists("main"):
|
|
331
|
+
... print("Main branch exists")
|
|
332
|
+
"""
|
|
333
|
+
cmd = ["git", "rev-parse", "--verify", ref]
|
|
334
|
+
|
|
335
|
+
try:
|
|
336
|
+
subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
337
|
+
cmd,
|
|
338
|
+
cwd=self.project_root,
|
|
339
|
+
capture_output=True,
|
|
340
|
+
check=True,
|
|
341
|
+
timeout=5,
|
|
342
|
+
)
|
|
343
|
+
return True
|
|
344
|
+
except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
|
|
345
|
+
return False
|
|
346
|
+
|
|
347
|
+
def get_current_branch(self) -> str | None:
|
|
348
|
+
"""Get name of current branch.
|
|
349
|
+
|
|
350
|
+
Returns:
|
|
351
|
+
Branch name or None if detached HEAD
|
|
352
|
+
|
|
353
|
+
Performance: O(1), fast git operation
|
|
354
|
+
|
|
355
|
+
Example:
|
|
356
|
+
>>> manager = GitManager(Path.cwd())
|
|
357
|
+
>>> branch = manager.get_current_branch()
|
|
358
|
+
>>> if branch:
|
|
359
|
+
... print(f"Current branch: {branch}")
|
|
360
|
+
... else:
|
|
361
|
+
... print("Detached HEAD state")
|
|
362
|
+
"""
|
|
363
|
+
cmd = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
|
|
364
|
+
|
|
365
|
+
try:
|
|
366
|
+
result = subprocess.run( # nosec B607 - git is intentionally called via PATH
|
|
367
|
+
cmd,
|
|
368
|
+
cwd=self.project_root,
|
|
369
|
+
capture_output=True,
|
|
370
|
+
text=True,
|
|
371
|
+
check=True,
|
|
372
|
+
timeout=5,
|
|
373
|
+
)
|
|
374
|
+
|
|
375
|
+
branch = result.stdout.strip()
|
|
376
|
+
# "HEAD" means detached HEAD state
|
|
377
|
+
return branch if branch != "HEAD" else None
|
|
378
|
+
|
|
379
|
+
except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
|
|
380
|
+
return None
|