mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -2,8 +2,32 @@
2
2
 
3
3
  import hashlib
4
4
  import json
5
+ import multiprocessing
6
+ import os
5
7
  from pathlib import Path
6
8
 
9
+
10
+ # Configure tokenizers parallelism based on process context
11
+ # Enable parallelism in main process for 2-4x speedup
12
+ # Disable in forked processes to avoid deadlock warnings
13
+ # See: https://github.com/huggingface/tokenizers/issues/1294
14
def _configure_tokenizers_parallelism() -> None:
    """Set ``TOKENIZERS_PARALLELISM`` to match the current process role.

    The variable is written as ``"true"`` when the current process is named
    ``"MainProcess"`` and as ``"false"`` in every other process. Any value
    already present in the environment is overwritten.
    """
    process_name = multiprocessing.current_process().name

    # Parallel tokenization stays on in the main process only; any other
    # process gets it switched off to avoid the tokenizers deadlock warning.
    flag = "true" if process_name == "MainProcess" else "false"
    os.environ["TOKENIZERS_PARALLELISM"] = flag
26
+
27
+
28
+ # Configure before importing sentence_transformers
29
+ _configure_tokenizers_parallelism()
30
+
7
31
  import aiofiles
8
32
  from loguru import logger
9
33
  from sentence_transformers import SentenceTransformer
@@ -53,6 +53,17 @@ class IndexCorruptionError(DatabaseError):
53
53
  pass
54
54
 
55
55
 
56
class RustPanicError(DatabaseError):
    """Raised when a panic inside ChromaDB's Rust bindings is detected.

    The panic is associated with inconsistent HNSW index metadata and
    typically surfaces with a message of the form:
    'range start index X out of range for slice of length Y'
    """
65
+
66
+
56
67
  class ParsingError(MCPVectorSearchError):
57
68
  """Code parsing errors."""
58
69
 
@@ -0,0 +1,380 @@
1
+ """Git integration for diff-aware analysis.
2
+
3
+ This module provides the GitManager class for detecting changed files in a git
4
+ repository, enabling diff-aware analysis that focuses only on modified code.
5
+
6
+ Design Decisions:
7
+ - Uses subprocess to call git commands (standard approach, no dependencies)
8
+ - Returns absolute Paths for consistency with rest of codebase
9
+ - Robust error handling with custom exceptions
10
+ - Supports both uncommitted changes and baseline comparisons
11
+
12
+ Performance:
13
+ - Git operations are typically fast (<100ms for most repos)
14
+ - File path resolution is O(n) where n is number of changed files
15
+ - Subprocess overhead is minimal compared to parsing/analysis time
16
+
17
+ Error Handling:
18
+ All git operations are wrapped with proper exception handling:
19
+ - GitNotAvailableError: Git binary not found in PATH
20
+ - GitNotRepoError: Not a git repository
21
+ - GitReferenceError: Invalid branch/commit reference
22
+ - GitError: General git operation failures
23
+ """
24
+
25
+ import subprocess
26
+ from pathlib import Path
27
+
28
+ from loguru import logger
29
+
30
+
31
class GitError(Exception):
    """Root of the exception hierarchy for git-related failures."""
35
+
36
+
37
class GitNotAvailableError(GitError):
    """Raised when the git binary cannot be found in PATH."""
41
+
42
+
43
class GitNotRepoError(GitError):
    """Raised when the target directory is not a git repository."""
47
+
48
+
49
class GitReferenceError(GitError):
    """Raised when a git reference (branch, tag, commit) does not exist."""
53
+
54
+
55
+ class GitManager:
56
+ """Manage git operations for diff-aware analysis.
57
+
58
+ This class provides methods to detect changed files in a git repository,
59
+ supporting both uncommitted changes and baseline comparisons.
60
+
61
+ Design Pattern: Simple wrapper around git commands with error handling.
62
+ No caching to ensure always-fresh results (git is fast enough).
63
+
64
+ Example:
65
+ >>> manager = GitManager(Path("/path/to/repo"))
66
+ >>> changed = manager.get_changed_files()
67
+ >>> print(f"Found {len(changed)} changed files")
68
+ """
69
+
70
+ def __init__(self, project_root: Path):
71
+ """Initialize git manager.
72
+
73
+ Args:
74
+ project_root: Root directory of the project
75
+
76
+ Raises:
77
+ GitNotAvailableError: If git binary is not available
78
+ GitNotRepoError: If project_root is not a git repository
79
+ """
80
+ self.project_root = project_root.resolve()
81
+
82
+ # Check git availability first
83
+ if not self.is_git_available():
84
+ raise GitNotAvailableError(
85
+ "Git binary not found. Install git or run without --changed-only"
86
+ )
87
+
88
+ # Check if this is a git repository
89
+ if not self.is_git_repo():
90
+ raise GitNotRepoError(
91
+ f"Not a git repository: {self.project_root}. "
92
+ "Initialize git with: git init"
93
+ )
94
+
95
+ def is_git_available(self) -> bool:
96
+ """Check if git command is available in PATH.
97
+
98
+ Returns:
99
+ True if git is available, False otherwise
100
+
101
+ Performance: O(1), cached by OS after first call
102
+ """
103
+ try:
104
+ subprocess.run( # nosec B607 - git is intentionally called via PATH
105
+ ["git", "--version"],
106
+ capture_output=True,
107
+ check=True,
108
+ timeout=5,
109
+ )
110
+ return True
111
+ except (
112
+ subprocess.CalledProcessError,
113
+ FileNotFoundError,
114
+ subprocess.TimeoutExpired,
115
+ ):
116
+ return False
117
+
118
+ def is_git_repo(self) -> bool:
119
+ """Check if project directory is a git repository.
120
+
121
+ Returns:
122
+ True if directory is a git repository
123
+
124
+ Performance: O(1), filesystem check
125
+ """
126
+ try:
127
+ subprocess.run( # nosec B607 - git is intentionally called via PATH
128
+ ["git", "rev-parse", "--git-dir"],
129
+ cwd=self.project_root,
130
+ capture_output=True,
131
+ check=True,
132
+ timeout=5,
133
+ )
134
+ # Successfully ran, so it's a git repo
135
+ return True
136
+ except (
137
+ subprocess.CalledProcessError,
138
+ FileNotFoundError,
139
+ subprocess.TimeoutExpired,
140
+ ):
141
+ return False
142
+
143
+ def get_changed_files(self, include_untracked: bool = True) -> list[Path]:
144
+ """Get list of changed files in working directory.
145
+
146
+ Detects uncommitted changes using `git status --porcelain`.
147
+ Includes both staged and unstaged modifications.
148
+
149
+ Args:
150
+ include_untracked: Include untracked files (default: True)
151
+
152
+ Returns:
153
+ List of changed file paths (absolute paths)
154
+
155
+ Raises:
156
+ GitError: If git status command fails
157
+
158
+ Performance: O(n) where n is number of files in working tree
159
+
160
+ Git Status Format:
161
+ XY filename
162
+ X = index status (staged)
163
+ Y = working tree status (unstaged)
164
+ ?? = untracked
165
+ D = deleted
166
+ R old -> new = renamed
167
+
168
+ Example:
169
+ >>> manager = GitManager(Path.cwd())
170
+ >>> changed = manager.get_changed_files()
171
+ >>> for file in changed:
172
+ ... print(f"Modified: {file}")
173
+ """
174
+ cmd = ["git", "status", "--porcelain"]
175
+
176
+ try:
177
+ result = subprocess.run( # nosec B607 - git is intentionally called via PATH
178
+ cmd,
179
+ cwd=self.project_root,
180
+ capture_output=True,
181
+ text=True,
182
+ check=True,
183
+ timeout=10,
184
+ )
185
+
186
+ changed_files = []
187
+ for line in result.stdout.splitlines():
188
+ if not line.strip():
189
+ continue
190
+
191
+ # Parse git status porcelain format
192
+ # Format: XY filename (X=index, Y=working tree)
193
+ status = line[:2]
194
+ filename = line[3:].strip()
195
+
196
+ # Handle renamed files: "R old -> new"
197
+ if " -> " in filename:
198
+ filename = filename.split(" -> ")[1]
199
+
200
+ # Skip deleted files (they don't exist to analyze)
201
+ if "D" in status:
202
+ logger.debug(f"Skipping deleted file: {filename}")
203
+ continue
204
+
205
+ # Skip untracked if not requested
206
+ if not include_untracked and status.startswith("??"):
207
+ logger.debug(f"Skipping untracked file: {filename}")
208
+ continue
209
+
210
+ # Convert to absolute path and verify existence
211
+ file_path = self.project_root / filename
212
+ if file_path.exists() and file_path.is_file():
213
+ changed_files.append(file_path)
214
+ else:
215
+ logger.debug(f"Skipping non-existent file: {file_path}")
216
+
217
+ logger.info(
218
+ f"Found {len(changed_files)} changed files "
219
+ f"(untracked={'included' if include_untracked else 'excluded'})"
220
+ )
221
+ return changed_files
222
+
223
+ except subprocess.CalledProcessError as e:
224
+ error_msg = e.stderr.strip() if e.stderr else "Unknown error"
225
+ logger.error(f"Git status failed: {error_msg}")
226
+ raise GitError(f"Failed to get changed files: {error_msg}")
227
+ except subprocess.TimeoutExpired:
228
+ logger.error("Git status command timed out")
229
+ raise GitError("Git status command timed out after 10 seconds")
230
+
231
+ def get_diff_files(self, baseline: str = "main") -> list[Path]:
232
+ """Get list of files that differ from baseline branch.
233
+
234
+ Compares current branch against baseline using `git diff --name-only`.
235
+
236
+ Args:
237
+ baseline: Baseline branch or commit (default: "main")
238
+
239
+ Returns:
240
+ List of changed file paths (absolute paths)
241
+
242
+ Raises:
243
+ GitReferenceError: If baseline reference doesn't exist
244
+ GitError: If git diff command fails
245
+
246
+ Performance: O(n) where n is number of files in diff
247
+
248
+ Baseline Fallback Strategy:
249
+ 1. Try requested baseline (e.g., "main")
250
+ 2. If not found, try "master"
251
+ 3. If not found, try "develop"
252
+ 4. If not found, try "HEAD~1"
253
+ 5. If still not found, raise GitReferenceError
254
+
255
+ Example:
256
+ >>> manager = GitManager(Path.cwd())
257
+ >>> diff_files = manager.get_diff_files("main")
258
+ >>> print(f"Changed vs main: {len(diff_files)} files")
259
+ """
260
+ # First, check if baseline exists
261
+ if not self.ref_exists(baseline):
262
+ # Try common alternatives
263
+ alternatives = ["master", "develop", "HEAD~1"]
264
+ for alt in alternatives:
265
+ if self.ref_exists(alt):
266
+ logger.warning(
267
+ f"Baseline '{baseline}' not found, using '{alt}' instead"
268
+ )
269
+ baseline = alt
270
+ break
271
+ else:
272
+ raise GitReferenceError(
273
+ f"Baseline '{baseline}' not found. "
274
+ f"Try: main, master, develop, or HEAD~1. "
275
+ f"Check available branches with: git branch -a"
276
+ )
277
+
278
+ # Get list of changed files
279
+ cmd = ["git", "diff", "--name-only", baseline]
280
+
281
+ try:
282
+ result = subprocess.run( # nosec B607 - git is intentionally called via PATH
283
+ cmd,
284
+ cwd=self.project_root,
285
+ capture_output=True,
286
+ text=True,
287
+ check=True,
288
+ timeout=10,
289
+ )
290
+
291
+ changed_files = []
292
+ for line in result.stdout.splitlines():
293
+ if not line.strip():
294
+ continue
295
+
296
+ # Convert to absolute path and verify existence
297
+ file_path = self.project_root / line.strip()
298
+ if file_path.exists() and file_path.is_file():
299
+ changed_files.append(file_path)
300
+ else:
301
+ # File may have been deleted in current branch
302
+ logger.debug(f"Skipping non-existent diff file: {file_path}")
303
+
304
+ logger.info(f"Found {len(changed_files)} files different from {baseline}")
305
+ return changed_files
306
+
307
+ except subprocess.CalledProcessError as e:
308
+ error_msg = e.stderr.strip() if e.stderr else "Unknown error"
309
+ logger.error(f"Git diff failed: {error_msg}")
310
+ raise GitError(f"Failed to get diff files: {error_msg}")
311
+ except subprocess.TimeoutExpired:
312
+ logger.error("Git diff command timed out")
313
+ raise GitError("Git diff command timed out after 10 seconds")
314
+
315
+ def ref_exists(self, ref: str) -> bool:
316
+ """Check if a git ref (branch, tag, commit) exists.
317
+
318
+ Uses `git rev-parse --verify` to check reference validity.
319
+
320
+ Args:
321
+ ref: Git reference to check (branch, tag, commit hash)
322
+
323
+ Returns:
324
+ True if ref exists and is valid
325
+
326
+ Performance: O(1), fast git operation
327
+
328
+ Example:
329
+ >>> manager = GitManager(Path.cwd())
330
+ >>> if manager.ref_exists("main"):
331
+ ... print("Main branch exists")
332
+ """
333
+ cmd = ["git", "rev-parse", "--verify", ref]
334
+
335
+ try:
336
+ subprocess.run( # nosec B607 - git is intentionally called via PATH
337
+ cmd,
338
+ cwd=self.project_root,
339
+ capture_output=True,
340
+ check=True,
341
+ timeout=5,
342
+ )
343
+ return True
344
+ except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
345
+ return False
346
+
347
+ def get_current_branch(self) -> str | None:
348
+ """Get name of current branch.
349
+
350
+ Returns:
351
+ Branch name or None if detached HEAD
352
+
353
+ Performance: O(1), fast git operation
354
+
355
+ Example:
356
+ >>> manager = GitManager(Path.cwd())
357
+ >>> branch = manager.get_current_branch()
358
+ >>> if branch:
359
+ ... print(f"Current branch: {branch}")
360
+ ... else:
361
+ ... print("Detached HEAD state")
362
+ """
363
+ cmd = ["git", "rev-parse", "--abbrev-ref", "HEAD"]
364
+
365
+ try:
366
+ result = subprocess.run( # nosec B607 - git is intentionally called via PATH
367
+ cmd,
368
+ cwd=self.project_root,
369
+ capture_output=True,
370
+ text=True,
371
+ check=True,
372
+ timeout=5,
373
+ )
374
+
375
+ branch = result.stdout.strip()
376
+ # "HEAD" means detached HEAD state
377
+ return branch if branch != "HEAD" else None
378
+
379
+ except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
380
+ return None
@@ -126,7 +126,7 @@ class GitHookManager:
126
126
  python_path = sys.executable
127
127
  project_root = str(self.project_root)
128
128
 
129
- script = f'''#!/bin/bash
129
+ script = f"""#!/bin/bash
130
130
  # MCP Vector Search Hook - {hook_type}
131
131
  # Auto-generated - do not edit manually
132
132
 
@@ -158,7 +158,7 @@ $MCP_CMD auto-index check --auto-reindex --max-files 10 &> /dev/null || true
158
158
 
159
159
  # Exit successfully (don't block Git operations)
160
160
  exit 0
161
- '''
161
+ """
162
162
  return script
163
163
 
164
164
  def _integrate_with_existing_hook(self, hook_file: Path, our_script: str) -> bool:
@@ -275,7 +275,7 @@ class GitChangeDetector:
275
275
  Set of changed file paths
276
276
  """
277
277
  try:
278
- result = subprocess.run(
278
+ result = subprocess.run( # nosec B607
279
279
  ["git", "diff", "--name-only", commit_hash, "HEAD"],
280
280
  cwd=project_root,
281
281
  capture_output=True,
@@ -306,7 +306,7 @@ class GitChangeDetector:
306
306
  Set of changed file paths
307
307
  """
308
308
  try:
309
- result = subprocess.run(
309
+ result = subprocess.run( # nosec B607
310
310
  ["git", "diff", "--name-only", "HEAD~1", "HEAD"],
311
311
  cwd=project_root,
312
312
  capture_output=True,