mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/scheduler.py

@@ -73,7 +73,7 @@ class SchedulerManager:
         project_root = str(self.project_root)

         # Create wrapper script
-        script_content = f'''#!/bin/bash
+        script_content = f"""#!/bin/bash
 # MCP Vector Search Auto-Reindex - {task_name}
 cd "{project_root}" || exit 1

@@ -85,7 +85,7 @@ elif [ -f "{python_path}" ]; then
 else
     python3 -m mcp_vector_search auto-index check --auto-reindex --max-files 10
 fi
-'''
+"""

         # Write script to temp file
         script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -109,7 +109,7 @@ fi

         # Get current crontab
         try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["crontab", "-l"], capture_output=True, text=True, check=True
             )
             current_crontab = result.stdout
@@ -125,7 +125,7 @@ fi
         new_crontab = current_crontab + cron_entry

         # Install new crontab
-        process = subprocess.Popen(
+        process = subprocess.Popen( # nosec B607
             ["crontab", "-"], stdin=subprocess.PIPE, text=True
         )
         process.communicate(input=new_crontab)
@@ -148,7 +148,7 @@ fi
         try:
             # Get current crontab
             try:
-                result = subprocess.run(
+                result = subprocess.run( # nosec B607
                     ["crontab", "-l"], capture_output=True, text=True, check=True
                 )
                 current_crontab = result.stdout
@@ -163,13 +163,13 @@ fi

             # Install new crontab
             if new_crontab.strip():
-                process = subprocess.Popen(
+                process = subprocess.Popen( # nosec B607
                     ["crontab", "-"], stdin=subprocess.PIPE, text=True
                 )
                 process.communicate(input=new_crontab)
             else:
                 # Remove crontab entirely if empty
-                subprocess.run(["crontab", "-r"], check=False)
+                subprocess.run(["crontab", "-r"], check=False) # nosec B607

             # Remove script file
             script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -191,7 +191,7 @@ fi
         project_root = str(self.project_root)

         # Create PowerShell script
-        script_content = f'''# MCP Vector Search Auto-Reindex - {task_name}
+        script_content = f"""# MCP Vector Search Auto-Reindex - {task_name}
 Set-Location "{project_root}"

 try {{
@@ -205,7 +205,7 @@ try {{
 }} catch {{
     # Silently ignore errors
 }}
-'''
+"""

         # Write script
         script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -302,7 +302,7 @@ try {{
     def _get_cron_status(self, task_name: str) -> dict:
         """Get cron job status."""
         try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["crontab", "-l"], capture_output=True, text=True, check=True
             )

@@ -315,7 +315,7 @@ try {{
     def _get_windows_task_status(self, task_name: str) -> dict:
         """Get Windows task status."""
        try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["schtasks", "/query", "/tn", task_name], capture_output=True, text=True
             )

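The `# nosec B607` annotations above suppress Bandit's partial-executable-path warning for the fixed-argument `crontab` and `schtasks` calls, and the wrapper-script f-strings switch from `'''` to `"""`. For orientation, here is a minimal sketch of the crontab read-append-install round-trip these calls implement; the helper name and the example cron line are illustrative, not taken from the package.

# Minimal sketch of the crontab round-trip, assuming a POSIX system with
# `crontab` on PATH (the partial path is exactly what Bandit B607 flags).
import subprocess


def append_cron_entry(cron_entry: str) -> None:
    """Append one entry to the current user's crontab."""
    result = subprocess.run(  # nosec B607 - fixed argument list, no shell
        ["crontab", "-l"], capture_output=True, text=True, check=False
    )
    current = result.stdout if result.returncode == 0 else ""

    process = subprocess.Popen(  # nosec B607
        ["crontab", "-"], stdin=subprocess.PIPE, text=True
    )
    process.communicate(input=current + cron_entry)


# Hypothetical usage: re-run the generated wrapper script every 30 minutes.
# append_cron_entry("*/30 * * * * ~/.mcp-vector-search/scripts/reindex.sh\n")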
mcp_vector_search/core/search.py

@@ -1,5 +1,6 @@
 """Semantic search engine for MCP Vector Search."""

+import asyncio
 import re
 import time
 from collections import OrderedDict
@@ -11,8 +12,9 @@ from loguru import logger

 from ..config.constants import DEFAULT_CACHE_SIZE
 from .auto_indexer import AutoIndexer, SearchTriggeredIndexer
+from .boilerplate import BoilerplateFilter
 from .database import VectorDatabase
-from .exceptions import SearchError
+from .exceptions import RustPanicError, SearchError
 from .models import SearchResult


@@ -67,6 +69,7 @@ class SemanticSearchEngine:
     _BOOST_SHALLOW_PATH = 0.02
     _PENALTY_TEST_FILE = -0.02
     _PENALTY_DEEP_PATH = -0.01
+    _PENALTY_BOILERPLATE = -0.15

     def __init__(
         self,
@@ -106,6 +109,156 @@ class SemanticSearchEngine:
         self._last_health_check: float = 0.0
         self._health_check_interval: float = 60.0

+        # Boilerplate filter for smart result ranking
+        self._boilerplate_filter = BoilerplateFilter()
+
+    @staticmethod
+    def _is_rust_panic_error(error: Exception) -> bool:
+        """Detect ChromaDB Rust panic errors.
+
+        Args:
+            error: Exception to check
+
+        Returns:
+            True if this is a Rust panic error
+        """
+        error_msg = str(error).lower()
+
+        # Check for the specific Rust panic pattern
+        # "range start index X out of range for slice of length Y"
+        if "range start index" in error_msg and "out of range" in error_msg:
+            return True
+
+        # Check for other Rust panic indicators
+        rust_panic_patterns = [
+            "rust panic",
+            "pyo3_runtime.panicexception",
+            "thread 'tokio-runtime-worker' panicked",
+            "rust/sqlite/src/db.rs", # Specific to the known ChromaDB issue
+        ]
+
+        return any(pattern in error_msg for pattern in rust_panic_patterns)
+
+    @staticmethod
+    def _is_corruption_error(error: Exception) -> bool:
+        """Detect index corruption errors.
+
+        Args:
+            error: Exception to check
+
+        Returns:
+            True if this is a corruption error
+        """
+        error_msg = str(error).lower()
+
+        corruption_indicators = [
+            "pickle",
+            "unpickling",
+            "eof",
+            "ran out of input",
+            "hnsw",
+            "deserialize",
+            "corrupt",
+        ]
+
+        return any(indicator in error_msg for indicator in corruption_indicators)
+
+    async def _search_with_retry(
+        self,
+        query: str,
+        limit: int,
+        filters: dict[str, Any] | None,
+        threshold: float,
+        max_retries: int = 3,
+    ) -> list[SearchResult]:
+        """Execute search with retry logic and exponential backoff.
+
+        Args:
+            query: Processed search query
+            limit: Maximum number of results
+            filters: Optional filters
+            threshold: Similarity threshold
+            max_retries: Maximum retry attempts (default: 3)
+
+        Returns:
+            List of search results
+
+        Raises:
+            RustPanicError: If Rust panic persists after retries
+            SearchError: If search fails for other reasons
+        """
+        last_error = None
+        backoff_delays = [0, 0.1, 0.5] # Immediate, 100ms, 500ms
+
+        for attempt in range(max_retries):
+            try:
+                # Add delay for retries (exponential backoff)
+                if attempt > 0 and backoff_delays[attempt] > 0:
+                    await asyncio.sleep(backoff_delays[attempt])
+                    logger.debug(
+                        f"Retrying search after {backoff_delays[attempt]}s delay (attempt {attempt + 1}/{max_retries})"
+                    )
+
+                # Perform the actual search
+                results = await self.database.search(
+                    query=query,
+                    limit=limit,
+                    filters=filters,
+                    similarity_threshold=threshold,
+                )
+
+                # Success! If we had retries, log that we recovered
+                if attempt > 0:
+                    logger.info(
+                        f"Search succeeded after {attempt + 1} attempts (recovered from transient error)"
+                    )
+
+                return results
+
+            except BaseException as e:
+                # Re-raise system exceptions we should never catch
+                if isinstance(e, KeyboardInterrupt | SystemExit | GeneratorExit):
+                    raise
+
+                last_error = e
+
+                # Check if this is a Rust panic
+                if self._is_rust_panic_error(e):
+                    logger.warning(
+                        f"ChromaDB Rust panic detected (attempt {attempt + 1}/{max_retries}): {e}"
+                    )
+
+                    # If this is the last retry, escalate to corruption recovery
+                    if attempt == max_retries - 1:
+                        logger.error(
+                            "Rust panic persisted after all retries - index may be corrupted"
+                        )
+                        raise RustPanicError(
+                            "ChromaDB Rust panic detected. The HNSW index may be corrupted. "
+                            "Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
+                        ) from e
+
+                    # Otherwise, continue to next retry
+                    continue
+
+                # Check for general corruption
+                elif self._is_corruption_error(e):
+                    logger.error(f"Index corruption detected: {e}")
+                    raise SearchError(
+                        "Index corruption detected. Please run 'mcp-vector-search reset' "
+                        "followed by 'mcp-vector-search index' to rebuild."
+                    ) from e
+
+                # Some other error - don't retry, just fail
+                else:
+                    logger.error(f"Search failed: {e}")
+                    raise SearchError(f"Search failed: {e}") from e
+
+        # Should never reach here, but just in case
+        raise SearchError(
+            f"Search failed after {max_retries} retries: {last_error}"
+        ) from last_error
+
     async def search(
         self,
         query: str,
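Taken together, `_is_rust_panic_error` and `_is_corruption_error` decide whether a failure is retried, escalated to `RustPanicError`, or reported as corruption. The sketch below condenses that decision into one standalone function to show which messages land where; the sample messages are constructed to match the patterns above, not captured from real failures.

# Condensed, illustrative version of the classification used by _search_with_retry.
def classify(error: Exception) -> str:
    msg = str(error).lower()
    rust_panic = ("range start index" in msg and "out of range" in msg) or any(
        p in msg
        for p in (
            "rust panic",
            "pyo3_runtime.panicexception",
            "thread 'tokio-runtime-worker' panicked",
        )
    )
    if rust_panic:
        return "rust panic -> retry, then RustPanicError"
    if any(i in msg for i in ("pickle", "unpickling", "eof", "ran out of input",
                              "hnsw", "deserialize", "corrupt")):
        return "corruption -> SearchError with reset/index guidance"
    return "other -> SearchError, no retry"


print(classify(RuntimeError("range start index 8 out of range for slice of length 4")))
print(classify(RuntimeError("EOFError: ran out of input while loading hnsw segment")))
print(classify(RuntimeError("connection refused")))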
@@ -162,12 +315,12 @@ class SemanticSearchEngine:
             # Preprocess query
             processed_query = self._preprocess_query(query)

-            # Perform vector search
-            results = await self.database.search(
+            # Perform vector search with retry logic
+            results = await self._search_with_retry(
                 query=processed_query,
                 limit=limit,
                 filters=filters,
-                similarity_threshold=threshold,
+                threshold=threshold,
             )

             # Post-process results
@@ -184,32 +337,13 @@ class SemanticSearchEngine:
             )
             return ranked_results

+        except (RustPanicError, SearchError):
+            # These errors are already properly formatted with user guidance
+            raise
         except Exception as e:
-            error_msg = str(e).lower()
-            # Check for corruption indicators
-            if any(
-                indicator in error_msg
-                for indicator in [
-                    "pickle",
-                    "unpickling",
-                    "eof",
-                    "ran out of input",
-                    "hnsw",
-                    "index",
-                    "deserialize",
-                    "corrupt",
-                ]
-            ):
-                logger.error(f"Index corruption detected during search: {e}")
-                logger.info(
-                    "The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
-                )
-                raise SearchError(
-                    "Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
-                ) from e
-            else:
-                logger.error(f"Search failed for query '{query}': {e}")
-                raise SearchError(f"Search failed: {e}") from e
+            # Unexpected error - wrap it in SearchError
+            logger.error(f"Unexpected search error for query '{query}': {e}")
+            raise SearchError(f"Search failed: {e}") from e

     async def search_similar(
         self,
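For callers, the practical effect of this hunk is a simpler error contract: `search()` re-raises `RustPanicError` and `SearchError` untouched and wraps anything else in `SearchError`. A small usage sketch, assuming an already constructed `SemanticSearchEngine` instance (setup elided here):

# Sketch only: how a caller might consume the new error contract.
from mcp_vector_search.core.exceptions import RustPanicError, SearchError


async def run_query(engine, query: str):
    try:
        return await engine.search(query)
    except RustPanicError:
        # Persistent ChromaDB panic: the index likely needs a rebuild.
        print("Run 'mcp-vector-search reset' then 'mcp-vector-search index'.")
    except SearchError as exc:
        # Corruption and all other failures arrive here, already wrapped.
        print(f"Search failed: {exc}")
    return []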
@@ -470,6 +604,11 @@ class SemanticSearchEngine:
                 result.context_before = context_before
                 result.context_after = context_after

+            except FileNotFoundError:
+                # File was deleted since indexing - silently skip context
+                # This is normal when index is stale; use --force to reindex
+                logger.debug(f"File no longer exists (stale index): {result.file_path}")
+                result.file_missing = True # Mark for potential filtering
             except Exception as e:
                 logger.warning(f"Failed to get context for {result.file_path}: {e}")

@@ -562,6 +701,17 @@ class SemanticSearchEngine:
         elif path_depth > 5:
             score += self._PENALTY_DEEP_PATH

+        # Factor 7: Boilerplate penalty (penalize common boilerplate patterns)
+        # Apply penalty to function names (constructors, lifecycle methods, etc.)
+        if result.function_name:
+            boilerplate_penalty = self._boilerplate_filter.get_penalty(
+                name=result.function_name,
+                language=result.language,
+                query=query,
+                penalty=self._PENALTY_BOILERPLATE,
+            )
+            score += boilerplate_penalty
+
         # Ensure score doesn't exceed 1.0
         result.similarity_score = min(1.0, score)
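The `BoilerplateFilter` itself lives in the new `core/boilerplate.py`, which this diff lists but does not show, so the sketch below only illustrates the intended ranking effect. The name list and the "query mentions it" exemption are assumptions; the -0.15 penalty and the `min(1.0, score)` clamp come from the hunk above.

# Illustrative only: the real rules live in mcp_vector_search/core/boilerplate.py.
BOILERPLATE_NAMES = {"python": {"__init__", "__repr__", "__str__", "setUp", "tearDown"}}


def boilerplate_penalty(name: str, language: str, query: str, penalty: float = -0.15) -> float:
    """Return a negative adjustment for boilerplate names the query did not ask for."""
    if name in BOILERPLATE_NAMES.get(language, set()) and name.lower() not in query.lower():
        return penalty
    return 0.0


score = 0.82 + boilerplate_penalty("__init__", "python", "database connection setup")
print(round(min(1.0, score), 2))  # 0.67 - the constructor ranks below more specific matches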