mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/scheduler.py

@@ -73,7 +73,7 @@ class SchedulerManager:
         project_root = str(self.project_root)

         # Create wrapper script
-        script_content = f'''#!/bin/bash
+        script_content = f"""#!/bin/bash
 # MCP Vector Search Auto-Reindex - {task_name}
 cd "{project_root}" || exit 1

@@ -85,7 +85,7 @@ elif [ -f "{python_path}" ]; then
 else
     python3 -m mcp_vector_search auto-index check --auto-reindex --max-files 10
 fi
-'''
+"""

         # Write script to temp file
         script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -109,7 +109,7 @@ fi

         # Get current crontab
         try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["crontab", "-l"], capture_output=True, text=True, check=True
             )
             current_crontab = result.stdout
@@ -125,7 +125,7 @@ fi
         new_crontab = current_crontab + cron_entry

         # Install new crontab
-        process = subprocess.Popen(
+        process = subprocess.Popen( # nosec B607
             ["crontab", "-"], stdin=subprocess.PIPE, text=True
         )
         process.communicate(input=new_crontab)
@@ -148,7 +148,7 @@ fi
         try:
             # Get current crontab
             try:
-                result = subprocess.run(
+                result = subprocess.run( # nosec B607
                     ["crontab", "-l"], capture_output=True, text=True, check=True
                 )
                 current_crontab = result.stdout
@@ -163,13 +163,13 @@ fi

             # Install new crontab
             if new_crontab.strip():
-                process = subprocess.Popen(
+                process = subprocess.Popen( # nosec B607
                     ["crontab", "-"], stdin=subprocess.PIPE, text=True
                 )
                 process.communicate(input=new_crontab)
             else:
                 # Remove crontab entirely if empty
-                subprocess.run(["crontab", "-r"], check=False)
+                subprocess.run(["crontab", "-r"], check=False) # nosec B607

             # Remove script file
             script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -191,7 +191,7 @@ fi
         project_root = str(self.project_root)

         # Create PowerShell script
-        script_content = f'''# MCP Vector Search Auto-Reindex - {task_name}
+        script_content = f"""# MCP Vector Search Auto-Reindex - {task_name}
 Set-Location "{project_root}"

 try {{
@@ -205,7 +205,7 @@ try {{
 }} catch {{
     # Silently ignore errors
 }}
-'''
+"""

         # Write script
         script_dir = Path.home() / ".mcp-vector-search" / "scripts"
@@ -302,7 +302,7 @@ try {{
     def _get_cron_status(self, task_name: str) -> dict:
         """Get cron job status."""
         try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["crontab", "-l"], capture_output=True, text=True, check=True
             )

@@ -315,7 +315,7 @@ try {{
     def _get_windows_task_status(self, task_name: str) -> dict:
         """Get Windows task status."""
        try:
-            result = subprocess.run(
+            result = subprocess.run( # nosec B607
                 ["schtasks", "/query", "/tn", task_name], capture_output=True, text=True
             )

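The `# nosec B607` annotations above suppress Bandit's partial-executable-path warning for the fixed-argument `crontab` and `schtasks` calls, and the wrapper-script f-strings switch from `'''` to `"""`. For orientation, here is a minimal sketch of the crontab read-append-install round-trip these calls implement; the helper name and the example cron line are illustrative, not taken from the package.

# Minimal sketch of the crontab round-trip, assuming a POSIX system with
# `crontab` on PATH (the partial path is exactly what Bandit B607 flags).
import subprocess


def append_cron_entry(cron_entry: str) -> None:
    """Append one entry to the current user's crontab."""
    result = subprocess.run(  # nosec B607 - fixed argument list, no shell
        ["crontab", "-l"], capture_output=True, text=True, check=False
    )
    current = result.stdout if result.returncode == 0 else ""

    process = subprocess.Popen(  # nosec B607
        ["crontab", "-"], stdin=subprocess.PIPE, text=True
    )
    process.communicate(input=current + cron_entry)


# Hypothetical usage: re-run the generated wrapper script every 30 minutes.
# append_cron_entry("*/30 * * * * ~/.mcp-vector-search/scripts/reindex.sh\n")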
mcp_vector_search/core/search.py

@@ -1,5 +1,6 @@
 """Semantic search engine for MCP Vector Search."""

+import asyncio
 import re
 import time
 from collections import OrderedDict
@@ -11,8 +12,9 @@ from loguru import logger

 from ..config.constants import DEFAULT_CACHE_SIZE
 from .auto_indexer import AutoIndexer, SearchTriggeredIndexer
+from .boilerplate import BoilerplateFilter
 from .database import VectorDatabase
-from .exceptions import SearchError
+from .exceptions import RustPanicError, SearchError
 from .models import SearchResult


@@ -67,6 +69,7 @@ class SemanticSearchEngine:
     _BOOST_SHALLOW_PATH = 0.02
     _PENALTY_TEST_FILE = -0.02
     _PENALTY_DEEP_PATH = -0.01
+    _PENALTY_BOILERPLATE = -0.15

     def __init__(
         self,
@@ -106,6 +109,156 @@ class SemanticSearchEngine:
         self._last_health_check: float = 0.0
         self._health_check_interval: float = 60.0

+        # Boilerplate filter for smart result ranking
+        self._boilerplate_filter = BoilerplateFilter()
+
+    @staticmethod
+    def _is_rust_panic_error(error: Exception) -> bool:
+        """Detect ChromaDB Rust panic errors.
+
+        Args:
+            error: Exception to check
+
+        Returns:
+            True if this is a Rust panic error
+        """
+        error_msg = str(error).lower()
+
+        # Check for the specific Rust panic pattern
+        # "range start index X out of range for slice of length Y"
+        if "range start index" in error_msg and "out of range" in error_msg:
+            return True
+
+        # Check for other Rust panic indicators
+        rust_panic_patterns = [
+            "rust panic",
+            "pyo3_runtime.panicexception",
+            "thread 'tokio-runtime-worker' panicked",
+            "rust/sqlite/src/db.rs", # Specific to the known ChromaDB issue
+        ]
+
+        return any(pattern in error_msg for pattern in rust_panic_patterns)
+
+    @staticmethod
+    def _is_corruption_error(error: Exception) -> bool:
+        """Detect index corruption errors.
+
+        Args:
+            error: Exception to check
+
+        Returns:
+            True if this is a corruption error
+        """
+        error_msg = str(error).lower()
+
+        corruption_indicators = [
+            "pickle",
+            "unpickling",
+            "eof",
+            "ran out of input",
+            "hnsw",
+            "deserialize",
+            "corrupt",
+        ]
+
+        return any(indicator in error_msg for indicator in corruption_indicators)
+
+    async def _search_with_retry(
+        self,
+        query: str,
+        limit: int,
+        filters: dict[str, Any] | None,
+        threshold: float,
+        max_retries: int = 3,
+    ) -> list[SearchResult]:
+        """Execute search with retry logic and exponential backoff.
+
+        Args:
+            query: Processed search query
+            limit: Maximum number of results
+            filters: Optional filters
+            threshold: Similarity threshold
+            max_retries: Maximum retry attempts (default: 3)
+
+        Returns:
+            List of search results
+
+        Raises:
+            RustPanicError: If Rust panic persists after retries
+            SearchError: If search fails for other reasons
+        """
+        last_error = None
+        backoff_delays = [0, 0.1, 0.5] # Immediate, 100ms, 500ms
+
+        for attempt in range(max_retries):
+            try:
+                # Add delay for retries (exponential backoff)
+                if attempt > 0 and backoff_delays[attempt] > 0:
+                    await asyncio.sleep(backoff_delays[attempt])
+                    logger.debug(
+                        f"Retrying search after {backoff_delays[attempt]}s delay (attempt {attempt + 1}/{max_retries})"
+                    )
+
+                # Perform the actual search
+                results = await self.database.search(
+                    query=query,
+                    limit=limit,
+                    filters=filters,
+                    similarity_threshold=threshold,
+                )
+
+                # Success! If we had retries, log that we recovered
+                if attempt > 0:
+                    logger.info(
+                        f"Search succeeded after {attempt + 1} attempts (recovered from transient error)"
+                    )
+
+                return results
+
+            except BaseException as e:
+                # Re-raise system exceptions we should never catch
+                if isinstance(e, KeyboardInterrupt | SystemExit | GeneratorExit):
+                    raise
+
+                last_error = e
+
+                # Check if this is a Rust panic
+                if self._is_rust_panic_error(e):
+                    logger.warning(
+                        f"ChromaDB Rust panic detected (attempt {attempt + 1}/{max_retries}): {e}"
+                    )
+
+                    # If this is the last retry, escalate to corruption recovery
+                    if attempt == max_retries - 1:
+                        logger.error(
+                            "Rust panic persisted after all retries - index may be corrupted"
+                        )
+                        raise RustPanicError(
+                            "ChromaDB Rust panic detected. The HNSW index may be corrupted. "
+                            "Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
+                        ) from e
+
+                    # Otherwise, continue to next retry
+                    continue
+
+                # Check for general corruption
+                elif self._is_corruption_error(e):
+                    logger.error(f"Index corruption detected: {e}")
+                    raise SearchError(
+                        "Index corruption detected. Please run 'mcp-vector-search reset' "
+                        "followed by 'mcp-vector-search index' to rebuild."
+                    ) from e
+
+                # Some other error - don't retry, just fail
+                else:
+                    logger.error(f"Search failed: {e}")
+                    raise SearchError(f"Search failed: {e}") from e
+
+        # Should never reach here, but just in case
+        raise SearchError(
+            f"Search failed after {max_retries} retries: {last_error}"
+        ) from last_error
+
     async def search(
         self,
         query: str,
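Taken together, `_is_rust_panic_error` and `_is_corruption_error` decide whether a failure is retried, escalated to `RustPanicError`, or reported as corruption. The sketch below condenses that decision into one standalone function to show which messages land where; the sample messages are constructed to match the patterns above, not captured from real failures.

# Condensed, illustrative version of the classification used by _search_with_retry.
def classify(error: Exception) -> str:
    msg = str(error).lower()
    rust_panic = ("range start index" in msg and "out of range" in msg) or any(
        p in msg
        for p in (
            "rust panic",
            "pyo3_runtime.panicexception",
            "thread 'tokio-runtime-worker' panicked",
        )
    )
    if rust_panic:
        return "rust panic -> retry, then RustPanicError"
    if any(i in msg for i in ("pickle", "unpickling", "eof", "ran out of input",
                              "hnsw", "deserialize", "corrupt")):
        return "corruption -> SearchError with reset/index guidance"
    return "other -> SearchError, no retry"


print(classify(RuntimeError("range start index 8 out of range for slice of length 4")))
print(classify(RuntimeError("EOFError: ran out of input while loading hnsw segment")))
print(classify(RuntimeError("connection refused")))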
@@ -162,12 +315,12 @@ class SemanticSearchEngine:
             # Preprocess query
             processed_query = self._preprocess_query(query)

-            # Perform vector search
-            results = await self.database.search(
+            # Perform vector search with retry logic
+            results = await self._search_with_retry(
                 query=processed_query,
                 limit=limit,
                 filters=filters,
-                similarity_threshold=threshold,
+                threshold=threshold,
             )

             # Post-process results
@@ -184,32 +337,13 @@ class SemanticSearchEngine:
             )
             return ranked_results

+        except (RustPanicError, SearchError):
+            # These errors are already properly formatted with user guidance
+            raise
         except Exception as e:
-            error_msg = str(e).lower()
-            # Check for corruption indicators
-            if any(
-                indicator in error_msg
-                for indicator in [
-                    "pickle",
-                    "unpickling",
-                    "eof",
-                    "ran out of input",
-                    "hnsw",
-                    "index",
-                    "deserialize",
-                    "corrupt",
-                ]
-            ):
-                logger.error(f"Index corruption detected during search: {e}")
-                logger.info(
-                    "The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
-                )
-                raise SearchError(
-                    "Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
-                ) from e
-            else:
-                logger.error(f"Search failed for query '{query}': {e}")
-                raise SearchError(f"Search failed: {e}") from e
+            # Unexpected error - wrap it in SearchError
+            logger.error(f"Unexpected search error for query '{query}': {e}")
+            raise SearchError(f"Search failed: {e}") from e

     async def search_similar(
         self,
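For callers, the practical effect of this hunk is a simpler error contract: `search()` re-raises `RustPanicError` and `SearchError` untouched and wraps anything else in `SearchError`. A small usage sketch, assuming an already constructed `SemanticSearchEngine` instance (setup elided here):

# Sketch only: how a caller might consume the new error contract.
from mcp_vector_search.core.exceptions import RustPanicError, SearchError


async def run_query(engine, query: str):
    try:
        return await engine.search(query)
    except RustPanicError:
        # Persistent ChromaDB panic: the index likely needs a rebuild.
        print("Run 'mcp-vector-search reset' then 'mcp-vector-search index'.")
    except SearchError as exc:
        # Corruption and all other failures arrive here, already wrapped.
        print(f"Search failed: {exc}")
    return []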
@@ -470,6 +604,11 @@ class SemanticSearchEngine:
                 result.context_before = context_before
                 result.context_after = context_after

+            except FileNotFoundError:
+                # File was deleted since indexing - silently skip context
+                # This is normal when index is stale; use --force to reindex
+                logger.debug(f"File no longer exists (stale index): {result.file_path}")
+                result.file_missing = True # Mark for potential filtering
             except Exception as e:
                 logger.warning(f"Failed to get context for {result.file_path}: {e}")

@@ -562,6 +701,17 @@ class SemanticSearchEngine:
         elif path_depth > 5:
             score += self._PENALTY_DEEP_PATH

+        # Factor 7: Boilerplate penalty (penalize common boilerplate patterns)
+        # Apply penalty to function names (constructors, lifecycle methods, etc.)
+        if result.function_name:
+            boilerplate_penalty = self._boilerplate_filter.get_penalty(
+                name=result.function_name,
+                language=result.language,
+                query=query,
+                penalty=self._PENALTY_BOILERPLATE,
+            )
+            score += boilerplate_penalty
+
         # Ensure score doesn't exceed 1.0
         result.similarity_score = min(1.0, score)
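The `BoilerplateFilter` itself lives in the new `core/boilerplate.py`, which this diff lists but does not show, so the sketch below only illustrates the intended ranking effect. The name list and the "query mentions it" exemption are assumptions; the -0.15 penalty and the `min(1.0, score)` clamp come from the hunk above.

# Illustrative only: the real rules live in mcp_vector_search/core/boilerplate.py.
BOILERPLATE_NAMES = {"python": {"__init__", "__repr__", "__str__", "setUp", "tearDown"}}


def boilerplate_penalty(name: str, language: str, query: str, penalty: float = -0.15) -> float:
    """Return a negative adjustment for boilerplate names the query did not ask for."""
    if name in BOILERPLATE_NAMES.get(language, set()) and name.lower() not in query.lower():
        return penalty
    return 0.0


score = 0.82 + boilerplate_penalty("__init__", "python", "database connection setup")
print(round(min(1.0, score), 2))  # 0.67 - the constructor ranks below more specific matches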