mcp-vector-search 0.12.6__py3-none-any.whl → 1.1.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. mcp_vector_search/__init__.py +3 -3
  2. mcp_vector_search/analysis/__init__.py +111 -0
  3. mcp_vector_search/analysis/baseline/__init__.py +68 -0
  4. mcp_vector_search/analysis/baseline/comparator.py +462 -0
  5. mcp_vector_search/analysis/baseline/manager.py +621 -0
  6. mcp_vector_search/analysis/collectors/__init__.py +74 -0
  7. mcp_vector_search/analysis/collectors/base.py +164 -0
  8. mcp_vector_search/analysis/collectors/cohesion.py +463 -0
  9. mcp_vector_search/analysis/collectors/complexity.py +743 -0
  10. mcp_vector_search/analysis/collectors/coupling.py +1162 -0
  11. mcp_vector_search/analysis/collectors/halstead.py +514 -0
  12. mcp_vector_search/analysis/collectors/smells.py +325 -0
  13. mcp_vector_search/analysis/debt.py +516 -0
  14. mcp_vector_search/analysis/interpretation.py +685 -0
  15. mcp_vector_search/analysis/metrics.py +414 -0
  16. mcp_vector_search/analysis/reporters/__init__.py +7 -0
  17. mcp_vector_search/analysis/reporters/console.py +646 -0
  18. mcp_vector_search/analysis/reporters/markdown.py +480 -0
  19. mcp_vector_search/analysis/reporters/sarif.py +377 -0
  20. mcp_vector_search/analysis/storage/__init__.py +93 -0
  21. mcp_vector_search/analysis/storage/metrics_store.py +762 -0
  22. mcp_vector_search/analysis/storage/schema.py +245 -0
  23. mcp_vector_search/analysis/storage/trend_tracker.py +560 -0
  24. mcp_vector_search/analysis/trends.py +308 -0
  25. mcp_vector_search/analysis/visualizer/__init__.py +90 -0
  26. mcp_vector_search/analysis/visualizer/d3_data.py +534 -0
  27. mcp_vector_search/analysis/visualizer/exporter.py +484 -0
  28. mcp_vector_search/analysis/visualizer/html_report.py +2895 -0
  29. mcp_vector_search/analysis/visualizer/schemas.py +525 -0
  30. mcp_vector_search/cli/commands/analyze.py +1062 -0
  31. mcp_vector_search/cli/commands/chat.py +1455 -0
  32. mcp_vector_search/cli/commands/index.py +621 -5
  33. mcp_vector_search/cli/commands/index_background.py +467 -0
  34. mcp_vector_search/cli/commands/init.py +13 -0
  35. mcp_vector_search/cli/commands/install.py +597 -335
  36. mcp_vector_search/cli/commands/install_old.py +8 -4
  37. mcp_vector_search/cli/commands/mcp.py +78 -6
  38. mcp_vector_search/cli/commands/reset.py +68 -26
  39. mcp_vector_search/cli/commands/search.py +224 -8
  40. mcp_vector_search/cli/commands/setup.py +1184 -0
  41. mcp_vector_search/cli/commands/status.py +339 -5
  42. mcp_vector_search/cli/commands/uninstall.py +276 -357
  43. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  44. mcp_vector_search/cli/commands/visualize/cli.py +292 -0
  45. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  46. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  47. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +33 -0
  48. mcp_vector_search/cli/commands/visualize/graph_builder.py +647 -0
  49. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  50. mcp_vector_search/cli/commands/visualize/server.py +600 -0
  51. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  52. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  53. mcp_vector_search/cli/commands/visualize/templates/base.py +234 -0
  54. mcp_vector_search/cli/commands/visualize/templates/scripts.py +4542 -0
  55. mcp_vector_search/cli/commands/visualize/templates/styles.py +2522 -0
  56. mcp_vector_search/cli/didyoumean.py +27 -2
  57. mcp_vector_search/cli/main.py +127 -160
  58. mcp_vector_search/cli/output.py +158 -13
  59. mcp_vector_search/config/__init__.py +4 -0
  60. mcp_vector_search/config/default_thresholds.yaml +52 -0
  61. mcp_vector_search/config/settings.py +12 -0
  62. mcp_vector_search/config/thresholds.py +273 -0
  63. mcp_vector_search/core/__init__.py +16 -0
  64. mcp_vector_search/core/auto_indexer.py +3 -3
  65. mcp_vector_search/core/boilerplate.py +186 -0
  66. mcp_vector_search/core/config_utils.py +394 -0
  67. mcp_vector_search/core/database.py +406 -94
  68. mcp_vector_search/core/embeddings.py +24 -0
  69. mcp_vector_search/core/exceptions.py +11 -0
  70. mcp_vector_search/core/git.py +380 -0
  71. mcp_vector_search/core/git_hooks.py +4 -4
  72. mcp_vector_search/core/indexer.py +632 -54
  73. mcp_vector_search/core/llm_client.py +756 -0
  74. mcp_vector_search/core/models.py +91 -1
  75. mcp_vector_search/core/project.py +17 -0
  76. mcp_vector_search/core/relationships.py +473 -0
  77. mcp_vector_search/core/scheduler.py +11 -11
  78. mcp_vector_search/core/search.py +179 -29
  79. mcp_vector_search/mcp/server.py +819 -9
  80. mcp_vector_search/parsers/python.py +285 -5
  81. mcp_vector_search/utils/__init__.py +2 -0
  82. mcp_vector_search/utils/gitignore.py +0 -3
  83. mcp_vector_search/utils/gitignore_updater.py +212 -0
  84. mcp_vector_search/utils/monorepo.py +66 -4
  85. mcp_vector_search/utils/timing.py +10 -6
  86. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/METADATA +184 -53
  87. mcp_vector_search-1.1.22.dist-info/RECORD +120 -0
  88. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/WHEEL +1 -1
  89. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/entry_points.txt +1 -0
  90. mcp_vector_search/cli/commands/visualize.py +0 -1467
  91. mcp_vector_search-0.12.6.dist-info/RECORD +0 -68
  92. {mcp_vector_search-0.12.6.dist-info → mcp_vector_search-1.1.22.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/database.py
@@ -44,11 +44,14 @@ class VectorDatabase(ABC):
         ...
 
     @abstractmethod
-    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
-        """Add code chunks to the database.
+    async def add_chunks(
+        self, chunks: list[CodeChunk], metrics: dict[str, Any] | None = None
+    ) -> None:
+        """Add code chunks to the database with optional structural metrics.
 
         Args:
             chunks: List of code chunks to add
+            metrics: Optional dict mapping chunk IDs to ChunkMetrics.to_metadata() dicts
         """
         ...
 
@@ -148,6 +151,7 @@ class ChromaVectorDatabase(VectorDatabase):
         self.collection_name = collection_name
         self._client = None
         self._collection = None
+        self._recovery_attempted = False  # Guard against infinite recursion
 
     async def initialize(self) -> None:
         """Initialize ChromaDB client and collection with corruption recovery."""
@@ -157,49 +161,144 @@ class ChromaVectorDatabase(VectorDatabase):
             # Ensure directory exists
             self.persist_directory.mkdir(parents=True, exist_ok=True)
 
-            # Check for corruption before initializing
+            # LAYER 1: Check for corruption before initializing (SQLite + HNSW checks)
             await self._detect_and_recover_corruption()
 
-            # Create client with new API
-            self._client = chromadb.PersistentClient(
-                path=str(self.persist_directory),
-                settings=chromadb.Settings(
-                    anonymized_telemetry=False,
-                    allow_reset=True,
-                ),
-            )
+            # LAYER 2: Wrap ChromaDB initialization with Rust panic detection
+            try:
+                # Create client with new API
+                self._client = chromadb.PersistentClient(
+                    path=str(self.persist_directory),
+                    settings=chromadb.Settings(
+                        anonymized_telemetry=False,
+                        allow_reset=True,
+                    ),
+                )
 
-            # Create or get collection
-            self._collection = self._client.get_or_create_collection(
-                name=self.collection_name,
-                embedding_function=self.embedding_function,
-                metadata={
-                    "description": "Semantic code search collection",
-                },
-            )
+                # Create or get collection
+                self._collection = self._client.get_or_create_collection(
+                    name=self.collection_name,
+                    embedding_function=self.embedding_function,
+                    metadata={
+                        "description": "Semantic code search collection",
+                    },
+                )
+
+                # Reset recovery flag on successful initialization
+                self._recovery_attempted = False
+
+                logger.debug(f"ChromaDB initialized at {self.persist_directory}")
+
+            except BaseException as init_error:
+                # Re-raise system exceptions we should never catch
+                if isinstance(
+                    init_error, KeyboardInterrupt | SystemExit | GeneratorExit
+                ):
+                    raise
+
+                # LAYER 2: Detect Rust panic patterns during initialization
+                error_msg = str(init_error).lower()
+
+                # Rust panic patterns (common ChromaDB Rust panics)
+                rust_panic_patterns = [
+                    "range start index",
+                    "out of range",
+                    "panic",
+                    "thread panicked",
+                    "slice of length",
+                    "index out of bounds",
+                ]
+
+                if any(pattern in error_msg for pattern in rust_panic_patterns):
+                    logger.warning(
+                        f"Rust panic detected during ChromaDB initialization: {init_error}"
+                    )
+                    logger.info(
+                        "Attempting automatic recovery from database corruption..."
+                    )
+                    await self._recover_from_corruption()
+
+                    # Retry initialization ONCE after recovery
+                    try:
+                        logger.info(
+                            "Retrying ChromaDB initialization after recovery..."
+                        )
+                        self._client = chromadb.PersistentClient(
+                            path=str(self.persist_directory),
+                            settings=chromadb.Settings(
+                                anonymized_telemetry=False,
+                                allow_reset=True,
+                            ),
+                        )
+
+                        self._collection = self._client.get_or_create_collection(
+                            name=self.collection_name,
+                            embedding_function=self.embedding_function,
+                            metadata={
+                                "description": "Semantic code search collection",
+                            },
+                        )
+
+                        logger.info("ChromaDB successfully initialized after recovery")
 
-            logger.debug(f"ChromaDB initialized at {self.persist_directory}")
+                    except BaseException as retry_error:
+                        # Re-raise system exceptions
+                        if isinstance(
+                            retry_error, KeyboardInterrupt | SystemExit | GeneratorExit
+                        ):
+                            raise
 
+                        logger.error(
+                            f"Failed to recover from database corruption: {retry_error}"
+                        )
+                        # Mark recovery as attempted to prevent infinite loops
+                        self._recovery_attempted = True
+                        raise DatabaseError(
+                            f"Failed to recover from database corruption. "
+                            f"Please run 'mcp-vector-search reset index' to clear the database. "
+                            f"Error: {retry_error}"
+                        ) from retry_error
+                else:
+                    # Not a Rust panic, re-raise original exception
+                    raise
+
+        except (DatabaseError, DatabaseInitializationError):
+            # Re-raise our own errors without re-processing
+            raise
         except Exception as e:
-            # Check if this is a corruption error
+            # Check if this is a corruption error (legacy detection for backward compatibility)
            error_msg = str(e).lower()
-            if any(
-                indicator in error_msg
-                for indicator in [
-                    "pickle",
-                    "unpickling",
-                    "eof",
-                    "ran out of input",
-                    "hnsw",
-                    "index",
-                    "deserialize",
-                    "corrupt",
-                ]
-            ):
+            corruption_indicators = [
+                "pickle",
+                "unpickling",
+                "eof",
+                "ran out of input",
+                "hnsw",
+                "index",
+                "deserialize",
+                "corrupt",
+                "file is not a database",  # SQLite corruption
+                "database error",  # ChromaDB database errors
+            ]
+
+            if any(indicator in error_msg for indicator in corruption_indicators):
+                # Prevent infinite recursion - only attempt recovery once
+                if self._recovery_attempted:
+                    logger.error(
+                        f"Recovery already attempted but corruption persists: {e}"
+                    )
+                    raise DatabaseInitializationError(
+                        f"Failed to recover from database corruption. "
+                        f"Please run 'mcp-vector-search reset index' to clear and rebuild the database. Error: {e}"
+                    ) from e
+
                 logger.warning(f"Detected index corruption: {e}")
+                self._recovery_attempted = True
+
                 # Try to recover
                 await self._recover_from_corruption()
-                # Retry initialization
+
+                # Retry initialization ONE TIME
                 await self.initialize()
             else:
                 logger.error(f"Failed to initialize ChromaDB: {e}")
@@ -245,8 +344,16 @@ class ChromaVectorDatabase(VectorDatabase):
         self._collection = None
         logger.debug("ChromaDB connections closed")
 
-    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
-        """Add code chunks to the database."""
+    async def add_chunks(
+        self, chunks: list[CodeChunk], metrics: dict[str, Any] | None = None
+    ) -> None:
+        """Add code chunks to the database with optional structural metrics.
+
+        Args:
+            chunks: List of code chunks to add
+            metrics: Optional dict mapping chunk IDs to ChunkMetrics.to_metadata() dicts
+                Example: {"chunk_id_1": {"cognitive_complexity": 5, ...}, ...}
+        """
         if not self._collection:
             raise DatabaseNotInitializedError("Database not initialized")
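For orientation, the optional metrics parameter changes how indexing code calls this method. A minimal hedged sketch of a caller, assuming a database instance and a list of parsed chunks are already in hand (the metric keys mirror the ones used elsewhere in this diff; the concrete values are illustrative):

    # Hypothetical caller, awaited from async code: map chunk IDs to metric dicts.
    metrics = {
        chunk.chunk_id: {"cognitive_complexity": 5, "smell_count": 0}  # illustrative values
        for chunk in chunks
        if chunk.chunk_id
    }
    await database.add_chunks(chunks, metrics=metrics)

Chunks without an entry in the mapping are simply stored without the extra metric fields, since the merge below only updates metadata when the chunk ID is present in the dict.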
 
@@ -259,11 +366,27 @@ class ChromaVectorDatabase(VectorDatabase):
         ids = []
 
         for chunk in chunks:
-            # Create searchable text
-            searchable_text = self._create_searchable_text(chunk)
-            documents.append(searchable_text)
+            # Debug: Check first chunk content
+            if len(documents) == 0:
+                import sys
+
+                has_meta = "Language:" in chunk.content and "File:" in chunk.content
+                print("\n[DATABASE] First chunk content check:", file=sys.stderr)
+                print(f" Type: {chunk.chunk_type}", file=sys.stderr)
+                print(f" File: {chunk.file_path.name}", file=sys.stderr)
+                print(
+                    f" Has metadata IN chunk.content: {has_meta}", file=sys.stderr
+                )
+                print(
+                    f" Last 100 chars: {repr(chunk.content[-100:])}",
+                    file=sys.stderr,
+                )
 
-            # Create metadata
+            # Store original content directly in documents (no metadata appended)
+            # The embedding will be created from the original content
+            documents.append(chunk.content)
+
+            # Create metadata (searchable fields as metadata, not appended to content)
             metadata = {
                 "file_path": str(chunk.file_path),
                 "start_line": chunk.start_line,
@@ -288,6 +411,12 @@ class ChromaVectorDatabase(VectorDatabase):
                 "subproject_name": chunk.subproject_name or "",
                 "subproject_path": chunk.subproject_path or "",
             }
+
+            # Merge structural metrics if provided
+            if metrics and chunk.chunk_id and chunk.chunk_id in metrics:
+                chunk_metrics = metrics[chunk.chunk_id]
+                metadata.update(chunk_metrics)
+
             metadatas.append(metadata)
 
             # Use chunk ID
@@ -347,6 +476,34 @@ class ChromaVectorDatabase(VectorDatabase):
                 similarity = max(0.0, 1.0 / (1.0 + distance))
 
                 if similarity >= similarity_threshold:
+                    # Document contains the original content (no metadata appended)
+                    # Parse code smells from JSON if present
+                    code_smells = []
+                    if "code_smells" in metadata:
+                        try:
+                            code_smells = json.loads(metadata["code_smells"])
+                        except (json.JSONDecodeError, TypeError):
+                            code_smells = []
+
+                    # Calculate quality score from metrics (0-100 scale)
+                    quality_score = None
+                    if (
+                        "cognitive_complexity" in metadata
+                        and "smell_count" in metadata
+                    ):
+                        # Simple quality score: penalize complexity and smells
+                        complexity = metadata["cognitive_complexity"]
+                        smells = metadata["smell_count"]
+
+                        # Start with 100, penalize for complexity and smells
+                        score = 100
+                        # Complexity penalty: -2 points per complexity unit
+                        score -= min(50, complexity * 2)
+                        # Smell penalty: -10 points per smell
+                        score -= min(30, smells * 10)
+
+                        quality_score = max(0, score)
+
                     result = SearchResult(
                         content=doc,
                         file_path=Path(metadata["file_path"]),
@@ -358,6 +515,16 @@ class ChromaVectorDatabase(VectorDatabase):
                         chunk_type=metadata.get("chunk_type", "code"),
                         function_name=metadata.get("function_name") or None,
                         class_name=metadata.get("class_name") or None,
+                        # Quality metrics from structural analysis
+                        cognitive_complexity=metadata.get("cognitive_complexity"),
+                        cyclomatic_complexity=metadata.get("cyclomatic_complexity"),
+                        max_nesting_depth=metadata.get("max_nesting_depth"),
+                        parameter_count=metadata.get("parameter_count"),
+                        lines_of_code=metadata.get("lines_of_code"),
+                        complexity_grade=metadata.get("complexity_grade"),
+                        code_smells=code_smells,
+                        smell_count=metadata.get("smell_count"),
+                        quality_score=quality_score,
                     )
                     search_results.append(result)
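To make the scoring heuristic above concrete, here is the same arithmetic as a standalone sketch; the function name is ours, not an API exposed by the package:

    # Restatement of the quality-score heuristic added in the hunk above.
    def quality_score(cognitive_complexity: int, smell_count: int) -> int:
        score = 100
        score -= min(50, cognitive_complexity * 2)  # complexity penalty, capped at 50
        score -= min(30, smell_count * 10)          # smell penalty, capped at 30
        return max(0, score)

    # Example: cognitive complexity 12 and 2 smells -> 100 - 24 - 20 = 56
    assert quality_score(12, 2) == 56

Because both penalties are capped, a chunk that carries these metrics can never score below 20.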
 
@@ -507,6 +674,7 @@ class ChromaVectorDatabase(VectorDatabase):
         if results and results.get("ids"):
             for i, _chunk_id in enumerate(results["ids"]):
                 metadata = results["metadatas"][i]
+                # Document now contains the original content (no metadata appended)
                 content = results["documents"][i]
 
                 # Parse JSON strings back to lists/dicts
@@ -560,6 +728,9 @@ class ChromaVectorDatabase(VectorDatabase):
 
     def _create_searchable_text(self, chunk: CodeChunk) -> str:
         """Create optimized searchable text from code chunk."""
+        import sys
+
+        print("WARNING: _create_searchable_text IS BEING CALLED!", file=sys.stderr)
         parts = [chunk.content]
 
         # Add contextual information
@@ -579,7 +750,24 @@ class ChromaVectorDatabase(VectorDatabase):
         return "\n".join(parts)
 
     def _build_where_clause(self, filters: dict[str, Any]) -> dict[str, Any]:
-        """Build ChromaDB where clause from filters."""
+        """Build ChromaDB where clause from filters.
+
+        Supports filtering by:
+        - language, file_path, chunk_type (standard fields)
+        - complexity_grade (A, B, C, D, F)
+        - smell_count (0, >0)
+        - cognitive_complexity (range queries using $and)
+
+        Args:
+            filters: Dictionary of filter criteria
+
+        Returns:
+            ChromaDB where clause
+        """
+        # If filters already contain ChromaDB operators ($and, $or), pass through
+        if "$and" in filters or "$or" in filters:
+            return filters
+
         where = {}
 
         for key, value in filters.items():
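A hedged illustration of the filter shapes this docstring describes; the db instance and the concrete values are assumptions for the example, and the dict-operator branch it relies on is added in the next hunk:

    # Plain equality and "!"-prefixed negation (existing behaviour)
    db._build_where_clause({"language": "python", "chunk_type": "!comment"})
    # -> {"language": "python", "chunk_type": {"$ne": "comment"}}

    # Operator query on a metric field (handled by the new isinstance(value, dict) branch)
    db._build_where_clause({"cognitive_complexity": {"$gte": 10}})

    # Pre-built ChromaDB clauses with $and/$or pass through unchanged
    db._build_where_clause({"$and": [{"complexity_grade": "D"}, {"smell_count": {"$gt": 0}}]})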
@@ -587,46 +775,140 @@ class ChromaVectorDatabase(VectorDatabase):
                 where[key] = {"$in": value}
             elif isinstance(value, str) and value.startswith("!"):
                 where[key] = {"$ne": value[1:]}
+            elif isinstance(value, dict):
+                # Support operator queries like {"$gte": 10}
+                where[key] = value
             else:
                 where[key] = value
 
         return where
 
     async def _detect_and_recover_corruption(self) -> None:
-        """Detect and recover from index corruption proactively."""
-        # Check for common corruption indicators in ChromaDB files
+        """Detect and recover from index corruption proactively.
+
+        This method checks for:
+        1. SQLite database corruption (LAYER 1: Pre-initialization check)
+        2. HNSW pickle file corruption
+        3. Metadata/data inconsistencies
+        4. File size anomalies
+        """
+        # LAYER 1: Check SQLite database integrity FIRST (before ChromaDB initialization)
         chroma_db_path = self.persist_directory / "chroma.sqlite3"
 
         # If database doesn't exist yet, nothing to check
         if not chroma_db_path.exists():
             return
 
+        # SQLite integrity check - catches corruption BEFORE Rust panic
+        try:
+            import sqlite3
+
+            logger.debug("Running SQLite integrity check...")
+            conn = sqlite3.connect(str(chroma_db_path))
+            cursor = conn.execute("PRAGMA quick_check")
+            result = cursor.fetchone()[0]
+            conn.close()
+
+            if result != "ok":
+                logger.warning(f"SQLite database corruption detected: {result}")
+                logger.info("Initiating automatic recovery from database corruption...")
+                await self._recover_from_corruption()
+                return
+
+            logger.debug("SQLite integrity check passed")
+
+        except sqlite3.Error as e:
+            logger.warning(f"SQLite database error during integrity check: {e}")
+            logger.info("Initiating automatic recovery from database corruption...")
+            await self._recover_from_corruption()
+            return
+
         # Check for HNSW index files that might be corrupted
-        self.persist_directory / "chroma-collections.parquet"
         index_path = self.persist_directory / "index"
 
         if index_path.exists():
-            # Look for pickle files in the index
+            # Look for pickle files in the index (HNSW metadata)
             pickle_files = list(index_path.glob("**/*.pkl"))
             pickle_files.extend(list(index_path.glob("**/*.pickle")))
+            pickle_files.extend(list(index_path.glob("**/*.bin")))  # Binary HNSW files
+
+            logger.debug(
+                f"Checking {len(pickle_files)} HNSW index files for corruption..."
+            )
 
             for pickle_file in pickle_files:
                 try:
-                    # Try to read the pickle file to detect corruption
-                    import pickle
-
-                    with open(pickle_file, "rb") as f:
-                        pickle.load(f)
-                except (EOFError, pickle.UnpicklingError, Exception) as e:
-                    logger.warning(
-                        f"Corrupted index file detected: {pickle_file} - {e}"
-                    )
+                    # Check file size - suspiciously small files might be corrupted
+                    file_size = pickle_file.stat().st_size
+                    if file_size == 0:
+                        logger.warning(
+                            f"Empty HNSW index file detected: {pickle_file} (0 bytes)"
+                        )
+                        await self._recover_from_corruption()
+                        return
+
+                    # Only validate pickle files (not binary .bin files)
+                    if pickle_file.suffix in (".pkl", ".pickle"):
+                        # Try to read the pickle file to detect corruption
+                        import pickle  # nosec B403 # Trusted internal index files only
+
+                        with open(pickle_file, "rb") as f:
+                            data = pickle.load(f)  # nosec B301 # Trusted internal index files only
+
+                        # Additional validation: check if data structure is valid
+                        if data is None:
+                            logger.warning(
+                                f"HNSW index file contains None data: {pickle_file}"
+                            )
+                            await self._recover_from_corruption()
+                            return
+
+                        # Check for metadata consistency (if it's a dict)
+                        if isinstance(data, dict):
+                            # Look for known metadata keys that should exist
+                            if "space" in data and "dim" in data:
+                                # Validate dimensions are reasonable
+                                if data.get("dim", 0) <= 0:
+                                    logger.warning(
+                                        f"Invalid dimensions in HNSW index: {pickle_file} (dim={data.get('dim')})"
+                                    )
+                                    await self._recover_from_corruption()
+                                    return
+
+                except (EOFError, pickle.UnpicklingError) as e:
+                    logger.warning(f"Pickle corruption detected in {pickle_file}: {e}")
                     await self._recover_from_corruption()
                     return
+                except Exception as e:
+                    # Check if this is a Rust panic pattern
+                    error_msg = str(e).lower()
+                    if "range start index" in error_msg and "out of range" in error_msg:
+                        logger.warning(
+                            f"Rust panic pattern detected in {pickle_file}: {e}"
+                        )
+                        await self._recover_from_corruption()
+                        return
+                    else:
+                        logger.warning(
+                            f"Error reading HNSW index file {pickle_file}: {e}"
+                        )
+                        # Continue checking other files before deciding to recover
+                        continue
+
+            logger.debug("HNSW index files validation passed")
 
     async def _recover_from_corruption(self) -> None:
-        """Recover from index corruption by rebuilding the index."""
-        logger.info("Attempting to recover from index corruption...")
+        """Recover from index corruption by rebuilding the index.
+
+        This method:
+        1. Creates a timestamped backup of the corrupted index
+        2. Clears the corrupted index directory
+        3. Recreates the directory structure
+        4. Logs detailed recovery steps and instructions
+        """
+        logger.warning("=" * 80)
+        logger.warning("INDEX CORRUPTION DETECTED - Initiating recovery...")
+        logger.warning("=" * 80)
 
         # Create backup directory
         backup_dir = (
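The LAYER 1 pre-check in the hunk above uses only standard-library calls, so the same probe can be run by hand against a suspect index. A minimal sketch, assuming the index lives under whatever persist_directory a given project uses (the path below is illustrative):

    import sqlite3
    from pathlib import Path

    db_path = Path("path/to/persist_directory") / "chroma.sqlite3"  # illustrative location
    if db_path.exists():
        conn = sqlite3.connect(str(db_path))
        status = conn.execute("PRAGMA quick_check").fetchone()[0]  # "ok" when healthy
        conn.close()
        print("integrity ok" if status == "ok" else f"corruption reported: {status}")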
@@ -634,7 +916,7 @@ class ChromaVectorDatabase(VectorDatabase):
         )
         backup_dir.mkdir(exist_ok=True)
 
-        # Backup current state (in case we need it)
+        # Backup current state (in case we need it for debugging)
         import time
 
         timestamp = int(time.time())
@@ -643,24 +925,41 @@ class ChromaVectorDatabase(VectorDatabase):
         if self.persist_directory.exists():
             try:
                 shutil.copytree(self.persist_directory, backup_path)
-                logger.info(f"Created backup at {backup_path}")
+                logger.info(f"Created backup at {backup_path}")
             except Exception as e:
-                logger.warning(f"Could not create backup: {e}")
+                logger.warning(f"Could not create backup: {e}")
 
         # Clear the corrupted index
         if self.persist_directory.exists():
             try:
+                # Log what we're about to delete
+                total_size = sum(
+                    f.stat().st_size
+                    for f in self.persist_directory.rglob("*")
+                    if f.is_file()
+                )
+                logger.info(
+                    f"Clearing corrupted index ({total_size / 1024 / 1024:.2f} MB)..."
+                )
+
                 shutil.rmtree(self.persist_directory)
-                logger.info(f"Cleared corrupted index at {self.persist_directory}")
+                logger.info(f"Cleared corrupted index at {self.persist_directory}")
             except Exception as e:
-                logger.error(f"Failed to clear corrupted index: {e}")
+                logger.error(f"Failed to clear corrupted index: {e}")
                 raise IndexCorruptionError(
-                    f"Could not clear corrupted index: {e}"
+                    f"Could not clear corrupted index: {e}. "
+                    f"Please manually delete {self.persist_directory} and try again."
                 ) from e
 
         # Recreate the directory
         self.persist_directory.mkdir(parents=True, exist_ok=True)
-        logger.info("Index directory recreated. Please re-index your codebase.")
+        logger.info("Index directory recreated")
+
+        logger.warning("=" * 80)
+        logger.warning("RECOVERY COMPLETE - Next steps:")
+        logger.warning(" 1. Run 'mcp-vector-search index' to rebuild the index")
+        logger.warning(f" 2. Backup saved to: {backup_path}")
+        logger.warning("=" * 80)
 
     async def health_check(self) -> bool:
         """Check database health and integrity.
@@ -762,8 +1061,15 @@ class PooledChromaVectorDatabase(VectorDatabase):
         await self._pool.close()
         logger.debug("Pooled ChromaDB connections closed")
 
-    async def add_chunks(self, chunks: list[CodeChunk]) -> None:
-        """Add code chunks to the database using pooled connection."""
+    async def add_chunks(
+        self, chunks: list[CodeChunk], metrics: dict[str, Any] | None = None
+    ) -> None:
+        """Add code chunks to the database using pooled connection with optional metrics.
+
+        Args:
+            chunks: List of code chunks to add
+            metrics: Optional dict mapping chunk IDs to ChunkMetrics.to_metadata() dicts
+        """
         if not chunks:
             return
 
@@ -779,35 +1085,40 @@ class PooledChromaVectorDatabase(VectorDatabase):
         ids = []
 
         for chunk in chunks:
+            # Store original content in documents (no metadata appended)
             documents.append(chunk.content)
-            metadatas.append(
-                {
-                    "file_path": str(chunk.file_path),
-                    "start_line": chunk.start_line,
-                    "end_line": chunk.end_line,
-                    "language": chunk.language,
-                    "chunk_type": chunk.chunk_type,
-                    "function_name": chunk.function_name or "",
-                    "class_name": chunk.class_name or "",
-                    "docstring": chunk.docstring or "",
-                    "complexity_score": chunk.complexity_score,
-                    # Hierarchy fields (convert lists to JSON strings for ChromaDB)
-                    "chunk_id": chunk.chunk_id or "",
-                    "parent_chunk_id": chunk.parent_chunk_id or "",
-                    "child_chunk_ids": json.dumps(chunk.child_chunk_ids or []),
-                    "chunk_depth": chunk.chunk_depth,
-                    # Additional metadata (convert lists/dicts to JSON strings)
-                    "decorators": json.dumps(chunk.decorators or []),
-                    "parameters": json.dumps(chunk.parameters or []),
-                    "return_type": chunk.return_type or "",
-                    "type_annotations": json.dumps(
-                        chunk.type_annotations or {}
-                    ),
-                    # Monorepo support
-                    "subproject_name": chunk.subproject_name or "",
-                    "subproject_path": chunk.subproject_path or "",
-                }
-            )
+
+            metadata = {
+                "file_path": str(chunk.file_path),
+                "start_line": chunk.start_line,
+                "end_line": chunk.end_line,
+                "language": chunk.language,
+                "chunk_type": chunk.chunk_type,
+                "function_name": chunk.function_name or "",
+                "class_name": chunk.class_name or "",
+                "docstring": chunk.docstring or "",
+                "complexity_score": chunk.complexity_score,
+                # Hierarchy fields (convert lists to JSON strings for ChromaDB)
+                "chunk_id": chunk.chunk_id or "",
+                "parent_chunk_id": chunk.parent_chunk_id or "",
+                "child_chunk_ids": json.dumps(chunk.child_chunk_ids or []),
+                "chunk_depth": chunk.chunk_depth,
+                # Additional metadata (convert lists/dicts to JSON strings)
+                "decorators": json.dumps(chunk.decorators or []),
+                "parameters": json.dumps(chunk.parameters or []),
+                "return_type": chunk.return_type or "",
+                "type_annotations": json.dumps(chunk.type_annotations or {}),
+                # Monorepo support
+                "subproject_name": chunk.subproject_name or "",
+                "subproject_path": chunk.subproject_path or "",
+            }
+
+            # Merge structural metrics if provided
+            if metrics and chunk.chunk_id and chunk.chunk_id in metrics:
+                chunk_metrics = metrics[chunk.chunk_id]
+                metadata.update(chunk_metrics)
+
+            metadatas.append(metadata)
             ids.append(chunk.id)
 
         # Add to collection
@@ -862,6 +1173,7 @@ class PooledChromaVectorDatabase(VectorDatabase):
                 similarity = max(0.0, 1.0 / (1.0 + distance))
 
                 if similarity >= similarity_threshold:
+                    # Document contains the original content (no metadata appended)
                     result = SearchResult(
                         content=doc,
                         file_path=Path(metadata["file_path"]),