mcp-vector-search 0.4.14__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic.

Files changed (30)
  1. mcp_vector_search/__init__.py +2 -2
  2. mcp_vector_search/cli/commands/index.py +73 -31
  3. mcp_vector_search/cli/commands/init.py +189 -113
  4. mcp_vector_search/cli/commands/install.py +525 -113
  5. mcp_vector_search/cli/commands/mcp.py +201 -151
  6. mcp_vector_search/cli/commands/reset.py +41 -41
  7. mcp_vector_search/cli/commands/search.py +73 -14
  8. mcp_vector_search/cli/commands/status.py +51 -17
  9. mcp_vector_search/cli/didyoumean.py +254 -246
  10. mcp_vector_search/cli/main.py +114 -43
  11. mcp_vector_search/cli/output.py +152 -0
  12. mcp_vector_search/cli/suggestions.py +246 -197
  13. mcp_vector_search/core/database.py +81 -49
  14. mcp_vector_search/core/indexer.py +10 -4
  15. mcp_vector_search/core/search.py +17 -6
  16. mcp_vector_search/mcp/__main__.py +1 -1
  17. mcp_vector_search/mcp/server.py +211 -203
  18. mcp_vector_search/parsers/__init__.py +7 -0
  19. mcp_vector_search/parsers/dart.py +605 -0
  20. mcp_vector_search/parsers/html.py +413 -0
  21. mcp_vector_search/parsers/php.py +694 -0
  22. mcp_vector_search/parsers/registry.py +21 -1
  23. mcp_vector_search/parsers/ruby.py +678 -0
  24. mcp_vector_search/parsers/text.py +32 -26
  25. mcp_vector_search/utils/gitignore.py +72 -71
  26. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/METADATA +76 -5
  27. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/RECORD +30 -26
  28. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/WHEEL +0 -0
  29. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/entry_points.txt +0 -0
  30. {mcp_vector_search-0.4.14.dist-info → mcp_vector_search-0.5.1.dist-info}/licenses/LICENSE +0 -0
mcp_vector_search/core/database.py

@@ -100,7 +100,7 @@ class VectorDatabase(ABC):
     @abstractmethod
     async def health_check(self) -> bool:
         """Check database health and integrity.
-
+
         Returns:
             True if database is healthy, False otherwise
         """

@@ -145,7 +145,7 @@ class ChromaVectorDatabase(VectorDatabase):
 
         # Ensure directory exists
         self.persist_directory.mkdir(parents=True, exist_ok=True)
-
+
         # Check for corruption before initializing
         await self._detect_and_recover_corruption()
 

@@ -172,10 +172,19 @@ class ChromaVectorDatabase(VectorDatabase):
         except Exception as e:
             # Check if this is a corruption error
             error_msg = str(e).lower()
-            if any(indicator in error_msg for indicator in [
-                "pickle", "unpickling", "eof", "ran out of input",
-                "hnsw", "index", "deserialize", "corrupt"
-            ]):
+            if any(
+                indicator in error_msg
+                for indicator in [
+                    "pickle",
+                    "unpickling",
+                    "eof",
+                    "ran out of input",
+                    "hnsw",
+                    "index",
+                    "deserialize",
+                    "corrupt",
+                ]
+            ):
                 logger.warning(f"Detected index corruption: {e}")
                 # Try to recover
                 await self._recover_from_corruption()
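
All three touched classes apply the same corruption heuristic: lowercase the exception text and scan it for known substrings. A minimal standalone sketch of that idea, with a helper name (`is_corruption_error`) that is mine and not part of the package:

    # Distilled from the repeated checks in this diff; the helper itself is hypothetical.
    CORRUPTION_INDICATORS = (
        "pickle", "unpickling", "eof", "ran out of input",
        "hnsw", "index", "deserialize", "corrupt",
    )

    def is_corruption_error(exc: Exception) -> bool:
        """Return True if the exception text matches a known corruption marker."""
        error_msg = str(exc).lower()
        return any(indicator in error_msg for indicator in CORRUPTION_INDICATORS)

Broad markers such as "index" and "eof" will also match many unrelated errors, so the heuristic deliberately errs toward treating a failure as corruption.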

@@ -468,51 +477,57 @@ class ChromaVectorDatabase(VectorDatabase):
         """Detect and recover from index corruption proactively."""
         # Check for common corruption indicators in ChromaDB files
         chroma_db_path = self.persist_directory / "chroma.sqlite3"
-
+
         # If database doesn't exist yet, nothing to check
         if not chroma_db_path.exists():
             return
-
+
         # Check for HNSW index files that might be corrupted
         collection_path = self.persist_directory / "chroma-collections.parquet"
         index_path = self.persist_directory / "index"
-
+
         if index_path.exists():
             # Look for pickle files in the index
             pickle_files = list(index_path.glob("**/*.pkl"))
             pickle_files.extend(list(index_path.glob("**/*.pickle")))
-
+
             for pickle_file in pickle_files:
                 try:
                     # Try to read the pickle file to detect corruption
                     import pickle
-                    with open(pickle_file, 'rb') as f:
+
+                    with open(pickle_file, "rb") as f:
                         pickle.load(f)
                 except (EOFError, pickle.UnpicklingError, Exception) as e:
-                    logger.warning(f"Corrupted index file detected: {pickle_file} - {e}")
+                    logger.warning(
+                        f"Corrupted index file detected: {pickle_file} - {e}"
+                    )
                     await self._recover_from_corruption()
                     return
 
     async def _recover_from_corruption(self) -> None:
         """Recover from index corruption by rebuilding the index."""
         logger.info("Attempting to recover from index corruption...")
-
+
         # Create backup directory
-        backup_dir = self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        backup_dir = (
+            self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        )
         backup_dir.mkdir(exist_ok=True)
-
+
         # Backup current state (in case we need it)
         import time
+
         timestamp = int(time.time())
         backup_path = backup_dir / f"backup_{timestamp}"
-
+
         if self.persist_directory.exists():
             try:
                 shutil.copytree(self.persist_directory, backup_path)
                 logger.info(f"Created backup at {backup_path}")
             except Exception as e:
                 logger.warning(f"Could not create backup: {e}")
-
+
         # Clear the corrupted index
         if self.persist_directory.exists():
             try:
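
The recovery flow above is: copy the live index into a timestamped sibling directory, wipe it, and recreate it empty. A condensed sketch under the same naming scheme — the free function is my restatement of the method, and the `shutil.rmtree` clear step is an assumption, since the hunk truncates before showing how the directory is actually cleared:

    import shutil
    import time
    from pathlib import Path

    def backup_then_clear(persist_directory: Path) -> Path:
        """Back up the index to <name>_backup/backup_<timestamp>, then reset it."""
        backup_dir = persist_directory.parent / f"{persist_directory.name}_backup"
        backup_dir.mkdir(exist_ok=True)
        backup_path = backup_dir / f"backup_{int(time.time())}"
        if persist_directory.exists():
            shutil.copytree(persist_directory, backup_path)
            shutil.rmtree(persist_directory)  # assumed; the clear step is not shown
        persist_directory.mkdir(parents=True, exist_ok=True)
        return backup_path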

@@ -523,14 +538,14 @@ class ChromaVectorDatabase(VectorDatabase):
                 raise IndexCorruptionError(
                     f"Could not clear corrupted index: {e}"
                 ) from e
-
+
         # Recreate the directory
         self.persist_directory.mkdir(parents=True, exist_ok=True)
         logger.info("Index directory recreated. Please re-index your codebase.")
 
     async def health_check(self) -> bool:
         """Check database health and integrity.
-
+
         Returns:
             True if database is healthy, False otherwise
         """

@@ -539,35 +554,42 @@ class ChromaVectorDatabase(VectorDatabase):
             if not self._client or not self._collection:
                 logger.warning("Database not initialized")
                 return False
-
+
             # Try a simple operation to test the connection
             try:
                 # Attempt to get count - this will fail if index is corrupted
                 count = self._collection.count()
                 logger.debug(f"Health check passed: {count} chunks in database")
-
+
                 # Try a minimal query to ensure search works
                 self._collection.query(
-                    query_texts=["test"],
-                    n_results=1,
-                    include=["metadatas"]
+                    query_texts=["test"], n_results=1, include=["metadatas"]
                 )
-
+
                 return True
-
+
             except Exception as e:
                 error_msg = str(e).lower()
-                if any(indicator in error_msg for indicator in [
-                    "pickle", "unpickling", "eof", "ran out of input",
-                    "hnsw", "index", "deserialize", "corrupt"
-                ]):
+                if any(
+                    indicator in error_msg
+                    for indicator in [
+                        "pickle",
+                        "unpickling",
+                        "eof",
+                        "ran out of input",
+                        "hnsw",
+                        "index",
+                        "deserialize",
+                        "corrupt",
+                    ]
+                ):
                     logger.error(f"Index corruption detected during health check: {e}")
                     return False
                 else:
                     # Some other error
                     logger.warning(f"Health check failed: {e}")
                     return False
-
+
         except Exception as e:
             logger.error(f"Health check error: {e}")
             return False

@@ -793,7 +815,7 @@ class PooledChromaVectorDatabase(VectorDatabase):
                 file_types=file_type_counts,
                 index_size_mb=index_size_mb,
                 last_updated="unknown",  # ChromaDB doesn't track this
-                embedding_model="unknown"  # TODO: Track this in metadata
+                embedding_model="unknown",  # TODO: Track this in metadata
             )
 
         except Exception as e:

@@ -868,24 +890,31 @@ class PooledChromaVectorDatabase(VectorDatabase):
             pool_healthy = await self._pool.health_check()
             if not pool_healthy:
                 return False
-
+
             # Try a simple query to verify database integrity
             try:
                 async with self._pool.get_connection() as conn:
                     # Test basic operations
                     conn.collection.count()
                     conn.collection.query(
-                        query_texts=["test"],
-                        n_results=1,
-                        include=["metadatas"]
+                        query_texts=["test"], n_results=1, include=["metadatas"]
                     )
                 return True
             except Exception as e:
                 error_msg = str(e).lower()
-                if any(indicator in error_msg for indicator in [
-                    "pickle", "unpickling", "eof", "ran out of input",
-                    "hnsw", "index", "deserialize", "corrupt"
-                ]):
+                if any(
+                    indicator in error_msg
+                    for indicator in [
+                        "pickle",
+                        "unpickling",
+                        "eof",
+                        "ran out of input",
+                        "hnsw",
+                        "index",
+                        "deserialize",
+                        "corrupt",
+                    ]
+                ):
                     logger.error(f"Index corruption detected: {e}")
                     # Attempt recovery
                     await self._recover_from_corruption()
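
Both health checks, pooled and unpooled, run the same two probes: a count() and a one-result query, either of which will trip over a corrupted HNSW index. The shared core could be factored out roughly as below; the helper and its duck-typed collection argument are a sketch, not the package's API:

    def probe_collection(collection) -> None:
        """Raise if the cheapest read operations fail; callers treat that as corruption."""
        collection.count()
        collection.query(query_texts=["test"], n_results=1, include=["metadatas"])

The two callers differ only in how they obtain the collection: directly from self._collection, or via async with self._pool.get_connection() as conn.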

@@ -896,30 +925,33 @@ class PooledChromaVectorDatabase(VectorDatabase):
         except Exception as e:
             logger.error(f"Health check error: {e}")
             return False
-
+
     async def _recover_from_corruption(self) -> None:
         """Recover from index corruption by rebuilding the index."""
         logger.info("Attempting to recover from index corruption...")
-
+
         # Close the pool first
         await self._pool.close()
-
+
         # Create backup directory
-        backup_dir = self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        backup_dir = (
+            self.persist_directory.parent / f"{self.persist_directory.name}_backup"
+        )
         backup_dir.mkdir(exist_ok=True)
-
+
         # Backup current state
         import time
+
         timestamp = int(time.time())
         backup_path = backup_dir / f"backup_{timestamp}"
-
+
         if self.persist_directory.exists():
             try:
                 shutil.copytree(self.persist_directory, backup_path)
                 logger.info(f"Created backup at {backup_path}")
             except Exception as e:
                 logger.warning(f"Could not create backup: {e}")
-
+
         # Clear the corrupted index
         if self.persist_directory.exists():
             try:

@@ -930,10 +962,10 @@ class PooledChromaVectorDatabase(VectorDatabase):
                 raise IndexCorruptionError(
                     f"Could not clear corrupted index: {e}"
                 ) from e
-
+
         # Recreate the directory
         self.persist_directory.mkdir(parents=True, exist_ok=True)
-
+
         # Reinitialize the pool
         await self._pool.initialize()
         logger.info("Index recovered. Please re-index your codebase.")

mcp_vector_search/core/indexer.py

@@ -9,7 +9,7 @@ from loguru import logger
 
 from ..config.defaults import DEFAULT_IGNORE_PATTERNS
 from ..parsers.registry import get_parser_registry
-from ..utils.gitignore import create_gitignore_parser, GitignoreParser
+from ..utils.gitignore import create_gitignore_parser
 from .database import VectorDatabase
 from .exceptions import ParsingError
 from .models import CodeChunk

@@ -51,7 +51,9 @@ class SemanticIndexer:
         # Initialize gitignore parser
         try:
             self.gitignore_parser = create_gitignore_parser(project_root)
-            logger.debug(f"Loaded {len(self.gitignore_parser.patterns)} gitignore patterns")
+            logger.debug(
+                f"Loaded {len(self.gitignore_parser.patterns)} gitignore patterns"
+            )
         except Exception as e:
             logger.warning(f"Failed to load gitignore patterns: {e}")
             self.gitignore_parser = None

@@ -376,14 +378,18 @@ class SemanticIndexer:
         # Check each part of the path against default ignore patterns
         for part in relative_path.parts:
             if part in self._ignore_patterns:
-                logger.debug(f"Path ignored by default pattern '{part}': {file_path}")
+                logger.debug(
+                    f"Path ignored by default pattern '{part}': {file_path}"
+                )
                 return True
 
         # Check if any parent directory should be ignored
         for parent in relative_path.parents:
             for part in parent.parts:
                 if part in self._ignore_patterns:
-                    logger.debug(f"Path ignored by parent pattern '{part}': {file_path}")
+                    logger.debug(
+                        f"Path ignored by parent pattern '{part}': {file_path}"
+                    )
                     return True
 
         return False
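
The ignore check tests every component of the relative path against the default patterns. A compact equivalent, assuming the same `_ignore_patterns` set (the free function is my restatement):

    from pathlib import Path

    def is_ignored(file_path: Path, project_root: Path, ignore_patterns: set[str]) -> bool:
        """True if any path component matches a default ignore pattern."""
        relative_path = file_path.relative_to(project_root)
        return any(part in ignore_patterns for part in relative_path.parts)

Note that relative_path.parts already contains every ancestor directory name, so the second loop over relative_path.parents in the original appears to re-test the same components; it changes which debug message is logged rather than the outcome.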

mcp_vector_search/core/search.py

@@ -68,7 +68,7 @@ class SemanticSearchEngine:
 
         # Health check before search
         try:
-            if hasattr(self.database, 'health_check'):
+            if hasattr(self.database, "health_check"):
                 is_healthy = await self.database.health_check()
                 if not is_healthy:
                     logger.warning("Database health check failed - attempting recovery")

@@ -118,12 +118,23 @@ class SemanticSearchEngine:
         except Exception as e:
             error_msg = str(e).lower()
             # Check for corruption indicators
-            if any(indicator in error_msg for indicator in [
-                "pickle", "unpickling", "eof", "ran out of input",
-                "hnsw", "index", "deserialize", "corrupt"
-            ]):
+            if any(
+                indicator in error_msg
+                for indicator in [
+                    "pickle",
+                    "unpickling",
+                    "eof",
+                    "ran out of input",
+                    "hnsw",
+                    "index",
+                    "deserialize",
+                    "corrupt",
+                ]
+            ):
                 logger.error(f"Index corruption detected during search: {e}")
-                logger.info("The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it.")
+                logger.info(
+                    "The index appears to be corrupted. Please run 'mcp-vector-search reset' to clear the index and then 'mcp-vector-search index' to rebuild it."
+                )
                 raise SearchError(
                     "Index corruption detected. Please run 'mcp-vector-search reset' followed by 'mcp-vector-search index' to rebuild."
                 ) from e
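
From the caller's side, the new failure mode is an explicit SearchError whose message names the fix. A hypothetical handler — the engine object, its search signature, and the exception's import path are assumptions based on the class and imports shown in this diff:

    from mcp_vector_search.core.exceptions import SearchError  # assumed location

    async def safe_search(engine, query: str) -> list:
        try:
            return await engine.search(query)
        except SearchError:
            # Mirrors the advice logged above: reset, then rebuild the index.
            print("Run 'mcp-vector-search reset', then 'mcp-vector-search index'.")
            return []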

mcp_vector_search/mcp/__main__.py

@@ -11,7 +11,7 @@ def main():
     """Main entry point for the MCP server."""
     # Allow specifying project root as command line argument
     project_root = Path(sys.argv[1]) if len(sys.argv) > 1 else None
-
+
     try:
         asyncio.run(run_mcp_server(project_root))
     except KeyboardInterrupt: