mcp_vector_search-0.15.7-py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

Potentially problematic release: this version of mcp-vector-search might be problematic.

Files changed (86)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/chat.py +534 -0
  6. mcp_vector_search/cli/commands/config.py +393 -0
  7. mcp_vector_search/cli/commands/demo.py +358 -0
  8. mcp_vector_search/cli/commands/index.py +762 -0
  9. mcp_vector_search/cli/commands/init.py +658 -0
  10. mcp_vector_search/cli/commands/install.py +869 -0
  11. mcp_vector_search/cli/commands/install_old.py +700 -0
  12. mcp_vector_search/cli/commands/mcp.py +1254 -0
  13. mcp_vector_search/cli/commands/reset.py +393 -0
  14. mcp_vector_search/cli/commands/search.py +796 -0
  15. mcp_vector_search/cli/commands/setup.py +1133 -0
  16. mcp_vector_search/cli/commands/status.py +584 -0
  17. mcp_vector_search/cli/commands/uninstall.py +404 -0
  18. mcp_vector_search/cli/commands/visualize/__init__.py +39 -0
  19. mcp_vector_search/cli/commands/visualize/cli.py +265 -0
  20. mcp_vector_search/cli/commands/visualize/exporters/__init__.py +12 -0
  21. mcp_vector_search/cli/commands/visualize/exporters/html_exporter.py +33 -0
  22. mcp_vector_search/cli/commands/visualize/exporters/json_exporter.py +29 -0
  23. mcp_vector_search/cli/commands/visualize/graph_builder.py +709 -0
  24. mcp_vector_search/cli/commands/visualize/layout_engine.py +469 -0
  25. mcp_vector_search/cli/commands/visualize/server.py +201 -0
  26. mcp_vector_search/cli/commands/visualize/state_manager.py +428 -0
  27. mcp_vector_search/cli/commands/visualize/templates/__init__.py +16 -0
  28. mcp_vector_search/cli/commands/visualize/templates/base.py +218 -0
  29. mcp_vector_search/cli/commands/visualize/templates/scripts.py +3670 -0
  30. mcp_vector_search/cli/commands/visualize/templates/styles.py +779 -0
  31. mcp_vector_search/cli/commands/visualize.py.original +2536 -0
  32. mcp_vector_search/cli/commands/watch.py +287 -0
  33. mcp_vector_search/cli/didyoumean.py +520 -0
  34. mcp_vector_search/cli/export.py +320 -0
  35. mcp_vector_search/cli/history.py +295 -0
  36. mcp_vector_search/cli/interactive.py +342 -0
  37. mcp_vector_search/cli/main.py +484 -0
  38. mcp_vector_search/cli/output.py +414 -0
  39. mcp_vector_search/cli/suggestions.py +375 -0
  40. mcp_vector_search/config/__init__.py +1 -0
  41. mcp_vector_search/config/constants.py +24 -0
  42. mcp_vector_search/config/defaults.py +200 -0
  43. mcp_vector_search/config/settings.py +146 -0
  44. mcp_vector_search/core/__init__.py +1 -0
  45. mcp_vector_search/core/auto_indexer.py +299 -0
  46. mcp_vector_search/core/config_utils.py +394 -0
  47. mcp_vector_search/core/connection_pool.py +360 -0
  48. mcp_vector_search/core/database.py +1237 -0
  49. mcp_vector_search/core/directory_index.py +318 -0
  50. mcp_vector_search/core/embeddings.py +294 -0
  51. mcp_vector_search/core/exceptions.py +89 -0
  52. mcp_vector_search/core/factory.py +318 -0
  53. mcp_vector_search/core/git_hooks.py +345 -0
  54. mcp_vector_search/core/indexer.py +1002 -0
  55. mcp_vector_search/core/llm_client.py +453 -0
  56. mcp_vector_search/core/models.py +294 -0
  57. mcp_vector_search/core/project.py +350 -0
  58. mcp_vector_search/core/scheduler.py +330 -0
  59. mcp_vector_search/core/search.py +952 -0
  60. mcp_vector_search/core/watcher.py +322 -0
  61. mcp_vector_search/mcp/__init__.py +5 -0
  62. mcp_vector_search/mcp/__main__.py +25 -0
  63. mcp_vector_search/mcp/server.py +752 -0
  64. mcp_vector_search/parsers/__init__.py +8 -0
  65. mcp_vector_search/parsers/base.py +296 -0
  66. mcp_vector_search/parsers/dart.py +605 -0
  67. mcp_vector_search/parsers/html.py +413 -0
  68. mcp_vector_search/parsers/javascript.py +643 -0
  69. mcp_vector_search/parsers/php.py +694 -0
  70. mcp_vector_search/parsers/python.py +502 -0
  71. mcp_vector_search/parsers/registry.py +223 -0
  72. mcp_vector_search/parsers/ruby.py +678 -0
  73. mcp_vector_search/parsers/text.py +186 -0
  74. mcp_vector_search/parsers/utils.py +265 -0
  75. mcp_vector_search/py.typed +1 -0
  76. mcp_vector_search/utils/__init__.py +42 -0
  77. mcp_vector_search/utils/gitignore.py +250 -0
  78. mcp_vector_search/utils/gitignore_updater.py +212 -0
  79. mcp_vector_search/utils/monorepo.py +339 -0
  80. mcp_vector_search/utils/timing.py +338 -0
  81. mcp_vector_search/utils/version.py +47 -0
  82. mcp_vector_search-0.15.7.dist-info/METADATA +884 -0
  83. mcp_vector_search-0.15.7.dist-info/RECORD +86 -0
  84. mcp_vector_search-0.15.7.dist-info/WHEEL +4 -0
  85. mcp_vector_search-0.15.7.dist-info/entry_points.txt +3 -0
  86. mcp_vector_search-0.15.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,146 @@
+ """Pydantic configuration schemas for MCP Vector Search."""
+
+ from pathlib import Path
+
+ from pydantic import Field, field_validator
+ from pydantic_settings import BaseSettings
+
+
+ class ProjectConfig(BaseSettings):
+     """Type-safe project configuration with validation."""
+
+     project_root: Path = Field(..., description="Project root directory")
+     index_path: Path = Field(
+         default=".mcp-vector-search", description="Index storage path"
+     )
+     file_extensions: list[str] = Field(
+         default=[".py", ".js", ".ts", ".jsx", ".tsx"],
+         description="File extensions to index",
+     )
+     embedding_model: str = Field(
+         default="sentence-transformers/all-MiniLM-L6-v2",
+         description="Embedding model name",
+     )
+     similarity_threshold: float = Field(
+         default=0.3, ge=0.0, le=1.0, description="Similarity threshold"
+     )
+     max_chunk_size: int = Field(
+         default=512, gt=0, description="Maximum chunk size in tokens"
+     )
+     languages: list[str] = Field(
+         default=[], description="Detected programming languages"
+     )
+     watch_files: bool = Field(
+         default=False, description="Enable file watching for incremental updates"
+     )
+     cache_embeddings: bool = Field(default=True, description="Enable embedding caching")
+     max_cache_size: int = Field(
+         default=1000, gt=0, description="Maximum number of cached embeddings"
+     )
+     auto_reindex_on_upgrade: bool = Field(
+         default=True,
+         description="Automatically reindex when tool version is upgraded (minor/major versions)",
+     )
+     skip_dotfiles: bool = Field(
+         default=True,
+         description="Skip files and directories starting with '.' (except whitelisted ones)",
+     )
+     respect_gitignore: bool = Field(
+         default=True,
+         description="Respect .gitignore patterns when indexing files",
+     )
+     openrouter_api_key: str | None = Field(
+         default=None,
+         description="OpenRouter API key for chat command (optional, can also use env var)",
+     )
+     openai_api_key: str | None = Field(
+         default=None,
+         description="OpenAI API key for chat command (optional, can also use env var)",
+     )
+     preferred_llm_provider: str | None = Field(
+         default=None,
+         description="Preferred LLM provider: 'openai' or 'openrouter' (auto-detect if not set)",
+     )
+
+     @field_validator("project_root", "index_path", mode="before")
+     @classmethod
+     def validate_paths(cls, v: Path) -> Path:
+         """Ensure paths are absolute and normalized."""
+         if isinstance(v, str):
+             v = Path(v)
+         return v.resolve() if isinstance(v, Path) else v
+
+     @field_validator("file_extensions", mode="before")
+     @classmethod
+     def validate_extensions(cls, v: list[str]) -> list[str]:
+         """Ensure extensions start with dot."""
+         if isinstance(v, list):
+             return [ext if ext.startswith(".") else f".{ext}" for ext in v]
+         return v
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_",
+         "case_sensitive": False,
+     }
+
+
+ class DatabaseConfig(BaseSettings):
+     """Database configuration settings."""
+
+     persist_directory: Path | None = Field(
+         default=None, description="ChromaDB persistence directory"
+     )
+     collection_name: str = Field(
+         default="code_search", description="ChromaDB collection name"
+     )
+     batch_size: int = Field(
+         default=32, gt=0, description="Batch size for embedding operations"
+     )
+     enable_telemetry: bool = Field(
+         default=False, description="Enable ChromaDB telemetry"
+     )
+
+     @field_validator("persist_directory", mode="before")
+     @classmethod
+     def validate_persist_directory(cls, v: Path | None) -> Path | None:
+         """Ensure persist directory is absolute if provided."""
+         if v and isinstance(v, str):
+             v = Path(v)
+         return v.resolve() if isinstance(v, Path) else None
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_DB_",
+         "case_sensitive": False,
+     }
+
+
+ class SearchConfig(BaseSettings):
+     """Search configuration settings."""
+
+     default_limit: int = Field(
+         default=10, gt=0, description="Default number of search results"
+     )
+     max_limit: int = Field(
+         default=100, gt=0, description="Maximum number of search results"
+     )
+     enable_reranking: bool = Field(default=True, description="Enable result reranking")
+     context_lines: int = Field(
+         default=3, ge=0, description="Number of context lines to include"
+     )
+
+     @field_validator("max_limit", mode="after")
+     @classmethod
+     def validate_max_limit(cls, v: int, info) -> int:
+ """Ensure max_limit is greater than default_limit."""
+         if info.data and "default_limit" in info.data:
+             default_limit = info.data["default_limit"]
+             if v < default_limit:
+                 raise ValueError(
+                     "max_limit must be greater than or equal to default_limit"
+                 )
+         return v
+
+     model_config = {
+         "env_prefix": "MCP_VECTOR_SEARCH_SEARCH_",
+         "case_sensitive": False,
+     }
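
The three BaseSettings classes above read their values from prefixed, case-insensitive environment variables (MCP_VECTOR_SEARCH_, MCP_VECTOR_SEARCH_DB_, and MCP_VECTOR_SEARCH_SEARCH_). A minimal usage sketch, assuming the module is importable as mcp_vector_search.config.settings per the file list above (this snippet is editorial and not part of the wheel):

import os

from mcp_vector_search.config.settings import ProjectConfig, SearchConfig

# Field names map to prefixed environment variables, matched case-insensitively.
os.environ["MCP_VECTOR_SEARCH_SIMILARITY_THRESHOLD"] = "0.5"
os.environ["MCP_VECTOR_SEARCH_SEARCH_DEFAULT_LIMIT"] = "25"

config = ProjectConfig(project_root=".")  # validate_paths resolves this to an absolute path
search = SearchConfig()  # default max_limit (100) still satisfies validate_max_limit

assert config.similarity_threshold == 0.5
assert search.default_limit == 25
assert config.project_root.is_absolute()
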
@@ -0,0 +1 @@
+ """Core functionality for MCP Vector Search."""
@@ -0,0 +1,299 @@
+ """Automatic indexing strategies without daemon processes."""
+
+ import asyncio
+ import os
+ import time
+ from pathlib import Path
+ from typing import Any
+
+ from loguru import logger
+
+ from .database import VectorDatabase
+ from .indexer import SemanticIndexer
+
+
+ class AutoIndexer:
+     """Handles automatic reindexing without daemon processes."""
+
+     def __init__(
+         self,
+         indexer: SemanticIndexer,
+         database: VectorDatabase,
+         auto_reindex_threshold: int = 5,  # Max files to auto-reindex
+         staleness_threshold: float = 300.0,  # 5 minutes
+     ):
+         """Initialize auto-indexer.
+
+         Args:
+             indexer: Semantic indexer instance
+             database: Vector database instance
+             auto_reindex_threshold: Max files to auto-reindex without asking
+             staleness_threshold: Time in seconds before considering index stale
+         """
+         self.indexer = indexer
+         self.database = database
+         self.auto_reindex_threshold = auto_reindex_threshold
+         self.staleness_threshold = staleness_threshold
+         self._last_check_time = 0.0
+         self._check_interval = 30.0  # Check at most every 30 seconds
+
+     async def check_and_reindex_if_needed(
+         self, force_check: bool = False, interactive: bool = True
+     ) -> tuple[bool, int]:
+         """Check if reindexing is needed and optionally perform it.
+
+         Args:
+             force_check: Skip time-based check throttling
+             interactive: Whether to prompt user for large reindexes
+
+         Returns:
+             Tuple of (reindexed, files_updated)
+         """
+         current_time = time.time()
+
+         # Throttle checks to avoid excessive filesystem scanning
+         if (
+             not force_check
+             and (current_time - self._last_check_time) < self._check_interval
+         ):
+             return False, 0
+
+         self._last_check_time = current_time
+
+         try:
+             # Get files that need reindexing
+             stale_files = await self._find_stale_files()
+
+             if not stale_files:
+                 logger.debug("No files need reindexing")
+                 return False, 0
+
+             logger.info(f"Found {len(stale_files)} files that need reindexing")
+
+             # Decide whether to auto-reindex
+             should_reindex = await self._should_auto_reindex(stale_files, interactive)
+
+             if should_reindex:
+                 updated_count = await self._reindex_files(stale_files)
+                 logger.info(f"Auto-reindexed {updated_count} files")
+                 return True, updated_count
+             else:
+                 logger.info("Skipping auto-reindex (user choice or too many files)")
+                 return False, len(stale_files)
+
+         except Exception as e:
+             logger.error(f"Auto-reindex check failed: {e}")
+             return False, 0
+
+     async def _find_stale_files(self) -> list[Path]:
+         """Find files that need reindexing."""
+         try:
+             # Load existing metadata
+             metadata = self.indexer._load_index_metadata()
+
+             # Find all indexable files
+             all_files = self.indexer._find_indexable_files()
+
+             stale_files = []
+             for file_path in all_files:
+                 if self.indexer._needs_reindexing(file_path, metadata):
+                     stale_files.append(file_path)
+
+             return stale_files
+
+         except Exception as e:
+             logger.error(f"Failed to find stale files: {e}")
+             return []
+
+     async def _should_auto_reindex(
+         self, stale_files: list[Path], interactive: bool
+     ) -> bool:
+         """Determine if we should automatically reindex."""
+         file_count = len(stale_files)
+
+         # Always auto-reindex small numbers of files
+         if file_count <= self.auto_reindex_threshold:
+             logger.debug(f"Auto-reindexing {file_count} files (under threshold)")
+             return True
+
+         # For larger numbers, check if interactive mode is enabled
+         if not interactive:
+             logger.debug(
+                 f"Skipping auto-reindex of {file_count} files (non-interactive)"
+             )
+             return False
+
+         # In interactive mode, we could prompt the user
+         # For now, we'll be conservative and skip large reindexes
+         logger.info(f"Skipping auto-reindex of {file_count} files (over threshold)")
+         logger.info("Run 'mcp-vector-search index' to update manually")
+         return False
+
+     async def _reindex_files(self, files: list[Path]) -> int:
+         """Reindex the specified files."""
+         updated_count = 0
+
+         try:
+             # Process files in small batches to avoid overwhelming the system
+             batch_size = min(self.auto_reindex_threshold, 10)
+
+             for i in range(0, len(files), batch_size):
+                 batch = files[i : i + batch_size]
+
+                 # Process batch
+                 results = await self.indexer._process_file_batch(
+                     batch, force_reindex=False
+                 )
+
+                 # Count successful updates
+                 updated_count += sum(1 for success in results if success)
+
+                 # Small delay between batches to be nice to the system
+                 if i + batch_size < len(files):
+                     await asyncio.sleep(0.1)
+
+             return updated_count
+
+         except Exception as e:
+             logger.error(f"Failed to reindex files: {e}")
+             return updated_count
+
+     def get_staleness_info(self) -> dict[str, Any]:
+         """Get information about index staleness."""
+         try:
+             metadata = self.indexer._load_index_metadata()
+             all_files = self.indexer._find_indexable_files()
+
+             stale_count = 0
+             newest_file_time = 0.0
+             oldest_index_time = float("inf")
+
+             for file_path in all_files:
+                 file_mtime = os.path.getmtime(file_path)
+                 newest_file_time = max(newest_file_time, file_mtime)
+
+                 stored_mtime = metadata.get(str(file_path), 0)
+                 if stored_mtime > 0:
+                     oldest_index_time = min(oldest_index_time, stored_mtime)
+
+                 if self.indexer._needs_reindexing(file_path, metadata):
+                     stale_count += 1
+
+             current_time = time.time()
+             staleness_seconds = (
+                 current_time - oldest_index_time
+                 if oldest_index_time != float("inf")
+                 else 0
+             )
+
+             return {
+                 "total_files": len(all_files),
+                 "indexed_files": len(metadata),
+                 "stale_files": stale_count,
+                 "staleness_seconds": staleness_seconds,
+                 "is_stale": staleness_seconds > self.staleness_threshold,
+                 "newest_file_time": newest_file_time,
+                 "oldest_index_time": (
+                     oldest_index_time if oldest_index_time != float("inf") else 0
+                 ),
+             }
+
+         except Exception as e:
+             logger.error(f"Failed to get staleness info: {e}")
+             return {
+                 "total_files": 0,
+                 "indexed_files": 0,
+                 "stale_files": 0,
+                 "staleness_seconds": 0,
+                 "is_stale": False,
+                 "newest_file_time": 0,
+                 "oldest_index_time": 0,
+             }
+
+
+ class SearchTriggeredIndexer:
+     """Automatically reindex when searches are performed."""
+
+     def __init__(self, auto_indexer: AutoIndexer):
+         self.auto_indexer = auto_indexer
+         self._search_count = 0
+         self._searches_since_check = 0
+         self._check_every_n_searches = 10  # Check every 10 searches
+
+     async def pre_search_hook(self) -> bool:
+         """Hook to run before search operations.
+
+         Returns:
+             True if reindexing occurred, False otherwise
+         """
+         self._search_count += 1
+         self._searches_since_check += 1
+
+         # Only check periodically to avoid slowing down searches
+         if self._searches_since_check >= self._check_every_n_searches:
+             self._searches_since_check = 0
+
+             logger.debug("Checking for stale files before search")
+             reindexed, file_count = await self.auto_indexer.check_and_reindex_if_needed(
+                 force_check=False,
+                 interactive=False,  # Non-interactive during search
+             )
+
+             if reindexed:
+                 logger.info(f"Auto-reindexed {file_count} files before search")
+
+             return reindexed
+
+         return False
+
+     def get_search_stats(self) -> dict[str, int]:
+         """Get search-related statistics."""
+         return {
+             "total_searches": self._search_count,
+             "searches_since_check": self._searches_since_check,
+             "check_interval": self._check_every_n_searches,
+         }
+
+
+ class PeriodicIndexChecker:
+     """Check for stale index periodically during operations."""
+
+     def __init__(self, auto_indexer: AutoIndexer, check_interval: float = 3600.0):
+         """Initialize periodic checker.
+
+         Args:
+             auto_indexer: AutoIndexer instance
+             check_interval: Check interval in seconds (default: 1 hour)
+         """
+         self.auto_indexer = auto_indexer
+         self.check_interval = check_interval
+         self._last_periodic_check = 0.0
+
+     async def maybe_check_and_reindex(self) -> bool:
+         """Check if it's time for a periodic reindex check.
+
+         Returns:
+             True if reindexing occurred, False otherwise
+         """
+         current_time = time.time()
+
+         if (current_time - self._last_periodic_check) >= self.check_interval:
+             self._last_periodic_check = current_time
+
+             logger.debug("Performing periodic index staleness check")
+             reindexed, file_count = await self.auto_indexer.check_and_reindex_if_needed(
+                 force_check=True, interactive=False
+             )
+
+             if reindexed:
+                 logger.info(f"Periodic auto-reindex updated {file_count} files")
+
+             return reindexed
+
+         return False
+
+     def time_until_next_check(self) -> float:
+         """Get time in seconds until next periodic check."""
+         current_time = time.time()
+         elapsed = current_time - self._last_periodic_check
+         return max(0, self.check_interval - elapsed)
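
Taken together, these classes implement the daemon-free strategy named in the module docstring: AutoIndexer does the throttled scan-and-reindex work, while SearchTriggeredIndexer and PeriodicIndexChecker decide when to invoke it from the search path and from long-running operations, respectively. A sketch of how they compose, assuming indexer and database are already-constructed SemanticIndexer and VectorDatabase instances (their constructors are outside this hunk, so this snippet is illustrative only):

import asyncio

from mcp_vector_search.core.auto_indexer import (
    AutoIndexer,
    PeriodicIndexChecker,
    SearchTriggeredIndexer,
)

async def demo(indexer, database) -> None:
    auto = AutoIndexer(indexer, database, auto_reindex_threshold=5)

    # Search path: every 10th search runs a throttled, non-interactive
    # staleness check, so large backlogs are skipped rather than prompted for.
    hook = SearchTriggeredIndexer(auto)
    for _ in range(10):
        await hook.pre_search_hook()
    print(hook.get_search_stats())

    # Background path: at most one forced check per hour.
    periodic = PeriodicIndexChecker(auto, check_interval=3600.0)
    await periodic.maybe_check_and_reindex()
    print(f"Next periodic check in {periodic.time_until_next_check():.0f}s")

# asyncio.run(demo(indexer, database))  # with real instances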