mcp_vector_search-0.12.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. mcp_vector_search/__init__.py +10 -0
  2. mcp_vector_search/cli/__init__.py +1 -0
  3. mcp_vector_search/cli/commands/__init__.py +1 -0
  4. mcp_vector_search/cli/commands/auto_index.py +397 -0
  5. mcp_vector_search/cli/commands/config.py +393 -0
  6. mcp_vector_search/cli/commands/demo.py +358 -0
  7. mcp_vector_search/cli/commands/index.py +744 -0
  8. mcp_vector_search/cli/commands/init.py +645 -0
  9. mcp_vector_search/cli/commands/install.py +675 -0
  10. mcp_vector_search/cli/commands/install_old.py +696 -0
  11. mcp_vector_search/cli/commands/mcp.py +1182 -0
  12. mcp_vector_search/cli/commands/reset.py +393 -0
  13. mcp_vector_search/cli/commands/search.py +773 -0
  14. mcp_vector_search/cli/commands/status.py +549 -0
  15. mcp_vector_search/cli/commands/uninstall.py +485 -0
  16. mcp_vector_search/cli/commands/visualize.py +1467 -0
  17. mcp_vector_search/cli/commands/watch.py +287 -0
  18. mcp_vector_search/cli/didyoumean.py +500 -0
  19. mcp_vector_search/cli/export.py +320 -0
  20. mcp_vector_search/cli/history.py +295 -0
  21. mcp_vector_search/cli/interactive.py +342 -0
  22. mcp_vector_search/cli/main.py +461 -0
  23. mcp_vector_search/cli/output.py +412 -0
  24. mcp_vector_search/cli/suggestions.py +375 -0
  25. mcp_vector_search/config/__init__.py +1 -0
  26. mcp_vector_search/config/constants.py +24 -0
  27. mcp_vector_search/config/defaults.py +200 -0
  28. mcp_vector_search/config/settings.py +134 -0
  29. mcp_vector_search/core/__init__.py +1 -0
  30. mcp_vector_search/core/auto_indexer.py +298 -0
  31. mcp_vector_search/core/connection_pool.py +360 -0
  32. mcp_vector_search/core/database.py +1214 -0
  33. mcp_vector_search/core/directory_index.py +318 -0
  34. mcp_vector_search/core/embeddings.py +294 -0
  35. mcp_vector_search/core/exceptions.py +89 -0
  36. mcp_vector_search/core/factory.py +318 -0
  37. mcp_vector_search/core/git_hooks.py +345 -0
  38. mcp_vector_search/core/indexer.py +1002 -0
  39. mcp_vector_search/core/models.py +294 -0
  40. mcp_vector_search/core/project.py +333 -0
  41. mcp_vector_search/core/scheduler.py +330 -0
  42. mcp_vector_search/core/search.py +952 -0
  43. mcp_vector_search/core/watcher.py +322 -0
  44. mcp_vector_search/mcp/__init__.py +5 -0
  45. mcp_vector_search/mcp/__main__.py +25 -0
  46. mcp_vector_search/mcp/server.py +733 -0
  47. mcp_vector_search/parsers/__init__.py +8 -0
  48. mcp_vector_search/parsers/base.py +296 -0
  49. mcp_vector_search/parsers/dart.py +605 -0
  50. mcp_vector_search/parsers/html.py +413 -0
  51. mcp_vector_search/parsers/javascript.py +643 -0
  52. mcp_vector_search/parsers/php.py +694 -0
  53. mcp_vector_search/parsers/python.py +502 -0
  54. mcp_vector_search/parsers/registry.py +223 -0
  55. mcp_vector_search/parsers/ruby.py +678 -0
  56. mcp_vector_search/parsers/text.py +186 -0
  57. mcp_vector_search/parsers/utils.py +265 -0
  58. mcp_vector_search/py.typed +1 -0
  59. mcp_vector_search/utils/__init__.py +40 -0
  60. mcp_vector_search/utils/gitignore.py +250 -0
  61. mcp_vector_search/utils/monorepo.py +277 -0
  62. mcp_vector_search/utils/timing.py +334 -0
  63. mcp_vector_search/utils/version.py +47 -0
  64. mcp_vector_search-0.12.6.dist-info/METADATA +754 -0
  65. mcp_vector_search-0.12.6.dist-info/RECORD +68 -0
  66. mcp_vector_search-0.12.6.dist-info/WHEEL +4 -0
  67. mcp_vector_search-0.12.6.dist-info/entry_points.txt +2 -0
  68. mcp_vector_search-0.12.6.dist-info/licenses/LICENSE +21 -0
mcp_vector_search/core/models.py
@@ -0,0 +1,294 @@
+"""Data models for MCP Vector Search."""
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from pydantic import BaseModel, Field
+
+
+@dataclass
+class CodeChunk:
+    """Represents a chunk of code with metadata."""
+
+    content: str
+    file_path: Path
+    start_line: int
+    end_line: int
+    language: str
+    chunk_type: str = "code"  # code, function, class, comment, docstring
+    function_name: str | None = None
+    class_name: str | None = None
+    docstring: str | None = None
+    imports: list[str] = None
+
+    # Enhancement 1: Complexity scoring
+    complexity_score: float = 0.0
+
+    # Enhancement 3: Hierarchical relationships
+    chunk_id: str | None = None
+    parent_chunk_id: str | None = None
+    child_chunk_ids: list[str] = None
+    chunk_depth: int = 0
+
+    # Enhancement 4: Enhanced metadata
+    decorators: list[str] = None
+    parameters: list[dict] = None
+    return_type: str | None = None
+    type_annotations: dict[str, str] = None
+
+    # Enhancement 5: Monorepo support
+    subproject_name: str | None = None  # "ewtn-plus-foundation"
+    subproject_path: str | None = None  # Relative path from root
+
+    def __post_init__(self) -> None:
+        """Initialize default values and generate chunk ID."""
+        if self.imports is None:
+            self.imports = []
+        if self.child_chunk_ids is None:
+            self.child_chunk_ids = []
+        if self.decorators is None:
+            self.decorators = []
+        if self.parameters is None:
+            self.parameters = []
+        if self.type_annotations is None:
+            self.type_annotations = {}
+
+        # Generate chunk ID if not provided
+        if self.chunk_id is None:
+            import hashlib
+
+            # Include name and a hash of the first 100 chars of content for uniqueness
+            # This ensures deterministic IDs while handling same-location chunks
+            name = self.function_name or self.class_name or ""
+            content_hash = hashlib.sha256(self.content[:100].encode()).hexdigest()[:8]
+            id_string = f"{self.file_path}:{self.chunk_type}:{name}:{self.start_line}:{self.end_line}:{content_hash}"
+            self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
+
+    @property
+    def id(self) -> str:
+        """Generate unique ID for this chunk."""
+        return f"{self.file_path}:{self.start_line}:{self.end_line}"
+
+    @property
+    def line_count(self) -> int:
+        """Get the number of lines in this chunk."""
+        return self.end_line - self.start_line + 1
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for storage."""
+        return {
+            "content": self.content,
+            "file_path": str(self.file_path),
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+            "language": self.language,
+            "chunk_type": self.chunk_type,
+            "function_name": self.function_name,
+            "class_name": self.class_name,
+            "docstring": self.docstring,
+            "imports": self.imports,
+            "complexity_score": self.complexity_score,
+            "chunk_id": self.chunk_id,
+            "parent_chunk_id": self.parent_chunk_id,
+            "child_chunk_ids": self.child_chunk_ids,
+            "chunk_depth": self.chunk_depth,
+            "decorators": self.decorators,
+            "parameters": self.parameters,
+            "return_type": self.return_type,
+            "type_annotations": self.type_annotations,
+            "subproject_name": self.subproject_name,
+            "subproject_path": self.subproject_path,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "CodeChunk":
+        """Create from dictionary."""
+        return cls(
+            content=data["content"],
+            file_path=Path(data["file_path"]),
+            start_line=data["start_line"],
+            end_line=data["end_line"],
+            language=data["language"],
+            chunk_type=data.get("chunk_type", "code"),
+            function_name=data.get("function_name"),
+            class_name=data.get("class_name"),
+            docstring=data.get("docstring"),
+            imports=data.get("imports", []),
+            complexity_score=data.get("complexity_score", 0.0),
+            chunk_id=data.get("chunk_id"),
+            parent_chunk_id=data.get("parent_chunk_id"),
+            child_chunk_ids=data.get("child_chunk_ids", []),
+            chunk_depth=data.get("chunk_depth", 0),
+            decorators=data.get("decorators", []),
+            parameters=data.get("parameters", []),
+            return_type=data.get("return_type"),
+            type_annotations=data.get("type_annotations", {}),
+            subproject_name=data.get("subproject_name"),
+            subproject_path=data.get("subproject_path"),
+        )
+
+
+class SearchResult(BaseModel):
+    """Represents a search result with metadata."""
+
+    content: str = Field(..., description="The matched code content")
+    file_path: Path = Field(..., description="Path to the source file")
+    start_line: int = Field(..., description="Starting line number")
+    end_line: int = Field(..., description="Ending line number")
+    language: str = Field(..., description="Programming language")
+    similarity_score: float = Field(..., description="Similarity score (0.0 to 1.0)")
+    rank: int = Field(..., description="Result rank in search results")
+    chunk_type: str = Field(default="code", description="Type of code chunk")
+    function_name: str | None = Field(
+        default=None, description="Function name if applicable"
+    )
+    class_name: str | None = Field(default=None, description="Class name if applicable")
+    context_before: list[str] = Field(default=[], description="Lines before the match")
+    context_after: list[str] = Field(default=[], description="Lines after the match")
+    highlights: list[str] = Field(default=[], description="Highlighted terms")
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    @property
+    def line_count(self) -> int:
+        """Get the number of lines in this result."""
+        return self.end_line - self.start_line + 1
+
+    @property
+    def location(self) -> str:
+        """Get a human-readable location string."""
+        return f"{self.file_path}:{self.start_line}-{self.end_line}"
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "content": self.content,
+            "file_path": str(self.file_path),
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+            "language": self.language,
+            "similarity_score": self.similarity_score,
+            "rank": self.rank,
+            "chunk_type": self.chunk_type,
+            "function_name": self.function_name,
+            "class_name": self.class_name,
+            "context_before": self.context_before,
+            "context_after": self.context_after,
+            "highlights": self.highlights,
+            "location": self.location,
+            "line_count": self.line_count,
+        }
+
+
+class IndexStats(BaseModel):
+    """Statistics about the search index."""
+
+    total_files: int = Field(..., description="Total number of indexed files")
+    total_chunks: int = Field(..., description="Total number of code chunks")
+    languages: dict[str, int] = Field(..., description="Language distribution")
+    file_types: dict[str, int] = Field(..., description="File type distribution")
+    index_size_mb: float = Field(..., description="Index size in megabytes")
+    last_updated: str = Field(..., description="Last update timestamp")
+    embedding_model: str = Field(..., description="Embedding model used")
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "total_files": self.total_files,
+            "total_chunks": self.total_chunks,
+            "languages": self.languages,
+            "file_types": self.file_types,
+            "index_size_mb": self.index_size_mb,
+            "last_updated": self.last_updated,
+            "embedding_model": self.embedding_model,
+        }
+
+
+@dataclass
+class Directory:
+    """Represents a directory in the project structure."""
+
+    path: Path  # Relative path from project root
+    name: str  # Directory name
+    parent_path: Path | None = None  # Parent directory path (None for root)
+    file_count: int = 0  # Number of files directly in this directory
+    subdirectory_count: int = 0  # Number of subdirectories
+    total_chunks: int = 0  # Total code chunks in this directory (recursive)
+    languages: dict[str, int] = None  # Language distribution in this directory
+    depth: int = 0  # Depth from project root (0 = root)
+    is_package: bool = False  # True if contains __init__.py or package.json
+    last_modified: float | None = (
+        None  # Most recent file modification time (unix timestamp)
+    )
+
+    def __post_init__(self) -> None:
+        """Initialize default values and generate directory ID."""
+        if self.languages is None:
+            self.languages = {}
+
+    @property
+    def id(self) -> str:
+        """Generate unique ID for this directory."""
+        import hashlib
+
+        return hashlib.sha256(str(self.path).encode()).hexdigest()[:16]
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for storage."""
+        return {
+            "path": str(self.path),
+            "name": self.name,
+            "parent_path": str(self.parent_path) if self.parent_path else None,
+            "file_count": self.file_count,
+            "subdirectory_count": self.subdirectory_count,
+            "total_chunks": self.total_chunks,
+            "languages": self.languages,
+            "depth": self.depth,
+            "is_package": self.is_package,
+            "last_modified": self.last_modified,
+        }
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "Directory":
+        """Create from dictionary."""
+        return cls(
+            path=Path(data["path"]),
+            name=data["name"],
+            parent_path=Path(data["parent_path"]) if data.get("parent_path") else None,
+            file_count=data.get("file_count", 0),
+            subdirectory_count=data.get("subdirectory_count", 0),
+            total_chunks=data.get("total_chunks", 0),
+            languages=data.get("languages", {}),
+            depth=data.get("depth", 0),
+            is_package=data.get("is_package", False),
+            last_modified=data.get("last_modified"),
+        )
+
+
+class ProjectInfo(BaseModel):
+    """Information about a project."""
+
+    name: str = Field(..., description="Project name")
+    root_path: Path = Field(..., description="Project root directory")
+    config_path: Path = Field(..., description="Configuration file path")
+    index_path: Path = Field(..., description="Index directory path")
+    is_initialized: bool = Field(..., description="Whether project is initialized")
+    languages: list[str] = Field(default=[], description="Detected languages")
+    file_count: int = Field(default=0, description="Number of indexable files")
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    def to_dict(self) -> dict[str, Any]:
+        """Convert to dictionary for serialization."""
+        return {
+            "name": self.name,
+            "root_path": str(self.root_path),
+            "config_path": str(self.config_path),
+            "index_path": str(self.index_path),
+            "is_initialized": self.is_initialized,
+            "languages": self.languages,
+            "file_count": self.file_count,
+        }
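
For orientation, a minimal usage sketch (not part of the package; it assumes the module above is importable as mcp_vector_search.core.models and uses a hypothetical file path). It shows how CodeChunk derives a deterministic chunk_id in __post_init__ and how to_dict()/from_dict() round-trip a chunk:

# Illustrative sketch only; exercises the CodeChunk model shown above.
from pathlib import Path

from mcp_vector_search.core.models import CodeChunk

chunk = CodeChunk(
    content="def add(a, b):\n    return a + b\n",
    file_path=Path("src/math_utils.py"),  # hypothetical file
    start_line=10,
    end_line=11,
    language="python",
    chunk_type="function",
    function_name="add",
)

# chunk_id is derived from path, chunk type, name, line range, and a hash of
# the content, so rebuilding an identical chunk yields the same 16-char ID.
print(chunk.chunk_id)
print(chunk.id)          # e.g. "src/math_utils.py:10:11"
print(chunk.line_count)  # 2

# Round-trip through the dictionary storage representation.
restored = CodeChunk.from_dict(chunk.to_dict())
assert restored.chunk_id == chunk.chunk_id
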
mcp_vector_search/core/project.py
@@ -0,0 +1,333 @@
+"""Project detection and management for MCP Vector Search."""
+
+import json
+from pathlib import Path
+
+from loguru import logger
+
+from ..config.defaults import (
+    DEFAULT_FILE_EXTENSIONS,
+    DEFAULT_IGNORE_PATTERNS,
+    get_default_config_path,
+    get_default_index_path,
+    get_language_from_extension,
+)
+from ..config.settings import ProjectConfig
+from ..utils.gitignore import create_gitignore_parser
+from .exceptions import (
+    ConfigurationError,
+    ProjectInitializationError,
+    ProjectNotFoundError,
+)
+from .models import ProjectInfo
+
+
+class ProjectManager:
+    """Manages project detection, initialization, and configuration."""
+
+    def __init__(self, project_root: Path | None = None) -> None:
+        """Initialize project manager.
+
+        Args:
+            project_root: Project root directory. If None, will auto-detect.
+        """
+        self.project_root = project_root or self._detect_project_root()
+        self._config: ProjectConfig | None = None
+
+        # Initialize gitignore parser
+        try:
+            self.gitignore_parser = create_gitignore_parser(self.project_root)
+        except Exception as e:
+            logger.debug(f"Failed to load gitignore patterns: {e}")
+            self.gitignore_parser = None
+
+    def _detect_project_root(self) -> Path:
+        """Auto-detect project root directory."""
+        current = Path.cwd()
+
+        # Look for common project indicators
+        indicators = [
+            ".git",
+            ".mcp-vector-search",
+            "pyproject.toml",
+            "package.json",
+            "Cargo.toml",
+            "go.mod",
+            "pom.xml",
+            "build.gradle",
+            ".project",
+        ]
+
+        # Walk up the directory tree
+        for path in [current] + list(current.parents):
+            for indicator in indicators:
+                if (path / indicator).exists():
+                    logger.debug(f"Detected project root: {path} (found {indicator})")
+                    return path
+
+        # Default to current directory
+        logger.debug(f"Using current directory as project root: {current}")
+        return current
+
+    def is_initialized(self) -> bool:
+        """Check if project is initialized for MCP Vector Search."""
+        config_path = get_default_config_path(self.project_root)
+        index_path = get_default_index_path(self.project_root)
+
+        return config_path.exists() and index_path.exists()
+
+    def initialize(
+        self,
+        file_extensions: list[str] | None = None,
+        embedding_model: str = "microsoft/codebert-base",
+        similarity_threshold: float = 0.5,
+        force: bool = False,
+    ) -> ProjectConfig:
+        """Initialize project for MCP Vector Search.
+
+        Args:
+            file_extensions: File extensions to index
+            embedding_model: Embedding model to use
+            similarity_threshold: Similarity threshold for search
+            force: Force re-initialization if already exists
+
+        Returns:
+            Project configuration
+
+        Raises:
+            ProjectInitializationError: If initialization fails
+        """
+        if self.is_initialized() and not force:
+            raise ProjectInitializationError(
+                f"Project already initialized at {self.project_root}. Use --force to re-initialize."
+            )
+
+        try:
+            # Create index directory
+            index_path = get_default_index_path(self.project_root)
+            index_path.mkdir(parents=True, exist_ok=True)
+
+            # Detect languages and files
+            detected_languages = self.detect_languages()
+            file_count = self.count_indexable_files(
+                file_extensions or DEFAULT_FILE_EXTENSIONS
+            )
+
+            # Create configuration
+            config = ProjectConfig(
+                project_root=self.project_root,
+                index_path=index_path,
+                file_extensions=file_extensions or DEFAULT_FILE_EXTENSIONS,
+                embedding_model=embedding_model,
+                similarity_threshold=similarity_threshold,
+                languages=detected_languages,
+            )
+
+            # Save configuration
+            self.save_config(config)
+
+            logger.info(
+                f"Initialized project at {self.project_root}",
+                languages=detected_languages,
+                file_count=file_count,
+                extensions=config.file_extensions,
+            )
+
+            self._config = config
+            return config
+
+        except Exception as e:
+            raise ProjectInitializationError(
+                f"Failed to initialize project: {e}"
+            ) from e
+
+    def load_config(self) -> ProjectConfig:
+        """Load project configuration.
+
+        Returns:
+            Project configuration
+
+        Raises:
+            ProjectNotFoundError: If project is not initialized
+            ConfigurationError: If configuration is invalid
+        """
+        if not self.is_initialized():
+            raise ProjectNotFoundError(
+                f"Project not initialized at {self.project_root}. Run 'mcp-vector-search init' first."
+            )
+
+        config_path = get_default_config_path(self.project_root)
+
+        try:
+            with open(config_path) as f:
+                config_data = json.load(f)
+
+            # Convert paths back to Path objects
+            config_data["project_root"] = Path(config_data["project_root"])
+            config_data["index_path"] = Path(config_data["index_path"])
+
+            config = ProjectConfig(**config_data)
+            self._config = config
+            return config
+
+        except Exception as e:
+            raise ConfigurationError(f"Failed to load configuration: {e}") from e
+
+    def save_config(self, config: ProjectConfig) -> None:
+        """Save project configuration.
+
+        Args:
+            config: Project configuration to save
+
+        Raises:
+            ConfigurationError: If saving fails
+        """
+        config_path = get_default_config_path(self.project_root)
+        config_path.parent.mkdir(parents=True, exist_ok=True)
+
+        try:
+            # Convert to JSON-serializable format
+            config_data = config.model_dump()
+            config_data["project_root"] = str(config.project_root)
+            config_data["index_path"] = str(config.index_path)
+
+            with open(config_path, "w") as f:
+                json.dump(config_data, f, indent=2)
+
+            logger.debug(f"Saved configuration to {config_path}")
+
+        except Exception as e:
+            raise ConfigurationError(f"Failed to save configuration: {e}") from e
+
+    @property
+    def config(self) -> ProjectConfig:
+        """Get project configuration, loading if necessary."""
+        if self._config is None:
+            self._config = self.load_config()
+        return self._config
+
+    def detect_languages(self) -> list[str]:
+        """Detect programming languages in the project.
+
+        Returns:
+            List of detected language names
+        """
+        languages: set[str] = set()
+
+        for file_path in self._iter_source_files():
+            language = get_language_from_extension(file_path.suffix)
+            if language != "text":
+                languages.add(language)
+
+        return sorted(languages)
+
+    def count_indexable_files(self, extensions: list[str]) -> int:
+        """Count files that can be indexed.
+
+        Args:
+            extensions: File extensions to count
+
+        Returns:
+            Number of indexable files
+        """
+        count = 0
+        for file_path in self._iter_source_files():
+            if file_path.suffix in extensions:
+                count += 1
+        return count
+
+    def get_project_info(self, file_count: int | None = None) -> ProjectInfo:
+        """Get comprehensive project information.
+
+        Args:
+            file_count: Optional pre-computed file count (avoids expensive filesystem scan)
+
+        Returns:
+            Project information
+        """
+        config_path = get_default_config_path(self.project_root)
+        index_path = get_default_index_path(self.project_root)
+
+        is_initialized = self.is_initialized()
+        languages = []
+        computed_file_count = 0
+
+        if is_initialized:
+            try:
+                config = self.config
+                languages = config.languages
+                # Use provided file_count if available to avoid filesystem scan
+                if file_count is not None:
+                    computed_file_count = file_count
+                else:
+                    computed_file_count = self.count_indexable_files(
+                        config.file_extensions
+                    )
+            except Exception:
+                # Ignore errors when getting detailed info
+                pass
+
+        return ProjectInfo(
+            name=self.project_root.name,
+            root_path=self.project_root,
+            config_path=config_path,
+            index_path=index_path,
+            is_initialized=is_initialized,
+            languages=languages,
+            file_count=computed_file_count,
+        )
+
+    def _iter_source_files(self) -> list[Path]:
+        """Iterate over source files in the project.
+
+        Returns:
+            List of source file paths
+        """
+        files = []
+
+        for path in self.project_root.rglob("*"):
+            if not path.is_file():
+                continue
+
+            # Skip ignored patterns
+            # PERFORMANCE: Pass is_directory=False since we already checked is_file()
+            if self._should_ignore_path(path, is_directory=False):
+                continue
+
+            files.append(path)
+
+        return files
+
+    def _should_ignore_path(self, path: Path, is_directory: bool | None = None) -> bool:
+        """Check if a path should be ignored.
+
+        Args:
+            path: Path to check
+            is_directory: Optional hint if path is a directory (avoids filesystem check)
+
+        Returns:
+            True if path should be ignored
+        """
+        # First check gitignore rules if available
+        # PERFORMANCE: Pass is_directory hint to avoid redundant stat() calls
+        if self.gitignore_parser and self.gitignore_parser.is_ignored(
+            path, is_directory=is_directory
+        ):
+            return True
+
+        # Check if any parent directory is in ignore patterns
+        for part in path.parts:
+            if part in DEFAULT_IGNORE_PATTERNS:
+                return True
+
+        # Check relative path from project root
+        try:
+            relative_path = path.relative_to(self.project_root)
+            for part in relative_path.parts:
+                if part in DEFAULT_IGNORE_PATTERNS:
+                    return True
+        except ValueError:
+            # Path is not relative to project root
+            return True
+
+        return False
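
Similarly, a hedged sketch of the initialize-or-load flow implied by the module above (the repository path is hypothetical; the configuration and index locations come from the config.defaults helpers):

# Illustrative sketch only; typical ProjectManager flow for this package.
from pathlib import Path

from mcp_vector_search.core.project import ProjectManager

manager = ProjectManager(project_root=Path("/path/to/repo"))  # hypothetical path

if not manager.is_initialized():
    # Creates the index directory and writes the project configuration.
    config = manager.initialize(similarity_threshold=0.5)
else:
    config = manager.load_config()

info = manager.get_project_info()
print(info.name, info.languages, info.file_count)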