mcp-vector-search 0.0.3__py3-none-any.whl → 0.4.11__py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.

Potentially problematic release: this version of mcp-vector-search has been flagged as possibly problematic.

Files changed (49)
  1. mcp_vector_search/__init__.py +3 -2
  2. mcp_vector_search/cli/commands/auto_index.py +397 -0
  3. mcp_vector_search/cli/commands/config.py +88 -40
  4. mcp_vector_search/cli/commands/index.py +198 -52
  5. mcp_vector_search/cli/commands/init.py +472 -58
  6. mcp_vector_search/cli/commands/install.py +284 -0
  7. mcp_vector_search/cli/commands/mcp.py +495 -0
  8. mcp_vector_search/cli/commands/search.py +241 -87
  9. mcp_vector_search/cli/commands/status.py +184 -58
  10. mcp_vector_search/cli/commands/watch.py +34 -35
  11. mcp_vector_search/cli/didyoumean.py +184 -0
  12. mcp_vector_search/cli/export.py +320 -0
  13. mcp_vector_search/cli/history.py +292 -0
  14. mcp_vector_search/cli/interactive.py +342 -0
  15. mcp_vector_search/cli/main.py +163 -26
  16. mcp_vector_search/cli/output.py +63 -45
  17. mcp_vector_search/config/defaults.py +50 -36
  18. mcp_vector_search/config/settings.py +49 -35
  19. mcp_vector_search/core/auto_indexer.py +298 -0
  20. mcp_vector_search/core/connection_pool.py +322 -0
  21. mcp_vector_search/core/database.py +335 -25
  22. mcp_vector_search/core/embeddings.py +73 -29
  23. mcp_vector_search/core/exceptions.py +19 -2
  24. mcp_vector_search/core/factory.py +310 -0
  25. mcp_vector_search/core/git_hooks.py +345 -0
  26. mcp_vector_search/core/indexer.py +237 -73
  27. mcp_vector_search/core/models.py +21 -19
  28. mcp_vector_search/core/project.py +73 -58
  29. mcp_vector_search/core/scheduler.py +330 -0
  30. mcp_vector_search/core/search.py +574 -86
  31. mcp_vector_search/core/watcher.py +48 -46
  32. mcp_vector_search/mcp/__init__.py +4 -0
  33. mcp_vector_search/mcp/__main__.py +25 -0
  34. mcp_vector_search/mcp/server.py +701 -0
  35. mcp_vector_search/parsers/base.py +30 -31
  36. mcp_vector_search/parsers/javascript.py +74 -48
  37. mcp_vector_search/parsers/python.py +57 -49
  38. mcp_vector_search/parsers/registry.py +47 -32
  39. mcp_vector_search/parsers/text.py +179 -0
  40. mcp_vector_search/utils/__init__.py +40 -0
  41. mcp_vector_search/utils/gitignore.py +229 -0
  42. mcp_vector_search/utils/timing.py +334 -0
  43. mcp_vector_search/utils/version.py +47 -0
  44. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/METADATA +173 -7
  45. mcp_vector_search-0.4.11.dist-info/RECORD +54 -0
  46. mcp_vector_search-0.0.3.dist-info/RECORD +0 -35
  47. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/WHEEL +0 -0
  48. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/entry_points.txt +0 -0
  49. {mcp_vector_search-0.0.3.dist-info → mcp_vector_search-0.4.11.dist-info}/licenses/LICENSE +0 -0

mcp_vector_search/parsers/text.py
@@ -0,0 +1,179 @@
+"""Text file parser for MCP Vector Search."""
+
+from pathlib import Path
+
+from ..core.models import CodeChunk
+from .base import BaseParser
+
+
+class TextParser(BaseParser):
+    """Parser for plain text files (.txt)."""
+
+    def __init__(self) -> None:
+        """Initialize text parser."""
+        super().__init__("text")
+
+    async def parse_file(self, file_path: Path) -> list[CodeChunk]:
+        """Parse a text file and extract chunks.
+
+        Args:
+            file_path: Path to the text file
+
+        Returns:
+            List of text chunks
+        """
+        try:
+            with open(file_path, encoding="utf-8") as f:
+                content = f.read()
+            return await self.parse_content(content, file_path)
+        except Exception:
+            # Return empty list if file can't be read
+            return []
+
+    async def parse_content(self, content: str, file_path: Path) -> list[CodeChunk]:
+        """Parse text content into semantic chunks.
+
+        Uses paragraph-based chunking for better semantic coherence.
+        Falls back to line-based chunking for non-paragraph text.
+
+        Args:
+            content: Text content to parse
+            file_path: Path to the source file
+
+        Returns:
+            List of text chunks
+        """
+        if not content.strip():
+            return []
+
+        chunks = []
+        lines = content.splitlines(keepends=True)
+
+        # Try paragraph-based chunking first
+        paragraphs = self._extract_paragraphs(content)
+
+        if paragraphs:
+            # Use paragraph-based chunking
+            for para_info in paragraphs:
+                chunk = self._create_chunk(
+                    content=para_info["content"],
+                    file_path=file_path,
+                    start_line=para_info["start_line"],
+                    end_line=para_info["end_line"],
+                    chunk_type="text",
+                )
+                chunks.append(chunk)
+        else:
+            # Fall back to line-based chunking for non-paragraph text
+            # Use smaller chunks for text files (30 lines instead of 50)
+            chunk_size = 30
+            for i in range(0, len(lines), chunk_size):
+                start_line = i + 1
+                end_line = min(i + chunk_size, len(lines))
+
+                chunk_content = "".join(lines[i:end_line])
+
+                if chunk_content.strip():
+                    chunk = self._create_chunk(
+                        content=chunk_content,
+                        file_path=file_path,
+                        start_line=start_line,
+                        end_line=end_line,
+                        chunk_type="text",
+                    )
+                    chunks.append(chunk)
+
+        return chunks
+
+    def _extract_paragraphs(self, content: str) -> list[dict]:
+        """Extract paragraphs from text content.
+
+        A paragraph is defined as one or more non-empty lines
+        separated by empty lines.
+
+        Args:
+            content: Text content
+
+        Returns:
+            List of paragraph info dictionaries
+        """
+        lines = content.splitlines(keepends=True)
+        paragraphs = []
+        current_para = []
+        start_line = 1
+
+        for i, line in enumerate(lines, 1):
+            if line.strip():
+                if not current_para:
+                    start_line = i
+                current_para.append(line)
+            else:
+                if current_para:
+                    # End of paragraph
+                    para_content = "".join(current_para)
+                    if len(para_content.strip()) > 20:  # Minimum paragraph size
+                        paragraphs.append({
+                            "content": para_content,
+                            "start_line": start_line,
+                            "end_line": i - 1
+                        })
+                    current_para = []
+
+        # Handle last paragraph if exists
+        if current_para:
+            para_content = "".join(current_para)
+            if len(para_content.strip()) > 20:
+                paragraphs.append({
+                    "content": para_content,
+                    "start_line": start_line,
+                    "end_line": len(lines)
+                })
+
+        # If we have very few paragraphs, merge small ones
+        if paragraphs:
+            merged = self._merge_small_paragraphs(paragraphs)
+            return merged
+
+        return []
+
+    def _merge_small_paragraphs(self, paragraphs: list[dict], target_size: int = 200) -> list[dict]:
+        """Merge small paragraphs to create more substantial chunks.
+
+        Args:
+            paragraphs: List of paragraph dictionaries
+            target_size: Target size for merged paragraphs in characters
+
+        Returns:
+            List of merged paragraph dictionaries
+        """
+        merged = []
+        current_merge = None
+
+        for para in paragraphs:
+            para_len = len(para["content"])
+
+            if current_merge is None:
+                current_merge = para.copy()
+            elif len(current_merge["content"]) + para_len < target_size * 2:
+                # Merge with current
+                current_merge["content"] += "\n" + para["content"]
+                current_merge["end_line"] = para["end_line"]
+            else:
+                # Start new merge
+                if len(current_merge["content"].strip()) > 20:
+                    merged.append(current_merge)
+                current_merge = para.copy()
+
+        # Add last merge
+        if current_merge and len(current_merge["content"].strip()) > 20:
+            merged.append(current_merge)
+
+        return merged
+
+    def get_supported_extensions(self) -> list[str]:
+        """Get list of supported file extensions.
+
+        Returns:
+            List of supported extensions
+        """
+        return [".txt"]

mcp_vector_search/utils/__init__.py
@@ -0,0 +1,40 @@
+"""Utility modules for MCP Vector Search."""
+
+from .gitignore import (
+    GitignoreParser,
+    GitignorePattern,
+    create_gitignore_parser,
+    is_path_gitignored,
+)
+from .timing import (
+    PerformanceProfiler,
+    SearchProfiler,
+    TimingResult,
+    get_global_profiler,
+    print_global_report,
+    time_async_block,
+    time_block,
+    time_function,
+)
+from .version import get_user_agent, get_version_info, get_version_string
+
+__all__ = [
+    # Gitignore utilities
+    "GitignoreParser",
+    "GitignorePattern",
+    "create_gitignore_parser",
+    "is_path_gitignored",
+    # Timing utilities
+    "PerformanceProfiler",
+    "TimingResult",
+    "time_function",
+    "time_block",
+    "time_async_block",
+    "get_global_profiler",
+    "print_global_report",
+    "SearchProfiler",
+    # Version utilities
+    "get_version_info",
+    "get_version_string",
+    "get_user_agent",
+]
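
These are plain re-exports, so downstream code can import the helpers from the package namespace instead of the individual modules. A short sketch of that convenience import path, using only names listed in __all__ together with the is_path_gitignored signature defined in gitignore.py below; the checked path is illustrative:

from pathlib import Path

from mcp_vector_search.utils import is_path_gitignored

# Convenience wrapper: builds a GitignoreParser for the project root
# and checks a single path against its accumulated patterns.
project_root = Path(".")
print(is_path_gitignored(project_root / "dist" / "example.whl", project_root))  # illustrative path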

mcp_vector_search/utils/gitignore.py
@@ -0,0 +1,229 @@
+"""Gitignore parsing and matching utilities."""
+
+import fnmatch
+import re
+from pathlib import Path
+from typing import List, Set
+
+from loguru import logger
+
+
+class GitignorePattern:
+    """Represents a single gitignore pattern with its matching logic."""
+
+    def __init__(self, pattern: str, is_negation: bool = False, is_directory_only: bool = False):
+        """Initialize a gitignore pattern.
+
+        Args:
+            pattern: The pattern string
+            is_negation: Whether this is a negation pattern (starts with !)
+            is_directory_only: Whether this pattern only matches directories (ends with /)
+        """
+        self.original_pattern = pattern
+        self.is_negation = is_negation
+        self.is_directory_only = is_directory_only
+        self.pattern = self._normalize_pattern(pattern)
+
+    def _normalize_pattern(self, pattern: str) -> str:
+        """Normalize the pattern for matching."""
+        # Remove leading ! for negation patterns
+        if pattern.startswith('!'):
+            pattern = pattern[1:]
+
+        # Remove trailing / for directory-only patterns
+        if pattern.endswith('/'):
+            pattern = pattern[:-1]
+
+        # Handle leading slash (absolute from repo root)
+        if pattern.startswith('/'):
+            pattern = pattern[1:]
+
+        return pattern
+
+    def matches(self, path: str, is_directory: bool = False) -> bool:
+        """Check if this pattern matches the given path.
+
+        Args:
+            path: Relative path from repository root
+            is_directory: Whether the path is a directory
+
+        Returns:
+            True if the pattern matches
+        """
+        # Directory-only patterns only match directories
+        if self.is_directory_only and not is_directory:
+            return False
+
+        # Convert path separators for consistent matching
+        path = path.replace('\\', '/')
+        pattern = self.pattern.replace('\\', '/')
+
+        # Try exact match first
+        if fnmatch.fnmatch(path, pattern):
+            return True
+
+        # Try matching any parent directory
+        path_parts = path.split('/')
+        for i in range(len(path_parts)):
+            subpath = '/'.join(path_parts[i:])
+            if fnmatch.fnmatch(subpath, pattern):
+                return True
+
+        # Try matching with ** patterns (glob-style)
+        if '**' in pattern:
+            # Convert ** to regex pattern
+            regex_pattern = pattern.replace('**', '.*')
+            regex_pattern = regex_pattern.replace('*', '[^/]*')
+            regex_pattern = regex_pattern.replace('?', '[^/]')
+            regex_pattern = f'^{regex_pattern}$'
+
+            try:
+                if re.match(regex_pattern, path):
+                    return True
+            except re.error:
+                # Fallback to simple fnmatch if regex fails
+                pass
+
+        return False
+
+
+class GitignoreParser:
+    """Parser for .gitignore files with proper pattern matching."""
+
+    def __init__(self, project_root: Path):
+        """Initialize gitignore parser.
+
+        Args:
+            project_root: Root directory of the project
+        """
+        self.project_root = project_root
+        self.patterns: List[GitignorePattern] = []
+        self._load_gitignore_files()
+
+    def _load_gitignore_files(self) -> None:
+        """Load all .gitignore files in the project hierarchy."""
+        # Load global .gitignore first (if exists)
+        global_gitignore = self.project_root / '.gitignore'
+        if global_gitignore.exists():
+            self._parse_gitignore_file(global_gitignore)
+
+        # Load .gitignore files in subdirectories
+        for gitignore_file in self.project_root.rglob('.gitignore'):
+            if gitignore_file != global_gitignore:
+                self._parse_gitignore_file(gitignore_file)
+
+    def _parse_gitignore_file(self, gitignore_path: Path) -> None:
+        """Parse a single .gitignore file.
+
+        Args:
+            gitignore_path: Path to the .gitignore file
+        """
+        try:
+            with open(gitignore_path, 'r', encoding='utf-8', errors='ignore') as f:
+                lines = f.readlines()
+
+            for line_num, line in enumerate(lines, 1):
+                line = line.strip()
+
+                # Skip empty lines and comments
+                if not line or line.startswith('#'):
+                    continue
+
+                # Check for negation pattern
+                is_negation = line.startswith('!')
+
+                # Check for directory-only pattern
+                is_directory_only = line.endswith('/')
+
+                # Create pattern relative to the .gitignore file's directory
+                gitignore_dir = gitignore_path.parent
+                if gitignore_dir != self.project_root:
+                    # Adjust pattern for subdirectory .gitignore files
+                    relative_dir = gitignore_dir.relative_to(self.project_root)
+                    if not line.startswith('/') and not is_negation:
+                        line = str(relative_dir / line)
+                    elif is_negation and not line[1:].startswith('/'):
+                        line = '!' + str(relative_dir / line[1:])
+
+                pattern = GitignorePattern(line, is_negation, is_directory_only)
+                self.patterns.append(pattern)
+
+        except Exception as e:
+            logger.warning(f"Failed to parse {gitignore_path}: {e}")
+
+    def is_ignored(self, path: Path) -> bool:
+        """Check if a path should be ignored according to .gitignore rules.
+
+        Args:
+            path: Path to check (can be absolute or relative to project root)
+
+        Returns:
+            True if the path should be ignored
+        """
+        try:
+            # Convert to relative path from project root
+            if path.is_absolute():
+                relative_path = path.relative_to(self.project_root)
+            else:
+                relative_path = path
+
+            path_str = str(relative_path).replace('\\', '/')
+            is_directory = path.is_dir() if path.exists() else False
+
+            # Apply patterns in order, with later patterns overriding earlier ones
+            ignored = False
+
+            for pattern in self.patterns:
+                if pattern.matches(path_str, is_directory):
+                    ignored = not pattern.is_negation
+
+            return ignored
+
+        except ValueError:
+            # Path is not relative to project root
+            return False
+        except Exception as e:
+            logger.debug(f"Error checking gitignore for {path}: {e}")
+            return False
+
+    def get_ignored_patterns(self) -> List[str]:
+        """Get list of all ignore patterns.
+
+        Returns:
+            List of pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if not p.is_negation]
+
+    def get_negation_patterns(self) -> List[str]:
+        """Get list of all negation patterns.
+
+        Returns:
+            List of negation pattern strings
+        """
+        return [p.original_pattern for p in self.patterns if p.is_negation]
+
+
+def create_gitignore_parser(project_root: Path) -> GitignoreParser:
+    """Create a gitignore parser for the given project.
+
+    Args:
+        project_root: Root directory of the project
+
+    Returns:
+        GitignoreParser instance
+    """
+    return GitignoreParser(project_root)
+
+
+def is_path_gitignored(path: Path, project_root: Path) -> bool:
+    """Quick function to check if a path is gitignored.
+
+    Args:
+        path: Path to check
+        project_root: Root directory of the project
+
+    Returns:
+        True if the path should be ignored
+    """
+    parser = create_gitignore_parser(project_root)
+    return parser.is_ignored(path)
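
Taken together, GitignoreParser collects every .gitignore under the project root, wraps each non-comment line in a GitignorePattern, and evaluates the patterns in order so that later patterns (including negations) override earlier ones. A minimal sketch of that flow against the API above; the project root and probed paths are illustrative:

from pathlib import Path

from mcp_vector_search.utils.gitignore import GitignoreParser, GitignorePattern

# Pattern-level matching. When constructing GitignorePattern directly, the
# caller passes the negation/directory flags; the parser derives them from
# the leading "!" and trailing "/" when reading .gitignore files.
pattern = GitignorePattern("build/", is_negation=False, is_directory_only=True)
print(pattern.matches("build", is_directory=True))   # True
print(pattern.matches("build", is_directory=False))  # False: directory-only pattern

# Project-level matching: every .gitignore under the root is loaded on construction.
parser = GitignoreParser(Path("."))                  # illustrative project root
print(parser.is_ignored(Path("dist")))               # depends on the local .gitignore files
print(parser.get_negation_patterns())                # patterns that began with "!"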