mcp-code-indexer 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,375 @@
1
+ """
2
+ File discovery and gitignore integration for the MCP Code Indexer.
3
+
4
+ This module provides functionality to scan project directories for files
5
+ while respecting .gitignore patterns and common ignore patterns. It enables
6
+ efficient discovery of files that need description tracking.
7
+ """
8
+
9
+ import logging
10
+ from pathlib import Path
11
+ from typing import List, Set, Optional, Generator
12
+ import fnmatch
13
+
14
+ try:
15
+ from gitignore_parser import parse_gitignore
16
+ except ImportError:
17
+ parse_gitignore = None
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
# Default ignore patterns applied even when no .gitignore file is present.
# Grouped by category; ordering matches the published behavior.
DEFAULT_IGNORE_PATTERNS = [
    # Version control metadata
    '.git/', '.svn/', '.hg/',

    # Dependency directories and Python bytecode
    'node_modules/', 'venv/', '.venv/', 'env/', '.env/',
    '__pycache__/', '*.pyc', '*.pyo', '*.pyd', '.Python',

    # Build artifacts and compiled outputs
    'build/', 'dist/', 'target/', 'out/', 'bin/', 'obj/',
    '*.o', '*.so', '*.dylib', '*.dll', '*.exe',

    # IDE / editor metadata
    '.vscode/', '.idea/', '.vs/', '*.swp', '*.swo', '*~',
    '.DS_Store', 'Thumbs.db',

    # Test and coverage output
    'coverage/', 'htmlcov/', '.pytest_cache/', '.coverage', '*.coverage',

    # Generated documentation
    '_build/', 'docs/_build/', 'site/',

    # Logs and scratch files
    '*.log', '*.tmp', '*.temp', '*.cache',

    # Packaged archives
    '*.tar.gz', '*.zip', '*.rar', '*.7z',

    # Dependency lock files (generated, not hand-edited)
    'package-lock.json', 'yarn.lock', 'Pipfile.lock', 'poetry.lock',
]
94
+
95
# Extensions of files skipped during code indexing: binary, media, and
# asset formats that carry no readable source text.
IGNORED_EXTENSIONS = {
    ext
    for group in (
        ('.exe', '.dll', '.so', '.dylib', '.bin', '.o', '.obj'),      # binaries
        ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.ico', '.svg'),    # images
        ('.pdf', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx'),  # documents
        ('.mp3', '.mp4', '.avi', '.mov', '.wmv', '.flv'),             # media
        ('.zip', '.tar', '.gz', '.rar', '.7z'),                       # archives
        ('.ttf', '.otf', '.woff', '.woff2', '.eot'),                  # fonts
    )
    for ext in group
}
115
+
116
+
117
class FileScanner:
    """
    Handles file discovery with gitignore and pattern-based filtering.

    Provides methods to scan directories while respecting .gitignore files
    and default ignore patterns to identify files suitable for description
    tracking.
    """

    def __init__(self, project_root: Path):
        """
        Initialize file scanner for a project.

        Args:
            project_root: Root directory of the project to scan
        """
        self.project_root = Path(project_root).resolve()
        # Maps directory path (str) -> matcher callable from parse_gitignore.
        self._gitignore_cache: dict = {}
        self._load_gitignore_patterns()

    def _load_gitignore_patterns(self) -> None:
        """Load and cache gitignore patterns from the project.

        Walks UP the directory hierarchy from the project root so that
        .gitignore files in ancestor directories are honored as well.
        NOTE(review): .gitignore files nested *inside* subdirectories of the
        project are not collected here — confirm that is intentional.
        """
        self._gitignore_cache.clear()

        if parse_gitignore is None:
            # Optional dependency missing; default patterns still apply.
            logger.warning("gitignore_parser not available, using default patterns only")
            return

        current_path = self.project_root

        # Stop once we reach the filesystem root (its parent is itself).
        while current_path != current_path.parent:
            gitignore_path = current_path / '.gitignore'

            if gitignore_path.exists():
                try:
                    gitignore_func = parse_gitignore(gitignore_path)
                    self._gitignore_cache[str(current_path)] = gitignore_func
                    logger.debug(f"Loaded .gitignore from {gitignore_path}")
                except Exception as e:
                    # A malformed .gitignore must not abort scanning.
                    logger.warning(f"Failed to parse {gitignore_path}: {e}")

            current_path = current_path.parent

    def _is_ignored_by_gitignore(self, file_path: Path) -> bool:
        """Check if a file is ignored by any cached .gitignore matcher."""
        if not self._gitignore_cache:
            return False

        for base_path, gitignore_func in self._gitignore_cache.items():
            try:
                # gitignore_parser expects absolute paths.
                if gitignore_func(str(file_path.resolve())):
                    return True
            except Exception as e:
                logger.debug(f"Error checking gitignore pattern: {e}")
                continue

        return False

    def _is_ignored_by_default_patterns(self, file_path: Path) -> bool:
        """Check if a file matches any DEFAULT_IGNORE_PATTERNS entry.

        Files that cannot be expressed relative to the project root (i.e.
        they live outside the project) are always treated as ignored.
        """
        try:
            resolved_file = file_path.resolve()
            resolved_root = self.project_root.resolve()
            rel_path = resolved_file.relative_to(resolved_root)
            rel_path_str = str(rel_path)
        except ValueError:
            # Outside the project tree: always ignore.
            return True

        for pattern in DEFAULT_IGNORE_PATTERNS:
            if pattern.endswith('/'):
                # Directory pattern: match any ancestor directory name.
                pattern_no_slash = pattern.rstrip('/')
                for parent in rel_path.parents:
                    if fnmatch.fnmatch(parent.name, pattern_no_slash):
                        return True
                # Also check the file's immediate parent directory.
                if fnmatch.fnmatch(rel_path.parent.name, pattern_no_slash):
                    return True
            else:
                # File pattern: match the relative path or the bare name.
                if fnmatch.fnmatch(rel_path_str, pattern):
                    return True
                if fnmatch.fnmatch(file_path.name, pattern):
                    return True

        return False

    def _is_ignored_by_extension(self, file_path: Path) -> bool:
        """Check if a file's (lowercased) suffix is in IGNORED_EXTENSIONS."""
        return file_path.suffix.lower() in IGNORED_EXTENSIONS

    def should_ignore_file(self, file_path: Path) -> bool:
        """
        Determine if a file should be ignored.

        Args:
            file_path: Path to the file to check

        Returns:
            True if the file should be ignored
        """
        # Non-files (directories, broken symlinks) are never tracked.
        if not file_path.is_file():
            return True

        if self._is_ignored_by_extension(file_path):
            return True

        if self._is_ignored_by_default_patterns(file_path):
            return True

        if self._is_ignored_by_gitignore(file_path):
            return True

        return False

    def scan_directory(self, max_files: Optional[int] = None) -> List[Path]:
        """
        Scan the project directory for trackable files.

        Args:
            max_files: Maximum number of files to return (None for no limit)

        Returns:
            Sorted list of file paths that should be tracked
        """
        files = []

        try:
            for file_path in self._walk_directory():
                if not self.should_ignore_file(file_path):
                    files.append(file_path)

                    if max_files and len(files) >= max_files:
                        logger.info(f"Reached max_files limit of {max_files}")
                        break

        except Exception as e:
            logger.error(f"Error scanning directory {self.project_root}: {e}")

        # Sort files for consistent ordering across runs.
        files.sort()

        logger.info(f"Found {len(files)} trackable files in {self.project_root}")
        return files

    def _walk_directory(self) -> Generator[Path, None, None]:
        """Yield every file under the project root, recursively."""
        try:
            for item in self.project_root.rglob('*'):
                if item.is_file():
                    yield item
        except PermissionError as e:
            logger.warning(f"Permission denied accessing {e.filename}")
        except Exception as e:
            logger.error(f"Error walking directory: {e}")

    def get_relative_path(self, file_path: Path) -> str:
        """
        Get relative path from project root.

        Args:
            file_path: Absolute path to file

        Returns:
            Relative path string from project root; the original path string
            unchanged if the file lies outside the project root.
        """
        try:
            # Resolve both paths to handle symlinks and '..' properly.
            resolved_file = file_path.resolve()
            resolved_root = self.project_root.resolve()
            return str(resolved_file.relative_to(resolved_root))
        except ValueError:
            # File is outside project root; return the path as given.
            return str(file_path)

    def find_missing_files(self, existing_paths: Set[str]) -> List[Path]:
        """
        Find files that exist on disk but aren't in the existing paths set.

        Args:
            existing_paths: Set of relative file paths that already have descriptions

        Returns:
            List of file paths that are missing descriptions
        """
        all_files = self.scan_directory()
        missing_files = [
            file_path
            for file_path in all_files
            if self.get_relative_path(file_path) not in existing_paths
        ]

        logger.info(f"Found {len(missing_files)} files missing descriptions")
        return missing_files

    def is_valid_project_directory(self) -> bool:
        """
        Check if the project root is a valid directory for scanning.

        Returns:
            True if the directory exists, is a directory, and is readable
            by the current process.
        """
        import os  # local import keeps this fix self-contained

        try:
            # os.access checks the *effective* user's permission, unlike the
            # previous raw `st_mode & 0o444` bit test, which ignored
            # ownership entirely and returned an int instead of a bool.
            return (
                self.project_root.exists()
                and self.project_root.is_dir()
                and os.access(self.project_root, os.R_OK)
            )
        except (OSError, PermissionError):
            return False

    def get_project_stats(self) -> dict:
        """
        Get statistics about the project directory.

        Returns:
            Dictionary with keys: total_files, trackable_files,
            ignored_files, largest_file_size, file_extensions.
        """
        stats = {
            'total_files': 0,
            'trackable_files': 0,
            'ignored_files': 0,
            'largest_file_size': 0,
            'file_extensions': {},
        }

        try:
            for file_path in self._walk_directory():
                stats['total_files'] += 1

                # Track the single largest file seen.
                try:
                    file_size = file_path.stat().st_size
                    stats['largest_file_size'] = max(stats['largest_file_size'], file_size)
                except OSError:
                    # File may have vanished between listing and stat.
                    pass

                # Histogram of (lowercased) extensions.
                ext = file_path.suffix.lower()
                stats['file_extensions'][ext] = stats['file_extensions'].get(ext, 0) + 1

                if self.should_ignore_file(file_path):
                    stats['ignored_files'] += 1
                else:
                    stats['trackable_files'] += 1

        except Exception as e:
            logger.error(f"Error getting project stats: {e}")

        return stats
@@ -0,0 +1,183 @@
1
+ """
2
+ Logging configuration for the MCP Code Indexer.
3
+
4
+ This module provides centralized logging setup with structured JSON output,
5
+ proper async handling, and file rotation for production use.
6
+ """
7
+
8
+ import logging
9
+ import logging.handlers
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ from .error_handler import StructuredFormatter
15
+
16
+
17
def setup_logging(
    log_level: str = "INFO",
    log_file: Optional[Path] = None,
    enable_file_logging: bool = False,
    max_bytes: int = 10 * 1024 * 1024,  # 10MB
    backup_count: int = 5
) -> logging.Logger:
    """
    Set up comprehensive logging configuration.

    Args:
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
        log_file: Path to log file (optional)
        enable_file_logging: Whether to enable file logging
        max_bytes: Maximum size of log file before rotation
        backup_count: Number of backup files to keep

    Returns:
        Configured root logger
    """
    numeric_level = getattr(logging, log_level.upper())
    structured_formatter = StructuredFormatter()

    root_logger = logging.getLogger()
    root_logger.setLevel(numeric_level)
    # Start from a clean slate: drop any pre-existing handlers.
    root_logger.handlers.clear()

    # Console output goes to stderr so the MCP protocol traffic on stdout
    # stays clean.
    console_handler = logging.StreamHandler(sys.stderr)
    console_handler.setLevel(numeric_level)
    console_handler.setFormatter(structured_formatter)
    root_logger.addHandler(console_handler)

    if enable_file_logging and log_file:
        try:
            # Make sure the log directory exists before opening the file.
            log_file.parent.mkdir(parents=True, exist_ok=True)

            file_handler = logging.handlers.RotatingFileHandler(
                log_file,
                maxBytes=max_bytes,
                backupCount=backup_count,
                encoding='utf-8',
            )
            # The file captures everything regardless of console verbosity.
            file_handler.setLevel(logging.DEBUG)
            file_handler.setFormatter(structured_formatter)
            root_logger.addHandler(file_handler)
        except (OSError, PermissionError) as e:
            # Fall back to console-only logging rather than failing startup.
            root_logger.warning(f"Failed to set up file logging: {e}")

    # Per-library levels: quiet noisy dependencies, pin our own modules.
    for name, level in (
        ("aiosqlite", logging.WARNING),
        ("tiktoken", logging.WARNING),
        ("mcp", logging.INFO),
        ("src.database", logging.INFO),
        ("src.server", logging.INFO),
    ):
        logging.getLogger(name).setLevel(level)

    return root_logger
95
+
96
+
97
def get_logger(name: str) -> logging.Logger:
    """
    Return the logger registered under *name*.

    Thin convenience wrapper around :func:`logging.getLogger` so callers
    can obtain loggers through this module.

    Args:
        name: Logger name (usually __name__)

    Returns:
        Logger instance
    """
    return logging.getLogger(name)
108
+
109
+
110
def log_performance_metrics(
    logger: logging.Logger,
    operation: str,
    duration: float,
    **metrics
) -> None:
    """
    Log performance metrics in structured format.

    Args:
        logger: Logger instance
        operation: Name of the operation
        duration: Duration in seconds
        **metrics: Additional metrics to log
    """
    # The raw numbers travel under extra["structured_data"] so a structured
    # formatter can emit them as machine-readable output alongside the
    # human-readable message.
    logger.info(
        f"Performance: {operation} completed in {duration:.3f}s",
        extra={
            "structured_data": {
                "performance": {
                    "operation": operation,
                    "duration_seconds": duration,
                    "metrics": metrics,
                }
            }
        },
    )
135
+
136
+
137
def log_tool_usage(
    logger: logging.Logger,
    tool_name: str,
    arguments: dict,
    success: bool,
    duration: Optional[float] = None,
    result_size: Optional[int] = None
) -> None:
    """
    Log MCP tool usage for analytics.

    Args:
        logger: Logger instance
        tool_name: Name of the MCP tool
        arguments: Tool arguments (long string values are truncated)
        success: Whether the operation succeeded
        duration: Operation duration in seconds
        result_size: Size of result data
    """
    # Truncate string arguments longer than 50 chars so records stay compact.
    safe_args = {
        key: f"{value[:50]}..." if isinstance(value, str) and len(value) > 50 else value
        for key, value in arguments.items()
    }

    usage_data = {
        "tool_name": tool_name,
        "arguments": safe_args,
        "success": success,
    }
    if duration is not None:
        usage_data["duration_seconds"] = duration
    if result_size is not None:
        usage_data["result_size"] = result_size

    # Failures are logged at WARNING so they stand out in filtered views.
    outcome = 'SUCCESS' if success else 'FAILED'
    logger.log(
        logging.INFO if success else logging.WARNING,
        f"Tool {tool_name}: {outcome}",
        extra={"structured_data": {"tool_usage": usage_data}},
    )
@@ -0,0 +1,129 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ MCP Code Indexer Package Main Module
4
+
5
+ Entry point for the mcp-code-indexer package when installed via pip.
6
+ """
7
+
8
+ import argparse
9
+ import asyncio
10
+ import logging
11
+ import sys
12
+ from pathlib import Path
13
+
14
+ from . import __version__
15
+ from .logging_config import setup_logging
16
+ from .error_handler import setup_error_handling
17
+
18
+
19
def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments for the mcp-code-indexer CLI."""
    parser = argparse.ArgumentParser(
        description="MCP Code Index Server - Track file descriptions across codebases",
        prog="mcp-code-indexer"
    )

    # --version uses argparse's built-in version action.
    parser.add_argument(
        "--version",
        action="version",
        version=f"mcp-code-indexer {__version__}"
    )

    # (flag, kwargs) specs for the remaining value options; registration
    # order is preserved so help output is unchanged.
    option_specs = [
        ("--token-limit", dict(
            type=int,
            default=32000,
            help="Maximum tokens before recommending search instead of full overview (default: 32000)",
        )),
        ("--db-path", dict(
            type=str,
            default="~/.mcp-code-index/tracker.db",
            help="Path to SQLite database (default: ~/.mcp-code-index/tracker.db)",
        )),
        ("--cache-dir", dict(
            type=str,
            default="~/.mcp-code-index/cache",
            help="Directory for caching token counts (default: ~/.mcp-code-index/cache)",
        )),
        ("--log-level", dict(
            type=str,
            choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
            default="INFO",
            help="Logging level (default: INFO)",
        )),
    ]
    for flag, kwargs in option_specs:
        parser.add_argument(flag, **kwargs)

    return parser.parse_args()
62
+
63
+
64
async def main() -> None:
    """Entry point: configure logging, prepare storage paths, run the server."""
    args = parse_arguments()

    # Persist structured logs next to the cache data.
    server_log = Path(args.cache_dir).expanduser() / "server.log" if args.cache_dir else None
    app_logger = setup_logging(
        log_level=args.log_level,
        log_file=server_log,
        enable_file_logging=True
    )
    error_handler = setup_error_handling(app_logger)

    # Expand ~ in user-supplied paths and make sure the directories exist.
    db_path = Path(args.db_path).expanduser()
    cache_dir = Path(args.cache_dir).expanduser()
    db_path.parent.mkdir(parents=True, exist_ok=True)
    cache_dir.mkdir(parents=True, exist_ok=True)

    # Record the effective configuration at startup.
    startup_info = {
        "version": __version__,
        "token_limit": args.token_limit,
        "db_path": str(db_path),
        "cache_dir": str(cache_dir),
        "log_level": args.log_level,
    }
    app_logger.info(
        "Starting MCP Code Index Server",
        extra={"structured_data": {"startup": startup_info}}
    )

    try:
        # Imported here so configuration errors surface before server imports.
        from .server.mcp_server import MCPCodeIndexServer

        server = MCPCodeIndexServer(
            token_limit=args.token_limit,
            db_path=db_path,
            cache_dir=cache_dir
        )
        await server.run()

    except Exception as e:
        error_handler.log_error(e, context={"phase": "startup"})
        raise
+
116
+
117
def cli_main():
    """Console script entry point.

    Runs the async ``main`` coroutine and converts failures into process
    exit codes suitable for a CLI: 0 on clean/interrupted shutdown, 1 on
    startup failure.
    """
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # User-initiated shutdown is not an error; keep exit code 0.
        # Messages go to stderr: stdout carries the MCP protocol stream,
        # and writing there could corrupt client communication (the logging
        # setup routes console logs to stderr for the same reason).
        print("\nServer stopped by user", file=sys.stderr)
    except Exception as e:
        print(f"Server failed to start: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    cli_main()