mcp_code_indexer-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
+ """
+ MCP Code Indexer - Intelligent codebase navigation for AI agents.
+
+ A production-ready Model Context Protocol (MCP) server that provides
+ intelligent codebase navigation through searchable file descriptions,
+ token-aware overviews, and advanced merge capabilities.
+ """
+
+ __version__ = "1.0.0"
+ __author__ = "MCP Code Indexer Contributors"
+ __email__ = ""
+ __license__ = "MIT"
+
+ from .server.mcp_server import MCPCodeIndexServer
+
+ __all__ = ["MCPCodeIndexServer", "__version__"]
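
For orientation, a minimal sketch of how these top-level exports might be consumed. The constructor arguments of MCPCodeIndexServer are not shown in this diff, so only the import surface is illustrated:

import mcp_code_indexer
from mcp_code_indexer import MCPCodeIndexServer

# Version metadata exposed by the package root.
print(mcp_code_indexer.__version__)  # -> "1.0.0"
# MCPCodeIndexServer is re-exported from .server.mcp_server; its
# configuration and startup API live in that module, not shown here.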
@@ -0,0 +1 @@
+ """Database models and operations."""
@@ -0,0 +1,480 @@
+ """
+ Database operations for the MCP Code Indexer.
+
+ This module provides async database operations using aiosqlite with proper
+ connection management, transaction handling, and performance optimizations.
+ """
+
+ import asyncio
+ import json
+ import logging
+ import sqlite3
+ from contextlib import asynccontextmanager
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import List, Optional, Dict, Any, Tuple, AsyncIterator
+
+ import aiosqlite
+
+ from mcp_code_indexer.database.models import (
+     Project, FileDescription, MergeConflict, SearchResult,
+     CodebaseSizeInfo
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class DatabaseManager:
+     """
+     Manages SQLite database operations with async support.
+
+     Provides high-level operations for projects, file descriptions, search,
+     and caching with proper transaction management and error handling.
+     """
+
+     def __init__(self, db_path: Path, pool_size: int = 5):
+         """Initialize database manager with path to SQLite database."""
+         self.db_path = db_path
+         self.pool_size = pool_size
+         self._connection_pool: List[aiosqlite.Connection] = []
+         self._pool_lock: Optional[asyncio.Lock] = None  # Initialized in async context
+
+     async def initialize(self) -> None:
+         """Initialize database schema and configuration."""
+         # Initialize pool lock
+         self._pool_lock = asyncio.Lock()
+
+         # Ensure database directory exists
+         self.db_path.parent.mkdir(parents=True, exist_ok=True)
+
+         # Apply migrations in order
+         migrations_dir = Path(__file__).parent.parent.parent / "migrations"
+         migration_files = sorted(migrations_dir.glob("*.sql"))
+
+         async with aiosqlite.connect(self.db_path) as db:
+             # Enable row factory for easier data access
+             db.row_factory = aiosqlite.Row
+
+             # Apply each migration
+             for migration_file in migration_files:
+                 logger.info(f"Applying migration: {migration_file.name}")
+                 with open(migration_file, 'r') as f:
+                     migration_sql = f.read()
+
+                 await db.executescript(migration_sql)
+                 await db.commit()
+
+         logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} migrations")
+
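
initialize() applies every *.sql file under migrations/ in sorted filename order via executescript(). The migration files themselves are not part of this diff; as a hedged sketch, a first migration consistent with the SQL issued elsewhere in this module might look like the following (schema inferred from the queries, NOT the package's actual migrations):

# Hypothetical migrations/001_initial.sql, reconstructed from the queries
# in DatabaseManager; column types and constraints are assumptions.
INITIAL_MIGRATION = """
CREATE TABLE IF NOT EXISTS projects (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    remote_origin TEXT,
    upstream_origin TEXT,
    aliases TEXT NOT NULL DEFAULT '[]',
    created TIMESTAMP NOT NULL,
    last_accessed TIMESTAMP NOT NULL
);

CREATE TABLE IF NOT EXISTS file_descriptions (
    project_id TEXT NOT NULL,
    branch TEXT NOT NULL,
    file_path TEXT NOT NULL,
    description TEXT NOT NULL,
    file_hash TEXT,
    last_modified TIMESTAMP NOT NULL,
    version INTEGER NOT NULL DEFAULT 1,
    source_project_id TEXT,
    PRIMARY KEY (project_id, branch, file_path)
);

-- External-content FTS5 index; rowids must track file_descriptions for the
-- JOIN in search_file_descriptions (sync triggers omitted from this sketch).
CREATE VIRTUAL TABLE IF NOT EXISTS file_descriptions_fts USING fts5(
    project_id, branch, file_path, description,
    content='file_descriptions'
);

CREATE TABLE IF NOT EXISTS token_cache (
    cache_key TEXT PRIMARY KEY,
    token_count INTEGER NOT NULL,
    expires TIMESTAMP
);
"""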
+     @asynccontextmanager
+     async def get_connection(self) -> AsyncIterator[aiosqlite.Connection]:
+         """Get a database connection from the pool or create a new one."""
+         conn = None
+
+         # Try to get a connection from the pool
+         if self._pool_lock:
+             async with self._pool_lock:
+                 if self._connection_pool:
+                     conn = self._connection_pool.pop()
+
+         # Create a new connection if none is available
+         if conn is None:
+             conn = await aiosqlite.connect(self.db_path)
+             conn.row_factory = aiosqlite.Row
+
+             # Apply performance settings to new connections
+             await conn.execute("PRAGMA busy_timeout = 30000")  # 30 second timeout
+             await conn.execute("PRAGMA synchronous = NORMAL")  # Balanced durability/performance
+             await conn.execute("PRAGMA cache_size = -64000")  # 64MB cache
+             await conn.execute("PRAGMA temp_store = MEMORY")  # Use memory for temp tables
+
+         try:
+             yield conn
+         finally:
+             # Return to the pool if it is not full, otherwise close
+             returned_to_pool = False
+             if self._pool_lock and len(self._connection_pool) < self.pool_size:
+                 async with self._pool_lock:
+                     if len(self._connection_pool) < self.pool_size:
+                         self._connection_pool.append(conn)
+                         returned_to_pool = True
+
+             if not returned_to_pool:
+                 await conn.close()
+
+     async def close_pool(self) -> None:
+         """Close all connections in the pool."""
+         if self._pool_lock:
+             async with self._pool_lock:
+                 for conn in self._connection_pool:
+                     await conn.close()
+                 self._connection_pool.clear()
+
+     # Project operations
+
+     async def create_project(self, project: Project) -> None:
+         """Create a new project record."""
+         async with self.get_connection() as db:
+             await db.execute(
+                 """
+                 INSERT INTO projects (id, name, remote_origin, upstream_origin, aliases, created, last_accessed)
+                 VALUES (?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     project.id,
+                     project.name,
+                     project.remote_origin,
+                     project.upstream_origin,
+                     json.dumps(project.aliases),
+                     project.created,
+                     project.last_accessed
+                 )
+             )
+             await db.commit()
+             logger.debug(f"Created project: {project.id}")
+
+     async def get_project(self, project_id: str) -> Optional[Project]:
+         """Get project by ID."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 "SELECT * FROM projects WHERE id = ?",
+                 (project_id,)
+             )
+             row = await cursor.fetchone()
+
+             if row:
+                 return Project(
+                     id=row['id'],
+                     name=row['name'],
+                     remote_origin=row['remote_origin'],
+                     upstream_origin=row['upstream_origin'],
+                     aliases=json.loads(row['aliases']),
+                     created=datetime.fromisoformat(row['created']),
+                     last_accessed=datetime.fromisoformat(row['last_accessed'])
+                 )
+             return None
+
+     async def find_project_by_origin(self, origin_url: str) -> Optional[Project]:
+         """Find project by remote or upstream origin URL."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 """
+                 SELECT * FROM projects
+                 WHERE remote_origin = ? OR upstream_origin = ?
+                 LIMIT 1
+                 """,
+                 (origin_url, origin_url)
+             )
+             row = await cursor.fetchone()
+
+             if row:
+                 return Project(
+                     id=row['id'],
+                     name=row['name'],
+                     remote_origin=row['remote_origin'],
+                     upstream_origin=row['upstream_origin'],
+                     aliases=json.loads(row['aliases']),
+                     created=datetime.fromisoformat(row['created']),
+                     last_accessed=datetime.fromisoformat(row['last_accessed'])
+                 )
+             return None
+
+     async def update_project_access_time(self, project_id: str) -> None:
+         """Update the last accessed time for a project."""
+         async with self.get_connection() as db:
+             await db.execute(
+                 "UPDATE projects SET last_accessed = ? WHERE id = ?",
+                 (datetime.utcnow(), project_id)
+             )
+             await db.commit()
+
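
Taken together, the project operations support a simple record lifecycle. A hedged usage sketch follows; the module path mcp_code_indexer.database.database, the database filename, and all IDs/URLs are illustrative assumptions, not taken from this diff:

import asyncio
from pathlib import Path

from mcp_code_indexer.database.database import DatabaseManager  # assumed module path
from mcp_code_indexer.database.models import Project

async def main() -> None:
    # Illustrative location; the server chooses its own database path.
    dm = DatabaseManager(Path("./tracker.db"))
    await dm.initialize()

    project = Project(
        id="proj-abc123",  # illustrative generated ID
        name="my-service",
        remote_origin="https://github.com/example/my-service.git",
    )
    await dm.create_project(project)

    # Lookup works by either remote or upstream origin URL.
    found = await dm.find_project_by_origin("https://github.com/example/my-service.git")
    assert found is not None and found.id == project.id

    await dm.update_project_access_time(project.id)
    await dm.close_pool()

asyncio.run(main())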
+     # File description operations
+
+     async def create_file_description(self, file_desc: FileDescription) -> None:
+         """Create or update a file description."""
+         async with self.get_connection() as db:
+             await db.execute(
+                 """
+                 INSERT OR REPLACE INTO file_descriptions
+                 (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 (
+                     file_desc.project_id,
+                     file_desc.branch,
+                     file_desc.file_path,
+                     file_desc.description,
+                     file_desc.file_hash,
+                     file_desc.last_modified,
+                     file_desc.version,
+                     file_desc.source_project_id
+                 )
+             )
+             await db.commit()
+             logger.debug(f"Saved file description: {file_desc.file_path}")
+
+     async def get_file_description(
+         self,
+         project_id: str,
+         branch: str,
+         file_path: str
+     ) -> Optional[FileDescription]:
+         """Get file description by project, branch, and path."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 """
+                 SELECT * FROM file_descriptions
+                 WHERE project_id = ? AND branch = ? AND file_path = ?
+                 """,
+                 (project_id, branch, file_path)
+             )
+             row = await cursor.fetchone()
+
+             if row:
+                 return FileDescription(
+                     project_id=row['project_id'],
+                     branch=row['branch'],
+                     file_path=row['file_path'],
+                     description=row['description'],
+                     file_hash=row['file_hash'],
+                     last_modified=datetime.fromisoformat(row['last_modified']),
+                     version=row['version'],
+                     source_project_id=row['source_project_id']
+                 )
+             return None
+
+     async def get_all_file_descriptions(
+         self,
+         project_id: str,
+         branch: str
+     ) -> List[FileDescription]:
+         """Get all file descriptions for a project and branch."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 """
+                 SELECT * FROM file_descriptions
+                 WHERE project_id = ? AND branch = ?
+                 ORDER BY file_path
+                 """,
+                 (project_id, branch)
+             )
+             rows = await cursor.fetchall()
+
+             return [
+                 FileDescription(
+                     project_id=row['project_id'],
+                     branch=row['branch'],
+                     file_path=row['file_path'],
+                     description=row['description'],
+                     file_hash=row['file_hash'],
+                     last_modified=datetime.fromisoformat(row['last_modified']),
+                     version=row['version'],
+                     source_project_id=row['source_project_id']
+                 )
+                 for row in rows
+             ]
+
+     async def batch_create_file_descriptions(self, file_descriptions: List[FileDescription]) -> None:
+         """Batch create multiple file descriptions efficiently."""
+         if not file_descriptions:
+             return
+
+         async with self.get_connection() as db:
+             data = [
+                 (
+                     fd.project_id,
+                     fd.branch,
+                     fd.file_path,
+                     fd.description,
+                     fd.file_hash,
+                     fd.last_modified,
+                     fd.version,
+                     fd.source_project_id
+                 )
+                 for fd in file_descriptions
+             ]
+
+             await db.executemany(
+                 """
+                 INSERT OR REPLACE INTO file_descriptions
+                 (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
+                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+                 """,
+                 data
+             )
+             await db.commit()
+             logger.debug(f"Batch created {len(file_descriptions)} file descriptions")
+
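
Because both write paths use INSERT OR REPLACE, create_file_description and batch_create_file_descriptions behave as upserts keyed on (project_id, branch, file_path); the batch variant folds all rows into a single executemany() round-trip. A hedged sketch (paths and descriptions are illustrative; dm is a DatabaseManager as in the earlier sketch):

from mcp_code_indexer.database.models import FileDescription

async def seed_descriptions(dm: "DatabaseManager") -> None:
    descriptions = [
        FileDescription(
            project_id="proj-abc123",
            branch="main",
            file_path=path,
            description=text,
        )
        for path, text in [
            ("src/app.py", "Application entry point wiring routes together."),
            ("src/db.py", "Thin async wrapper around the connection pool."),
        ]
    ]
    # One executemany round-trip; re-running simply overwrites the
    # previous descriptions for the same (project_id, branch, file_path).
    await dm.batch_create_file_descriptions(descriptions)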
+     # Search operations
+
+     async def search_file_descriptions(
+         self,
+         project_id: str,
+         branch: str,
+         query: str,
+         max_results: int = 20
+     ) -> List[SearchResult]:
+         """Search file descriptions using FTS5."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 """
+                 SELECT
+                     fd.project_id,
+                     fd.branch,
+                     fd.file_path,
+                     fd.description,
+                     fts.rank
+                 FROM file_descriptions_fts fts
+                 JOIN file_descriptions fd ON fd.rowid = fts.rowid
+                 WHERE fts MATCH ?
+                     AND fd.project_id = ?
+                     AND fd.branch = ?
+                 ORDER BY fts.rank
+                 LIMIT ?
+                 """,
+                 (query, project_id, branch, max_results)
+             )
+             rows = await cursor.fetchall()
+
+             return [
+                 SearchResult(
+                     project_id=row['project_id'],
+                     branch=row['branch'],
+                     file_path=row['file_path'],
+                     description=row['description'],
+                     relevance_score=row['rank']
+                 )
+                 for row in rows
+             ]
+
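
The query parameter is passed straight to FTS5's MATCH, so it accepts FTS5 query syntax (bare terms are implicitly AND-ed; phrases, OR/NOT, and prefix* also work). FTS5's rank column is a bm25-derived score where more negative means more relevant, which is why the SQL orders by rank ascending. A hedged sketch:

async def find_pool_code(dm: "DatabaseManager") -> None:
    results = await dm.search_file_descriptions(
        project_id="proj-abc123",  # illustrative ID
        branch="main",
        query="connection pool",   # FTS5: matches rows containing both terms
        max_results=10,
    )
    for r in results:
        # relevance_score carries the raw FTS5 rank (a negative bm25 score).
        print(f"{r.relevance_score:8.3f}  {r.file_path}: {r.description[:60]}")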
+     # Token cache operations
+
+     async def get_cached_token_count(self, cache_key: str) -> Optional[int]:
+         """Get cached token count if not expired."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 """
+                 SELECT token_count FROM token_cache
+                 WHERE cache_key = ? AND (expires IS NULL OR expires > ?)
+                 """,
+                 (cache_key, datetime.utcnow())
+             )
+             row = await cursor.fetchone()
+             return row['token_count'] if row else None
+
+     async def cache_token_count(
+         self,
+         cache_key: str,
+         token_count: int,
+         ttl_hours: int = 24
+     ) -> None:
+         """Cache token count with TTL."""
+         expires = datetime.utcnow() + timedelta(hours=ttl_hours)
+
+         async with self.get_connection() as db:
+             await db.execute(
+                 """
+                 INSERT OR REPLACE INTO token_cache (cache_key, token_count, expires)
+                 VALUES (?, ?, ?)
+                 """,
+                 (cache_key, token_count, expires)
+             )
+             await db.commit()
+
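
The token cache is a plain key/value table: a row with expires = NULL never expires, and reads filter out stale rows rather than deleting them (that is cleanup_expired_cache's job, below). A hedged read-through pattern (the cache-key format and the count_tokens helper are hypothetical):

async def token_count_for_overview(dm: "DatabaseManager", text: str) -> int:
    cache_key = "proj-abc123:main:overview"  # hypothetical key format
    cached = await dm.get_cached_token_count(cache_key)
    if cached is not None:
        return cached
    tokens = count_tokens(text)  # hypothetical tokenizer helper
    await dm.cache_token_count(cache_key, tokens, ttl_hours=24)
    return tokens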
+     async def cleanup_expired_cache(self) -> None:
+         """Remove expired cache entries."""
+         async with self.get_connection() as db:
+             await db.execute(
+                 "DELETE FROM token_cache WHERE expires < ?",
+                 (datetime.utcnow(),)
+             )
+             await db.commit()
+
+     # Utility operations
+
+     async def get_file_count(self, project_id: str, branch: str) -> int:
+         """Get the count of files in a project branch."""
+         async with self.get_connection() as db:
+             cursor = await db.execute(
+                 "SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND branch = ?",
+                 (project_id, branch)
+             )
+             row = await cursor.fetchone()
+             return row['count'] if row else 0
+
+     # Upstream inheritance operations
+
+     async def inherit_from_upstream(self, project: Project, target_branch: str = "main") -> int:
+         """
+         Inherit file descriptions from the upstream repository.
+
+         Args:
+             project: Target project that should inherit descriptions
+             target_branch: Branch to inherit descriptions into
+
+         Returns:
+             Number of descriptions inherited
+         """
+         if not project.upstream_origin:
+             return 0
+
+         # Find upstream project
+         upstream_project = await self.find_project_by_origin(project.upstream_origin)
+         if not upstream_project:
+             logger.debug(f"No upstream project found for {project.upstream_origin}")
+             return 0
+
+         # Get upstream descriptions
+         upstream_descriptions = await self.get_all_file_descriptions(
+             upstream_project.id, target_branch
+         )
+
+         if not upstream_descriptions:
+             logger.debug(f"No upstream descriptions found in branch {target_branch}")
+             return 0
+
+         # Get existing descriptions to avoid overwriting
+         existing_descriptions = await self.get_all_file_descriptions(
+             project.id, target_branch
+         )
+         existing_paths = {desc.file_path for desc in existing_descriptions}
+
+         # Create new descriptions for files that don't exist locally
+         inherited_descriptions = []
+         for upstream_desc in upstream_descriptions:
+             if upstream_desc.file_path not in existing_paths:
+                 new_desc = FileDescription(
+                     project_id=project.id,
+                     branch=target_branch,
+                     file_path=upstream_desc.file_path,
+                     description=upstream_desc.description,
+                     file_hash=None,  # Don't copy hash as local file may differ
+                     last_modified=datetime.utcnow(),
+                     version=1,
+                     source_project_id=upstream_project.id  # Track inheritance source
+                 )
+                 inherited_descriptions.append(new_desc)
+
+         if inherited_descriptions:
+             await self.batch_create_file_descriptions(inherited_descriptions)
+             logger.info(f"Inherited {len(inherited_descriptions)} descriptions from upstream")
+
+         return len(inherited_descriptions)
+
+     async def check_upstream_inheritance_needed(self, project: Project) -> bool:
+         """
+         Check whether a project needs upstream inheritance.
+
+         Args:
+             project: Project to check
+
+         Returns:
+             True if the project has an upstream but no descriptions yet
+         """
+         if not project.upstream_origin:
+             return False
+
+         # Check if the project has any descriptions
+         file_count = await self.get_file_count(project.id, "main")
+         return file_count == 0
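
A hedged sketch of how the two inheritance helpers compose for a freshly registered fork (IDs are illustrative; the caller that actually drives this flow is not shown in this diff):

async def bootstrap_fork(dm: "DatabaseManager") -> None:
    fork = await dm.get_project("proj-fork456")  # illustrative ID
    if fork and await dm.check_upstream_inheritance_needed(fork):
        # Copies upstream descriptions for files the fork has no entry for;
        # file_hash is left unset so local files can be re-verified later.
        count = await dm.inherit_from_upstream(fork, target_branch="main")
        print(f"Inherited {count} descriptions from {fork.upstream_origin}")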
@@ -0,0 +1,123 @@
+ """
+ Data models for the MCP Code Indexer.
+
+ This module defines Pydantic models for project tracking, file descriptions,
+ and merge conflicts. These models provide validation and serialization for
+ the database operations.
+ """
+
+ from datetime import datetime
+ from typing import List, Optional
+ from pydantic import BaseModel, Field
+
+
+ class Project(BaseModel):
+     """
+     Represents a tracked project/repository.
+
+     Projects are identified by a combination of git remotes and local paths,
+     allowing tracking across forks, renames, and different local copies.
+     """
+     id: str = Field(..., description="Generated unique identifier")
+     name: str = Field(..., description="User-provided project name")
+     remote_origin: Optional[str] = Field(None, description="Git remote origin URL")
+     upstream_origin: Optional[str] = Field(None, description="Upstream repository URL for forks")
+     aliases: List[str] = Field(default_factory=list, description="Alternative identifiers")
+     created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
+     last_accessed: datetime = Field(default_factory=datetime.utcnow, description="Last access timestamp")
+
+
+ class FileDescription(BaseModel):
+     """
+     Represents a file description within a project branch.
+
+     Stores detailed summaries of file contents including purpose, components,
+     and relationships to enable efficient codebase navigation.
+     """
+     project_id: str = Field(..., description="Reference to project")
+     branch: str = Field(..., description="Git branch name")
+     file_path: str = Field(..., description="Relative path from project root")
+     description: str = Field(..., description="Detailed content description")
+     file_hash: Optional[str] = Field(None, description="SHA-256 of file contents")
+     last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+     version: int = Field(default=1, description="For optimistic concurrency control")
+     source_project_id: Optional[str] = Field(None, description="Source project if copied from upstream")
+
+
+ class MergeConflict(BaseModel):
+     """
+     Represents a merge conflict between file descriptions.
+
+     Used during branch merging when the same file has different descriptions
+     in source and target branches.
+     """
+     id: Optional[int] = Field(None, description="Database ID")
+     project_id: str = Field(..., description="Project identifier")
+     file_path: str = Field(..., description="Path to conflicted file")
+     source_branch: str = Field(..., description="Branch being merged from")
+     target_branch: str = Field(..., description="Branch being merged into")
+     source_description: str = Field(..., description="Description from source branch")
+     target_description: str = Field(..., description="Description from target branch")
+     resolution: Optional[str] = Field(None, description="AI-provided resolution")
+     created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
+
+
+ class CodebaseOverview(BaseModel):
+     """
+     Represents a complete codebase structure with file descriptions.
+
+     Provides a hierarchical view of project files with token count information
+     to help determine whether to use the full overview or a search-based approach.
+     """
+     project_name: str = Field(..., description="Project name")
+     branch: str = Field(..., description="Git branch")
+     total_files: int = Field(..., description="Total number of tracked files")
+     total_tokens: int = Field(..., description="Total token count for all descriptions")
+     is_large: bool = Field(..., description="True if exceeds configured token limit")
+     token_limit: int = Field(..., description="Current token limit setting")
+     structure: 'FolderNode' = Field(..., description="Hierarchical folder structure")
+
+
+ class FolderNode(BaseModel):
+     """
+     Represents a folder in the codebase hierarchy.
+     """
+     name: str = Field(..., description="Folder name")
+     path: str = Field(..., description="Full path from project root")
+     files: List['FileNode'] = Field(default_factory=list, description="Files in this folder")
+     folders: List['FolderNode'] = Field(default_factory=list, description="Subfolders")
+
+
+ class FileNode(BaseModel):
+     """
+     Represents a file in the codebase hierarchy.
+     """
+     name: str = Field(..., description="File name")
+     path: str = Field(..., description="Full path from project root")
+     description: str = Field(..., description="File description")
+
+
+ class SearchResult(BaseModel):
+     """
+     Represents a search result with relevance scoring.
+     """
+     file_path: str = Field(..., description="Path to the matching file")
+     description: str = Field(..., description="File description")
+     relevance_score: float = Field(..., description="Search relevance score")
+     project_id: str = Field(..., description="Project identifier")
+     branch: str = Field(..., description="Git branch")
+
+
+ class CodebaseSizeInfo(BaseModel):
+     """
+     Information about codebase size and token usage.
+     """
+     total_tokens: int = Field(..., description="Total token count")
+     is_large: bool = Field(..., description="Whether codebase exceeds token limit")
+     recommendation: str = Field(..., description="Recommended approach (use_search or use_overview)")
+     token_limit: int = Field(..., description="Configured token limit")
+
+
+ # Resolve string forward references for the recursive models
+ FolderNode.model_rebuild()
+ CodebaseOverview.model_rebuild()
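
FolderNode is self-referential and CodebaseOverview refers to it through a string annotation, so the trailing model_rebuild() calls are what resolve those forward references (Pydantic v2). A hedged sketch of building the recursive structure by hand (all values illustrative):

from mcp_code_indexer.database.models import CodebaseOverview, FileNode, FolderNode

root = FolderNode(
    name="",
    path="",
    files=[FileNode(name="README.md", path="README.md", description="Project overview.")],
    folders=[
        FolderNode(
            name="src",
            path="src",
            files=[FileNode(name="app.py", path="src/app.py", description="Entry point.")],
        )
    ],
)
overview = CodebaseOverview(
    project_name="my-service",
    branch="main",
    total_files=2,
    total_tokens=42,      # illustrative count
    is_large=False,
    token_limit=32000,    # illustrative limit
    structure=root,
)
print(overview.model_dump_json(indent=2))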