PyPI - mcp-code-indexer - Versions diffs - 2.4.0__py3-none-any.whl → 3.0.2__py3-none-any.whl - Mend

mcp-code-indexer 2.4.0__py3-none-any.whl → 3.0.2__py3-none-any.whl

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

mcp_code_indexer/database/database.py CHANGED Viewed

@@ -31,6 +31,7 @@ from mcp_code_indexer.database.connection_health import (
     ConnectionHealthMonitor, DatabaseMetricsCollector
 )
 from mcp_code_indexer.query_preprocessor import preprocess_search_query
+from mcp_code_indexer.cleanup_manager import CleanupManager
 logger = logging.getLogger(__name__)
@@ -79,6 +80,9 @@ class DatabaseManager:
         self._health_monitor = None  # Initialized in async context
         self._metrics_collector = DatabaseMetricsCollector()
+        # Cleanup manager for retention policies
+        self._cleanup_manager = None  # Initialized in async context
     async def initialize(self) -> None:
         """Initialize database schema and configuration."""
         import asyncio
@@ -97,13 +101,19 @@ class DatabaseManager:
         )
         await self._health_monitor.start_monitoring()
+        # Initialize cleanup manager
+        self._cleanup_manager = CleanupManager(self, retention_months=6)
         # Ensure database directory exists
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
         # Database initialization now uses the modern retry executor directly
         # Apply migrations in order
-        migrations_dir = Path(__file__).parent.parent.parent.parent / "migrations"
+        # Migrations are now bundled with the package
+        migrations_dir = Path(__file__).parent.parent / "migrations"
+        if not migrations_dir.exists():
+            raise RuntimeError(f"Could not find migrations directory at {migrations_dir}")
         migration_files = sorted(migrations_dir.glob("*.sql"))
         async with aiosqlite.connect(self.db_path) as db:
@@ -113,16 +123,48 @@ class DatabaseManager:
             # Configure WAL mode and optimizations for concurrent access
             await self._configure_database_optimizations(db, include_wal_mode=self.enable_wal_mode)
-            # Apply each migration
+            # Create migrations tracking table
+            await db.execute('''
+                CREATE TABLE IF NOT EXISTS migrations (
+                    id INTEGER PRIMARY KEY AUTOINCREMENT,
+                    filename TEXT UNIQUE NOT NULL,
+                    applied_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            ''')
+            await db.commit()
+            # Get list of already applied migrations
+            cursor = await db.execute('SELECT filename FROM migrations')
+            applied_migrations = {row[0] for row in await cursor.fetchall()}
+            # Apply each migration that hasn't been applied yet
             for migration_file in migration_files:
-                logger.info(f"Applying migration: {migration_file.name}")
-                with open(migration_file, 'r') as f:
-                    migration_sql = f.read()
+                migration_name = migration_file.name
+                if migration_name in applied_migrations:
+                    logger.info(f"Skipping already applied migration: {migration_name}")
+                    continue
+                logger.info(f"Applying migration: {migration_name}")
+                try:
+                    migration_sql = migration_file.read_text(encoding='utf-8')
+                except AttributeError:
+                    # Fallback for regular file objects
+                    with open(migration_file, 'r', encoding='utf-8') as f:
+                        migration_sql = f.read()
-                await db.executescript(migration_sql)
-                await db.commit()
+                try:
+                    await db.executescript(migration_sql)
+                    # Record that migration was applied
+                    await db.execute('INSERT INTO migrations (filename) VALUES (?)', (migration_name,))
+                    await db.commit()
+                    logger.info(f"Successfully applied migration: {migration_name}")
+                except Exception as e:
+                    logger.error(f"Failed to apply migration {migration_name}: {e}")
+                    await db.rollback()
+                    raise
-        logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} migrations")
+        logger.info(f"Database initialized at {self.db_path} with {len(migration_files)} total migrations")
     async def _configure_database_optimizations(self, db: aiosqlite.Connection, include_wal_mode: bool = True) -> None:
         """
@@ -703,20 +745,7 @@ class DatabaseManager:
             return projects
-    async def get_branch_file_counts(self, project_id: str) -> Dict[str, int]:
-        """Get file counts per branch for a project."""
-        async with self.get_connection() as db:
-            cursor = await db.execute(
-                """
-                SELECT branch, COUNT(*) as file_count
-                FROM file_descriptions
-                WHERE project_id = ?
-                GROUP BY branch
-                """,
-                (project_id,)
-            )
-            rows = await cursor.fetchall()
-            return {row[0]: row[1] for row in rows}
     # File description operations
@@ -726,18 +755,18 @@ class DatabaseManager:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO file_descriptions
-                (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
+                (project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 (
                     file_desc.project_id,
-                    file_desc.branch,
                     file_desc.file_path,
                     file_desc.description,
                     file_desc.file_hash,
                     file_desc.last_modified,
                     file_desc.version,
-                    file_desc.source_project_id
+                    file_desc.source_project_id,
+                    file_desc.to_be_cleaned
                 )
             )
             await db.commit()
@@ -746,60 +775,60 @@ class DatabaseManager:
     async def get_file_description(
         self,
         project_id: str,
-        branch: str,
         file_path: str
     ) -> Optional[FileDescription]:
-        """Get file description by project, branch, and path."""
+        """Get file description by project and path."""
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT * FROM file_descriptions
-                WHERE project_id = ? AND branch = ? AND file_path = ?
+                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                 """,
-                (project_id, branch, file_path)
+                (project_id, file_path)
             )
             row = await cursor.fetchone()
             if row:
                 return FileDescription(
+                    id=row['id'],
                     project_id=row['project_id'],
-                    branch=row['branch'],
                     file_path=row['file_path'],
                     description=row['description'],
                     file_hash=row['file_hash'],
                     last_modified=datetime.fromisoformat(row['last_modified']),
                     version=row['version'],
-                    source_project_id=row['source_project_id']
+                    source_project_id=row['source_project_id'],
+                    to_be_cleaned=row['to_be_cleaned']
                 )
             return None
     async def get_all_file_descriptions(
         self,
-        project_id: str,
-        branch: str
+        project_id: str
     ) -> List[FileDescription]:
-        """Get all file descriptions for a project and branch."""
+        """Get all file descriptions for a project."""
         async with self.get_connection() as db:
             cursor = await db.execute(
                 """
                 SELECT * FROM file_descriptions
-                WHERE project_id = ? AND branch = ?
+                WHERE project_id = ? AND to_be_cleaned IS NULL
                 ORDER BY file_path
                 """,
-                (project_id, branch)
+                (project_id,)
             )
             rows = await cursor.fetchall()
             return [
                 FileDescription(
+                    id=row['id'],
                     project_id=row['project_id'],
-                    branch=row['branch'],
                     file_path=row['file_path'],
                     description=row['description'],
                     file_hash=row['file_hash'],
                     last_modified=datetime.fromisoformat(row['last_modified']),
                     version=row['version'],
-                    source_project_id=row['source_project_id']
+                    source_project_id=row['source_project_id'],
+                    to_be_cleaned=row['to_be_cleaned']
                 )
                 for row in rows
             ]
@@ -813,13 +842,13 @@ class DatabaseManager:
             data = [
                 (
                     fd.project_id,
-                    fd.branch,
                     fd.file_path,
                     fd.description,
                     fd.file_hash,
                     fd.last_modified,
                     fd.version,
-                    fd.source_project_id
+                    fd.source_project_id,
+                    fd.to_be_cleaned
                 )
                 for fd in file_descriptions
             ]
@@ -827,7 +856,7 @@ class DatabaseManager:
             await conn.executemany(
                 """
                 INSERT OR REPLACE INTO file_descriptions
-                (project_id, branch, file_path, description, file_hash, last_modified, version, source_project_id)
+                (project_id, file_path, description, file_hash, last_modified, version, source_project_id, to_be_cleaned)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                 """,
                 data
@@ -845,7 +874,6 @@ class DatabaseManager:
     async def search_file_descriptions(
         self,
         project_id: str,
-        branch: str,
         query: str,
         max_results: int = 20
     ) -> List[SearchResult]:
@@ -864,26 +892,24 @@ class DatabaseManager:
                 """
                 SELECT
                     fd.project_id,
-                    fd.branch,
                     fd.file_path,
                     fd.description,
                     bm25(file_descriptions_fts) as rank
                 FROM file_descriptions_fts
-                JOIN file_descriptions fd ON fd.rowid = file_descriptions_fts.rowid
+                JOIN file_descriptions fd ON fd.id = file_descriptions_fts.rowid
                 WHERE file_descriptions_fts MATCH ?
                   AND fd.project_id = ?
-                  AND fd.branch = ?
+                  AND fd.to_be_cleaned IS NULL
                 ORDER BY bm25(file_descriptions_fts)
                 LIMIT ?
                 """,
-                (preprocessed_query, project_id, branch, max_results)
+                (preprocessed_query, project_id, max_results)
             )
             rows = await cursor.fetchall()
             return [
                 SearchResult(
                     project_id=row['project_id'],
-                    branch=row['branch'],
                     file_path=row['file_path'],
                     description=row['description'],
                     relevance_score=row['rank']
@@ -936,12 +962,12 @@ class DatabaseManager:
     # Utility operations
-    async def get_file_count(self, project_id: str, branch: str) -> int:
-        """Get count of files in a project branch."""
+    async def get_file_count(self, project_id: str) -> int:
+        """Get count of files in a project."""
         async with self.get_connection() as db:
             cursor = await db.execute(
-                "SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND branch = ?",
-                (project_id, branch)
+                "SELECT COUNT(*) as count FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
+                (project_id,)
             )
             row = await cursor.fetchone()
             return row['count'] if row else 0
@@ -1030,12 +1056,11 @@ class DatabaseManager:
             await db.execute(
                 """
                 INSERT OR REPLACE INTO project_overviews
-                (project_id, branch, overview, last_modified, total_files, total_tokens)
-                VALUES (?, ?, ?, ?, ?, ?)
+                (project_id, overview, last_modified, total_files, total_tokens)
+                VALUES (?, ?, ?, ?, ?)
                 """,
                 (
                     overview.project_id,
-                    overview.branch,
                     overview.overview,
                     overview.last_modified,
                     overview.total_files,
@@ -1043,21 +1068,20 @@ class DatabaseManager:
                 )
             )
             await db.commit()
-            logger.debug(f"Created/updated overview for project {overview.project_id}, branch {overview.branch}")
+            logger.debug(f"Created/updated overview for project {overview.project_id}")
-    async def get_project_overview(self, project_id: str, branch: str) -> Optional[ProjectOverview]:
-        """Get project overview by ID and branch."""
+    async def get_project_overview(self, project_id: str) -> Optional[ProjectOverview]:
+        """Get project overview by ID."""
         async with self.get_connection() as db:
             cursor = await db.execute(
-                "SELECT * FROM project_overviews WHERE project_id = ? AND branch = ?",
-                (project_id, branch)
+                "SELECT * FROM project_overviews WHERE project_id = ?",
+                (project_id,)
             )
             row = await cursor.fetchone()
             if row:
                 return ProjectOverview(
                     project_id=row['project_id'],
-                    branch=row['branch'],
                     overview=row['overview'],
                     last_modified=datetime.fromisoformat(row['last_modified']),
                     total_files=row['total_files'],
@@ -1065,25 +1089,24 @@ class DatabaseManager:
                 )
             return None
-    async def cleanup_missing_files(self, project_id: str, branch: str, project_root: Path) -> List[str]:
+    async def cleanup_missing_files(self, project_id: str, project_root: Path) -> List[str]:
         """
-        Remove descriptions for files that no longer exist on disk.
+        Mark descriptions for cleanup for files that no longer exist on disk.
         Args:
             project_id: Project identifier
-            branch: Branch name
             project_root: Path to project root directory
         Returns:
-            List of file paths that were cleaned up
+            List of file paths that were marked for cleanup
         """
         removed_files = []
         async def cleanup_operation(conn: aiosqlite.Connection) -> List[str]:
-            # Get all file descriptions for this project/branch
+            # Get all active file descriptions for this project
             cursor = await conn.execute(
-                "SELECT file_path FROM file_descriptions WHERE project_id = ? AND branch = ?",
-                (project_id, branch)
+                "SELECT file_path FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
+                (project_id,)
             )
             rows = await cursor.fetchall()
@@ -1097,31 +1120,32 @@ class DatabaseManager:
                 if not full_path.exists():
                     to_remove.append(file_path)
-            # Remove descriptions for missing files
+            # Mark descriptions for cleanup instead of deleting
             if to_remove:
+                import time
+                cleanup_timestamp = int(time.time())
                 await conn.executemany(
-                    "DELETE FROM file_descriptions WHERE project_id = ? AND branch = ? AND file_path = ?",
-                    [(project_id, branch, path) for path in to_remove]
+                    "UPDATE file_descriptions SET to_be_cleaned = ? WHERE project_id = ? AND file_path = ?",
+                    [(cleanup_timestamp, project_id, path) for path in to_remove]
                 )
-                logger.info(f"Cleaned up {len(to_remove)} missing files from {project_id}/{branch}")
+                logger.info(f"Marked {len(to_remove)} missing files for cleanup from {project_id}")
             return to_remove
         removed_files = await self.execute_transaction_with_retry(
             cleanup_operation,
-            f"cleanup_missing_files_{project_id}_{branch}",
+            f"cleanup_missing_files_{project_id}",
             timeout_seconds=60.0  # Longer timeout for file system operations
         )
         return removed_files
-    async def analyze_word_frequency(self, project_id: str, branch: str, limit: int = 200) -> WordFrequencyResult:
+    async def analyze_word_frequency(self, project_id: str, limit: int = 200) -> WordFrequencyResult:
         """
-        Analyze word frequency across all file descriptions for a project/branch.
+        Analyze word frequency across all file descriptions for a project.
         Args:
             project_id: Project identifier
-            branch: Branch name
             limit: Maximum number of top terms to return
         Returns:
@@ -1152,10 +1176,10 @@ class DatabaseManager:
         stop_words.update(programming_keywords)
         async with self.get_connection() as db:
-            # Get all descriptions for this project/branch
+            # Get all descriptions for this project
             cursor = await db.execute(
-                "SELECT description FROM file_descriptions WHERE project_id = ? AND branch = ?",
-                (project_id, branch)
+                "SELECT description FROM file_descriptions WHERE project_id = ? AND to_be_cleaned IS NULL",
+                (project_id,)
             )
             rows = await cursor.fetchall()
@@ -1218,13 +1242,12 @@ class DatabaseManager:
             await db.commit()
             return removed_count
-    async def get_project_map_data(self, project_identifier: str, branch: str = None) -> dict:
+    async def get_project_map_data(self, project_identifier: str) -> dict:
         """
         Get all data needed to generate a project map.
         Args:
             project_identifier: Project name or ID
-            branch: Branch name (optional, will use first available if not specified)
         Returns:
             Dictionary containing project info, overview, and file descriptions
@@ -1256,39 +1279,43 @@ class DatabaseManager:
             project = Project(**project_dict)
-            # If no branch specified, find the first available branch
-            if not branch:
-                cursor = await db.execute(
-                    "SELECT DISTINCT branch FROM file_descriptions WHERE project_id = ? LIMIT 1",
-                    (project.id,)
-                )
-                branch_row = await cursor.fetchone()
-                if branch_row:
-                    branch = branch_row['branch']
-                else:
-                    branch = 'main'  # Default fallback
             # Get project overview
             cursor = await db.execute(
-                "SELECT * FROM project_overviews WHERE project_id = ? AND branch = ?",
-                (project.id, branch)
+                "SELECT * FROM project_overviews WHERE project_id = ?",
+                (project.id,)
             )
             overview_row = await cursor.fetchone()
             project_overview = ProjectOverview(**overview_row) if overview_row else None
-            # Get all file descriptions for this project/branch
+            # Get all file descriptions for this project
             cursor = await db.execute(
                 """SELECT * FROM file_descriptions
-                   WHERE project_id = ? AND branch = ?
+                   WHERE project_id = ? AND to_be_cleaned IS NULL
                    ORDER BY file_path""",
-                (project.id, branch)
+                (project.id,)
             )
             file_rows = await cursor.fetchall()
             file_descriptions = [FileDescription(**row) for row in file_rows]
             return {
                 'project': project,
-                'branch': branch,
                 'overview': project_overview,
                 'files': file_descriptions
             }
+    # Cleanup operations
+    @property
+    def cleanup_manager(self) -> CleanupManager:
+        """Get the cleanup manager instance."""
+        if self._cleanup_manager is None:
+            self._cleanup_manager = CleanupManager(self, retention_months=6)
+        return self._cleanup_manager
+    async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
+        """Mark a file for cleanup. Convenience method."""
+        return await self.cleanup_manager.mark_file_for_cleanup(project_id, file_path)
+    async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
+        """Perform cleanup of old records. Convenience method."""
+        return await self.cleanup_manager.perform_cleanup(project_id)

mcp_code_indexer/database/models.py CHANGED Viewed

@@ -29,19 +29,20 @@ class Project(BaseModel):
 class FileDescription(BaseModel):
     """
-    Represents a file description within a project branch.
+    Represents a file description within a project.
     Stores detailed summaries of file contents including purpose, components,
     and relationships to enable efficient codebase navigation.
     """
+    id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Reference to project")
-    branch: str = Field(..., description="Git branch name")
     file_path: str = Field(..., description="Relative path from project root")
     description: str = Field(..., description="Detailed content description")
     file_hash: Optional[str] = Field(None, description="SHA-256 of file contents")
     last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
     version: int = Field(default=1, description="For optimistic concurrency control")
     source_project_id: Optional[str] = Field(None, description="Source project if copied from upstream")
+    to_be_cleaned: Optional[int] = Field(None, description="UNIX timestamp for cleanup, NULL = active")
 class MergeConflict(BaseModel):
@@ -71,7 +72,6 @@ class ProjectOverview(BaseModel):
     individual file descriptions.
     """
     project_id: str = Field(..., description="Reference to project")
-    branch: str = Field(..., description="Git branch name")
     overview: str = Field(..., description="Comprehensive codebase narrative")
     last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
     total_files: int = Field(..., description="Number of files in codebase")
@@ -86,7 +86,6 @@ class CodebaseOverview(BaseModel):
     to help determine whether to use full overview or search-based approach.
     """
     project_name: str = Field(..., description="Project name")
-    branch: str = Field(..., description="Git branch")
     total_files: int = Field(..., description="Total number of tracked files")
     total_tokens: int = Field(..., description="Total token count for all descriptions")
     is_large: bool = Field(..., description="True if exceeds configured token limit")
@@ -121,7 +120,6 @@ class SearchResult(BaseModel):
     description: str = Field(..., description="File description")
     relevance_score: float = Field(..., description="Search relevance score")
     project_id: str = Field(..., description="Project identifier")
-    branch: str = Field(..., description="Git branch")
 class CodebaseSizeInfo(BaseModel):

mcp_code_indexer/deepask_handler.py CHANGED Viewed

@@ -75,7 +75,7 @@ class DeepAskHandler(ClaudeAPIHandler):
         Ask an enhanced question about the project using two-stage Claude API processing.
         Args:
-            project_info: Project information dict with projectName, folderPath, branch, etc.
+            project_info: Project information dict with projectName, folderPath, etc.
             question: User's question about the project
             max_file_results: Maximum number of file descriptions to include
@@ -118,8 +118,7 @@ class DeepAskHandler(ClaudeAPIHandler):
                     "stage1_tokens": stage1_result["token_usage"],
                     "stage2_tokens": stage2_result["token_usage"],
                     "total_files_found": stage2_result["total_files_found"],
-                    "files_included": len(stage2_result["relevant_files"]),
-                    "branch": project_info.get("branch", "unknown")
+                    "files_included": len(stage2_result["relevant_files"])
                 }
             }
@@ -237,7 +236,6 @@ class DeepAskHandler(ClaudeAPIHandler):
                 try:
                     search_results = await self.db_manager.search_file_descriptions(
                         project_id=project.id,
-                        branch=project_info["branch"],
                         query=search_term,
                         max_results=max_file_results
                     )
@@ -322,9 +320,8 @@ class DeepAskHandler(ClaudeAPIHandler):
     ) -> str:
         """Build stage 1 prompt for extracting search terms."""
         project_name = project_info["projectName"]
-        branch = project_info.get("branch", "unknown")
-        return f"""I need to answer a question about the codebase "{project_name}" (branch: {branch}). To provide the best answer, I need to search for relevant files and then answer the question.
+        return f"""I need to answer a question about the codebase "{project_name}". To provide the best answer, I need to search for relevant files and then answer the question.
 PROJECT OVERVIEW:
 {overview}
@@ -352,7 +349,6 @@ Respond with valid JSON in this format:
     ) -> str:
         """Build stage 2 prompt for enhanced answer."""
         project_name = project_info["projectName"]
-        branch = project_info.get("branch", "unknown")
         # Format file descriptions
         file_context = ""
@@ -365,7 +361,7 @@ Respond with valid JSON in this format:
         else:
             file_context = "\n\nNo relevant files found in the search."
-        return f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).
+        return f"""Please answer the following question about the codebase "{project_name}".
 PROJECT OVERVIEW (COMPRESSED):
 {compressed_overview}
@@ -432,7 +428,7 @@ Your answer should be comprehensive but focused on the specific question asked."
         output = []
         output.append(f"Question: {result['question']}")
-        output.append(f"Project: {result['project_name']} (branch: {metadata['branch']})")
+        output.append(f"Project: {result['project_name']}")
         output.append("")
         output.append("Answer:")
         output.append(answer)

mcp_code_indexer/error_handler.py CHANGED Viewed

@@ -254,6 +254,7 @@ class StructuredFormatter(logging.Formatter):
     def format(self, record: logging.LogRecord) -> str:
         """Format log record as structured JSON."""
         import json
+        from . import __version__
         log_data = {
             "timestamp": datetime.utcnow().isoformat(),
@@ -262,7 +263,8 @@ class StructuredFormatter(logging.Formatter):
             "message": record.getMessage(),
             "module": record.module,
             "function": record.funcName,
-            "line": record.lineno
+            "line": record.lineno,
+            "version": __version__
         }
         # Add structured data if present

mcp-code-indexer 2.4.0__py3-none-any.whl → 3.0.2__py3-none-any.whl

mcp-code-indexer 2.4.0__py3-none-any.whl → 3.0.2__py3-none-any.whl