mcp-code-indexer 2.3.0__tar.gz → 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. {mcp_code_indexer-2.3.0/src/mcp_code_indexer.egg-info → mcp_code_indexer-3.0.0}/PKG-INFO +3 -3
  2. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/README.md +2 -2
  3. mcp_code_indexer-3.0.0/migrations/004_remove_branch_dependency.sql +166 -0
  4. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/pyproject.toml +1 -1
  5. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/ask_handler.py +5 -7
  6. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/claude_api_handler.py +2 -2
  7. mcp_code_indexer-3.0.0/src/mcp_code_indexer/cleanup_manager.py +255 -0
  8. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/database.py +82 -90
  9. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/models.py +3 -5
  10. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/deepask_handler.py +5 -9
  11. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/git_hook_handler.py +2 -9
  12. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/main.py +1 -0
  13. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/server/mcp_server.py +107 -209
  14. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0/src/mcp_code_indexer.egg-info}/PKG-INFO +3 -3
  15. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer.egg-info/SOURCES.txt +2 -0
  16. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/LICENSE +0 -0
  17. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/MANIFEST.in +0 -0
  18. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/api-reference.md +0 -0
  19. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/architecture.md +0 -0
  20. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/configuration.md +0 -0
  21. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/contributing.md +0 -0
  22. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/database-resilience.md +0 -0
  23. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/git-hook-setup.md +0 -0
  24. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/monitoring.md +0 -0
  25. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/docs/performance-tuning.md +0 -0
  26. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/migrations/001_initial.sql +0 -0
  27. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/migrations/002_performance_indexes.sql +0 -0
  28. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/migrations/003_project_overviews.sql +0 -0
  29. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/requirements.txt +0 -0
  30. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/setup.cfg +0 -0
  31. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/setup.py +0 -0
  32. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/__init__.py +0 -0
  33. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/__main__.py +0 -0
  34. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  35. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/__init__.py +0 -0
  36. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/connection_health.py +0 -0
  37. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/exceptions.py +0 -0
  38. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/database/retry_executor.py +0 -0
  39. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/error_handler.py +0 -0
  40. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/file_scanner.py +0 -0
  41. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/logging_config.py +0 -0
  42. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/merge_handler.py +0 -0
  43. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  44. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  45. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/query_preprocessor.py +0 -0
  46. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/server/__init__.py +0 -0
  47. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  48. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/token_counter.py +0 -0
  49. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer/tools/__init__.py +0 -0
  50. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer.egg-info/dependency_links.txt +0 -0
  51. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer.egg-info/entry_points.txt +0 -0
  52. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer.egg-info/requires.txt +0 -0
  53. {mcp_code_indexer-2.3.0 → mcp_code_indexer-3.0.0}/src/mcp_code_indexer.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mcp-code-indexer
3
- Version: 2.3.0
3
+ Version: 3.0.0
4
4
  Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
5
5
  Author: MCP Code Indexer Contributors
6
6
  Maintainer: MCP Code Indexer Contributors
@@ -59,8 +59,8 @@ Dynamic: requires-python
59
59
 
60
60
  # MCP Code Indexer 🚀
61
61
 
62
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?17)](https://badge.fury.io/py/mcp-code-indexer)
63
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?17)](https://pypi.org/project/mcp-code-indexer/)
62
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?19)](https://badge.fury.io/py/mcp-code-indexer)
63
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?19)](https://pypi.org/project/mcp-code-indexer/)
64
64
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
65
65
 
66
66
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -1,7 +1,7 @@
1
1
  # MCP Code Indexer 🚀
2
2
 
3
- [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?17)](https://badge.fury.io/py/mcp-code-indexer)
4
- [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?17)](https://pypi.org/project/mcp-code-indexer/)
3
+ [![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?19)](https://badge.fury.io/py/mcp-code-indexer)
4
+ [![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?19)](https://pypi.org/project/mcp-code-indexer/)
5
5
  [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
6
6
 
7
7
  A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -0,0 +1,166 @@
1
-- Migration 004: Remove branch dependency from the database schema.
-- Consolidates multi-branch data and simplifies the schema by removing the
-- branch columns from file_descriptions and project_overviews.

-- Ensure WAL mode is enabled for safe migrations (must run outside the
-- transaction below).
PRAGMA journal_mode=WAL;

-- Enable foreign key support.
PRAGMA foreign_keys=ON;

-- Run the whole migration atomically.
BEGIN TRANSACTION;

-- New file_descriptions table without the branch column.
-- to_be_cleaned: UNIX timestamp marking the row as soft-deleted; NULL = active.
CREATE TABLE file_descriptions_new (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id TEXT NOT NULL,
    file_path TEXT NOT NULL,
    description TEXT NOT NULL,
    file_hash TEXT,
    last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
    version INTEGER DEFAULT 1,
    source_project_id TEXT,
    to_be_cleaned INTEGER DEFAULT NULL,
    UNIQUE(project_id, file_path),
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE,
    FOREIGN KEY (source_project_id) REFERENCES projects(id) ON DELETE SET NULL
);

-- Consolidate data from the old table: multi-branch rows collapse to the
-- single most recently modified description per (project_id, file_path).
INSERT INTO file_descriptions_new (
    project_id, file_path, description, file_hash, last_modified, version, source_project_id
)
SELECT
    project_id,
    file_path,
    description,
    file_hash,
    last_modified,
    version,
    source_project_id
FROM (
    SELECT
        project_id,
        file_path,
        description,
        file_hash,
        last_modified,
        version,
        source_project_id,
        ROW_NUMBER() OVER (
            PARTITION BY project_id, file_path
            ORDER BY last_modified DESC
        ) AS rn
    FROM file_descriptions
) ranked_descriptions
WHERE rn = 1;

-- New project_overviews table without the branch column.
CREATE TABLE project_overviews_new (
    project_id TEXT PRIMARY KEY,
    overview TEXT NOT NULL,
    last_modified TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    total_files INTEGER NOT NULL DEFAULT 0,
    total_tokens INTEGER NOT NULL DEFAULT 0,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

-- Consolidate project overviews: keep the most comprehensive one per project
-- (highest token count, ties broken by recency).
INSERT INTO project_overviews_new (
    project_id, overview, last_modified, total_files, total_tokens
)
SELECT
    project_id,
    overview,
    last_modified,
    total_files,
    total_tokens
FROM (
    SELECT
        project_id,
        overview,
        last_modified,
        total_files,
        total_tokens,
        ROW_NUMBER() OVER (
            PARTITION BY project_id
            ORDER BY total_tokens DESC, last_modified DESC
        ) AS rn
    FROM project_overviews
) ranked_overviews
WHERE rn = 1;

-- Drop FTS5 triggers bound to the old table.
DROP TRIGGER IF EXISTS file_descriptions_ai;
DROP TRIGGER IF EXISTS file_descriptions_ad;
DROP TRIGGER IF EXISTS file_descriptions_au;

-- Drop the old FTS5 virtual table.
DROP TABLE IF EXISTS file_descriptions_fts;

-- Drop old tables (this also drops their indexes, freeing the canonical
-- index names for re-use below).
DROP TABLE file_descriptions;
DROP TABLE project_overviews;

-- Rename new tables to the original names.
ALTER TABLE file_descriptions_new RENAME TO file_descriptions;
ALTER TABLE project_overviews_new RENAME TO project_overviews;

-- Create indexes AFTER the rename so their names do not keep a stale
-- "_new" suffix in the final schema.
CREATE INDEX idx_file_descriptions_project_id ON file_descriptions(project_id);
CREATE INDEX idx_file_descriptions_file_hash ON file_descriptions(file_hash);
CREATE INDEX idx_file_descriptions_last_modified ON file_descriptions(last_modified);
CREATE INDEX idx_file_descriptions_to_be_cleaned ON file_descriptions(to_be_cleaned);
CREATE INDEX idx_project_overviews_last_modified ON project_overviews(last_modified);

-- New FTS5 external-content virtual table without the branch column.
CREATE VIRTUAL TABLE file_descriptions_fts USING fts5(
    project_id,
    file_path,
    description,
    content='file_descriptions',
    content_rowid='id'
);

-- Populate FTS5 with existing data (active records only).
INSERT INTO file_descriptions_fts(rowid, project_id, file_path, description)
SELECT id, project_id, file_path, description
FROM file_descriptions
WHERE to_be_cleaned IS NULL;

-- FTS5 sync triggers.  For external-content FTS5 tables a 'delete' entry must
-- only be issued for rows that were actually indexed, otherwise the index is
-- corrupted.  Rows with to_be_cleaned set are never indexed, so every trigger
-- below is guarded on that flag.

-- Index newly inserted rows only while they are active.
CREATE TRIGGER file_descriptions_ai AFTER INSERT ON file_descriptions
WHEN new.to_be_cleaned IS NULL
BEGIN
    INSERT INTO file_descriptions_fts(rowid, project_id, file_path, description)
    VALUES (new.id, new.project_id, new.file_path, new.description);
END;

-- Remove deleted rows from the index only if they were indexed.
CREATE TRIGGER file_descriptions_ad AFTER DELETE ON file_descriptions
WHEN old.to_be_cleaned IS NULL
BEGIN
    INSERT INTO file_descriptions_fts(file_descriptions_fts, rowid, project_id, file_path, description)
    VALUES ('delete', old.id, old.project_id, old.file_path, old.description);
END;

CREATE TRIGGER file_descriptions_au AFTER UPDATE ON file_descriptions BEGIN
    -- Drop the previous version from FTS only if it was actually indexed.
    INSERT INTO file_descriptions_fts(file_descriptions_fts, rowid, project_id, file_path, description)
    SELECT 'delete', old.id, old.project_id, old.file_path, old.description
    WHERE old.to_be_cleaned IS NULL;

    -- (Re-)index the new version only while it is active.
    INSERT INTO file_descriptions_fts(rowid, project_id, file_path, description)
    SELECT new.id, new.project_id, new.file_path, new.description
    WHERE new.to_be_cleaned IS NULL;
END;

-- merge_conflicts keeps its structure (used only for temporary conflict
-- resolution); just replace the branch-era index with a branch-free one.
DROP INDEX IF EXISTS idx_merge_conflicts_project;
CREATE INDEX idx_merge_conflicts_project ON merge_conflicts(project_id, created);

-- Commit the migration.
COMMIT;
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "mcp-code-indexer"
7
- version = "2.3.0"
7
+ version = "3.0.0"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -51,7 +51,7 @@ class AskHandler(ClaudeAPIHandler):
51
51
  Ask a question about the project using Claude API.
52
52
 
53
53
  Args:
54
- project_info: Project information dict with projectName, folderPath, branch, etc.
54
+ project_info: Project information dict with projectName, folderPath, etc.
55
55
  question: User's question about the project
56
56
  include_overview: Whether to include project overview in context
57
57
 
@@ -112,8 +112,7 @@ class AskHandler(ClaudeAPIHandler):
112
112
  "response_tokens": response.usage.get("completion_tokens") if response.usage else None,
113
113
  "total_tokens": response.usage.get("total_tokens") if response.usage else None
114
114
  },
115
- "include_overview": include_overview,
116
- "branch": project_info.get("branch", "unknown")
115
+ "include_overview": include_overview
117
116
  }
118
117
  }
119
118
 
@@ -141,10 +140,9 @@ class AskHandler(ClaudeAPIHandler):
141
140
  Formatted prompt string
142
141
  """
143
142
  project_name = project_info["projectName"]
144
- branch = project_info.get("branch", "unknown")
145
143
 
146
144
  if overview.strip():
147
- prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).
145
+ prompt = f"""Please answer the following question about the codebase "{project_name}".
148
146
 
149
147
  PROJECT OVERVIEW:
150
148
  {overview}
@@ -154,7 +152,7 @@ QUESTION:
154
152
 
155
153
  Please provide a clear, detailed answer based on the project overview above. If the overview doesn't contain enough information to fully answer the question, please say so and suggest what additional information might be needed."""
156
154
  else:
157
- prompt = f"""Please answer the following question about the codebase "{project_name}" (branch: {branch}).
155
+ prompt = f"""Please answer the following question about the codebase "{project_name}".
158
156
 
159
157
  Note: No project overview is available for this codebase.
160
158
 
@@ -200,7 +198,7 @@ If the project overview is insufficient to answer the question completely, expla
200
198
 
201
199
  output = []
202
200
  output.append(f"Question: {result['question']}")
203
- output.append(f"Project: {result['project_name']} (branch: {metadata['branch']})")
201
+ output.append(f"Project: {result['project_name']}")
204
202
  output.append("")
205
203
  output.append("Answer:")
206
204
  output.append(answer)
@@ -331,7 +331,7 @@ class ClaudeAPIHandler:
331
331
  Get project overview from database.
332
332
 
333
333
  Args:
334
- project_info: Project information dict with projectName, folderPath, branch, etc.
334
+ project_info: Project information dict with projectName, folderPath, etc.
335
335
 
336
336
  Returns:
337
337
  Project overview text or empty string if not found
@@ -345,7 +345,7 @@ class ClaudeAPIHandler:
345
345
  return ""
346
346
 
347
347
  # Get overview for the project using project.id
348
- overview_result = await self.db_manager.get_project_overview(project.id, project_info["branch"])
348
+ overview_result = await self.db_manager.get_project_overview(project.id)
349
349
  if overview_result:
350
350
  return overview_result.overview
351
351
  else:
@@ -0,0 +1,255 @@
1
+ """
2
+ Cleanup Manager for MCP Code Indexer.
3
+
4
+ Handles soft deletion and retention policies for file descriptions
5
+ that are marked for cleanup. Provides periodic cleanup operations
6
+ and manual cleanup methods.
7
+ """
8
+
9
+ import logging
10
+ import time
11
+ from typing import List, Optional
12
+ from pathlib import Path
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class CleanupManager:
    """
    Manages cleanup operations for file descriptions with retention policies.

    Rows are soft-deleted by stamping their ``to_be_cleaned`` column with a
    UNIX timestamp (``NULL`` means active).  Once a stamp is older than the
    retention period, :meth:`perform_cleanup` permanently deletes the row.
    """

    def __init__(self, db_manager, retention_months: int = 6):
        """
        Initialize cleanup manager.

        Args:
            db_manager: DatabaseManager instance providing connections and
                retry-aware transaction execution.
            retention_months: Number of months soft-deleted records are kept
                before becoming eligible for permanent deletion.
        """
        self.db_manager = db_manager
        self.retention_months = retention_months

    def _retention_cutoff(self) -> int:
        """Return the UNIX timestamp before which soft-deleted rows may be purged.

        A month is approximated as 30 days; rows stamped earlier than the
        returned value have exceeded the retention period.
        """
        return int(time.time()) - self.retention_months * 30 * 24 * 60 * 60

    async def mark_file_for_cleanup(self, project_id: str, file_path: str) -> bool:
        """
        Mark a specific file for cleanup by setting its to_be_cleaned timestamp.

        Rows already marked are left untouched so their retention clock does
        not restart.

        Args:
            project_id: Project identifier
            file_path: Path to file to mark for cleanup

        Returns:
            True if an active row was marked, False if no matching active row
        """
        cleanup_timestamp = int(time.time())

        async with self.db_manager.get_write_connection_with_retry("mark_file_for_cleanup") as db:
            cursor = await db.execute(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = ?
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                """,
                (cleanup_timestamp, project_id, file_path)
            )
            await db.commit()

            # rowcount > 0 means an active row existed and was stamped.
            return cursor.rowcount > 0

    async def mark_files_for_cleanup(self, project_id: str, file_paths: List[str]) -> int:
        """
        Mark multiple files for cleanup in a single batch transaction.

        Args:
            project_id: Project identifier
            file_paths: List of file paths to mark for cleanup

        Returns:
            Number of files actually marked (already-marked or missing paths
            are not counted)
        """
        if not file_paths:
            return 0

        cleanup_timestamp = int(time.time())

        async def batch_operation(conn):
            # executemany accumulates the total affected-row count in rowcount.
            rows = [(cleanup_timestamp, project_id, path) for path in file_paths]
            cursor = await conn.executemany(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = ?
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NULL
                """,
                rows
            )
            return cursor.rowcount

        marked_count = await self.db_manager.execute_transaction_with_retry(
            batch_operation,
            f"mark_files_for_cleanup_{len(file_paths)}_files",
            timeout_seconds=30.0
        )

        logger.info(f"Marked {marked_count} files for cleanup in project {project_id}")
        return marked_count

    async def restore_file_from_cleanup(self, project_id: str, file_path: str) -> bool:
        """
        Restore a file from cleanup by clearing its to_be_cleaned timestamp.

        Args:
            project_id: Project identifier
            file_path: Path to file to restore

        Returns:
            True if a marked row was restored, False if no marked row matched
        """
        async with self.db_manager.get_write_connection_with_retry("restore_file_from_cleanup") as db:
            cursor = await db.execute(
                """
                UPDATE file_descriptions
                SET to_be_cleaned = NULL
                WHERE project_id = ? AND file_path = ? AND to_be_cleaned IS NOT NULL
                """,
                (project_id, file_path)
            )
            await db.commit()

            return cursor.rowcount > 0

    async def get_files_to_be_cleaned(self, project_id: str) -> List[dict]:
        """
        Get the list of files currently marked for cleanup in a project.

        Args:
            project_id: Project identifier

        Returns:
            List of dicts with ``file_path``, ``marked_for_cleanup`` (UNIX
            timestamp) and ``marked_date`` (local-time string), newest first
        """
        async with self.db_manager.get_connection() as db:
            cursor = await db.execute(
                """
                SELECT file_path, to_be_cleaned
                FROM file_descriptions
                WHERE project_id = ? AND to_be_cleaned IS NOT NULL
                ORDER BY to_be_cleaned DESC, file_path
                """,
                (project_id,)
            )
            rows = await cursor.fetchall()

            return [
                {
                    'file_path': row['file_path'],
                    'marked_for_cleanup': row['to_be_cleaned'],
                    'marked_date': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(row['to_be_cleaned']))
                }
                for row in rows
            ]

    async def perform_cleanup(self, project_id: Optional[str] = None) -> int:
        """
        Permanently delete soft-deleted records older than the retention period.

        Args:
            project_id: If given, only clean up this project; otherwise clean
                all projects.

        Returns:
            Number of records permanently deleted
        """
        cutoff_timestamp = self._retention_cutoff()

        async def cleanup_operation(conn):
            # Same DELETE either way; project_id only adds a scoping predicate.
            where_sql = "to_be_cleaned IS NOT NULL AND to_be_cleaned < ?"
            params: tuple = (cutoff_timestamp,)
            if project_id:
                where_sql = "project_id = ? AND " + where_sql
                params = (project_id, cutoff_timestamp)

            cursor = await conn.execute(
                f"DELETE FROM file_descriptions WHERE {where_sql}",
                params
            )
            return cursor.rowcount

        deleted_count = await self.db_manager.execute_transaction_with_retry(
            cleanup_operation,
            f"perform_cleanup_{project_id or 'all_projects'}",
            timeout_seconds=60.0
        )

        if deleted_count > 0:
            scope = f"project {project_id}" if project_id else "all projects"
            logger.info(f"Permanently deleted {deleted_count} old records from {scope}")

        return deleted_count

    async def get_cleanup_stats(self, project_id: Optional[str] = None) -> dict:
        """
        Get statistics about the cleanup state.

        Args:
            project_id: If given, restrict stats to this project; otherwise
                report across all projects.

        Returns:
            Dict with ``active_files``, ``marked_for_cleanup``,
            ``eligible_for_deletion``, ``retention_months`` and ``cutoff_date``
        """
        cutoff_timestamp = self._retention_cutoff()

        # Optional scoping is kept as a prefix fragment so every query below
        # has a syntactically valid WHERE clause.  Gluing an empty base WHERE
        # onto " AND ..." would otherwise produce invalid SQL in the
        # all-projects case.
        if project_id:
            scope_sql = "project_id = ? AND "
            scope_params: tuple = (project_id,)
        else:
            scope_sql = ""
            scope_params = ()

        async with self.db_manager.get_connection() as db:
            # Active files (not marked for cleanup).
            cursor = await db.execute(
                f"SELECT COUNT(*) FROM file_descriptions WHERE {scope_sql}to_be_cleaned IS NULL",
                scope_params
            )
            active_count = (await cursor.fetchone())[0]

            # Files currently marked for cleanup.
            cursor = await db.execute(
                f"SELECT COUNT(*) FROM file_descriptions WHERE {scope_sql}to_be_cleaned IS NOT NULL",
                scope_params
            )
            marked_count = (await cursor.fetchone())[0]

            # Marked files old enough to be permanently deleted.
            cursor = await db.execute(
                f"SELECT COUNT(*) FROM file_descriptions "
                f"WHERE {scope_sql}to_be_cleaned IS NOT NULL AND to_be_cleaned < ?",
                scope_params + (cutoff_timestamp,)
            )
            eligible_for_deletion = (await cursor.fetchone())[0]

            return {
                'active_files': active_count,
                'marked_for_cleanup': marked_count,
                'eligible_for_deletion': eligible_for_deletion,
                'retention_months': self.retention_months,
                'cutoff_date': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(cutoff_timestamp))
            }