mcp-code-indexer 3.3.0__tar.gz → 3.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/PKG-INFO +3 -3
  2. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/README.md +2 -2
  3. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/pyproject.toml +1 -1
  4. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/git_hook_handler.py +375 -53
  5. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/LICENSE +0 -0
  6. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/__init__.py +0 -0
  7. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/__main__.py +0 -0
  8. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/ask_handler.py +0 -0
  9. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/claude_api_handler.py +0 -0
  10. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/cleanup_manager.py +0 -0
  11. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/data/stop_words_english.txt +0 -0
  12. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/__init__.py +0 -0
  13. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/connection_health.py +0 -0
  14. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/database.py +0 -0
  15. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/exceptions.py +0 -0
  16. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/models.py +0 -0
  17. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/database/retry_executor.py +0 -0
  18. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/deepask_handler.py +0 -0
  19. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/error_handler.py +0 -0
  20. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/file_scanner.py +0 -0
  21. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/logging_config.py +0 -0
  22. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/main.py +0 -0
  23. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/middleware/__init__.py +0 -0
  24. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/middleware/error_middleware.py +0 -0
  25. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/001_initial.sql +0 -0
  26. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/002_performance_indexes.sql +0 -0
  27. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/003_project_overviews.sql +0 -0
  28. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/004_remove_branch_dependency.sql +0 -0
  29. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/migrations/005_remove_git_remotes.sql +0 -0
  30. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/query_preprocessor.py +0 -0
  31. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/server/__init__.py +0 -0
  32. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/server/mcp_server.py +0 -0
  33. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/tiktoken_cache/9b5ad71b2ce5302211f9c61530b329a4922fc6a4 +0 -0
  34. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/token_counter.py +0 -0
  35. {mcp_code_indexer-3.3.0 → mcp_code_indexer-3.4.1}/src/mcp_code_indexer/tools/__init__.py +0 -0
--- mcp_code_indexer-3.3.0/PKG-INFO
+++ mcp_code_indexer-3.4.1/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: mcp-code-indexer
-Version: 3.3.0
+Version: 3.4.1
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 License: MIT
 Keywords: mcp,model-context-protocol,code-indexer,ai-tools,codebase-navigation,file-descriptions,llm-tools
@@ -40,8 +40,8 @@ Description-Content-Type: text/markdown
 
 # MCP Code Indexer 🚀
 
-[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?30)](https://badge.fury.io/py/mcp-code-indexer)
-[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?30)](https://pypi.org/project/mcp-code-indexer/)
+[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?32)](https://badge.fury.io/py/mcp-code-indexer)
+[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?32)](https://pypi.org/project/mcp-code-indexer/)
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 
 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
--- mcp_code_indexer-3.3.0/README.md
+++ mcp_code_indexer-3.4.1/README.md
@@ -1,7 +1,7 @@
 # MCP Code Indexer 🚀
 
-[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?30)](https://badge.fury.io/py/mcp-code-indexer)
-[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?30)](https://pypi.org/project/mcp-code-indexer/)
+[![PyPI version](https://badge.fury.io/py/mcp-code-indexer.svg?32)](https://badge.fury.io/py/mcp-code-indexer)
+[![Python](https://img.shields.io/pypi/pyversions/mcp-code-indexer.svg?32)](https://pypi.org/project/mcp-code-indexer/)
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 
 A production-ready **Model Context Protocol (MCP) server** that revolutionizes how AI agents navigate and understand codebases. Built for high-concurrency environments with advanced database resilience, the server provides instant access to intelligent descriptions, semantic search, and context-aware recommendations while maintaining 800+ writes/sec throughput.
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "mcp-code-indexer"
7
- version = "3.3.0"
7
+ version = "3.4.1"
8
8
  description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
9
9
  authors = ["MCP Code Indexer Contributors"]
10
10
  maintainers = ["MCP Code Indexer Contributors"]
--- mcp_code_indexer-3.3.0/src/mcp_code_indexer/git_hook_handler.py
+++ mcp_code_indexer-3.4.1/src/mcp_code_indexer/git_hook_handler.py
@@ -76,6 +76,7 @@ class GitHookHandler:
         self.config = {
             "model": os.getenv("MCP_GITHOOK_MODEL", self.OPENROUTER_MODEL),
             "max_diff_tokens": 136000,  # Skip if diff larger than this (in tokens)
+            "chunk_token_limit": 100000,  # Target token limit per chunk
             "timeout": 300,  # 5 minutes
             "temperature": 0.3,  # Lower temperature for consistent updates
         }
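Note: the new "chunk_token_limit" works alongside the existing "max_diff_tokens" ceiling rather than replacing it. A rough sketch of how the two knobs interact, using the values from this hunk (the helper function is illustrative only, not part of the package):

    # Illustrative: which analysis path a prompt of a given size ends up on,
    # per the escalation logic added later in this file.
    def expected_path(prompt_tokens: int) -> str:
        MAX_DIFF_TOKENS = 136_000    # single-stage / per-stage ceiling
        CHUNK_TOKEN_LIMIT = 100_000  # target size for each chunked prompt
        if prompt_tokens <= MAX_DIFF_TOKENS:
            return "single-stage"
        # Two-stage is tried next; if either stage's prompt still exceeds
        # MAX_DIFF_TOKENS it raises, and chunked processing takes over,
        # aiming for prompts near CHUNK_TOKEN_LIMIT.
        return "two-stage, escalating to chunked if a stage is still too large"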
@@ -87,6 +88,21 @@ class GitHookHandler:
                 "OPENROUTER_API_KEY environment variable is required for git hook mode"
             )
 
+    def _log_and_print(self, message: str, level: str = "info") -> None:
+        """
+        Log message and also print to stdout for user visibility.
+
+        Args:
+            message: Message to log and print
+            level: Log level (info, warning, error)
+        """
+        # Log to logger
+        getattr(self.logger, level)(message)
+
+        # Also print to stdout with prefix for visibility
+        prefix = "🔍" if level == "info" else "⚠️" if level == "warning" else "❌"
+        print(f"{prefix} {message}")
+
     async def run_githook_mode(
         self,
         commit_hash: Optional[str] = None,
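For context, the new helper mirrors each message to both the configured logger and stdout. Hypothetical calls (the "handler" instance name is assumed) and the stdout they would produce:

    handler._log_and_print("Analyzing diff: 1,234 tokens")
    # logs at INFO and prints: 🔍 Analyzing diff: 1,234 tokens

    handler._log_and_print("Chunk still too large", level="warning")
    # logs at WARNING and prints: ⚠️ Chunk still too large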
@@ -102,21 +118,20 @@ class GitHookHandler:
         This is the main entry point for git hook functionality.
         """
         try:
-            self.logger.info("=== Git Hook Analysis Started ===")
+            self._log_and_print("=== Git Hook Analysis Started ===")
             if commit_hash:
-                self.logger.info(f"Mode: Single commit ({commit_hash})")
+                self._log_and_print(f"Mode: Single commit ({commit_hash})")
             elif commit_range:
-                self.logger.info(
+                self._log_and_print(
                     f"Mode: Commit range ({commit_range[0]}..{commit_range[1]})"
                 )
             else:
-                self.logger.info("Mode: Staged changes")
+                self._log_and_print("Mode: Staged changes")
 
             # Get git info from current directory
             project_info = await self._identify_project_from_git()
-            self.logger.info(
-                f"Project identified: {project_info.get('name', 'Unknown')} "
-                f"at {project_info.get('folderPath', 'Unknown')}"
+            self._log_and_print(
+                f"Project: {project_info.get('name', 'Unknown')}"
             )
 
             # Get git diff and commit message based on mode
@@ -136,25 +151,23 @@ class GitHookHandler:
 
             # Log diff details
             if not git_diff:
-                self.logger.info("Skipping git hook update - no git diff")
+                self._log_and_print("No changes detected, skipping analysis")
                 return
 
             diff_tokens = self.token_counter.count_tokens(git_diff)
-            self.logger.info(f"Git diff: {diff_tokens} tokens")
+            self._log_and_print(f"Analyzing diff: {diff_tokens:,} tokens")
 
             # Fetch current state
-            self.logger.info("Fetching current project state...")
+            self._log_and_print("Fetching current project state...")
             current_overview = await self._get_project_overview(project_info)
             current_descriptions = await self._get_all_descriptions(project_info)
             changed_files = self._extract_changed_files(git_diff)
 
             if not changed_files:
-                self.logger.info("No changed files detected in git diff")
+                self._log_and_print("No files changed, skipping analysis")
                 return
 
-            self.logger.info(
-                f"Found {len(changed_files)} changed files: {', '.join(changed_files)}"
-            )
+            self._log_and_print(f"Found {len(changed_files)} changed files")
             overview_tokens = (
                 self.token_counter.count_tokens(current_overview)
                 if current_overview
@@ -174,13 +187,23 @@ class GitHookHandler:
 
             # Apply updates to database
             await self._apply_updates(project_info, updates)
-
-            self.logger.info(
-                f"Git hook update completed successfully for {len(changed_files)} files"
-            )
+
+            # Count actual updates
+            file_update_count = len(updates.get("file_updates", {}))
+            overview_updated = bool(updates.get("overview_update"))
+
+            if file_update_count > 0 or overview_updated:
+                update_parts = []
+                if file_update_count > 0:
+                    update_parts.append(f"{file_update_count} file descriptions")
+                if overview_updated:
+                    update_parts.append("project overview")
+                self._log_and_print(f"✅ Updated {' and '.join(update_parts)}")
+            else:
+                self._log_and_print("✅ Analysis complete, no updates needed")
 
         except Exception as e:
-            self.logger.error(f"Git hook mode failed: {e}")
+            self._log_and_print(f"Git hook analysis failed: {e}", "error")
             self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
             import traceback
 
@@ -197,8 +220,8 @@ class GitHookHandler:
         changed_files: List[str],
     ) -> Dict[str, Any]:
         """
-        Smart staging: Try single-stage first, fall back to two-stage if
-        token limit exceeded.
+        Smart staging: Try single-stage first, fall back to two-stage,
+        then chunked processing if needed.
 
         Args:
             git_diff: Git diff content
@@ -229,35 +252,31 @@ class GitHookHandler:
 
         if prompt_tokens <= token_limit:
             # Use single-stage approach
-            self.logger.info("Using single-stage analysis (within token limit)")
+            self._log_and_print("Using single-stage analysis")
             result = await self._call_openrouter(single_stage_prompt)
-            self.logger.info("Single-stage analysis completed")
             return result
         else:
             # Fall back to two-stage approach
-            self.logger.info(
-                f"Single-stage prompt too large ({prompt_tokens} tokens), "
-                f"falling back to two-stage analysis"
-            )
-
-            # Stage 1: Check if overview needs updating
-            overview_updates = await self._analyze_overview_updates(
-                git_diff, commit_message, current_overview, changed_files
-            )
-
-            # Stage 2: Update file descriptions
-            file_updates = await self._analyze_file_updates(
-                git_diff, commit_message, current_descriptions, changed_files
-            )
-
-            # Combine updates
-            updates = {
-                "file_updates": file_updates.get("file_updates", {}),
-                "overview_update": overview_updates.get("overview_update"),
-            }
+            self._log_and_print("Using two-stage analysis (large diff)")
 
-            self.logger.info("Two-stage analysis completed")
-            return updates
+            # Try two-stage analysis first
+            try:
+                return await self._analyze_with_two_stage(
+                    git_diff, commit_message, current_overview,
+                    current_descriptions, changed_files
+                )
+            except GitHookError as e:
+                if "too large" in str(e).lower():
+                    # Fall back to chunked processing
+                    self._log_and_print(
+                        "Using chunked processing (very large diff)"
+                    )
+                    return await self._analyze_with_chunking(
+                        git_diff, commit_message, current_overview,
+                        current_descriptions, changed_files
+                    )
+                else:
+                    raise
 
     def _build_single_stage_prompt(
         self,
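The hunk above replaces the unconditional two-stage fallback with a three-tier escalation. Condensed to its control flow (same names as the diff; argument lists elided with "..." for brevity):

    # Condensed restatement of the escalation above, not additional behavior.
    if prompt_tokens <= token_limit:
        return await self._call_openrouter(single_stage_prompt)  # tier 1
    try:
        return await self._analyze_with_two_stage(...)           # tier 2
    except GitHookError as e:
        if "too large" in str(e).lower():                        # raised by either stage
            return await self._analyze_with_chunking(...)        # tier 3
        raise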
@@ -571,6 +590,118 @@ Return ONLY a JSON object:
             self.logger.warning(f"Failed to get file descriptions: {e}")
             return {}
 
+    async def _analyze_with_two_stage(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Two-stage analysis: overview updates first, then file updates.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        # Stage 1: Check if overview needs updating
+        overview_updates = await self._analyze_overview_updates(
+            git_diff, commit_message, current_overview, changed_files
+        )
+
+        # Stage 2: Update file descriptions
+        file_updates = await self._analyze_file_updates(
+            git_diff, commit_message, current_descriptions, changed_files
+        )
+
+        # Combine updates
+        updates = {
+            "file_updates": file_updates.get("file_updates", {}),
+            "overview_update": overview_updates.get("overview_update"),
+        }
+
+        self.logger.info("Two-stage analysis completed")
+        return updates
+
+    async def _analyze_with_chunking(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Chunked processing: Break large diffs into manageable chunks.
+
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        self._log_and_print(
+            f"Starting chunked processing for {len(changed_files)} files"
+        )
+
+        # First, handle overview separately if needed
+        overview_update = None
+        if current_overview:
+            overview_update = await self._analyze_overview_lightweight(
+                commit_message, current_overview, changed_files
+            )
+
+        # Break changed files into chunks and process file descriptions
+        chunk_size = await self._calculate_optimal_chunk_size(
+            git_diff, changed_files
+        )
+
+        self._log_and_print(f"Processing in {chunk_size}-file chunks")
+
+        all_file_updates = {}
+
+        for i in range(0, len(changed_files), chunk_size):
+            chunk_files = changed_files[i:i + chunk_size]
+            chunk_number = (i // chunk_size) + 1
+            total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size
+
+            self._log_and_print(
+                f"Processing chunk {chunk_number}/{total_chunks} "
+                f"({len(chunk_files)} files)"
+            )
+
+            # Extract diff content for this chunk
+            chunk_diff = self._extract_chunk_diff(git_diff, chunk_files)
+
+            # Process this chunk
+            chunk_updates = await self._analyze_file_chunk(
+                chunk_diff, commit_message, current_descriptions, chunk_files
+            )
+
+            # Merge results
+            if chunk_updates and "file_updates" in chunk_updates:
+                all_file_updates.update(chunk_updates["file_updates"])
+
+        self.logger.info(
+            f"Chunked processing completed: updated {len(all_file_updates)} files"
+        )
+
+        return {
+            "file_updates": all_file_updates,
+            "overview_update": overview_update
+        }
+
     async def _analyze_overview_updates(
         self,
         git_diff: str,
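The total_chunks expression in the loop above is integer ceiling division. With hypothetical inputs of 23 changed files and a chunk size of 5, it yields 5 chunks sized 5, 5, 5, 5, and 3:

    changed_files = [f"file_{i}.py" for i in range(23)]  # hypothetical input
    chunk_size = 5
    total_chunks = (len(changed_files) + chunk_size - 1) // chunk_size  # (23 + 4) // 5 == 5
    sizes = [len(changed_files[i:i + chunk_size])
             for i in range(0, len(changed_files), chunk_size)]
    assert total_chunks == 5 and sizes == [5, 5, 5, 5, 3]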
@@ -632,11 +763,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 1 prompt too large ({prompt_tokens} tokens), "
-                f"skipping overview analysis"
+            raise GitHookError(
+                f"Stage 1 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"overview_update": None}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -708,11 +837,9 @@ Return ONLY a JSON object:
         self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
 
         if prompt_tokens > self.config["max_diff_tokens"]:
-            self.logger.warning(
-                f"Stage 2 prompt too large ({prompt_tokens} tokens), "
-                f"skipping file analysis"
+            raise GitHookError(
+                f"Stage 2 prompt too large ({prompt_tokens} tokens)"
             )
-            return {"file_updates": {}}
 
         # Call OpenRouter API
         result = await self._call_openrouter(prompt)
@@ -720,6 +847,201 @@ Return ONLY a JSON object:
 
         return result
 
+    async def _analyze_overview_lightweight(
+        self,
+        commit_message: str,
+        current_overview: str,
+        changed_files: List[str],
+    ) -> Optional[str]:
+        """
+        Lightweight overview analysis without including full diff.
+
+        Args:
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            changed_files: List of changed file paths
+
+        Returns:
+            Updated overview text or None
+        """
+        self.logger.info("Lightweight overview analysis...")
+
+        prompt = f"""Analyze this commit to determine if project overview needs updating.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT PROJECT OVERVIEW:
+{current_overview or "No overview available"}
+
+CHANGED FILES:
+{', '.join(changed_files)}
+
+INSTRUCTIONS:
+Update project overview ONLY if there are major structural changes like:
+- New major features or components (indicated by commit message or new directories)
+- Architectural changes (new patterns, frameworks, or approaches)
+- Significant dependency additions (Cargo.toml, package.json, pyproject.toml changes)
+- New API endpoints or workflows
+- Changes to build/deployment processes
+
+Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
+
+Return ONLY a JSON object:
+{{
+    "overview_update": "Updated overview text" or null
+}}"""
+
+        try:
+            result = await self._call_openrouter(prompt)
+            return result.get("overview_update")
+        except Exception as e:
+            self.logger.warning(f"Lightweight overview analysis failed: {e}")
+            return None
+
+    async def _calculate_optimal_chunk_size(
+        self, git_diff: str, changed_files: List[str]
+    ) -> int:
+        """
+        Calculate optimal chunk size based on diff content.
+
+        Args:
+            git_diff: Full git diff content
+            changed_files: List of changed file paths
+
+        Returns:
+            Optimal number of files per chunk
+        """
+        if not changed_files:
+            return 10  # Default chunk size
+
+        # Estimate average diff size per file
+        total_diff_tokens = self.token_counter.count_tokens(git_diff)
+        avg_tokens_per_file = total_diff_tokens / len(changed_files)
+
+        # Target chunk token limit
+        chunk_limit = self.config.get("chunk_token_limit", 100000)
+
+        # Calculate chunk size with buffer for overhead
+        overhead_factor = 0.7  # Reserve 30% for prompt overhead
+        effective_limit = chunk_limit * overhead_factor
+
+        chunk_size = max(1, int(effective_limit / avg_tokens_per_file))
+
+        # Cap at reasonable limits
+        chunk_size = min(chunk_size, 50)  # Max 50 files per chunk
+        chunk_size = max(chunk_size, 5)  # Min 5 files per chunk
+
+        self.logger.info(
+            f"Calculated chunk size: {chunk_size} files "
+            f"(avg {avg_tokens_per_file:.0f} tokens/file, "
+            f"target {chunk_limit} tokens/chunk)"
+        )
+
+        return chunk_size
+
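Worked through with hypothetical numbers: a 600,000-token diff touching 120 files averages 5,000 tokens per file; 70% of the 100,000-token chunk budget leaves 70,000 effective tokens, so the method returns 14 files per chunk, which already sits inside the 5-50 clamp:

    total_diff_tokens = 600_000                           # hypothetical diff size
    num_files = 120                                       # hypothetical file count
    avg_tokens_per_file = total_diff_tokens / num_files   # 5000.0
    effective_limit = 100_000 * 0.7                       # 70000.0
    chunk_size = max(1, int(effective_limit / avg_tokens_per_file))  # 14
    chunk_size = max(min(chunk_size, 50), 5)              # still 14 after clamping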
+ def _extract_chunk_diff(self, git_diff: str, chunk_files: List[str]) -> str:
944
+ """
945
+ Extract diff content for specific files.
946
+
947
+ Args:
948
+ git_diff: Full git diff content
949
+ chunk_files: List of files to include in chunk
950
+
951
+ Returns:
952
+ Filtered diff content for chunk files only
953
+ """
954
+ lines = git_diff.split('\n')
955
+ chunk_lines = []
956
+ current_file = None
957
+ include_section = False
958
+
959
+ for line in lines:
960
+ if line.startswith('diff --git'):
961
+ # Parse file path from diff header
962
+ parts = line.split(' ')
963
+ if len(parts) >= 4:
964
+ file_path = parts[2][2:] # Remove 'a/' prefix
965
+ current_file = file_path
966
+ include_section = file_path in chunk_files
967
+
968
+ if include_section:
969
+ chunk_lines.append(line)
970
+
971
+ return '\n'.join(chunk_lines)
972
+
973
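The filter above keys off unified-diff file headers. For a header line such as the hypothetical one below, split(' ') yields four parts, and parts[2][2:] drops the "a/" prefix to recover the path that is matched against chunk_files:

    line = "diff --git a/src/mcp_code_indexer/main.py b/src/mcp_code_indexer/main.py"
    parts = line.split(' ')   # ['diff', '--git', 'a/src/...', 'b/src/...']
    file_path = parts[2][2:]  # strip the leading 'a/'
    assert file_path == "src/mcp_code_indexer/main.py"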
+    async def _analyze_file_chunk(
+        self,
+        chunk_diff: str,
+        commit_message: str,
+        current_descriptions: Dict[str, str],
+        chunk_files: List[str],
+    ) -> Dict[str, Any]:
+        """
+        Analyze a chunk of files for description updates.
+
+        Args:
+            chunk_diff: Git diff for this chunk only
+            commit_message: Commit message explaining the changes
+            current_descriptions: Current file descriptions
+            chunk_files: List of files in this chunk
+
+        Returns:
+            Dict with file_updates for this chunk
+        """
+        # Only include descriptions for files in this chunk
+        relevant_descriptions = {
+            path: desc
+            for path, desc in current_descriptions.items()
+            if path in chunk_files
+        }
+
+        prompt = f"""Analyze this git commit chunk and update file descriptions.
+
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+
+CURRENT FILE DESCRIPTIONS (for chunk files only):
+{json.dumps(relevant_descriptions, indent=2)}
+
+CHUNK FILES:
+{', '.join(chunk_files)}
+
+GIT DIFF (chunk only):
+{chunk_diff}
+
+INSTRUCTIONS:
+Use the COMMIT MESSAGE to understand the intent and context of the changes.
+Update descriptions for files that have changed significantly.
+Only include files that need actual description updates.
+
+Return ONLY a JSON object:
+{{
+    "file_updates": {{
+        "path/to/file1.py": "Updated description for file1",
+        "path/to/file2.js": "Updated description for file2"
+    }}
+}}"""
+
+        # Check token count
+        prompt_tokens = self.token_counter.count_tokens(prompt)
+        self.logger.info(f"Chunk prompt: {prompt_tokens} tokens")
+
+        if prompt_tokens > self.config.get("chunk_token_limit", 100000):
+            self.logger.warning(
+                f"Chunk still too large ({prompt_tokens} tokens), "
+                f"skipping {len(chunk_files)} files"
+            )
+            return {"file_updates": {}}
+
+        # Call OpenRouter API
+        try:
+            result = await self._call_openrouter(prompt)
+            return result
+        except Exception as e:
+            self.logger.error(f"Failed to analyze chunk: {e}")
+            return {"file_updates": {}}
+
     @retry(
         wait=wait_exponential(multiplier=1, min=4, max=60),
         stop=stop_after_attempt(5),