PyPI - mcp-code-indexer - Versions diffs - 1.8.0__tar.gz → 1.9.0__tar.gz - Mend

mcp-code-indexer 1.8.0 (tar.gz) → 1.9.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

{mcp_code_indexer-1.8.0/src/mcp_code_indexer.egg-info → mcp_code_indexer-1.9.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-code-indexer
-Version: 1.8.0
+Version: 1.9.0
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 Author: MCP Code Indexer Contributors
 Maintainer: MCP Code Indexer Contributors

{mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "mcp-code-indexer"
-version = "1.8.0"
+version = "1.9.0"
 description = "MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews."
 readme = "README.md"
 license = {text = "MIT"}

{mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0}/src/mcp_code_indexer/git_hook_handler.py RENAMED Viewed

@@ -119,9 +119,8 @@ class GitHookHandler:
                 self.logger.info(f"Skipping git hook update - no git diff")
                 return
-            diff_chars = len(git_diff)
             diff_tokens = self.token_counter.count_tokens(git_diff)
-            self.logger.info(f"Git diff: {diff_chars} characters, {diff_tokens} tokens")
+            self.logger.info(f"Git diff: {diff_tokens} tokens")
             # Fetch current state
             self.logger.info("Fetching current project state...")
@@ -134,11 +133,69 @@ class GitHookHandler:
                 return
             self.logger.info(f"Found {len(changed_files)} changed files: {', '.join(changed_files)}")
-            self.logger.info(f"Current overview length: {len(current_overview) if current_overview else 0} characters")
+            overview_tokens = self.token_counter.count_tokens(current_overview) if current_overview else 0
+            self.logger.info(f"Current overview: {overview_tokens} tokens")
             self.logger.info(f"Current descriptions count: {len(current_descriptions)}")
-            # Use two-stage approach for large codebases
-            self.logger.info("Starting two-stage analysis approach...")
+            # Try single-stage first, fall back to two-stage if needed
+            updates = await self._analyze_with_smart_staging(
+                git_diff, commit_message, current_overview, current_descriptions, changed_files
+            )
+            # Apply updates to database
+            await self._apply_updates(project_info, updates)
+            self.logger.info(f"Git hook update completed successfully for {len(changed_files)} files")
+        except Exception as e:
+            self.logger.error(f"Git hook mode failed: {e}")
+            self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
+            import traceback
+            self.logger.error(f"Full traceback:\n{traceback.format_exc()}")
+            # Don't fail the git operation - just log the error
+            raise GitHookError(f"Git hook processing failed: {e}")
+    async def _analyze_with_smart_staging(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str]
+    ) -> Dict[str, Any]:
+        """
+        Smart staging: Try single-stage first, fall back to two-stage if token limit exceeded.
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
+        Returns:
+            Dict containing file_updates and overview_update
+        """
+        # Build single-stage prompt and check token count
+        single_stage_prompt = self._build_single_stage_prompt(
+            git_diff, commit_message, current_overview, current_descriptions, changed_files
+        )
+        prompt_tokens = self.token_counter.count_tokens(single_stage_prompt)
+        token_limit = self.config.get("max_diff_tokens", 130000)  # Conservative limit under 136k
+        self.logger.info(f"Single-stage prompt: {prompt_tokens} tokens")
+        self.logger.info(f"Token limit: {token_limit}")
+        if prompt_tokens <= token_limit:
+            # Use single-stage approach
+            self.logger.info("Using single-stage analysis (within token limit)")
+            result = await self._call_openrouter(single_stage_prompt)
+            self.logger.info("Single-stage analysis completed")
+            return result
+        else:
+            # Fall back to two-stage approach
+            self.logger.info(f"Single-stage prompt too large ({prompt_tokens} tokens), falling back to two-stage analysis")
             # Stage 1: Check if overview needs updating
             overview_updates = await self._analyze_overview_updates(
@@ -156,20 +213,76 @@ class GitHookHandler:
                 "overview_update": overview_updates.get("overview_update")
             }
-            self.logger.info(f"Two-stage analysis completed, processing updates...")
-            # Apply updates to database
-            await self._apply_updates(project_info, updates)
-            self.logger.info(f"Git hook update completed successfully for {len(changed_files)} files")
+            self.logger.info("Two-stage analysis completed")
+            return updates
+    def _build_single_stage_prompt(
+        self,
+        git_diff: str,
+        commit_message: str,
+        current_overview: str,
+        current_descriptions: Dict[str, str],
+        changed_files: List[str]
+    ) -> str:
+        """
+        Build single-stage prompt that handles both overview and file updates.
+        Args:
+            git_diff: Git diff content
+            commit_message: Commit message explaining the changes
+            current_overview: Current project overview
+            current_descriptions: Current file descriptions
+            changed_files: List of changed file paths
-        except Exception as e:
-            self.logger.error(f"Git hook mode failed: {e}")
-            self.logger.error(f"Exception details: {type(e).__name__}: {str(e)}")
-            import traceback
-            self.logger.error(f"Full traceback:\n{traceback.format_exc()}")
-            # Don't fail the git operation - just log the error
-            raise GitHookError(f"Git hook processing failed: {e}")
+        Returns:
+            Complete single-stage prompt
+        """
+        # Only include descriptions for changed files to reduce token usage
+        relevant_descriptions = {
+            path: desc for path, desc in current_descriptions.items()
+            if path in changed_files
+        }
+        return f"""Analyze this git commit and update both the project overview (if needed) and file descriptions.
+COMMIT MESSAGE:
+{commit_message or "No commit message available"}
+CURRENT PROJECT OVERVIEW:
+{current_overview or "No overview available"}
+CURRENT FILE DESCRIPTIONS (for changed files only):
+{json.dumps(relevant_descriptions, indent=2)}
+CHANGED FILES:
+{', '.join(changed_files)}
+GIT DIFF:
+{git_diff}
+INSTRUCTIONS:
+1. OVERVIEW UPDATE: Update project overview ONLY if there are major structural changes like:
+   - New major features or components (indicated by commit message or new directories)
+   - Architectural changes (new patterns, frameworks, or approaches)
+   - Significant dependency additions (Cargo.toml, package.json, requirements.txt changes)
+   - New API endpoints or workflows
+   - Changes to build/deployment processes
+   Do NOT update for: bug fixes, small refactors, documentation updates, version bumps.
+   If updating, provide comprehensive narrative (10-20 pages of text) with directory structure, architecture, components, and workflows.
+2. FILE UPDATES: Update descriptions for files that have changed significantly. Consider both the diff content and commit message context. Only include files that need actual description updates.
+Return ONLY a JSON object:
+{{
+  "overview_update": "Updated overview text" or null,
+  "file_updates": {{
+    "path/to/file1.py": "Updated description for file1",
+    "path/to/file2.js": "Updated description for file2"
+  }}
+}}"""
     async def _identify_project_from_git(self) -> Dict[str, Any]:
         """
@@ -489,9 +602,8 @@ Return ONLY a JSON object:
 }}"""
         # Log prompt details
-        prompt_chars = len(prompt)
         prompt_tokens = self.token_counter.count_tokens(prompt)
-        self.logger.info(f"Stage 1 prompt: {prompt_chars} characters, {prompt_tokens} tokens")
+        self.logger.info(f"Stage 1 prompt: {prompt_tokens} tokens")
         if prompt_tokens > self.config["max_diff_tokens"]:
             self.logger.warning(f"Stage 1 prompt too large ({prompt_tokens} tokens), skipping overview analysis")
@@ -559,9 +671,8 @@ Return ONLY a JSON object:
 }}"""
         # Log prompt details
-        prompt_chars = len(prompt)
         prompt_tokens = self.token_counter.count_tokens(prompt)
-        self.logger.info(f"Stage 2 prompt: {prompt_chars} characters, {prompt_tokens} tokens")
+        self.logger.info(f"Stage 2 prompt: {prompt_tokens} tokens")
         if prompt_tokens > self.config["max_diff_tokens"]:
             self.logger.warning(f"Stage 2 prompt too large ({prompt_tokens} tokens), skipping file analysis")
@@ -667,8 +778,49 @@ Return ONLY a JSON object:
         Returns:
             Validated response data
         """
+        def extract_json_from_response(text: str) -> str:
+            """Extract JSON from response that might have extra text before/after."""
+            text = text.strip()
+            # Try to find JSON in the response
+            json_start = -1
+            json_end = -1
+            # Look for opening brace
+            for i, char in enumerate(text):
+                if char == '{':
+                    json_start = i
+                    break
+            if json_start == -1:
+                return text  # No JSON found, return original
+            # Find matching closing brace
+            brace_count = 0
+            for i in range(json_start, len(text)):
+                if text[i] == '{':
+                    brace_count += 1
+                elif text[i] == '}':
+                    brace_count -= 1
+                    if brace_count == 0:
+                        json_end = i + 1
+                        break
+            if json_end == -1:
+                return text  # No matching brace found, return original
+            return text[json_start:json_end]
         try:
-            data = json.loads(response_text.strip())
+            # First try parsing as-is
+            try:
+                data = json.loads(response_text.strip())
+            except json.JSONDecodeError:
+                # Try extracting JSON from response
+                extracted_json = extract_json_from_response(response_text)
+                if extracted_json != response_text.strip():
+                    self.logger.debug(f"Extracted JSON from response: {extracted_json}")
+                data = json.loads(extracted_json)
             # Handle both single-stage and two-stage responses
             if "file_updates" in data and "overview_update" in data:
@@ -700,8 +852,10 @@ Return ONLY a JSON object:
             return data
         except json.JSONDecodeError as e:
+            self.logger.error(f"Raw response content: {repr(response_text)}")
             raise GitHookError(f"Invalid JSON response from API: {e}")
         except ValueError as e:
+            self.logger.error(f"Raw response content: {repr(response_text)}")
             raise GitHookError(f"Invalid response structure: {e}")
     async def _apply_updates(self, project_info: Dict[str, Any], updates: Dict[str, Any]) -> None:

{mcp_code_indexer-1.8.0 → mcp_code_indexer-1.9.0/src/mcp_code_indexer.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mcp-code-indexer
-Version: 1.8.0
+Version: 1.9.0
 Summary: MCP server that tracks file descriptions across codebases, enabling AI agents to efficiently navigate and understand code through searchable summaries and token-aware overviews.
 Author: MCP Code Indexer Contributors
 Maintainer: MCP Code Indexer Contributors