PyPI - mcp-code-indexer - Versions diffs - 4.2.14__py3-none-any.whl → 4.2.16__py3-none-any.whl - Mend

mcp-code-indexer 4.2.14__py3-none-any.whl → 4.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

mcp_code_indexer/database/database.py +251 -85
mcp_code_indexer/database/models.py +66 -24
mcp_code_indexer/database/retry_executor.py +15 -5
mcp_code_indexer/file_scanner.py +107 -12
mcp_code_indexer/main.py +75 -23
mcp_code_indexer/server/mcp_server.py +191 -1
mcp_code_indexer/vector_mode/chunking/ast_chunker.py +103 -84
mcp_code_indexer/vector_mode/chunking/chunk_optimizer.py +1 -0
mcp_code_indexer/vector_mode/config.py +113 -45
mcp_code_indexer/vector_mode/const.py +24 -0
mcp_code_indexer/vector_mode/daemon.py +860 -98
mcp_code_indexer/vector_mode/monitoring/change_detector.py +113 -97
mcp_code_indexer/vector_mode/monitoring/file_watcher.py +175 -121
mcp_code_indexer/vector_mode/providers/turbopuffer_client.py +291 -98
mcp_code_indexer/vector_mode/providers/voyage_client.py +140 -38
mcp_code_indexer/vector_mode/services/__init__.py +9 -0
mcp_code_indexer/vector_mode/services/embedding_service.py +389 -0
mcp_code_indexer/vector_mode/services/vector_mode_tools_service.py +459 -0
mcp_code_indexer/vector_mode/services/vector_storage_service.py +580 -0
mcp_code_indexer/vector_mode/types.py +46 -0
mcp_code_indexer/vector_mode/utils.py +50 -0
{mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/METADATA +13 -10
{mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/RECORD +26 -19
{mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/WHEEL +1 -1
{mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info}/entry_points.txt +0 -0
{mcp_code_indexer-4.2.14.dist-info → mcp_code_indexer-4.2.16.dist-info/licenses}/LICENSE +0 -0

mcp_code_indexer/file_scanner.py CHANGED Viewed

@@ -6,10 +6,12 @@ while respecting .gitignore patterns and common ignore patterns. It enables
 efficient discovery of files that need description tracking.
 """
+import asyncio
 import fnmatch
 import logging
+import os
 from pathlib import Path
-from typing import Dict, Generator, List, Optional, Set, Union, Any, cast
+from typing import Dict, Iterator, List, Optional, Set, Union, Any, cast
 try:
     from gitignore_parser import parse_gitignore
@@ -150,6 +152,13 @@ class FileScanner:
         self.project_root = Path(project_root).resolve()
         self._gitignore_cache: Dict[str, Any] = {}
         self._load_gitignore_patterns()
+        # Build ignore patterns set for directory pruning
+        self.ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
+    @property
+    def root_path(self) -> Path:
+        """Get the root path for the scanner (alias for project_root)."""
+        return self.project_root
     def _load_gitignore_patterns(self) -> None:
         """Load and cache gitignore patterns from the project."""
@@ -228,6 +237,53 @@ class FileScanner:
         """Check if a file has an ignored extension."""
         return file_path.suffix.lower() in IGNORED_EXTENSIONS
+    def should_ignore_path(self, path: Path) -> bool:
+        """
+        Check if a path (file or directory) should be ignored based on patterns.
+        This is used for directory pruning during walks to skip entire subtrees
+        like node_modules, .git, etc.
+        Args:
+            path: Path to check (can be file or directory)
+        Returns:
+            True if the path should be ignored
+        """
+        try:
+            rel_path = path.relative_to(self.project_root)
+        except ValueError:
+            rel_path = path
+        path_str = str(rel_path)
+        path_name = path.name
+        # Check against ignore patterns
+        for pattern in self.ignore_patterns:
+            # Handle directory patterns (ending with /)
+            if pattern.endswith("/"):
+                pattern_no_slash = pattern.rstrip("/")
+                if path_name == pattern_no_slash:
+                    return True
+            # Handle wildcard patterns (starting with *)
+            elif pattern.startswith("*"):
+                if path_str.endswith(pattern[1:]) or path_name.endswith(pattern[1:]):
+                    return True
+            # Handle path patterns (containing / or \)
+            elif "/" in pattern or "\\" in pattern:
+                if pattern in path_str:
+                    return True
+            # Handle simple name patterns
+            else:
+                if pattern in path.parts or path_name == pattern:
+                    return True
+        # Also check gitignore
+        if self._is_ignored_by_gitignore(path):
+            return True
+        return False
     def should_ignore_file(self, file_path: Path) -> bool:
         """
         Determine if a file should be ignored.
@@ -246,12 +302,8 @@ class FileScanner:
         if self._is_ignored_by_extension(file_path):
             return True
-        # Check default patterns
-        if self._is_ignored_by_default_patterns(file_path):
-            return True
-        # Check gitignore patterns
-        if self._is_ignored_by_gitignore(file_path):
+        # Check path-based patterns
+        if self.should_ignore_path(file_path):
             return True
         return False
@@ -286,12 +338,27 @@ class FileScanner:
         logger.info(f"Found {len(files)} trackable files in {self.project_root}")
         return files
-    def _walk_directory(self) -> Generator[Path, None, None]:
-        """Walk through all files in the project directory."""
+    def _walk_directory(self) -> Iterator[Path]:
+        """
+        Walk directory using os.walk with directory pruning.
+        This skips ignored directories entirely rather than traversing then filtering.
+        Critical for performance - avoids traversing node_modules, .git, etc.
+        """
         try:
-            for item in self.project_root.rglob("*"):
-                if item.is_file():
-                    yield item
+            for dirpath, dirnames, filenames in os.walk(self.project_root):
+                current_dir = Path(dirpath)
+                # Prune ignored directories in-place to prevent descending into them
+                # Modifying dirnames in-place is the documented way to prune os.walk
+                dirnames[:] = [
+                    d for d in dirnames
+                    if not self.should_ignore_path(current_dir / d)
+                ]
+                for filename in filenames:
+                    yield current_dir / filename
         except PermissionError as e:
             logger.warning(f"Permission denied accessing {e.filename}")
         except Exception as e:
@@ -404,3 +471,31 @@ class FileScanner:
             logger.error(f"Error getting project stats: {e}")
         return stats
+    async def scan_directory_async(
+        self, max_files: Optional[int] = None
+    ) -> List[Path]:
+        """
+        Async version of scan_directory running in a thread.
+        Args:
+            max_files: Maximum number of files to return (None for no limit)
+        Returns:
+            List of file paths that should be tracked
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, self.scan_directory, max_files)
+    async def find_missing_files_async(self, existing_paths: Set[str]) -> List[Path]:
+        """
+        Async version of find_missing_files running in a thread.
+        Args:
+            existing_paths: Set of relative file paths that already have descriptions
+        Returns:
+            List of file paths that are missing descriptions
+        """
+        loop = asyncio.get_running_loop()
+        return await loop.run_in_executor(None, self.find_missing_files, existing_paths)

mcp_code_indexer/main.py CHANGED Viewed

@@ -377,6 +377,8 @@ async def handle_runcommand(args: argparse.Namespace) -> None:
             "get_word_frequency": server._handle_get_word_frequency,
             "search_codebase_overview": server._handle_search_codebase_overview,
             "check_database_health": server._handle_check_database_health,
+            "enabled_vector_mode": server._handle_enabled_vector_mode,
+            "find_similar_code": server._handle_find_similar_code,
         }
         if tool_name not in tool_handlers:
@@ -1017,41 +1019,49 @@ async def main() -> None:
                 from .vector_mode import is_vector_mode_available, check_api_keys
                 from .vector_mode.config import load_vector_config
                 from .vector_mode.daemon import start_vector_daemon
                 # Check if vector mode is available
                 if not is_vector_mode_available():
-                    logger.error("Vector mode dependencies not found. Try reinstalling: pip install --upgrade mcp-code-indexer")
+                    logger.error(
+                        "Vector mode dependencies not found. Try reinstalling: pip install --upgrade mcp-code-indexer"
+                    )
                     sys.exit(1)
                 # Check API keys
                 api_keys = check_api_keys()
                 if not all(api_keys.values()):
                     missing = [k for k, v in api_keys.items() if not v]
-                    logger.error(f"Missing API keys for vector mode: {', '.join(missing)}")
+                    logger.error(
+                        f"Missing API keys for vector mode: {', '.join(missing)}"
+                    )
                     sys.exit(1)
                 # Load vector configuration
-                vector_config_path = Path(args.vector_config).expanduser() if args.vector_config else None
+                vector_config_path = (
+                    Path(args.vector_config).expanduser()
+                    if args.vector_config
+                    else None
+                )
                 vector_config = load_vector_config(vector_config_path)
                 logger.info(
-                    "Vector mode enabled",
+                    "Vector mode enabled",
                     extra={
                         "structured_data": {
                             "embedding_model": vector_config.embedding_model,
                             "batch_size": vector_config.batch_size,
                             "daemon_enabled": vector_config.daemon_enabled,
                         }
-                    }
+                    },
                 )
                 # Start vector daemon in background
                 if vector_config.daemon_enabled:
                     vector_daemon_task = asyncio.create_task(
                         start_vector_daemon(vector_config_path, db_path, cache_dir)
                     )
                     logger.info("Vector daemon started")
             except Exception as e:
                 logger.error(f"Failed to initialize vector mode: {e}")
                 sys.exit(1)
@@ -1095,7 +1105,45 @@ async def main() -> None:
         if transport:
             transport.server = server
-        await server.run()
+        # If vector mode is enabled, we need to handle signals properly
+        # because server.run() may not respond to KeyboardInterrupt
+        if args.vector and vector_daemon_task:
+            # Setup signal handling for graceful shutdown
+            shutdown_event = asyncio.Event()
+            def signal_handler():
+                logger.info("Shutdown signal received")
+                shutdown_event.set()
+            # Register signal handlers
+            loop = asyncio.get_running_loop()
+            for sig in [signal.SIGTERM, signal.SIGINT]:
+                loop.add_signal_handler(sig, signal_handler)
+            # Run server and wait for shutdown signal
+            server_task = asyncio.create_task(server.run())
+            shutdown_task = asyncio.create_task(shutdown_event.wait())
+            try:
+                # Wait for either server completion or shutdown signal
+                done, pending = await asyncio.wait(
+                    [server_task, shutdown_task], return_when=asyncio.FIRST_COMPLETED
+                )
+                # Cancel remaining tasks
+                for task in pending:
+                    task.cancel()
+                    try:
+                        await task
+                    except asyncio.CancelledError:
+                        pass
+            except Exception as e:
+                logger.error(f"Error during server execution: {e}")
+                raise
+        else:
+            # Normal mode - let server handle KeyboardInterrupt naturally
+            await server.run()
     except Exception as e:
         error_handler.log_error(e, context={"phase": "startup"})
@@ -1105,17 +1153,21 @@ async def main() -> None:
         if vector_daemon_task and not vector_daemon_task.done():
             logger.info("Cancelling vector daemon")
             vector_daemon_task.cancel()
         # Wait for vector daemon to finish
         if vector_daemon_task:
             try:
                 await vector_daemon_task
             except asyncio.CancelledError:
                 logger.info("Vector daemon cancelled successfully")
         # Clean up any remaining asyncio tasks to prevent hanging
         current_task = asyncio.current_task()
-        tasks = [task for task in asyncio.all_tasks() if not task.done() and task is not current_task]
+        tasks = [
+            task
+            for task in asyncio.all_tasks()
+            if not task.done() and task is not current_task
+        ]
         if tasks:
             logger.info(f"Cancelling {len(tasks)} remaining tasks")
             for task in tasks:
@@ -1124,22 +1176,21 @@ async def main() -> None:
             # Wait for cancellation but don't wait forever
             try:
                 await asyncio.wait_for(
-                    asyncio.gather(*tasks, return_exceptions=True),
-                    timeout=2.0
+                    asyncio.gather(*tasks, return_exceptions=True), timeout=2.0
                 )
             except asyncio.TimeoutError:
                 logger.warning("Some tasks did not cancel within timeout")
         # Force close any remaining connections and cleanup resources
         try:
             # Give a moment for final cleanup
             await asyncio.sleep(0.1)
             # Shutdown the event loop executor to stop any background threads
             loop = asyncio.get_running_loop()
-            if hasattr(loop, '_default_executor') and loop._default_executor:
+            if hasattr(loop, "_default_executor") and loop._default_executor:
                 loop._default_executor.shutdown(wait=False)
         except Exception as e:
             logger.warning(f"Error during final cleanup: {e}")
@@ -1163,14 +1214,15 @@ def cli_main() -> None:
         # Force cleanup of any remaining resources to prevent hanging
         import threading
         import time
         # Give main threads a moment to finish
         time.sleep(0.1)
         # Force exit if daemon threads are preventing shutdown
         active_threads = threading.active_count()
         if active_threads > 1:  # More than just the main thread
             import os
             os._exit(0)

mcp_code_indexer/server/mcp_server.py CHANGED Viewed

@@ -684,6 +684,104 @@ class MCPCodeIndexServer:
                         "additionalProperties": False,
                     },
                 ),
+                types.Tool(
+                    name="enabled_vector_mode",
+                    description=(
+                        "Enables or disables vector mode for a project. Vector mode "
+                        "provides semantic search capabilities with embeddings for "
+                        "enhanced code navigation and discovery."
+                    ),
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "enabled": {
+                                "type": "boolean",
+                                "description": (
+                                    "Whether to enable (true) or disable (false) vector mode"
+                                ),
+                            },
+                        },
+                        "required": ["projectName", "folderPath", "enabled"],
+                        "additionalProperties": False,
+                    },
+                ),
+                types.Tool(
+                    name="find_similar_code",
+                    description=(
+                        "Find code similar to a given code snippet or file section using "
+                        "vector-based semantic search. This tool uses AI embeddings to "
+                        "understand code context and meaning, providing more intelligent "
+                        "similarity detection than text-based matching. Requires vector "
+                        "mode to be enabled for the project."
+                    ),
+                    inputSchema={
+                        "type": "object",
+                        "properties": {
+                            "projectName": {
+                                "type": "string",
+                                "description": "The name of the project",
+                            },
+                            "folderPath": {
+                                "type": "string",
+                                "description": (
+                                    "Absolute path to the project folder on disk"
+                                ),
+                            },
+                            "code_snippet": {
+                                "type": "string",
+                                "description": (
+                                    "Direct code snippet to search for similarities (mutually "
+                                    "exclusive with file_path)"
+                                ),
+                            },
+                            "file_path": {
+                                "type": "string",
+                                "description": (
+                                    "Path to file containing code to analyze (mutually "
+                                    "exclusive with code_snippet)"
+                                ),
+                            },
+                            "line_start": {
+                                "type": "integer",
+                                "description": (
+                                    "Starting line number for file section (1-indexed, "
+                                    "used with file_path)"
+                                ),
+                            },
+                            "line_end": {
+                                "type": "integer",
+                                "description": (
+                                    "Ending line number for file section (1-indexed, "
+                                    "used with file_path)"
+                                ),
+                            },
+                            "similarity_threshold": {
+                                "type": "number",
+                                "description": (
+                                    "Minimum similarity score (0.0-1.0, optional)"
+                                ),
+                            },
+                            "max_results": {
+                                "type": "integer",
+                                "description": (
+                                    "Maximum number of results to return (optional)"
+                                ),
+                            },
+                        },
+                        "required": ["projectName", "folderPath"],
+                        "additionalProperties": False,
+                    },
+                ),
             ]
         @self.server.call_tool()  # type: ignore[misc]
@@ -711,6 +809,8 @@ class MCPCodeIndexServer:
                 "get_word_frequency": self._handle_get_word_frequency,
                 "check_database_health": self._handle_check_database_health,
                 "search_codebase_overview": self._handle_search_codebase_overview,
+                "enabled_vector_mode": self._handle_enabled_vector_mode,
+                "find_similar_code": self._handle_find_similar_code,
             }
             if name not in tool_handlers:
@@ -834,7 +934,9 @@ class MCPCodeIndexServer:
                 )
             if project is None:
-                raise RuntimeError("Project should always be set in if/else branches above")
+                raise RuntimeError(
+                    "Project should always be set in if/else branches above"
+                )
             return project.id
     async def _find_matching_project(
@@ -1103,6 +1205,7 @@ class MCPCodeIndexServer:
             "isLarge": is_large,
             "recommendation": recommendation,
             "tokenLimit": token_limit,
+            "totalTokens": total_tokens,
             "totalFiles": len(file_descriptions),
             "cleanedUpCount": cleaned_up_count,
         }
@@ -1481,6 +1584,93 @@ class MCPCodeIndexServer:
             "status_summary": self._generate_health_summary(comprehensive_diagnostics),
         }
+    async def _handle_enabled_vector_mode(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Handle enabled_vector_mode tool calls."""
+        folder_path = arguments["folderPath"]
+        db_manager = await self.db_factory.get_database_manager(folder_path)
+        project_id = await self._get_or_create_project_id(arguments)
+        enabled = arguments["enabled"]
+        try:
+            await db_manager.set_project_vector_mode(project_id, enabled)
+            return {
+                "success": True,
+                "message": f"Vector mode {'enabled' if enabled else 'disabled'} for project",
+                "project_id": project_id,
+                "vector_mode": enabled,
+            }
+        except ValueError as e:
+            return {
+                "success": False,
+                "error": str(e),
+                "project_id": project_id,
+                "vector_mode": None,
+            }
+    async def _handle_find_similar_code(
+        self, arguments: Dict[str, Any]
+    ) -> Dict[str, Any]:
+        """Handle find_similar_code tool calls."""
+        try:
+            from mcp_code_indexer.vector_mode.services.vector_mode_tools_service import (
+                VectorModeToolsService,
+            )
+            # Initialize the tools service (handles all vector mode setup internally)
+            tools_service = VectorModeToolsService()
+            # Extract project info
+            project_name = arguments["projectName"]
+            folder_path = arguments["folderPath"]
+            logger.info(
+                "Processing find_similar_code request",
+                extra={
+                    "structured_data": {
+                        "project_name": project_name,
+                        "has_code_snippet": "code_snippet" in arguments,
+                        "has_file_path": "file_path" in arguments,
+                    }
+                },
+            )
+            # Call the service method
+            result = await tools_service.find_similar_code(
+                project_name=project_name,
+                folder_path=folder_path,
+                code_snippet=arguments.get("code_snippet"),
+                file_path=arguments.get("file_path"),
+                line_start=arguments.get("line_start"),
+                line_end=arguments.get("line_end"),
+                similarity_threshold=arguments.get("similarity_threshold"),
+                max_results=arguments.get("max_results"),
+            )
+            # Add success indicator to the result
+            result["success"] = True
+            return result
+        except Exception as e:
+            logger.error(
+                "Failed to execute find_similar_code",
+                extra={
+                    "structured_data": {
+                        "error": str(e),
+                        "project_name": arguments.get("projectName", "unknown"),
+                    }
+                },
+                exc_info=True,
+            )
+            return {
+                "success": False,
+                "error": str(e),
+                "results": [],
+                "total_results": 0,
+            }
     def _generate_health_summary(self, diagnostics: Dict[str, Any]) -> Dict[str, Any]:
         """Generate a concise health summary from comprehensive diagnostics."""
         if "resilience_indicators" not in diagnostics:

mcp-code-indexer 4.2.14__py3-none-any.whl → 4.2.16__py3-none-any.whl

mcp-code-indexer 4.2.14__py3-none-any.whl → 4.2.16__py3-none-any.whl