PyPI - mcp-code-indexer - Versions diffs - 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl - Mend

mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

mcp_code_indexer/__init__.py +8 -6
mcp_code_indexer/ask_handler.py +105 -75
mcp_code_indexer/claude_api_handler.py +125 -82
mcp_code_indexer/cleanup_manager.py +107 -81
mcp_code_indexer/database/connection_health.py +212 -161
mcp_code_indexer/database/database.py +529 -415
mcp_code_indexer/database/exceptions.py +167 -118
mcp_code_indexer/database/models.py +54 -19
mcp_code_indexer/database/retry_executor.py +139 -103
mcp_code_indexer/deepask_handler.py +178 -140
mcp_code_indexer/error_handler.py +88 -76
mcp_code_indexer/file_scanner.py +163 -141
mcp_code_indexer/git_hook_handler.py +352 -261
mcp_code_indexer/logging_config.py +76 -94
mcp_code_indexer/main.py +406 -320
mcp_code_indexer/middleware/error_middleware.py +106 -71
mcp_code_indexer/query_preprocessor.py +40 -40
mcp_code_indexer/server/mcp_server.py +785 -470
mcp_code_indexer/token_counter.py +54 -47
{mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/METADATA +3 -3
mcp_code_indexer-3.1.5.dist-info/RECORD +37 -0
mcp_code_indexer-3.1.4.dist-info/RECORD +0 -37
{mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/WHEEL +0 -0
{mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/entry_points.txt +0 -0
{mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/licenses/LICENSE +0 -0
{mcp_code_indexer-3.1.4.dist-info → mcp_code_indexer-3.1.5.dist-info}/top_level.txt +0 -0

mcp_code_indexer/database/exceptions.py CHANGED Viewed

@@ -12,15 +12,19 @@ from typing import Any, Dict, Optional
 class DatabaseError(Exception):
     """Base exception for all database-related errors."""
-    def __init__(self, message: str, operation_name: str = "",
-                 error_context: Optional[Dict[str, Any]] = None):
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        error_context: Optional[Dict[str, Any]] = None,
+    ):
         self.message = message
         self.operation_name = operation_name
         self.error_context = error_context or {}
         self.timestamp = datetime.now(timezone.utc)
         super().__init__(f"{operation_name}: {message}" if operation_name else message)
     def to_dict(self) -> Dict[str, Any]:
         """Convert exception to dictionary for structured logging."""
         return {
@@ -28,240 +32,283 @@ class DatabaseError(Exception):
             "message": self.message,
             "operation_name": self.operation_name,
             "timestamp": self.timestamp.isoformat(),
-            "error_context": self.error_context
+            "error_context": self.error_context,
         }
 class DatabaseLockError(DatabaseError):
     """Exception for SQLite database locking issues that are retryable."""
-    def __init__(self, message: str, retry_count: int = 0, operation_name: str = "",
-                 last_attempt: Optional[datetime] = None, lock_type: str = "unknown"):
+    def __init__(
+        self,
+        message: str,
+        retry_count: int = 0,
+        operation_name: str = "",
+        last_attempt: Optional[datetime] = None,
+        lock_type: str = "unknown",
+    ):
         self.retry_count = retry_count
         self.last_attempt = last_attempt or datetime.now(timezone.utc)
         self.lock_type = lock_type  # 'read', 'write', 'exclusive'
         error_context = {
             "retry_count": retry_count,
             "last_attempt": self.last_attempt.isoformat(),
             "lock_type": lock_type,
-            "retryable": True
+            "retryable": True,
         }
         super().__init__(message, operation_name, error_context)
 class DatabaseBusyError(DatabaseError):
     """Exception for SQLite database busy errors that are retryable."""
-    def __init__(self, message: str, operation_name: str = "",
-                 busy_timeout: float = 0.0, resource_type: str = "connection"):
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        busy_timeout: float = 0.0,
+        resource_type: str = "connection",
+    ):
         self.busy_timeout = busy_timeout
         self.resource_type = resource_type  # 'connection', 'transaction', 'table'
         error_context = {
             "busy_timeout": busy_timeout,
             "resource_type": resource_type,
-            "retryable": True
+            "retryable": True,
         }
         super().__init__(message, operation_name, error_context)
 class DatabaseConnectionError(DatabaseError):
     """Exception for database connection issues."""
-    def __init__(self, message: str, operation_name: str = "",
-                 connection_info: Optional[Dict[str, Any]] = None):
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        connection_info: Optional[Dict[str, Any]] = None,
+    ):
         self.connection_info = connection_info or {}
         error_context = {
             "connection_info": self.connection_info,
-            "retryable": False  # Connection errors usually indicate config issues
+            "retryable": False,  # Connection errors usually indicate config issues
         }
         super().__init__(message, operation_name, error_context)
 class DatabaseSchemaError(DatabaseError):
     """Exception for database schema-related errors."""
-    def __init__(self, message: str, operation_name: str = "",
-                 schema_version: Optional[str] = None, migration_info: Optional[Dict] = None):
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        schema_version: Optional[str] = None,
+        migration_info: Optional[Dict] = None,
+    ):
         self.schema_version = schema_version
         self.migration_info = migration_info or {}
         error_context = {
             "schema_version": schema_version,
             "migration_info": self.migration_info,
-            "retryable": False  # Schema errors require manual intervention
+            "retryable": False,  # Schema errors require manual intervention
         }
         super().__init__(message, operation_name, error_context)
 class DatabaseIntegrityError(DatabaseError):
     """Exception for database integrity constraint violations."""
-    def __init__(self, message: str, operation_name: str = "",
-                 constraint_type: str = "unknown", affected_table: str = ""):
-        self.constraint_type = constraint_type  # 'primary_key', 'foreign_key', 'unique', 'check'
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        constraint_type: str = "unknown",
+        affected_table: str = "",
+    ):
+        self.constraint_type = (
+            constraint_type  # 'primary_key', 'foreign_key', 'unique', 'check'
+        )
         self.affected_table = affected_table
         error_context = {
             "constraint_type": constraint_type,
             "affected_table": affected_table,
-            "retryable": False  # Integrity errors indicate data issues
+            "retryable": False,  # Integrity errors indicate data issues
         }
         super().__init__(message, operation_name, error_context)
 class DatabaseTimeoutError(DatabaseError):
     """Exception for database operation timeouts."""
-    def __init__(self, message: str, operation_name: str = "",
-                 timeout_seconds: float = 0.0, operation_type: str = "unknown"):
+    def __init__(
+        self,
+        message: str,
+        operation_name: str = "",
+        timeout_seconds: float = 0.0,
+        operation_type: str = "unknown",
+    ):
         self.timeout_seconds = timeout_seconds
         self.operation_type = operation_type  # 'read', 'write', 'transaction'
         error_context = {
             "timeout_seconds": timeout_seconds,
             "operation_type": operation_type,
-            "retryable": True  # Timeouts might be transient
+            "retryable": True,  # Timeouts might be transient
         }
         super().__init__(message, operation_name, error_context)
 def classify_sqlite_error(error: Exception, operation_name: str = "") -> DatabaseError:
     """
     Classify a raw SQLite error into our structured exception hierarchy.
     Args:
         error: Original exception from SQLite
         operation_name: Name of the operation that failed
     Returns:
         Appropriate DatabaseError subclass with context
     """
     error_message = str(error).lower()
     original_message = str(error)
     # Database locking errors
-    if any(msg in error_message for msg in [
-        "database is locked",
-        "sqlite_locked",
-        "attempt to write a readonly database"
-    ]):
-        lock_type = "write" if "write" in error_message or "readonly" in error_message else "read"
+    if any(
+        msg in error_message
+        for msg in [
+            "database is locked",
+            "sqlite_locked",
+            "attempt to write a readonly database",
+        ]
+    ):
+        lock_type = (
+            "write"
+            if "write" in error_message or "readonly" in error_message
+            else "read"
+        )
         return DatabaseLockError(
-            original_message,
-            operation_name=operation_name,
-            lock_type=lock_type
+            original_message, operation_name=operation_name, lock_type=lock_type
         )
     # Database busy errors
-    if any(msg in error_message for msg in [
-        "database is busy",
-        "sqlite_busy",
-        "cannot start a transaction within a transaction"
-    ]):
-        resource_type = "transaction" if "transaction" in error_message else "connection"
+    if any(
+        msg in error_message
+        for msg in [
+            "database is busy",
+            "sqlite_busy",
+            "cannot start a transaction within a transaction",
+        ]
+    ):
+        resource_type = (
+            "transaction" if "transaction" in error_message else "connection"
+        )
         return DatabaseBusyError(
-            original_message,
-            operation_name=operation_name,
-            resource_type=resource_type
+            original_message, operation_name=operation_name, resource_type=resource_type
         )
     # Connection errors
-    if any(msg in error_message for msg in [
-        "unable to open database",
-        "disk i/o error",
-        "database disk image is malformed",
-        "no such database"
-    ]):
-        return DatabaseConnectionError(
-            original_message,
-            operation_name=operation_name
-        )
+    if any(
+        msg in error_message
+        for msg in [
+            "unable to open database",
+            "disk i/o error",
+            "database disk image is malformed",
+            "no such database",
+        ]
+    ):
+        return DatabaseConnectionError(original_message, operation_name=operation_name)
     # Schema errors
-    if any(msg in error_message for msg in [
-        "no such table",
-        "no such column",
-        "table already exists",
-        "syntax error"
-    ]):
-        return DatabaseSchemaError(
-            original_message,
-            operation_name=operation_name
-        )
+    if any(
+        msg in error_message
+        for msg in [
+            "no such table",
+            "no such column",
+            "table already exists",
+            "syntax error",
+        ]
+    ):
+        return DatabaseSchemaError(original_message, operation_name=operation_name)
     # Integrity constraint errors
-    if any(msg in error_message for msg in [
-        "unique constraint failed",
-        "foreign key constraint failed",
-        "primary key constraint failed",
-        "check constraint failed"
-    ]):
+    if any(
+        msg in error_message
+        for msg in [
+            "unique constraint failed",
+            "foreign key constraint failed",
+            "primary key constraint failed",
+            "check constraint failed",
+        ]
+    ):
         constraint_type = "unknown"
         if "unique" in error_message:
             constraint_type = "unique"
         elif "foreign key" in error_message:
-            constraint_type = "foreign_key"
+            constraint_type = "foreign_key"
         elif "primary key" in error_message:
             constraint_type = "primary_key"
         elif "check" in error_message:
             constraint_type = "check"
         return DatabaseIntegrityError(
             original_message,
             operation_name=operation_name,
-            constraint_type=constraint_type
+            constraint_type=constraint_type,
         )
     # Default to generic database error
     return DatabaseError(
         original_message,
         operation_name=operation_name,
-        error_context={"original_error_type": type(error).__name__}
+        error_context={"original_error_type": type(error).__name__},
     )
 def is_retryable_error(error: Exception) -> bool:
     """
     Determine if an error is retryable based on our classification.
     Args:
         error: Exception to check
     Returns:
         True if the error should trigger a retry
     """
     if isinstance(error, DatabaseError):
         return error.error_context.get("retryable", False)
     # For raw exceptions, use simple classification
     error_message = str(error).lower()
     retryable_patterns = [
         "database is locked",
         "database is busy",
-        "sqlite_busy",
+        "sqlite_busy",
         "sqlite_locked",
-        "cannot start a transaction within a transaction"
+        "cannot start a transaction within a transaction",
     ]
     return any(pattern in error_message for pattern in retryable_patterns)
 def get_error_classification_stats(errors: list) -> Dict[str, Any]:
     """
     Analyze a list of errors and provide classification statistics.
     Args:
         errors: List of Exception objects to analyze
     Returns:
         Dictionary with error classification statistics
     """
@@ -270,34 +317,36 @@ def get_error_classification_stats(errors: list) -> Dict[str, Any]:
         "error_types": {},
         "retryable_count": 0,
         "non_retryable_count": 0,
-        "most_common_errors": {}
+        "most_common_errors": {},
     }
     error_messages = {}
     for error in errors:
         # Classify error
-        classified = classify_sqlite_error(error) if not isinstance(error, DatabaseError) else error
+        classified = (
+            classify_sqlite_error(error)
+            if not isinstance(error, DatabaseError)
+            else error
+        )
         error_type = type(classified).__name__
         # Count by type
         stats["error_types"][error_type] = stats["error_types"].get(error_type, 0) + 1
         # Count retryable vs non-retryable
         if is_retryable_error(classified):
             stats["retryable_count"] += 1
         else:
             stats["non_retryable_count"] += 1
         # Track common error messages
         message = str(error)
         error_messages[message] = error_messages.get(message, 0) + 1
     # Find most common error messages
     stats["most_common_errors"] = sorted(
-        error_messages.items(),
-        key=lambda x: x[1],
-        reverse=True
+        error_messages.items(), key=lambda x: x[1], reverse=True
     )[:5]
     return stats

mcp_code_indexer/database/models.py CHANGED Viewed

@@ -14,42 +14,57 @@ from pydantic import BaseModel, Field
 class Project(BaseModel):
     """
     Represents a tracked project/repository.
     Projects are identified by project name and folder paths,
     allowing tracking across different local copies without git coupling.
     """
     id: str = Field(..., description="Generated unique identifier")
     name: str = Field(..., description="User-provided project name")
-    aliases: List[str] = Field(default_factory=list, description="Alternative identifiers")
-    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
-    last_accessed: datetime = Field(default_factory=datetime.utcnow, description="Last access timestamp")
+    aliases: List[str] = Field(
+        default_factory=list, description="Alternative identifiers"
+    )
+    created: datetime = Field(
+        default_factory=datetime.utcnow, description="Creation timestamp"
+    )
+    last_accessed: datetime = Field(
+        default_factory=datetime.utcnow, description="Last access timestamp"
+    )
 class FileDescription(BaseModel):
     """
     Represents a file description within a project.
     Stores detailed summaries of file contents including purpose, components,
     and relationships to enable efficient codebase navigation.
     """
     id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Reference to project")
     file_path: str = Field(..., description="Relative path from project root")
     description: str = Field(..., description="Detailed content description")
     file_hash: Optional[str] = Field(None, description="SHA-256 of file contents")
-    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
     version: int = Field(default=1, description="For optimistic concurrency control")
-    source_project_id: Optional[str] = Field(None, description="Source project if copied from upstream")
-    to_be_cleaned: Optional[int] = Field(None, description="UNIX timestamp for cleanup, NULL = active")
+    source_project_id: Optional[str] = Field(
+        None, description="Source project if copied from upstream"
+    )
+    to_be_cleaned: Optional[int] = Field(
+        None, description="UNIX timestamp for cleanup, NULL = active"
+    )
 class MergeConflict(BaseModel):
     """
     Represents a merge conflict between file descriptions.
     Used during branch merging when the same file has different descriptions
     in source and target branches.
     """
     id: Optional[int] = Field(None, description="Database ID")
     project_id: str = Field(..., description="Project identifier")
     file_path: str = Field(..., description="Path to conflicted file")
@@ -58,53 +73,65 @@ class MergeConflict(BaseModel):
     source_description: str = Field(..., description="Description from source branch")
     target_description: str = Field(..., description="Description from target branch")
     resolution: Optional[str] = Field(None, description="AI-provided resolution")
-    created: datetime = Field(default_factory=datetime.utcnow, description="Creation timestamp")
+    created: datetime = Field(
+        default_factory=datetime.utcnow, description="Creation timestamp"
+    )
 class ProjectOverview(BaseModel):
     """
     Represents a condensed, interpretive overview of an entire codebase.
     Stores a comprehensive narrative that captures architecture, components,
     relationships, and design patterns in a single document rather than
     individual file descriptions.
     """
     project_id: str = Field(..., description="Reference to project")
     overview: str = Field(..., description="Comprehensive codebase narrative")
-    last_modified: datetime = Field(default_factory=datetime.utcnow, description="Last update timestamp")
+    last_modified: datetime = Field(
+        default_factory=datetime.utcnow, description="Last update timestamp"
+    )
     total_files: int = Field(..., description="Number of files in codebase")
-    total_tokens: int = Field(..., description="Total tokens in individual descriptions")
+    total_tokens: int = Field(
+        ..., description="Total tokens in individual descriptions"
+    )
 class CodebaseOverview(BaseModel):
     """
     Represents a complete codebase structure with file descriptions.
     Provides hierarchical view of project files with token count information
     to help determine whether to use full overview or search-based approach.
     """
     project_name: str = Field(..., description="Project name")
     total_files: int = Field(..., description="Total number of tracked files")
     total_tokens: int = Field(..., description="Total token count for all descriptions")
     is_large: bool = Field(..., description="True if exceeds configured token limit")
     token_limit: int = Field(..., description="Current token limit setting")
-    structure: 'FolderNode' = Field(..., description="Hierarchical folder structure")
+    structure: "FolderNode" = Field(..., description="Hierarchical folder structure")
 class FolderNode(BaseModel):
     """
     Represents a folder in the codebase hierarchy.
     """
     name: str = Field(..., description="Folder name")
     path: str = Field(..., description="Full path from project root")
-    files: List['FileNode'] = Field(default_factory=list, description="Files in this folder")
-    folders: List['FolderNode'] = Field(default_factory=list, description="Subfolders")
+    files: List["FileNode"] = Field(
+        default_factory=list, description="Files in this folder"
+    )
+    folders: List["FolderNode"] = Field(default_factory=list, description="Subfolders")
 class FileNode(BaseModel):
     """
     Represents a file in the codebase hierarchy.
     """
     name: str = Field(..., description="File name")
     path: str = Field(..., description="Full path from project root")
     description: str = Field(..., description="File description")
@@ -114,6 +141,7 @@ class SearchResult(BaseModel):
     """
     Represents a search result with relevance scoring.
     """
     file_path: str = Field(..., description="Path to the matching file")
     description: str = Field(..., description="File description")
     relevance_score: float = Field(..., description="Search relevance score")
@@ -124,11 +152,16 @@ class CodebaseSizeInfo(BaseModel):
     """
     Information about codebase size and token usage.
     """
     total_tokens: int = Field(..., description="Total token count")
     is_large: bool = Field(..., description="Whether codebase exceeds token limit")
-    recommendation: str = Field(..., description="Recommended approach (use_search or use_overview)")
+    recommendation: str = Field(
+        ..., description="Recommended approach (use_search or use_overview)"
+    )
     token_limit: int = Field(..., description="Configured token limit")
-    cleaned_up_files: List[str] = Field(default_factory=list, description="Files removed during cleanup")
+    cleaned_up_files: List[str] = Field(
+        default_factory=list, description="Files removed during cleanup"
+    )
     cleaned_up_count: int = Field(default=0, description="Number of files cleaned up")
@@ -136,6 +169,7 @@ class WordFrequencyTerm(BaseModel):
     """
     Represents a term and its frequency from word analysis.
     """
     term: str = Field(..., description="The word/term")
     frequency: int = Field(..., description="Number of occurrences")
@@ -144,6 +178,7 @@ class WordFrequencyResult(BaseModel):
     """
     Results from word frequency analysis of file descriptions.
     """
     top_terms: List[WordFrequencyTerm] = Field(..., description="Top frequent terms")
     total_terms_analyzed: int = Field(..., description="Total terms processed")
     total_unique_terms: int = Field(..., description="Number of unique terms found")

mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl

mcp-code-indexer 3.1.4__py3-none-any.whl → 3.1.5__py3-none-any.whl