PyPI - claude-mpm - Versions diffs - 4.13.2__py3-none-any.whl → 4.14.0__py3-none-any.whl - Mend

claude-mpm 4.13.2__py3-none-any.whl → 4.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of claude-mpm might be problematic. Click here for more details.

Files changed (44) hide show

claude_mpm/VERSION +1 -1
claude_mpm/cli/__init__.py +10 -0
claude_mpm/cli/commands/local_deploy.py +536 -0
claude_mpm/cli/parsers/base_parser.py +7 -0
claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
claude_mpm/config/model_config.py +428 -0
claude_mpm/core/interactive_session.py +3 -0
claude_mpm/services/core/interfaces/__init__.py +74 -2
claude_mpm/services/core/interfaces/health.py +172 -0
claude_mpm/services/core/interfaces/model.py +281 -0
claude_mpm/services/core/interfaces/process.py +372 -0
claude_mpm/services/core/interfaces/restart.py +307 -0
claude_mpm/services/core/interfaces/stability.py +260 -0
claude_mpm/services/core/models/__init__.py +35 -0
claude_mpm/services/core/models/health.py +189 -0
claude_mpm/services/core/models/process.py +258 -0
claude_mpm/services/core/models/restart.py +302 -0
claude_mpm/services/core/models/stability.py +264 -0
claude_mpm/services/local_ops/__init__.py +163 -0
claude_mpm/services/local_ops/crash_detector.py +257 -0
claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
claude_mpm/services/local_ops/health_checks/http_check.py +223 -0
claude_mpm/services/local_ops/health_checks/process_check.py +235 -0
claude_mpm/services/local_ops/health_checks/resource_check.py +254 -0
claude_mpm/services/local_ops/health_manager.py +430 -0
claude_mpm/services/local_ops/log_monitor.py +396 -0
claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
claude_mpm/services/local_ops/process_manager.py +595 -0
claude_mpm/services/local_ops/resource_monitor.py +331 -0
claude_mpm/services/local_ops/restart_manager.py +401 -0
claude_mpm/services/local_ops/restart_policy.py +387 -0
claude_mpm/services/local_ops/state_manager.py +371 -0
claude_mpm/services/local_ops/unified_manager.py +600 -0
claude_mpm/services/model/__init__.py +147 -0
claude_mpm/services/model/base_provider.py +365 -0
claude_mpm/services/model/claude_provider.py +412 -0
claude_mpm/services/model/model_router.py +453 -0
claude_mpm/services/model/ollama_provider.py +415 -0
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/METADATA +1 -1
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/RECORD +44 -12
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/WHEEL +0 -0
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/entry_points.txt +0 -0
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/licenses/LICENSE +0 -0
{claude_mpm-4.13.2.dist-info → claude_mpm-4.14.0.dist-info}/top_level.txt +0 -0

claude_mpm/services/core/interfaces/stability.py ADDED Viewed

@@ -0,0 +1,260 @@
+"""
+Stability Monitoring Interfaces for Claude MPM Framework
+==========================================================
+WHY: This module defines interfaces for proactive stability monitoring including
+memory leak detection, log monitoring, and resource exhaustion prevention.
+DESIGN DECISION: Separated from health checks to enable preventive monitoring
+that triggers actions BEFORE crashes occur. Provides early warning systems.
+ARCHITECTURE:
+- IMemoryLeakDetector: Interface for memory leak detection using trend analysis
+- ILogMonitor: Interface for real-time log file monitoring and pattern matching
+- IResourceMonitor: Interface for comprehensive resource usage tracking
+USAGE:
+    memory_detector = MemoryLeakDetector(leak_threshold_mb_per_minute=10.0)
+    log_monitor = LogMonitor(log_file="/var/log/app.log")
+    resource_monitor = ResourceMonitor(fd_threshold_percent=0.8)
+    # Integrate with health monitoring
+    health_manager.add_stability_monitors(
+        memory_detector=memory_detector,
+        log_monitor=log_monitor,
+        resource_monitor=resource_monitor,
+    )
+"""
+from abc import ABC, abstractmethod
+from typing import Callable, List
+from claude_mpm.services.core.models.stability import (
+    LogPatternMatch,
+    MemoryTrend,
+    ResourceUsage,
+)
+class IMemoryLeakDetector(ABC):
+    """
+    Interface for memory leak detection using trend analysis.
+    WHY: Memory leaks are a common cause of process crashes. Early detection
+    enables preemptive restarts BEFORE the OOM killer terminates the process.
+    DESIGN DECISION: Uses slope-based trend analysis over a rolling window
+    to detect sustained memory growth patterns, filtering out normal variations.
+    Algorithm:
+    1. Maintain rolling window of memory measurements (timestamp, memory_mb)
+    2. Calculate linear regression slope (MB per minute)
+    3. Detect leak if slope exceeds threshold (default: 10 MB/minute)
+    4. Trigger alert when leak detected and memory > 80% limit
+    NOTE(review): step 2 says "linear regression", while analyze_trend()
+    documents a two-point slope (newest minus oldest sample); confirm which
+    one concrete implementations are expected to use.
+    Thread Safety: Implementations must be thread-safe for concurrent access.
+    """
+    @abstractmethod
+    def record_memory_usage(self, deployment_id: str, memory_mb: float) -> None:
+        """
+        Record a memory usage measurement.
+        WHY: Builds historical data for trend analysis. Should be called
+        periodically (e.g., every 30s) to collect sufficient data points.
+        Args:
+            deployment_id: Deployment identifier
+            memory_mb: Current memory usage in megabytes
+        """
+    @abstractmethod
+    def analyze_trend(self, deployment_id: str) -> MemoryTrend:
+        """
+        Analyze memory usage trend for leak detection.
+        WHY: Computes slope of memory usage over time to detect sustained
+        growth patterns characteristic of memory leaks.
+        Args:
+            deployment_id: Deployment identifier
+        Returns:
+            MemoryTrend with slope analysis and leak detection result
+        Algorithm:
+            slope_mb_per_minute = (recent_memory - old_memory) / time_delta_minutes
+            is_leaking = slope_mb_per_minute > threshold
+        """
+    @abstractmethod
+    def is_leaking(self, deployment_id: str) -> bool:
+        """
+        Check if deployment has a detected memory leak.
+        Args:
+            deployment_id: Deployment identifier
+        Returns:
+            True if leak detected (sustained memory growth)
+        """
+    @abstractmethod
+    def register_leak_callback(
+        self, callback: Callable[[str, MemoryTrend], None]
+    ) -> None:
+        """
+        Register callback for leak detection events.
+        Args:
+            callback: Function called with (deployment_id, trend) when leak detected
+        """
+class ILogMonitor(ABC):
+    """
+    Interface for real-time log file monitoring and pattern matching.
+    WHY: Application logs contain early warning signals (exceptions, OOM errors,
+    segfaults) that predict imminent crashes. Real-time monitoring enables
+    proactive intervention.
+    DESIGN DECISION: Uses watchdog library for efficient file system monitoring.
+    Avoids polling by receiving file modification events from the OS.
+    NOTE(review): watchdog is a property of the intended implementation; the
+    abstract methods below do not themselves require it - confirm against the
+    concrete log monitor.
+    Pattern Matching:
+    - Regex-based patterns for flexibility
+    - Configurable patterns per deployment
+    - Built-in patterns for common errors:
+      * OutOfMemoryError
+      * Segmentation fault
+      * Exception: / Traceback
+      * Database connection errors
+      * Network timeouts
+    Thread Safety: Uses watchdog's thread-safe event handling.
+    """
+    @abstractmethod
+    def start_monitoring(self, log_file: str, deployment_id: str) -> None:
+        """
+        Start monitoring a log file for error patterns.
+        WHY: Begins watching the log file for new entries. Uses OS-level
+        file system events for efficiency.
+        Args:
+            log_file: Path to log file to monitor
+            deployment_id: Deployment identifier for callbacks
+        """
+    @abstractmethod
+    def stop_monitoring(self, deployment_id: str) -> None:
+        """
+        Stop monitoring a deployment's log file.
+        Args:
+            deployment_id: Deployment identifier
+        """
+    @abstractmethod
+    def add_pattern(self, pattern: str, severity: str = "ERROR") -> None:
+        """
+        Add an error pattern to monitor.
+        Args:
+            pattern: Regex pattern to match
+            severity: Error severity (ERROR, CRITICAL, WARNING); defaults to "ERROR"
+        """
+    @abstractmethod
+    def get_recent_matches(
+        self, deployment_id: str, limit: int = 10
+    ) -> List[LogPatternMatch]:
+        """
+        Get recent pattern matches for a deployment.
+        Args:
+            deployment_id: Deployment identifier
+            limit: Maximum number of matches to return (defaults to 10)
+        Returns:
+            List of LogPatternMatch objects, newest first
+        """
+    @abstractmethod
+    def register_match_callback(
+        self, callback: Callable[[str, LogPatternMatch], None]
+    ) -> None:
+        """
+        Register callback for pattern matches.
+        Args:
+            callback: Function called with (deployment_id, match) when pattern detected
+        """
+class IResourceMonitor(ABC):
+    """
+    Interface for comprehensive resource usage monitoring.
+    WHY: Resource exhaustion (file descriptors, threads, connections, disk space)
+    causes crashes and degradation. Monitoring enables preemptive action at 80%
+    thresholds before hitting hard limits.
+    DESIGN DECISION: Extends basic resource health checks with:
+    - Higher granularity (more frequent checks)
+    - Percentage-based thresholds (80% of ulimit)
+    - Trend analysis for growth rate
+    - Integration with restart manager for preemptive restarts
+    Resource Types:
+    1. File Descriptors: Critical for I/O operations (Unix: ulimit -n)
+    2. Threads: Memory and scheduling overhead
+    3. Network Connections: Socket exhaustion
+    4. Disk Space: Working directory availability
+    Thread Safety: Implementations must be thread-safe.
+    """
+    @abstractmethod
+    def check_resources(self, deployment_id: str) -> ResourceUsage:
+        """
+        Check resource usage for a deployment.
+        WHY: Provides comprehensive snapshot of resource consumption across
+        all monitored resource types.
+        Args:
+            deployment_id: Deployment identifier
+        Returns:
+            ResourceUsage with current metrics and critical status
+        Raises:
+            ValueError: If deployment not found
+        """
+    @abstractmethod
+    def is_critical(self, deployment_id: str) -> bool:
+        """
+        Check if any resource is at critical threshold (>80%).
+        Args:
+            deployment_id: Deployment identifier
+        Returns:
+            True if any resource exceeds 80% of limit
+        """
+    @abstractmethod
+    def register_critical_callback(
+        self, callback: Callable[[str, ResourceUsage], None]
+    ) -> None:
+        """
+        Register callback for critical resource usage.
+        Args:
+            callback: Function called with (deployment_id, usage) when critical
+        """
+# Names exported by `from ... import *`: the three stability interfaces.
+__all__ = [
+    "ILogMonitor",
+    "IMemoryLeakDetector",
+    "IResourceMonitor",
+]

claude_mpm/services/core/models/__init__.py CHANGED Viewed

@@ -20,6 +20,25 @@ from .agent_config import (
     ConfigurationResult,
     ValidationResult,
 )
+from .process import (
+    PROTECTED_PORT_RANGES,
+    DeploymentState,
+    ProcessInfo,
+    ProcessStatus,
+    StartConfig,
+    is_port_protected,
+)
+from .restart import (
+    CircuitBreakerState,
+    RestartAttempt,
+    RestartConfig,
+    RestartHistory,
+)
+from .stability import (
+    LogPatternMatch,
+    MemoryTrend,
+    ResourceUsage,
+)
 from .toolchain import (
     ConfidenceLevel,
     DeploymentTarget,
@@ -43,4 +62,20 @@ __all__ = [  # noqa: RUF022 - Grouped by category with comments for clarity
     "ConfigurationResult",
     "ValidationResult",
     "ConfigurationPreview",
+    # Process management models
+    "ProcessStatus",
+    "DeploymentState",
+    "ProcessInfo",
+    "StartConfig",
+    "PROTECTED_PORT_RANGES",
+    "is_port_protected",
+    # Restart management models
+    "CircuitBreakerState",
+    "RestartAttempt",
+    "RestartHistory",
+    "RestartConfig",
+    # Stability monitoring models
+    "MemoryTrend",
+    "LogPatternMatch",
+    "ResourceUsage",
 ]

claude_mpm/services/core/models/health.py ADDED Viewed

@@ -0,0 +1,189 @@
+"""
+Health Monitoring Data Models for Claude MPM Framework
+=======================================================
+WHY: This module defines data structures for health monitoring operations,
+including health status, check results, and deployment health aggregations.
+DESIGN DECISION: Uses dataclasses for concise, typed records (they are not
+frozen, so instances remain mutable). Provides a clear health status enum
+and structured check results.
+ARCHITECTURE:
+- HealthStatus: Enum of health states (HEALTHY, DEGRADED, UNHEALTHY, UNKNOWN)
+- HealthCheckResult: Result of a single health check
+- DeploymentHealth: Aggregated health status for a deployment
+"""
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List
+class HealthStatus(Enum):
+    """
+    Enumeration of the health levels a check or deployment can report.
+    WHY: Separating "running with problems" from "failed" lets callers react
+    differently to partial degradation and to outright failure.
+    Members:
+        HEALTHY: All checks passing, process operating normally
+        DEGRADED: Process running but with issues (high resource usage, slow responses)
+        UNHEALTHY: Critical failure (process dead, crashed, or unresponsive)
+        UNKNOWN: Cannot determine health status
+    """
+    HEALTHY = "healthy"
+    DEGRADED = "degraded"
+    UNHEALTHY = "unhealthy"
+    UNKNOWN = "unknown"
+    def is_operational(self) -> bool:
+        """Return True for HEALTHY or DEGRADED, False for every other member."""
+        if self is HealthStatus.HEALTHY:
+            return True
+        return self is HealthStatus.DEGRADED
+    def is_critical(self) -> bool:
+        """Return True only for UNHEALTHY."""
+        return self is HealthStatus.UNHEALTHY
+@dataclass
+class HealthCheckResult:
+    """
+    Result of a single health check.
+    WHY: Contains all information about a specific health check execution,
+    enabling detailed analysis and debugging of health issues.
+    Attributes:
+        status: HealthStatus of the check
+        check_type: Type of health check (http, process, resource)
+        message: Human-readable description of the result
+        details: Additional check-specific data
+        checked_at: Timestamp when check was performed
+    """
+    status: HealthStatus
+    check_type: str
+    message: str
+    details: Dict[str, Any] = field(default_factory=dict)
+    checked_at: datetime = field(default_factory=datetime.now)
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert to dictionary for JSON serialization.
+        Returns:
+            Dictionary representation with the status replaced by its string
+            value and the timestamp converted to ISO format
+        """
+        data = asdict(self)
+        data["status"] = self.status.value
+        data["checked_at"] = self.checked_at.isoformat()
+        return data
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "HealthCheckResult":
+        """
+        Create HealthCheckResult from dictionary.
+        The input mapping is not modified; conversions are applied to a
+        shallow copy.
+        Args:
+            data: Dictionary from JSON deserialization
+        Returns:
+            HealthCheckResult instance
+        Raises:
+            ValueError: If "status" is a string that is not a HealthStatus
+                value, or "checked_at" is a string that is not ISO formatted
+            TypeError: If required keys are missing or unknown keys are present
+        """
+        # Copy first so the caller's dictionary keeps its original values.
+        payload = dict(data)
+        # Convert ISO string to datetime
+        if isinstance(payload.get("checked_at"), str):
+            payload["checked_at"] = datetime.fromisoformat(payload["checked_at"])
+        # Convert status string to enum
+        if isinstance(payload.get("status"), str):
+            payload["status"] = HealthStatus(payload["status"])
+        return cls(**payload)
+@dataclass
+class DeploymentHealth:
+    """
+    Aggregated health status for a deployment.
+    WHY: Combines results from multiple health checks to provide a
+    comprehensive health assessment of a deployment.
+    Attributes:
+        deployment_id: Unique deployment identifier
+        overall_status: Aggregated health status
+        checks: List of individual health check results
+        last_check: Timestamp of the most recent health check
+    """
+    deployment_id: str
+    overall_status: HealthStatus
+    checks: List[HealthCheckResult] = field(default_factory=list)
+    last_check: datetime = field(default_factory=datetime.now)
+    def to_dict(self) -> Dict[str, Any]:
+        """
+        Convert to dictionary for JSON serialization.
+        Returns:
+            Dictionary representation with the status as its string value,
+            each check serialized via its own to_dict(), and the timestamp
+            in ISO format
+        """
+        return {
+            "deployment_id": self.deployment_id,
+            "overall_status": self.overall_status.value,
+            "checks": [check.to_dict() for check in self.checks],
+            "last_check": self.last_check.isoformat(),
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "DeploymentHealth":
+        """
+        Create DeploymentHealth from dictionary.
+        Neither the input mapping nor the nested check dictionaries are
+        modified; conversions are applied to shallow copies.
+        Args:
+            data: Dictionary from JSON deserialization
+        Returns:
+            DeploymentHealth instance
+        Raises:
+            ValueError: If a status string is not a HealthStatus value or a
+                timestamp string is not ISO formatted
+            TypeError: If required keys are missing or unknown keys are present
+        """
+        # Copy first so the caller's dictionary keeps its original values.
+        payload = dict(data)
+        # Convert ISO string to datetime
+        if isinstance(payload.get("last_check"), str):
+            payload["last_check"] = datetime.fromisoformat(payload["last_check"])
+        # Convert status string to enum
+        if isinstance(payload.get("overall_status"), str):
+            payload["overall_status"] = HealthStatus(payload["overall_status"])
+        # Convert check dicts to HealthCheckResult objects; each dict is copied
+        # so the nested input dictionaries are not rewritten either.
+        if isinstance(payload.get("checks"), list):
+            payload["checks"] = [
+                HealthCheckResult.from_dict(dict(check))
+                if isinstance(check, dict)
+                else check
+                for check in payload["checks"]
+            ]
+        return cls(**payload)
+    # The return annotation is quoted so it is not evaluated at class-definition
+    # time; the unquoted `X | None` form raises TypeError before Python 3.10.
+    def get_check_by_type(self, check_type: str) -> "HealthCheckResult | None":
+        """
+        Get the result of a specific check type.
+        Args:
+            check_type: Type of health check to retrieve
+        Returns:
+            The first HealthCheckResult whose check_type matches, None otherwise
+        """
+        return next(
+            (check for check in self.checks if check.check_type == check_type),
+            None,
+        )
+# Names exported by `from ... import *`: the health enum and both dataclasses.
+__all__ = [
+    "DeploymentHealth",
+    "HealthCheckResult",
+    "HealthStatus",
+]

claude-mpm 4.13.2__py3-none-any.whl → 4.14.0__py3-none-any.whl

Potentially problematic release.

claude-mpm 4.13.2py3-none-any.whl → 4.14.0py3-none-any.whl