claude-mpm 3.9.8__py3-none-any.whl → 3.9.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/base_agent.json +1 -1
- claude_mpm/agents/templates/memory_manager.json +155 -0
- claude_mpm/cli/__init__.py +18 -3
- claude_mpm/cli/commands/__init__.py +6 -1
- claude_mpm/cli/commands/cleanup.py +21 -1
- claude_mpm/cli/commands/mcp.py +967 -0
- claude_mpm/cli/commands/run_guarded.py +511 -0
- claude_mpm/cli/parser.py +156 -3
- claude_mpm/config/experimental_features.py +219 -0
- claude_mpm/config/memory_guardian_config.py +325 -0
- claude_mpm/config/memory_guardian_yaml.py +335 -0
- claude_mpm/constants.py +14 -0
- claude_mpm/core/memory_aware_runner.py +353 -0
- claude_mpm/hooks/claude_hooks/hook_handler.py +76 -19
- claude_mpm/models/state_models.py +433 -0
- claude_mpm/services/communication/__init__.py +2 -2
- claude_mpm/services/communication/socketio.py +18 -16
- claude_mpm/services/infrastructure/__init__.py +4 -1
- claude_mpm/services/infrastructure/context_preservation.py +537 -0
- claude_mpm/services/infrastructure/graceful_degradation.py +616 -0
- claude_mpm/services/infrastructure/health_monitor.py +775 -0
- claude_mpm/services/infrastructure/logging.py +3 -3
- claude_mpm/services/infrastructure/memory_dashboard.py +479 -0
- claude_mpm/services/infrastructure/memory_guardian.py +944 -0
- claude_mpm/services/infrastructure/restart_protection.py +642 -0
- claude_mpm/services/infrastructure/state_manager.py +774 -0
- claude_mpm/services/mcp_gateway/__init__.py +39 -23
- claude_mpm/services/mcp_gateway/core/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/core/interfaces.py +10 -9
- claude_mpm/services/mcp_gateway/main.py +356 -0
- claude_mpm/services/mcp_gateway/manager.py +334 -0
- claude_mpm/services/mcp_gateway/registry/__init__.py +6 -3
- claude_mpm/services/mcp_gateway/registry/service_registry.py +393 -0
- claude_mpm/services/mcp_gateway/registry/tool_registry.py +477 -0
- claude_mpm/services/mcp_gateway/server/__init__.py +9 -3
- claude_mpm/services/mcp_gateway/server/mcp_gateway.py +431 -0
- claude_mpm/services/mcp_gateway/server/stdio_handler.py +373 -0
- claude_mpm/services/mcp_gateway/tools/__init__.py +16 -3
- claude_mpm/services/mcp_gateway/tools/base_adapter.py +496 -0
- claude_mpm/services/mcp_gateway/tools/document_summarizer.py +729 -0
- claude_mpm/services/mcp_gateway/tools/hello_world.py +551 -0
- claude_mpm/services/ticket_manager.py +8 -8
- claude_mpm/services/ticket_manager_di.py +5 -5
- claude_mpm/storage/__init__.py +9 -0
- claude_mpm/storage/state_storage.py +556 -0
- claude_mpm/utils/file_utils.py +293 -0
- claude_mpm/utils/platform_memory.py +524 -0
- claude_mpm/utils/subprocess_utils.py +305 -0
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/METADATA +27 -2
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/RECORD +56 -32
- claude_mpm/agents/templates/.claude-mpm/memories/README.md +0 -36
- claude_mpm/agents/templates/.claude-mpm/memories/engineer_agent.md +0 -39
- claude_mpm/agents/templates/.claude-mpm/memories/qa_agent.md +0 -38
- claude_mpm/agents/templates/.claude-mpm/memories/research_agent.md +0 -39
- claude_mpm/agents/templates/.claude-mpm/memories/version_control_agent.md +0 -38
- /claude_mpm/agents/templates/{research_memory_efficient.json → backup/research_memory_efficient.json} +0 -0
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/WHEEL +0 -0
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/entry_points.txt +0 -0
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-3.9.8.dist-info → claude_mpm-3.9.11.dist-info}/top_level.txt +0 -0
| @@ -0,0 +1,775 @@ | |
| 1 | 
            +
            """Health monitoring service for Memory Guardian system.
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Provides comprehensive health checks including system resources, process health,
         | 
| 4 | 
            +
            and integration with existing monitoring infrastructure.
         | 
| 5 | 
            +
            """
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            import asyncio
         | 
| 8 | 
            +
            import os
         | 
| 9 | 
            +
            import platform
         | 
| 10 | 
            +
            import psutil
         | 
| 11 | 
            +
            import shutil
         | 
| 12 | 
            +
            import socket
         | 
| 13 | 
            +
            import time
         | 
| 14 | 
            +
            from dataclasses import dataclass
         | 
| 15 | 
            +
            from datetime import datetime
         | 
| 16 | 
            +
            from enum import Enum
         | 
| 17 | 
            +
            from pathlib import Path
         | 
| 18 | 
            +
            from typing import Dict, List, Optional, Any, Callable
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            from claude_mpm.services.core.base import BaseService
         | 
| 21 | 
            +
             | 
| 22 | 
            +
             | 
| 23 | 
            +
            class HealthStatus(Enum):
                """Outcome levels a health check (or the whole system) can report.

                The string values are the ones written out by the ``to_dict()``
                serializers in this module.
                """

                # Check passed.
                HEALTHY = "healthy"
                # Reported e.g. by the CPU check once usage reaches the configured threshold.
                DEGRADED = "degraded"
                UNHEALTHY = "unhealthy"
                # Reported e.g. by the CPU check at 95% usage or above.
                CRITICAL = "critical"
         | 
| 29 | 
            +
             | 
| 30 | 
            +
             | 
| 31 | 
            +
            class CheckType(Enum):
                """Categories used to tag a health check result.

                The string value of the member is what ``HealthCheck.to_dict()``
                emits under the ``'type'`` key.
                """

                # Host-level resource categories
                SYSTEM_RESOURCES = "system_resources"
                CPU_USAGE = "cpu_usage"
                MEMORY_USAGE = "memory_usage"
                DISK_SPACE = "disk_space"
                NETWORK = "network"

                # Application-level categories
                PROCESS = "process"
                DEPENDENCIES = "dependencies"
                CUSTOM = "custom"
         | 
| 41 | 
            +
             | 
| 42 | 
            +
             | 
| 43 | 
            +
            @dataclass
            class HealthCheck:
                """Result record produced by a single health probe.

                Attributes:
                    name: Human-readable label of the probe (e.g. "CPU Usage").
                    check_type: Category of the probe.
                    status: Outcome level of the probe.
                    message: Short description of the outcome.
                    details: Free-form extra data attached by the probe.
                    timestamp: Epoch seconds (passed to ``datetime.fromtimestamp``).
                    duration_ms: How long the probe took, in milliseconds.
                """
                name: str
                check_type: CheckType
                status: HealthStatus
                message: str
                details: Dict[str, Any]
                timestamp: float
                duration_ms: float

                def to_dict(self) -> Dict[str, Any]:
                    """Serialize the result into a plain dictionary.

                    Enum fields are flattened to their string values, the timestamp is
                    additionally rendered as a naive local-time ISO-8601 string, and the
                    duration is rounded to two decimals.

                    Returns:
                        Dictionary with keys ``name``, ``type``, ``status``, ``message``,
                        ``details``, ``timestamp``, ``timestamp_iso`` and ``duration_ms``.
                    """
                    return dict(
                        name=self.name,
                        type=self.check_type.value,
                        status=self.status.value,
                        message=self.message,
                        details=self.details,
                        timestamp=self.timestamp,
                        timestamp_iso=datetime.fromtimestamp(self.timestamp).isoformat(),
                        duration_ms=round(self.duration_ms, 2),
                    )
         | 
| 66 | 
            +
             | 
| 67 | 
            +
             | 
| 68 | 
            +
            @dataclass
            class SystemHealth:
                """Aggregated report for one full health-check pass.

                Attributes:
                    status: Overall outcome level for the pass.
                    checks: Individual probe results collected during the pass.
                    timestamp: Epoch seconds associated with the pass.
                """
                status: HealthStatus
                checks: List[HealthCheck]
                timestamp: float

                @property
                def healthy_checks(self) -> int:
                    """Number of checks whose status is ``HealthStatus.HEALTHY``."""
                    passing = [chk for chk in self.checks if chk.status == HealthStatus.HEALTHY]
                    return len(passing)

                @property
                def total_checks(self) -> int:
                    """Number of checks contained in this report."""
                    return len(self.checks)

                @property
                def health_percentage(self) -> float:
                    """Share of healthy checks as a percentage (0-100).

                    A report without any checks yields 0.0.
                    """
                    total = self.total_checks
                    if total == 0:
                        return 0.0
                    return (self.healthy_checks / total) * 100

                def to_dict(self) -> Dict[str, Any]:
                    """Serialize the report, including every contained check.

                    Returns:
                        Dictionary with the overall status value, the health percentage
                        rounded to one decimal, the healthy/total counters, the
                        serialized checks and the timestamp both as epoch seconds and as
                        a naive local-time ISO-8601 string.
                    """
                    return dict(
                        status=self.status.value,
                        health_percentage=round(self.health_percentage, 1),
                        healthy_checks=self.healthy_checks,
                        total_checks=self.total_checks,
                        checks=[chk.to_dict() for chk in self.checks],
                        timestamp=self.timestamp,
                        timestamp_iso=datetime.fromtimestamp(self.timestamp).isoformat(),
                    )
         | 
| 103 | 
            +
             | 
| 104 | 
            +
             | 
| 105 | 
            +
            class HealthMonitor(BaseService):
         | 
| 106 | 
            +
                """Service for monitoring system and application health."""
         | 
| 107 | 
            +
                
         | 
| 108 | 
            +
                def __init__(
                    self,
                    cpu_threshold_percent: float = 80.0,
                    memory_threshold_percent: float = 90.0,
                    disk_threshold_percent: float = 90.0,
                    min_disk_space_gb: float = 1.0,
                    check_interval_seconds: int = 30,
                    state_dir: Optional[Path] = None
                ):
                    """Set up configuration and empty runtime state for the monitor.

                    No monitoring is started here; that happens in ``initialize()``.

                    Args:
                        cpu_threshold_percent: CPU usage (percent) at which the CPU check
                            reports a degraded status.
                        memory_threshold_percent: Memory usage threshold for degradation.
                        disk_threshold_percent: Disk usage threshold for degradation.
                        min_disk_space_gb: Minimum free disk space, in GB.
                        check_interval_seconds: Seconds between periodic checks;
                            ``initialize()`` only starts the loop when this is > 0.
                        state_dir: Directory for state files. A falsy value selects
                            ``~/.claude-mpm/health``.
                    """
                    super().__init__("HealthMonitor")

                    # --- configuration ---
                    self.cpu_threshold = cpu_threshold_percent
                    self.memory_threshold = memory_threshold_percent
                    self.disk_threshold = disk_threshold_percent
                    self.min_disk_space_gb = min_disk_space_gb
                    self.check_interval = check_interval_seconds
                    if not state_dir:
                        state_dir = Path.home() / ".claude-mpm" / "health"
                    self.state_dir = state_dir

                    # --- registries of check callables ---
                    self.health_checks: Dict[str, Callable] = {}
                    self.custom_checks: List[Callable] = []

                    # --- results and background-task bookkeeping ---
                    self.last_check: Optional[SystemHealth] = None
                    self.check_history: List[SystemHealth] = []
                    self.monitoring_task: Optional[asyncio.Task] = None
                    self.monitoring_active = False

                    # --- optional process under observation ---
                    self.monitored_pid: Optional[int] = None
                    self.monitored_process: Optional[psutil.Process] = None

                    # Must run after the registries above exist.
                    self._register_default_checks()

                    summary = (
                        f"CPU={cpu_threshold_percent}%, "
                        f"Memory={memory_threshold_percent}%, "
                        f"Disk={disk_threshold_percent}%"
                    )
                    self.log_info(f"Health monitor initialized: {summary}")
         | 
| 160 | 
            +
                
         | 
| 161 | 
            +
                async def initialize(self) -> bool:
                    """Prepare the service for use.

                    Creates the state directory, verifies system capabilities (a failed
                    verification only logs a warning) and starts periodic monitoring when
                    ``check_interval`` is positive. Any exception is logged and turned
                    into a ``False`` return value.

                    Returns:
                        True when initialization completed, False when it raised.
                    """
                    try:
                        self.log_info("Initializing health monitor service")

                        # Ensure the state directory (and its parents) exist.
                        self.state_dir.mkdir(exist_ok=True, parents=True)

                        # Missing capabilities are tolerated; the service keeps going.
                        capabilities_ok = self._verify_system_capabilities()
                        if not capabilities_ok:
                            self.log_warning("Some system capabilities unavailable, running in degraded mode")

                        # A non-positive interval disables the background loop.
                        wants_periodic_checks = self.check_interval > 0
                        if wants_periodic_checks:
                            self.start_monitoring()

                        self._initialized = True
                        self.log_info("Health monitor service initialized successfully")
                    except Exception as exc:
                        self.log_error(f"Failed to initialize health monitor: {exc}")
                        return False
                    return True
         | 
| 188 | 
            +
                
         | 
| 189 | 
            +
                async def shutdown(self) -> None:
                    """Stop background monitoring and mark the service as shut down.

                    Errors raised while stopping are logged, not propagated; in that
                    case the ``_shutdown`` flag is left untouched.
                    """
                    try:
                        self.log_info("Shutting down health monitor service")

                        # Cancels the periodic loop if one is running.
                        await self.stop_monitoring()

                        self._shutdown = True
                        self.log_info("Health monitor service shutdown complete")
                    except Exception as exc:
                        self.log_error(f"Error during health monitor shutdown: {exc}")
         | 
| 202 | 
            +
                
         | 
| 203 | 
            +
                def set_monitored_process(self, pid: int) -> bool:
                    """Attach the monitor to the process with the given PID.

                    On failure the previously monitored process (if any) is kept.

                    Args:
                        pid: Process ID to monitor.

                    Returns:
                        True when a ``psutil.Process`` handle was obtained and stored,
                        False when the process does not exist or any other error occurred.
                    """
                    try:
                        # The handle is created first, so nothing is stored when lookup fails.
                        self.monitored_process, self.monitored_pid = psutil.Process(pid), pid
                        self.log_info(f"Monitoring process {pid}")
                    except psutil.NoSuchProcess:
                        self.log_error(f"Process {pid} not found")
                        return False
                    except Exception as exc:
                        self.log_error(f"Error setting monitored process: {exc}")
                        return False
                    return True
         | 
| 223 | 
            +
                
         | 
| 224 | 
            +
                async def check_health(self) -> SystemHealth:
                    """Run every health check once and record the combined result.

                    Built-in checks run sequentially in a fixed order (CPU, memory, disk,
                    network, optional process check, dependencies), followed by the
                    registered custom checks. A custom check that raises is logged and
                    skipped; a custom result that is not a ``HealthCheck`` is ignored.
                    The report is stored as ``last_check`` and appended to
                    ``check_history``, which is capped at the 100 most recent entries.

                    Returns:
                        SystemHealth object with results
                    """
                    started_at = time.time()

                    # System resources and network
                    results = [
                        await self._check_cpu_usage(),
                        await self._check_memory_usage(),
                        await self._check_disk_space(),
                        await self._check_network(),
                    ]

                    # Process check only applies once a process has been attached
                    if self.monitored_process:
                        results.append(await self._check_process_health())

                    results.append(await self._check_dependencies())

                    # User-registered checks; one failure must not abort the pass
                    for custom_check in self.custom_checks:
                        try:
                            outcome = await custom_check()
                            if isinstance(outcome, HealthCheck):
                                results.append(outcome)
                        except Exception as exc:
                            self.log_error(f"Custom health check failed: {exc}")

                    report = SystemHealth(
                        status=self._determine_overall_status(results),
                        checks=results,
                        timestamp=started_at
                    )

                    # Remember the report and keep the history bounded
                    self.last_check = report
                    self.check_history.append(report)
                    if len(self.check_history) > 100:
                        self.check_history = self.check_history[-100:]

                    return report
         | 
| 276 | 
            +
                
         | 
| 277 | 
            +
                async def validate_before_start(self) -> tuple[bool, str]:
                    """Validate system resources before starting monitoring.

                    Checks, in order: at least 500MB of available memory, at least
                    ``min_disk_space_gb`` free on the filesystem holding the state
                    directory, and CPU usage not above 95%. The first failing check
                    determines the result.

                    The state directory is only created by ``initialize()``, so it may
                    not exist yet when this runs; disk space is therefore measured on the
                    nearest existing ancestor of ``state_dir`` instead of raising
                    ``FileNotFoundError``.

                    Returns:
                        Tuple of (valid, message)
                    """
                    # Check available memory
                    mem = psutil.virtual_memory()
                    if mem.available < 500 * 1024 * 1024:  # Less than 500MB
                        return False, f"Insufficient memory: {mem.available / (1024*1024):.0f}MB available"

                    # Check disk space on the closest path that actually exists.
                    # Walking up stops at the filesystem root (or "." for a relative
                    # path), where parent == self.
                    probe_path = Path(self.state_dir)
                    while not probe_path.exists() and probe_path != probe_path.parent:
                        probe_path = probe_path.parent
                    disk = shutil.disk_usage(probe_path)
                    if disk.free < self.min_disk_space_gb * 1024 * 1024 * 1024:
                        return False, f"Insufficient disk space: {disk.free / (1024*1024*1024):.1f}GB available"

                    # Check CPU load (samples over a 0.1s window)
                    cpu_percent = psutil.cpu_percent(interval=0.1)
                    if cpu_percent > 95:
                        return False, f"CPU overloaded: {cpu_percent:.0f}% usage"

                    return True, "System resources adequate"
         | 
| 299 | 
            +
                
         | 
| 300 | 
            +
                def start_monitoring(self) -> None:
                    """Start continuous health monitoring.

                    Schedules ``_monitoring_loop()`` as an asyncio task. Calling this
                    while monitoring is already active only logs a warning.

                    Raises:
                        RuntimeError: If there is no running event loop. In that case
                            ``monitoring_active`` is reset to False so that a later
                            call can succeed.
                    """
                    if self.monitoring_active:
                        self.log_warning("Health monitoring already active")
                        return

                    # Flag is set before scheduling so the loop sees it as active.
                    self.monitoring_active = True
                    loop_coro = self._monitoring_loop()
                    try:
                        self.monitoring_task = asyncio.create_task(loop_coro)
                    except RuntimeError:
                        # No running loop: undo the flag and dispose of the coroutine
                        # so the monitor is not stuck "active" without a task.
                        self.monitoring_active = False
                        loop_coro.close()
                        raise
                    self.log_info("Started health monitoring")
         | 
| 309 | 
            +
                
         | 
| 310 | 
            +
                async def stop_monitoring(self) -> None:
                    """Cancel the background monitoring task, if monitoring is active.

                    Does nothing when monitoring is not active. Otherwise the active
                    flag is cleared, the task (if any) is cancelled and awaited, and the
                    stored task reference is dropped.
                    """
                    if not self.monitoring_active:
                        return

                    self.monitoring_active = False

                    task = self.monitoring_task
                    if task:
                        task.cancel()
                        # Wait for the cancellation to take effect; the resulting
                        # CancelledError is the expected outcome.
                        try:
                            await task
                        except asyncio.CancelledError:
                            pass
                        self.monitoring_task = None

                    self.log_info("Stopped health monitoring")
         | 
| 326 | 
            +
                
         | 
| 327 | 
            +
                def register_health_check(self, name: str, check_func: Callable) -> None:
                    """Add a custom check that ``check_health()`` will run.

                    Args:
                        name: Label for the check; only used in the log message.
                        check_func: Zero-argument callable that is awaited by
                            ``check_health()`` and should produce a ``HealthCheck``.
                    """
                    # Registration order is execution order.
                    self.custom_checks.extend([check_func])
                    self.log_info(f"Registered custom health check: {name}")
         | 
| 336 | 
            +
                
         | 
| 337 | 
            +
                def get_health_status(self) -> Optional[SystemHealth]:
         | 
| 338 | 
            +
                    """Get the last health check result.
         | 
| 339 | 
            +
                    
         | 
| 340 | 
            +
                    Returns:
         | 
| 341 | 
            +
                        Last SystemHealth or None
         | 
| 342 | 
            +
                    """
         | 
| 343 | 
            +
                    return self.last_check
         | 
| 344 | 
            +
                
         | 
| 345 | 
            +
                def get_health_history(self, limit: int = 10) -> List[SystemHealth]:
         | 
| 346 | 
            +
                    """Get health check history.
         | 
| 347 | 
            +
                    
         | 
| 348 | 
            +
                    Args:
         | 
| 349 | 
            +
                        limit: Maximum number of records to return
         | 
| 350 | 
            +
                        
         | 
| 351 | 
            +
                    Returns:
         | 
| 352 | 
            +
                        List of SystemHealth objects
         | 
| 353 | 
            +
                    """
         | 
| 354 | 
            +
                    return self.check_history[-limit:]
         | 
| 355 | 
            +
                
         | 
| 356 | 
            +
                async def _check_cpu_usage(self) -> HealthCheck:
                    """Sample system-wide CPU utilisation and grade the reading.

                    Returns:
                        HealthCheck of type CPU_USAGE. The status is CRITICAL at
                        95% or above, DEGRADED at or above ``self.cpu_threshold``,
                        HEALTHY otherwise, and UNHEALTHY when sampling raised.
                    """
                    began = time.time()

                    try:
                        usage = psutil.cpu_percent(interval=0.1)
                        core_count = psutil.cpu_count()

                        # Start from the healthy verdict and escalate if needed.
                        verdict = HealthStatus.HEALTHY
                        summary = f"CPU usage normal: {usage:.1f}%"
                        if usage >= 95:
                            verdict = HealthStatus.CRITICAL
                            summary = f"CPU critically high: {usage:.1f}%"
                        elif usage >= self.cpu_threshold:
                            verdict = HealthStatus.DEGRADED
                            summary = f"CPU usage high: {usage:.1f}%"

                        readings = {
                            'cpu_percent': usage,
                            'cpu_count': core_count,
                            'threshold': self.cpu_threshold,
                        }
                        return HealthCheck(
                            name="CPU Usage",
                            check_type=CheckType.CPU_USAGE,
                            status=verdict,
                            message=summary,
                            details=readings,
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )

                    except Exception as exc:
                        # Any failure while sampling is reported as a result
                        # rather than propagated to the caller.
                        return HealthCheck(
                            name="CPU Usage",
                            check_type=CheckType.CPU_USAGE,
                            status=HealthStatus.UNHEALTHY,
                            message=f"Failed to check CPU: {exc}",
                            details={'error': str(exc)},
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )
         | 
| 398 | 
            +
                
         | 
| 399 | 
            +
                async def _check_memory_usage(self) -> HealthCheck:
                    """Read system memory usage and grade the reading.

                    Returns:
                        HealthCheck of type MEMORY_USAGE. The status is CRITICAL
                        at 95% or above, DEGRADED at or above
                        ``self.memory_threshold``, HEALTHY otherwise, and
                        UNHEALTHY when the reading raised.
                    """
                    began = time.time()
                    bytes_per_mb = 1024 * 1024

                    try:
                        snapshot = psutil.virtual_memory()
                        used_pct = snapshot.percent

                        # Start from the healthy verdict and escalate if needed.
                        verdict = HealthStatus.HEALTHY
                        summary = f"Memory usage normal: {used_pct:.1f}%"
                        if used_pct >= 95:
                            verdict = HealthStatus.CRITICAL
                            summary = f"Memory critically high: {used_pct:.1f}%"
                        elif used_pct >= self.memory_threshold:
                            verdict = HealthStatus.DEGRADED
                            summary = f"Memory usage high: {used_pct:.1f}%"

                        readings = {
                            'memory_percent': used_pct,
                            'available_mb': snapshot.available / bytes_per_mb,
                            'total_mb': snapshot.total / bytes_per_mb,
                            'threshold': self.memory_threshold,
                        }
                        return HealthCheck(
                            name="Memory Usage",
                            check_type=CheckType.MEMORY_USAGE,
                            status=verdict,
                            message=summary,
                            details=readings,
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )

                    except Exception as exc:
                        # Any failure while reading is reported as a result
                        # rather than propagated to the caller.
                        return HealthCheck(
                            name="Memory Usage",
                            check_type=CheckType.MEMORY_USAGE,
                            status=HealthStatus.UNHEALTHY,
                            message=f"Failed to check memory: {exc}",
                            details={'error': str(exc)},
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )
         | 
| 441 | 
            +
                
         | 
| 442 | 
            +
                async def _check_disk_space(self) -> HealthCheck:
                    """Grade free space on the disk that holds ``self.state_dir``.

                    Returns:
                        HealthCheck of type DISK_SPACE. The status is CRITICAL
                        when free space is below ``self.min_disk_space_gb``,
                        DEGRADED when the used percentage reaches
                        ``self.disk_threshold``, HEALTHY otherwise, and UNHEALTHY
                        when the lookup raised.
                    """
                    began = time.time()
                    bytes_per_gb = 1024 * 1024 * 1024

                    try:
                        usage = shutil.disk_usage(self.state_dir)
                        used_pct = (usage.used / usage.total) * 100
                        free_gb = usage.free / bytes_per_gb

                        # The absolute free-space floor outranks the percentage rule.
                        verdict = HealthStatus.HEALTHY
                        summary = f"Disk space adequate: {free_gb:.1f}GB free"
                        if free_gb < self.min_disk_space_gb:
                            verdict = HealthStatus.CRITICAL
                            summary = f"Disk space critical: {free_gb:.1f}GB free"
                        elif used_pct >= self.disk_threshold:
                            verdict = HealthStatus.DEGRADED
                            summary = f"Disk usage high: {used_pct:.1f}%"

                        readings = {
                            'disk_percent': used_pct,
                            'free_gb': free_gb,
                            'total_gb': usage.total / bytes_per_gb,
                            'threshold': self.disk_threshold,
                            'min_space_gb': self.min_disk_space_gb,
                            'path': str(self.state_dir),
                        }
                        return HealthCheck(
                            name="Disk Space",
                            check_type=CheckType.DISK_SPACE,
                            status=verdict,
                            message=summary,
                            details=readings,
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )

                    except Exception as exc:
                        # Any failure during the lookup is reported as a result
                        # rather than propagated to the caller.
                        return HealthCheck(
                            name="Disk Space",
                            check_type=CheckType.DISK_SPACE,
                            status=HealthStatus.UNHEALTHY,
                            message=f"Failed to check disk: {exc}",
                            details={'error': str(exc)},
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )
         | 
| 489 | 
            +
                
         | 
| 490 | 
            +
                async def _check_network(self) -> HealthCheck:
                    """Check network connectivity.

                    Tries a TCP connection to port 53 of two public DNS servers
                    and stops at the first one that succeeds.

                    Returns:
                        HealthCheck of type NETWORK: HEALTHY when any probe
                        connects, DEGRADED when none do, and UNHEALTHY when the
                        check itself raised unexpectedly.
                    """
                    start = time.time()

                    try:
                        # Try to connect to common DNS servers
                        test_hosts = [
                            ('8.8.8.8', 53),  # Google DNS
                            ('1.1.1.1', 53),  # Cloudflare DNS
                        ]

                        connected = False
                        for host, port in test_hosts:
                            try:
                                # The context manager closes the socket on every
                                # path, including when settimeout/connect_ex raises,
                                # so a failed probe cannot leak a descriptor.
                                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
                                    # NOTE(review): each probe uses a blocking socket
                                    # with a 2 second timeout inside an async method.
                                    sock.settimeout(2)
                                    if sock.connect_ex((host, port)) == 0:
                                        connected = True
                                        break
                            except OSError:
                                # Best effort: a probe that cannot even be attempted
                                # simply does not count; move on to the next host.
                                continue

                        if connected:
                            status = HealthStatus.HEALTHY
                            message = "Network connectivity OK"
                        else:
                            status = HealthStatus.DEGRADED
                            message = "Network connectivity limited"

                        return HealthCheck(
                            name="Network",
                            check_type=CheckType.NETWORK,
                            status=status,
                            message=message,
                            details={'connected': connected},
                            timestamp=start,
                            duration_ms=(time.time() - start) * 1000
                        )

                    except Exception as e:
                        return HealthCheck(
                            name="Network",
                            check_type=CheckType.NETWORK,
                            status=HealthStatus.UNHEALTHY,
                            message=f"Failed to check network: {e}",
                            details={'error': str(e)},
                            timestamp=start,
                            duration_ms=(time.time() - start) * 1000
                        )
         | 
| 541 | 
            +
                
         | 
| 542 | 
            +
                async def _check_process_health(self) -> HealthCheck:
                    """Report on the process held in ``self.monitored_process``.

                    Returns:
                        HealthCheck of type PROCESS. HEALTHY when nothing is
                        monitored or the process looks fine; CRITICAL when it is
                        not running, cannot be found, or is in a zombie/dead
                        state; DEGRADED when its CPU reading exceeds 90%;
                        UNHEALTHY when the inspection raised any other error.
                    """
                    began = time.time()

                    def report(level, text, info):
                        # Single place that stamps the shared name, type and timing.
                        return HealthCheck(
                            name="Process Health",
                            check_type=CheckType.PROCESS,
                            status=level,
                            message=text,
                            details=info,
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )

                    try:
                        target = self.monitored_process

                        # Guard: nothing to inspect.
                        if not target:
                            return report(HealthStatus.HEALTHY, "No process monitored", {})

                        # Guard: the process has gone away.
                        if not target.is_running():
                            return report(
                                HealthStatus.CRITICAL,
                                f"Process {self.monitored_pid} not running",
                                {'pid': self.monitored_pid},
                            )

                        # Collect all readings inside one oneshot() block.
                        with target.oneshot():
                            proc_cpu = target.cpu_percent()
                            rss_mb = target.memory_info().rss / (1024 * 1024)
                            proc_state = target.status()

                        if proc_state in ('zombie', 'dead'):
                            level = HealthStatus.CRITICAL
                            text = f"Process in {proc_state} state"
                        elif proc_cpu > 90:
                            level = HealthStatus.DEGRADED
                            text = f"Process CPU high: {proc_cpu:.1f}%"
                        else:
                            level = HealthStatus.HEALTHY
                            text = f"Process healthy (PID: {self.monitored_pid})"

                        return report(
                            level,
                            text,
                            {
                                'pid': self.monitored_pid,
                                'cpu_percent': proc_cpu,
                                'memory_mb': rss_mb,
                                'status': proc_state,
                            },
                        )

                    except psutil.NoSuchProcess:
                        return report(
                            HealthStatus.CRITICAL,
                            f"Process {self.monitored_pid} not found",
                            {'pid': self.monitored_pid},
                        )
                    except Exception as exc:
                        return report(
                            HealthStatus.UNHEALTHY,
                            f"Failed to check process: {exc}",
                            {'error': str(exc)},
                        )
         | 
| 623 | 
            +
                
         | 
| 624 | 
            +
                async def _check_dependencies(self) -> HealthCheck:
                    """Verify that the critical dependencies can be imported.

                    Returns:
                        HealthCheck of type DEPENDENCIES: HEALTHY when every
                        required module imports, DEGRADED with the missing names
                        listed otherwise, and UNHEALTHY when the check raised.
                    """
                    began = time.time()

                    try:
                        absent = []

                        # psutil is the only critical dependency probed here.
                        for module_name in ('psutil',):
                            try:
                                __import__(module_name)
                            except ImportError:
                                absent.append(module_name)

                        verdict = HealthStatus.DEGRADED if absent else HealthStatus.HEALTHY
                        if absent:
                            summary = f"Missing dependencies: {', '.join(absent)}"
                        else:
                            summary = "All dependencies available"

                        return HealthCheck(
                            name="Dependencies",
                            check_type=CheckType.DEPENDENCIES,
                            status=verdict,
                            message=summary,
                            details={'missing': absent},
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )

                    except Exception as exc:
                        return HealthCheck(
                            name="Dependencies",
                            check_type=CheckType.DEPENDENCIES,
                            status=HealthStatus.UNHEALTHY,
                            message=f"Failed to check dependencies: {exc}",
                            details={'error': str(exc)},
                            timestamp=began,
                            duration_ms=(time.time() - began) * 1000,
                        )
         | 
| 664 | 
            +
                
         | 
| 665 | 
            +
                def _register_default_checks(self) -> None:
         | 
| 666 | 
            +
                    """Register default health checks."""
         | 
| 667 | 
            +
                    # Default checks are implemented as methods
         | 
| 668 | 
            +
                    pass
         | 
| 669 | 
            +
                
         | 
| 670 | 
            +
                def _verify_system_capabilities(self) -> bool:
         | 
| 671 | 
            +
                    """Verify system monitoring capabilities.
         | 
| 672 | 
            +
                    
         | 
| 673 | 
            +
                    Returns:
         | 
| 674 | 
            +
                        True if all capabilities available
         | 
| 675 | 
            +
                    """
         | 
| 676 | 
            +
                    capabilities = {
         | 
| 677 | 
            +
                        'psutil': False,
         | 
| 678 | 
            +
                        'cpu': False,
         | 
| 679 | 
            +
                        'memory': False,
         | 
| 680 | 
            +
                        'disk': False,
         | 
| 681 | 
            +
                        'network': False
         | 
| 682 | 
            +
                    }
         | 
| 683 | 
            +
                    
         | 
| 684 | 
            +
                    try:
         | 
| 685 | 
            +
                        import psutil
         | 
| 686 | 
            +
                        capabilities['psutil'] = True
         | 
| 687 | 
            +
                        
         | 
| 688 | 
            +
                        # Test CPU monitoring
         | 
| 689 | 
            +
                        try:
         | 
| 690 | 
            +
                            psutil.cpu_percent(interval=0.1)
         | 
| 691 | 
            +
                            capabilities['cpu'] = True
         | 
| 692 | 
            +
                        except:
         | 
| 693 | 
            +
                            pass
         | 
| 694 | 
            +
                        
         | 
| 695 | 
            +
                        # Test memory monitoring
         | 
| 696 | 
            +
                        try:
         | 
| 697 | 
            +
                            psutil.virtual_memory()
         | 
| 698 | 
            +
                            capabilities['memory'] = True
         | 
| 699 | 
            +
                        except:
         | 
| 700 | 
            +
                            pass
         | 
| 701 | 
            +
                        
         | 
| 702 | 
            +
                        # Test disk monitoring
         | 
| 703 | 
            +
                        try:
         | 
| 704 | 
            +
                            shutil.disk_usage('/')
         | 
| 705 | 
            +
                            capabilities['disk'] = True
         | 
| 706 | 
            +
                        except:
         | 
| 707 | 
            +
                            pass
         | 
| 708 | 
            +
                        
         | 
| 709 | 
            +
                        # Test network monitoring
         | 
| 710 | 
            +
                        try:
         | 
| 711 | 
            +
                            socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         | 
| 712 | 
            +
                            capabilities['network'] = True
         | 
| 713 | 
            +
                        except:
         | 
| 714 | 
            +
                            pass
         | 
| 715 | 
            +
                        
         | 
| 716 | 
            +
                    except ImportError:
         | 
| 717 | 
            +
                        self.log_warning("psutil not available, running in degraded mode")
         | 
| 718 | 
            +
                    
         | 
| 719 | 
            +
                    # Log capabilities
         | 
| 720 | 
            +
                    for cap, available in capabilities.items():
         | 
| 721 | 
            +
                        if not available:
         | 
| 722 | 
            +
                            self.log_warning(f"System capability unavailable: {cap}")
         | 
| 723 | 
            +
                    
         | 
| 724 | 
            +
                    return all(capabilities.values())
         | 
| 725 | 
            +
                
         | 
| 726 | 
            +
                def _determine_overall_status(self, checks: List[HealthCheck]) -> HealthStatus:
                    """Collapse individual check results into one overall status.

                    The worst status present wins, in the order CRITICAL,
                    UNHEALTHY, DEGRADED. With no checks, or when none of those
                    three appear, the result is HEALTHY.

                    Args:
                        checks: List of health check results

                    Returns:
                        Overall HealthStatus
                    """
                    if not checks:
                        return HealthStatus.HEALTHY

                    # Walk the severity ladder from worst to mildest; the first
                    # level reported by any check decides the outcome.
                    severity_order = (
                        HealthStatus.CRITICAL,
                        HealthStatus.UNHEALTHY,
                        HealthStatus.DEGRADED,
                    )
                    for level in severity_order:
                        if any(entry.status == level for entry in checks):
                            return level

                    return HealthStatus.HEALTHY
         | 
| 752 | 
            +
                
         | 
| 753 | 
            +
                async def _monitoring_loop(self) -> None:
         | 
| 754 | 
            +
                    """Continuous health monitoring loop."""
         | 
| 755 | 
            +
                    try:
         | 
| 756 | 
            +
                        while self.monitoring_active:
         | 
| 757 | 
            +
                            try:
         | 
| 758 | 
            +
                                # Perform health checks
         | 
| 759 | 
            +
                                health = await self.check_health()
         | 
| 760 | 
            +
                                
         | 
| 761 | 
            +
                                # Log if status changed
         | 
| 762 | 
            +
                                if self.check_history and len(self.check_history) > 1:
         | 
| 763 | 
            +
                                    prev_status = self.check_history[-2].status
         | 
| 764 | 
            +
                                    if health.status != prev_status:
         | 
| 765 | 
            +
                                        self.log_info(f"Health status changed: {prev_status.value} -> {health.status.value}")
         | 
| 766 | 
            +
                                
         | 
| 767 | 
            +
                                # Wait for next check
         | 
| 768 | 
            +
                                await asyncio.sleep(self.check_interval)
         | 
| 769 | 
            +
                                
         | 
| 770 | 
            +
                            except Exception as e:
         | 
| 771 | 
            +
                                self.log_error(f"Error in health monitoring loop: {e}")
         | 
| 772 | 
            +
                                await asyncio.sleep(self.check_interval)
         | 
| 773 | 
            +
                                
         | 
| 774 | 
            +
                    except asyncio.CancelledError:
         | 
| 775 | 
            +
                        self.log_debug("Health monitoring loop cancelled")
         |