claude-mpm 4.1.4__py3-none-any.whl → 4.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/cli/commands/tickets.py +365 -784
  3. claude_mpm/core/output_style_manager.py +24 -0
  4. claude_mpm/core/unified_agent_registry.py +46 -15
  5. claude_mpm/services/agents/deployment/agent_discovery_service.py +12 -3
  6. claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +172 -233
  7. claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +575 -0
  8. claude_mpm/services/agents/deployment/agent_operation_service.py +573 -0
  9. claude_mpm/services/agents/deployment/agent_record_service.py +419 -0
  10. claude_mpm/services/agents/deployment/agent_state_service.py +381 -0
  11. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +4 -2
  12. claude_mpm/services/infrastructure/__init__.py +31 -5
  13. claude_mpm/services/infrastructure/monitoring/__init__.py +43 -0
  14. claude_mpm/services/infrastructure/monitoring/aggregator.py +437 -0
  15. claude_mpm/services/infrastructure/monitoring/base.py +130 -0
  16. claude_mpm/services/infrastructure/monitoring/legacy.py +203 -0
  17. claude_mpm/services/infrastructure/monitoring/network.py +218 -0
  18. claude_mpm/services/infrastructure/monitoring/process.py +342 -0
  19. claude_mpm/services/infrastructure/monitoring/resources.py +243 -0
  20. claude_mpm/services/infrastructure/monitoring/service.py +367 -0
  21. claude_mpm/services/infrastructure/monitoring.py +67 -1030
  22. claude_mpm/services/project/analyzer.py +13 -4
  23. claude_mpm/services/project/analyzer_refactored.py +450 -0
  24. claude_mpm/services/project/analyzer_v2.py +566 -0
  25. claude_mpm/services/project/architecture_analyzer.py +461 -0
  26. claude_mpm/services/project/dependency_analyzer.py +462 -0
  27. claude_mpm/services/project/language_analyzer.py +265 -0
  28. claude_mpm/services/project/metrics_collector.py +410 -0
  29. claude_mpm/services/ticket_manager.py +5 -1
  30. claude_mpm/services/ticket_services/__init__.py +26 -0
  31. claude_mpm/services/ticket_services/crud_service.py +328 -0
  32. claude_mpm/services/ticket_services/formatter_service.py +290 -0
  33. claude_mpm/services/ticket_services/search_service.py +324 -0
  34. claude_mpm/services/ticket_services/validation_service.py +303 -0
  35. claude_mpm/services/ticket_services/workflow_service.py +244 -0
  36. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/METADATA +1 -1
  37. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/RECORD +41 -17
  38. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/WHEEL +0 -0
  39. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/entry_points.txt +0 -0
  40. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/licenses/LICENSE +0 -0
  41. {claude_mpm-4.1.4.dist-info → claude_mpm-4.1.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,437 @@
+ """Monitoring aggregator service that combines all health checks.
+
+ Orchestrates multiple monitoring services and provides unified health status.
+ """
+
+ import asyncio
+ import contextlib
+ import time
+ from collections import deque
+ from typing import Any, Callable, Dict, List, Optional
+
+ from .base import (
+     BaseMonitoringService,
+     HealthChecker,
+     HealthCheckResult,
+     HealthMetric,
+     HealthStatus,
+ )
+
+
+ class MonitoringAggregatorService(BaseMonitoringService):
+     """Service that aggregates health checks from multiple monitoring services.
+
+     Provides:
+     - Unified health checking across all registered services
+     - Health history tracking
+     - Status aggregation and reporting
+     - Continuous monitoring with configurable intervals
+     - Integration with recovery systems via callbacks
+     """
+
+     def __init__(self, config: Optional[Dict[str, Any]] = None):
+         """Initialize monitoring aggregator service.
+
+         Args:
+             config: Configuration dictionary for monitoring
+         """
+         super().__init__("MonitoringAggregator")
+         self.config = config or {}
+
+         # Configuration with defaults
+         self.check_interval = self.config.get("check_interval", 30)
+         self.history_size = self.config.get("history_size", 100)
+         self.aggregation_window = self.config.get("aggregation_window", 300)
+
+         # Registered monitoring services
+         self.services: List[BaseMonitoringService] = []
+         self.checkers: List[HealthChecker] = []  # For backward compatibility
+
+         # Health history
+         self.health_history: deque = deque(maxlen=self.history_size)
+
+         # Monitoring state
+         self.monitoring = False
+         self.monitor_task: Optional[asyncio.Task] = None
+         self.last_check_result: Optional[HealthCheckResult] = None
+
+         # Health callbacks for recovery integration
+         self.health_callbacks: List[Callable[[HealthCheckResult], None]] = []
+
+         # Statistics
+         self.monitoring_stats = {
+             "checks_performed": 0,
+             "checks_failed": 0,
+             "average_check_duration_ms": 0,
+             "last_check_timestamp": None,
+         }
+
+         self.logger.info("Monitoring aggregator initialized")
+
+     def add_service(self, service: BaseMonitoringService) -> None:
+         """Add a monitoring service to aggregate.
+
+         Args:
+             service: Monitoring service to add
+         """
+         self.services.append(service)
+         self.logger.info(f"Added monitoring service: {service.get_name()}")
+
+     def add_checker(self, checker: HealthChecker) -> None:
+         """Add a health checker (backward compatibility).
+
+         Args:
+             checker: Health checker to add
+         """
+         self.checkers.append(checker)
+         self.logger.info(f"Added health checker: {checker.get_name()}")
+
+     def add_health_callback(
+         self, callback: Callable[[HealthCheckResult], None]
+     ) -> None:
+         """Add a callback to be called when health checks complete.
+
+         Args:
+             callback: Function to call with HealthCheckResult
+         """
+         self.health_callbacks.append(callback)
+         self.logger.debug(f"Added health callback: {callback.__name__}")
+
+     async def check_health(self) -> List[HealthMetric]:
+         """Perform health check across all services.
+
+         Returns:
+             Combined list of health metrics from all services
+         """
+         result = await self.perform_health_check()
+         return result.metrics
+
+     async def perform_health_check(self) -> HealthCheckResult:
+         """Perform comprehensive health check using all registered services."""
+         start_time = time.time()
+         all_metrics = []
+         errors = []
+
+         # Check all monitoring services
+         for service in self.services:
+             try:
+                 service_start = time.time()
+                 metrics = await service.check_health()
+                 service_duration = (time.time() - service_start) * 1000
+
+                 all_metrics.extend(metrics)
+                 self.logger.debug(
+                     f"Service {service.get_name()} completed in {service_duration:.2f}ms"
+                 )
+             except Exception as e:
+                 error_msg = f"Service {service.get_name()} failed: {e}"
+                 errors.append(error_msg)
+                 self.logger.error(error_msg)
+
+                 all_metrics.append(
+                     HealthMetric(
+                         name=f"{service.get_name()}_error",
+                         value=str(e),
+                         status=HealthStatus.UNKNOWN,
+                         message=error_msg,
+                     )
+                 )
+
+         # Check legacy health checkers
+         for checker in self.checkers:
+             try:
+                 checker_start = time.time()
+                 metrics = await checker.check_health()
+                 checker_duration = (time.time() - checker_start) * 1000
+
+                 all_metrics.extend(metrics)
+                 self.logger.debug(
+                     f"Checker {checker.get_name()} completed in {checker_duration:.2f}ms"
+                 )
+             except Exception as e:
+                 error_msg = f"Checker {checker.get_name()} failed: {e}"
+                 errors.append(error_msg)
+                 self.logger.error(error_msg)
+
+                 all_metrics.append(
+                     HealthMetric(
+                         name=f"{checker.get_name()}_error",
+                         value=str(e),
+                         status=HealthStatus.UNKNOWN,
+                         message=error_msg,
+                     )
+                 )
+
+         # Determine overall status
+         overall_status = self._determine_overall_status(all_metrics)
+
+         # Create result
+         duration_ms = (time.time() - start_time) * 1000
+         result = HealthCheckResult(
+             overall_status=overall_status,
+             metrics=all_metrics,
+             timestamp=start_time,
+             duration_ms=duration_ms,
+             errors=errors,
+         )
+
+         # Update statistics
+         self._update_statistics(result, duration_ms)
+
+         # Store in history
+         self.health_history.append(result)
+         self.last_check_result = result
+
+         # Notify callbacks
+         await self._notify_callbacks(result)
+
+         self.logger.debug(
+             f"Health check completed: {overall_status.value} "
+             f"({len(all_metrics)} metrics, {len(errors)} errors, "
+             f"{duration_ms:.2f}ms)"
+         )
+
+         return result
+
+     def _determine_overall_status(self, metrics: List[HealthMetric]) -> HealthStatus:
+         """Determine overall health status from individual metrics.
+
+         Args:
+             metrics: List of health metrics
+
+         Returns:
+             Overall health status
+         """
+         if not metrics:
+             return HealthStatus.UNKNOWN
+
+         # Count metrics by status
+         status_counts = dict.fromkeys(HealthStatus, 0)
+         for metric in metrics:
+             status_counts[metric.status] += 1
+
+         total_metrics = len(metrics)
+
+         # Critical if any critical metrics
+         if status_counts[HealthStatus.CRITICAL] > 0:
+             return HealthStatus.CRITICAL
+
+         # Warning if >30% warning metrics
+         warning_ratio = status_counts[HealthStatus.WARNING] / total_metrics
+         if warning_ratio > 0.3:
+             return HealthStatus.WARNING
+
+         # Unknown if >50% unknown metrics
+         unknown_ratio = status_counts[HealthStatus.UNKNOWN] / total_metrics
+         if unknown_ratio > 0.5:
+             return HealthStatus.UNKNOWN
+
+         # Healthy if mostly healthy metrics
+         return HealthStatus.HEALTHY
+
+     def _update_statistics(self, result: HealthCheckResult, duration_ms: float) -> None:
+         """Update monitoring statistics.
+
+         Args:
+             result: Health check result
+             duration_ms: Check duration in milliseconds
+         """
+         self.monitoring_stats["checks_performed"] += 1
+         if result.errors:
+             self.monitoring_stats["checks_failed"] += 1
+
+         # Update average duration
+         current_avg = self.monitoring_stats["average_check_duration_ms"]
+         checks_count = self.monitoring_stats["checks_performed"]
+         self.monitoring_stats["average_check_duration_ms"] = (
+             current_avg * (checks_count - 1) + duration_ms
+         ) / checks_count
+         self.monitoring_stats["last_check_timestamp"] = time.time()
+
+     async def _notify_callbacks(self, result: HealthCheckResult) -> None:
+         """Notify health callbacks asynchronously.
+
+         Args:
+             result: Health check result to pass to callbacks
+         """
+         for callback in self.health_callbacks:
+             try:
+                 if asyncio.iscoroutinefunction(callback):
+                     await callback(result)
+                 else:
+                     callback(result)
+             except Exception as e:
+                 self.logger.error(f"Health callback {callback.__name__} failed: {e}")
+
+     def start_monitoring(self) -> None:
+         """Start continuous health monitoring."""
+         if self.monitoring:
+             self.logger.warning("Health monitoring is already running")
+             return
+
+         self.monitoring = True
+         self.monitor_task = asyncio.create_task(self._monitoring_loop())
+         self.logger.info(
+             f"Started health monitoring with {self.check_interval}s interval"
+         )
+
+     async def stop_monitoring(self) -> None:
+         """Stop continuous health monitoring."""
+         if not self.monitoring:
+             return
+
+         self.monitoring = False
+         if self.monitor_task:
+             self.monitor_task.cancel()
+             with contextlib.suppress(asyncio.CancelledError):
+                 await self.monitor_task
+             self.monitor_task = None
+
+         self.logger.info("Stopped health monitoring")
+
+     async def _monitoring_loop(self) -> None:
+         """Continuous health monitoring loop."""
+         try:
+             while self.monitoring:
+                 try:
+                     await self.perform_health_check()
+                 except Exception as e:
+                     self.logger.error(f"Error during health check: {e}")
+
+                 # Wait for next check
+                 await asyncio.sleep(self.check_interval)
+         except asyncio.CancelledError:
+             self.logger.debug("Health monitoring loop cancelled")
+         except Exception as e:
+             self.logger.error(f"Health monitoring loop error: {e}")
+
+     def get_current_status(self) -> Optional[HealthCheckResult]:
+         """Get the most recent health check result.
+
+         Returns:
+             Most recent health check result or None
+         """
+         return self.last_check_result
+
+     def get_health_history(
+         self, limit: Optional[int] = None
+     ) -> List[HealthCheckResult]:
+         """Get health check history.
+
+         Args:
+             limit: Maximum number of results to return
+
+         Returns:
+             List of health check results, newest first
+         """
+         history = list(self.health_history)
+         history.reverse()  # Newest first
+
+         if limit:
+             history = history[:limit]
+
+         return history
+
+     def get_aggregated_status(
+         self, window_seconds: Optional[int] = None
+     ) -> Dict[str, Any]:
+         """Get aggregated health status over a time window.
+
+         Args:
+             window_seconds: Time window for aggregation
+
+         Returns:
+             Dictionary with aggregated health statistics
+         """
+         window_seconds = window_seconds or self.aggregation_window
+         current_time = time.time()
+         cutoff_time = current_time - window_seconds
+
+         # Filter history to time window
+         recent_results = [
+             result for result in self.health_history if result.timestamp >= cutoff_time
+         ]
+
+         if not recent_results:
+             return {
+                 "period": "no_data",
+                 "window_seconds": window_seconds,
+                 "checks_count": 0,
+                 "overall_status": HealthStatus.UNKNOWN.value,
+             }
+
+         # Aggregate statistics
+         status_counts = dict.fromkeys(HealthStatus, 0)
+         total_metrics = 0
+         total_errors = 0
+         total_duration_ms = 0
+
+         for result in recent_results:
+             status_counts[result.overall_status] += 1
+             total_metrics += len(result.metrics)
+             total_errors += len(result.errors)
+             total_duration_ms += result.duration_ms
+
+         checks_count = len(recent_results)
+
+         # Determine aggregated status
+         if status_counts[HealthStatus.CRITICAL] > 0:
+             aggregated_status = HealthStatus.CRITICAL
+         elif status_counts[HealthStatus.WARNING] > checks_count * 0.3:
+             aggregated_status = HealthStatus.WARNING
+         elif status_counts[HealthStatus.UNKNOWN] > checks_count * 0.5:
+             aggregated_status = HealthStatus.UNKNOWN
+         else:
+             aggregated_status = HealthStatus.HEALTHY
+
+         return {
+             "period": f"last_{window_seconds}_seconds",
+             "window_seconds": window_seconds,
+             "checks_count": checks_count,
+             "overall_status": aggregated_status.value,
+             "status_distribution": {
+                 status.value: count for status, count in status_counts.items()
+             },
+             "average_metrics_per_check": (
+                 round(total_metrics / checks_count, 2) if checks_count > 0 else 0
+             ),
+             "total_errors": total_errors,
+             "average_duration_ms": (
+                 round(total_duration_ms / checks_count, 2) if checks_count > 0 else 0
+             ),
+             "monitoring_stats": dict(self.monitoring_stats),
+         }
+
+     def export_diagnostics(self) -> Dict[str, Any]:
+         """Export comprehensive diagnostics information.
+
+         Returns:
+             Dictionary with diagnostic information
+         """
+         return {
+             "monitor_info": {
+                 "check_interval": self.check_interval,
+                 "history_size": self.history_size,
+                 "aggregation_window": self.aggregation_window,
+                 "monitoring_active": self.monitoring,
+                 "services_count": len(self.services),
+                 "checkers_count": len(self.checkers),
+                 "callbacks_count": len(self.health_callbacks),
+             },
+             "services": [service.get_name() for service in self.services],
+             "checkers": [checker.get_name() for checker in self.checkers],
+             "current_status": (
+                 self.last_check_result.to_dict() if self.last_check_result else None
+             ),
+             "aggregated_status": self.get_aggregated_status(),
+             "monitoring_stats": dict(self.monitoring_stats),
+             "history_summary": {
+                 "total_checks": len(self.health_history),
+                 "oldest_check": (
+                     self.health_history[0].timestamp if self.health_history else None
+                 ),
+                 "newest_check": (
+                     self.health_history[-1].timestamp if self.health_history else None
+                 ),
+             },
+         }
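The new aggregator in claude_mpm/services/infrastructure/monitoring/aggregator.py exposes a small registration API (add_service, add_checker, add_health_callback) plus one-shot (perform_health_check) and continuous (start_monitoring) entry points. As a rough usage sketch, the snippet below wires a toy monitoring service into the aggregator and runs a single check. The import paths are assumed from the file list above; DummyDiskService and main are hypothetical names for illustration only, not part of the package.

# Hypothetical usage sketch (not part of the diff). Assumes the new modules
# are importable under the paths shown in the file list above.
import asyncio

from claude_mpm.services.infrastructure.monitoring.aggregator import (
    MonitoringAggregatorService,
)
from claude_mpm.services.infrastructure.monitoring.base import (
    BaseMonitoringService,
    HealthMetric,
    HealthStatus,
)


class DummyDiskService(BaseMonitoringService):
    """Illustrative service that always reports one healthy metric."""

    def __init__(self):
        super().__init__("DummyDisk")

    async def check_health(self):
        # A real service would inspect the system here.
        return [
            HealthMetric(
                name="disk_free_percent",
                value=73.5,
                status=HealthStatus.HEALTHY,
                unit="%",
            )
        ]


async def main():
    # check_interval matters only when start_monitoring() is used.
    aggregator = MonitoringAggregatorService(config={"check_interval": 10})
    aggregator.add_service(DummyDiskService())
    result = await aggregator.perform_health_check()
    print(result.overall_status.value, len(result.metrics))


asyncio.run(main())

For long-running processes, the same aggregator could instead be driven by start_monitoring()/stop_monitoring(), which run perform_health_check on the configured interval and feed results to any registered health callbacks.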
@@ -0,0 +1,130 @@
+ """Base classes and data structures for monitoring services.
+
+ This module defines the core types and interfaces used across all monitoring services.
+ """
+
+ import time
+ from abc import ABC, abstractmethod
+ from dataclasses import asdict, dataclass
+ from datetime import datetime, timezone
+ from enum import Enum
+ from typing import Any, Dict, List, Optional, Union
+
+
+ class HealthStatus(Enum):
+     """Health status levels for monitoring."""
+
+     HEALTHY = "healthy"
+     WARNING = "warning"
+     CRITICAL = "critical"
+     UNKNOWN = "unknown"
+
+
+ @dataclass
+ class HealthMetric:
+     """Individual health metric data structure."""
+
+     name: str
+     value: Union[int, float, str, bool]
+     status: HealthStatus
+     threshold: Optional[Union[int, float]] = None
+     unit: Optional[str] = None
+     timestamp: float = None
+     message: Optional[str] = None
+
+     def __post_init__(self):
+         if self.timestamp is None:
+             self.timestamp = time.time()
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert metric to dictionary format."""
+         result = asdict(self)
+         result["status"] = self.status.value
+         result["timestamp_iso"] = datetime.fromtimestamp(
+             self.timestamp, timezone.utc
+         ).isoformat()
+         return result
+
+
+ @dataclass
+ class HealthCheckResult:
+     """Result of a health check operation."""
+
+     overall_status: HealthStatus
+     metrics: List[HealthMetric]
+     timestamp: float
+     duration_ms: float
+     errors: List[str]
+
+     def __post_init__(self):
+         if not hasattr(self, "timestamp") or self.timestamp is None:
+             self.timestamp = time.time()
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert health check result to dictionary format."""
+         return {
+             "overall_status": self.overall_status.value,
+             "metrics": [metric.to_dict() for metric in self.metrics],
+             "timestamp": self.timestamp,
+             "timestamp_iso": datetime.fromtimestamp(
+                 self.timestamp, timezone.utc
+             ).isoformat(),
+             "duration_ms": self.duration_ms,
+             "errors": self.errors,
+             "metric_count": len(self.metrics),
+             "healthy_metrics": len(
+                 [m for m in self.metrics if m.status == HealthStatus.HEALTHY]
+             ),
+             "warning_metrics": len(
+                 [m for m in self.metrics if m.status == HealthStatus.WARNING]
+             ),
+             "critical_metrics": len(
+                 [m for m in self.metrics if m.status == HealthStatus.CRITICAL]
+             ),
+         }
+
+
+ class HealthChecker(ABC):
+     """Abstract base class for health checkers.
+
+     Health checkers implement specific monitoring logic for different aspects
+     of the system (process resources, network connectivity, service health, etc.).
+     """
+
+     @abstractmethod
+     def get_name(self) -> str:
+         """Get the name of this health checker."""
+
+     @abstractmethod
+     async def check_health(self) -> List[HealthMetric]:
+         """Perform health check and return metrics."""
+
+
+ class BaseMonitoringService(ABC):
+     """Base class for monitoring services with dependency injection support."""
+
+     def __init__(self, name: str):
+         """Initialize base monitoring service.
+
+         Args:
+             name: Service name for logging and identification
+         """
+         self.name = name
+         self._logger = None
+
+     @property
+     def logger(self):
+         """Lazy-load logger to avoid circular imports."""
+         if self._logger is None:
+             import logging
+
+             self._logger = logging.getLogger(f"claude_mpm.monitoring.{self.name}")
+         return self._logger
+
+     @abstractmethod
+     async def check_health(self) -> List[HealthMetric]:
+         """Perform health check and return metrics."""
+
+     def get_name(self) -> str:
+         """Get service name."""
+         return self.name
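The base module's dataclasses handle their own serialization: HealthMetric fills in a timestamp in __post_init__, and both to_dict() methods add ISO timestamps and per-status metric counts for reporting. A minimal sketch follows, assuming the same import path as in the file list above; the metric values are made up for illustration.

# Minimal serialization sketch (assumption: these classes are importable from
# claude_mpm.services.infrastructure.monitoring.base as shown in the diff).
import time

from claude_mpm.services.infrastructure.monitoring.base import (
    HealthCheckResult,
    HealthMetric,
    HealthStatus,
)

# A single metric gains a timestamp automatically in __post_init__.
metric = HealthMetric(
    name="event_loop_lag_ms",
    value=12.4,
    status=HealthStatus.WARNING,
    threshold=10.0,
    unit="ms",
)

result = HealthCheckResult(
    overall_status=HealthStatus.WARNING,
    metrics=[metric],
    timestamp=time.time(),
    duration_ms=3.2,
    errors=[],
)

# to_dict() adds timestamp_iso plus healthy/warning/critical metric counts.
print(result.to_dict()["warning_metrics"])  # -> 1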