claude-mpm 4.13.2__py3-none-any.whl → 4.18.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/BASE_ENGINEER.md +286 -0
- claude_mpm/agents/BASE_PM.md +48 -17
- claude_mpm/agents/OUTPUT_STYLE.md +329 -11
- claude_mpm/agents/PM_INSTRUCTIONS.md +227 -8
- claude_mpm/agents/agent_loader.py +17 -5
- claude_mpm/agents/frontmatter_validator.py +284 -253
- claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
- claude_mpm/agents/templates/api_qa.json +7 -1
- claude_mpm/agents/templates/clerk-ops.json +8 -1
- claude_mpm/agents/templates/code_analyzer.json +4 -1
- claude_mpm/agents/templates/dart_engineer.json +11 -1
- claude_mpm/agents/templates/data_engineer.json +11 -1
- claude_mpm/agents/templates/documentation.json +6 -1
- claude_mpm/agents/templates/engineer.json +18 -1
- claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
- claude_mpm/agents/templates/golang_engineer.json +11 -1
- claude_mpm/agents/templates/java_engineer.json +12 -2
- claude_mpm/agents/templates/local_ops_agent.json +1217 -6
- claude_mpm/agents/templates/nextjs_engineer.json +11 -1
- claude_mpm/agents/templates/ops.json +8 -1
- claude_mpm/agents/templates/php-engineer.json +11 -1
- claude_mpm/agents/templates/project_organizer.json +10 -3
- claude_mpm/agents/templates/prompt-engineer.json +5 -1
- claude_mpm/agents/templates/python_engineer.json +11 -1
- claude_mpm/agents/templates/qa.json +7 -1
- claude_mpm/agents/templates/react_engineer.json +11 -1
- claude_mpm/agents/templates/refactoring_engineer.json +8 -1
- claude_mpm/agents/templates/research.json +4 -1
- claude_mpm/agents/templates/ruby-engineer.json +11 -1
- claude_mpm/agents/templates/rust_engineer.json +11 -1
- claude_mpm/agents/templates/security.json +6 -1
- claude_mpm/agents/templates/svelte-engineer.json +225 -0
- claude_mpm/agents/templates/ticketing.json +6 -1
- claude_mpm/agents/templates/typescript_engineer.json +11 -1
- claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
- claude_mpm/agents/templates/version_control.json +8 -1
- claude_mpm/agents/templates/web_qa.json +7 -1
- claude_mpm/agents/templates/web_ui.json +11 -1
- claude_mpm/cli/__init__.py +34 -706
- claude_mpm/cli/commands/agent_manager.py +25 -12
- claude_mpm/cli/commands/agent_state_manager.py +186 -0
- claude_mpm/cli/commands/agents.py +204 -148
- claude_mpm/cli/commands/aggregate.py +7 -3
- claude_mpm/cli/commands/analyze.py +9 -4
- claude_mpm/cli/commands/analyze_code.py +7 -2
- claude_mpm/cli/commands/auto_configure.py +7 -9
- claude_mpm/cli/commands/config.py +47 -13
- claude_mpm/cli/commands/configure.py +294 -1788
- claude_mpm/cli/commands/configure_agent_display.py +261 -0
- claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
- claude_mpm/cli/commands/configure_hook_manager.py +225 -0
- claude_mpm/cli/commands/configure_models.py +18 -0
- claude_mpm/cli/commands/configure_navigation.py +167 -0
- claude_mpm/cli/commands/configure_paths.py +104 -0
- claude_mpm/cli/commands/configure_persistence.py +254 -0
- claude_mpm/cli/commands/configure_startup_manager.py +646 -0
- claude_mpm/cli/commands/configure_template_editor.py +497 -0
- claude_mpm/cli/commands/configure_validators.py +73 -0
- claude_mpm/cli/commands/local_deploy.py +537 -0
- claude_mpm/cli/commands/memory.py +54 -20
- claude_mpm/cli/commands/mpm_init.py +39 -25
- claude_mpm/cli/commands/mpm_init_handler.py +8 -3
- claude_mpm/cli/executor.py +202 -0
- claude_mpm/cli/helpers.py +105 -0
- claude_mpm/cli/interactive/__init__.py +3 -0
- claude_mpm/cli/interactive/skills_wizard.py +491 -0
- claude_mpm/cli/parsers/__init__.py +7 -1
- claude_mpm/cli/parsers/base_parser.py +98 -3
- claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
- claude_mpm/cli/shared/output_formatters.py +28 -19
- claude_mpm/cli/startup.py +481 -0
- claude_mpm/cli/utils.py +52 -1
- claude_mpm/commands/mpm-help.md +3 -0
- claude_mpm/commands/mpm-version.md +113 -0
- claude_mpm/commands/mpm.md +1 -0
- claude_mpm/config/agent_config.py +2 -2
- claude_mpm/config/model_config.py +428 -0
- claude_mpm/core/base_service.py +13 -12
- claude_mpm/core/enums.py +452 -0
- claude_mpm/core/factories.py +1 -1
- claude_mpm/core/instruction_reinforcement_hook.py +2 -1
- claude_mpm/core/interactive_session.py +9 -3
- claude_mpm/core/logging_config.py +6 -2
- claude_mpm/core/oneshot_session.py +8 -4
- claude_mpm/core/optimized_agent_loader.py +3 -3
- claude_mpm/core/output_style_manager.py +12 -192
- claude_mpm/core/service_registry.py +5 -1
- claude_mpm/core/types.py +2 -9
- claude_mpm/core/typing_utils.py +7 -6
- claude_mpm/dashboard/static/js/dashboard.js +0 -14
- claude_mpm/dashboard/templates/index.html +3 -41
- claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
- claude_mpm/hooks/instruction_reinforcement.py +7 -2
- claude_mpm/models/resume_log.py +340 -0
- claude_mpm/services/agents/auto_config_manager.py +10 -11
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
- claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
- claude_mpm/services/agents/deployment/agent_validator.py +17 -1
- claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
- claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
- claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
- claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
- claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
- claude_mpm/services/agents/local_template_manager.py +1 -1
- claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
- claude_mpm/services/agents/registry/modification_tracker.py +5 -2
- claude_mpm/services/command_handler_service.py +11 -5
- claude_mpm/services/core/interfaces/__init__.py +74 -2
- claude_mpm/services/core/interfaces/health.py +172 -0
- claude_mpm/services/core/interfaces/model.py +281 -0
- claude_mpm/services/core/interfaces/process.py +372 -0
- claude_mpm/services/core/interfaces/restart.py +307 -0
- claude_mpm/services/core/interfaces/stability.py +260 -0
- claude_mpm/services/core/models/__init__.py +33 -0
- claude_mpm/services/core/models/agent_config.py +12 -28
- claude_mpm/services/core/models/health.py +162 -0
- claude_mpm/services/core/models/process.py +235 -0
- claude_mpm/services/core/models/restart.py +302 -0
- claude_mpm/services/core/models/stability.py +264 -0
- claude_mpm/services/core/path_resolver.py +23 -7
- claude_mpm/services/diagnostics/__init__.py +2 -2
- claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
- claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
- claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
- claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
- claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
- claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
- claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
- claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
- claude_mpm/services/diagnostics/checks/mcp_services_check.py +36 -31
- claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
- claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
- claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
- claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
- claude_mpm/services/diagnostics/models.py +19 -24
- claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
- claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
- claude_mpm/services/infrastructure/monitoring/base.py +5 -13
- claude_mpm/services/infrastructure/monitoring/network.py +7 -6
- claude_mpm/services/infrastructure/monitoring/process.py +13 -12
- claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
- claude_mpm/services/infrastructure/monitoring/service.py +16 -15
- claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
- claude_mpm/services/local_ops/__init__.py +163 -0
- claude_mpm/services/local_ops/crash_detector.py +257 -0
- claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
- claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
- claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
- claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
- claude_mpm/services/local_ops/health_manager.py +430 -0
- claude_mpm/services/local_ops/log_monitor.py +396 -0
- claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
- claude_mpm/services/local_ops/process_manager.py +595 -0
- claude_mpm/services/local_ops/resource_monitor.py +331 -0
- claude_mpm/services/local_ops/restart_manager.py +401 -0
- claude_mpm/services/local_ops/restart_policy.py +387 -0
- claude_mpm/services/local_ops/state_manager.py +372 -0
- claude_mpm/services/local_ops/unified_manager.py +600 -0
- claude_mpm/services/mcp_config_manager.py +9 -4
- claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
- claude_mpm/services/mcp_gateway/core/base.py +18 -31
- claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +71 -24
- claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
- claude_mpm/services/memory_hook_service.py +4 -1
- claude_mpm/services/model/__init__.py +147 -0
- claude_mpm/services/model/base_provider.py +365 -0
- claude_mpm/services/model/claude_provider.py +412 -0
- claude_mpm/services/model/model_router.py +453 -0
- claude_mpm/services/model/ollama_provider.py +415 -0
- claude_mpm/services/monitor/daemon_manager.py +3 -2
- claude_mpm/services/monitor/handlers/dashboard.py +2 -1
- claude_mpm/services/monitor/handlers/hooks.py +2 -1
- claude_mpm/services/monitor/management/lifecycle.py +3 -2
- claude_mpm/services/monitor/server.py +2 -1
- claude_mpm/services/session_management_service.py +3 -2
- claude_mpm/services/session_manager.py +205 -1
- claude_mpm/services/shared/async_service_base.py +16 -27
- claude_mpm/services/shared/lifecycle_service_base.py +1 -14
- claude_mpm/services/socketio/handlers/__init__.py +5 -2
- claude_mpm/services/socketio/handlers/hook.py +13 -2
- claude_mpm/services/socketio/handlers/registry.py +4 -2
- claude_mpm/services/socketio/server/main.py +10 -8
- claude_mpm/services/subprocess_launcher_service.py +14 -5
- claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
- claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
- claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
- claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
- claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
- claude_mpm/services/unified/deployment_strategies/local.py +6 -5
- claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
- claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
- claude_mpm/services/unified/interfaces.py +3 -1
- claude_mpm/services/unified/unified_analyzer.py +14 -10
- claude_mpm/services/unified/unified_config.py +2 -1
- claude_mpm/services/unified/unified_deployment.py +9 -4
- claude_mpm/services/version_service.py +104 -1
- claude_mpm/skills/__init__.py +21 -0
- claude_mpm/skills/bundled/__init__.py +6 -0
- claude_mpm/skills/bundled/api-documentation.md +393 -0
- claude_mpm/skills/bundled/async-testing.md +571 -0
- claude_mpm/skills/bundled/code-review.md +143 -0
- claude_mpm/skills/bundled/database-migration.md +199 -0
- claude_mpm/skills/bundled/docker-containerization.md +194 -0
- claude_mpm/skills/bundled/express-local-dev.md +1429 -0
- claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
- claude_mpm/skills/bundled/git-workflow.md +414 -0
- claude_mpm/skills/bundled/imagemagick.md +204 -0
- claude_mpm/skills/bundled/json-data-handling.md +223 -0
- claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
- claude_mpm/skills/bundled/pdf.md +141 -0
- claude_mpm/skills/bundled/performance-profiling.md +567 -0
- claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
- claude_mpm/skills/bundled/security-scanning.md +327 -0
- claude_mpm/skills/bundled/systematic-debugging.md +473 -0
- claude_mpm/skills/bundled/test-driven-development.md +378 -0
- claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
- claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
- claude_mpm/skills/bundled/xlsx.md +157 -0
- claude_mpm/skills/registry.py +286 -0
- claude_mpm/skills/skill_manager.py +310 -0
- claude_mpm/tools/code_tree_analyzer.py +177 -141
- claude_mpm/tools/code_tree_events.py +4 -2
- claude_mpm/utils/agent_dependency_loader.py +2 -2
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +117 -8
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +238 -174
- claude_mpm/dashboard/static/css/code-tree.css +0 -1639
- claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
- claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
- claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
- claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
- claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
- claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
- claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
- claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
- claude_mpm/services/project/analyzer_refactored.py +0 -450
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
|
@@ -6,17 +6,9 @@ consistency across all checks and reporting.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
from dataclasses import dataclass, field
|
|
9
|
-
from
|
|
10
|
-
from typing import Any, Dict, List, Optional
|
|
9
|
+
from typing import Any, Dict, List, Optional, Union
|
|
11
10
|
|
|
12
|
-
|
|
13
|
-
class DiagnosticStatus(Enum):
|
|
14
|
-
"""Status levels for diagnostic results."""
|
|
15
|
-
|
|
16
|
-
OK = "ok"
|
|
17
|
-
WARNING = "warning"
|
|
18
|
-
ERROR = "error"
|
|
19
|
-
SKIPPED = "skipped"
|
|
11
|
+
from ...core.enums import OperationResult, ValidationSeverity
|
|
20
12
|
|
|
21
13
|
|
|
22
14
|
@dataclass
|
|
@@ -25,10 +17,13 @@ class DiagnosticResult:
|
|
|
25
17
|
|
|
26
18
|
WHY: Standardized result format ensures consistent reporting
|
|
27
19
|
and makes it easy to aggregate and display results.
|
|
20
|
+
|
|
21
|
+
Note: status uses Union[OperationResult, ValidationSeverity] to support both
|
|
22
|
+
operation results (SUCCESS, SKIPPED) and validation results (WARNING, ERROR).
|
|
28
23
|
"""
|
|
29
24
|
|
|
30
25
|
category: str # e.g., "Installation", "Agents", "MCP Server"
|
|
31
|
-
status:
|
|
26
|
+
status: Union[OperationResult, ValidationSeverity]
|
|
32
27
|
message: str
|
|
33
28
|
details: Dict[str, Any] = field(default_factory=dict)
|
|
34
29
|
fix_command: Optional[str] = None
|
|
@@ -50,16 +45,16 @@ class DiagnosticResult:
|
|
|
50
45
|
@property
|
|
51
46
|
def has_issues(self) -> bool:
|
|
52
47
|
"""Check if this result indicates any issues."""
|
|
53
|
-
return self.status in (
|
|
48
|
+
return self.status in (ValidationSeverity.WARNING, ValidationSeverity.ERROR)
|
|
54
49
|
|
|
55
50
|
@property
|
|
56
51
|
def severity_level(self) -> int:
|
|
57
52
|
"""Get numeric severity level for sorting."""
|
|
58
53
|
severity_map = {
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
54
|
+
OperationResult.SUCCESS: 0,
|
|
55
|
+
OperationResult.SKIPPED: 1,
|
|
56
|
+
ValidationSeverity.WARNING: 2,
|
|
57
|
+
ValidationSeverity.ERROR: 3,
|
|
63
58
|
}
|
|
64
59
|
return severity_map.get(self.status, 0)
|
|
65
60
|
|
|
@@ -84,13 +79,13 @@ class DiagnosticSummary:
|
|
|
84
79
|
self.results.append(result)
|
|
85
80
|
self.total_checks += 1
|
|
86
81
|
|
|
87
|
-
if result.status ==
|
|
82
|
+
if result.status == OperationResult.SUCCESS:
|
|
88
83
|
self.ok_count += 1
|
|
89
|
-
elif result.status ==
|
|
84
|
+
elif result.status == ValidationSeverity.WARNING:
|
|
90
85
|
self.warning_count += 1
|
|
91
|
-
elif result.status ==
|
|
86
|
+
elif result.status == ValidationSeverity.ERROR:
|
|
92
87
|
self.error_count += 1
|
|
93
|
-
elif result.status ==
|
|
88
|
+
elif result.status == OperationResult.SKIPPED:
|
|
94
89
|
self.skipped_count += 1
|
|
95
90
|
|
|
96
91
|
@property
|
|
@@ -99,13 +94,13 @@ class DiagnosticSummary:
|
|
|
99
94
|
return self.warning_count > 0 or self.error_count > 0
|
|
100
95
|
|
|
101
96
|
@property
|
|
102
|
-
def overall_status(self) ->
|
|
97
|
+
def overall_status(self) -> Union[OperationResult, ValidationSeverity]:
|
|
103
98
|
"""Get overall system status."""
|
|
104
99
|
if self.error_count > 0:
|
|
105
|
-
return
|
|
100
|
+
return ValidationSeverity.ERROR
|
|
106
101
|
if self.warning_count > 0:
|
|
107
|
-
return
|
|
108
|
-
return
|
|
102
|
+
return ValidationSeverity.WARNING
|
|
103
|
+
return OperationResult.SUCCESS
|
|
109
104
|
|
|
110
105
|
def to_dict(self) -> Dict[str, Any]:
|
|
111
106
|
"""Convert to dictionary for JSON serialization."""
|
|
@@ -3,12 +3,12 @@
|
|
|
3
3
|
Exports main monitoring components for backward compatibility.
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
+
from ....core.enums import HealthStatus
|
|
6
7
|
from .aggregator import MonitoringAggregatorService
|
|
7
8
|
from .base import (
|
|
8
9
|
HealthChecker,
|
|
9
10
|
HealthCheckResult,
|
|
10
11
|
HealthMetric,
|
|
11
|
-
HealthStatus,
|
|
12
12
|
)
|
|
13
13
|
|
|
14
14
|
# Legacy exports for backward compatibility
|
|
@@ -9,12 +9,12 @@ import time
|
|
|
9
9
|
from collections import deque
|
|
10
10
|
from typing import Any, Callable, Dict, List, Optional
|
|
11
11
|
|
|
12
|
+
from ....core.enums import HealthStatus
|
|
12
13
|
from .base import (
|
|
13
14
|
BaseMonitoringService,
|
|
14
15
|
HealthChecker,
|
|
15
16
|
HealthCheckResult,
|
|
16
17
|
HealthMetric,
|
|
17
|
-
HealthStatus,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
20
|
|
|
@@ -212,14 +212,14 @@ class MonitoringAggregatorService(BaseMonitoringService):
|
|
|
212
212
|
|
|
213
213
|
total_metrics = len(metrics)
|
|
214
214
|
|
|
215
|
-
#
|
|
216
|
-
if status_counts[HealthStatus.
|
|
217
|
-
return HealthStatus.
|
|
215
|
+
# Unhealthy if any unhealthy metrics
|
|
216
|
+
if status_counts[HealthStatus.UNHEALTHY] > 0:
|
|
217
|
+
return HealthStatus.UNHEALTHY
|
|
218
218
|
|
|
219
|
-
#
|
|
220
|
-
|
|
221
|
-
if
|
|
222
|
-
return HealthStatus.
|
|
219
|
+
# Degraded if >30% degraded metrics
|
|
220
|
+
degraded_ratio = status_counts[HealthStatus.DEGRADED] / total_metrics
|
|
221
|
+
if degraded_ratio > 0.3:
|
|
222
|
+
return HealthStatus.DEGRADED
|
|
223
223
|
|
|
224
224
|
# Unknown if >50% unknown metrics
|
|
225
225
|
unknown_ratio = status_counts[HealthStatus.UNKNOWN] / total_metrics
|
|
@@ -375,10 +375,10 @@ class MonitoringAggregatorService(BaseMonitoringService):
|
|
|
375
375
|
checks_count = len(recent_results)
|
|
376
376
|
|
|
377
377
|
# Determine aggregated status
|
|
378
|
-
if status_counts[HealthStatus.
|
|
379
|
-
aggregated_status = HealthStatus.
|
|
380
|
-
elif status_counts[HealthStatus.
|
|
381
|
-
aggregated_status = HealthStatus.
|
|
378
|
+
if status_counts[HealthStatus.UNHEALTHY] > 0:
|
|
379
|
+
aggregated_status = HealthStatus.UNHEALTHY
|
|
380
|
+
elif status_counts[HealthStatus.DEGRADED] > checks_count * 0.3:
|
|
381
|
+
aggregated_status = HealthStatus.DEGRADED
|
|
382
382
|
elif status_counts[HealthStatus.UNKNOWN] > checks_count * 0.5:
|
|
383
383
|
aggregated_status = HealthStatus.UNKNOWN
|
|
384
384
|
else:
|
|
@@ -7,17 +7,9 @@ import time
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
8
|
from dataclasses import asdict, dataclass
|
|
9
9
|
from datetime import datetime, timezone
|
|
10
|
-
from enum import Enum
|
|
11
10
|
from typing import Any, Dict, List, Optional, Union
|
|
12
11
|
|
|
13
|
-
|
|
14
|
-
class HealthStatus(Enum):
|
|
15
|
-
"""Health status levels for monitoring."""
|
|
16
|
-
|
|
17
|
-
HEALTHY = "healthy"
|
|
18
|
-
WARNING = "warning"
|
|
19
|
-
CRITICAL = "critical"
|
|
20
|
-
UNKNOWN = "unknown"
|
|
12
|
+
from ....core.enums import HealthStatus
|
|
21
13
|
|
|
22
14
|
|
|
23
15
|
@dataclass
|
|
@@ -75,11 +67,11 @@ class HealthCheckResult:
|
|
|
75
67
|
"healthy_metrics": len(
|
|
76
68
|
[m for m in self.metrics if m.status == HealthStatus.HEALTHY]
|
|
77
69
|
),
|
|
78
|
-
"
|
|
79
|
-
[m for m in self.metrics if m.status == HealthStatus.
|
|
70
|
+
"degraded_metrics": len(
|
|
71
|
+
[m for m in self.metrics if m.status == HealthStatus.DEGRADED]
|
|
80
72
|
),
|
|
81
|
-
"
|
|
82
|
-
[m for m in self.metrics if m.status == HealthStatus.
|
|
73
|
+
"unhealthy_metrics": len(
|
|
74
|
+
[m for m in self.metrics if m.status == HealthStatus.UNHEALTHY]
|
|
83
75
|
),
|
|
84
76
|
}
|
|
85
77
|
|
|
@@ -6,7 +6,8 @@ Monitors network connectivity, port availability, and socket health.
|
|
|
6
6
|
import socket
|
|
7
7
|
from typing import Dict, List, Optional
|
|
8
8
|
|
|
9
|
-
from .
|
|
9
|
+
from ....core.enums import HealthStatus
|
|
10
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class NetworkHealthService(BaseMonitoringService):
|
|
@@ -98,7 +99,7 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
98
99
|
HealthMetric(
|
|
99
100
|
name="socket_creation",
|
|
100
101
|
value=False,
|
|
101
|
-
status=HealthStatus.
|
|
102
|
+
status=HealthStatus.UNHEALTHY,
|
|
102
103
|
message=f"Failed to create socket: {e}",
|
|
103
104
|
)
|
|
104
105
|
)
|
|
@@ -142,11 +143,11 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
142
143
|
)
|
|
143
144
|
)
|
|
144
145
|
else:
|
|
145
|
-
# Determine if this is
|
|
146
|
+
# Determine if this is unhealthy or degraded based on endpoint type
|
|
146
147
|
status = (
|
|
147
|
-
HealthStatus.
|
|
148
|
+
HealthStatus.DEGRADED
|
|
148
149
|
if "optional" in name.lower()
|
|
149
|
-
else HealthStatus.
|
|
150
|
+
else HealthStatus.UNHEALTHY
|
|
150
151
|
)
|
|
151
152
|
metrics.append(
|
|
152
153
|
HealthMetric(
|
|
@@ -161,7 +162,7 @@ class NetworkHealthService(BaseMonitoringService):
|
|
|
161
162
|
HealthMetric(
|
|
162
163
|
name=metric_name,
|
|
163
164
|
value=False,
|
|
164
|
-
status=HealthStatus.
|
|
165
|
+
status=HealthStatus.DEGRADED,
|
|
165
166
|
message=f"Connection to {host}:{port} timed out after {timeout}s",
|
|
166
167
|
)
|
|
167
168
|
)
|
|
@@ -6,8 +6,9 @@ Monitors individual process health including CPU, memory, file descriptors, and
|
|
|
6
6
|
from typing import List
|
|
7
7
|
|
|
8
8
|
from claude_mpm.core.constants import ResourceLimits, TimeoutConfig
|
|
9
|
+
from claude_mpm.core.enums import HealthStatus
|
|
9
10
|
|
|
10
|
-
from .base import BaseMonitoringService, HealthMetric
|
|
11
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
11
12
|
|
|
12
13
|
try:
|
|
13
14
|
import psutil
|
|
@@ -66,7 +67,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
66
67
|
HealthMetric(
|
|
67
68
|
name="psutil_availability",
|
|
68
69
|
value=False,
|
|
69
|
-
status=HealthStatus.
|
|
70
|
+
status=HealthStatus.DEGRADED,
|
|
70
71
|
message="psutil not available for process monitoring",
|
|
71
72
|
)
|
|
72
73
|
)
|
|
@@ -77,7 +78,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
77
78
|
HealthMetric(
|
|
78
79
|
name="process_exists",
|
|
79
80
|
value=False,
|
|
80
|
-
status=HealthStatus.
|
|
81
|
+
status=HealthStatus.UNHEALTHY,
|
|
81
82
|
message=f"Process {self.pid} not found",
|
|
82
83
|
)
|
|
83
84
|
)
|
|
@@ -90,7 +91,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
90
91
|
HealthMetric(
|
|
91
92
|
name="process_exists",
|
|
92
93
|
value=False,
|
|
93
|
-
status=HealthStatus.
|
|
94
|
+
status=HealthStatus.UNHEALTHY,
|
|
94
95
|
message=f"Process {self.pid} is no longer running",
|
|
95
96
|
)
|
|
96
97
|
)
|
|
@@ -119,7 +120,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
119
120
|
HealthMetric(
|
|
120
121
|
name="process_exists",
|
|
121
122
|
value=False,
|
|
122
|
-
status=HealthStatus.
|
|
123
|
+
status=HealthStatus.UNHEALTHY,
|
|
123
124
|
message=f"Process {self.pid} no longer exists",
|
|
124
125
|
)
|
|
125
126
|
)
|
|
@@ -153,7 +154,7 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
153
154
|
status=(
|
|
154
155
|
HealthStatus.HEALTHY
|
|
155
156
|
if process_healthy
|
|
156
|
-
else HealthStatus.
|
|
157
|
+
else HealthStatus.UNHEALTHY
|
|
157
158
|
),
|
|
158
159
|
message=f"Process status: {status}",
|
|
159
160
|
)
|
|
@@ -179,9 +180,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
179
180
|
cpu_status = HealthStatus.HEALTHY
|
|
180
181
|
if cpu_percent > self.cpu_threshold:
|
|
181
182
|
cpu_status = (
|
|
182
|
-
HealthStatus.
|
|
183
|
+
HealthStatus.DEGRADED
|
|
183
184
|
if cpu_percent < self.cpu_threshold * 1.2
|
|
184
|
-
else HealthStatus.
|
|
185
|
+
else HealthStatus.UNHEALTHY
|
|
185
186
|
)
|
|
186
187
|
|
|
187
188
|
metrics.append(
|
|
@@ -213,9 +214,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
213
214
|
memory_status = HealthStatus.HEALTHY
|
|
214
215
|
if memory_mb > self.memory_threshold_mb:
|
|
215
216
|
memory_status = (
|
|
216
|
-
HealthStatus.
|
|
217
|
+
HealthStatus.DEGRADED
|
|
217
218
|
if memory_mb < self.memory_threshold_mb * 1.2
|
|
218
|
-
else HealthStatus.
|
|
219
|
+
else HealthStatus.UNHEALTHY
|
|
219
220
|
)
|
|
220
221
|
|
|
221
222
|
metrics.append(
|
|
@@ -256,9 +257,9 @@ class ProcessHealthService(BaseMonitoringService):
|
|
|
256
257
|
fd_status = HealthStatus.HEALTHY
|
|
257
258
|
if fd_count > self.fd_threshold:
|
|
258
259
|
fd_status = (
|
|
259
|
-
HealthStatus.
|
|
260
|
+
HealthStatus.DEGRADED
|
|
260
261
|
if fd_count < self.fd_threshold * 1.2
|
|
261
|
-
else HealthStatus.
|
|
262
|
+
else HealthStatus.UNHEALTHY
|
|
262
263
|
)
|
|
263
264
|
|
|
264
265
|
metrics.append(
|
|
@@ -5,7 +5,8 @@ Monitors system-wide resource usage including CPU, memory, and disk utilization.
|
|
|
5
5
|
|
|
6
6
|
from typing import Dict, List, Optional
|
|
7
7
|
|
|
8
|
-
from .
|
|
8
|
+
from ....core.enums import HealthStatus
|
|
9
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
9
10
|
|
|
10
11
|
try:
|
|
11
12
|
import psutil
|
|
@@ -53,7 +54,7 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
53
54
|
HealthMetric(
|
|
54
55
|
name="psutil_availability",
|
|
55
56
|
value=False,
|
|
56
|
-
status=HealthStatus.
|
|
57
|
+
status=HealthStatus.DEGRADED,
|
|
57
58
|
message="psutil not available for resource monitoring",
|
|
58
59
|
)
|
|
59
60
|
)
|
|
@@ -182,9 +183,9 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
182
183
|
# Load is concerning if > cpu_count
|
|
183
184
|
load_status = HealthStatus.HEALTHY
|
|
184
185
|
if load1 > cpu_count:
|
|
185
|
-
load_status = HealthStatus.
|
|
186
|
+
load_status = HealthStatus.DEGRADED
|
|
186
187
|
if load1 > cpu_count * 1.5:
|
|
187
|
-
load_status = HealthStatus.
|
|
188
|
+
load_status = HealthStatus.UNHEALTHY
|
|
188
189
|
|
|
189
190
|
metrics.append(
|
|
190
191
|
HealthMetric(
|
|
@@ -220,8 +221,8 @@ class ResourceMonitorService(BaseMonitoringService):
|
|
|
220
221
|
if value < threshold:
|
|
221
222
|
return HealthStatus.HEALTHY
|
|
222
223
|
if value < threshold * 1.1: # 10% above threshold
|
|
223
|
-
return HealthStatus.
|
|
224
|
-
return HealthStatus.
|
|
224
|
+
return HealthStatus.DEGRADED
|
|
225
|
+
return HealthStatus.UNHEALTHY
|
|
225
226
|
|
|
226
227
|
def get_resource_summary(self) -> Optional[Dict[str, float]]:
|
|
227
228
|
"""Get quick resource summary without full health check.
|
|
@@ -6,7 +6,8 @@ Monitors service-specific metrics like client connections, event processing, and
|
|
|
6
6
|
import time
|
|
7
7
|
from typing import Any, Dict, List
|
|
8
8
|
|
|
9
|
-
from .
|
|
9
|
+
from ....core.enums import HealthStatus
|
|
10
|
+
from .base import BaseMonitoringService, HealthMetric
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
class ServiceHealthService(BaseMonitoringService):
|
|
@@ -79,9 +80,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
79
80
|
|
|
80
81
|
# Determine status based on thresholds
|
|
81
82
|
if client_count > self.max_clients:
|
|
82
|
-
client_status = HealthStatus.
|
|
83
|
+
client_status = HealthStatus.UNHEALTHY
|
|
83
84
|
elif client_count > self.max_clients * 0.8:
|
|
84
|
-
client_status = HealthStatus.
|
|
85
|
+
client_status = HealthStatus.DEGRADED
|
|
85
86
|
else:
|
|
86
87
|
client_status = HealthStatus.HEALTHY
|
|
87
88
|
|
|
@@ -129,7 +130,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
129
130
|
# Determine status based on rate
|
|
130
131
|
rate_status = HealthStatus.HEALTHY
|
|
131
132
|
if event_rate == 0 and events_processed > 0:
|
|
132
|
-
rate_status = HealthStatus.
|
|
133
|
+
rate_status = HealthStatus.DEGRADED # Processing stopped
|
|
133
134
|
|
|
134
135
|
metrics.append(
|
|
135
136
|
HealthMetric(
|
|
@@ -157,9 +158,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
157
158
|
queue_size = self.service_stats["event_queue_size"]
|
|
158
159
|
queue_status = HealthStatus.HEALTHY
|
|
159
160
|
if queue_size > 1000:
|
|
160
|
-
queue_status = HealthStatus.
|
|
161
|
+
queue_status = HealthStatus.DEGRADED
|
|
161
162
|
if queue_size > 5000:
|
|
162
|
-
queue_status = HealthStatus.
|
|
163
|
+
queue_status = HealthStatus.UNHEALTHY
|
|
163
164
|
|
|
164
165
|
metrics.append(
|
|
165
166
|
HealthMetric(
|
|
@@ -191,9 +192,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
191
192
|
|
|
192
193
|
# Determine status based on rate
|
|
193
194
|
if error_rate > self.max_error_rate:
|
|
194
|
-
error_status = HealthStatus.
|
|
195
|
+
error_status = HealthStatus.UNHEALTHY
|
|
195
196
|
elif error_rate > self.max_error_rate * 0.5:
|
|
196
|
-
error_status = HealthStatus.
|
|
197
|
+
error_status = HealthStatus.DEGRADED
|
|
197
198
|
else:
|
|
198
199
|
error_status = HealthStatus.HEALTHY
|
|
199
200
|
|
|
@@ -213,7 +214,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
213
214
|
name="total_errors",
|
|
214
215
|
value=errors,
|
|
215
216
|
status=(
|
|
216
|
-
HealthStatus.HEALTHY if errors == 0 else HealthStatus.
|
|
217
|
+
HealthStatus.HEALTHY if errors == 0 else HealthStatus.DEGRADED
|
|
217
218
|
),
|
|
218
219
|
)
|
|
219
220
|
)
|
|
@@ -228,7 +229,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
228
229
|
status=(
|
|
229
230
|
HealthStatus.HEALTHY
|
|
230
231
|
if recent_errors == 0
|
|
231
|
-
else HealthStatus.
|
|
232
|
+
else HealthStatus.DEGRADED
|
|
232
233
|
),
|
|
233
234
|
)
|
|
234
235
|
)
|
|
@@ -263,9 +264,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
263
264
|
|
|
264
265
|
# Determine status based on staleness
|
|
265
266
|
if time_since_activity > self.stale_activity_seconds * 2:
|
|
266
|
-
activity_status = HealthStatus.
|
|
267
|
+
activity_status = HealthStatus.UNHEALTHY
|
|
267
268
|
elif time_since_activity > self.stale_activity_seconds:
|
|
268
|
-
activity_status = HealthStatus.
|
|
269
|
+
activity_status = HealthStatus.DEGRADED
|
|
269
270
|
else:
|
|
270
271
|
activity_status = HealthStatus.HEALTHY
|
|
271
272
|
|
|
@@ -282,7 +283,7 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
282
283
|
HealthMetric(
|
|
283
284
|
name="time_since_last_activity",
|
|
284
285
|
value=-1,
|
|
285
|
-
status=HealthStatus.
|
|
286
|
+
status=HealthStatus.DEGRADED,
|
|
286
287
|
message="No last activity recorded",
|
|
287
288
|
)
|
|
288
289
|
)
|
|
@@ -307,9 +308,9 @@ class ServiceHealthService(BaseMonitoringService):
|
|
|
307
308
|
|
|
308
309
|
# Determine status based on response time
|
|
309
310
|
if avg_time > 1000: # > 1 second
|
|
310
|
-
time_status = HealthStatus.
|
|
311
|
+
time_status = HealthStatus.UNHEALTHY
|
|
311
312
|
elif avg_time > 500: # > 500ms
|
|
312
|
-
time_status = HealthStatus.
|
|
313
|
+
time_status = HealthStatus.DEGRADED
|
|
313
314
|
else:
|
|
314
315
|
time_status = HealthStatus.HEALTHY
|
|
315
316
|
|