claude-mpm 4.13.2__py3-none-any.whl → 4.18.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/agents/BASE_ENGINEER.md +286 -0
- claude_mpm/agents/BASE_PM.md +48 -17
- claude_mpm/agents/OUTPUT_STYLE.md +329 -11
- claude_mpm/agents/PM_INSTRUCTIONS.md +227 -8
- claude_mpm/agents/agent_loader.py +17 -5
- claude_mpm/agents/frontmatter_validator.py +284 -253
- claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
- claude_mpm/agents/templates/api_qa.json +7 -1
- claude_mpm/agents/templates/clerk-ops.json +8 -1
- claude_mpm/agents/templates/code_analyzer.json +4 -1
- claude_mpm/agents/templates/dart_engineer.json +11 -1
- claude_mpm/agents/templates/data_engineer.json +11 -1
- claude_mpm/agents/templates/documentation.json +6 -1
- claude_mpm/agents/templates/engineer.json +18 -1
- claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
- claude_mpm/agents/templates/golang_engineer.json +11 -1
- claude_mpm/agents/templates/java_engineer.json +12 -2
- claude_mpm/agents/templates/local_ops_agent.json +1217 -6
- claude_mpm/agents/templates/nextjs_engineer.json +11 -1
- claude_mpm/agents/templates/ops.json +8 -1
- claude_mpm/agents/templates/php-engineer.json +11 -1
- claude_mpm/agents/templates/project_organizer.json +10 -3
- claude_mpm/agents/templates/prompt-engineer.json +5 -1
- claude_mpm/agents/templates/python_engineer.json +11 -1
- claude_mpm/agents/templates/qa.json +7 -1
- claude_mpm/agents/templates/react_engineer.json +11 -1
- claude_mpm/agents/templates/refactoring_engineer.json +8 -1
- claude_mpm/agents/templates/research.json +4 -1
- claude_mpm/agents/templates/ruby-engineer.json +11 -1
- claude_mpm/agents/templates/rust_engineer.json +11 -1
- claude_mpm/agents/templates/security.json +6 -1
- claude_mpm/agents/templates/svelte-engineer.json +225 -0
- claude_mpm/agents/templates/ticketing.json +6 -1
- claude_mpm/agents/templates/typescript_engineer.json +11 -1
- claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
- claude_mpm/agents/templates/version_control.json +8 -1
- claude_mpm/agents/templates/web_qa.json +7 -1
- claude_mpm/agents/templates/web_ui.json +11 -1
- claude_mpm/cli/__init__.py +34 -706
- claude_mpm/cli/commands/agent_manager.py +25 -12
- claude_mpm/cli/commands/agent_state_manager.py +186 -0
- claude_mpm/cli/commands/agents.py +204 -148
- claude_mpm/cli/commands/aggregate.py +7 -3
- claude_mpm/cli/commands/analyze.py +9 -4
- claude_mpm/cli/commands/analyze_code.py +7 -2
- claude_mpm/cli/commands/auto_configure.py +7 -9
- claude_mpm/cli/commands/config.py +47 -13
- claude_mpm/cli/commands/configure.py +294 -1788
- claude_mpm/cli/commands/configure_agent_display.py +261 -0
- claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
- claude_mpm/cli/commands/configure_hook_manager.py +225 -0
- claude_mpm/cli/commands/configure_models.py +18 -0
- claude_mpm/cli/commands/configure_navigation.py +167 -0
- claude_mpm/cli/commands/configure_paths.py +104 -0
- claude_mpm/cli/commands/configure_persistence.py +254 -0
- claude_mpm/cli/commands/configure_startup_manager.py +646 -0
- claude_mpm/cli/commands/configure_template_editor.py +497 -0
- claude_mpm/cli/commands/configure_validators.py +73 -0
- claude_mpm/cli/commands/local_deploy.py +537 -0
- claude_mpm/cli/commands/memory.py +54 -20
- claude_mpm/cli/commands/mpm_init.py +39 -25
- claude_mpm/cli/commands/mpm_init_handler.py +8 -3
- claude_mpm/cli/executor.py +202 -0
- claude_mpm/cli/helpers.py +105 -0
- claude_mpm/cli/interactive/__init__.py +3 -0
- claude_mpm/cli/interactive/skills_wizard.py +491 -0
- claude_mpm/cli/parsers/__init__.py +7 -1
- claude_mpm/cli/parsers/base_parser.py +98 -3
- claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
- claude_mpm/cli/shared/output_formatters.py +28 -19
- claude_mpm/cli/startup.py +481 -0
- claude_mpm/cli/utils.py +52 -1
- claude_mpm/commands/mpm-help.md +3 -0
- claude_mpm/commands/mpm-version.md +113 -0
- claude_mpm/commands/mpm.md +1 -0
- claude_mpm/config/agent_config.py +2 -2
- claude_mpm/config/model_config.py +428 -0
- claude_mpm/core/base_service.py +13 -12
- claude_mpm/core/enums.py +452 -0
- claude_mpm/core/factories.py +1 -1
- claude_mpm/core/instruction_reinforcement_hook.py +2 -1
- claude_mpm/core/interactive_session.py +9 -3
- claude_mpm/core/logging_config.py +6 -2
- claude_mpm/core/oneshot_session.py +8 -4
- claude_mpm/core/optimized_agent_loader.py +3 -3
- claude_mpm/core/output_style_manager.py +12 -192
- claude_mpm/core/service_registry.py +5 -1
- claude_mpm/core/types.py +2 -9
- claude_mpm/core/typing_utils.py +7 -6
- claude_mpm/dashboard/static/js/dashboard.js +0 -14
- claude_mpm/dashboard/templates/index.html +3 -41
- claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
- claude_mpm/hooks/instruction_reinforcement.py +7 -2
- claude_mpm/models/resume_log.py +340 -0
- claude_mpm/services/agents/auto_config_manager.py +10 -11
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
- claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
- claude_mpm/services/agents/deployment/agent_validator.py +17 -1
- claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
- claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
- claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
- claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
- claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
- claude_mpm/services/agents/local_template_manager.py +1 -1
- claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
- claude_mpm/services/agents/registry/modification_tracker.py +5 -2
- claude_mpm/services/command_handler_service.py +11 -5
- claude_mpm/services/core/interfaces/__init__.py +74 -2
- claude_mpm/services/core/interfaces/health.py +172 -0
- claude_mpm/services/core/interfaces/model.py +281 -0
- claude_mpm/services/core/interfaces/process.py +372 -0
- claude_mpm/services/core/interfaces/restart.py +307 -0
- claude_mpm/services/core/interfaces/stability.py +260 -0
- claude_mpm/services/core/models/__init__.py +33 -0
- claude_mpm/services/core/models/agent_config.py +12 -28
- claude_mpm/services/core/models/health.py +162 -0
- claude_mpm/services/core/models/process.py +235 -0
- claude_mpm/services/core/models/restart.py +302 -0
- claude_mpm/services/core/models/stability.py +264 -0
- claude_mpm/services/core/path_resolver.py +23 -7
- claude_mpm/services/diagnostics/__init__.py +2 -2
- claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
- claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
- claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
- claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
- claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
- claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
- claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
- claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
- claude_mpm/services/diagnostics/checks/mcp_services_check.py +36 -31
- claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
- claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
- claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
- claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
- claude_mpm/services/diagnostics/models.py +19 -24
- claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
- claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
- claude_mpm/services/infrastructure/monitoring/base.py +5 -13
- claude_mpm/services/infrastructure/monitoring/network.py +7 -6
- claude_mpm/services/infrastructure/monitoring/process.py +13 -12
- claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
- claude_mpm/services/infrastructure/monitoring/service.py +16 -15
- claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
- claude_mpm/services/local_ops/__init__.py +163 -0
- claude_mpm/services/local_ops/crash_detector.py +257 -0
- claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
- claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
- claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
- claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
- claude_mpm/services/local_ops/health_manager.py +430 -0
- claude_mpm/services/local_ops/log_monitor.py +396 -0
- claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
- claude_mpm/services/local_ops/process_manager.py +595 -0
- claude_mpm/services/local_ops/resource_monitor.py +331 -0
- claude_mpm/services/local_ops/restart_manager.py +401 -0
- claude_mpm/services/local_ops/restart_policy.py +387 -0
- claude_mpm/services/local_ops/state_manager.py +372 -0
- claude_mpm/services/local_ops/unified_manager.py +600 -0
- claude_mpm/services/mcp_config_manager.py +9 -4
- claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
- claude_mpm/services/mcp_gateway/core/base.py +18 -31
- claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +71 -24
- claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
- claude_mpm/services/memory_hook_service.py +4 -1
- claude_mpm/services/model/__init__.py +147 -0
- claude_mpm/services/model/base_provider.py +365 -0
- claude_mpm/services/model/claude_provider.py +412 -0
- claude_mpm/services/model/model_router.py +453 -0
- claude_mpm/services/model/ollama_provider.py +415 -0
- claude_mpm/services/monitor/daemon_manager.py +3 -2
- claude_mpm/services/monitor/handlers/dashboard.py +2 -1
- claude_mpm/services/monitor/handlers/hooks.py +2 -1
- claude_mpm/services/monitor/management/lifecycle.py +3 -2
- claude_mpm/services/monitor/server.py +2 -1
- claude_mpm/services/session_management_service.py +3 -2
- claude_mpm/services/session_manager.py +205 -1
- claude_mpm/services/shared/async_service_base.py +16 -27
- claude_mpm/services/shared/lifecycle_service_base.py +1 -14
- claude_mpm/services/socketio/handlers/__init__.py +5 -2
- claude_mpm/services/socketio/handlers/hook.py +13 -2
- claude_mpm/services/socketio/handlers/registry.py +4 -2
- claude_mpm/services/socketio/server/main.py +10 -8
- claude_mpm/services/subprocess_launcher_service.py +14 -5
- claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
- claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
- claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
- claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
- claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
- claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
- claude_mpm/services/unified/deployment_strategies/local.py +6 -5
- claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
- claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
- claude_mpm/services/unified/interfaces.py +3 -1
- claude_mpm/services/unified/unified_analyzer.py +14 -10
- claude_mpm/services/unified/unified_config.py +2 -1
- claude_mpm/services/unified/unified_deployment.py +9 -4
- claude_mpm/services/version_service.py +104 -1
- claude_mpm/skills/__init__.py +21 -0
- claude_mpm/skills/bundled/__init__.py +6 -0
- claude_mpm/skills/bundled/api-documentation.md +393 -0
- claude_mpm/skills/bundled/async-testing.md +571 -0
- claude_mpm/skills/bundled/code-review.md +143 -0
- claude_mpm/skills/bundled/database-migration.md +199 -0
- claude_mpm/skills/bundled/docker-containerization.md +194 -0
- claude_mpm/skills/bundled/express-local-dev.md +1429 -0
- claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
- claude_mpm/skills/bundled/git-workflow.md +414 -0
- claude_mpm/skills/bundled/imagemagick.md +204 -0
- claude_mpm/skills/bundled/json-data-handling.md +223 -0
- claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
- claude_mpm/skills/bundled/pdf.md +141 -0
- claude_mpm/skills/bundled/performance-profiling.md +567 -0
- claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
- claude_mpm/skills/bundled/security-scanning.md +327 -0
- claude_mpm/skills/bundled/systematic-debugging.md +473 -0
- claude_mpm/skills/bundled/test-driven-development.md +378 -0
- claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
- claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
- claude_mpm/skills/bundled/xlsx.md +157 -0
- claude_mpm/skills/registry.py +286 -0
- claude_mpm/skills/skill_manager.py +310 -0
- claude_mpm/tools/code_tree_analyzer.py +177 -141
- claude_mpm/tools/code_tree_events.py +4 -2
- claude_mpm/utils/agent_dependency_loader.py +2 -2
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +117 -8
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +238 -174
- claude_mpm/dashboard/static/css/code-tree.css +0 -1639
- claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
- claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
- claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
- claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
- claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
- claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
- claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
- claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
- claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
- claude_mpm/services/project/analyzer_refactored.py +0 -450
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Resource Monitor for Claude MPM Framework
|
|
3
|
+
==========================================
|
|
4
|
+
|
|
5
|
+
WHY: Monitors multiple resource types (file descriptors, threads, connections,
|
|
6
|
+
disk space) to detect resource exhaustion BEFORE it causes crashes or degradation.
|
|
7
|
+
|
|
8
|
+
DESIGN DECISION: Extends basic resource health checks with higher granularity,
|
|
9
|
+
percentage-based thresholds (80% of limits), and preemptive alerting.
|
|
10
|
+
|
|
11
|
+
ARCHITECTURE:
|
|
12
|
+
- File descriptor monitoring (Unix: ulimit -n, Windows: handle count)
|
|
13
|
+
- Thread count monitoring
|
|
14
|
+
- Network connection monitoring
|
|
15
|
+
- Disk space monitoring (working directory)
|
|
16
|
+
- 80% threshold for preemptive alerts
|
|
17
|
+
- Callback system for critical resource usage
|
|
18
|
+
- Thread-safe with proper locking
|
|
19
|
+
|
|
20
|
+
USAGE:
|
|
21
|
+
monitor = ResourceMonitor(
|
|
22
|
+
process_manager=process_manager,
|
|
23
|
+
fd_threshold_percent=0.8,
|
|
24
|
+
thread_threshold=1000,
|
|
25
|
+
connection_threshold=500,
|
|
26
|
+
disk_threshold_mb=100,
|
|
27
|
+
)
|
|
28
|
+
monitor.initialize()
|
|
29
|
+
|
|
30
|
+
# Check resources
|
|
31
|
+
usage = monitor.check_resources(deployment_id)
|
|
32
|
+
if usage.is_critical:
|
|
33
|
+
print(f"Critical resources: {usage.get_critical_resources()}")
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
import platform
|
|
37
|
+
import resource as resource_module
|
|
38
|
+
import shutil
|
|
39
|
+
import threading
|
|
40
|
+
from typing import Callable, Dict, List
|
|
41
|
+
|
|
42
|
+
import psutil
|
|
43
|
+
|
|
44
|
+
from claude_mpm.services.core.base import SyncBaseService
|
|
45
|
+
from claude_mpm.services.core.interfaces.process import ILocalProcessManager
|
|
46
|
+
from claude_mpm.services.core.interfaces.stability import IResourceMonitor
|
|
47
|
+
from claude_mpm.services.core.models.stability import ResourceUsage
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ResourceMonitor(SyncBaseService, IResourceMonitor):
    """
    Comprehensive resource usage monitoring service.

    WHY: Provides early warning of resource exhaustion by monitoring multiple
    resource types (file descriptors, threads, network connections, free disk
    space) and detecting when usage approaches the configured limits.

    Thread Safety: The callback list is only read or mutated while holding
    ``self._lock``. Callbacks themselves are invoked outside the lock so a
    callback may safely register further callbacks or call back into the
    monitor without deadlocking.
    """

    def __init__(
        self,
        process_manager: ILocalProcessManager,
        fd_threshold_percent: float = 0.8,
        thread_threshold: int = 1000,
        connection_threshold: int = 500,
        disk_threshold_mb: float = 100.0,
    ):
        """
        Initialize resource monitor.

        Args:
            process_manager: Process manager for deployment lookup
            fd_threshold_percent: File descriptor threshold as a fraction of
                the descriptor limit (default: 0.8). The same fraction is also
                applied to the thread and connection thresholds when deciding
                whether usage is critical.
            thread_threshold: Thread count threshold (default: 1000)
            connection_threshold: Connection count threshold (default: 500)
            disk_threshold_mb: Minimum free disk space in MB (default: 100)
        """
        super().__init__("ResourceMonitor")
        self.process_manager = process_manager
        self.fd_threshold_percent = fd_threshold_percent
        self.thread_threshold = thread_threshold
        self.connection_threshold = connection_threshold
        self.disk_threshold_mb = disk_threshold_mb

        # Platform detection
        self.is_windows = platform.system() == "Windows"
        self.is_unix = not self.is_windows

        # Critical resource callbacks
        self._critical_callbacks: List[Callable[[str, ResourceUsage], None]] = []

        # Thread safety: guards every access to _critical_callbacks
        self._lock = threading.Lock()

    def initialize(self) -> bool:
        """
        Initialize the resource monitor.

        Returns:
            True if initialization successful
        """
        self._initialized = True
        self.log_info(
            f"Resource monitor initialized "
            f"(fd_threshold={self.fd_threshold_percent*100:.0f}%, "
            f"thread_threshold={self.thread_threshold}, "
            f"connection_threshold={self.connection_threshold}, "
            f"disk_threshold={self.disk_threshold_mb}MB)"
        )
        return True

    def shutdown(self) -> None:
        """Shutdown resource monitor and clear callbacks."""
        with self._lock:
            self._critical_callbacks.clear()

        self._shutdown = True
        self.log_info("Resource monitor shutdown complete")

    def check_resources(self, deployment_id: str) -> ResourceUsage:
        """
        Check resource usage for a deployment.

        WHY: Provides comprehensive snapshot of resource consumption across
        all monitored resource types.

        Each individual measurement is best-effort: if one of them cannot be
        taken, the failure is logged at debug level and the corresponding
        field of the returned ``ResourceUsage`` keeps its default value.

        Args:
            deployment_id: Deployment identifier

        Returns:
            ResourceUsage with current metrics and critical status. When the
            process no longer exists the result is marked critical; when
            access is denied or an unexpected error occurs the result is
            marked non-critical and ``details["error"]`` describes the problem.

        Raises:
            ValueError: If deployment not found
        """
        # Validate deployment exists
        deployment = self.process_manager.state_manager.get_deployment(deployment_id)
        if not deployment:
            raise ValueError(f"Deployment not found: {deployment_id}")

        try:
            process = psutil.Process(deployment.process_id)

            # Initialize usage object
            usage = ResourceUsage(deployment_id=deployment_id)
            details = {}

            # 1. Check file descriptors (Unix only)
            if self.is_unix:
                try:
                    num_fds = process.num_fds()
                    max_fds = self._get_max_fds()

                    usage.file_descriptors = num_fds
                    usage.max_file_descriptors = max_fds
                    details["fd_usage_percent"] = usage.fd_usage_percent
                except (psutil.NoSuchProcess, psutil.AccessDenied, AttributeError) as e:
                    self.log_debug(f"File descriptor check skipped: {e}")

            # 2. Check thread count
            try:
                usage.threads = process.num_threads()
            except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
                self.log_debug(f"Thread count check skipped: {e}")

            # 3. Check connection count
            try:
                # psutil renamed Process.connections() to net_connections();
                # fall back to the old name so older psutil releases do not
                # abort the whole check with an AttributeError.
                list_connections = getattr(process, "net_connections", None)
                if list_connections is None:
                    list_connections = process.connections
                connections = list_connections()
                usage.connections = len(connections)

                # Add connection breakdown by state
                connection_states: Dict[str, int] = {}
                for conn in connections:
                    state = conn.status
                    connection_states[state] = connection_states.get(state, 0) + 1
                details["connection_states"] = connection_states
            except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
                self.log_debug(f"Connection check skipped: {e}")

            # 4. Check disk space
            try:
                working_dir = deployment.working_directory
                disk_usage = shutil.disk_usage(working_dir)
                usage.disk_free_mb = disk_usage.free / (1024 * 1024)
            except OSError as e:
                # FileNotFoundError is a subclass of OSError.
                # NOTE(review): disk_free_mb keeps the ResourceUsage default
                # here; if that default is below disk_threshold_mb the
                # deployment will be reported as critical - confirm intended.
                self.log_debug(f"Disk space check skipped: {e}")

            # Store thresholds in details (always, regardless of which
            # measurements succeeded)
            details["fd_threshold_percent"] = self.fd_threshold_percent
            details["thread_threshold"] = self.thread_threshold
            details["connection_threshold"] = self.connection_threshold
            details["disk_threshold_mb"] = self.disk_threshold_mb
            usage.details = details

            # 5. Determine if any resource is critical
            is_critical = self._check_critical(usage)
            usage.is_critical = is_critical

            # Trigger callbacks if critical
            if is_critical:
                critical_resources = usage.get_critical_resources()
                self.log_warning(
                    f"Critical resource usage for {deployment_id}: "
                    f"{', '.join(critical_resources)}"
                )
                self._trigger_critical_callbacks(deployment_id, usage)

            return usage

        except psutil.NoSuchProcess:
            # Process does not exist
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=True,
                details={"error": "Process no longer exists"},
            )

        except psutil.AccessDenied as e:
            # Cannot access process information
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=False,
                details={"error": f"Access denied: {e}"},
            )

        except Exception as e:
            # Unexpected error
            self.log_error(f"Unexpected error checking resources: {e}")
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=False,
                details={"error": str(e)},
            )

    def is_critical(self, deployment_id: str) -> bool:
        """
        Check if any resource of a deployment is at a critical level.

        Runs a full ``check_resources`` pass, so registered critical
        callbacks fire as a side effect when usage is critical.

        Args:
            deployment_id: Deployment identifier

        Returns:
            True if the resource check reports a critical state; False
            otherwise, including when the deployment is not found.
        """
        try:
            usage = self.check_resources(deployment_id)
            return usage.is_critical
        except ValueError:
            return False

    def register_critical_callback(
        self, callback: Callable[[str, ResourceUsage], None]
    ) -> None:
        """
        Register callback for critical resource usage.

        Args:
            callback: Function called with (deployment_id, usage) when critical
        """
        with self._lock:
            self._critical_callbacks.append(callback)
        self.log_debug(
            f"Registered critical callback: {self._callback_name(callback)}"
        )

    @staticmethod
    def _callback_name(callback: Callable[..., object]) -> str:
        """
        Return a printable name for a callback.

        WHY: functools.partial objects and callable instances have no
        ``__name__``; falling back to ``repr`` keeps logging from raising.
        """
        return getattr(callback, "__name__", None) or repr(callback)

    def _check_critical(self, usage: ResourceUsage) -> bool:
        """
        Check if resource usage is at critical levels.

        Args:
            usage: ResourceUsage to check

        Returns:
            True if any of the following holds:
            - file descriptor usage exceeds ``fd_threshold_percent`` of the limit
              (only evaluated when a positive limit is known)
            - thread count exceeds ``thread_threshold * fd_threshold_percent``
            - connection count exceeds
              ``connection_threshold * fd_threshold_percent``
            - free disk space is below ``disk_threshold_mb``
        """
        # Check file descriptors
        if usage.max_file_descriptors > 0:
            # fd_usage_percent is on a 0-100 scale; the threshold is a fraction
            fd_percent = usage.fd_usage_percent / 100.0
            if fd_percent > self.fd_threshold_percent:
                return True

        # fd_threshold_percent doubles as the generic "approaching the limit"
        # fraction for the absolute thread and connection thresholds.
        # Check threads
        if usage.threads > self.thread_threshold * self.fd_threshold_percent:
            return True

        # Check connections
        if usage.connections > self.connection_threshold * self.fd_threshold_percent:
            return True

        # Check disk space
        if usage.disk_free_mb < self.disk_threshold_mb:
            return True

        return False

    def _get_max_fds(self) -> int:
        """
        Get maximum file descriptors allowed (ulimit -n).

        NOTE: ``resource.getrlimit`` reports the soft limit of the process
        running this monitor, which is used as an approximation of the
        monitored process's limit.

        Returns:
            Maximum file descriptors, or 0 if cannot determine
        """
        if not self.is_unix:
            return 0

        try:
            soft_limit, _hard_limit = resource_module.getrlimit(
                resource_module.RLIMIT_NOFILE
            )
        except (ValueError, OSError):
            return 0

        # An unlimited soft limit can be reported as a negative value;
        # normalise that to the documented "unknown" result.
        return max(soft_limit, 0)

    def _trigger_critical_callbacks(
        self, deployment_id: str, usage: ResourceUsage
    ) -> None:
        """
        Trigger registered callbacks for critical resource usage.

        A failing callback is logged and does not prevent the remaining
        callbacks from running.

        Args:
            deployment_id: Deployment with critical usage
            usage: ResourceUsage with critical metrics
        """
        # Snapshot under the lock so concurrent register/shutdown calls cannot
        # mutate the list mid-iteration; invoke outside the lock so callbacks
        # may re-enter the monitor.
        with self._lock:
            callbacks = list(self._critical_callbacks)

        for callback in callbacks:
            try:
                callback(deployment_id, usage)
            except Exception as e:
                self.log_error(
                    f"Error in critical callback {self._callback_name(callback)}: {e}"
                )
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# Public names exported by this module.
__all__ = ["ResourceMonitor"]
|