claude-mpm 4.13.2__py3-none-any.whl → 4.18.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250) hide show
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/BASE_ENGINEER.md +286 -0
  3. claude_mpm/agents/BASE_PM.md +48 -17
  4. claude_mpm/agents/OUTPUT_STYLE.md +329 -11
  5. claude_mpm/agents/PM_INSTRUCTIONS.md +227 -8
  6. claude_mpm/agents/agent_loader.py +17 -5
  7. claude_mpm/agents/frontmatter_validator.py +284 -253
  8. claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
  9. claude_mpm/agents/templates/api_qa.json +7 -1
  10. claude_mpm/agents/templates/clerk-ops.json +8 -1
  11. claude_mpm/agents/templates/code_analyzer.json +4 -1
  12. claude_mpm/agents/templates/dart_engineer.json +11 -1
  13. claude_mpm/agents/templates/data_engineer.json +11 -1
  14. claude_mpm/agents/templates/documentation.json +6 -1
  15. claude_mpm/agents/templates/engineer.json +18 -1
  16. claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
  17. claude_mpm/agents/templates/golang_engineer.json +11 -1
  18. claude_mpm/agents/templates/java_engineer.json +12 -2
  19. claude_mpm/agents/templates/local_ops_agent.json +1217 -6
  20. claude_mpm/agents/templates/nextjs_engineer.json +11 -1
  21. claude_mpm/agents/templates/ops.json +8 -1
  22. claude_mpm/agents/templates/php-engineer.json +11 -1
  23. claude_mpm/agents/templates/project_organizer.json +10 -3
  24. claude_mpm/agents/templates/prompt-engineer.json +5 -1
  25. claude_mpm/agents/templates/python_engineer.json +11 -1
  26. claude_mpm/agents/templates/qa.json +7 -1
  27. claude_mpm/agents/templates/react_engineer.json +11 -1
  28. claude_mpm/agents/templates/refactoring_engineer.json +8 -1
  29. claude_mpm/agents/templates/research.json +4 -1
  30. claude_mpm/agents/templates/ruby-engineer.json +11 -1
  31. claude_mpm/agents/templates/rust_engineer.json +11 -1
  32. claude_mpm/agents/templates/security.json +6 -1
  33. claude_mpm/agents/templates/svelte-engineer.json +225 -0
  34. claude_mpm/agents/templates/ticketing.json +6 -1
  35. claude_mpm/agents/templates/typescript_engineer.json +11 -1
  36. claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
  37. claude_mpm/agents/templates/version_control.json +8 -1
  38. claude_mpm/agents/templates/web_qa.json +7 -1
  39. claude_mpm/agents/templates/web_ui.json +11 -1
  40. claude_mpm/cli/__init__.py +34 -706
  41. claude_mpm/cli/commands/agent_manager.py +25 -12
  42. claude_mpm/cli/commands/agent_state_manager.py +186 -0
  43. claude_mpm/cli/commands/agents.py +204 -148
  44. claude_mpm/cli/commands/aggregate.py +7 -3
  45. claude_mpm/cli/commands/analyze.py +9 -4
  46. claude_mpm/cli/commands/analyze_code.py +7 -2
  47. claude_mpm/cli/commands/auto_configure.py +7 -9
  48. claude_mpm/cli/commands/config.py +47 -13
  49. claude_mpm/cli/commands/configure.py +294 -1788
  50. claude_mpm/cli/commands/configure_agent_display.py +261 -0
  51. claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
  52. claude_mpm/cli/commands/configure_hook_manager.py +225 -0
  53. claude_mpm/cli/commands/configure_models.py +18 -0
  54. claude_mpm/cli/commands/configure_navigation.py +167 -0
  55. claude_mpm/cli/commands/configure_paths.py +104 -0
  56. claude_mpm/cli/commands/configure_persistence.py +254 -0
  57. claude_mpm/cli/commands/configure_startup_manager.py +646 -0
  58. claude_mpm/cli/commands/configure_template_editor.py +497 -0
  59. claude_mpm/cli/commands/configure_validators.py +73 -0
  60. claude_mpm/cli/commands/local_deploy.py +537 -0
  61. claude_mpm/cli/commands/memory.py +54 -20
  62. claude_mpm/cli/commands/mpm_init.py +39 -25
  63. claude_mpm/cli/commands/mpm_init_handler.py +8 -3
  64. claude_mpm/cli/executor.py +202 -0
  65. claude_mpm/cli/helpers.py +105 -0
  66. claude_mpm/cli/interactive/__init__.py +3 -0
  67. claude_mpm/cli/interactive/skills_wizard.py +491 -0
  68. claude_mpm/cli/parsers/__init__.py +7 -1
  69. claude_mpm/cli/parsers/base_parser.py +98 -3
  70. claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
  71. claude_mpm/cli/shared/output_formatters.py +28 -19
  72. claude_mpm/cli/startup.py +481 -0
  73. claude_mpm/cli/utils.py +52 -1
  74. claude_mpm/commands/mpm-help.md +3 -0
  75. claude_mpm/commands/mpm-version.md +113 -0
  76. claude_mpm/commands/mpm.md +1 -0
  77. claude_mpm/config/agent_config.py +2 -2
  78. claude_mpm/config/model_config.py +428 -0
  79. claude_mpm/core/base_service.py +13 -12
  80. claude_mpm/core/enums.py +452 -0
  81. claude_mpm/core/factories.py +1 -1
  82. claude_mpm/core/instruction_reinforcement_hook.py +2 -1
  83. claude_mpm/core/interactive_session.py +9 -3
  84. claude_mpm/core/logging_config.py +6 -2
  85. claude_mpm/core/oneshot_session.py +8 -4
  86. claude_mpm/core/optimized_agent_loader.py +3 -3
  87. claude_mpm/core/output_style_manager.py +12 -192
  88. claude_mpm/core/service_registry.py +5 -1
  89. claude_mpm/core/types.py +2 -9
  90. claude_mpm/core/typing_utils.py +7 -6
  91. claude_mpm/dashboard/static/js/dashboard.js +0 -14
  92. claude_mpm/dashboard/templates/index.html +3 -41
  93. claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
  94. claude_mpm/hooks/instruction_reinforcement.py +7 -2
  95. claude_mpm/models/resume_log.py +340 -0
  96. claude_mpm/services/agents/auto_config_manager.py +10 -11
  97. claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
  98. claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
  99. claude_mpm/services/agents/deployment/agent_validator.py +17 -1
  100. claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
  101. claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
  102. claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
  103. claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
  104. claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
  105. claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
  106. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
  107. claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
  108. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
  109. claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
  110. claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
  111. claude_mpm/services/agents/local_template_manager.py +1 -1
  112. claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
  113. claude_mpm/services/agents/registry/modification_tracker.py +5 -2
  114. claude_mpm/services/command_handler_service.py +11 -5
  115. claude_mpm/services/core/interfaces/__init__.py +74 -2
  116. claude_mpm/services/core/interfaces/health.py +172 -0
  117. claude_mpm/services/core/interfaces/model.py +281 -0
  118. claude_mpm/services/core/interfaces/process.py +372 -0
  119. claude_mpm/services/core/interfaces/restart.py +307 -0
  120. claude_mpm/services/core/interfaces/stability.py +260 -0
  121. claude_mpm/services/core/models/__init__.py +33 -0
  122. claude_mpm/services/core/models/agent_config.py +12 -28
  123. claude_mpm/services/core/models/health.py +162 -0
  124. claude_mpm/services/core/models/process.py +235 -0
  125. claude_mpm/services/core/models/restart.py +302 -0
  126. claude_mpm/services/core/models/stability.py +264 -0
  127. claude_mpm/services/core/path_resolver.py +23 -7
  128. claude_mpm/services/diagnostics/__init__.py +2 -2
  129. claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
  130. claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
  131. claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
  132. claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
  133. claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
  134. claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
  135. claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
  136. claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
  137. claude_mpm/services/diagnostics/checks/mcp_services_check.py +36 -31
  138. claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
  139. claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
  140. claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
  141. claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
  142. claude_mpm/services/diagnostics/models.py +19 -24
  143. claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
  144. claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
  145. claude_mpm/services/infrastructure/monitoring/base.py +5 -13
  146. claude_mpm/services/infrastructure/monitoring/network.py +7 -6
  147. claude_mpm/services/infrastructure/monitoring/process.py +13 -12
  148. claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
  149. claude_mpm/services/infrastructure/monitoring/service.py +16 -15
  150. claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
  151. claude_mpm/services/local_ops/__init__.py +163 -0
  152. claude_mpm/services/local_ops/crash_detector.py +257 -0
  153. claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
  154. claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
  155. claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
  156. claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
  157. claude_mpm/services/local_ops/health_manager.py +430 -0
  158. claude_mpm/services/local_ops/log_monitor.py +396 -0
  159. claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
  160. claude_mpm/services/local_ops/process_manager.py +595 -0
  161. claude_mpm/services/local_ops/resource_monitor.py +331 -0
  162. claude_mpm/services/local_ops/restart_manager.py +401 -0
  163. claude_mpm/services/local_ops/restart_policy.py +387 -0
  164. claude_mpm/services/local_ops/state_manager.py +372 -0
  165. claude_mpm/services/local_ops/unified_manager.py +600 -0
  166. claude_mpm/services/mcp_config_manager.py +9 -4
  167. claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
  168. claude_mpm/services/mcp_gateway/core/base.py +18 -31
  169. claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +71 -24
  170. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
  171. claude_mpm/services/memory_hook_service.py +4 -1
  172. claude_mpm/services/model/__init__.py +147 -0
  173. claude_mpm/services/model/base_provider.py +365 -0
  174. claude_mpm/services/model/claude_provider.py +412 -0
  175. claude_mpm/services/model/model_router.py +453 -0
  176. claude_mpm/services/model/ollama_provider.py +415 -0
  177. claude_mpm/services/monitor/daemon_manager.py +3 -2
  178. claude_mpm/services/monitor/handlers/dashboard.py +2 -1
  179. claude_mpm/services/monitor/handlers/hooks.py +2 -1
  180. claude_mpm/services/monitor/management/lifecycle.py +3 -2
  181. claude_mpm/services/monitor/server.py +2 -1
  182. claude_mpm/services/session_management_service.py +3 -2
  183. claude_mpm/services/session_manager.py +205 -1
  184. claude_mpm/services/shared/async_service_base.py +16 -27
  185. claude_mpm/services/shared/lifecycle_service_base.py +1 -14
  186. claude_mpm/services/socketio/handlers/__init__.py +5 -2
  187. claude_mpm/services/socketio/handlers/hook.py +13 -2
  188. claude_mpm/services/socketio/handlers/registry.py +4 -2
  189. claude_mpm/services/socketio/server/main.py +10 -8
  190. claude_mpm/services/subprocess_launcher_service.py +14 -5
  191. claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
  192. claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
  193. claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
  194. claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
  195. claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
  196. claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
  197. claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
  198. claude_mpm/services/unified/deployment_strategies/local.py +6 -5
  199. claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
  200. claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
  201. claude_mpm/services/unified/interfaces.py +3 -1
  202. claude_mpm/services/unified/unified_analyzer.py +14 -10
  203. claude_mpm/services/unified/unified_config.py +2 -1
  204. claude_mpm/services/unified/unified_deployment.py +9 -4
  205. claude_mpm/services/version_service.py +104 -1
  206. claude_mpm/skills/__init__.py +21 -0
  207. claude_mpm/skills/bundled/__init__.py +6 -0
  208. claude_mpm/skills/bundled/api-documentation.md +393 -0
  209. claude_mpm/skills/bundled/async-testing.md +571 -0
  210. claude_mpm/skills/bundled/code-review.md +143 -0
  211. claude_mpm/skills/bundled/database-migration.md +199 -0
  212. claude_mpm/skills/bundled/docker-containerization.md +194 -0
  213. claude_mpm/skills/bundled/express-local-dev.md +1429 -0
  214. claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
  215. claude_mpm/skills/bundled/git-workflow.md +414 -0
  216. claude_mpm/skills/bundled/imagemagick.md +204 -0
  217. claude_mpm/skills/bundled/json-data-handling.md +223 -0
  218. claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
  219. claude_mpm/skills/bundled/pdf.md +141 -0
  220. claude_mpm/skills/bundled/performance-profiling.md +567 -0
  221. claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
  222. claude_mpm/skills/bundled/security-scanning.md +327 -0
  223. claude_mpm/skills/bundled/systematic-debugging.md +473 -0
  224. claude_mpm/skills/bundled/test-driven-development.md +378 -0
  225. claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
  226. claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
  227. claude_mpm/skills/bundled/xlsx.md +157 -0
  228. claude_mpm/skills/registry.py +286 -0
  229. claude_mpm/skills/skill_manager.py +310 -0
  230. claude_mpm/tools/code_tree_analyzer.py +177 -141
  231. claude_mpm/tools/code_tree_events.py +4 -2
  232. claude_mpm/utils/agent_dependency_loader.py +2 -2
  233. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +117 -8
  234. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +238 -174
  235. claude_mpm/dashboard/static/css/code-tree.css +0 -1639
  236. claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
  237. claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
  238. claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
  239. claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
  240. claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
  241. claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
  242. claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
  243. claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
  244. claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
  245. claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
  246. claude_mpm/services/project/analyzer_refactored.py +0 -450
  247. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
  248. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
  249. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
  250. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,331 @@
1
+ """
2
+ Resource Monitor for Claude MPM Framework
3
+ ==========================================
4
+
5
+ WHY: Monitors multiple resource types (file descriptors, threads, connections,
6
+ disk space) to detect resource exhaustion BEFORE it causes crashes or degradation.
7
+
8
+ DESIGN DECISION: Extends basic resource health checks with higher granularity,
9
+ percentage-based thresholds (80% of limits), and preemptive alerting.
10
+
11
+ ARCHITECTURE:
12
+ - File descriptor monitoring (Unix only: compared against ulimit -n; skipped on Windows)
13
+ - Thread count monitoring
14
+ - Network connection monitoring
15
+ - Disk space monitoring (working directory)
16
+ - 80% threshold for preemptive alerts
17
+ - Callback system for critical resource usage
18
+ - Thread-safe with proper locking
19
+
20
+ USAGE:
21
+ monitor = ResourceMonitor(
22
+ process_manager=process_manager,
23
+ fd_threshold_percent=0.8,
24
+ thread_threshold=1000,
25
+ connection_threshold=500,
26
+ disk_threshold_mb=100,
27
+ )
28
+ monitor.initialize()
29
+
30
+ # Check resources
31
+ usage = monitor.check_resources(deployment_id)
32
+ if usage.is_critical:
33
+ print(f"Critical resources: {usage.get_critical_resources()}")
34
+ """
35
+
36
+ import platform
37
+ import resource as resource_module
38
+ import shutil
39
+ import threading
40
+ from typing import Callable, Dict, List
41
+
42
+ import psutil
43
+
44
+ from claude_mpm.services.core.base import SyncBaseService
45
+ from claude_mpm.services.core.interfaces.process import ILocalProcessManager
46
+ from claude_mpm.services.core.interfaces.stability import IResourceMonitor
47
+ from claude_mpm.services.core.models.stability import ResourceUsage
48
+
49
+
50
class ResourceMonitor(SyncBaseService, IResourceMonitor):
    """
    Comprehensive resource usage monitoring service.

    WHY: Provides early warning of resource exhaustion by monitoring multiple
    resource types (file descriptors, threads, network connections, free disk
    space) and flagging usage that approaches the configured limits.

    Thread Safety: the callback registry is guarded by ``self._lock``.
    Callbacks are dispatched from a snapshot taken under that lock and are
    invoked with the lock released, so registering callbacks or shutting down
    concurrently with a running check is safe, and a callback may itself
    register further callbacks without deadlocking.
    """

    def __init__(
        self,
        process_manager: ILocalProcessManager,
        fd_threshold_percent: float = 0.8,
        thread_threshold: int = 1000,
        connection_threshold: int = 500,
        disk_threshold_mb: float = 100.0,
    ):
        """
        Initialize resource monitor.

        Args:
            process_manager: Process manager for deployment lookup
            fd_threshold_percent: File descriptor threshold as a fraction of
                the descriptor limit (default: 0.8). The same fraction is also
                applied to the thread and connection thresholds when deciding
                whether usage is critical.
            thread_threshold: Thread count threshold (default: 1000)
            connection_threshold: Connection count threshold (default: 500)
            disk_threshold_mb: Minimum free disk space in MB (default: 100)
        """
        super().__init__("ResourceMonitor")
        self.process_manager = process_manager
        self.fd_threshold_percent = fd_threshold_percent
        self.thread_threshold = thread_threshold
        self.connection_threshold = connection_threshold
        self.disk_threshold_mb = disk_threshold_mb

        # Platform detection
        self.is_windows = platform.system() == "Windows"
        self.is_unix = not self.is_windows

        # Critical resource callbacks
        self._critical_callbacks: List[Callable[[str, ResourceUsage], None]] = []

        # Guards every read and write of _critical_callbacks
        self._lock = threading.Lock()

    def initialize(self) -> bool:
        """
        Initialize the resource monitor.

        Returns:
            True if initialization successful
        """
        self._initialized = True
        self.log_info(
            f"Resource monitor initialized "
            f"(fd_threshold={self.fd_threshold_percent*100:.0f}%, "
            f"thread_threshold={self.thread_threshold}, "
            f"connection_threshold={self.connection_threshold}, "
            f"disk_threshold={self.disk_threshold_mb}MB)"
        )
        return True

    def shutdown(self) -> None:
        """Shutdown resource monitor and clear callbacks."""
        with self._lock:
            self._critical_callbacks.clear()

        self._shutdown = True
        self.log_info("Resource monitor shutdown complete")

    def check_resources(self, deployment_id: str) -> ResourceUsage:
        """
        Check resource usage for a deployment.

        WHY: Provides comprehensive snapshot of resource consumption across
        all monitored resource types.

        Each individual probe is best-effort: if one probe fails with a
        process/permission/filesystem error, that metric is simply left at
        whatever value ``ResourceUsage`` initialised it to and the remaining
        probes still run.

        Args:
            deployment_id: Deployment identifier

        Returns:
            ResourceUsage with current metrics and critical status. If the
            process has disappeared the result is marked critical with an
            ``error`` entry in ``details``; for access-denied or unexpected
            errors the result is non-critical with an ``error`` entry.

        Raises:
            ValueError: If deployment not found
        """
        # Validate deployment exists (raised outside the try so callers see it)
        deployment = self.process_manager.state_manager.get_deployment(deployment_id)
        if not deployment:
            raise ValueError(f"Deployment not found: {deployment_id}")

        try:
            process = psutil.Process(deployment.process_id)

            # Initialize usage object
            usage = ResourceUsage(deployment_id=deployment_id)
            details = {}

            # 1. Check file descriptors (Unix only)
            if self.is_unix:
                try:
                    num_fds = process.num_fds()
                    max_fds = self._get_max_fds()

                    usage.file_descriptors = num_fds
                    usage.max_file_descriptors = max_fds
                    details["fd_usage_percent"] = usage.fd_usage_percent
                except (psutil.NoSuchProcess, psutil.AccessDenied, AttributeError):
                    pass

            # 2. Check thread count
            try:
                num_threads = process.num_threads()
                usage.threads = num_threads
                details["thread_threshold"] = self.thread_threshold
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

            # 3. Check connection count
            try:
                # psutil >= 6.0 names this net_connections(); older releases
                # only provide connections(). Fall back so that an old psutil
                # does not abort the whole check with AttributeError.
                fetch_connections = getattr(process, "net_connections", None)
                if fetch_connections is None:
                    fetch_connections = process.connections
                connections = fetch_connections()
                usage.connections = len(connections)
                details["connection_threshold"] = self.connection_threshold

                # Add connection breakdown by state
                connection_states: Dict[str, int] = {}
                for conn in connections:
                    state = conn.status
                    connection_states[state] = connection_states.get(state, 0) + 1
                details["connection_states"] = connection_states
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

            # 4. Check disk space of the deployment's working directory
            try:
                working_dir = deployment.working_directory
                disk_usage = shutil.disk_usage(working_dir)
                disk_free_mb = disk_usage.free / (1024 * 1024)
                usage.disk_free_mb = disk_free_mb
                details["disk_threshold_mb"] = self.disk_threshold_mb
            except (OSError, FileNotFoundError):
                pass

            # Store thresholds in details (always present, even if a probe failed)
            details["fd_threshold_percent"] = self.fd_threshold_percent
            details["thread_threshold"] = self.thread_threshold
            details["connection_threshold"] = self.connection_threshold
            details["disk_threshold_mb"] = self.disk_threshold_mb
            usage.details = details

            # 5. Determine if any resource is critical
            is_critical = self._check_critical(usage)
            usage.is_critical = is_critical

            # Trigger callbacks if critical
            if is_critical:
                critical_resources = usage.get_critical_resources()
                self.log_warning(
                    f"Critical resource usage for {deployment_id}: "
                    f"{', '.join(critical_resources)}"
                )
                self._trigger_critical_callbacks(deployment_id, usage)

            return usage

        except psutil.NoSuchProcess:
            # Process does not exist
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=True,
                details={"error": "Process no longer exists"},
            )

        except psutil.AccessDenied as e:
            # Cannot access process information
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=False,
                details={"error": f"Access denied: {e}"},
            )

        except Exception as e:
            # Unexpected error
            self.log_error(f"Unexpected error checking resources: {e}")
            return ResourceUsage(
                deployment_id=deployment_id,
                is_critical=False,
                details={"error": str(e)},
            )

    def is_critical(self, deployment_id: str) -> bool:
        """
        Check if any resource of a deployment is at a critical level.

        Args:
            deployment_id: Deployment identifier

        Returns:
            The ``is_critical`` flag of a fresh ``check_resources`` result, or
            False if the deployment is unknown.
        """
        try:
            usage = self.check_resources(deployment_id)
            return usage.is_critical
        except ValueError:
            return False

    def register_critical_callback(
        self, callback: Callable[[str, ResourceUsage], None]
    ) -> None:
        """
        Register callback for critical resource usage.

        Args:
            callback: Function called with (deployment_id, usage) when critical.
                Any callable is accepted, including ``functools.partial``
                objects and callable instances that have no ``__name__``.
        """
        with self._lock:
            self._critical_callbacks.append(callback)
        self.log_debug(
            f"Registered critical callback: {self._callback_name(callback)}"
        )

    @staticmethod
    def _callback_name(callback) -> str:
        """Return a printable name for a callback, even if it lacks ``__name__``."""
        return getattr(callback, "__name__", None) or repr(callback)

    def _check_critical(self, usage: ResourceUsage) -> bool:
        """
        Check if resource usage is at critical levels.

        Args:
            usage: ResourceUsage to check

        Returns:
            True if any monitored resource crosses its limit
        """
        # Check file descriptors (skipped when no positive limit is known)
        if usage.max_file_descriptors > 0:
            fd_percent = usage.fd_usage_percent / 100.0
            if fd_percent > self.fd_threshold_percent:
                return True

        # Check threads.
        # NOTE(review): the fd fraction is reused here, so the effective limit
        # is thread_threshold * fd_threshold_percent -- confirm this is intended.
        if usage.threads > self.thread_threshold * self.fd_threshold_percent:
            return True

        # Check connections (same scaling by the fd fraction as for threads)
        if usage.connections > self.connection_threshold * self.fd_threshold_percent:
            return True

        # Check disk space.
        # NOTE(review): if the disk probe failed, disk_free_mb keeps the
        # ResourceUsage default; verify that default cannot trip this check.
        if usage.disk_free_mb < self.disk_threshold_mb:
            return True

        return False

    def _get_max_fds(self) -> int:
        """
        Get maximum file descriptors allowed (ulimit -n).

        The soft RLIMIT_NOFILE of the *current* (monitoring) process is
        returned, not a limit queried from the monitored process.

        Returns:
            Maximum file descriptors, or 0 if cannot determine
        """
        # NOTE(review): the stdlib `resource` module is Unix-only and is
        # imported at module level, so this guard alone does not make the
        # module importable on Windows -- confirm Windows support upstream.
        if not self.is_unix:
            return 0

        try:
            soft_limit, _hard_limit = resource_module.getrlimit(
                resource_module.RLIMIT_NOFILE
            )
            return soft_limit
        except (ValueError, OSError):
            return 0

    def _trigger_critical_callbacks(
        self, deployment_id: str, usage: ResourceUsage
    ) -> None:
        """
        Trigger registered callbacks for critical resource usage.

        A failing callback is logged and does not prevent the remaining
        callbacks from running.

        Args:
            deployment_id: Deployment with critical usage
            usage: ResourceUsage with critical metrics
        """
        # Snapshot under the lock so concurrent register/shutdown calls cannot
        # mutate the list mid-iteration; invoke outside the lock so callbacks
        # may safely call back into this monitor.
        with self._lock:
            callbacks = list(self._critical_callbacks)

        for callback in callbacks:
            try:
                callback(deployment_id, usage)
            except Exception as e:
                self.log_error(
                    f"Error in critical callback {self._callback_name(callback)}: {e}"
                )
329
+
330
+
331
+ __all__ = ["ResourceMonitor"]