claude-mpm 4.13.2__py3-none-any.whl → 4.18.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (250)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/agents/BASE_ENGINEER.md +286 -0
  3. claude_mpm/agents/BASE_PM.md +48 -17
  4. claude_mpm/agents/OUTPUT_STYLE.md +329 -11
  5. claude_mpm/agents/PM_INSTRUCTIONS.md +227 -8
  6. claude_mpm/agents/agent_loader.py +17 -5
  7. claude_mpm/agents/frontmatter_validator.py +284 -253
  8. claude_mpm/agents/templates/agentic-coder-optimizer.json +9 -2
  9. claude_mpm/agents/templates/api_qa.json +7 -1
  10. claude_mpm/agents/templates/clerk-ops.json +8 -1
  11. claude_mpm/agents/templates/code_analyzer.json +4 -1
  12. claude_mpm/agents/templates/dart_engineer.json +11 -1
  13. claude_mpm/agents/templates/data_engineer.json +11 -1
  14. claude_mpm/agents/templates/documentation.json +6 -1
  15. claude_mpm/agents/templates/engineer.json +18 -1
  16. claude_mpm/agents/templates/gcp_ops_agent.json +8 -1
  17. claude_mpm/agents/templates/golang_engineer.json +11 -1
  18. claude_mpm/agents/templates/java_engineer.json +12 -2
  19. claude_mpm/agents/templates/local_ops_agent.json +1217 -6
  20. claude_mpm/agents/templates/nextjs_engineer.json +11 -1
  21. claude_mpm/agents/templates/ops.json +8 -1
  22. claude_mpm/agents/templates/php-engineer.json +11 -1
  23. claude_mpm/agents/templates/project_organizer.json +10 -3
  24. claude_mpm/agents/templates/prompt-engineer.json +5 -1
  25. claude_mpm/agents/templates/python_engineer.json +11 -1
  26. claude_mpm/agents/templates/qa.json +7 -1
  27. claude_mpm/agents/templates/react_engineer.json +11 -1
  28. claude_mpm/agents/templates/refactoring_engineer.json +8 -1
  29. claude_mpm/agents/templates/research.json +4 -1
  30. claude_mpm/agents/templates/ruby-engineer.json +11 -1
  31. claude_mpm/agents/templates/rust_engineer.json +11 -1
  32. claude_mpm/agents/templates/security.json +6 -1
  33. claude_mpm/agents/templates/svelte-engineer.json +225 -0
  34. claude_mpm/agents/templates/ticketing.json +6 -1
  35. claude_mpm/agents/templates/typescript_engineer.json +11 -1
  36. claude_mpm/agents/templates/vercel_ops_agent.json +8 -1
  37. claude_mpm/agents/templates/version_control.json +8 -1
  38. claude_mpm/agents/templates/web_qa.json +7 -1
  39. claude_mpm/agents/templates/web_ui.json +11 -1
  40. claude_mpm/cli/__init__.py +34 -706
  41. claude_mpm/cli/commands/agent_manager.py +25 -12
  42. claude_mpm/cli/commands/agent_state_manager.py +186 -0
  43. claude_mpm/cli/commands/agents.py +204 -148
  44. claude_mpm/cli/commands/aggregate.py +7 -3
  45. claude_mpm/cli/commands/analyze.py +9 -4
  46. claude_mpm/cli/commands/analyze_code.py +7 -2
  47. claude_mpm/cli/commands/auto_configure.py +7 -9
  48. claude_mpm/cli/commands/config.py +47 -13
  49. claude_mpm/cli/commands/configure.py +294 -1788
  50. claude_mpm/cli/commands/configure_agent_display.py +261 -0
  51. claude_mpm/cli/commands/configure_behavior_manager.py +204 -0
  52. claude_mpm/cli/commands/configure_hook_manager.py +225 -0
  53. claude_mpm/cli/commands/configure_models.py +18 -0
  54. claude_mpm/cli/commands/configure_navigation.py +167 -0
  55. claude_mpm/cli/commands/configure_paths.py +104 -0
  56. claude_mpm/cli/commands/configure_persistence.py +254 -0
  57. claude_mpm/cli/commands/configure_startup_manager.py +646 -0
  58. claude_mpm/cli/commands/configure_template_editor.py +497 -0
  59. claude_mpm/cli/commands/configure_validators.py +73 -0
  60. claude_mpm/cli/commands/local_deploy.py +537 -0
  61. claude_mpm/cli/commands/memory.py +54 -20
  62. claude_mpm/cli/commands/mpm_init.py +39 -25
  63. claude_mpm/cli/commands/mpm_init_handler.py +8 -3
  64. claude_mpm/cli/executor.py +202 -0
  65. claude_mpm/cli/helpers.py +105 -0
  66. claude_mpm/cli/interactive/__init__.py +3 -0
  67. claude_mpm/cli/interactive/skills_wizard.py +491 -0
  68. claude_mpm/cli/parsers/__init__.py +7 -1
  69. claude_mpm/cli/parsers/base_parser.py +98 -3
  70. claude_mpm/cli/parsers/local_deploy_parser.py +227 -0
  71. claude_mpm/cli/shared/output_formatters.py +28 -19
  72. claude_mpm/cli/startup.py +481 -0
  73. claude_mpm/cli/utils.py +52 -1
  74. claude_mpm/commands/mpm-help.md +3 -0
  75. claude_mpm/commands/mpm-version.md +113 -0
  76. claude_mpm/commands/mpm.md +1 -0
  77. claude_mpm/config/agent_config.py +2 -2
  78. claude_mpm/config/model_config.py +428 -0
  79. claude_mpm/core/base_service.py +13 -12
  80. claude_mpm/core/enums.py +452 -0
  81. claude_mpm/core/factories.py +1 -1
  82. claude_mpm/core/instruction_reinforcement_hook.py +2 -1
  83. claude_mpm/core/interactive_session.py +9 -3
  84. claude_mpm/core/logging_config.py +6 -2
  85. claude_mpm/core/oneshot_session.py +8 -4
  86. claude_mpm/core/optimized_agent_loader.py +3 -3
  87. claude_mpm/core/output_style_manager.py +12 -192
  88. claude_mpm/core/service_registry.py +5 -1
  89. claude_mpm/core/types.py +2 -9
  90. claude_mpm/core/typing_utils.py +7 -6
  91. claude_mpm/dashboard/static/js/dashboard.js +0 -14
  92. claude_mpm/dashboard/templates/index.html +3 -41
  93. claude_mpm/hooks/claude_hooks/response_tracking.py +35 -1
  94. claude_mpm/hooks/instruction_reinforcement.py +7 -2
  95. claude_mpm/models/resume_log.py +340 -0
  96. claude_mpm/services/agents/auto_config_manager.py +10 -11
  97. claude_mpm/services/agents/deployment/agent_configuration_manager.py +1 -1
  98. claude_mpm/services/agents/deployment/agent_record_service.py +1 -1
  99. claude_mpm/services/agents/deployment/agent_validator.py +17 -1
  100. claude_mpm/services/agents/deployment/async_agent_deployment.py +1 -1
  101. claude_mpm/services/agents/deployment/interface_adapter.py +3 -2
  102. claude_mpm/services/agents/deployment/local_template_deployment.py +1 -1
  103. claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +7 -6
  104. claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +7 -16
  105. claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +4 -3
  106. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +5 -3
  107. claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +6 -5
  108. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +9 -6
  109. claude_mpm/services/agents/deployment/validation/__init__.py +3 -1
  110. claude_mpm/services/agents/deployment/validation/validation_result.py +1 -9
  111. claude_mpm/services/agents/local_template_manager.py +1 -1
  112. claude_mpm/services/agents/memory/agent_memory_manager.py +5 -2
  113. claude_mpm/services/agents/registry/modification_tracker.py +5 -2
  114. claude_mpm/services/command_handler_service.py +11 -5
  115. claude_mpm/services/core/interfaces/__init__.py +74 -2
  116. claude_mpm/services/core/interfaces/health.py +172 -0
  117. claude_mpm/services/core/interfaces/model.py +281 -0
  118. claude_mpm/services/core/interfaces/process.py +372 -0
  119. claude_mpm/services/core/interfaces/restart.py +307 -0
  120. claude_mpm/services/core/interfaces/stability.py +260 -0
  121. claude_mpm/services/core/models/__init__.py +33 -0
  122. claude_mpm/services/core/models/agent_config.py +12 -28
  123. claude_mpm/services/core/models/health.py +162 -0
  124. claude_mpm/services/core/models/process.py +235 -0
  125. claude_mpm/services/core/models/restart.py +302 -0
  126. claude_mpm/services/core/models/stability.py +264 -0
  127. claude_mpm/services/core/path_resolver.py +23 -7
  128. claude_mpm/services/diagnostics/__init__.py +2 -2
  129. claude_mpm/services/diagnostics/checks/agent_check.py +25 -24
  130. claude_mpm/services/diagnostics/checks/claude_code_check.py +24 -23
  131. claude_mpm/services/diagnostics/checks/common_issues_check.py +25 -24
  132. claude_mpm/services/diagnostics/checks/configuration_check.py +24 -23
  133. claude_mpm/services/diagnostics/checks/filesystem_check.py +18 -17
  134. claude_mpm/services/diagnostics/checks/installation_check.py +30 -29
  135. claude_mpm/services/diagnostics/checks/instructions_check.py +20 -19
  136. claude_mpm/services/diagnostics/checks/mcp_check.py +50 -36
  137. claude_mpm/services/diagnostics/checks/mcp_services_check.py +36 -31
  138. claude_mpm/services/diagnostics/checks/monitor_check.py +23 -22
  139. claude_mpm/services/diagnostics/checks/startup_log_check.py +9 -8
  140. claude_mpm/services/diagnostics/diagnostic_runner.py +6 -5
  141. claude_mpm/services/diagnostics/doctor_reporter.py +28 -25
  142. claude_mpm/services/diagnostics/models.py +19 -24
  143. claude_mpm/services/infrastructure/monitoring/__init__.py +1 -1
  144. claude_mpm/services/infrastructure/monitoring/aggregator.py +12 -12
  145. claude_mpm/services/infrastructure/monitoring/base.py +5 -13
  146. claude_mpm/services/infrastructure/monitoring/network.py +7 -6
  147. claude_mpm/services/infrastructure/monitoring/process.py +13 -12
  148. claude_mpm/services/infrastructure/monitoring/resources.py +7 -6
  149. claude_mpm/services/infrastructure/monitoring/service.py +16 -15
  150. claude_mpm/services/infrastructure/resume_log_generator.py +439 -0
  151. claude_mpm/services/local_ops/__init__.py +163 -0
  152. claude_mpm/services/local_ops/crash_detector.py +257 -0
  153. claude_mpm/services/local_ops/health_checks/__init__.py +28 -0
  154. claude_mpm/services/local_ops/health_checks/http_check.py +224 -0
  155. claude_mpm/services/local_ops/health_checks/process_check.py +236 -0
  156. claude_mpm/services/local_ops/health_checks/resource_check.py +255 -0
  157. claude_mpm/services/local_ops/health_manager.py +430 -0
  158. claude_mpm/services/local_ops/log_monitor.py +396 -0
  159. claude_mpm/services/local_ops/memory_leak_detector.py +294 -0
  160. claude_mpm/services/local_ops/process_manager.py +595 -0
  161. claude_mpm/services/local_ops/resource_monitor.py +331 -0
  162. claude_mpm/services/local_ops/restart_manager.py +401 -0
  163. claude_mpm/services/local_ops/restart_policy.py +387 -0
  164. claude_mpm/services/local_ops/state_manager.py +372 -0
  165. claude_mpm/services/local_ops/unified_manager.py +600 -0
  166. claude_mpm/services/mcp_config_manager.py +9 -4
  167. claude_mpm/services/mcp_gateway/core/__init__.py +1 -2
  168. claude_mpm/services/mcp_gateway/core/base.py +18 -31
  169. claude_mpm/services/mcp_gateway/tools/external_mcp_services.py +71 -24
  170. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +30 -28
  171. claude_mpm/services/memory_hook_service.py +4 -1
  172. claude_mpm/services/model/__init__.py +147 -0
  173. claude_mpm/services/model/base_provider.py +365 -0
  174. claude_mpm/services/model/claude_provider.py +412 -0
  175. claude_mpm/services/model/model_router.py +453 -0
  176. claude_mpm/services/model/ollama_provider.py +415 -0
  177. claude_mpm/services/monitor/daemon_manager.py +3 -2
  178. claude_mpm/services/monitor/handlers/dashboard.py +2 -1
  179. claude_mpm/services/monitor/handlers/hooks.py +2 -1
  180. claude_mpm/services/monitor/management/lifecycle.py +3 -2
  181. claude_mpm/services/monitor/server.py +2 -1
  182. claude_mpm/services/session_management_service.py +3 -2
  183. claude_mpm/services/session_manager.py +205 -1
  184. claude_mpm/services/shared/async_service_base.py +16 -27
  185. claude_mpm/services/shared/lifecycle_service_base.py +1 -14
  186. claude_mpm/services/socketio/handlers/__init__.py +5 -2
  187. claude_mpm/services/socketio/handlers/hook.py +13 -2
  188. claude_mpm/services/socketio/handlers/registry.py +4 -2
  189. claude_mpm/services/socketio/server/main.py +10 -8
  190. claude_mpm/services/subprocess_launcher_service.py +14 -5
  191. claude_mpm/services/unified/analyzer_strategies/code_analyzer.py +8 -7
  192. claude_mpm/services/unified/analyzer_strategies/dependency_analyzer.py +6 -5
  193. claude_mpm/services/unified/analyzer_strategies/performance_analyzer.py +8 -7
  194. claude_mpm/services/unified/analyzer_strategies/security_analyzer.py +7 -6
  195. claude_mpm/services/unified/analyzer_strategies/structure_analyzer.py +5 -4
  196. claude_mpm/services/unified/config_strategies/validation_strategy.py +13 -9
  197. claude_mpm/services/unified/deployment_strategies/cloud_strategies.py +10 -3
  198. claude_mpm/services/unified/deployment_strategies/local.py +6 -5
  199. claude_mpm/services/unified/deployment_strategies/utils.py +6 -5
  200. claude_mpm/services/unified/deployment_strategies/vercel.py +7 -6
  201. claude_mpm/services/unified/interfaces.py +3 -1
  202. claude_mpm/services/unified/unified_analyzer.py +14 -10
  203. claude_mpm/services/unified/unified_config.py +2 -1
  204. claude_mpm/services/unified/unified_deployment.py +9 -4
  205. claude_mpm/services/version_service.py +104 -1
  206. claude_mpm/skills/__init__.py +21 -0
  207. claude_mpm/skills/bundled/__init__.py +6 -0
  208. claude_mpm/skills/bundled/api-documentation.md +393 -0
  209. claude_mpm/skills/bundled/async-testing.md +571 -0
  210. claude_mpm/skills/bundled/code-review.md +143 -0
  211. claude_mpm/skills/bundled/database-migration.md +199 -0
  212. claude_mpm/skills/bundled/docker-containerization.md +194 -0
  213. claude_mpm/skills/bundled/express-local-dev.md +1429 -0
  214. claude_mpm/skills/bundled/fastapi-local-dev.md +1199 -0
  215. claude_mpm/skills/bundled/git-workflow.md +414 -0
  216. claude_mpm/skills/bundled/imagemagick.md +204 -0
  217. claude_mpm/skills/bundled/json-data-handling.md +223 -0
  218. claude_mpm/skills/bundled/nextjs-local-dev.md +807 -0
  219. claude_mpm/skills/bundled/pdf.md +141 -0
  220. claude_mpm/skills/bundled/performance-profiling.md +567 -0
  221. claude_mpm/skills/bundled/refactoring-patterns.md +180 -0
  222. claude_mpm/skills/bundled/security-scanning.md +327 -0
  223. claude_mpm/skills/bundled/systematic-debugging.md +473 -0
  224. claude_mpm/skills/bundled/test-driven-development.md +378 -0
  225. claude_mpm/skills/bundled/vite-local-dev.md +1061 -0
  226. claude_mpm/skills/bundled/web-performance-optimization.md +2305 -0
  227. claude_mpm/skills/bundled/xlsx.md +157 -0
  228. claude_mpm/skills/registry.py +286 -0
  229. claude_mpm/skills/skill_manager.py +310 -0
  230. claude_mpm/tools/code_tree_analyzer.py +177 -141
  231. claude_mpm/tools/code_tree_events.py +4 -2
  232. claude_mpm/utils/agent_dependency_loader.py +2 -2
  233. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/METADATA +117 -8
  234. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/RECORD +238 -174
  235. claude_mpm/dashboard/static/css/code-tree.css +0 -1639
  236. claude_mpm/dashboard/static/js/components/code-tree/tree-breadcrumb.js +0 -353
  237. claude_mpm/dashboard/static/js/components/code-tree/tree-constants.js +0 -235
  238. claude_mpm/dashboard/static/js/components/code-tree/tree-search.js +0 -409
  239. claude_mpm/dashboard/static/js/components/code-tree/tree-utils.js +0 -435
  240. claude_mpm/dashboard/static/js/components/code-tree.js +0 -5869
  241. claude_mpm/dashboard/static/js/components/code-viewer.js +0 -1386
  242. claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +0 -425
  243. claude_mpm/hooks/claude_hooks/hook_handler_original.py +0 -1041
  244. claude_mpm/hooks/claude_hooks/hook_handler_refactored.py +0 -347
  245. claude_mpm/services/agents/deployment/agent_lifecycle_manager_refactored.py +0 -575
  246. claude_mpm/services/project/analyzer_refactored.py +0 -450
  247. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/WHEEL +0 -0
  248. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/entry_points.txt +0 -0
  249. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/licenses/LICENSE +0 -0
  250. {claude_mpm-4.13.2.dist-info → claude_mpm-4.18.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,236 @@
1
+ """
2
+ Process Health Check for Claude MPM Framework
3
+ ==============================================
4
+
5
+ WHY: Provides process-level health monitoring including existence validation,
6
+ status checking (running/zombie/stopped), and exit code detection.
7
+
8
+ DESIGN DECISION: Uses psutil for cross-platform process monitoring. Validates
9
+ process existence, status, and parent-child relationships.
10
+
11
+ ARCHITECTURE:
12
+ - Process existence verification with psutil.Process(pid)
13
+ - Process status checking (running, zombie, stopped, sleeping)
14
+ - Exit code detection for dead processes
15
+ - Parent-child relationship validation
16
+ - Process responsiveness checking (not hung)
17
+
18
+ USAGE:
19
+ process_check = ProcessHealthCheck(process_manager)
20
+ result = process_check.check(deployment_id="my-app")
21
+ """
22
+
23
+ import psutil
24
+
25
+ from claude_mpm.core.enums import HealthStatus
26
+ from claude_mpm.services.core.base import SyncBaseService
27
+ from claude_mpm.services.core.interfaces.health import IHealthCheck
28
+ from claude_mpm.services.core.interfaces.process import ILocalProcessManager
29
+ from claude_mpm.services.core.models.health import HealthCheckResult
30
+
31
+
32
class ProcessHealthCheck(SyncBaseService, IHealthCheck):
    """
    Process status health check implementation.

    WHY: Validates that the process behind a deployment is running properly
    and not in a degraded state (zombie, stopped, dead, or apparently hung).

    Thread Safety: check() stores nothing on the instance; it only reads
    ``self.process_manager``.
    """

    def __init__(self, process_manager: ILocalProcessManager):
        """
        Initialize process health check.

        Args:
            process_manager: Process manager whose ``state_manager`` is used
                to look up deployments by id
        """
        super().__init__("ProcessHealthCheck")
        self.process_manager = process_manager

    def initialize(self) -> bool:
        """
        Initialize the health check.

        Returns:
            True if initialization successful
        """
        self._initialized = True
        self.log_info("Process health check initialized")
        return True

    def shutdown(self) -> None:
        """Shutdown health check (no resources to clean up)."""
        self._shutdown = True

    def get_check_type(self) -> str:
        """Get the check type identifier."""
        return "process"

    def _result(self, status, message: str, details: dict) -> HealthCheckResult:
        """Build a HealthCheckResult tagged with this check's type."""
        return HealthCheckResult(
            status=status,
            check_type=self.get_check_type(),
            message=message,
            details=details,
        )

    def check(self, deployment_id: str, **kwargs) -> HealthCheckResult:
        """
        Execute process health check for a deployment.

        Args:
            deployment_id: Unique deployment identifier
            **kwargs: Optional parameters:
                - check_responsiveness: Check if process is responsive (default: True)

        Returns:
            HealthCheckResult with one of:
                - UNHEALTHY: process missing, not running, zombie, stopped or dead
                - DEGRADED: 0% CPU while not in a sleeping/idle state
                - UNKNOWN: access denied or an unexpected error occurred
                - HEALTHY: none of the above

        Raises:
            ValueError: If deployment_id not found
        """
        # Validate deployment exists
        deployment = self.process_manager.state_manager.get_deployment(deployment_id)
        if not deployment:
            raise ValueError(f"Deployment not found: {deployment_id}")

        check_responsiveness = kwargs.get("check_responsiveness", True)

        try:
            process = psutil.Process(deployment.process_id)

            # Check if process exists and is running
            if not process.is_running():
                return self._result(
                    HealthStatus.UNHEALTHY,
                    "Process is not running",
                    {
                        "pid": deployment.process_id,
                        "deployment_id": deployment_id,
                    },
                )

            process_status = process.status()
            status_details = {
                "pid": deployment.process_id,
                "status": process_status,
                "deployment_id": deployment_id,
            }

            # Check for zombie process
            if process_status == psutil.STATUS_ZOMBIE:
                return self._result(
                    HealthStatus.UNHEALTHY, "Process is a zombie", status_details
                )

            # Check for stopped process
            if process_status in (psutil.STATUS_STOPPED, psutil.STATUS_DEAD):
                return self._result(
                    HealthStatus.UNHEALTHY,
                    f"Process is {process_status}",
                    status_details,
                )

            # Check responsiveness (CPU activity)
            if check_responsiveness:
                try:
                    # Blocks for the sampling interval (0.1s)
                    cpu_percent = process.cpu_percent(interval=0.1)
                except psutil.NoSuchProcess:
                    # Process vanished between the status probe and the sample
                    return self._result(
                        HealthStatus.UNHEALTHY,
                        "Process disappeared during check",
                        {
                            "pid": deployment.process_id,
                            "deployment_id": deployment_id,
                        },
                    )
                # psutil.AccessDenied is deliberately NOT handled here: a
                # permission error does not show that the process is gone, so
                # it falls through to the outer handler and yields UNKNOWN.

                # A process with some CPU activity, or one that is legitimately
                # sleeping/idle, counts as responsive. 0% CPU in any other
                # state might indicate a hang.
                # NOTE(review): this relies on the platform reporting
                # sleeping/idle states; confirm behaviour on platforms where
                # psutil exposes a coarser set of statuses (e.g. Windows).
                is_responsive = cpu_percent > 0 or process_status in (
                    psutil.STATUS_SLEEPING,
                    psutil.STATUS_IDLE,
                )

                if not is_responsive:
                    return self._result(
                        HealthStatus.DEGRADED,
                        "Process may be unresponsive",
                        {
                            "pid": deployment.process_id,
                            "status": process_status,
                            "cpu_percent": cpu_percent,
                            "deployment_id": deployment_id,
                        },
                    )

            # Process is healthy; enrich details on a best-effort basis.
            # Both extra fields are added together or not at all.
            process_info = dict(status_details)
            try:
                process_info.update(
                    name=process.name(),
                    num_threads=process.num_threads(),
                )
            except (psutil.NoSuchProcess, psutil.AccessDenied):
                pass

            return self._result(
                HealthStatus.HEALTHY, "Process is running normally", process_info
            )

        except psutil.NoSuchProcess:
            # Process does not exist
            return self._result(
                HealthStatus.UNHEALTHY,
                "Process no longer exists",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                },
            )

        except psutil.AccessDenied as e:
            # Cannot access process information
            return self._result(
                HealthStatus.UNKNOWN,
                "Cannot access process information",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                    "error": str(e),
                },
            )

        except Exception as e:
            # Unexpected error: log it and report UNKNOWN instead of raising
            self.log_error(f"Unexpected error in process health check: {e}")
            return self._result(
                HealthStatus.UNKNOWN,
                "Health check failed with error",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                    "error": str(e),
                },
            )
234
+
235
+
236
+ __all__ = ["ProcessHealthCheck"]
@@ -0,0 +1,255 @@
1
+ """
2
+ Resource Health Check for Claude MPM Framework
3
+ ===============================================
4
+
5
+ WHY: Provides resource usage monitoring including CPU, memory, file descriptors,
6
+ threads, and network connections to detect resource exhaustion issues.
7
+
8
+ DESIGN DECISION: Uses psutil for cross-platform resource monitoring with
9
+ configurable thresholds for different resource types.
10
+
11
+ ARCHITECTURE:
12
+ - CPU usage monitoring (threshold: 80%)
13
+ - Memory usage monitoring (threshold: 500MB)
14
+ - File descriptor count (threshold: 1000, Unix only)
15
+ - Thread count monitoring
16
+ - Network connection count (open sockets)
17
+
18
+ USAGE:
19
+ resource_check = ResourceHealthCheck(process_manager)
20
+ result = resource_check.check(
21
+ deployment_id="my-app",
22
+ cpu_threshold=80.0,
23
+ memory_threshold_mb=500.0
24
+ )
25
+ """
26
+
27
+ import platform
28
+
29
+ import psutil
30
+
31
+ from claude_mpm.core.enums import HealthStatus
32
+ from claude_mpm.services.core.base import SyncBaseService
33
+ from claude_mpm.services.core.interfaces.health import IHealthCheck
34
+ from claude_mpm.services.core.interfaces.process import ILocalProcessManager
35
+ from claude_mpm.services.core.models.health import HealthCheckResult
36
+
37
+
38
class ResourceHealthCheck(SyncBaseService, IHealthCheck):
    """
    Resource usage health check implementation.

    WHY: Monitors resource consumption to detect issues before they
    cause service degradation or failures.

    Thread Safety: check() stores nothing on the instance; it only reads
    ``self.process_manager`` and ``self.is_windows``.
    """

    # Default thresholds
    DEFAULT_CPU_THRESHOLD = 80.0  # Percentage
    DEFAULT_MEMORY_THRESHOLD_MB = 500.0  # Megabytes
    DEFAULT_FD_THRESHOLD = 1000  # File descriptors (Unix only)
    DEFAULT_THREAD_THRESHOLD = 100  # Threads

    def __init__(self, process_manager: ILocalProcessManager):
        """
        Initialize resource health check.

        Args:
            process_manager: Process manager whose ``state_manager`` is used
                to look up deployments by id
        """
        super().__init__("ResourceHealthCheck")
        self.process_manager = process_manager
        self.is_windows = platform.system() == "Windows"

    def initialize(self) -> bool:
        """
        Initialize the health check.

        Returns:
            True if initialization successful
        """
        self._initialized = True
        self.log_info("Resource health check initialized")
        return True

    def shutdown(self) -> None:
        """Shutdown health check (no resources to clean up)."""
        self._shutdown = True

    def get_check_type(self) -> str:
        """Get the check type identifier."""
        return "resource"

    def _result(self, status, message: str, details: dict) -> HealthCheckResult:
        """Build a HealthCheckResult tagged with this check's type."""
        return HealthCheckResult(
            status=status,
            check_type=self.get_check_type(),
            message=message,
            details=details,
        )

    def check(self, deployment_id: str, **kwargs) -> HealthCheckResult:
        """
        Execute resource health check for a deployment.

        Each metric is collected on a best-effort basis: a metric that cannot
        be read because of insufficient permissions is simply omitted from
        the details. A process that disappears mid-check is reported as
        UNHEALTHY rather than as healthy with missing metrics.

        Args:
            deployment_id: Unique deployment identifier
            **kwargs: Optional parameters:
                - cpu_threshold: CPU usage threshold percentage (default: 80.0)
                - memory_threshold_mb: Memory usage threshold in MB (default: 500.0)
                - fd_threshold: File descriptor threshold (default: 1000, Unix only)
                - thread_threshold: Thread count threshold (default: 100)

        Returns:
            HealthCheckResult with one of:
                - DEGRADED: at least one metric exceeded its threshold
                - HEALTHY: no threshold exceeded
                - UNHEALTHY: the process does not exist (any more)
                - UNKNOWN: access denied or an unexpected error occurred

        Raises:
            ValueError: If deployment_id not found
        """
        # Validate deployment exists
        deployment = self.process_manager.state_manager.get_deployment(deployment_id)
        if not deployment:
            raise ValueError(f"Deployment not found: {deployment_id}")

        # Get thresholds from kwargs
        cpu_threshold = kwargs.get("cpu_threshold", self.DEFAULT_CPU_THRESHOLD)
        memory_threshold_mb = kwargs.get(
            "memory_threshold_mb", self.DEFAULT_MEMORY_THRESHOLD_MB
        )
        fd_threshold = kwargs.get("fd_threshold", self.DEFAULT_FD_THRESHOLD)
        thread_threshold = kwargs.get("thread_threshold", self.DEFAULT_THREAD_THRESHOLD)

        try:
            process = psutil.Process(deployment.process_id)

            # Collect resource metrics
            details = {
                "pid": deployment.process_id,
                "deployment_id": deployment_id,
            }
            issues = []

            # NOTE: the per-metric handlers below tolerate AccessDenied only.
            # NoSuchProcess is left to the outer handler so that a process
            # dying mid-check is not reported as HEALTHY with empty metrics.

            # Check CPU usage (blocks for the 0.1s sampling interval)
            try:
                cpu_percent = process.cpu_percent(interval=0.1)
            except psutil.AccessDenied:
                pass
            else:
                details["cpu_percent"] = round(cpu_percent, 2)
                details["cpu_threshold"] = cpu_threshold
                if cpu_percent > cpu_threshold:
                    issues.append(
                        f"High CPU usage: {cpu_percent:.1f}% (threshold: {cpu_threshold}%)"
                    )

            # Check memory usage (resident set size, converted to MB)
            try:
                memory_info = process.memory_info()
            except psutil.AccessDenied:
                pass
            else:
                memory_mb = memory_info.rss / (1024 * 1024)
                details["memory_mb"] = round(memory_mb, 2)
                details["memory_threshold_mb"] = memory_threshold_mb
                if memory_mb > memory_threshold_mb:
                    issues.append(
                        f"High memory usage: {memory_mb:.1f}MB (threshold: {memory_threshold_mb}MB)"
                    )

            # Check file descriptors (Unix only)
            if not self.is_windows:
                try:
                    num_fds = process.num_fds()
                except (psutil.AccessDenied, AttributeError):
                    # num_fds() not available on all platforms
                    pass
                else:
                    details["num_fds"] = num_fds
                    details["fd_threshold"] = fd_threshold
                    if num_fds > fd_threshold:
                        issues.append(
                            f"High file descriptor count: {num_fds} (threshold: {fd_threshold})"
                        )

            # Check thread count
            try:
                num_threads = process.num_threads()
            except psutil.AccessDenied:
                pass
            else:
                details["num_threads"] = num_threads
                details["thread_threshold"] = thread_threshold
                if num_threads > thread_threshold:
                    issues.append(
                        f"High thread count: {num_threads} (threshold: {thread_threshold})"
                    )

            # Check connection count.
            # Process.net_connections() only exists in psutil >= 6.0; older
            # releases expose the same data as Process.connections(). Without
            # this fallback an AttributeError would discard every metric
            # collected above and turn the whole check into UNKNOWN.
            list_connections = getattr(process, "net_connections", None)
            if list_connections is None:
                list_connections = process.connections
            try:
                connections = list_connections()
            except psutil.AccessDenied:
                pass
            else:
                details["num_connections"] = len(connections)

                # Add connection breakdown by state
                connection_states = {}
                for conn in connections:
                    state = conn.status
                    connection_states[state] = connection_states.get(state, 0) + 1
                details["connection_states"] = connection_states

            # Determine health status based on issues
            if issues:
                return self._result(
                    HealthStatus.DEGRADED,
                    f"Resource usage issues detected: {'; '.join(issues)}",
                    details,
                )
            return self._result(
                HealthStatus.HEALTHY,
                "Resource usage within normal limits",
                details,
            )

        except psutil.NoSuchProcess:
            # Process does not exist (or vanished while metrics were collected)
            return self._result(
                HealthStatus.UNHEALTHY,
                "Process no longer exists",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                },
            )

        except psutil.AccessDenied as e:
            # Cannot access process information
            return self._result(
                HealthStatus.UNKNOWN,
                "Cannot access process resource information",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                    "error": str(e),
                },
            )

        except Exception as e:
            # Unexpected error: log it and report UNKNOWN instead of raising
            self.log_error(f"Unexpected error in resource health check: {e}")
            return self._result(
                HealthStatus.UNKNOWN,
                "Health check failed with error",
                {
                    "pid": deployment.process_id,
                    "deployment_id": deployment_id,
                    "error": str(e),
                },
            )
253
+
254
+
255
+ __all__ = ["ResourceHealthCheck"]