claude-mpm: claude_mpm-4.1.1-py3-none-any.whl → claude_mpm-4.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (357)
  1. claude_mpm/BUILD_NUMBER +1 -1
  2. claude_mpm/VERSION +1 -1
  3. claude_mpm/__main__.py +1 -1
  4. claude_mpm/agents/BASE_PM.md +74 -46
  5. claude_mpm/agents/INSTRUCTIONS.md +11 -153
  6. claude_mpm/agents/WORKFLOW.md +61 -321
  7. claude_mpm/agents/__init__.py +11 -11
  8. claude_mpm/agents/agent_loader.py +23 -20
  9. claude_mpm/agents/agent_loader_integration.py +1 -1
  10. claude_mpm/agents/agents_metadata.py +27 -0
  11. claude_mpm/agents/async_agent_loader.py +5 -8
  12. claude_mpm/agents/base_agent_loader.py +36 -25
  13. claude_mpm/agents/frontmatter_validator.py +6 -6
  14. claude_mpm/agents/schema/agent_schema.json +1 -1
  15. claude_mpm/agents/system_agent_config.py +9 -9
  16. claude_mpm/agents/templates/api_qa.json +47 -2
  17. claude_mpm/agents/templates/imagemagick.json +256 -0
  18. claude_mpm/agents/templates/qa.json +41 -2
  19. claude_mpm/agents/templates/ticketing.json +5 -5
  20. claude_mpm/agents/templates/web_qa.json +50 -2
  21. claude_mpm/cli/__init__.py +51 -46
  22. claude_mpm/cli/__main__.py +1 -1
  23. claude_mpm/cli/commands/__init__.py +10 -12
  24. claude_mpm/cli/commands/agent_manager.py +186 -181
  25. claude_mpm/cli/commands/agents.py +271 -268
  26. claude_mpm/cli/commands/aggregate.py +30 -29
  27. claude_mpm/cli/commands/cleanup.py +50 -44
  28. claude_mpm/cli/commands/cleanup_orphaned_agents.py +25 -25
  29. claude_mpm/cli/commands/config.py +162 -127
  30. claude_mpm/cli/commands/doctor.py +52 -62
  31. claude_mpm/cli/commands/info.py +37 -25
  32. claude_mpm/cli/commands/mcp.py +3 -7
  33. claude_mpm/cli/commands/mcp_command_router.py +14 -18
  34. claude_mpm/cli/commands/mcp_install_commands.py +28 -23
  35. claude_mpm/cli/commands/mcp_pipx_config.py +58 -49
  36. claude_mpm/cli/commands/mcp_server_commands.py +23 -17
  37. claude_mpm/cli/commands/memory.py +192 -141
  38. claude_mpm/cli/commands/monitor.py +117 -88
  39. claude_mpm/cli/commands/run.py +120 -84
  40. claude_mpm/cli/commands/run_config_checker.py +4 -5
  41. claude_mpm/cli/commands/socketio_monitor.py +17 -19
  42. claude_mpm/cli/commands/tickets.py +92 -92
  43. claude_mpm/cli/parser.py +1 -5
  44. claude_mpm/cli/parsers/__init__.py +1 -1
  45. claude_mpm/cli/parsers/agent_manager_parser.py +50 -98
  46. claude_mpm/cli/parsers/agents_parser.py +2 -3
  47. claude_mpm/cli/parsers/base_parser.py +7 -5
  48. claude_mpm/cli/parsers/mcp_parser.py +4 -2
  49. claude_mpm/cli/parsers/monitor_parser.py +26 -18
  50. claude_mpm/cli/shared/__init__.py +10 -10
  51. claude_mpm/cli/shared/argument_patterns.py +57 -71
  52. claude_mpm/cli/shared/base_command.py +61 -53
  53. claude_mpm/cli/shared/error_handling.py +62 -58
  54. claude_mpm/cli/shared/output_formatters.py +78 -77
  55. claude_mpm/cli/startup_logging.py +204 -172
  56. claude_mpm/cli/utils.py +10 -11
  57. claude_mpm/cli_module/__init__.py +1 -1
  58. claude_mpm/cli_module/args.py +1 -1
  59. claude_mpm/cli_module/migration_example.py +5 -5
  60. claude_mpm/config/__init__.py +9 -9
  61. claude_mpm/config/agent_config.py +15 -14
  62. claude_mpm/config/experimental_features.py +4 -4
  63. claude_mpm/config/paths.py +0 -1
  64. claude_mpm/config/socketio_config.py +5 -6
  65. claude_mpm/constants.py +1 -2
  66. claude_mpm/core/__init__.py +8 -8
  67. claude_mpm/core/agent_name_normalizer.py +1 -1
  68. claude_mpm/core/agent_registry.py +20 -23
  69. claude_mpm/core/agent_session_manager.py +3 -3
  70. claude_mpm/core/base_service.py +7 -15
  71. claude_mpm/core/cache.py +4 -6
  72. claude_mpm/core/claude_runner.py +85 -113
  73. claude_mpm/core/config.py +43 -28
  74. claude_mpm/core/config_aliases.py +0 -9
  75. claude_mpm/core/config_constants.py +52 -30
  76. claude_mpm/core/constants.py +0 -1
  77. claude_mpm/core/container.py +18 -27
  78. claude_mpm/core/exceptions.py +2 -2
  79. claude_mpm/core/factories.py +10 -12
  80. claude_mpm/core/framework_loader.py +581 -280
  81. claude_mpm/core/hook_manager.py +26 -22
  82. claude_mpm/core/hook_performance_config.py +58 -47
  83. claude_mpm/core/injectable_service.py +1 -1
  84. claude_mpm/core/interactive_session.py +61 -152
  85. claude_mpm/core/interfaces.py +1 -100
  86. claude_mpm/core/lazy.py +5 -5
  87. claude_mpm/core/log_manager.py +587 -0
  88. claude_mpm/core/logger.py +125 -8
  89. claude_mpm/core/logging_config.py +15 -15
  90. claude_mpm/core/minimal_framework_loader.py +5 -8
  91. claude_mpm/core/oneshot_session.py +15 -33
  92. claude_mpm/core/optimized_agent_loader.py +4 -6
  93. claude_mpm/core/optimized_startup.py +2 -1
  94. claude_mpm/core/output_style_manager.py +147 -106
  95. claude_mpm/core/pm_hook_interceptor.py +0 -1
  96. claude_mpm/core/service_registry.py +11 -8
  97. claude_mpm/core/session_manager.py +1 -2
  98. claude_mpm/core/shared/__init__.py +1 -1
  99. claude_mpm/core/shared/config_loader.py +101 -97
  100. claude_mpm/core/shared/path_resolver.py +72 -68
  101. claude_mpm/core/shared/singleton_manager.py +56 -50
  102. claude_mpm/core/socketio_pool.py +26 -6
  103. claude_mpm/core/tool_access_control.py +4 -5
  104. claude_mpm/core/typing_utils.py +50 -59
  105. claude_mpm/core/unified_agent_registry.py +14 -19
  106. claude_mpm/core/unified_config.py +4 -6
  107. claude_mpm/core/unified_paths.py +197 -109
  108. claude_mpm/dashboard/open_dashboard.py +2 -4
  109. claude_mpm/experimental/cli_enhancements.py +51 -36
  110. claude_mpm/generators/agent_profile_generator.py +2 -4
  111. claude_mpm/hooks/base_hook.py +1 -2
  112. claude_mpm/hooks/claude_hooks/connection_pool.py +72 -26
  113. claude_mpm/hooks/claude_hooks/event_handlers.py +93 -38
  114. claude_mpm/hooks/claude_hooks/hook_handler.py +130 -76
  115. claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +104 -77
  116. claude_mpm/hooks/claude_hooks/memory_integration.py +2 -4
  117. claude_mpm/hooks/claude_hooks/response_tracking.py +15 -11
  118. claude_mpm/hooks/claude_hooks/tool_analysis.py +12 -18
  119. claude_mpm/hooks/memory_integration_hook.py +5 -5
  120. claude_mpm/hooks/tool_call_interceptor.py +1 -1
  121. claude_mpm/hooks/validation_hooks.py +4 -4
  122. claude_mpm/init.py +4 -9
  123. claude_mpm/models/__init__.py +2 -2
  124. claude_mpm/models/agent_session.py +11 -14
  125. claude_mpm/scripts/mcp_server.py +20 -11
  126. claude_mpm/scripts/mcp_wrapper.py +5 -5
  127. claude_mpm/scripts/mpm_doctor.py +321 -0
  128. claude_mpm/scripts/socketio_daemon.py +28 -25
  129. claude_mpm/scripts/socketio_daemon_hardened.py +298 -258
  130. claude_mpm/scripts/socketio_server_manager.py +116 -95
  131. claude_mpm/services/__init__.py +49 -49
  132. claude_mpm/services/agent_capabilities_service.py +12 -18
  133. claude_mpm/services/agents/__init__.py +22 -22
  134. claude_mpm/services/agents/agent_builder.py +140 -119
  135. claude_mpm/services/agents/deployment/__init__.py +3 -3
  136. claude_mpm/services/agents/deployment/agent_config_provider.py +9 -9
  137. claude_mpm/services/agents/deployment/agent_configuration_manager.py +19 -20
  138. claude_mpm/services/agents/deployment/agent_definition_factory.py +1 -5
  139. claude_mpm/services/agents/deployment/agent_deployment.py +136 -106
  140. claude_mpm/services/agents/deployment/agent_discovery_service.py +4 -8
  141. claude_mpm/services/agents/deployment/agent_environment_manager.py +2 -7
  142. claude_mpm/services/agents/deployment/agent_filesystem_manager.py +6 -10
  143. claude_mpm/services/agents/deployment/agent_format_converter.py +11 -15
  144. claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +2 -3
  145. claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +5 -5
  146. claude_mpm/services/agents/deployment/agent_metrics_collector.py +13 -19
  147. claude_mpm/services/agents/deployment/agent_restore_handler.py +0 -1
  148. claude_mpm/services/agents/deployment/agent_template_builder.py +26 -35
  149. claude_mpm/services/agents/deployment/agent_validator.py +0 -1
  150. claude_mpm/services/agents/deployment/agent_version_manager.py +7 -9
  151. claude_mpm/services/agents/deployment/agent_versioning.py +3 -3
  152. claude_mpm/services/agents/deployment/agents_directory_resolver.py +6 -7
  153. claude_mpm/services/agents/deployment/async_agent_deployment.py +51 -38
  154. claude_mpm/services/agents/deployment/config/__init__.py +1 -1
  155. claude_mpm/services/agents/deployment/config/deployment_config.py +7 -8
  156. claude_mpm/services/agents/deployment/deployment_type_detector.py +1 -1
  157. claude_mpm/services/agents/deployment/deployment_wrapper.py +18 -18
  158. claude_mpm/services/agents/deployment/facade/__init__.py +1 -1
  159. claude_mpm/services/agents/deployment/facade/deployment_executor.py +0 -3
  160. claude_mpm/services/agents/deployment/facade/deployment_facade.py +3 -4
  161. claude_mpm/services/agents/deployment/interface_adapter.py +5 -7
  162. claude_mpm/services/agents/deployment/multi_source_deployment_service.py +345 -276
  163. claude_mpm/services/agents/deployment/pipeline/__init__.py +2 -2
  164. claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +1 -1
  165. claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +6 -4
  166. claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +3 -3
  167. claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +2 -2
  168. claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +14 -13
  169. claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +0 -1
  170. claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +1 -1
  171. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +8 -9
  172. claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +1 -1
  173. claude_mpm/services/agents/deployment/processors/__init__.py +1 -1
  174. claude_mpm/services/agents/deployment/processors/agent_processor.py +20 -16
  175. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +5 -12
  176. claude_mpm/services/agents/deployment/results/__init__.py +1 -1
  177. claude_mpm/services/agents/deployment/results/deployment_result_builder.py +1 -1
  178. claude_mpm/services/agents/deployment/strategies/__init__.py +2 -2
  179. claude_mpm/services/agents/deployment/strategies/base_strategy.py +1 -7
  180. claude_mpm/services/agents/deployment/strategies/project_strategy.py +1 -4
  181. claude_mpm/services/agents/deployment/strategies/system_strategy.py +2 -3
  182. claude_mpm/services/agents/deployment/strategies/user_strategy.py +3 -7
  183. claude_mpm/services/agents/deployment/validation/__init__.py +1 -1
  184. claude_mpm/services/agents/deployment/validation/agent_validator.py +1 -1
  185. claude_mpm/services/agents/deployment/validation/template_validator.py +2 -2
  186. claude_mpm/services/agents/deployment/validation/validation_result.py +2 -6
  187. claude_mpm/services/agents/loading/__init__.py +1 -1
  188. claude_mpm/services/agents/loading/agent_profile_loader.py +6 -12
  189. claude_mpm/services/agents/loading/base_agent_manager.py +5 -5
  190. claude_mpm/services/agents/loading/framework_agent_loader.py +2 -4
  191. claude_mpm/services/agents/management/__init__.py +1 -1
  192. claude_mpm/services/agents/management/agent_capabilities_generator.py +1 -3
  193. claude_mpm/services/agents/management/agent_management_service.py +5 -9
  194. claude_mpm/services/agents/memory/__init__.py +4 -4
  195. claude_mpm/services/agents/memory/agent_memory_manager.py +280 -160
  196. claude_mpm/services/agents/memory/agent_persistence_service.py +0 -2
  197. claude_mpm/services/agents/memory/content_manager.py +44 -38
  198. claude_mpm/services/agents/memory/template_generator.py +4 -6
  199. claude_mpm/services/agents/registry/__init__.py +10 -6
  200. claude_mpm/services/agents/registry/deployed_agent_discovery.py +30 -27
  201. claude_mpm/services/agents/registry/modification_tracker.py +3 -6
  202. claude_mpm/services/async_session_logger.py +1 -2
  203. claude_mpm/services/claude_session_logger.py +1 -2
  204. claude_mpm/services/command_deployment_service.py +173 -0
  205. claude_mpm/services/command_handler_service.py +20 -22
  206. claude_mpm/services/core/__init__.py +25 -25
  207. claude_mpm/services/core/base.py +0 -5
  208. claude_mpm/services/core/interfaces/__init__.py +32 -32
  209. claude_mpm/services/core/interfaces/agent.py +0 -21
  210. claude_mpm/services/core/interfaces/communication.py +0 -27
  211. claude_mpm/services/core/interfaces/infrastructure.py +0 -56
  212. claude_mpm/services/core/interfaces/service.py +0 -29
  213. claude_mpm/services/diagnostics/__init__.py +1 -1
  214. claude_mpm/services/diagnostics/checks/__init__.py +6 -6
  215. claude_mpm/services/diagnostics/checks/agent_check.py +89 -80
  216. claude_mpm/services/diagnostics/checks/base_check.py +12 -16
  217. claude_mpm/services/diagnostics/checks/claude_desktop_check.py +84 -81
  218. claude_mpm/services/diagnostics/checks/common_issues_check.py +99 -91
  219. claude_mpm/services/diagnostics/checks/configuration_check.py +82 -77
  220. claude_mpm/services/diagnostics/checks/filesystem_check.py +67 -68
  221. claude_mpm/services/diagnostics/checks/installation_check.py +254 -94
  222. claude_mpm/services/diagnostics/checks/mcp_check.py +90 -88
  223. claude_mpm/services/diagnostics/checks/monitor_check.py +75 -76
  224. claude_mpm/services/diagnostics/checks/startup_log_check.py +67 -73
  225. claude_mpm/services/diagnostics/diagnostic_runner.py +67 -59
  226. claude_mpm/services/diagnostics/doctor_reporter.py +107 -70
  227. claude_mpm/services/diagnostics/models.py +21 -19
  228. claude_mpm/services/event_aggregator.py +10 -17
  229. claude_mpm/services/event_bus/__init__.py +1 -1
  230. claude_mpm/services/event_bus/config.py +54 -35
  231. claude_mpm/services/event_bus/event_bus.py +76 -71
  232. claude_mpm/services/event_bus/relay.py +74 -64
  233. claude_mpm/services/events/__init__.py +11 -11
  234. claude_mpm/services/events/consumers/__init__.py +3 -3
  235. claude_mpm/services/events/consumers/dead_letter.py +71 -63
  236. claude_mpm/services/events/consumers/logging.py +39 -37
  237. claude_mpm/services/events/consumers/metrics.py +56 -57
  238. claude_mpm/services/events/consumers/socketio.py +82 -81
  239. claude_mpm/services/events/core.py +110 -99
  240. claude_mpm/services/events/interfaces.py +56 -72
  241. claude_mpm/services/events/producers/__init__.py +1 -1
  242. claude_mpm/services/events/producers/hook.py +38 -38
  243. claude_mpm/services/events/producers/system.py +46 -44
  244. claude_mpm/services/exceptions.py +81 -80
  245. claude_mpm/services/framework_claude_md_generator/__init__.py +2 -4
  246. claude_mpm/services/framework_claude_md_generator/content_assembler.py +3 -5
  247. claude_mpm/services/framework_claude_md_generator/content_validator.py +1 -1
  248. claude_mpm/services/framework_claude_md_generator/deployment_manager.py +4 -4
  249. claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +0 -1
  250. claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +0 -2
  251. claude_mpm/services/framework_claude_md_generator/version_manager.py +4 -5
  252. claude_mpm/services/hook_service.py +6 -9
  253. claude_mpm/services/infrastructure/__init__.py +1 -1
  254. claude_mpm/services/infrastructure/context_preservation.py +8 -12
  255. claude_mpm/services/infrastructure/monitoring.py +21 -23
  256. claude_mpm/services/mcp_gateway/__init__.py +37 -37
  257. claude_mpm/services/mcp_gateway/auto_configure.py +95 -103
  258. claude_mpm/services/mcp_gateway/config/__init__.py +1 -1
  259. claude_mpm/services/mcp_gateway/config/config_loader.py +23 -25
  260. claude_mpm/services/mcp_gateway/config/config_schema.py +5 -5
  261. claude_mpm/services/mcp_gateway/config/configuration.py +9 -6
  262. claude_mpm/services/mcp_gateway/core/__init__.py +10 -10
  263. claude_mpm/services/mcp_gateway/core/base.py +0 -3
  264. claude_mpm/services/mcp_gateway/core/interfaces.py +1 -38
  265. claude_mpm/services/mcp_gateway/core/process_pool.py +99 -93
  266. claude_mpm/services/mcp_gateway/core/singleton_manager.py +65 -62
  267. claude_mpm/services/mcp_gateway/core/startup_verification.py +75 -74
  268. claude_mpm/services/mcp_gateway/main.py +2 -1
  269. claude_mpm/services/mcp_gateway/registry/service_registry.py +5 -8
  270. claude_mpm/services/mcp_gateway/registry/tool_registry.py +1 -1
  271. claude_mpm/services/mcp_gateway/server/__init__.py +1 -1
  272. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +12 -19
  273. claude_mpm/services/mcp_gateway/server/stdio_handler.py +4 -3
  274. claude_mpm/services/mcp_gateway/server/stdio_server.py +79 -71
  275. claude_mpm/services/mcp_gateway/tools/__init__.py +2 -2
  276. claude_mpm/services/mcp_gateway/tools/base_adapter.py +5 -6
  277. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +13 -22
  278. claude_mpm/services/mcp_gateway/tools/health_check_tool.py +79 -78
  279. claude_mpm/services/mcp_gateway/tools/hello_world.py +12 -14
  280. claude_mpm/services/mcp_gateway/tools/ticket_tools.py +42 -49
  281. claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +51 -55
  282. claude_mpm/services/memory/__init__.py +3 -3
  283. claude_mpm/services/memory/builder.py +3 -6
  284. claude_mpm/services/memory/cache/__init__.py +1 -1
  285. claude_mpm/services/memory/cache/shared_prompt_cache.py +3 -5
  286. claude_mpm/services/memory/cache/simple_cache.py +1 -1
  287. claude_mpm/services/memory/indexed_memory.py +5 -7
  288. claude_mpm/services/memory/optimizer.py +7 -10
  289. claude_mpm/services/memory/router.py +8 -9
  290. claude_mpm/services/memory_hook_service.py +48 -34
  291. claude_mpm/services/monitor_build_service.py +77 -73
  292. claude_mpm/services/port_manager.py +130 -108
  293. claude_mpm/services/project/analyzer.py +12 -10
  294. claude_mpm/services/project/registry.py +11 -11
  295. claude_mpm/services/recovery_manager.py +10 -19
  296. claude_mpm/services/response_tracker.py +0 -1
  297. claude_mpm/services/runner_configuration_service.py +19 -20
  298. claude_mpm/services/session_management_service.py +7 -11
  299. claude_mpm/services/shared/__init__.py +1 -1
  300. claude_mpm/services/shared/async_service_base.py +58 -50
  301. claude_mpm/services/shared/config_service_base.py +73 -67
  302. claude_mpm/services/shared/lifecycle_service_base.py +82 -78
  303. claude_mpm/services/shared/manager_base.py +94 -82
  304. claude_mpm/services/shared/service_factory.py +96 -98
  305. claude_mpm/services/socketio/__init__.py +3 -3
  306. claude_mpm/services/socketio/client_proxy.py +5 -5
  307. claude_mpm/services/socketio/event_normalizer.py +199 -181
  308. claude_mpm/services/socketio/handlers/__init__.py +3 -3
  309. claude_mpm/services/socketio/handlers/base.py +5 -4
  310. claude_mpm/services/socketio/handlers/connection.py +163 -136
  311. claude_mpm/services/socketio/handlers/file.py +13 -14
  312. claude_mpm/services/socketio/handlers/git.py +12 -7
  313. claude_mpm/services/socketio/handlers/hook.py +49 -44
  314. claude_mpm/services/socketio/handlers/memory.py +0 -1
  315. claude_mpm/services/socketio/handlers/project.py +0 -1
  316. claude_mpm/services/socketio/handlers/registry.py +37 -19
  317. claude_mpm/services/socketio/migration_utils.py +98 -84
  318. claude_mpm/services/socketio/server/__init__.py +1 -1
  319. claude_mpm/services/socketio/server/broadcaster.py +81 -87
  320. claude_mpm/services/socketio/server/core.py +65 -54
  321. claude_mpm/services/socketio/server/eventbus_integration.py +95 -56
  322. claude_mpm/services/socketio/server/main.py +64 -38
  323. claude_mpm/services/socketio_client_manager.py +10 -12
  324. claude_mpm/services/subprocess_launcher_service.py +4 -7
  325. claude_mpm/services/system_instructions_service.py +13 -14
  326. claude_mpm/services/ticket_manager.py +2 -2
  327. claude_mpm/services/utility_service.py +5 -13
  328. claude_mpm/services/version_control/__init__.py +16 -16
  329. claude_mpm/services/version_control/branch_strategy.py +5 -8
  330. claude_mpm/services/version_control/conflict_resolution.py +9 -23
  331. claude_mpm/services/version_control/git_operations.py +5 -7
  332. claude_mpm/services/version_control/semantic_versioning.py +16 -17
  333. claude_mpm/services/version_control/version_parser.py +13 -18
  334. claude_mpm/services/version_service.py +10 -11
  335. claude_mpm/storage/__init__.py +1 -1
  336. claude_mpm/storage/state_storage.py +22 -28
  337. claude_mpm/utils/__init__.py +6 -6
  338. claude_mpm/utils/agent_dependency_loader.py +47 -33
  339. claude_mpm/utils/config_manager.py +11 -14
  340. claude_mpm/utils/dependency_cache.py +1 -1
  341. claude_mpm/utils/dependency_manager.py +13 -17
  342. claude_mpm/utils/dependency_strategies.py +8 -10
  343. claude_mpm/utils/environment_context.py +3 -9
  344. claude_mpm/utils/error_handler.py +3 -13
  345. claude_mpm/utils/file_utils.py +1 -1
  346. claude_mpm/utils/path_operations.py +8 -12
  347. claude_mpm/utils/robust_installer.py +110 -33
  348. claude_mpm/utils/subprocess_utils.py +5 -6
  349. claude_mpm/validation/agent_validator.py +3 -6
  350. claude_mpm/validation/frontmatter_validator.py +1 -1
  351. {claude_mpm-4.1.1.dist-info → claude_mpm-4.1.2.dist-info}/METADATA +1 -1
  352. claude_mpm-4.1.2.dist-info/RECORD +498 -0
  353. claude_mpm-4.1.1.dist-info/RECORD +0 -494
  354. {claude_mpm-4.1.1.dist-info → claude_mpm-4.1.2.dist-info}/WHEEL +0 -0
  355. {claude_mpm-4.1.1.dist-info → claude_mpm-4.1.2.dist-info}/entry_points.txt +0 -0
  356. {claude_mpm-4.1.1.dist-info → claude_mpm-4.1.2.dist-info}/licenses/LICENSE +0 -0
  357. {claude_mpm-4.1.1.dist-info → claude_mpm-4.1.2.dist-info}/top_level.txt +0 -0
@@ -21,89 +21,99 @@ import os
21
21
  import signal
22
22
  import subprocess
23
23
  import sys
24
+ import threading
24
25
  import time
25
26
  import traceback
26
- from pathlib import Path
27
27
  from datetime import datetime
28
- from typing import Optional, Dict, Any
29
- import threading
30
- import queue
28
+ from pathlib import Path
29
+ from typing import Optional
30
+
31
31
 
32
32
  # Detect and use virtual environment Python if available
33
33
  def get_python_executable():
34
34
  """Get the appropriate Python executable, preferring virtual environment."""
35
- if hasattr(sys, 'real_prefix') or (hasattr(sys, 'base_prefix') and sys.base_prefix != sys.prefix):
35
+ if hasattr(sys, "real_prefix") or (
36
+ hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
37
+ ):
36
38
  return sys.executable
37
-
38
- venv_path = os.environ.get('VIRTUAL_ENV')
39
+
40
+ venv_path = os.environ.get("VIRTUAL_ENV")
39
41
  if venv_path:
40
- venv_python = Path(venv_path) / 'bin' / 'python'
42
+ venv_python = Path(venv_path) / "bin" / "python"
41
43
  if venv_python.exists():
42
44
  return str(venv_python)
43
-
45
+
44
46
  exe_path = Path(sys.executable).resolve()
45
47
  for parent in exe_path.parents:
46
- if parent.name in ('venv', '.venv', 'env', '.env'):
48
+ if parent.name in ("venv", ".venv", "env", ".env"):
47
49
  return sys.executable
48
- if parent.name == 'bin' and (parent.parent / 'pyvenv.cfg').exists():
50
+ if parent.name == "bin" and (parent.parent / "pyvenv.cfg").exists():
49
51
  return sys.executable
50
- if parent.name == 'Scripts' and (parent.parent / 'pyvenv.cfg').exists():
52
+ if parent.name == "Scripts" and (parent.parent / "pyvenv.cfg").exists():
51
53
  return sys.executable
52
-
54
+
53
55
  script_path = Path(__file__).resolve()
54
56
  for parent in script_path.parents:
55
- if parent.name == 'src' or not (parent / 'src').exists():
56
- for venv_name in ('venv', '.venv', 'env', '.env'):
57
+ if parent.name == "src" or not (parent / "src").exists():
58
+ for venv_name in ("venv", ".venv", "env", ".env"):
57
59
  venv_dir = parent / venv_name
58
60
  if venv_dir.exists():
59
- venv_python = venv_dir / 'bin' / 'python'
61
+ venv_python = venv_dir / "bin" / "python"
60
62
  if venv_python.exists():
61
63
  return str(venv_python)
62
64
  break
63
-
65
+
64
66
  return sys.executable
65
67
 
68
+
66
69
  PYTHON_EXECUTABLE = get_python_executable()
67
70
 
71
+
68
72
  # Configuration from environment variables
69
73
  class Config:
70
74
  """Centralized configuration with environment variable support."""
71
-
75
+
72
76
  # Retry configuration
73
- MAX_RETRIES = int(os.environ.get('SOCKETIO_MAX_RETRIES', '10'))
74
- INITIAL_RETRY_DELAY = float(os.environ.get('SOCKETIO_INITIAL_RETRY_DELAY', '1.0'))
75
- MAX_RETRY_DELAY = float(os.environ.get('SOCKETIO_MAX_RETRY_DELAY', '60.0'))
76
- BACKOFF_FACTOR = float(os.environ.get('SOCKETIO_BACKOFF_FACTOR', '2.0'))
77
-
77
+ MAX_RETRIES = int(os.environ.get("SOCKETIO_MAX_RETRIES", "10"))
78
+ INITIAL_RETRY_DELAY = float(os.environ.get("SOCKETIO_INITIAL_RETRY_DELAY", "1.0"))
79
+ MAX_RETRY_DELAY = float(os.environ.get("SOCKETIO_MAX_RETRY_DELAY", "60.0"))
80
+ BACKOFF_FACTOR = float(os.environ.get("SOCKETIO_BACKOFF_FACTOR", "2.0"))
81
+
78
82
  # Health check configuration
79
- HEALTH_CHECK_INTERVAL = float(os.environ.get('SOCKETIO_HEALTH_CHECK_INTERVAL', '30.0'))
80
- HEALTH_CHECK_TIMEOUT = float(os.environ.get('SOCKETIO_HEALTH_CHECK_TIMEOUT', '5.0'))
81
- UNHEALTHY_THRESHOLD = int(os.environ.get('SOCKETIO_UNHEALTHY_THRESHOLD', '3'))
82
-
83
+ HEALTH_CHECK_INTERVAL = float(
84
+ os.environ.get("SOCKETIO_HEALTH_CHECK_INTERVAL", "30.0")
85
+ )
86
+ HEALTH_CHECK_TIMEOUT = float(os.environ.get("SOCKETIO_HEALTH_CHECK_TIMEOUT", "5.0"))
87
+ UNHEALTHY_THRESHOLD = int(os.environ.get("SOCKETIO_UNHEALTHY_THRESHOLD", "3"))
88
+
83
89
  # Process management
84
- STARTUP_TIMEOUT = float(os.environ.get('SOCKETIO_STARTUP_TIMEOUT', '30.0'))
85
- SHUTDOWN_TIMEOUT = float(os.environ.get('SOCKETIO_SHUTDOWN_TIMEOUT', '10.0'))
86
- FORCE_KILL_TIMEOUT = float(os.environ.get('SOCKETIO_FORCE_KILL_TIMEOUT', '5.0'))
87
-
90
+ STARTUP_TIMEOUT = float(os.environ.get("SOCKETIO_STARTUP_TIMEOUT", "30.0"))
91
+ SHUTDOWN_TIMEOUT = float(os.environ.get("SOCKETIO_SHUTDOWN_TIMEOUT", "10.0"))
92
+ FORCE_KILL_TIMEOUT = float(os.environ.get("SOCKETIO_FORCE_KILL_TIMEOUT", "5.0"))
93
+
88
94
  # Port configuration
89
- PORT_RANGE_START = int(os.environ.get('SOCKETIO_PORT_START', '8765'))
90
- PORT_RANGE_END = int(os.environ.get('SOCKETIO_PORT_END', '8785'))
91
-
95
+ PORT_RANGE_START = int(os.environ.get("SOCKETIO_PORT_START", "8765"))
96
+ PORT_RANGE_END = int(os.environ.get("SOCKETIO_PORT_END", "8785"))
97
+
92
98
  # Logging
93
- LOG_LEVEL = os.environ.get('SOCKETIO_LOG_LEVEL', 'INFO')
94
- LOG_FORMAT = '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
95
-
99
+ LOG_LEVEL = os.environ.get("SOCKETIO_LOG_LEVEL", "INFO")
100
+ LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
101
+
96
102
  # Monitoring
97
- METRICS_ENABLED = os.environ.get('SOCKETIO_METRICS_ENABLED', 'true').lower() == 'true'
98
- METRICS_FILE = os.environ.get('SOCKETIO_METRICS_FILE', '.claude-mpm/socketio-metrics.json')
103
+ METRICS_ENABLED = (
104
+ os.environ.get("SOCKETIO_METRICS_ENABLED", "true").lower() == "true"
105
+ )
106
+ METRICS_FILE = os.environ.get(
107
+ "SOCKETIO_METRICS_FILE", ".claude-mpm/socketio-metrics.json"
108
+ )
109
+
99
110
 
100
111
  # Setup structured logging
112
+ import contextlib
101
113
  import logging
102
- logging.basicConfig(
103
- level=getattr(logging, Config.LOG_LEVEL),
104
- format=Config.LOG_FORMAT
105
- )
106
- logger = logging.getLogger('socketio-daemon')
114
+
115
+ logging.basicConfig(level=getattr(logging, Config.LOG_LEVEL), format=Config.LOG_FORMAT)
116
+ logger = logging.getLogger("socketio-daemon")
107
117
 
108
118
  try:
109
119
  import psutil
@@ -114,75 +124,82 @@ except ImportError:
114
124
 
115
125
  # Import project modules
116
126
  try:
127
+ from claude_mpm.core.unified_paths import get_project_root
117
128
  from claude_mpm.services.port_manager import PortManager
118
129
  from claude_mpm.services.socketio.server.main import SocketIOServer
119
- from claude_mpm.core.unified_paths import get_project_root
120
130
  except ImportError:
121
131
  script_path = Path(__file__).resolve()
122
132
  if "site-packages" in str(script_path):
123
133
  parts = script_path.parts
124
- site_packages_idx = next(i for i, part in enumerate(parts) if part == "site-packages")
134
+ site_packages_idx = next(
135
+ i for i, part in enumerate(parts) if part == "site-packages"
136
+ )
125
137
  site_packages_path = Path(*parts[: site_packages_idx + 1])
126
138
  if site_packages_path.exists() and str(site_packages_path) not in sys.path:
127
139
  sys.path.insert(0, str(site_packages_path))
128
140
  else:
129
141
  src_path = script_path.parent.parent.parent
130
- if src_path.exists() and (src_path / "claude_mpm").exists() and str(src_path) not in sys.path:
142
+ if (
143
+ src_path.exists()
144
+ and (src_path / "claude_mpm").exists()
145
+ and str(src_path) not in sys.path
146
+ ):
131
147
  sys.path.insert(0, str(src_path))
132
-
148
+
149
+ from claude_mpm.core.unified_paths import get_project_root
133
150
  from claude_mpm.services.port_manager import PortManager
134
151
  from claude_mpm.services.socketio.server.main import SocketIOServer
135
- from claude_mpm.core.unified_paths import get_project_root
136
152
 
137
153
 
138
154
  class DaemonMetrics:
139
155
  """Track and persist daemon metrics for monitoring."""
140
-
156
+
141
157
  def __init__(self, metrics_file: Path):
142
158
  self.metrics_file = metrics_file
143
159
  self.metrics = {
144
- 'start_time': None,
145
- 'restarts': 0,
146
- 'total_failures': 0,
147
- 'last_failure': None,
148
- 'health_checks_passed': 0,
149
- 'health_checks_failed': 0,
150
- 'uptime_seconds': 0,
151
- 'last_health_check': None,
152
- 'status': 'initializing'
160
+ "start_time": None,
161
+ "restarts": 0,
162
+ "total_failures": 0,
163
+ "last_failure": None,
164
+ "health_checks_passed": 0,
165
+ "health_checks_failed": 0,
166
+ "uptime_seconds": 0,
167
+ "last_health_check": None,
168
+ "status": "initializing",
153
169
  }
154
170
  self.lock = threading.Lock()
155
171
  self.load()
156
-
172
+
157
173
  def load(self):
158
174
  """Load metrics from file if exists."""
159
175
  if self.metrics_file.exists():
160
176
  try:
161
- with open(self.metrics_file, 'r') as f:
177
+ with open(self.metrics_file) as f:
162
178
  saved = json.load(f)
163
179
  self.metrics.update(saved)
164
180
  except Exception as e:
165
181
  logger.warning(f"Could not load metrics: {e}")
166
-
182
+
167
183
  def save(self):
168
184
  """Persist metrics to file."""
169
185
  try:
170
186
  self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
171
- with self.lock:
172
- with open(self.metrics_file, 'w') as f:
173
- json.dump(self.metrics, f, indent=2, default=str)
187
+ with self.lock, open(self.metrics_file, "w") as f:
188
+ json.dump(self.metrics, f, indent=2, default=str)
174
189
  except Exception as e:
175
190
  logger.error(f"Could not save metrics: {e}")
176
-
191
+
177
192
  def update(self, **kwargs):
178
193
  """Update metrics atomically."""
179
194
  with self.lock:
180
195
  self.metrics.update(kwargs)
181
- if self.metrics['start_time']:
182
- start = datetime.fromisoformat(str(self.metrics['start_time']))
183
- self.metrics['uptime_seconds'] = int((datetime.now() - start).total_seconds())
196
+ if self.metrics["start_time"]:
197
+ start = datetime.fromisoformat(str(self.metrics["start_time"]))
198
+ self.metrics["uptime_seconds"] = int(
199
+ (datetime.now() - start).total_seconds()
200
+ )
184
201
  self.save()
185
-
202
+
186
203
  def increment(self, key: str, amount: int = 1):
187
204
  """Increment a counter metric."""
188
205
  with self.lock:
@@ -192,29 +209,32 @@ class DaemonMetrics:
192
209
 
193
210
  class ExponentialBackoff:
194
211
  """Implement exponential backoff with jitter for retry logic."""
195
-
196
- def __init__(self, initial_delay: float = 1.0, max_delay: float = 60.0, factor: float = 2.0):
212
+
213
+ def __init__(
214
+ self, initial_delay: float = 1.0, max_delay: float = 60.0, factor: float = 2.0
215
+ ):
197
216
  self.initial_delay = initial_delay
198
217
  self.max_delay = max_delay
199
218
  self.factor = factor
200
219
  self.current_delay = initial_delay
201
220
  self.attempt = 0
202
-
221
+
203
222
  def next_delay(self) -> float:
204
223
  """Get the next delay with jitter."""
205
224
  import random
225
+
206
226
  self.attempt += 1
207
-
227
+
208
228
  # Calculate exponential delay
209
- delay = min(self.initial_delay * (self.factor ** self.attempt), self.max_delay)
210
-
229
+ delay = min(self.initial_delay * (self.factor**self.attempt), self.max_delay)
230
+
211
231
  # Add jitter (±25% randomization)
212
232
  jitter = delay * 0.25 * (2 * random.random() - 1)
213
233
  actual_delay = max(0.1, delay + jitter)
214
-
234
+
215
235
  logger.debug(f"Backoff attempt {self.attempt}: {actual_delay:.2f}s")
216
236
  return actual_delay
217
-
237
+
218
238
  def reset(self):
219
239
  """Reset the backoff counter."""
220
240
  self.attempt = 0
@@ -223,82 +243,92 @@ class ExponentialBackoff:
223
243
 
224
244
  class HealthMonitor:
225
245
  """Monitor daemon health and trigger recovery if needed."""
226
-
246
+
227
247
  def __init__(self, port: int, metrics: DaemonMetrics):
228
248
  self.port = port
229
249
  self.metrics = metrics
230
250
  self.consecutive_failures = 0
231
251
  self.running = False
232
252
  self.thread = None
233
-
253
+
234
254
  def start(self):
235
255
  """Start health monitoring in background thread."""
236
256
  if self.running:
237
257
  return
238
-
258
+
239
259
  self.running = True
240
260
  self.thread = threading.Thread(target=self._monitor_loop, daemon=True)
241
261
  self.thread.start()
242
262
  logger.info("Health monitor started")
243
-
263
+
244
264
  def stop(self):
245
265
  """Stop health monitoring."""
246
266
  self.running = False
247
267
  if self.thread:
248
268
  self.thread.join(timeout=5)
249
269
  logger.info("Health monitor stopped")
250
-
270
+
251
271
  def _monitor_loop(self):
252
272
  """Main health check loop."""
253
273
  while self.running:
254
274
  try:
255
275
  time.sleep(Config.HEALTH_CHECK_INTERVAL)
256
-
276
+
257
277
  if self._check_health():
258
278
  self.consecutive_failures = 0
259
- self.metrics.increment('health_checks_passed')
260
- self.metrics.update(last_health_check=datetime.now(), status='healthy')
279
+ self.metrics.increment("health_checks_passed")
280
+ self.metrics.update(
281
+ last_health_check=datetime.now(), status="healthy"
282
+ )
261
283
  else:
262
284
  self.consecutive_failures += 1
263
- self.metrics.increment('health_checks_failed')
264
- self.metrics.update(last_health_check=datetime.now(), status='unhealthy')
265
-
285
+ self.metrics.increment("health_checks_failed")
286
+ self.metrics.update(
287
+ last_health_check=datetime.now(), status="unhealthy"
288
+ )
289
+
266
290
  if self.consecutive_failures >= Config.UNHEALTHY_THRESHOLD:
267
- logger.error(f"Health check failed {self.consecutive_failures} times - daemon unhealthy")
291
+ logger.error(
292
+ f"Health check failed {self.consecutive_failures} times - daemon unhealthy"
293
+ )
268
294
  # Supervisor will handle restart
269
-
295
+
270
296
  except Exception as e:
271
297
  logger.error(f"Health monitor error: {e}")
272
-
298
+
273
299
  def _check_health(self) -> bool:
274
300
  """Perform health check on the daemon."""
275
301
  try:
276
302
  import socket
277
- import json
278
-
303
+
279
304
  # Try to connect to the socket
280
305
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
281
306
  sock.settimeout(Config.HEALTH_CHECK_TIMEOUT)
282
- result = sock.connect_ex(('localhost', self.port))
307
+ result = sock.connect_ex(("localhost", self.port))
283
308
  sock.close()
284
-
309
+
285
310
  if result != 0:
286
- logger.warning(f"Health check failed: cannot connect to port {self.port}")
311
+ logger.warning(
312
+ f"Health check failed: cannot connect to port {self.port}"
313
+ )
287
314
  return False
288
-
315
+
289
316
  # Try to make an HTTP health request if possible
290
317
  try:
291
318
  import urllib.request
292
- url = f'http://localhost:{self.port}/health'
293
- with urllib.request.urlopen(url, timeout=Config.HEALTH_CHECK_TIMEOUT) as response:
319
+
320
+ url = f"http://localhost:{self.port}/health"
321
+ with urllib.request.urlopen(
322
+ url, timeout=Config.HEALTH_CHECK_TIMEOUT
323
+ ) as response:
294
324
  if response.status == 200:
295
325
  return True
296
326
  except:
297
327
  # Fall back to simple port check
298
328
  pass
299
-
329
+
300
330
  return True
301
-
331
+
302
332
  except Exception as e:
303
333
  logger.error(f"Health check error: {e}")
304
334
  return False
@@ -306,92 +336,95 @@ class HealthMonitor:
306
336
 
307
337
  class DaemonSupervisor:
308
338
  """Supervise the daemon process and handle automatic recovery."""
309
-
339
+
310
340
  def __init__(self):
311
341
  self.deployment_root = get_project_root()
312
342
  self.pid_file = self.deployment_root / ".claude-mpm" / "socketio-server.pid"
313
343
  self.log_file = self.deployment_root / ".claude-mpm" / "socketio-server.log"
314
344
  self.lock_file = self.deployment_root / ".claude-mpm" / "socketio-server.lock"
315
- self.supervisor_pid_file = self.deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
316
-
345
+ self.supervisor_pid_file = (
346
+ self.deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
347
+ )
348
+
317
349
  # Metrics tracking
318
350
  metrics_file = self.deployment_root / ".claude-mpm" / Config.METRICS_FILE
319
351
  self.metrics = DaemonMetrics(metrics_file)
320
-
352
+
321
353
  # Recovery state
322
354
  self.backoff = ExponentialBackoff(
323
- Config.INITIAL_RETRY_DELAY,
324
- Config.MAX_RETRY_DELAY,
325
- Config.BACKOFF_FACTOR
355
+ Config.INITIAL_RETRY_DELAY, Config.MAX_RETRY_DELAY, Config.BACKOFF_FACTOR
326
356
  )
327
-
357
+
328
358
  self.port_manager = PortManager()
329
359
  self.server_process = None
330
360
  self.selected_port = None
331
361
  self.health_monitor = None
332
362
  self.shutdown_requested = False
333
-
363
+
334
364
  def ensure_dirs(self):
335
365
  """Ensure required directories exist."""
336
366
  self.pid_file.parent.mkdir(parents=True, exist_ok=True)
337
-
367
+
338
368
  def acquire_lock(self) -> bool:
339
369
  """Acquire exclusive lock to prevent multiple instances."""
340
370
  try:
341
371
  self.ensure_dirs()
342
-
372
+
343
373
  # Check for existing lock
344
374
  if self.lock_file.exists():
345
375
  try:
346
- with open(self.lock_file, 'r') as f:
376
+ with open(self.lock_file) as f:
347
377
  old_pid = int(f.read().strip())
348
-
378
+
349
379
  # Check if old process is still running
350
380
  if psutil.pid_exists(old_pid):
351
381
  process = psutil.Process(old_pid)
352
382
  if process.is_running():
353
- logger.warning(f"Another supervisor is running (PID: {old_pid})")
383
+ logger.warning(
384
+ f"Another supervisor is running (PID: {old_pid})"
385
+ )
354
386
  return False
355
387
  except:
356
388
  pass
357
-
389
+
358
390
  # Clean up stale lock
359
391
  self.lock_file.unlink(missing_ok=True)
360
-
392
+
361
393
  # Create new lock
362
- with open(self.lock_file, 'w') as f:
394
+ with open(self.lock_file, "w") as f:
363
395
  f.write(str(os.getpid()))
364
-
396
+
365
397
  return True
366
-
398
+
367
399
  except Exception as e:
368
400
  logger.error(f"Could not acquire lock: {e}")
369
401
  return False
370
-
402
+
371
403
  def release_lock(self):
372
404
  """Release the exclusive lock."""
373
405
  self.lock_file.unlink(missing_ok=True)
374
-
406
+
375
407
  def find_available_port(self) -> Optional[int]:
376
408
  """Find an available port for the server."""
377
409
  self.port_manager.cleanup_dead_instances()
378
410
  port = self.port_manager.find_available_port()
379
-
411
+
380
412
  if not port:
381
413
  # Try extended range if configured
382
414
  for p in range(Config.PORT_RANGE_START, Config.PORT_RANGE_END + 1):
383
415
  import socket
416
+
384
417
  try:
385
418
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
386
- result = sock.connect_ex(('localhost', p))
419
+ result = sock.connect_ex(("localhost", p))
387
420
  sock.close()
388
421
  if result != 0:
389
422
  return p
390
423
  except:
391
424
  pass
392
-
425
+
393
426
  return port
394
-
427
+
395
428
  def start_server_process(self) -> bool:
396
429
  """Start the actual Socket.IO server process."""
397
430
  try:
@@ -400,95 +433,92 @@ class DaemonSupervisor:
400
433
  if not self.selected_port:
401
434
  logger.error("No available ports")
402
435
  return False
403
-
436
+
404
437
  logger.info(f"Starting server on port {self.selected_port}")
405
-
438
+
406
439
  # Fork to create daemon process
407
440
  pid = os.fork()
408
441
  if pid > 0:
409
442
  # Parent process - supervisor
410
443
  self.server_process = pid
411
-
444
+
412
445
  # Save PID files
413
- with open(self.pid_file, 'w') as f:
446
+ with open(self.pid_file, "w") as f:
414
447
  f.write(str(pid))
415
-
416
- with open(self.supervisor_pid_file, 'w') as f:
448
+
449
+ with open(self.supervisor_pid_file, "w") as f:
417
450
  f.write(str(os.getpid()))
418
-
451
+
419
452
  # Save port info
420
453
  port_file = self.pid_file.parent / "socketio-port"
421
- with open(port_file, 'w') as f:
454
+ with open(port_file, "w") as f:
422
455
  f.write(str(self.selected_port))
423
-
456
+
424
457
  # Register with port manager
425
458
  self.port_manager.register_instance(self.selected_port, pid)
426
-
459
+
427
460
  # Wait for server to start
428
461
  if self._wait_for_server_start():
429
462
  logger.info(f"Server started successfully (PID: {pid})")
430
- self.metrics.update(
431
- start_time=datetime.now(),
432
- status='running'
433
- )
463
+ self.metrics.update(start_time=datetime.now(), status="running")
434
464
  self.backoff.reset()
435
465
  return True
436
- else:
437
- logger.error("Server failed to start within timeout")
438
- self._cleanup_failed_server(pid)
439
- return False
440
-
441
- else:
442
- # Child process - actual server
443
- self._run_server_process()
444
-
466
+ logger.error("Server failed to start within timeout")
467
+ self._cleanup_failed_server(pid)
468
+ return False
469
+
470
+ # Child process - actual server
471
+ self._run_server_process()
472
+
445
473
  except Exception as e:
446
474
  logger.error(f"Failed to start server: {e}")
447
475
  logger.debug(traceback.format_exc())
448
476
  return False
449
-
477
+
450
478
  def _run_server_process(self):
451
479
  """Run the Socket.IO server in the child process."""
452
480
  try:
453
481
  # Become a proper daemon
454
482
  os.setsid()
455
483
  os.umask(0)
456
-
484
+
457
485
  # Redirect output to log file
458
- with open(self.log_file, 'a') as log:
486
+ with open(self.log_file, "a") as log:
459
487
  os.dup2(log.fileno(), sys.stdout.fileno())
460
488
  os.dup2(log.fileno(), sys.stderr.fileno())
461
-
489
+
462
490
  # Log startup info
463
- print(f"[{datetime.now()}] Starting Socket.IO server on port {self.selected_port}")
491
+ print(
492
+ f"[{datetime.now()}] Starting Socket.IO server on port {self.selected_port}"
493
+ )
464
494
  print(f"[{datetime.now()}] Python: {sys.executable}")
465
495
  print(f"[{datetime.now()}] Version: {sys.version}")
466
-
496
+
467
497
  # Create and start server with error handling
468
498
  server = None
469
499
  try:
470
500
  server = SocketIOServer(host="localhost", port=self.selected_port)
471
-
501
+
472
502
  # Setup signal handlers
473
503
  def signal_handler(signum, frame):
474
- print(f"[{datetime.now()}] Received signal {signum}, shutting down...")
504
+ print(
505
+ f"[{datetime.now()}] Received signal {signum}, shutting down..."
506
+ )
475
507
  if server:
476
- try:
508
+ with contextlib.suppress(Exception):
477
509
  server.stop_sync()
478
- except:
479
- pass
480
510
  sys.exit(0)
481
-
511
+
482
512
  signal.signal(signal.SIGTERM, signal_handler)
483
513
  signal.signal(signal.SIGINT, signal_handler)
484
-
514
+
485
515
  # Start server
486
516
  server.start_sync()
487
-
517
+
488
518
  # Keep running
489
519
  while True:
490
520
  time.sleep(1)
491
-
521
+
492
522
  except KeyboardInterrupt:
493
523
  if server:
494
524
  server.stop_sync()
@@ -497,36 +527,36 @@ class DaemonSupervisor:
497
527
  print(f"[{datetime.now()}] Server error: {e}")
498
528
  print(traceback.format_exc())
499
529
  sys.exit(1)
500
-
530
+
501
531
  except Exception as e:
502
532
  print(f"[{datetime.now()}] Fatal error: {e}")
503
533
  sys.exit(1)
504
-
534
+
505
535
  def _wait_for_server_start(self) -> bool:
506
536
  """Wait for the server to become responsive."""
507
537
  import socket
508
-
538
+
509
539
  start_time = time.time()
510
540
  while time.time() - start_time < Config.STARTUP_TIMEOUT:
511
541
  # Check if process is still alive
512
542
  if not self._is_process_alive(self.server_process):
513
543
  return False
514
-
544
+
515
545
  # Try to connect
516
546
  try:
517
547
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
518
- result = sock.connect_ex(('localhost', self.selected_port))
548
+ result = sock.connect_ex(("localhost", self.selected_port))
519
549
  sock.close()
520
-
550
+
521
551
  if result == 0:
522
552
  return True
523
553
  except:
524
554
  pass
525
-
555
+
526
556
  time.sleep(0.5)
527
-
557
+
528
558
  return False
529
-
559
+
530
560
  def _is_process_alive(self, pid: int) -> bool:
531
561
  """Check if a process is alive."""
532
562
  try:
@@ -534,7 +564,7 @@ class DaemonSupervisor:
534
564
  return process.is_running()
535
565
  except (psutil.NoSuchProcess, psutil.AccessDenied):
536
566
  return False
537
-
567
+
538
568
  def _cleanup_failed_server(self, pid: int):
539
569
  """Clean up after a failed server start."""
540
570
  try:
@@ -542,71 +572,80 @@ class DaemonSupervisor:
542
572
  os.kill(pid, signal.SIGKILL)
543
573
  except:
544
574
  pass
545
-
575
+
546
576
  self.pid_file.unlink(missing_ok=True)
547
-
577
+
548
578
  if self.selected_port:
549
579
  instances = self.port_manager.load_instances()
550
580
  for instance_id, info in instances.items():
551
- if info.get('pid') == pid:
581
+ if info.get("pid") == pid:
552
582
  self.port_manager.remove_instance(instance_id)
553
583
  break
554
-
584
+
555
585
  def monitor_and_restart(self):
556
586
  """Monitor the server and restart if it crashes."""
557
587
  retry_count = 0
558
-
588
+
559
589
  while retry_count < Config.MAX_RETRIES and not self.shutdown_requested:
560
590
  try:
561
591
  # Start the server
562
592
  if self.start_server_process():
563
593
  # Start health monitoring
564
594
  if Config.METRICS_ENABLED and self.selected_port:
565
- self.health_monitor = HealthMonitor(self.selected_port, self.metrics)
595
+ self.health_monitor = HealthMonitor(
596
+ self.selected_port, self.metrics
597
+ )
566
598
  self.health_monitor.start()
567
-
599
+
568
600
  # Monitor the process
569
601
  while not self.shutdown_requested:
570
602
  time.sleep(5)
571
-
603
+
572
604
  # Check if process is still alive
573
605
  if not self._is_process_alive(self.server_process):
574
606
  logger.error("Server process died unexpectedly")
575
- self.metrics.increment('total_failures')
607
+ self.metrics.increment("total_failures")
576
608
  self.metrics.update(
577
- last_failure=datetime.now(),
578
- status='crashed'
609
+ last_failure=datetime.now(), status="crashed"
579
610
  )
580
611
  break
581
-
612
+
582
613
  # Check health status
583
- if self.health_monitor and self.health_monitor.consecutive_failures >= Config.UNHEALTHY_THRESHOLD:
614
+ if (
615
+ self.health_monitor
616
+ and self.health_monitor.consecutive_failures
617
+ >= Config.UNHEALTHY_THRESHOLD
618
+ ):
584
619
  logger.error("Server is unhealthy, restarting...")
585
620
  self._stop_server_process()
586
621
  break
587
-
622
+
588
623
  if self.shutdown_requested:
589
624
  break
590
-
625
+
591
626
  # Stop health monitor before restart
592
627
  if self.health_monitor:
593
628
  self.health_monitor.stop()
594
629
  self.health_monitor = None
595
-
630
+
596
631
  # Server crashed, apply backoff before restart
597
632
  retry_count += 1
598
633
  delay = self.backoff.next_delay()
599
- logger.info(f"Restarting in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})")
634
+ logger.info(
635
+ f"Restarting in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})"
636
+ )
600
637
  time.sleep(delay)
601
- self.metrics.increment('restarts')
602
-
638
+ self.metrics.increment("restarts")
639
+
603
640
  else:
604
641
  # Failed to start
605
642
  retry_count += 1
606
643
  delay = self.backoff.next_delay()
607
- logger.error(f"Failed to start, retrying in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})")
644
+ logger.error(
645
+ f"Failed to start, retrying in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})"
646
+ )
608
647
  time.sleep(delay)
609
-
648
+
610
649
  except KeyboardInterrupt:
611
650
  logger.info("Supervisor interrupted")
612
651
  break
@@ -615,22 +654,22 @@ class DaemonSupervisor:
615
654
  logger.debug(traceback.format_exc())
616
655
  retry_count += 1
617
656
  time.sleep(self.backoff.next_delay())
618
-
657
+
619
658
  if retry_count >= Config.MAX_RETRIES:
620
659
  logger.error(f"Max retries ({Config.MAX_RETRIES}) exceeded, giving up")
621
- self.metrics.update(status='failed')
622
-
660
+ self.metrics.update(status="failed")
661
+
623
662
  self.cleanup()
624
-
663
+
625
664
  def _stop_server_process(self):
626
665
  """Stop the server process gracefully."""
627
666
  if not self.server_process:
628
667
  return
629
-
668
+
630
669
  try:
631
670
  # Try graceful shutdown
632
671
  os.kill(self.server_process, signal.SIGTERM)
633
-
672
+
634
673
  # Wait for shutdown
635
674
  start_time = time.time()
636
675
  while time.time() - start_time < Config.SHUTDOWN_TIMEOUT:
@@ -638,45 +677,45 @@ class DaemonSupervisor:
638
677
  logger.info("Server stopped gracefully")
639
678
  return
640
679
  time.sleep(0.5)
641
-
680
+
642
681
  # Force kill if still running
643
682
  logger.warning("Server didn't stop gracefully, forcing...")
644
683
  os.kill(self.server_process, signal.SIGKILL)
645
684
  time.sleep(Config.FORCE_KILL_TIMEOUT)
646
-
685
+
647
686
  except Exception as e:
648
687
  logger.error(f"Error stopping server: {e}")
649
-
688
+
650
689
  def cleanup(self):
651
690
  """Clean up resources on shutdown."""
652
691
  logger.info("Cleaning up supervisor resources")
653
-
692
+
654
693
  # Stop health monitor
655
694
  if self.health_monitor:
656
695
  self.health_monitor.stop()
657
-
696
+
658
697
  # Stop server process
659
698
  if self.server_process:
660
699
  self._stop_server_process()
661
-
700
+
662
701
  # Clean up port registration
663
702
  if self.selected_port:
664
703
  instances = self.port_manager.load_instances()
665
704
  for instance_id, info in instances.items():
666
- if info.get('pid') == self.server_process:
705
+ if info.get("pid") == self.server_process:
667
706
  self.port_manager.remove_instance(instance_id)
668
707
  break
669
-
708
+
670
709
  # Remove PID files
671
710
  self.pid_file.unlink(missing_ok=True)
672
711
  self.supervisor_pid_file.unlink(missing_ok=True)
673
-
712
+
674
713
  # Update metrics
675
- self.metrics.update(status='stopped')
676
-
714
+ self.metrics.update(status="stopped")
715
+
677
716
  # Release lock
678
717
  self.release_lock()
679
-
718
+
680
719
  def handle_shutdown(self, signum, frame):
681
720
  """Handle shutdown signals."""
682
721
  logger.info(f"Received signal {signum}, initiating shutdown...")
@@ -686,42 +725,42 @@ class DaemonSupervisor:
686
725
  def start_daemon():
687
726
  """Start the hardened daemon with supervisor."""
688
727
  supervisor = DaemonSupervisor()
689
-
728
+
690
729
  # Check if already running
691
730
  if supervisor.pid_file.exists():
692
731
  try:
693
- with open(supervisor.pid_file, 'r') as f:
732
+ with open(supervisor.pid_file) as f:
694
733
  old_pid = int(f.read().strip())
695
-
734
+
696
735
  if supervisor._is_process_alive(old_pid):
697
736
  print(f"Socket.IO daemon is already running (PID: {old_pid})")
698
737
  return
699
738
  except:
700
739
  pass
701
-
740
+
702
741
  # Clean up stale PID file
703
742
  supervisor.pid_file.unlink(missing_ok=True)
704
-
743
+
705
744
  # Acquire lock
706
745
  if not supervisor.acquire_lock():
707
746
  print("Could not acquire lock - another instance may be running")
708
747
  return
709
-
748
+
710
749
  print("Starting hardened Socket.IO daemon with supervisor...")
711
750
  print(f"Python: {PYTHON_EXECUTABLE}")
712
751
  print(f"Max retries: {Config.MAX_RETRIES}")
713
752
  print(f"Health checks: {'enabled' if Config.METRICS_ENABLED else 'disabled'}")
714
-
753
+
715
754
  # Setup signal handlers
716
755
  signal.signal(signal.SIGTERM, supervisor.handle_shutdown)
717
756
  signal.signal(signal.SIGINT, supervisor.handle_shutdown)
718
-
757
+
719
758
  try:
720
759
  # Start monitoring and auto-restart loop
721
760
  supervisor.monitor_and_restart()
722
761
  finally:
723
762
  supervisor.cleanup()
724
-
763
+
725
764
  print("Socket.IO daemon stopped")
726
765
 
727
766
 
@@ -730,16 +769,16 @@ def stop_daemon():
730
769
  deployment_root = get_project_root()
731
770
  pid_file = deployment_root / ".claude-mpm" / "socketio-server.pid"
732
771
  supervisor_pid_file = deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
733
-
772
+
734
773
  # Try to stop supervisor first
735
774
  if supervisor_pid_file.exists():
736
775
  try:
737
- with open(supervisor_pid_file, 'r') as f:
776
+ with open(supervisor_pid_file) as f:
738
777
  supervisor_pid = int(f.read().strip())
739
-
778
+
740
779
  print(f"Stopping supervisor (PID: {supervisor_pid})...")
741
780
  os.kill(supervisor_pid, signal.SIGTERM)
742
-
781
+
743
782
  # Wait for supervisor to stop
744
783
  for _ in range(20):
745
784
  if not psutil.pid_exists(supervisor_pid):
@@ -747,31 +786,31 @@ def stop_daemon():
747
786
  supervisor_pid_file.unlink(missing_ok=True)
748
787
  return
749
788
  time.sleep(0.5)
750
-
789
+
751
790
  # Force kill if needed
752
791
  print("Supervisor didn't stop gracefully, forcing...")
753
792
  os.kill(supervisor_pid, signal.SIGKILL)
754
793
  supervisor_pid_file.unlink(missing_ok=True)
755
-
794
+
756
795
  except Exception as e:
757
796
  print(f"Error stopping supervisor: {e}")
758
-
797
+
759
798
  # Also try to stop server directly if supervisor failed
760
799
  if pid_file.exists():
761
800
  try:
762
- with open(pid_file, 'r') as f:
801
+ with open(pid_file) as f:
763
802
  server_pid = int(f.read().strip())
764
-
803
+
765
804
  if psutil.pid_exists(server_pid):
766
805
  print(f"Stopping server (PID: {server_pid})...")
767
806
  os.kill(server_pid, signal.SIGTERM)
768
807
  time.sleep(2)
769
-
808
+
770
809
  if psutil.pid_exists(server_pid):
771
810
  os.kill(server_pid, signal.SIGKILL)
772
-
811
+
773
812
  pid_file.unlink(missing_ok=True)
774
-
813
+
775
814
  except Exception as e:
776
815
  print(f"Error stopping server: {e}")
777
816
 
@@ -782,16 +821,16 @@ def status_daemon():
782
821
  pid_file = deployment_root / ".claude-mpm" / "socketio-server.pid"
783
822
  supervisor_pid_file = deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
784
823
  metrics_file = deployment_root / ".claude-mpm" / Config.METRICS_FILE
785
-
824
+
786
825
  print("Socket.IO Daemon Status")
787
826
  print("=" * 50)
788
-
827
+
789
828
  # Check supervisor
790
829
  if supervisor_pid_file.exists():
791
830
  try:
792
- with open(supervisor_pid_file, 'r') as f:
831
+ with open(supervisor_pid_file) as f:
793
832
  supervisor_pid = int(f.read().strip())
794
-
833
+
795
834
  if psutil.pid_exists(supervisor_pid):
796
835
  process = psutil.Process(supervisor_pid)
797
836
  print(f"✅ Supervisor: RUNNING (PID: {supervisor_pid})")
@@ -803,32 +842,33 @@ def status_daemon():
803
842
  print("❌ Supervisor: ERROR reading status")
804
843
  else:
805
844
  print("❌ Supervisor: NOT RUNNING")
806
-
845
+
807
846
  # Check server
808
847
  if pid_file.exists():
809
848
  try:
810
- with open(pid_file, 'r') as f:
849
+ with open(pid_file) as f:
811
850
  server_pid = int(f.read().strip())
812
-
851
+
813
852
  if psutil.pid_exists(server_pid):
814
853
  process = psutil.Process(server_pid)
815
854
  print(f"✅ Server: RUNNING (PID: {server_pid})")
816
855
  print(f" Memory: {process.memory_info().rss / 1024 / 1024:.1f} MB")
817
856
  print(f" CPU: {process.cpu_percent()}%")
818
-
857
+
819
858
  # Check port
820
859
  port_file = deployment_root / ".claude-mpm" / "socketio-port"
821
860
  if port_file.exists():
822
- with open(port_file, 'r') as f:
861
+ with open(port_file) as f:
823
862
  port = int(f.read().strip())
824
863
  print(f" Port: {port}")
825
-
864
+
826
865
  # Test connection
827
866
  import socket
867
+
828
868
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
829
- result = sock.connect_ex(('localhost', port))
869
+ result = sock.connect_ex(("localhost", port))
830
870
  sock.close()
831
-
871
+
832
872
  if result == 0:
833
873
  print(f" ✅ Listening on port {port}")
834
874
  else:
@@ -839,13 +879,13 @@ def status_daemon():
839
879
  print("❌ Server: ERROR reading status")
840
880
  else:
841
881
  print("❌ Server: NOT RUNNING")
842
-
882
+
843
883
  # Show metrics
844
884
  if metrics_file.exists():
845
885
  try:
846
- with open(metrics_file, 'r') as f:
886
+ with open(metrics_file) as f:
847
887
  metrics = json.load(f)
848
-
888
+
849
889
  print("\n📊 Metrics:")
850
890
  print(f" Status: {metrics.get('status', 'unknown')}")
851
891
  print(f" Uptime: {metrics.get('uptime_seconds', 0)} seconds")
@@ -853,15 +893,15 @@ def status_daemon():
853
893
  print(f" Failures: {metrics.get('total_failures', 0)}")
854
894
  print(f" Health Checks Passed: {metrics.get('health_checks_passed', 0)}")
855
895
  print(f" Health Checks Failed: {metrics.get('health_checks_failed', 0)}")
856
-
857
- if metrics.get('last_failure'):
896
+
897
+ if metrics.get("last_failure"):
858
898
  print(f" Last Failure: {metrics['last_failure']}")
859
- if metrics.get('last_health_check'):
899
+ if metrics.get("last_health_check"):
860
900
  print(f" Last Health Check: {metrics['last_health_check']}")
861
-
901
+
862
902
  except Exception as e:
863
903
  print(f"\n❌ Could not read metrics: {e}")
864
-
904
+
865
905
  print("\n🔧 Configuration:")
866
906
  print(f" Max Retries: {Config.MAX_RETRIES}")
867
907
  print(f" Health Check Interval: {Config.HEALTH_CHECK_INTERVAL}s")
@@ -874,9 +914,9 @@ def main():
874
914
  if len(sys.argv) < 2:
875
915
  print("Usage: socketio-daemon-hardened.py {start|stop|restart|status}")
876
916
  sys.exit(1)
877
-
917
+
878
918
  command = sys.argv[1]
879
-
919
+
880
920
  if command == "start":
881
921
  start_daemon()
882
922
  elif command == "stop":
@@ -894,4 +934,4 @@ def main():
894
934
 
895
935
 
896
936
  if __name__ == "__main__":
897
- main()
937
+ main()