claude-mpm 3.9.11__py3-none-any.whl → 4.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (419):
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__init__.py +2 -2
  3. claude_mpm/__main__.py +3 -2
  4. claude_mpm/agents/__init__.py +85 -79
  5. claude_mpm/agents/agent_loader.py +464 -1003
  6. claude_mpm/agents/agent_loader_integration.py +45 -45
  7. claude_mpm/agents/agents_metadata.py +29 -30
  8. claude_mpm/agents/async_agent_loader.py +156 -138
  9. claude_mpm/agents/base_agent.json +1 -1
  10. claude_mpm/agents/base_agent_loader.py +179 -151
  11. claude_mpm/agents/frontmatter_validator.py +229 -130
  12. claude_mpm/agents/schema/agent_schema.json +1 -1
  13. claude_mpm/agents/system_agent_config.py +213 -147
  14. claude_mpm/agents/templates/__init__.py +13 -13
  15. claude_mpm/agents/templates/code_analyzer.json +2 -2
  16. claude_mpm/agents/templates/data_engineer.json +1 -1
  17. claude_mpm/agents/templates/documentation.json +23 -11
  18. claude_mpm/agents/templates/engineer.json +22 -6
  19. claude_mpm/agents/templates/memory_manager.json +1 -1
  20. claude_mpm/agents/templates/ops.json +2 -2
  21. claude_mpm/agents/templates/project_organizer.json +1 -1
  22. claude_mpm/agents/templates/qa.json +1 -1
  23. claude_mpm/agents/templates/refactoring_engineer.json +222 -0
  24. claude_mpm/agents/templates/research.json +20 -14
  25. claude_mpm/agents/templates/security.json +1 -1
  26. claude_mpm/agents/templates/ticketing.json +1 -1
  27. claude_mpm/agents/templates/version_control.json +1 -1
  28. claude_mpm/agents/templates/web_qa.json +3 -1
  29. claude_mpm/agents/templates/web_ui.json +2 -2
  30. claude_mpm/cli/__init__.py +79 -51
  31. claude_mpm/cli/__main__.py +3 -2
  32. claude_mpm/cli/commands/__init__.py +20 -20
  33. claude_mpm/cli/commands/agents.py +279 -247
  34. claude_mpm/cli/commands/aggregate.py +138 -157
  35. claude_mpm/cli/commands/cleanup.py +147 -147
  36. claude_mpm/cli/commands/config.py +93 -76
  37. claude_mpm/cli/commands/info.py +17 -16
  38. claude_mpm/cli/commands/mcp.py +140 -905
  39. claude_mpm/cli/commands/mcp_command_router.py +139 -0
  40. claude_mpm/cli/commands/mcp_config_commands.py +20 -0
  41. claude_mpm/cli/commands/mcp_install_commands.py +20 -0
  42. claude_mpm/cli/commands/mcp_server_commands.py +175 -0
  43. claude_mpm/cli/commands/mcp_tool_commands.py +34 -0
  44. claude_mpm/cli/commands/memory.py +239 -203
  45. claude_mpm/cli/commands/monitor.py +203 -81
  46. claude_mpm/cli/commands/run.py +380 -429
  47. claude_mpm/cli/commands/run_config_checker.py +160 -0
  48. claude_mpm/cli/commands/socketio_monitor.py +235 -0
  49. claude_mpm/cli/commands/tickets.py +305 -197
  50. claude_mpm/cli/parser.py +24 -1156
  51. claude_mpm/cli/parsers/__init__.py +29 -0
  52. claude_mpm/cli/parsers/agents_parser.py +136 -0
  53. claude_mpm/cli/parsers/base_parser.py +331 -0
  54. claude_mpm/cli/parsers/config_parser.py +85 -0
  55. claude_mpm/cli/parsers/mcp_parser.py +152 -0
  56. claude_mpm/cli/parsers/memory_parser.py +138 -0
  57. claude_mpm/cli/parsers/monitor_parser.py +104 -0
  58. claude_mpm/cli/parsers/run_parser.py +147 -0
  59. claude_mpm/cli/parsers/tickets_parser.py +203 -0
  60. claude_mpm/cli/ticket_cli.py +7 -3
  61. claude_mpm/cli/utils.py +55 -37
  62. claude_mpm/cli_module/__init__.py +6 -6
  63. claude_mpm/cli_module/args.py +188 -140
  64. claude_mpm/cli_module/commands.py +79 -70
  65. claude_mpm/cli_module/migration_example.py +38 -60
  66. claude_mpm/config/__init__.py +32 -25
  67. claude_mpm/config/agent_config.py +151 -119
  68. claude_mpm/config/experimental_features.py +71 -73
  69. claude_mpm/config/paths.py +94 -208
  70. claude_mpm/config/socketio_config.py +84 -73
  71. claude_mpm/constants.py +35 -18
  72. claude_mpm/core/__init__.py +9 -6
  73. claude_mpm/core/agent_name_normalizer.py +68 -71
  74. claude_mpm/core/agent_registry.py +372 -521
  75. claude_mpm/core/agent_session_manager.py +74 -63
  76. claude_mpm/core/base_service.py +116 -87
  77. claude_mpm/core/cache.py +119 -153
  78. claude_mpm/core/claude_runner.py +425 -1120
  79. claude_mpm/core/config.py +263 -168
  80. claude_mpm/core/config_aliases.py +69 -61
  81. claude_mpm/core/config_constants.py +292 -0
  82. claude_mpm/core/constants.py +57 -99
  83. claude_mpm/core/container.py +211 -178
  84. claude_mpm/core/exceptions.py +233 -89
  85. claude_mpm/core/factories.py +92 -54
  86. claude_mpm/core/framework_loader.py +378 -220
  87. claude_mpm/core/hook_manager.py +198 -83
  88. claude_mpm/core/hook_performance_config.py +136 -0
  89. claude_mpm/core/injectable_service.py +61 -55
  90. claude_mpm/core/interactive_session.py +165 -155
  91. claude_mpm/core/interfaces.py +221 -195
  92. claude_mpm/core/lazy.py +96 -96
  93. claude_mpm/core/logger.py +133 -107
  94. claude_mpm/core/logging_config.py +185 -157
  95. claude_mpm/core/minimal_framework_loader.py +20 -15
  96. claude_mpm/core/mixins.py +30 -29
  97. claude_mpm/core/oneshot_session.py +215 -181
  98. claude_mpm/core/optimized_agent_loader.py +134 -138
  99. claude_mpm/core/optimized_startup.py +159 -157
  100. claude_mpm/core/pm_hook_interceptor.py +85 -72
  101. claude_mpm/core/service_registry.py +103 -101
  102. claude_mpm/core/session_manager.py +97 -87
  103. claude_mpm/core/socketio_pool.py +212 -158
  104. claude_mpm/core/tool_access_control.py +58 -51
  105. claude_mpm/core/types.py +46 -24
  106. claude_mpm/core/typing_utils.py +166 -82
  107. claude_mpm/core/unified_agent_registry.py +721 -0
  108. claude_mpm/core/unified_config.py +550 -0
  109. claude_mpm/core/unified_paths.py +549 -0
  110. claude_mpm/dashboard/index.html +1 -1
  111. claude_mpm/dashboard/open_dashboard.py +51 -17
  112. claude_mpm/dashboard/static/css/dashboard.css +27 -8
  113. claude_mpm/dashboard/static/dist/components/agent-inference.js +2 -0
  114. claude_mpm/dashboard/static/dist/components/event-processor.js +2 -0
  115. claude_mpm/dashboard/static/dist/components/event-viewer.js +2 -0
  116. claude_mpm/dashboard/static/dist/components/export-manager.js +2 -0
  117. claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +2 -0
  118. claude_mpm/dashboard/static/dist/components/hud-library-loader.js +2 -0
  119. claude_mpm/dashboard/static/dist/components/hud-manager.js +2 -0
  120. claude_mpm/dashboard/static/dist/components/hud-visualizer.js +2 -0
  121. claude_mpm/dashboard/static/dist/components/module-viewer.js +2 -0
  122. claude_mpm/dashboard/static/dist/components/session-manager.js +2 -0
  123. claude_mpm/dashboard/static/dist/components/socket-manager.js +2 -0
  124. claude_mpm/dashboard/static/dist/components/ui-state-manager.js +2 -0
  125. claude_mpm/dashboard/static/dist/components/working-directory.js +2 -0
  126. claude_mpm/dashboard/static/dist/dashboard.js +2 -0
  127. claude_mpm/dashboard/static/dist/socket-client.js +2 -0
  128. claude_mpm/dashboard/static/js/components/agent-inference.js +80 -76
  129. claude_mpm/dashboard/static/js/components/event-processor.js +71 -67
  130. claude_mpm/dashboard/static/js/components/event-viewer.js +74 -70
  131. claude_mpm/dashboard/static/js/components/export-manager.js +31 -28
  132. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +106 -92
  133. claude_mpm/dashboard/static/js/components/hud-library-loader.js +11 -11
  134. claude_mpm/dashboard/static/js/components/hud-manager.js +73 -73
  135. claude_mpm/dashboard/static/js/components/hud-visualizer.js +163 -163
  136. claude_mpm/dashboard/static/js/components/module-viewer.js +305 -233
  137. claude_mpm/dashboard/static/js/components/session-manager.js +32 -29
  138. claude_mpm/dashboard/static/js/components/socket-manager.js +27 -20
  139. claude_mpm/dashboard/static/js/components/ui-state-manager.js +21 -18
  140. claude_mpm/dashboard/static/js/components/working-directory.js +74 -71
  141. claude_mpm/dashboard/static/js/dashboard.js +178 -453
  142. claude_mpm/dashboard/static/js/extension-error-handler.js +164 -0
  143. claude_mpm/dashboard/static/js/socket-client.js +120 -54
  144. claude_mpm/dashboard/templates/index.html +40 -50
  145. claude_mpm/experimental/cli_enhancements.py +60 -58
  146. claude_mpm/generators/__init__.py +1 -1
  147. claude_mpm/generators/agent_profile_generator.py +75 -65
  148. claude_mpm/hooks/__init__.py +1 -1
  149. claude_mpm/hooks/base_hook.py +33 -28
  150. claude_mpm/hooks/claude_hooks/__init__.py +1 -1
  151. claude_mpm/hooks/claude_hooks/connection_pool.py +120 -0
  152. claude_mpm/hooks/claude_hooks/event_handlers.py +743 -0
  153. claude_mpm/hooks/claude_hooks/hook_handler.py +415 -1331
  154. claude_mpm/hooks/claude_hooks/hook_wrapper.sh +4 -4
  155. claude_mpm/hooks/claude_hooks/memory_integration.py +221 -0
  156. claude_mpm/hooks/claude_hooks/response_tracking.py +348 -0
  157. claude_mpm/hooks/claude_hooks/tool_analysis.py +230 -0
  158. claude_mpm/hooks/memory_integration_hook.py +140 -100
  159. claude_mpm/hooks/tool_call_interceptor.py +89 -76
  160. claude_mpm/hooks/validation_hooks.py +57 -49
  161. claude_mpm/init.py +145 -121
  162. claude_mpm/models/__init__.py +9 -9
  163. claude_mpm/models/agent_definition.py +33 -23
  164. claude_mpm/models/agent_session.py +228 -200
  165. claude_mpm/scripts/__init__.py +1 -1
  166. claude_mpm/scripts/socketio_daemon.py +192 -75
  167. claude_mpm/scripts/socketio_server_manager.py +328 -0
  168. claude_mpm/scripts/start_activity_logging.py +25 -22
  169. claude_mpm/services/__init__.py +68 -43
  170. claude_mpm/services/agent_capabilities_service.py +271 -0
  171. claude_mpm/services/agents/__init__.py +23 -32
  172. claude_mpm/services/agents/deployment/__init__.py +3 -3
  173. claude_mpm/services/agents/deployment/agent_config_provider.py +310 -0
  174. claude_mpm/services/agents/deployment/agent_configuration_manager.py +359 -0
  175. claude_mpm/services/agents/deployment/agent_definition_factory.py +84 -0
  176. claude_mpm/services/agents/deployment/agent_deployment.py +415 -2113
  177. claude_mpm/services/agents/deployment/agent_discovery_service.py +387 -0
  178. claude_mpm/services/agents/deployment/agent_environment_manager.py +293 -0
  179. claude_mpm/services/agents/deployment/agent_filesystem_manager.py +387 -0
  180. claude_mpm/services/agents/deployment/agent_format_converter.py +453 -0
  181. claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +161 -0
  182. claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +345 -495
  183. claude_mpm/services/agents/deployment/agent_metrics_collector.py +279 -0
  184. claude_mpm/services/agents/deployment/agent_restore_handler.py +88 -0
  185. claude_mpm/services/agents/deployment/agent_template_builder.py +406 -0
  186. claude_mpm/services/agents/deployment/agent_validator.py +352 -0
  187. claude_mpm/services/agents/deployment/agent_version_manager.py +313 -0
  188. claude_mpm/services/agents/deployment/agent_versioning.py +6 -9
  189. claude_mpm/services/agents/deployment/agents_directory_resolver.py +79 -0
  190. claude_mpm/services/agents/deployment/async_agent_deployment.py +298 -234
  191. claude_mpm/services/agents/deployment/config/__init__.py +13 -0
  192. claude_mpm/services/agents/deployment/config/deployment_config.py +182 -0
  193. claude_mpm/services/agents/deployment/config/deployment_config_manager.py +200 -0
  194. claude_mpm/services/agents/deployment/deployment_config_loader.py +54 -0
  195. claude_mpm/services/agents/deployment/deployment_type_detector.py +124 -0
  196. claude_mpm/services/agents/deployment/facade/__init__.py +18 -0
  197. claude_mpm/services/agents/deployment/facade/async_deployment_executor.py +159 -0
  198. claude_mpm/services/agents/deployment/facade/deployment_executor.py +73 -0
  199. claude_mpm/services/agents/deployment/facade/deployment_facade.py +270 -0
  200. claude_mpm/services/agents/deployment/facade/sync_deployment_executor.py +178 -0
  201. claude_mpm/services/agents/deployment/interface_adapter.py +227 -0
  202. claude_mpm/services/agents/deployment/lifecycle_health_checker.py +85 -0
  203. claude_mpm/services/agents/deployment/lifecycle_performance_tracker.py +100 -0
  204. claude_mpm/services/agents/deployment/pipeline/__init__.py +32 -0
  205. claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +158 -0
  206. claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +159 -0
  207. claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +169 -0
  208. claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +19 -0
  209. claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +195 -0
  210. claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +119 -0
  211. claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +79 -0
  212. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +90 -0
  213. claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +100 -0
  214. claude_mpm/services/agents/deployment/processors/__init__.py +15 -0
  215. claude_mpm/services/agents/deployment/processors/agent_deployment_context.py +98 -0
  216. claude_mpm/services/agents/deployment/processors/agent_deployment_result.py +235 -0
  217. claude_mpm/services/agents/deployment/processors/agent_processor.py +258 -0
  218. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +318 -0
  219. claude_mpm/services/agents/deployment/results/__init__.py +13 -0
  220. claude_mpm/services/agents/deployment/results/deployment_metrics.py +200 -0
  221. claude_mpm/services/agents/deployment/results/deployment_result_builder.py +249 -0
  222. claude_mpm/services/agents/deployment/strategies/__init__.py +25 -0
  223. claude_mpm/services/agents/deployment/strategies/base_strategy.py +119 -0
  224. claude_mpm/services/agents/deployment/strategies/project_strategy.py +150 -0
  225. claude_mpm/services/agents/deployment/strategies/strategy_selector.py +117 -0
  226. claude_mpm/services/agents/deployment/strategies/system_strategy.py +116 -0
  227. claude_mpm/services/agents/deployment/strategies/user_strategy.py +137 -0
  228. claude_mpm/services/agents/deployment/system_instructions_deployer.py +108 -0
  229. claude_mpm/services/agents/deployment/validation/__init__.py +19 -0
  230. claude_mpm/services/agents/deployment/validation/agent_validator.py +323 -0
  231. claude_mpm/services/agents/deployment/validation/deployment_validator.py +238 -0
  232. claude_mpm/services/agents/deployment/validation/template_validator.py +299 -0
  233. claude_mpm/services/agents/deployment/validation/validation_result.py +226 -0
  234. claude_mpm/services/agents/loading/__init__.py +2 -2
  235. claude_mpm/services/agents/loading/agent_profile_loader.py +259 -229
  236. claude_mpm/services/agents/loading/base_agent_manager.py +90 -81
  237. claude_mpm/services/agents/loading/framework_agent_loader.py +154 -129
  238. claude_mpm/services/agents/management/__init__.py +2 -2
  239. claude_mpm/services/agents/management/agent_capabilities_generator.py +72 -58
  240. claude_mpm/services/agents/management/agent_management_service.py +209 -156
  241. claude_mpm/services/agents/memory/__init__.py +9 -6
  242. claude_mpm/services/agents/memory/agent_memory_manager.py +218 -1152
  243. claude_mpm/services/agents/memory/agent_persistence_service.py +20 -16
  244. claude_mpm/services/agents/memory/analyzer.py +430 -0
  245. claude_mpm/services/agents/memory/content_manager.py +376 -0
  246. claude_mpm/services/agents/memory/template_generator.py +468 -0
  247. claude_mpm/services/agents/registry/__init__.py +7 -10
  248. claude_mpm/services/agents/registry/deployed_agent_discovery.py +122 -97
  249. claude_mpm/services/agents/registry/modification_tracker.py +351 -285
  250. claude_mpm/services/async_session_logger.py +187 -153
  251. claude_mpm/services/claude_session_logger.py +87 -72
  252. claude_mpm/services/command_handler_service.py +217 -0
  253. claude_mpm/services/communication/__init__.py +3 -2
  254. claude_mpm/services/core/__init__.py +50 -97
  255. claude_mpm/services/core/base.py +60 -53
  256. claude_mpm/services/core/interfaces/__init__.py +188 -0
  257. claude_mpm/services/core/interfaces/agent.py +351 -0
  258. claude_mpm/services/core/interfaces/communication.py +343 -0
  259. claude_mpm/services/core/interfaces/infrastructure.py +413 -0
  260. claude_mpm/services/core/interfaces/service.py +434 -0
  261. claude_mpm/services/core/interfaces.py +19 -944
  262. claude_mpm/services/event_aggregator.py +208 -170
  263. claude_mpm/services/exceptions.py +387 -308
  264. claude_mpm/services/framework_claude_md_generator/__init__.py +75 -79
  265. claude_mpm/services/framework_claude_md_generator/content_assembler.py +69 -60
  266. claude_mpm/services/framework_claude_md_generator/content_validator.py +65 -61
  267. claude_mpm/services/framework_claude_md_generator/deployment_manager.py +68 -49
  268. claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +34 -34
  269. claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +25 -22
  270. claude_mpm/services/framework_claude_md_generator/section_generators/claude_pm_init.py +10 -10
  271. claude_mpm/services/framework_claude_md_generator/section_generators/core_responsibilities.py +4 -3
  272. claude_mpm/services/framework_claude_md_generator/section_generators/delegation_constraints.py +4 -3
  273. claude_mpm/services/framework_claude_md_generator/section_generators/environment_config.py +4 -3
  274. claude_mpm/services/framework_claude_md_generator/section_generators/footer.py +6 -5
  275. claude_mpm/services/framework_claude_md_generator/section_generators/header.py +8 -7
  276. claude_mpm/services/framework_claude_md_generator/section_generators/orchestration_principles.py +4 -3
  277. claude_mpm/services/framework_claude_md_generator/section_generators/role_designation.py +6 -5
  278. claude_mpm/services/framework_claude_md_generator/section_generators/subprocess_validation.py +9 -8
  279. claude_mpm/services/framework_claude_md_generator/section_generators/todo_task_tools.py +4 -3
  280. claude_mpm/services/framework_claude_md_generator/section_generators/troubleshooting.py +5 -4
  281. claude_mpm/services/framework_claude_md_generator/section_manager.py +28 -27
  282. claude_mpm/services/framework_claude_md_generator/version_manager.py +30 -28
  283. claude_mpm/services/hook_service.py +106 -114
  284. claude_mpm/services/infrastructure/__init__.py +7 -5
  285. claude_mpm/services/infrastructure/context_preservation.py +233 -199
  286. claude_mpm/services/infrastructure/daemon_manager.py +279 -0
  287. claude_mpm/services/infrastructure/logging.py +83 -76
  288. claude_mpm/services/infrastructure/monitoring.py +547 -404
  289. claude_mpm/services/mcp_gateway/__init__.py +30 -13
  290. claude_mpm/services/mcp_gateway/config/__init__.py +2 -2
  291. claude_mpm/services/mcp_gateway/config/config_loader.py +61 -56
  292. claude_mpm/services/mcp_gateway/config/config_schema.py +50 -41
  293. claude_mpm/services/mcp_gateway/config/configuration.py +82 -75
  294. claude_mpm/services/mcp_gateway/core/__init__.py +13 -20
  295. claude_mpm/services/mcp_gateway/core/base.py +80 -67
  296. claude_mpm/services/mcp_gateway/core/exceptions.py +60 -46
  297. claude_mpm/services/mcp_gateway/core/interfaces.py +87 -84
  298. claude_mpm/services/mcp_gateway/main.py +287 -137
  299. claude_mpm/services/mcp_gateway/registry/__init__.py +1 -1
  300. claude_mpm/services/mcp_gateway/registry/service_registry.py +97 -94
  301. claude_mpm/services/mcp_gateway/registry/tool_registry.py +135 -126
  302. claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
  303. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +105 -110
  304. claude_mpm/services/mcp_gateway/server/stdio_handler.py +105 -107
  305. claude_mpm/services/mcp_gateway/server/stdio_server.py +691 -0
  306. claude_mpm/services/mcp_gateway/tools/__init__.py +4 -2
  307. claude_mpm/services/mcp_gateway/tools/base_adapter.py +109 -119
  308. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +283 -215
  309. claude_mpm/services/mcp_gateway/tools/hello_world.py +122 -120
  310. claude_mpm/services/mcp_gateway/tools/ticket_tools.py +652 -0
  311. claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +606 -0
  312. claude_mpm/services/memory/__init__.py +2 -2
  313. claude_mpm/services/memory/builder.py +451 -362
  314. claude_mpm/services/memory/cache/__init__.py +2 -2
  315. claude_mpm/services/memory/cache/shared_prompt_cache.py +232 -194
  316. claude_mpm/services/memory/cache/simple_cache.py +107 -93
  317. claude_mpm/services/memory/indexed_memory.py +195 -193
  318. claude_mpm/services/memory/optimizer.py +267 -234
  319. claude_mpm/services/memory/router.py +571 -263
  320. claude_mpm/services/memory_hook_service.py +237 -0
  321. claude_mpm/services/port_manager.py +223 -0
  322. claude_mpm/services/project/__init__.py +3 -3
  323. claude_mpm/services/project/analyzer.py +451 -305
  324. claude_mpm/services/project/registry.py +262 -240
  325. claude_mpm/services/recovery_manager.py +287 -231
  326. claude_mpm/services/response_tracker.py +87 -67
  327. claude_mpm/services/runner_configuration_service.py +587 -0
  328. claude_mpm/services/session_management_service.py +304 -0
  329. claude_mpm/services/socketio/__init__.py +4 -4
  330. claude_mpm/services/socketio/client_proxy.py +174 -0
  331. claude_mpm/services/socketio/handlers/__init__.py +3 -3
  332. claude_mpm/services/socketio/handlers/base.py +44 -30
  333. claude_mpm/services/socketio/handlers/connection.py +145 -65
  334. claude_mpm/services/socketio/handlers/file.py +123 -108
  335. claude_mpm/services/socketio/handlers/git.py +607 -373
  336. claude_mpm/services/socketio/handlers/hook.py +170 -0
  337. claude_mpm/services/socketio/handlers/memory.py +4 -4
  338. claude_mpm/services/socketio/handlers/project.py +4 -4
  339. claude_mpm/services/socketio/handlers/registry.py +53 -38
  340. claude_mpm/services/socketio/server/__init__.py +18 -0
  341. claude_mpm/services/socketio/server/broadcaster.py +252 -0
  342. claude_mpm/services/socketio/server/core.py +399 -0
  343. claude_mpm/services/socketio/server/main.py +323 -0
  344. claude_mpm/services/socketio_client_manager.py +160 -133
  345. claude_mpm/services/socketio_server.py +36 -1885
  346. claude_mpm/services/subprocess_launcher_service.py +316 -0
  347. claude_mpm/services/system_instructions_service.py +258 -0
  348. claude_mpm/services/ticket_manager.py +19 -533
  349. claude_mpm/services/utility_service.py +285 -0
  350. claude_mpm/services/version_control/__init__.py +18 -21
  351. claude_mpm/services/version_control/branch_strategy.py +20 -10
  352. claude_mpm/services/version_control/conflict_resolution.py +37 -13
  353. claude_mpm/services/version_control/git_operations.py +52 -21
  354. claude_mpm/services/version_control/semantic_versioning.py +92 -53
  355. claude_mpm/services/version_control/version_parser.py +145 -125
  356. claude_mpm/services/version_service.py +270 -0
  357. claude_mpm/storage/__init__.py +2 -2
  358. claude_mpm/storage/state_storage.py +177 -181
  359. claude_mpm/ticket_wrapper.py +2 -2
  360. claude_mpm/utils/__init__.py +2 -2
  361. claude_mpm/utils/agent_dependency_loader.py +453 -243
  362. claude_mpm/utils/config_manager.py +157 -118
  363. claude_mpm/utils/console.py +1 -1
  364. claude_mpm/utils/dependency_cache.py +102 -107
  365. claude_mpm/utils/dependency_manager.py +52 -47
  366. claude_mpm/utils/dependency_strategies.py +131 -96
  367. claude_mpm/utils/environment_context.py +110 -102
  368. claude_mpm/utils/error_handler.py +75 -55
  369. claude_mpm/utils/file_utils.py +80 -67
  370. claude_mpm/utils/framework_detection.py +12 -11
  371. claude_mpm/utils/import_migration_example.py +12 -60
  372. claude_mpm/utils/imports.py +48 -45
  373. claude_mpm/utils/path_operations.py +100 -93
  374. claude_mpm/utils/robust_installer.py +172 -164
  375. claude_mpm/utils/session_logging.py +30 -23
  376. claude_mpm/utils/subprocess_utils.py +99 -61
  377. claude_mpm/validation/__init__.py +1 -1
  378. claude_mpm/validation/agent_validator.py +151 -111
  379. claude_mpm/validation/frontmatter_validator.py +92 -71
  380. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/METADATA +27 -1
  381. claude_mpm-4.0.3.dist-info/RECORD +402 -0
  382. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/entry_points.txt +1 -0
  383. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/licenses/LICENSE +1 -1
  384. claude_mpm/cli/commands/run_guarded.py +0 -511
  385. claude_mpm/config/memory_guardian_config.py +0 -325
  386. claude_mpm/config/memory_guardian_yaml.py +0 -335
  387. claude_mpm/core/config_paths.py +0 -150
  388. claude_mpm/core/memory_aware_runner.py +0 -353
  389. claude_mpm/dashboard/static/js/dashboard-original.js +0 -4134
  390. claude_mpm/deployment_paths.py +0 -261
  391. claude_mpm/hooks/claude_hooks/hook_handler_fixed.py +0 -454
  392. claude_mpm/models/state_models.py +0 -433
  393. claude_mpm/services/agent/__init__.py +0 -24
  394. claude_mpm/services/agent/deployment.py +0 -2548
  395. claude_mpm/services/agent/management.py +0 -598
  396. claude_mpm/services/agent/registry.py +0 -813
  397. claude_mpm/services/agents/registry/agent_registry.py +0 -813
  398. claude_mpm/services/communication/socketio.py +0 -1935
  399. claude_mpm/services/communication/websocket.py +0 -479
  400. claude_mpm/services/framework_claude_md_generator.py +0 -624
  401. claude_mpm/services/health_monitor.py +0 -893
  402. claude_mpm/services/infrastructure/graceful_degradation.py +0 -616
  403. claude_mpm/services/infrastructure/health_monitor.py +0 -775
  404. claude_mpm/services/infrastructure/memory_dashboard.py +0 -479
  405. claude_mpm/services/infrastructure/memory_guardian.py +0 -944
  406. claude_mpm/services/infrastructure/restart_protection.py +0 -642
  407. claude_mpm/services/infrastructure/state_manager.py +0 -774
  408. claude_mpm/services/mcp_gateway/manager.py +0 -334
  409. claude_mpm/services/optimized_hook_service.py +0 -542
  410. claude_mpm/services/project_analyzer.py +0 -864
  411. claude_mpm/services/project_registry.py +0 -608
  412. claude_mpm/services/standalone_socketio_server.py +0 -1300
  413. claude_mpm/services/ticket_manager_di.py +0 -318
  414. claude_mpm/services/ticketing_service_original.py +0 -510
  415. claude_mpm/utils/paths.py +0 -395
  416. claude_mpm/utils/platform_memory.py +0 -524
  417. claude_mpm-3.9.11.dist-info/RECORD +0 -306
  418. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/WHEEL +0 -0
  419. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/top_level.txt +0 -0
@@ -16,21 +16,21 @@ Design Principles:
16
16
 
17
17
  import asyncio
18
18
  import logging
19
- import time
19
+ import socket
20
20
  import threading
21
+ import time
21
22
  from abc import ABC, abstractmethod
22
23
  from collections import deque
23
- from dataclasses import dataclass, asdict
24
+ from dataclasses import asdict, dataclass
24
25
  from datetime import datetime, timezone
25
26
  from enum import Enum
26
- from pathlib import Path
27
- from typing import Any, Dict, List, Optional, Callable, Union
28
- import json
29
- import socket
27
+ from typing import Any, Callable, Dict, List, Optional, Union
28
+
30
29
  from claude_mpm.core.constants import ResourceLimits, TimeoutConfig
31
30
 
32
31
  try:
33
32
  import psutil
33
+
34
34
  PSUTIL_AVAILABLE = True
35
35
  except ImportError:
36
36
  PSUTIL_AVAILABLE = False
@@ -39,6 +39,7 @@ except ImportError:
39
39
 
40
40
  class HealthStatus(Enum):
41
41
  """Health status levels for monitoring."""
42
+
42
43
  HEALTHY = "healthy"
43
44
  WARNING = "warning"
44
45
  CRITICAL = "critical"
@@ -48,6 +49,7 @@ class HealthStatus(Enum):
48
49
  @dataclass
49
50
  class HealthMetric:
50
51
  """Individual health metric data structure."""
52
+
51
53
  name: str
52
54
  value: Union[int, float, str, bool]
53
55
  status: HealthStatus
@@ -55,60 +57,71 @@ class HealthMetric:
55
57
  unit: Optional[str] = None
56
58
  timestamp: float = None
57
59
  message: Optional[str] = None
58
-
60
+
59
61
  def __post_init__(self):
60
62
  if self.timestamp is None:
61
63
  self.timestamp = time.time()
62
-
64
+
63
65
  def to_dict(self) -> Dict[str, Any]:
64
66
  """Convert metric to dictionary format."""
65
67
  result = asdict(self)
66
- result['status'] = self.status.value
67
- result['timestamp_iso'] = datetime.fromtimestamp(self.timestamp, timezone.utc).isoformat()
68
+ result["status"] = self.status.value
69
+ result["timestamp_iso"] = datetime.fromtimestamp(
70
+ self.timestamp, timezone.utc
71
+ ).isoformat()
68
72
  return result
69
73
 
70
74
 
71
75
  @dataclass
72
76
  class HealthCheckResult:
73
77
  """Result of a health check operation."""
78
+
74
79
  overall_status: HealthStatus
75
80
  metrics: List[HealthMetric]
76
81
  timestamp: float
77
82
  duration_ms: float
78
83
  errors: List[str]
79
-
84
+
80
85
  def __post_init__(self):
81
- if not hasattr(self, 'timestamp') or self.timestamp is None:
86
+ if not hasattr(self, "timestamp") or self.timestamp is None:
82
87
  self.timestamp = time.time()
83
-
88
+
84
89
  def to_dict(self) -> Dict[str, Any]:
85
90
  """Convert health check result to dictionary format."""
86
91
  return {
87
- 'overall_status': self.overall_status.value,
88
- 'metrics': [metric.to_dict() for metric in self.metrics],
89
- 'timestamp': self.timestamp,
90
- 'timestamp_iso': datetime.fromtimestamp(self.timestamp, timezone.utc).isoformat(),
91
- 'duration_ms': self.duration_ms,
92
- 'errors': self.errors,
93
- 'metric_count': len(self.metrics),
94
- 'healthy_metrics': len([m for m in self.metrics if m.status == HealthStatus.HEALTHY]),
95
- 'warning_metrics': len([m for m in self.metrics if m.status == HealthStatus.WARNING]),
96
- 'critical_metrics': len([m for m in self.metrics if m.status == HealthStatus.CRITICAL])
92
+ "overall_status": self.overall_status.value,
93
+ "metrics": [metric.to_dict() for metric in self.metrics],
94
+ "timestamp": self.timestamp,
95
+ "timestamp_iso": datetime.fromtimestamp(
96
+ self.timestamp, timezone.utc
97
+ ).isoformat(),
98
+ "duration_ms": self.duration_ms,
99
+ "errors": self.errors,
100
+ "metric_count": len(self.metrics),
101
+ "healthy_metrics": len(
102
+ [m for m in self.metrics if m.status == HealthStatus.HEALTHY]
103
+ ),
104
+ "warning_metrics": len(
105
+ [m for m in self.metrics if m.status == HealthStatus.WARNING]
106
+ ),
107
+ "critical_metrics": len(
108
+ [m for m in self.metrics if m.status == HealthStatus.CRITICAL]
109
+ ),
97
110
  }
98
111
 
99
112
 
100
113
  class HealthChecker(ABC):
101
114
  """Abstract base class for health checkers.
102
-
115
+
103
116
  Health checkers implement specific monitoring logic for different aspects
104
117
  of the system (process resources, network connectivity, service health, etc.).
105
118
  """
106
-
119
+
107
120
  @abstractmethod
108
121
  def get_name(self) -> str:
109
122
  """Get the name of this health checker."""
110
123
  pass
111
-
124
+
112
125
  @abstractmethod
113
126
  async def check_health(self) -> List[HealthMetric]:
114
127
  """Perform health check and return metrics."""
@@ -117,7 +130,7 @@ class HealthChecker(ABC):
117
130
 
118
131
  class ProcessResourceChecker(HealthChecker):
119
132
  """Health checker for process resource usage.
120
-
133
+
121
134
  Monitors:
122
135
  - CPU usage percentage
123
136
  - Memory usage (RSS, VMS)
@@ -125,11 +138,16 @@ class ProcessResourceChecker(HealthChecker):
125
138
  - Thread count
126
139
  - Process status
127
140
  """
128
-
129
- def __init__(self, pid: int, cpu_threshold: float = 80.0,
130
- memory_threshold_mb: int = 500, fd_threshold: int = 1000):
141
+
142
+ def __init__(
143
+ self,
144
+ pid: int,
145
+ cpu_threshold: float = 80.0,
146
+ memory_threshold_mb: int = 500,
147
+ fd_threshold: int = 1000,
148
+ ):
131
149
  """Initialize process resource checker.
132
-
150
+
133
151
  Args:
134
152
  pid: Process ID to monitor
135
153
  cpu_threshold: CPU usage threshold as percentage
@@ -142,197 +160,251 @@ class ProcessResourceChecker(HealthChecker):
142
160
  self.fd_threshold = fd_threshold
143
161
  self.process = None
144
162
  self.logger = logging.getLogger(f"{__name__}.ProcessResourceChecker")
145
-
163
+
146
164
  if PSUTIL_AVAILABLE:
147
165
  try:
148
166
  self.process = psutil.Process(pid)
149
167
  except psutil.NoSuchProcess:
150
168
  self.logger.warning(f"Process {pid} not found for monitoring")
151
-
169
+
152
170
  def get_name(self) -> str:
153
171
  return f"process_resources_{self.pid}"
154
-
172
+
155
173
  async def check_health(self) -> List[HealthMetric]:
156
174
  """Check process resource usage."""
157
175
  metrics = []
158
-
176
+
159
177
  if not PSUTIL_AVAILABLE:
160
- metrics.append(HealthMetric(
161
- name="psutil_availability",
162
- value=False,
163
- status=HealthStatus.WARNING,
164
- message="psutil not available for enhanced monitoring"
165
- ))
178
+ metrics.append(
179
+ HealthMetric(
180
+ name="psutil_availability",
181
+ value=False,
182
+ status=HealthStatus.WARNING,
183
+ message="psutil not available for enhanced monitoring",
184
+ )
185
+ )
166
186
  return metrics
167
-
187
+
168
188
  if not self.process:
169
- metrics.append(HealthMetric(
170
- name="process_exists",
171
- value=False,
172
- status=HealthStatus.CRITICAL,
173
- message=f"Process {self.pid} not found"
174
- ))
189
+ metrics.append(
190
+ HealthMetric(
191
+ name="process_exists",
192
+ value=False,
193
+ status=HealthStatus.CRITICAL,
194
+ message=f"Process {self.pid} not found",
195
+ )
196
+ )
175
197
  return metrics
176
-
198
+
177
199
  try:
178
200
  # Check if process still exists
179
201
  if not self.process.is_running():
180
- metrics.append(HealthMetric(
181
- name="process_exists",
182
- value=False,
183
- status=HealthStatus.CRITICAL,
184
- message=f"Process {self.pid} is no longer running"
185
- ))
202
+ metrics.append(
203
+ HealthMetric(
204
+ name="process_exists",
205
+ value=False,
206
+ status=HealthStatus.CRITICAL,
207
+ message=f"Process {self.pid} is no longer running",
208
+ )
209
+ )
186
210
  return metrics
187
-
211
+
188
212
  # Process status
189
213
  status = self.process.status()
190
- process_healthy = status not in [psutil.STATUS_ZOMBIE, psutil.STATUS_DEAD, psutil.STATUS_STOPPED]
191
- metrics.append(HealthMetric(
192
- name="process_status",
193
- value=status,
194
- status=HealthStatus.HEALTHY if process_healthy else HealthStatus.CRITICAL,
195
- message=f"Process status: {status}"
196
- ))
197
-
214
+ process_healthy = status not in [
215
+ psutil.STATUS_ZOMBIE,
216
+ psutil.STATUS_DEAD,
217
+ psutil.STATUS_STOPPED,
218
+ ]
219
+ metrics.append(
220
+ HealthMetric(
221
+ name="process_status",
222
+ value=status,
223
+ status=HealthStatus.HEALTHY
224
+ if process_healthy
225
+ else HealthStatus.CRITICAL,
226
+ message=f"Process status: {status}",
227
+ )
228
+ )
229
+
198
230
  # CPU usage
199
231
  try:
200
- cpu_percent = self.process.cpu_percent(interval=TimeoutConfig.CPU_SAMPLE_INTERVAL)
232
+ cpu_percent = self.process.cpu_percent(
233
+ interval=TimeoutConfig.CPU_SAMPLE_INTERVAL
234
+ )
201
235
  cpu_status = HealthStatus.HEALTHY
202
236
  if cpu_percent > self.cpu_threshold:
203
- cpu_status = HealthStatus.WARNING if cpu_percent < self.cpu_threshold * 1.2 else HealthStatus.CRITICAL
204
-
205
- metrics.append(HealthMetric(
206
- name="cpu_usage_percent",
207
- value=round(cpu_percent, 2),
208
- status=cpu_status,
209
- threshold=self.cpu_threshold,
210
- unit="%"
211
- ))
237
+ cpu_status = (
238
+ HealthStatus.WARNING
239
+ if cpu_percent < self.cpu_threshold * 1.2
240
+ else HealthStatus.CRITICAL
241
+ )
242
+
243
+ metrics.append(
244
+ HealthMetric(
245
+ name="cpu_usage_percent",
246
+ value=round(cpu_percent, 2),
247
+ status=cpu_status,
248
+ threshold=self.cpu_threshold,
249
+ unit="%",
250
+ )
251
+ )
212
252
  except Exception as e:
213
- metrics.append(HealthMetric(
214
- name="cpu_usage_percent",
215
- value=-1,
216
- status=HealthStatus.UNKNOWN,
217
- message=f"Failed to get CPU usage: {e}"
218
- ))
219
-
253
+ metrics.append(
254
+ HealthMetric(
255
+ name="cpu_usage_percent",
256
+ value=-1,
257
+ status=HealthStatus.UNKNOWN,
258
+ message=f"Failed to get CPU usage: {e}",
259
+ )
260
+ )
261
+
220
262
  # Memory usage
221
263
  try:
222
264
  memory_info = self.process.memory_info()
223
265
  memory_mb = memory_info.rss / ResourceLimits.BYTES_TO_MB
224
266
  memory_status = HealthStatus.HEALTHY
225
267
  if memory_mb > self.memory_threshold_mb:
226
- memory_status = HealthStatus.WARNING if memory_mb < self.memory_threshold_mb * 1.2 else HealthStatus.CRITICAL
227
-
228
- metrics.append(HealthMetric(
229
- name="memory_usage_mb",
230
- value=round(memory_mb, 2),
231
- status=memory_status,
232
- threshold=self.memory_threshold_mb,
233
- unit="MB"
234
- ))
235
-
236
- metrics.append(HealthMetric(
237
- name="memory_vms_mb",
238
- value=round(memory_info.vms / ResourceLimits.BYTES_TO_MB, 2),
239
- status=HealthStatus.HEALTHY,
240
- unit="MB"
241
- ))
268
+ memory_status = (
269
+ HealthStatus.WARNING
270
+ if memory_mb < self.memory_threshold_mb * 1.2
271
+ else HealthStatus.CRITICAL
272
+ )
273
+
274
+ metrics.append(
275
+ HealthMetric(
276
+ name="memory_usage_mb",
277
+ value=round(memory_mb, 2),
278
+ status=memory_status,
279
+ threshold=self.memory_threshold_mb,
280
+ unit="MB",
281
+ )
282
+ )
283
+
284
+ metrics.append(
285
+ HealthMetric(
286
+ name="memory_vms_mb",
287
+ value=round(memory_info.vms / ResourceLimits.BYTES_TO_MB, 2),
288
+ status=HealthStatus.HEALTHY,
289
+ unit="MB",
290
+ )
291
+ )
242
292
  except Exception as e:
243
- metrics.append(HealthMetric(
244
- name="memory_usage_mb",
245
- value=-1,
246
- status=HealthStatus.UNKNOWN,
247
- message=f"Failed to get memory usage: {e}"
248
- ))
249
-
293
+ metrics.append(
294
+ HealthMetric(
295
+ name="memory_usage_mb",
296
+ value=-1,
297
+ status=HealthStatus.UNKNOWN,
298
+ message=f"Failed to get memory usage: {e}",
299
+ )
300
+ )
301
+
250
302
  # File descriptors (Unix only)
251
- if hasattr(self.process, 'num_fds'):
303
+ if hasattr(self.process, "num_fds"):
252
304
  try:
253
305
  fd_count = self.process.num_fds()
254
306
  fd_status = HealthStatus.HEALTHY
255
307
  if fd_count > self.fd_threshold:
256
- fd_status = HealthStatus.WARNING if fd_count < self.fd_threshold * 1.2 else HealthStatus.CRITICAL
257
-
258
- metrics.append(HealthMetric(
259
- name="file_descriptors",
260
- value=fd_count,
261
- status=fd_status,
262
- threshold=self.fd_threshold
263
- ))
308
+ fd_status = (
309
+ HealthStatus.WARNING
310
+ if fd_count < self.fd_threshold * 1.2
311
+ else HealthStatus.CRITICAL
312
+ )
313
+
314
+ metrics.append(
315
+ HealthMetric(
316
+ name="file_descriptors",
317
+ value=fd_count,
318
+ status=fd_status,
319
+ threshold=self.fd_threshold,
320
+ )
321
+ )
264
322
  except Exception as e:
265
- metrics.append(HealthMetric(
266
- name="file_descriptors",
267
- value=-1,
268
- status=HealthStatus.UNKNOWN,
269
- message=f"Failed to get file descriptor count: {e}"
270
- ))
271
-
323
+ metrics.append(
324
+ HealthMetric(
325
+ name="file_descriptors",
326
+ value=-1,
327
+ status=HealthStatus.UNKNOWN,
328
+ message=f"Failed to get file descriptor count: {e}",
329
+ )
330
+ )
331
+
272
332
  # Thread count
273
333
  try:
274
334
  thread_count = self.process.num_threads()
275
- metrics.append(HealthMetric(
276
- name="thread_count",
277
- value=thread_count,
278
- status=HealthStatus.HEALTHY,
279
- ))
335
+ metrics.append(
336
+ HealthMetric(
337
+ name="thread_count",
338
+ value=thread_count,
339
+ status=HealthStatus.HEALTHY,
340
+ )
341
+ )
280
342
  except Exception as e:
281
- metrics.append(HealthMetric(
282
- name="thread_count",
283
- value=-1,
284
- status=HealthStatus.UNKNOWN,
285
- message=f"Failed to get thread count: {e}"
286
- ))
287
-
343
+ metrics.append(
344
+ HealthMetric(
345
+ name="thread_count",
346
+ value=-1,
347
+ status=HealthStatus.UNKNOWN,
348
+ message=f"Failed to get thread count: {e}",
349
+ )
350
+ )
351
+
288
352
  # Process create time (for validation)
289
353
  try:
290
354
  create_time = self.process.create_time()
291
- metrics.append(HealthMetric(
292
- name="process_start_time",
293
- value=create_time,
294
- status=HealthStatus.HEALTHY,
295
- unit="timestamp"
296
- ))
355
+ metrics.append(
356
+ HealthMetric(
357
+ name="process_start_time",
358
+ value=create_time,
359
+ status=HealthStatus.HEALTHY,
360
+ unit="timestamp",
361
+ )
362
+ )
297
363
  except Exception as e:
298
- metrics.append(HealthMetric(
299
- name="process_start_time",
300
- value=-1,
301
- status=HealthStatus.UNKNOWN,
302
- message=f"Failed to get process start time: {e}"
303
- ))
304
-
364
+ metrics.append(
365
+ HealthMetric(
366
+ name="process_start_time",
367
+ value=-1,
368
+ status=HealthStatus.UNKNOWN,
369
+ message=f"Failed to get process start time: {e}",
370
+ )
371
+ )
372
+
305
373
  except psutil.NoSuchProcess:
306
- metrics.append(HealthMetric(
307
- name="process_exists",
308
- value=False,
309
- status=HealthStatus.CRITICAL,
310
- message=f"Process {self.pid} no longer exists"
311
- ))
374
+ metrics.append(
375
+ HealthMetric(
376
+ name="process_exists",
377
+ value=False,
378
+ status=HealthStatus.CRITICAL,
379
+ message=f"Process {self.pid} no longer exists",
380
+ )
381
+ )
312
382
  except Exception as e:
313
383
  self.logger.error(f"Error checking process health: {e}")
314
- metrics.append(HealthMetric(
315
- name="process_check_error",
316
- value=str(e),
317
- status=HealthStatus.UNKNOWN,
318
- message=f"Unexpected error during process health check: {e}"
319
- ))
320
-
384
+ metrics.append(
385
+ HealthMetric(
386
+ name="process_check_error",
387
+ value=str(e),
388
+ status=HealthStatus.UNKNOWN,
389
+ message=f"Unexpected error during process health check: {e}",
390
+ )
391
+ )
392
+
321
393
  return metrics
322
394
 
323
395
 
324
396
  class NetworkConnectivityChecker(HealthChecker):
325
397
  """Health checker for network connectivity.
326
-
398
+
327
399
  Monitors:
328
400
  - Port availability and binding status
329
401
  - Socket connection health
330
402
  - Network interface status
331
403
  """
332
-
404
+
333
405
  def __init__(self, host: str, port: int, timeout: float = 1.0):
334
406
  """Initialize network connectivity checker.
335
-
407
+
336
408
  Args:
337
409
  host: Host address to check
338
410
  port: Port number to check
@@ -342,14 +414,14 @@ class NetworkConnectivityChecker(HealthChecker):
342
414
  self.port = port
343
415
  self.timeout = timeout
344
416
  self.logger = logging.getLogger(f"{__name__}.NetworkConnectivityChecker")
345
-
417
+
346
418
  def get_name(self) -> str:
347
419
  return f"network_connectivity_{self.host}_{self.port}"
348
-
420
+
349
421
  async def check_health(self) -> List[HealthMetric]:
350
422
  """Check network connectivity."""
351
423
  metrics = []
352
-
424
+
353
425
  # Check port binding
354
426
  try:
355
427
  # Try to connect to the port
@@ -357,64 +429,78 @@ class NetworkConnectivityChecker(HealthChecker):
357
429
  sock.settimeout(self.timeout)
358
430
  result = sock.connect_ex((self.host, self.port))
359
431
  sock.close()
360
-
432
+
361
433
  if result == 0:
362
- metrics.append(HealthMetric(
363
- name="port_accessible",
364
- value=True,
365
- status=HealthStatus.HEALTHY,
366
- message=f"Port {self.port} is accessible on {self.host}"
367
- ))
434
+ metrics.append(
435
+ HealthMetric(
436
+ name="port_accessible",
437
+ value=True,
438
+ status=HealthStatus.HEALTHY,
439
+ message=f"Port {self.port} is accessible on {self.host}",
440
+ )
441
+ )
368
442
  else:
369
- metrics.append(HealthMetric(
443
+ metrics.append(
444
+ HealthMetric(
445
+ name="port_accessible",
446
+ value=False,
447
+ status=HealthStatus.CRITICAL,
448
+ message=f"Port {self.port} is not accessible on {self.host}",
449
+ )
450
+ )
451
+ except Exception as e:
452
+ metrics.append(
453
+ HealthMetric(
370
454
  name="port_accessible",
371
455
  value=False,
372
- status=HealthStatus.CRITICAL,
373
- message=f"Port {self.port} is not accessible on {self.host}"
374
- ))
375
- except Exception as e:
376
- metrics.append(HealthMetric(
377
- name="port_accessible",
378
- value=False,
379
- status=HealthStatus.UNKNOWN,
380
- message=f"Error checking port accessibility: {e}"
381
- ))
382
-
456
+ status=HealthStatus.UNKNOWN,
457
+ message=f"Error checking port accessibility: {e}",
458
+ )
459
+ )
460
+
383
461
  # Check if we can create a socket (resource availability)
384
462
  try:
385
463
  test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
386
464
  test_sock.close()
387
- metrics.append(HealthMetric(
388
- name="socket_creation",
389
- value=True,
390
- status=HealthStatus.HEALTHY,
391
- message="Socket creation successful"
392
- ))
465
+ metrics.append(
466
+ HealthMetric(
467
+ name="socket_creation",
468
+ value=True,
469
+ status=HealthStatus.HEALTHY,
470
+ message="Socket creation successful",
471
+ )
472
+ )
393
473
  except Exception as e:
394
- metrics.append(HealthMetric(
395
- name="socket_creation",
396
- value=False,
397
- status=HealthStatus.CRITICAL,
398
- message=f"Failed to create socket: {e}"
399
- ))
400
-
474
+ metrics.append(
475
+ HealthMetric(
476
+ name="socket_creation",
477
+ value=False,
478
+ status=HealthStatus.CRITICAL,
479
+ message=f"Failed to create socket: {e}",
480
+ )
481
+ )
482
+
401
483
  return metrics
402
484
 
403
485
 
404
486
  class ServiceHealthChecker(HealthChecker):
405
487
  """Health checker for service-specific metrics.
406
-
488
+
407
489
  Monitors:
408
490
  - Connected clients count
409
491
  - Event processing rate
410
492
  - Error rates
411
493
  - Response times
412
494
  """
413
-
414
- def __init__(self, service_stats: Dict[str, Any],
415
- max_clients: int = 1000, max_error_rate: float = 0.1):
495
+
496
+ def __init__(
497
+ self,
498
+ service_stats: Dict[str, Any],
499
+ max_clients: int = 1000,
500
+ max_error_rate: float = 0.1,
501
+ ):
416
502
  """Initialize service health checker.
417
-
503
+
418
504
  Args:
419
505
  service_stats: Reference to service statistics dictionary
420
506
  max_clients: Maximum allowed connected clients
@@ -426,15 +512,15 @@ class ServiceHealthChecker(HealthChecker):
426
512
  self.last_check_time = time.time()
427
513
  self.last_events_processed = 0
428
514
  self.logger = logging.getLogger(f"{__name__}.ServiceHealthChecker")
429
-
515
+
430
516
  def get_name(self) -> str:
431
517
  return "service_health"
432
-
518
+
433
519
  async def check_health(self) -> List[HealthMetric]:
434
520
  """Check service-specific health metrics."""
435
521
  metrics = []
436
522
  current_time = time.time()
437
-
523
+
438
524
  # Connected clients
439
525
  try:
440
526
  client_count = self.service_stats.get("clients_connected", 0)
@@ -443,84 +529,104 @@ class ServiceHealthChecker(HealthChecker):
443
529
  client_status = HealthStatus.WARNING
444
530
  if client_count > self.max_clients:
445
531
  client_status = HealthStatus.CRITICAL
446
-
447
- metrics.append(HealthMetric(
448
- name="connected_clients",
449
- value=client_count,
450
- status=client_status,
451
- threshold=self.max_clients
452
- ))
532
+
533
+ metrics.append(
534
+ HealthMetric(
535
+ name="connected_clients",
536
+ value=client_count,
537
+ status=client_status,
538
+ threshold=self.max_clients,
539
+ )
540
+ )
453
541
  except Exception as e:
454
- metrics.append(HealthMetric(
455
- name="connected_clients",
456
- value=-1,
457
- status=HealthStatus.UNKNOWN,
458
- message=f"Failed to get client count: {e}"
459
- ))
460
-
542
+ metrics.append(
543
+ HealthMetric(
544
+ name="connected_clients",
545
+ value=-1,
546
+ status=HealthStatus.UNKNOWN,
547
+ message=f"Failed to get client count: {e}",
548
+ )
549
+ )
550
+
461
551
  # Event processing rate
462
552
  try:
463
553
  events_processed = self.service_stats.get("events_processed", 0)
464
554
  time_diff = current_time - self.last_check_time
465
-
555
+
466
556
  if time_diff > 0 and self.last_events_processed > 0:
467
557
  event_rate = (events_processed - self.last_events_processed) / time_diff
468
- metrics.append(HealthMetric(
469
- name="event_processing_rate",
470
- value=round(event_rate, 2),
471
- status=HealthStatus.HEALTHY,
472
- unit="events/sec"
473
- ))
474
-
558
+ metrics.append(
559
+ HealthMetric(
560
+ name="event_processing_rate",
561
+ value=round(event_rate, 2),
562
+ status=HealthStatus.HEALTHY,
563
+ unit="events/sec",
564
+ )
565
+ )
566
+
475
567
  self.last_events_processed = events_processed
476
-
568
+
477
569
  # Total events processed
478
- metrics.append(HealthMetric(
479
- name="total_events_processed",
480
- value=events_processed,
481
- status=HealthStatus.HEALTHY
482
- ))
570
+ metrics.append(
571
+ HealthMetric(
572
+ name="total_events_processed",
573
+ value=events_processed,
574
+ status=HealthStatus.HEALTHY,
575
+ )
576
+ )
483
577
  except Exception as e:
484
- metrics.append(HealthMetric(
485
- name="event_processing_rate",
486
- value=-1,
487
- status=HealthStatus.UNKNOWN,
488
- message=f"Failed to calculate event rate: {e}"
489
- ))
490
-
578
+ metrics.append(
579
+ HealthMetric(
580
+ name="event_processing_rate",
581
+ value=-1,
582
+ status=HealthStatus.UNKNOWN,
583
+ message=f"Failed to calculate event rate: {e}",
584
+ )
585
+ )
586
+
491
587
  # Error rate
492
588
  try:
493
589
  errors = self.service_stats.get("errors", 0)
494
- total_events = self.service_stats.get("events_processed", 1) # Avoid division by zero
590
+ total_events = self.service_stats.get(
591
+ "events_processed", 1
592
+ ) # Avoid division by zero
495
593
  error_rate = errors / max(total_events, 1)
496
-
594
+
497
595
  error_status = HealthStatus.HEALTHY
498
596
  if error_rate > self.max_error_rate * 0.5:
499
597
  error_status = HealthStatus.WARNING
500
598
  if error_rate > self.max_error_rate:
501
599
  error_status = HealthStatus.CRITICAL
502
-
503
- metrics.append(HealthMetric(
504
- name="error_rate",
505
- value=round(error_rate, 4),
506
- status=error_status,
507
- threshold=self.max_error_rate,
508
- unit="ratio"
509
- ))
510
-
511
- metrics.append(HealthMetric(
512
- name="total_errors",
513
- value=errors,
514
- status=HealthStatus.HEALTHY if errors == 0 else HealthStatus.WARNING
515
- ))
600
+
601
+ metrics.append(
602
+ HealthMetric(
603
+ name="error_rate",
604
+ value=round(error_rate, 4),
605
+ status=error_status,
606
+ threshold=self.max_error_rate,
607
+ unit="ratio",
608
+ )
609
+ )
610
+
611
+ metrics.append(
612
+ HealthMetric(
613
+ name="total_errors",
614
+ value=errors,
615
+ status=HealthStatus.HEALTHY
616
+ if errors == 0
617
+ else HealthStatus.WARNING,
618
+ )
619
+ )
516
620
  except Exception as e:
517
- metrics.append(HealthMetric(
518
- name="error_rate",
519
- value=-1,
520
- status=HealthStatus.UNKNOWN,
521
- message=f"Failed to calculate error rate: {e}"
522
- ))
523
-
621
+ metrics.append(
622
+ HealthMetric(
623
+ name="error_rate",
624
+ value=-1,
625
+ status=HealthStatus.UNKNOWN,
626
+ message=f"Failed to calculate error rate: {e}",
627
+ )
628
+ )
629
+
524
630
  # Last activity timestamp
525
631
  try:
526
632
  last_activity = self.service_stats.get("last_activity")
@@ -529,56 +635,66 @@ class ServiceHealthChecker(HealthChecker):
529
635
  if isinstance(last_activity, str):
530
636
  try:
531
637
  from dateutil.parser import parse
638
+
532
639
  last_activity_dt = parse(last_activity)
533
640
  last_activity_timestamp = last_activity_dt.timestamp()
534
641
  except ImportError:
535
642
  # Fallback: try to parse ISO format manually
536
643
  try:
537
644
  from datetime import datetime
538
- clean_timestamp = last_activity.rstrip('Z')
539
- last_activity_dt = datetime.fromisoformat(clean_timestamp.replace('T', ' '))
645
+
646
+ clean_timestamp = last_activity.rstrip("Z")
647
+ last_activity_dt = datetime.fromisoformat(
648
+ clean_timestamp.replace("T", " ")
649
+ )
540
650
  last_activity_timestamp = last_activity_dt.timestamp()
541
651
  except Exception:
542
652
  # Final fallback: treat as current time
543
653
  last_activity_timestamp = current_time
544
654
  else:
545
655
  last_activity_timestamp = float(last_activity)
546
-
656
+
547
657
  time_since_activity = current_time - last_activity_timestamp
548
658
  activity_status = HealthStatus.HEALTHY
549
659
  if time_since_activity > 300: # 5 minutes
550
660
  activity_status = HealthStatus.WARNING
551
661
  if time_since_activity > 1800: # 30 minutes
552
662
  activity_status = HealthStatus.CRITICAL
553
-
554
- metrics.append(HealthMetric(
555
- name="time_since_last_activity",
556
- value=round(time_since_activity, 2),
557
- status=activity_status,
558
- unit="seconds"
559
- ))
663
+
664
+ metrics.append(
665
+ HealthMetric(
666
+ name="time_since_last_activity",
667
+ value=round(time_since_activity, 2),
668
+ status=activity_status,
669
+ unit="seconds",
670
+ )
671
+ )
560
672
  else:
561
- metrics.append(HealthMetric(
673
+ metrics.append(
674
+ HealthMetric(
675
+ name="time_since_last_activity",
676
+ value=-1,
677
+ status=HealthStatus.WARNING,
678
+ message="No last activity recorded",
679
+ )
680
+ )
681
+ except Exception as e:
682
+ metrics.append(
683
+ HealthMetric(
562
684
  name="time_since_last_activity",
563
685
  value=-1,
564
- status=HealthStatus.WARNING,
565
- message="No last activity recorded"
566
- ))
567
- except Exception as e:
568
- metrics.append(HealthMetric(
569
- name="time_since_last_activity",
570
- value=-1,
571
- status=HealthStatus.UNKNOWN,
572
- message=f"Failed to parse last activity: {e}"
573
- ))
574
-
686
+ status=HealthStatus.UNKNOWN,
687
+ message=f"Failed to parse last activity: {e}",
688
+ )
689
+ )
690
+
575
691
  self.last_check_time = current_time
576
692
  return metrics
577
693
 
578
694
 
579
695
  class AdvancedHealthMonitor:
580
696
  """Advanced health monitoring system with configurable checks and thresholds.
581
-
697
+
582
698
  Provides comprehensive health monitoring including:
583
699
  - Multiple health checker integration
584
700
  - Configurable check intervals and thresholds
@@ -586,91 +702,99 @@ class AdvancedHealthMonitor:
586
702
  - Status aggregation and reporting
587
703
  - Integration with recovery systems
588
704
  """
589
-
705
+
590
706
  def __init__(self, config: Optional[Dict[str, Any]] = None):
591
707
  """Initialize advanced health monitor.
592
-
708
+
593
709
  Args:
594
710
  config: Configuration dictionary for health monitoring
595
711
  """
596
712
  self.config = config or {}
597
713
  self.logger = logging.getLogger(f"{__name__}.AdvancedHealthMonitor")
598
-
714
+
599
715
  # Configuration with defaults
600
- self.check_interval = self.config.get('check_interval', 30)
601
- self.history_size = self.config.get('history_size', 100)
602
- self.aggregation_window = self.config.get('aggregation_window', 300) # 5 minutes
603
-
716
+ self.check_interval = self.config.get("check_interval", 30)
717
+ self.history_size = self.config.get("history_size", 100)
718
+ self.aggregation_window = self.config.get(
719
+ "aggregation_window", 300
720
+ ) # 5 minutes
721
+
604
722
  # Health checkers
605
723
  self.checkers: List[HealthChecker] = []
606
-
724
+
607
725
  # Health history
608
726
  self.health_history: deque = deque(maxlen=self.history_size)
609
-
727
+
610
728
  # Monitoring state
611
729
  self.monitoring = False
612
730
  self.monitor_task: Optional[asyncio.Task] = None
613
731
  self.last_check_result: Optional[HealthCheckResult] = None
614
-
732
+
615
733
  # Health callbacks for recovery integration
616
734
  self.health_callbacks: List[Callable[[HealthCheckResult], None]] = []
617
-
735
+
618
736
  # Initialize metrics
619
737
  self.monitoring_stats = {
620
- 'checks_performed': 0,
621
- 'checks_failed': 0,
622
- 'average_check_duration_ms': 0,
623
- 'last_check_timestamp': None
738
+ "checks_performed": 0,
739
+ "checks_failed": 0,
740
+ "average_check_duration_ms": 0,
741
+ "last_check_timestamp": None,
624
742
  }
625
-
743
+
626
744
  self.logger.info("Advanced health monitor initialized")
627
-
745
+
628
746
  def add_checker(self, checker: HealthChecker) -> None:
629
747
  """Add a health checker to the monitoring system."""
630
748
  self.checkers.append(checker)
631
749
  self.logger.info(f"Added health checker: {checker.get_name()}")
632
-
633
- def add_health_callback(self, callback: Callable[[HealthCheckResult], None]) -> None:
750
+
751
+ def add_health_callback(
752
+ self, callback: Callable[[HealthCheckResult], None]
753
+ ) -> None:
634
754
  """Add a callback to be called when health checks complete.
635
-
755
+
636
756
  Args:
637
757
  callback: Function to call with HealthCheckResult
638
758
  """
639
759
  self.health_callbacks.append(callback)
640
760
  self.logger.debug(f"Added health callback: {callback.__name__}")
641
-
761
+
642
762
  async def perform_health_check(self) -> HealthCheckResult:
643
763
  """Perform comprehensive health check using all registered checkers."""
644
764
  start_time = time.time()
645
765
  all_metrics = []
646
766
  errors = []
647
-
767
+
648
768
  # Run all health checkers
649
769
  for checker in self.checkers:
650
770
  try:
651
771
  checker_start = time.time()
652
772
  metrics = await checker.check_health()
653
773
  checker_duration = (time.time() - checker_start) * 1000
654
-
774
+
655
775
  all_metrics.extend(metrics)
656
- self.logger.debug(f"Health checker {checker.get_name()} completed in {checker_duration:.2f}ms")
657
-
776
+ self.logger.debug(
777
+ f"Health checker {checker.get_name()} completed in {checker_duration:.2f}ms"
778
+ )
779
+
658
780
  except Exception as e:
659
781
  error_msg = f"Health checker {checker.get_name()} failed: {e}"
660
782
  errors.append(error_msg)
661
783
  self.logger.error(error_msg)
662
-
784
+
663
785
  # Add error metric
664
- all_metrics.append(HealthMetric(
665
- name=f"{checker.get_name()}_error",
666
- value=str(e),
667
- status=HealthStatus.UNKNOWN,
668
- message=error_msg
669
- ))
670
-
786
+ all_metrics.append(
787
+ HealthMetric(
788
+ name=f"{checker.get_name()}_error",
789
+ value=str(e),
790
+ status=HealthStatus.UNKNOWN,
791
+ message=error_msg,
792
+ )
793
+ )
794
+
671
795
  # Determine overall status
672
796
  overall_status = self._determine_overall_status(all_metrics)
673
-
797
+
674
798
  # Create result
675
799
  duration_ms = (time.time() - start_time) * 1000
676
800
  result = HealthCheckResult(
@@ -678,87 +802,91 @@ class AdvancedHealthMonitor:
678
802
  metrics=all_metrics,
679
803
  timestamp=start_time,
680
804
  duration_ms=duration_ms,
681
- errors=errors
805
+ errors=errors,
682
806
  )
683
-
807
+
684
808
  # Update statistics
685
- self.monitoring_stats['checks_performed'] += 1
809
+ self.monitoring_stats["checks_performed"] += 1
686
810
  if errors:
687
- self.monitoring_stats['checks_failed'] += 1
688
-
811
+ self.monitoring_stats["checks_failed"] += 1
812
+
689
813
  # Update average duration
690
- current_avg = self.monitoring_stats['average_check_duration_ms']
691
- checks_count = self.monitoring_stats['checks_performed']
692
- self.monitoring_stats['average_check_duration_ms'] = (
693
- (current_avg * (checks_count - 1) + duration_ms) / checks_count
694
- )
695
- self.monitoring_stats['last_check_timestamp'] = time.time()
696
-
814
+ current_avg = self.monitoring_stats["average_check_duration_ms"]
815
+ checks_count = self.monitoring_stats["checks_performed"]
816
+ self.monitoring_stats["average_check_duration_ms"] = (
817
+ current_avg * (checks_count - 1) + duration_ms
818
+ ) / checks_count
819
+ self.monitoring_stats["last_check_timestamp"] = time.time()
820
+
697
821
  # Store in history
698
822
  self.health_history.append(result)
699
823
  self.last_check_result = result
700
-
824
+
701
825
  # Notify callbacks
702
826
  for callback in self.health_callbacks:
703
827
  try:
704
828
  callback(result)
705
829
  except Exception as e:
706
830
  self.logger.error(f"Health callback {callback.__name__} failed: {e}")
707
-
708
- self.logger.debug(f"Health check completed: {overall_status.value} "
709
- f"({len(all_metrics)} metrics, {len(errors)} errors, "
710
- f"{duration_ms:.2f}ms)")
711
-
831
+
832
+ self.logger.debug(
833
+ f"Health check completed: {overall_status.value} "
834
+ f"({len(all_metrics)} metrics, {len(errors)} errors, "
835
+ f"{duration_ms:.2f}ms)"
836
+ )
837
+
712
838
  return result
713
-
839
+
714
840
  def _determine_overall_status(self, metrics: List[HealthMetric]) -> HealthStatus:
715
841
  """Determine overall health status from individual metrics."""
716
842
  if not metrics:
717
843
  return HealthStatus.UNKNOWN
718
-
844
+
719
845
  # Count metrics by status
720
846
  status_counts = {status: 0 for status in HealthStatus}
721
847
  for metric in metrics:
722
848
  status_counts[metric.status] += 1
723
-
849
+
724
850
  # Determine overall status based on counts
725
851
  total_metrics = len(metrics)
726
-
852
+
727
853
  # If any critical metrics, overall is critical
728
854
  if status_counts[HealthStatus.CRITICAL] > 0:
729
855
  return HealthStatus.CRITICAL
730
-
856
+
731
857
  # If more than 30% warning metrics, overall is warning
732
858
  warning_ratio = status_counts[HealthStatus.WARNING] / total_metrics
733
859
  if warning_ratio > 0.3:
734
860
  return HealthStatus.WARNING
735
-
861
+
736
862
  # If any warning metrics but less than 30%, still healthy
737
863
  if status_counts[HealthStatus.WARNING] > 0:
738
864
  return HealthStatus.HEALTHY
739
-
865
+
740
866
  # If any unknown metrics, overall is unknown
741
867
  if status_counts[HealthStatus.UNKNOWN] > 0:
742
868
  return HealthStatus.UNKNOWN
743
-
869
+
744
870
  # All metrics healthy
745
871
  return HealthStatus.HEALTHY
746
-
872
+
747
873
  def start_monitoring(self) -> None:
748
874
  """Start continuous health monitoring."""
749
875
  if self.monitoring:
750
876
  self.logger.warning("Health monitoring is already running")
751
877
  return
752
-
878
+
753
879
  self.monitoring = True
754
880
  self.monitor_task = asyncio.create_task(self._monitoring_loop())
755
- self.logger.info(f"Started health monitoring with {self.check_interval}s interval")
756
-
881
+ self.logger.info(
882
+ f"Started health monitoring with {self.check_interval}s interval"
883
+ )
884
+
757
885
  async def stop_monitoring(self) -> None:
758
886
  """Stop continuous health monitoring."""
759
887
  if not self.monitoring:
760
888
  return
761
-
889
+
762
890
  self.monitoring = False
763
891
  if self.monitor_task:
764
892
  self.monitor_task.cancel()
@@ -767,9 +895,9 @@ class AdvancedHealthMonitor:
767
895
  except asyncio.CancelledError:
768
896
  pass
769
897
  self.monitor_task = None
770
-
898
+
771
899
  self.logger.info("Stopped health monitoring")
772
-
900
+
773
901
  async def _monitoring_loop(self) -> None:
774
902
  """Continuous health monitoring loop."""
775
903
  try:
@@ -778,76 +906,79 @@ class AdvancedHealthMonitor:
778
906
  await self.perform_health_check()
779
907
  except Exception as e:
780
908
  self.logger.error(f"Error during health check: {e}")
781
-
909
+
782
910
  # Wait for next check
783
911
  await asyncio.sleep(self.check_interval)
784
912
  except asyncio.CancelledError:
785
913
  self.logger.debug("Health monitoring loop cancelled")
786
914
  except Exception as e:
787
915
  self.logger.error(f"Health monitoring loop error: {e}")
788
-
916
+
789
917
  def get_current_status(self) -> Optional[HealthCheckResult]:
790
918
  """Get the most recent health check result."""
791
919
  return self.last_check_result
792
-
793
- def get_health_history(self, limit: Optional[int] = None) -> List[HealthCheckResult]:
920
+
921
+ def get_health_history(
922
+ self, limit: Optional[int] = None
923
+ ) -> List[HealthCheckResult]:
794
924
  """Get health check history.
795
-
925
+
796
926
  Args:
797
927
  limit: Maximum number of results to return
798
-
928
+
799
929
  Returns:
800
930
  List of health check results, newest first
801
931
  """
802
932
  history = list(self.health_history)
803
933
  history.reverse() # Newest first
804
-
934
+
805
935
  if limit:
806
936
  history = history[:limit]
807
-
937
+
808
938
  return history
809
-
810
- def get_aggregated_status(self, window_seconds: Optional[int] = None) -> Dict[str, Any]:
939
+
940
+ def get_aggregated_status(
941
+ self, window_seconds: Optional[int] = None
942
+ ) -> Dict[str, Any]:
811
943
  """Get aggregated health status over a time window.
812
-
944
+
813
945
  Args:
814
946
  window_seconds: Time window for aggregation (defaults to configured window)
815
-
947
+
816
948
  Returns:
817
949
  Dictionary with aggregated health statistics
818
950
  """
819
951
  window_seconds = window_seconds or self.aggregation_window
820
952
  current_time = time.time()
821
953
  cutoff_time = current_time - window_seconds
822
-
954
+
823
955
  # Filter history to time window
824
956
  recent_results = [
825
- result for result in self.health_history
826
- if result.timestamp >= cutoff_time
957
+ result for result in self.health_history if result.timestamp >= cutoff_time
827
958
  ]
828
-
959
+
829
960
  if not recent_results:
830
961
  return {
831
- 'period': 'no_data',
832
- 'window_seconds': window_seconds,
833
- 'checks_count': 0,
834
- 'overall_status': HealthStatus.UNKNOWN.value
962
+ "period": "no_data",
963
+ "window_seconds": window_seconds,
964
+ "checks_count": 0,
965
+ "overall_status": HealthStatus.UNKNOWN.value,
835
966
  }
836
-
967
+
837
968
  # Aggregate statistics
838
969
  status_counts = {status: 0 for status in HealthStatus}
839
970
  total_metrics = 0
840
971
  total_errors = 0
841
972
  total_duration_ms = 0
842
-
973
+
843
974
  for result in recent_results:
844
975
  status_counts[result.overall_status] += 1
845
976
  total_metrics += len(result.metrics)
846
977
  total_errors += len(result.errors)
847
978
  total_duration_ms += result.duration_ms
848
-
979
+
849
980
  checks_count = len(recent_results)
850
-
981
+
851
982
  # Determine aggregated status
852
983
  if status_counts[HealthStatus.CRITICAL] > 0:
853
984
  aggregated_status = HealthStatus.CRITICAL
@@ -857,37 +988,49 @@ class AdvancedHealthMonitor:
857
988
  aggregated_status = HealthStatus.UNKNOWN
858
989
  else:
859
990
  aggregated_status = HealthStatus.HEALTHY
860
-
991
+
861
992
  return {
862
- 'period': f'last_{window_seconds}_seconds',
863
- 'window_seconds': window_seconds,
864
- 'checks_count': checks_count,
865
- 'overall_status': aggregated_status.value,
866
- 'status_distribution': {status.value: count for status, count in status_counts.items()},
867
- 'average_metrics_per_check': round(total_metrics / checks_count, 2) if checks_count > 0 else 0,
868
- 'total_errors': total_errors,
869
- 'average_duration_ms': round(total_duration_ms / checks_count, 2) if checks_count > 0 else 0,
870
- 'monitoring_stats': dict(self.monitoring_stats)
993
+ "period": f"last_{window_seconds}_seconds",
994
+ "window_seconds": window_seconds,
995
+ "checks_count": checks_count,
996
+ "overall_status": aggregated_status.value,
997
+ "status_distribution": {
998
+ status.value: count for status, count in status_counts.items()
999
+ },
1000
+ "average_metrics_per_check": (
1001
+ round(total_metrics / checks_count, 2) if checks_count > 0 else 0
1002
+ ),
1003
+ "total_errors": total_errors,
1004
+ "average_duration_ms": (
1005
+ round(total_duration_ms / checks_count, 2) if checks_count > 0 else 0
1006
+ ),
1007
+ "monitoring_stats": dict(self.monitoring_stats),
871
1008
  }
872
-
1009
+
873
1010
  def export_diagnostics(self) -> Dict[str, Any]:
874
1011
  """Export comprehensive diagnostics information."""
875
1012
  return {
876
- 'monitor_info': {
877
- 'check_interval': self.check_interval,
878
- 'history_size': self.history_size,
879
- 'aggregation_window': self.aggregation_window,
880
- 'monitoring_active': self.monitoring,
881
- 'checkers_count': len(self.checkers),
882
- 'callbacks_count': len(self.health_callbacks)
1013
+ "monitor_info": {
1014
+ "check_interval": self.check_interval,
1015
+ "history_size": self.history_size,
1016
+ "aggregation_window": self.aggregation_window,
1017
+ "monitoring_active": self.monitoring,
1018
+ "checkers_count": len(self.checkers),
1019
+ "callbacks_count": len(self.health_callbacks),
883
1020
  },
884
- 'checkers': [checker.get_name() for checker in self.checkers],
885
- 'current_status': self.last_check_result.to_dict() if self.last_check_result else None,
886
- 'aggregated_status': self.get_aggregated_status(),
887
- 'monitoring_stats': dict(self.monitoring_stats),
888
- 'history_summary': {
889
- 'total_checks': len(self.health_history),
890
- 'oldest_check': self.health_history[0].timestamp if self.health_history else None,
891
- 'newest_check': self.health_history[-1].timestamp if self.health_history else None
892
- }
893
- }
1021
+ "checkers": [checker.get_name() for checker in self.checkers],
1022
+ "current_status": self.last_check_result.to_dict()
1023
+ if self.last_check_result
1024
+ else None,
1025
+ "aggregated_status": self.get_aggregated_status(),
1026
+ "monitoring_stats": dict(self.monitoring_stats),
1027
+ "history_summary": {
1028
+ "total_checks": len(self.health_history),
1029
+ "oldest_check": self.health_history[0].timestamp
1030
+ if self.health_history
1031
+ else None,
1032
+ "newest_check": self.health_history[-1].timestamp
1033
+ if self.health_history
1034
+ else None,
1035
+ },
1036
+ }