claude-mpm 3.9.11__py3-none-any.whl → 4.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (419)
  1. claude_mpm/VERSION +1 -1
  2. claude_mpm/__init__.py +2 -2
  3. claude_mpm/__main__.py +3 -2
  4. claude_mpm/agents/__init__.py +85 -79
  5. claude_mpm/agents/agent_loader.py +464 -1003
  6. claude_mpm/agents/agent_loader_integration.py +45 -45
  7. claude_mpm/agents/agents_metadata.py +29 -30
  8. claude_mpm/agents/async_agent_loader.py +156 -138
  9. claude_mpm/agents/base_agent.json +1 -1
  10. claude_mpm/agents/base_agent_loader.py +179 -151
  11. claude_mpm/agents/frontmatter_validator.py +229 -130
  12. claude_mpm/agents/schema/agent_schema.json +1 -1
  13. claude_mpm/agents/system_agent_config.py +213 -147
  14. claude_mpm/agents/templates/__init__.py +13 -13
  15. claude_mpm/agents/templates/code_analyzer.json +2 -2
  16. claude_mpm/agents/templates/data_engineer.json +1 -1
  17. claude_mpm/agents/templates/documentation.json +23 -11
  18. claude_mpm/agents/templates/engineer.json +22 -6
  19. claude_mpm/agents/templates/memory_manager.json +1 -1
  20. claude_mpm/agents/templates/ops.json +2 -2
  21. claude_mpm/agents/templates/project_organizer.json +1 -1
  22. claude_mpm/agents/templates/qa.json +1 -1
  23. claude_mpm/agents/templates/refactoring_engineer.json +222 -0
  24. claude_mpm/agents/templates/research.json +20 -14
  25. claude_mpm/agents/templates/security.json +1 -1
  26. claude_mpm/agents/templates/ticketing.json +1 -1
  27. claude_mpm/agents/templates/version_control.json +1 -1
  28. claude_mpm/agents/templates/web_qa.json +3 -1
  29. claude_mpm/agents/templates/web_ui.json +2 -2
  30. claude_mpm/cli/__init__.py +79 -51
  31. claude_mpm/cli/__main__.py +3 -2
  32. claude_mpm/cli/commands/__init__.py +20 -20
  33. claude_mpm/cli/commands/agents.py +279 -247
  34. claude_mpm/cli/commands/aggregate.py +138 -157
  35. claude_mpm/cli/commands/cleanup.py +147 -147
  36. claude_mpm/cli/commands/config.py +93 -76
  37. claude_mpm/cli/commands/info.py +17 -16
  38. claude_mpm/cli/commands/mcp.py +140 -905
  39. claude_mpm/cli/commands/mcp_command_router.py +139 -0
  40. claude_mpm/cli/commands/mcp_config_commands.py +20 -0
  41. claude_mpm/cli/commands/mcp_install_commands.py +20 -0
  42. claude_mpm/cli/commands/mcp_server_commands.py +175 -0
  43. claude_mpm/cli/commands/mcp_tool_commands.py +34 -0
  44. claude_mpm/cli/commands/memory.py +239 -203
  45. claude_mpm/cli/commands/monitor.py +203 -81
  46. claude_mpm/cli/commands/run.py +380 -429
  47. claude_mpm/cli/commands/run_config_checker.py +160 -0
  48. claude_mpm/cli/commands/socketio_monitor.py +235 -0
  49. claude_mpm/cli/commands/tickets.py +305 -197
  50. claude_mpm/cli/parser.py +24 -1156
  51. claude_mpm/cli/parsers/__init__.py +29 -0
  52. claude_mpm/cli/parsers/agents_parser.py +136 -0
  53. claude_mpm/cli/parsers/base_parser.py +331 -0
  54. claude_mpm/cli/parsers/config_parser.py +85 -0
  55. claude_mpm/cli/parsers/mcp_parser.py +152 -0
  56. claude_mpm/cli/parsers/memory_parser.py +138 -0
  57. claude_mpm/cli/parsers/monitor_parser.py +104 -0
  58. claude_mpm/cli/parsers/run_parser.py +147 -0
  59. claude_mpm/cli/parsers/tickets_parser.py +203 -0
  60. claude_mpm/cli/ticket_cli.py +7 -3
  61. claude_mpm/cli/utils.py +55 -37
  62. claude_mpm/cli_module/__init__.py +6 -6
  63. claude_mpm/cli_module/args.py +188 -140
  64. claude_mpm/cli_module/commands.py +79 -70
  65. claude_mpm/cli_module/migration_example.py +38 -60
  66. claude_mpm/config/__init__.py +32 -25
  67. claude_mpm/config/agent_config.py +151 -119
  68. claude_mpm/config/experimental_features.py +71 -73
  69. claude_mpm/config/paths.py +94 -208
  70. claude_mpm/config/socketio_config.py +84 -73
  71. claude_mpm/constants.py +35 -18
  72. claude_mpm/core/__init__.py +9 -6
  73. claude_mpm/core/agent_name_normalizer.py +68 -71
  74. claude_mpm/core/agent_registry.py +372 -521
  75. claude_mpm/core/agent_session_manager.py +74 -63
  76. claude_mpm/core/base_service.py +116 -87
  77. claude_mpm/core/cache.py +119 -153
  78. claude_mpm/core/claude_runner.py +425 -1120
  79. claude_mpm/core/config.py +263 -168
  80. claude_mpm/core/config_aliases.py +69 -61
  81. claude_mpm/core/config_constants.py +292 -0
  82. claude_mpm/core/constants.py +57 -99
  83. claude_mpm/core/container.py +211 -178
  84. claude_mpm/core/exceptions.py +233 -89
  85. claude_mpm/core/factories.py +92 -54
  86. claude_mpm/core/framework_loader.py +378 -220
  87. claude_mpm/core/hook_manager.py +198 -83
  88. claude_mpm/core/hook_performance_config.py +136 -0
  89. claude_mpm/core/injectable_service.py +61 -55
  90. claude_mpm/core/interactive_session.py +165 -155
  91. claude_mpm/core/interfaces.py +221 -195
  92. claude_mpm/core/lazy.py +96 -96
  93. claude_mpm/core/logger.py +133 -107
  94. claude_mpm/core/logging_config.py +185 -157
  95. claude_mpm/core/minimal_framework_loader.py +20 -15
  96. claude_mpm/core/mixins.py +30 -29
  97. claude_mpm/core/oneshot_session.py +215 -181
  98. claude_mpm/core/optimized_agent_loader.py +134 -138
  99. claude_mpm/core/optimized_startup.py +159 -157
  100. claude_mpm/core/pm_hook_interceptor.py +85 -72
  101. claude_mpm/core/service_registry.py +103 -101
  102. claude_mpm/core/session_manager.py +97 -87
  103. claude_mpm/core/socketio_pool.py +212 -158
  104. claude_mpm/core/tool_access_control.py +58 -51
  105. claude_mpm/core/types.py +46 -24
  106. claude_mpm/core/typing_utils.py +166 -82
  107. claude_mpm/core/unified_agent_registry.py +721 -0
  108. claude_mpm/core/unified_config.py +550 -0
  109. claude_mpm/core/unified_paths.py +549 -0
  110. claude_mpm/dashboard/index.html +1 -1
  111. claude_mpm/dashboard/open_dashboard.py +51 -17
  112. claude_mpm/dashboard/static/css/dashboard.css +27 -8
  113. claude_mpm/dashboard/static/dist/components/agent-inference.js +2 -0
  114. claude_mpm/dashboard/static/dist/components/event-processor.js +2 -0
  115. claude_mpm/dashboard/static/dist/components/event-viewer.js +2 -0
  116. claude_mpm/dashboard/static/dist/components/export-manager.js +2 -0
  117. claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +2 -0
  118. claude_mpm/dashboard/static/dist/components/hud-library-loader.js +2 -0
  119. claude_mpm/dashboard/static/dist/components/hud-manager.js +2 -0
  120. claude_mpm/dashboard/static/dist/components/hud-visualizer.js +2 -0
  121. claude_mpm/dashboard/static/dist/components/module-viewer.js +2 -0
  122. claude_mpm/dashboard/static/dist/components/session-manager.js +2 -0
  123. claude_mpm/dashboard/static/dist/components/socket-manager.js +2 -0
  124. claude_mpm/dashboard/static/dist/components/ui-state-manager.js +2 -0
  125. claude_mpm/dashboard/static/dist/components/working-directory.js +2 -0
  126. claude_mpm/dashboard/static/dist/dashboard.js +2 -0
  127. claude_mpm/dashboard/static/dist/socket-client.js +2 -0
  128. claude_mpm/dashboard/static/js/components/agent-inference.js +80 -76
  129. claude_mpm/dashboard/static/js/components/event-processor.js +71 -67
  130. claude_mpm/dashboard/static/js/components/event-viewer.js +74 -70
  131. claude_mpm/dashboard/static/js/components/export-manager.js +31 -28
  132. claude_mpm/dashboard/static/js/components/file-tool-tracker.js +106 -92
  133. claude_mpm/dashboard/static/js/components/hud-library-loader.js +11 -11
  134. claude_mpm/dashboard/static/js/components/hud-manager.js +73 -73
  135. claude_mpm/dashboard/static/js/components/hud-visualizer.js +163 -163
  136. claude_mpm/dashboard/static/js/components/module-viewer.js +305 -233
  137. claude_mpm/dashboard/static/js/components/session-manager.js +32 -29
  138. claude_mpm/dashboard/static/js/components/socket-manager.js +27 -20
  139. claude_mpm/dashboard/static/js/components/ui-state-manager.js +21 -18
  140. claude_mpm/dashboard/static/js/components/working-directory.js +74 -71
  141. claude_mpm/dashboard/static/js/dashboard.js +178 -453
  142. claude_mpm/dashboard/static/js/extension-error-handler.js +164 -0
  143. claude_mpm/dashboard/static/js/socket-client.js +120 -54
  144. claude_mpm/dashboard/templates/index.html +40 -50
  145. claude_mpm/experimental/cli_enhancements.py +60 -58
  146. claude_mpm/generators/__init__.py +1 -1
  147. claude_mpm/generators/agent_profile_generator.py +75 -65
  148. claude_mpm/hooks/__init__.py +1 -1
  149. claude_mpm/hooks/base_hook.py +33 -28
  150. claude_mpm/hooks/claude_hooks/__init__.py +1 -1
  151. claude_mpm/hooks/claude_hooks/connection_pool.py +120 -0
  152. claude_mpm/hooks/claude_hooks/event_handlers.py +743 -0
  153. claude_mpm/hooks/claude_hooks/hook_handler.py +415 -1331
  154. claude_mpm/hooks/claude_hooks/hook_wrapper.sh +4 -4
  155. claude_mpm/hooks/claude_hooks/memory_integration.py +221 -0
  156. claude_mpm/hooks/claude_hooks/response_tracking.py +348 -0
  157. claude_mpm/hooks/claude_hooks/tool_analysis.py +230 -0
  158. claude_mpm/hooks/memory_integration_hook.py +140 -100
  159. claude_mpm/hooks/tool_call_interceptor.py +89 -76
  160. claude_mpm/hooks/validation_hooks.py +57 -49
  161. claude_mpm/init.py +145 -121
  162. claude_mpm/models/__init__.py +9 -9
  163. claude_mpm/models/agent_definition.py +33 -23
  164. claude_mpm/models/agent_session.py +228 -200
  165. claude_mpm/scripts/__init__.py +1 -1
  166. claude_mpm/scripts/socketio_daemon.py +192 -75
  167. claude_mpm/scripts/socketio_server_manager.py +328 -0
  168. claude_mpm/scripts/start_activity_logging.py +25 -22
  169. claude_mpm/services/__init__.py +68 -43
  170. claude_mpm/services/agent_capabilities_service.py +271 -0
  171. claude_mpm/services/agents/__init__.py +23 -32
  172. claude_mpm/services/agents/deployment/__init__.py +3 -3
  173. claude_mpm/services/agents/deployment/agent_config_provider.py +310 -0
  174. claude_mpm/services/agents/deployment/agent_configuration_manager.py +359 -0
  175. claude_mpm/services/agents/deployment/agent_definition_factory.py +84 -0
  176. claude_mpm/services/agents/deployment/agent_deployment.py +415 -2113
  177. claude_mpm/services/agents/deployment/agent_discovery_service.py +387 -0
  178. claude_mpm/services/agents/deployment/agent_environment_manager.py +293 -0
  179. claude_mpm/services/agents/deployment/agent_filesystem_manager.py +387 -0
  180. claude_mpm/services/agents/deployment/agent_format_converter.py +453 -0
  181. claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +161 -0
  182. claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +345 -495
  183. claude_mpm/services/agents/deployment/agent_metrics_collector.py +279 -0
  184. claude_mpm/services/agents/deployment/agent_restore_handler.py +88 -0
  185. claude_mpm/services/agents/deployment/agent_template_builder.py +406 -0
  186. claude_mpm/services/agents/deployment/agent_validator.py +352 -0
  187. claude_mpm/services/agents/deployment/agent_version_manager.py +313 -0
  188. claude_mpm/services/agents/deployment/agent_versioning.py +6 -9
  189. claude_mpm/services/agents/deployment/agents_directory_resolver.py +79 -0
  190. claude_mpm/services/agents/deployment/async_agent_deployment.py +298 -234
  191. claude_mpm/services/agents/deployment/config/__init__.py +13 -0
  192. claude_mpm/services/agents/deployment/config/deployment_config.py +182 -0
  193. claude_mpm/services/agents/deployment/config/deployment_config_manager.py +200 -0
  194. claude_mpm/services/agents/deployment/deployment_config_loader.py +54 -0
  195. claude_mpm/services/agents/deployment/deployment_type_detector.py +124 -0
  196. claude_mpm/services/agents/deployment/facade/__init__.py +18 -0
  197. claude_mpm/services/agents/deployment/facade/async_deployment_executor.py +159 -0
  198. claude_mpm/services/agents/deployment/facade/deployment_executor.py +73 -0
  199. claude_mpm/services/agents/deployment/facade/deployment_facade.py +270 -0
  200. claude_mpm/services/agents/deployment/facade/sync_deployment_executor.py +178 -0
  201. claude_mpm/services/agents/deployment/interface_adapter.py +227 -0
  202. claude_mpm/services/agents/deployment/lifecycle_health_checker.py +85 -0
  203. claude_mpm/services/agents/deployment/lifecycle_performance_tracker.py +100 -0
  204. claude_mpm/services/agents/deployment/pipeline/__init__.py +32 -0
  205. claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +158 -0
  206. claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +159 -0
  207. claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +169 -0
  208. claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +19 -0
  209. claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +195 -0
  210. claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +119 -0
  211. claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +79 -0
  212. claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +90 -0
  213. claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +100 -0
  214. claude_mpm/services/agents/deployment/processors/__init__.py +15 -0
  215. claude_mpm/services/agents/deployment/processors/agent_deployment_context.py +98 -0
  216. claude_mpm/services/agents/deployment/processors/agent_deployment_result.py +235 -0
  217. claude_mpm/services/agents/deployment/processors/agent_processor.py +258 -0
  218. claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +318 -0
  219. claude_mpm/services/agents/deployment/results/__init__.py +13 -0
  220. claude_mpm/services/agents/deployment/results/deployment_metrics.py +200 -0
  221. claude_mpm/services/agents/deployment/results/deployment_result_builder.py +249 -0
  222. claude_mpm/services/agents/deployment/strategies/__init__.py +25 -0
  223. claude_mpm/services/agents/deployment/strategies/base_strategy.py +119 -0
  224. claude_mpm/services/agents/deployment/strategies/project_strategy.py +150 -0
  225. claude_mpm/services/agents/deployment/strategies/strategy_selector.py +117 -0
  226. claude_mpm/services/agents/deployment/strategies/system_strategy.py +116 -0
  227. claude_mpm/services/agents/deployment/strategies/user_strategy.py +137 -0
  228. claude_mpm/services/agents/deployment/system_instructions_deployer.py +108 -0
  229. claude_mpm/services/agents/deployment/validation/__init__.py +19 -0
  230. claude_mpm/services/agents/deployment/validation/agent_validator.py +323 -0
  231. claude_mpm/services/agents/deployment/validation/deployment_validator.py +238 -0
  232. claude_mpm/services/agents/deployment/validation/template_validator.py +299 -0
  233. claude_mpm/services/agents/deployment/validation/validation_result.py +226 -0
  234. claude_mpm/services/agents/loading/__init__.py +2 -2
  235. claude_mpm/services/agents/loading/agent_profile_loader.py +259 -229
  236. claude_mpm/services/agents/loading/base_agent_manager.py +90 -81
  237. claude_mpm/services/agents/loading/framework_agent_loader.py +154 -129
  238. claude_mpm/services/agents/management/__init__.py +2 -2
  239. claude_mpm/services/agents/management/agent_capabilities_generator.py +72 -58
  240. claude_mpm/services/agents/management/agent_management_service.py +209 -156
  241. claude_mpm/services/agents/memory/__init__.py +9 -6
  242. claude_mpm/services/agents/memory/agent_memory_manager.py +218 -1152
  243. claude_mpm/services/agents/memory/agent_persistence_service.py +20 -16
  244. claude_mpm/services/agents/memory/analyzer.py +430 -0
  245. claude_mpm/services/agents/memory/content_manager.py +376 -0
  246. claude_mpm/services/agents/memory/template_generator.py +468 -0
  247. claude_mpm/services/agents/registry/__init__.py +7 -10
  248. claude_mpm/services/agents/registry/deployed_agent_discovery.py +122 -97
  249. claude_mpm/services/agents/registry/modification_tracker.py +351 -285
  250. claude_mpm/services/async_session_logger.py +187 -153
  251. claude_mpm/services/claude_session_logger.py +87 -72
  252. claude_mpm/services/command_handler_service.py +217 -0
  253. claude_mpm/services/communication/__init__.py +3 -2
  254. claude_mpm/services/core/__init__.py +50 -97
  255. claude_mpm/services/core/base.py +60 -53
  256. claude_mpm/services/core/interfaces/__init__.py +188 -0
  257. claude_mpm/services/core/interfaces/agent.py +351 -0
  258. claude_mpm/services/core/interfaces/communication.py +343 -0
  259. claude_mpm/services/core/interfaces/infrastructure.py +413 -0
  260. claude_mpm/services/core/interfaces/service.py +434 -0
  261. claude_mpm/services/core/interfaces.py +19 -944
  262. claude_mpm/services/event_aggregator.py +208 -170
  263. claude_mpm/services/exceptions.py +387 -308
  264. claude_mpm/services/framework_claude_md_generator/__init__.py +75 -79
  265. claude_mpm/services/framework_claude_md_generator/content_assembler.py +69 -60
  266. claude_mpm/services/framework_claude_md_generator/content_validator.py +65 -61
  267. claude_mpm/services/framework_claude_md_generator/deployment_manager.py +68 -49
  268. claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +34 -34
  269. claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +25 -22
  270. claude_mpm/services/framework_claude_md_generator/section_generators/claude_pm_init.py +10 -10
  271. claude_mpm/services/framework_claude_md_generator/section_generators/core_responsibilities.py +4 -3
  272. claude_mpm/services/framework_claude_md_generator/section_generators/delegation_constraints.py +4 -3
  273. claude_mpm/services/framework_claude_md_generator/section_generators/environment_config.py +4 -3
  274. claude_mpm/services/framework_claude_md_generator/section_generators/footer.py +6 -5
  275. claude_mpm/services/framework_claude_md_generator/section_generators/header.py +8 -7
  276. claude_mpm/services/framework_claude_md_generator/section_generators/orchestration_principles.py +4 -3
  277. claude_mpm/services/framework_claude_md_generator/section_generators/role_designation.py +6 -5
  278. claude_mpm/services/framework_claude_md_generator/section_generators/subprocess_validation.py +9 -8
  279. claude_mpm/services/framework_claude_md_generator/section_generators/todo_task_tools.py +4 -3
  280. claude_mpm/services/framework_claude_md_generator/section_generators/troubleshooting.py +5 -4
  281. claude_mpm/services/framework_claude_md_generator/section_manager.py +28 -27
  282. claude_mpm/services/framework_claude_md_generator/version_manager.py +30 -28
  283. claude_mpm/services/hook_service.py +106 -114
  284. claude_mpm/services/infrastructure/__init__.py +7 -5
  285. claude_mpm/services/infrastructure/context_preservation.py +233 -199
  286. claude_mpm/services/infrastructure/daemon_manager.py +279 -0
  287. claude_mpm/services/infrastructure/logging.py +83 -76
  288. claude_mpm/services/infrastructure/monitoring.py +547 -404
  289. claude_mpm/services/mcp_gateway/__init__.py +30 -13
  290. claude_mpm/services/mcp_gateway/config/__init__.py +2 -2
  291. claude_mpm/services/mcp_gateway/config/config_loader.py +61 -56
  292. claude_mpm/services/mcp_gateway/config/config_schema.py +50 -41
  293. claude_mpm/services/mcp_gateway/config/configuration.py +82 -75
  294. claude_mpm/services/mcp_gateway/core/__init__.py +13 -20
  295. claude_mpm/services/mcp_gateway/core/base.py +80 -67
  296. claude_mpm/services/mcp_gateway/core/exceptions.py +60 -46
  297. claude_mpm/services/mcp_gateway/core/interfaces.py +87 -84
  298. claude_mpm/services/mcp_gateway/main.py +287 -137
  299. claude_mpm/services/mcp_gateway/registry/__init__.py +1 -1
  300. claude_mpm/services/mcp_gateway/registry/service_registry.py +97 -94
  301. claude_mpm/services/mcp_gateway/registry/tool_registry.py +135 -126
  302. claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
  303. claude_mpm/services/mcp_gateway/server/mcp_gateway.py +105 -110
  304. claude_mpm/services/mcp_gateway/server/stdio_handler.py +105 -107
  305. claude_mpm/services/mcp_gateway/server/stdio_server.py +691 -0
  306. claude_mpm/services/mcp_gateway/tools/__init__.py +4 -2
  307. claude_mpm/services/mcp_gateway/tools/base_adapter.py +109 -119
  308. claude_mpm/services/mcp_gateway/tools/document_summarizer.py +283 -215
  309. claude_mpm/services/mcp_gateway/tools/hello_world.py +122 -120
  310. claude_mpm/services/mcp_gateway/tools/ticket_tools.py +652 -0
  311. claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +606 -0
  312. claude_mpm/services/memory/__init__.py +2 -2
  313. claude_mpm/services/memory/builder.py +451 -362
  314. claude_mpm/services/memory/cache/__init__.py +2 -2
  315. claude_mpm/services/memory/cache/shared_prompt_cache.py +232 -194
  316. claude_mpm/services/memory/cache/simple_cache.py +107 -93
  317. claude_mpm/services/memory/indexed_memory.py +195 -193
  318. claude_mpm/services/memory/optimizer.py +267 -234
  319. claude_mpm/services/memory/router.py +571 -263
  320. claude_mpm/services/memory_hook_service.py +237 -0
  321. claude_mpm/services/port_manager.py +223 -0
  322. claude_mpm/services/project/__init__.py +3 -3
  323. claude_mpm/services/project/analyzer.py +451 -305
  324. claude_mpm/services/project/registry.py +262 -240
  325. claude_mpm/services/recovery_manager.py +287 -231
  326. claude_mpm/services/response_tracker.py +87 -67
  327. claude_mpm/services/runner_configuration_service.py +587 -0
  328. claude_mpm/services/session_management_service.py +304 -0
  329. claude_mpm/services/socketio/__init__.py +4 -4
  330. claude_mpm/services/socketio/client_proxy.py +174 -0
  331. claude_mpm/services/socketio/handlers/__init__.py +3 -3
  332. claude_mpm/services/socketio/handlers/base.py +44 -30
  333. claude_mpm/services/socketio/handlers/connection.py +145 -65
  334. claude_mpm/services/socketio/handlers/file.py +123 -108
  335. claude_mpm/services/socketio/handlers/git.py +607 -373
  336. claude_mpm/services/socketio/handlers/hook.py +170 -0
  337. claude_mpm/services/socketio/handlers/memory.py +4 -4
  338. claude_mpm/services/socketio/handlers/project.py +4 -4
  339. claude_mpm/services/socketio/handlers/registry.py +53 -38
  340. claude_mpm/services/socketio/server/__init__.py +18 -0
  341. claude_mpm/services/socketio/server/broadcaster.py +252 -0
  342. claude_mpm/services/socketio/server/core.py +399 -0
  343. claude_mpm/services/socketio/server/main.py +323 -0
  344. claude_mpm/services/socketio_client_manager.py +160 -133
  345. claude_mpm/services/socketio_server.py +36 -1885
  346. claude_mpm/services/subprocess_launcher_service.py +316 -0
  347. claude_mpm/services/system_instructions_service.py +258 -0
  348. claude_mpm/services/ticket_manager.py +19 -533
  349. claude_mpm/services/utility_service.py +285 -0
  350. claude_mpm/services/version_control/__init__.py +18 -21
  351. claude_mpm/services/version_control/branch_strategy.py +20 -10
  352. claude_mpm/services/version_control/conflict_resolution.py +37 -13
  353. claude_mpm/services/version_control/git_operations.py +52 -21
  354. claude_mpm/services/version_control/semantic_versioning.py +92 -53
  355. claude_mpm/services/version_control/version_parser.py +145 -125
  356. claude_mpm/services/version_service.py +270 -0
  357. claude_mpm/storage/__init__.py +2 -2
  358. claude_mpm/storage/state_storage.py +177 -181
  359. claude_mpm/ticket_wrapper.py +2 -2
  360. claude_mpm/utils/__init__.py +2 -2
  361. claude_mpm/utils/agent_dependency_loader.py +453 -243
  362. claude_mpm/utils/config_manager.py +157 -118
  363. claude_mpm/utils/console.py +1 -1
  364. claude_mpm/utils/dependency_cache.py +102 -107
  365. claude_mpm/utils/dependency_manager.py +52 -47
  366. claude_mpm/utils/dependency_strategies.py +131 -96
  367. claude_mpm/utils/environment_context.py +110 -102
  368. claude_mpm/utils/error_handler.py +75 -55
  369. claude_mpm/utils/file_utils.py +80 -67
  370. claude_mpm/utils/framework_detection.py +12 -11
  371. claude_mpm/utils/import_migration_example.py +12 -60
  372. claude_mpm/utils/imports.py +48 -45
  373. claude_mpm/utils/path_operations.py +100 -93
  374. claude_mpm/utils/robust_installer.py +172 -164
  375. claude_mpm/utils/session_logging.py +30 -23
  376. claude_mpm/utils/subprocess_utils.py +99 -61
  377. claude_mpm/validation/__init__.py +1 -1
  378. claude_mpm/validation/agent_validator.py +151 -111
  379. claude_mpm/validation/frontmatter_validator.py +92 -71
  380. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/METADATA +27 -1
  381. claude_mpm-4.0.3.dist-info/RECORD +402 -0
  382. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/entry_points.txt +1 -0
  383. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/licenses/LICENSE +1 -1
  384. claude_mpm/cli/commands/run_guarded.py +0 -511
  385. claude_mpm/config/memory_guardian_config.py +0 -325
  386. claude_mpm/config/memory_guardian_yaml.py +0 -335
  387. claude_mpm/core/config_paths.py +0 -150
  388. claude_mpm/core/memory_aware_runner.py +0 -353
  389. claude_mpm/dashboard/static/js/dashboard-original.js +0 -4134
  390. claude_mpm/deployment_paths.py +0 -261
  391. claude_mpm/hooks/claude_hooks/hook_handler_fixed.py +0 -454
  392. claude_mpm/models/state_models.py +0 -433
  393. claude_mpm/services/agent/__init__.py +0 -24
  394. claude_mpm/services/agent/deployment.py +0 -2548
  395. claude_mpm/services/agent/management.py +0 -598
  396. claude_mpm/services/agent/registry.py +0 -813
  397. claude_mpm/services/agents/registry/agent_registry.py +0 -813
  398. claude_mpm/services/communication/socketio.py +0 -1935
  399. claude_mpm/services/communication/websocket.py +0 -479
  400. claude_mpm/services/framework_claude_md_generator.py +0 -624
  401. claude_mpm/services/health_monitor.py +0 -893
  402. claude_mpm/services/infrastructure/graceful_degradation.py +0 -616
  403. claude_mpm/services/infrastructure/health_monitor.py +0 -775
  404. claude_mpm/services/infrastructure/memory_dashboard.py +0 -479
  405. claude_mpm/services/infrastructure/memory_guardian.py +0 -944
  406. claude_mpm/services/infrastructure/restart_protection.py +0 -642
  407. claude_mpm/services/infrastructure/state_manager.py +0 -774
  408. claude_mpm/services/mcp_gateway/manager.py +0 -334
  409. claude_mpm/services/optimized_hook_service.py +0 -542
  410. claude_mpm/services/project_analyzer.py +0 -864
  411. claude_mpm/services/project_registry.py +0 -608
  412. claude_mpm/services/standalone_socketio_server.py +0 -1300
  413. claude_mpm/services/ticket_manager_di.py +0 -318
  414. claude_mpm/services/ticketing_service_original.py +0 -510
  415. claude_mpm/utils/paths.py +0 -395
  416. claude_mpm/utils/platform_memory.py +0 -524
  417. claude_mpm-3.9.11.dist-info/RECORD +0 -306
  418. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/WHEEL +0 -0
  419. {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/top_level.txt +0 -0
@@ -1,944 +0,0 @@
1
- """Memory Guardian service for monitoring and managing Claude Code memory usage.
2
-
3
- This service monitors a subprocess (Claude Code) for memory consumption and
4
- performs automatic restarts when memory thresholds are exceeded.
5
-
6
- Design Principles:
7
- - Subprocess lifecycle management with graceful shutdown
8
- - Multi-threshold memory monitoring (warning, critical, emergency)
9
- - Platform-agnostic memory monitoring with fallbacks
10
- - Configurable restart policies with cooldown periods
11
- - State preservation hooks for future enhancement
12
- """
13
-
14
- import asyncio
15
- import json
16
- import logging
17
- import os
18
- import platform
19
- import signal
20
- import subprocess
21
- import sys
22
- import time
23
- from dataclasses import dataclass, field
24
- from datetime import datetime, timedelta
25
- from enum import Enum
26
- from pathlib import Path
27
- from typing import Optional, Dict, Any, List, Callable, Tuple
28
-
29
- from claude_mpm.services.core.base import BaseService
30
- from claude_mpm.config.memory_guardian_config import (
31
- MemoryGuardianConfig,
32
- get_default_config
33
- )
34
- from claude_mpm.utils.platform_memory import (
35
- get_process_memory,
36
- get_system_memory,
37
- check_memory_pressure,
38
- MemoryInfo
39
- )
40
- from claude_mpm.services.infrastructure.state_manager import StateManager
41
- from claude_mpm.services.infrastructure.restart_protection import RestartProtection
42
- from claude_mpm.services.infrastructure.health_monitor import HealthMonitor
43
- from claude_mpm.services.infrastructure.graceful_degradation import GracefulDegradation
44
-
45
-
46
- class MemoryState(Enum):
47
- """Memory usage state levels."""
48
- NORMAL = "normal"
49
- WARNING = "warning"
50
- CRITICAL = "critical"
51
- EMERGENCY = "emergency"
52
-
53
-
54
- class ProcessState(Enum):
55
- """Process lifecycle states."""
56
- NOT_STARTED = "not_started"
57
- STARTING = "starting"
58
- RUNNING = "running"
59
- STOPPING = "stopping"
60
- STOPPED = "stopped"
61
- RESTARTING = "restarting"
62
- FAILED = "failed"
63
-
64
-
65
- @dataclass
66
- class RestartAttempt:
67
- """Record of a restart attempt."""
68
- timestamp: float
69
- reason: str
70
- memory_mb: float
71
- success: bool
72
-
73
- def to_dict(self) -> Dict[str, Any]:
74
- """Convert to dictionary."""
75
- return {
76
- 'timestamp': self.timestamp,
77
- 'timestamp_iso': datetime.fromtimestamp(self.timestamp).isoformat(),
78
- 'reason': self.reason,
79
- 'memory_mb': self.memory_mb,
80
- 'success': self.success
81
- }
82
-
83
-
84
- @dataclass
85
- class MemoryStats:
86
- """Memory usage statistics."""
87
- current_mb: float = 0.0
88
- peak_mb: float = 0.0
89
- average_mb: float = 0.0
90
- samples: int = 0
91
- last_check: float = 0.0
92
- state: MemoryState = MemoryState.NORMAL
93
-
94
- def update(self, memory_mb: float) -> None:
95
- """Update statistics with new memory reading."""
96
- self.current_mb = memory_mb
97
- self.peak_mb = max(self.peak_mb, memory_mb)
98
-
99
- # Update running average
100
- if self.samples == 0:
101
- self.average_mb = memory_mb
102
- else:
103
- self.average_mb = ((self.average_mb * self.samples) + memory_mb) / (self.samples + 1)
104
-
105
- self.samples += 1
106
- self.last_check = time.time()
107
-
108
- def to_dict(self) -> Dict[str, Any]:
109
- """Convert to dictionary."""
110
- return {
111
- 'current_mb': round(self.current_mb, 2),
112
- 'peak_mb': round(self.peak_mb, 2),
113
- 'average_mb': round(self.average_mb, 2),
114
- 'samples': self.samples,
115
- 'last_check': self.last_check,
116
- 'last_check_iso': datetime.fromtimestamp(self.last_check).isoformat() if self.last_check > 0 else None,
117
- 'state': self.state.value
118
- }
119
-
120
-
121
- class MemoryGuardian(BaseService):
122
- """Service for monitoring and managing subprocess memory usage."""
123
-
124
def __init__(self, config: Optional[MemoryGuardianConfig] = None):
    """Set up the Memory Guardian with its initial, idle state.

    No subprocess is started and no monitoring task is created here; the
    constructor only records configuration and resets bookkeeping.

    Args:
        config: Configuration for memory monitoring and management. When
            omitted (or falsy), ``get_default_config()`` is used.
    """
    super().__init__("MemoryGuardian")

    # Configuration (validation problems are reported, not raised)
    self.config = config or get_default_config()
    for problem in self.config.validate() or ():
        self.log_warning(f"Configuration issue: {problem}")

    # Monitored subprocess
    self.process: Optional[subprocess.Popen] = None
    self.process_state = ProcessState.NOT_STARTED
    self.process_pid: Optional[int] = None

    # Memory readings and the state derived from them
    self.memory_stats = MemoryStats()
    self.memory_state = MemoryState.NORMAL

    # Restart bookkeeping
    self.restart_attempts: List[RestartAttempt] = []
    self.last_restart_time: float = 0.0
    self.consecutive_failures: int = 0

    # Background monitoring task
    self.monitor_task: Optional[asyncio.Task] = None
    self.monitoring = False

    # Hook-based state preservation (used when no state manager is set)
    self.state_save_hooks: List[Callable[[Dict[str, Any]], None]] = []
    self.state_restore_hooks: List[Callable[[Dict[str, Any]], None]] = []

    # Collaborating services, attached later
    self.state_manager: Optional[StateManager] = None
    self.restart_protection: Optional[RestartProtection] = None
    self.health_monitor: Optional[HealthMonitor] = None
    self.graceful_degradation: Optional[GracefulDegradation] = None

    # Lifetime statistics
    self.start_time = time.time()
    self.total_restarts = 0
    self.total_uptime = 0.0

    limits = self.config.thresholds
    self.log_info(
        f"Memory Guardian initialized with thresholds: "
        f"Warning={limits.warning}MB, "
        f"Critical={limits.critical}MB, "
        f"Emergency={limits.emergency}MB"
    )
180
-
181
async def initialize(self) -> bool:
    """Bring the Memory Guardian service up.

    Creates and initializes a ``StateManager``, initializes the safety
    services, optionally loads persisted state, and — when the service is
    enabled — optionally auto-starts the monitored process and starts the
    monitoring loop.

    Returns:
        True if initialization completed, False if any step raised.
    """
    try:
        self.log_info("Initializing Memory Guardian service")

        # State manager first, then the safety services
        self.state_manager = StateManager()
        await self.state_manager.initialize()
        await self._initialize_safety_services()

        settings = self.config

        # Persisted state is only consulted when a state file is configured
        if settings.persist_state and settings.state_file:
            self._load_state()

        # A failed auto-start is only a warning; monitoring still starts
        if settings.auto_start and settings.enabled:
            self.log_info("Auto-starting monitored process")
            started = await self.start_process()
            if not started:
                self.log_warning("Failed to auto-start process")

        if settings.enabled:
            self.start_monitoring()

        self._initialized = True
        self.log_info("Memory Guardian service initialized successfully")
        return True

    except Exception as e:
        self.log_error(f"Failed to initialize Memory Guardian: {e}")
        return False
219
-
220
async def shutdown(self) -> None:
    """Shutdown the Memory Guardian service gracefully.

    Stops monitoring, saves state (when configured), shuts down the state
    manager and safety services, and finally terminates the monitored
    process if it is running. Errors are logged rather than raised.

    The process termination step runs in a ``finally`` clause so that a
    failure in an earlier step cannot leave the child process running
    unattended.
    """
    try:
        self.log_info("Shutting down Memory Guardian service")

        try:
            # Stop monitoring
            await self.stop_monitoring()

            # Save state if configured
            if self.config.persist_state and self.config.state_file:
                self._save_state()

            # Shutdown state manager
            if self.state_manager:
                await self.state_manager.shutdown()

            # Shutdown safety services
            if self.restart_protection:
                await self.restart_protection.shutdown()
            if self.health_monitor:
                await self.health_monitor.shutdown()
            if self.graceful_degradation:
                await self.graceful_degradation.shutdown()
        finally:
            # Terminate process if running, even when a step above failed
            if self.process and self.process_state == ProcessState.RUNNING:
                await self.terminate_process()

        self._shutdown = True
        self.log_info("Memory Guardian service shutdown complete")

    except Exception as e:
        self.log_error(f"Error during Memory Guardian shutdown: {e}")
253
-
254
async def start_process(self) -> bool:
    """Start the monitored subprocess.

    The child is launched in a new session, given two seconds to settle,
    and then checked for an immediate exit. The consecutive-failure
    counter is only reset once the child has survived that check, so a
    process that dies straight away still counts as a failure.

    Returns:
        True if the process is running (or was already running), False if
        it could not be launched or exited immediately.
    """
    if self.process and self.process_state == ProcessState.RUNNING:
        self.log_warning("Process is already running")
        return True

    try:
        self.log_info(f"Starting process: {' '.join(self.config.process_command)}")
        self.process_state = ProcessState.STARTING

        # Prepare environment: inherit ours, then apply configured overrides
        env = os.environ.copy()
        env.update(self.config.process_env)

        # Build command
        cmd = self.config.process_command + self.config.process_args

        # Start subprocess
        # NOTE(review): stdout/stderr are piped but nothing in the visible
        # code reads them; a chatty child could block once the pipe buffer
        # fills — confirm a reader exists elsewhere.
        self.process = subprocess.Popen(
            cmd,
            env=env,
            cwd=self.config.working_directory,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            start_new_session=True  # Create new process group for clean termination
        )

        self.process_pid = self.process.pid
        self.process_state = ProcessState.RUNNING

        # Update health monitor with new process
        if self.health_monitor:
            self.health_monitor.set_monitored_process(self.process_pid)

        self.log_info(f"Process started successfully with PID {self.process_pid}")

        # Give process time to initialize
        await asyncio.sleep(2)

        # Check if process is still running
        if self.process.poll() is not None:
            self.log_error(f"Process exited immediately with code {self.process.returncode}")
            self.process_state = ProcessState.FAILED
            return False

        # Reset failure counter only after the child survived start-up
        self.consecutive_failures = 0
        return True

    except FileNotFoundError:
        self.log_error(f"Command not found: {self.config.process_command[0]}")
        self.process_state = ProcessState.FAILED
        return False
    except Exception as e:
        self.log_error(f"Failed to start process: {e}")
        self.process_state = ProcessState.FAILED
        return False
316
-
317
async def restart_process(self, reason: str = "Manual restart") -> bool:
    """Restart the monitored process with cooldown and retry logic.

    When a restart-protection service is attached it decides whether the
    restart is allowed and how long to back off; otherwise the local
    attempt-window and cooldown policy is used. State is saved before the
    old process is terminated and restored after a successful start.

    If the existing process cannot be terminated the restart is aborted
    and recorded as failed, rather than spawning a second process while
    the first is still alive.

    Args:
        reason: Reason for restart

    Returns:
        True if restart successful
    """
    self.log_info(f"Initiating process restart: {reason}")

    # Use restart protection if available
    if self.restart_protection:
        allowed, protection_reason = self.restart_protection.should_allow_restart(
            self.memory_stats.current_mb
        )
        if not allowed:
            self.log_error(f"Restart blocked by protection: {protection_reason}")

            # Trigger graceful degradation if available
            if self.graceful_degradation:
                await self.graceful_degradation.disable_feature(
                    "automated_monitoring",
                    f"Restart protection triggered: {protection_reason}"
                )

            self.process_state = ProcessState.FAILED
            return False

        # Get backoff from restart protection
        backoff = self.restart_protection.get_backoff_seconds(
            self.restart_protection.statistics.consecutive_failures + 1
        )
        if backoff > 0:
            self.log_info(f"Applying restart backoff of {backoff:.1f} seconds")
            await asyncio.sleep(backoff)
    else:
        # Fallback to original logic
        if not self._can_restart():
            self.log_error("Maximum restart attempts exceeded")
            self.process_state = ProcessState.FAILED
            return False

        # Apply cooldown if needed
        cooldown = self._get_restart_cooldown()
        if cooldown > 0:
            self.log_info(f"Applying restart cooldown of {cooldown} seconds")
            await asyncio.sleep(cooldown)

    # Capture the reading that triggered the restart before it changes
    memory_mb = self.memory_stats.current_mb
    self.process_state = ProcessState.RESTARTING

    # Save state before restart using StateManager
    if self.state_manager:
        state = await self.state_manager.capture_state(restart_reason=reason)
        if state:
            await self.state_manager.persist_state(state)
    else:
        # Fallback to hook-based preservation
        await self._trigger_state_save()

    # Terminate existing process
    terminated = True
    if self.process:
        terminated = await self.terminate_process()

    if terminated:
        # Start new process
        success = await self.start_process()
    else:
        # Starting now would overwrite the handle of a still-living child
        self.log_error("Existing process could not be terminated; restart aborted")
        self.process_state = ProcessState.FAILED
        success = False

    # Record attempt
    attempt = RestartAttempt(
        timestamp=time.time(),
        reason=reason,
        memory_mb=memory_mb,
        success=success
    )
    self.restart_attempts.append(attempt)

    # Record in restart protection service
    if self.restart_protection:
        backoff_applied = self.restart_protection.get_backoff_seconds(
            self.restart_protection.statistics.consecutive_failures + 1
        ) if not success else 0
        self.restart_protection.record_restart(
            reason=reason,
            memory_mb=memory_mb,
            success=success,
            backoff_applied=backoff_applied
        )

    if success:
        self.total_restarts += 1
        self.last_restart_time = time.time()
        self.log_info("Process restarted successfully")

        # Restore state after restart using StateManager
        if self.state_manager:
            await self.state_manager.restore_state()
        else:
            # Fallback to hook-based restoration
            await self._trigger_state_restore()
    else:
        self.consecutive_failures += 1
        self.log_error("Process restart failed")

    return success
423
-
424
async def terminate_process(self, timeout: Optional[int] = None) -> bool:
    """Terminate the monitored process gracefully with escalation.

    Sends a terminate request, waits up to ``timeout`` seconds for the
    process to exit, then escalates to a forced kill and waits up to the
    policy's ``force_kill_timeout``.

    Args:
        timeout: Override timeout for graceful shutdown. ``None`` uses the
            restart policy's ``graceful_timeout``; an explicit ``0`` is
            honored and escalates without waiting.

    Returns:
        True if process terminated successfully (or there was no process)
    """
    if not self.process:
        return True

    # Only fall back to the policy default when no override was supplied;
    # ``timeout or default`` would silently discard an explicit 0.
    if timeout is None:
        timeout = self.config.restart_policy.graceful_timeout

    try:
        self.log_info(f"Terminating process {self.process_pid}")
        self.process_state = ProcessState.STOPPING

        # Popen.terminate() sends SIGTERM on POSIX; on Windows it is already
        # forceful. The call is the same on every platform.
        self.process.terminate()

        # Wait for graceful shutdown
        try:
            self.log_debug(f"Waiting {timeout}s for graceful shutdown")
            await asyncio.wait_for(self._wait_for_process(), timeout=timeout)
            self.log_info("Process terminated gracefully")

        except asyncio.TimeoutError:
            # Escalate to SIGKILL
            self.log_warning("Graceful shutdown timeout, forcing termination")

            if platform.system() != 'Windows':
                self.process.kill()
            else:
                # On Windows, use taskkill /F
                subprocess.run(
                    ['taskkill', '/F', '/PID', str(self.process_pid)],
                    capture_output=True
                )

            # Wait for forced termination
            try:
                await asyncio.wait_for(
                    self._wait_for_process(),
                    timeout=self.config.restart_policy.force_kill_timeout
                )
                self.log_info("Process terminated forcefully")
            except asyncio.TimeoutError:
                self.log_error("Failed to terminate process")
                return False

        self.process = None
        self.process_pid = None
        self.process_state = ProcessState.STOPPED
        return True

    except Exception as e:
        self.log_error(f"Error terminating process: {e}")
        return False
490
-
491
- async def _wait_for_process(self) -> None:
492
- """Wait for process to exit."""
493
- while self.process and self.process.poll() is None:
494
- await asyncio.sleep(0.1)
495
-
496
def get_memory_usage(self) -> Optional[float]:
    """Report the monitored process's current memory usage.

    Returns:
        The ``rss_mb`` figure reported by ``get_process_memory`` for the
        monitored PID, or None when no process is running, no figure is
        available, or the lookup raised.
    """
    is_running = bool(self.process) and self.process_state == ProcessState.RUNNING
    if not is_running:
        return None

    try:
        # Get memory info using platform utilities
        info = get_process_memory(self.process_pid)
        if not info:
            self.log_warning(f"Unable to get memory info for PID {self.process_pid}")
            return None
        return info.rss_mb

    except Exception as e:
        self.log_error(f"Error getting memory usage: {e}")
        return None
517
-
518
async def monitor_memory(self) -> None:
    """Check memory usage and take action if thresholds exceeded.

    One monitoring pass: detect an unexpectedly exited process (and
    restart it when ``auto_start`` is set), otherwise sample memory,
    update statistics, classify the reading against the configured
    thresholds and restart the process on emergency or sustained critical
    usage. A periodic summary is logged when enabled.
    """
    if not self.process or self.process_state != ProcessState.RUNNING:
        return

    # Check if process is still alive
    if self.process.poll() is not None:
        self.log_warning(f"Process exited with code {self.process.returncode}")
        self.process_state = ProcessState.STOPPED
        self.process = None
        self.process_pid = None

        # Auto-restart if configured
        if self.config.auto_start:
            await self.restart_process("Process exited unexpectedly")
        return

    # Get memory usage
    memory_mb = self.get_memory_usage()
    if memory_mb is None:
        return

    # Update statistics
    self.memory_stats.update(memory_mb)

    # Record memory sample for trend analysis
    if self.restart_protection:
        self.restart_protection.record_memory_sample(memory_mb)

    # Determine memory state (highest threshold reached wins)
    old_state = self.memory_state

    if memory_mb >= self.config.thresholds.emergency:
        self.memory_state = MemoryState.EMERGENCY
    elif memory_mb >= self.config.thresholds.critical:
        self.memory_state = MemoryState.CRITICAL
    elif memory_mb >= self.config.thresholds.warning:
        self.memory_state = MemoryState.WARNING
    else:
        self.memory_state = MemoryState.NORMAL

    self.memory_stats.state = self.memory_state

    # Log state changes
    if self.memory_state != old_state:
        self.log_info(f"Memory state changed: {old_state.value} -> {self.memory_state.value} "
                      f"(current: {memory_mb:.2f}MB)")

    # Take action based on state
    if self.memory_state == MemoryState.EMERGENCY:
        self.log_critical(f"Emergency memory threshold exceeded: {memory_mb:.2f}MB")
        await self.restart_process(f"Emergency memory threshold exceeded ({memory_mb:.2f}MB)")

    elif self.memory_state == MemoryState.CRITICAL:
        self.log_warning(f"Critical memory threshold exceeded: {memory_mb:.2f}MB")
        # Check if we've been in critical state for too long
        if self._should_restart_for_critical():
            await self.restart_process(f"Sustained critical memory usage ({memory_mb:.2f}MB)")

    elif self.memory_state == MemoryState.WARNING:
        self.log_debug(f"Warning memory threshold exceeded: {memory_mb:.2f}MB")

    # Log periodic summary. The interval is measured from the previous
    # summary (or service start): memory_stats.last_check was refreshed by
    # update() above, so comparing against it would never exceed the
    # interval.
    if self.config.monitoring.log_memory_stats:
        now = time.time()
        last_summary = getattr(self, "_last_summary_time", self.start_time)
        if now - last_summary > self.config.monitoring.log_interval:
            self._log_memory_summary()
            self._last_summary_time = now
584
-
585
def start_monitoring(self) -> None:
    """Start continuous memory monitoring.

    Schedules the monitoring loop as an asyncio task, so it must be called
    while an event loop is running. The ``monitoring`` flag is only set
    once the task has been created, so a failed start does not leave the
    service claiming to monitor without a task.

    Raises:
        RuntimeError: If there is no running event loop.
    """
    if self.monitoring:
        self.log_warning("Monitoring is already active")
        return

    loop_coro = self._monitoring_loop()
    try:
        self.monitor_task = asyncio.create_task(loop_coro)
    except RuntimeError:
        # No running loop: discard the un-awaited coroutine and leave the
        # flag untouched so a later call can try again.
        loop_coro.close()
        raise

    # The task body does not run until control returns to the event loop,
    # so the flag is in place before the loop first checks it.
    self.monitoring = True
    self.log_info("Started memory monitoring")
594
-
595
async def stop_monitoring(self) -> None:
    """Stop the monitoring loop and wait for its task to finish.

    Does nothing when monitoring is not active.
    """
    if not self.monitoring:
        return

    self.monitoring = False

    pending = self.monitor_task
    if pending:
        pending.cancel()
        try:
            await pending
        except asyncio.CancelledError:
            # Expected: cancellation was just requested above.
            pass
        self.monitor_task = None

    self.log_info("Stopped memory monitoring")
611
-
612
- async def _monitoring_loop(self) -> None:
613
- """Continuous monitoring loop."""
614
- try:
615
- while self.monitoring:
616
- try:
617
- await self.monitor_memory()
618
-
619
- # Get check interval based on current state
620
- interval = self.config.monitoring.get_check_interval(
621
- self.memory_state.value
622
- )
623
-
624
- await asyncio.sleep(interval)
625
-
626
- except Exception as e:
627
- self.log_error(f"Error in monitoring loop: {e}")
628
- await asyncio.sleep(5) # Brief pause before retry
629
-
630
- except asyncio.CancelledError:
631
- self.log_debug("Monitoring loop cancelled")
632
-
633
- def _can_restart(self) -> bool:
634
- """Check if restart is allowed based on policy.
635
-
636
- Returns:
637
- True if restart is allowed
638
- """
639
- # Check max attempts
640
- if self.config.restart_policy.max_attempts <= 0:
641
- return True # Unlimited restarts
642
-
643
- # Count recent attempts
644
- window_start = time.time() - self.config.restart_policy.attempt_window
645
- recent_attempts = [
646
- a for a in self.restart_attempts
647
- if a.timestamp >= window_start
648
- ]
649
-
650
- return len(recent_attempts) < self.config.restart_policy.max_attempts
651
-
652
- def _get_restart_cooldown(self) -> int:
653
- """Get cooldown period for next restart.
654
-
655
- Returns:
656
- Cooldown period in seconds
657
- """
658
- if not self.restart_attempts:
659
- return 0
660
-
661
- # Calculate based on consecutive failures
662
- return self.config.restart_policy.get_cooldown(self.consecutive_failures + 1)
663
-
664
def _should_restart_for_critical(self) -> bool:
    """Decide whether critical memory usage should trigger a restart.

    This is a simplified check: it does not measure how long memory has
    been critical. It allows a restart when the state is currently
    CRITICAL and no restart attempt was recorded in the last 60 seconds.

    Returns:
        True if restart should be triggered
    """
    critical_duration = 60  # seconds

    # Any restart attempt inside the window suppresses another one
    restarted_recently = any(
        attempt.timestamp >= time.time() - critical_duration
        for attempt in self.restart_attempts
    )

    return self.memory_state == MemoryState.CRITICAL and not restarted_recently
682
-
683
- async def _trigger_state_save(self) -> None:
684
- """Trigger state preservation hooks."""
685
- if not self.state_save_hooks:
686
- return
687
-
688
- state = self.get_state()
689
-
690
- for hook in self.state_save_hooks:
691
- try:
692
- hook(state)
693
- except Exception as e:
694
- self.log_error(f"State save hook failed: {e}")
695
-
696
- async def _trigger_state_restore(self) -> None:
697
- """Trigger state restoration hooks."""
698
- if not self.state_restore_hooks:
699
- return
700
-
701
- state = self.get_state()
702
-
703
- for hook in self.state_restore_hooks:
704
- try:
705
- hook(state)
706
- except Exception as e:
707
- self.log_error(f"State restore hook failed: {e}")
708
-
709
- def _log_memory_summary(self) -> None:
710
- """Log memory usage summary."""
711
- uptime = time.time() - self.start_time
712
-
713
- self.log_info(
714
- f"Memory Summary - "
715
- f"Current: {self.memory_stats.current_mb:.2f}MB, "
716
- f"Peak: {self.memory_stats.peak_mb:.2f}MB, "
717
- f"Average: {self.memory_stats.average_mb:.2f}MB, "
718
- f"State: {self.memory_state.value}, "
719
- f"Restarts: {self.total_restarts}, "
720
- f"Uptime: {uptime/3600:.2f}h"
721
- )
722
-
723
- def _save_state(self) -> None:
724
- """Save service state to file."""
725
- if not self.config.state_file:
726
- return
727
-
728
- try:
729
- state = self.get_state()
730
- state_path = Path(self.config.state_file)
731
- state_path.parent.mkdir(parents=True, exist_ok=True)
732
-
733
- with open(state_path, 'w') as f:
734
- json.dump(state, f, indent=2)
735
-
736
- self.log_debug(f"Saved state to {state_path}")
737
-
738
- except Exception as e:
739
- self.log_error(f"Failed to save state: {e}")
740
-
741
- def _load_state(self) -> None:
742
- """Load service state from file."""
743
- if not self.config.state_file:
744
- return
745
-
746
- try:
747
- state_path = Path(self.config.state_file)
748
- if not state_path.exists():
749
- return
750
-
751
- with open(state_path, 'r') as f:
752
- state = json.load(f)
753
-
754
- # Restore relevant state
755
- self.total_restarts = state.get('total_restarts', 0)
756
- self.memory_stats.peak_mb = state.get('memory_stats', {}).get('peak_mb', 0.0)
757
-
758
- # Restore restart attempts
759
- attempts = state.get('restart_attempts', [])
760
- for attempt_data in attempts:
761
- attempt = RestartAttempt(
762
- timestamp=attempt_data['timestamp'],
763
- reason=attempt_data['reason'],
764
- memory_mb=attempt_data['memory_mb'],
765
- success=attempt_data['success']
766
- )
767
- self.restart_attempts.append(attempt)
768
-
769
- self.log_debug(f"Loaded state from {state_path}")
770
-
771
- except Exception as e:
772
- self.log_error(f"Failed to load state: {e}")
773
-
774
def add_state_save_hook(self, hook: Callable[[Dict[str, Any]], None]) -> None:
    """Add a hook to be called before process restart.

    Any callable is accepted, including ones without a ``__name__``
    (for example ``functools.partial`` objects or callable instances).

    Args:
        hook: Function to call with current state
    """
    self.state_save_hooks.append(hook)
    # Not every callable has __name__; fall back to its repr for the log.
    hook_name = getattr(hook, "__name__", repr(hook))
    self.log_debug(f"Added state save hook: {hook_name}")
782
-
783
def add_state_restore_hook(self, hook: Callable[[Dict[str, Any]], None]) -> None:
    """Add a hook to be called after process restart.

    Any callable is accepted, including ones without a ``__name__``
    (for example ``functools.partial`` objects or callable instances).

    Args:
        hook: Function to call with the service state after a restart
    """
    self.state_restore_hooks.append(hook)
    # Not every callable has __name__; fall back to its repr for the log.
    hook_name = getattr(hook, "__name__", repr(hook))
    self.log_debug(f"Added state restore hook: {hook_name}")
791
-
792
def set_state_manager(self, state_manager: StateManager) -> None:
    """Attach the state manager used to capture and restore state around restarts.

    Replaces any state manager that was previously attached.

    Args:
        state_manager: StateManager instance to use
    """
    self.state_manager = state_manager
    self.log_info("State manager configured for Memory Guardian")
800
-
801
def get_state(self) -> Dict[str, Any]:
    """Snapshot the service state as a dictionary.

    Only the ten most recent restart attempts are included.

    Returns:
        Dictionary containing service state
    """
    snapshot: Dict[str, Any] = {}
    snapshot['process_state'] = self.process_state.value
    snapshot['process_pid'] = self.process_pid
    snapshot['memory_state'] = self.memory_state.value
    snapshot['memory_stats'] = self.memory_stats.to_dict()
    snapshot['total_restarts'] = self.total_restarts
    snapshot['consecutive_failures'] = self.consecutive_failures

    latest_attempts = self.restart_attempts[-10:]  # Last 10
    snapshot['restart_attempts'] = [attempt.to_dict() for attempt in latest_attempts]

    snapshot['config'] = self.config.to_dict()
    snapshot['start_time'] = self.start_time
    snapshot['monitoring'] = self.monitoring
    return snapshot
819
-
820
async def _initialize_safety_services(self) -> None:
    """Initialize safety and protection services.

    Creates and initializes, in order, the restart protection, health
    monitor and graceful degradation services, then runs an initial
    health check. Each service is attached to ``self`` only after its
    ``initialize()`` succeeded, so a failure part-way through never
    leaves an uninitialized service in use. Failures are logged and the
    guardian continues with whatever was attached before the failure.
    """
    try:
        # Side files live next to the main state file, when one is configured
        state_dir = Path(self.config.state_file).parent if self.config.state_file else None

        # Initialize restart protection
        restart_protection = RestartProtection(
            max_restarts_per_hour=5,
            max_consecutive_failures=3,
            base_backoff_seconds=1,
            max_backoff_seconds=60,
            state_file=state_dir / "restart_protection.json" if state_dir is not None else None
        )
        await restart_protection.initialize()
        self.restart_protection = restart_protection
        self.log_info("Restart protection service initialized")

        # Initialize health monitor
        health_monitor = HealthMonitor(
            cpu_threshold_percent=80,
            memory_threshold_percent=90,
            disk_threshold_percent=90,
            min_disk_space_gb=1.0,
            check_interval_seconds=30
        )
        await health_monitor.initialize()
        self.health_monitor = health_monitor
        self.log_info("Health monitor service initialized")

        # Initialize graceful degradation
        graceful_degradation = GracefulDegradation(
            enable_notifications=True,
            log_degradation_events=True,
            state_file=state_dir / "degradation.json" if state_dir is not None else None
        )
        await graceful_degradation.initialize()
        self.graceful_degradation = graceful_degradation
        self.log_info("Graceful degradation service initialized")

        # Check initial health
        valid, message = await health_monitor.validate_before_start()
        if not valid:
            self.log_warning(f"System health check warning: {message}")
            await graceful_degradation.degrade_feature(
                "automated_monitoring",
                message,
                "reduced monitoring frequency"
            )

    except Exception as e:
        self.log_error(f"Failed to initialize safety services: {e}")
        # Continue without safety services - graceful degradation
        self.log_warning("Continuing without safety services")
868
-
869
def set_restart_protection(self, restart_protection: RestartProtection) -> None:
    """Attach the restart protection service consulted before each restart.

    Replaces any restart protection service that was previously attached.

    Args:
        restart_protection: RestartProtection instance to use
    """
    self.restart_protection = restart_protection
    self.log_info("Restart protection configured for Memory Guardian")
877
-
878
def set_health_monitor(self, health_monitor: HealthMonitor) -> None:
    """Attach the health monitor service.

    When a process PID is already known, the monitor is pointed at it
    straight away.

    Args:
        health_monitor: HealthMonitor instance to use
    """
    self.health_monitor = health_monitor

    known_pid = self.process_pid
    if known_pid:
        self.health_monitor.set_monitored_process(known_pid)

    self.log_info("Health monitor configured for Memory Guardian")
888
-
889
def set_graceful_degradation(self, graceful_degradation: GracefulDegradation) -> None:
    """Attach the graceful degradation service.

    Replaces any graceful degradation service that was previously attached.

    Args:
        graceful_degradation: GracefulDegradation instance to use
    """
    self.graceful_degradation = graceful_degradation
    self.log_info("Graceful degradation configured for Memory Guardian")
897
-
898
def get_status(self) -> Dict[str, Any]:
    """Build a status report for the service.

    The reported uptime is measured from the service's ``start_time`` and
    is zero while no process handle is held. System memory figures are
    converted from the values returned by ``get_system_memory()`` by
    dividing by 1024 * 1024.

    Returns:
        Dictionary containing service status
    """
    if self.process:
        uptime = time.time() - self.start_time
    else:
        uptime = 0

    # Get system memory info
    total_mem, available_mem = get_system_memory()
    divisor = 1024 * 1024

    stats = self.memory_stats
    limits = self.config.thresholds

    enabled = self.config.enabled

    process_section = {
        'state': self.process_state.value,
        'pid': self.process_pid,
        'uptime_seconds': uptime,
        'uptime_hours': uptime / 3600 if uptime > 0 else 0
    }

    memory_section = {
        'current_mb': stats.current_mb,
        'peak_mb': stats.peak_mb,
        'average_mb': stats.average_mb,
        'state': self.memory_state.value,
        'thresholds': {
            'warning_mb': limits.warning,
            'critical_mb': limits.critical,
            'emergency_mb': limits.emergency
        },
        'system': {
            'total_mb': total_mem / divisor if total_mem > 0 else 0,
            'available_mb': available_mem / divisor if available_mem > 0 else 0,
            'pressure': check_memory_pressure()
        }
    }

    restarts_section = {
        'total': self.total_restarts,
        'consecutive_failures': self.consecutive_failures,
        'can_restart': self._can_restart(),
        'recent_attempts': [attempt.to_dict() for attempt in self.restart_attempts[-5:]]
    }

    monitoring_section = {
        'active': self.monitoring,
        'check_interval': self.config.monitoring.get_check_interval(self.memory_state.value),
        'samples': stats.samples
    }

    return {
        'enabled': enabled,
        'process': process_section,
        'memory': memory_section,
        'restarts': restarts_section,
        'monitoring': monitoring_section
    }