claude-mpm 3.9.11__py3-none-any.whl → 4.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/__init__.py +2 -2
- claude_mpm/__main__.py +3 -2
- claude_mpm/agents/__init__.py +85 -79
- claude_mpm/agents/agent_loader.py +464 -1003
- claude_mpm/agents/agent_loader_integration.py +45 -45
- claude_mpm/agents/agents_metadata.py +29 -30
- claude_mpm/agents/async_agent_loader.py +156 -138
- claude_mpm/agents/base_agent.json +1 -1
- claude_mpm/agents/base_agent_loader.py +179 -151
- claude_mpm/agents/frontmatter_validator.py +229 -130
- claude_mpm/agents/schema/agent_schema.json +1 -1
- claude_mpm/agents/system_agent_config.py +213 -147
- claude_mpm/agents/templates/__init__.py +13 -13
- claude_mpm/agents/templates/code_analyzer.json +2 -2
- claude_mpm/agents/templates/data_engineer.json +1 -1
- claude_mpm/agents/templates/documentation.json +23 -11
- claude_mpm/agents/templates/engineer.json +22 -6
- claude_mpm/agents/templates/memory_manager.json +1 -1
- claude_mpm/agents/templates/ops.json +2 -2
- claude_mpm/agents/templates/project_organizer.json +1 -1
- claude_mpm/agents/templates/qa.json +1 -1
- claude_mpm/agents/templates/refactoring_engineer.json +222 -0
- claude_mpm/agents/templates/research.json +20 -14
- claude_mpm/agents/templates/security.json +1 -1
- claude_mpm/agents/templates/ticketing.json +1 -1
- claude_mpm/agents/templates/version_control.json +1 -1
- claude_mpm/agents/templates/web_qa.json +3 -1
- claude_mpm/agents/templates/web_ui.json +2 -2
- claude_mpm/cli/__init__.py +79 -51
- claude_mpm/cli/__main__.py +3 -2
- claude_mpm/cli/commands/__init__.py +20 -20
- claude_mpm/cli/commands/agents.py +279 -247
- claude_mpm/cli/commands/aggregate.py +138 -157
- claude_mpm/cli/commands/cleanup.py +147 -147
- claude_mpm/cli/commands/config.py +93 -76
- claude_mpm/cli/commands/info.py +17 -16
- claude_mpm/cli/commands/mcp.py +140 -905
- claude_mpm/cli/commands/mcp_command_router.py +139 -0
- claude_mpm/cli/commands/mcp_config_commands.py +20 -0
- claude_mpm/cli/commands/mcp_install_commands.py +20 -0
- claude_mpm/cli/commands/mcp_server_commands.py +175 -0
- claude_mpm/cli/commands/mcp_tool_commands.py +34 -0
- claude_mpm/cli/commands/memory.py +239 -203
- claude_mpm/cli/commands/monitor.py +203 -81
- claude_mpm/cli/commands/run.py +380 -429
- claude_mpm/cli/commands/run_config_checker.py +160 -0
- claude_mpm/cli/commands/socketio_monitor.py +235 -0
- claude_mpm/cli/commands/tickets.py +305 -197
- claude_mpm/cli/parser.py +24 -1156
- claude_mpm/cli/parsers/__init__.py +29 -0
- claude_mpm/cli/parsers/agents_parser.py +136 -0
- claude_mpm/cli/parsers/base_parser.py +331 -0
- claude_mpm/cli/parsers/config_parser.py +85 -0
- claude_mpm/cli/parsers/mcp_parser.py +152 -0
- claude_mpm/cli/parsers/memory_parser.py +138 -0
- claude_mpm/cli/parsers/monitor_parser.py +104 -0
- claude_mpm/cli/parsers/run_parser.py +147 -0
- claude_mpm/cli/parsers/tickets_parser.py +203 -0
- claude_mpm/cli/ticket_cli.py +7 -3
- claude_mpm/cli/utils.py +55 -37
- claude_mpm/cli_module/__init__.py +6 -6
- claude_mpm/cli_module/args.py +188 -140
- claude_mpm/cli_module/commands.py +79 -70
- claude_mpm/cli_module/migration_example.py +38 -60
- claude_mpm/config/__init__.py +32 -25
- claude_mpm/config/agent_config.py +151 -119
- claude_mpm/config/experimental_features.py +71 -73
- claude_mpm/config/paths.py +94 -208
- claude_mpm/config/socketio_config.py +84 -73
- claude_mpm/constants.py +35 -18
- claude_mpm/core/__init__.py +9 -6
- claude_mpm/core/agent_name_normalizer.py +68 -71
- claude_mpm/core/agent_registry.py +372 -521
- claude_mpm/core/agent_session_manager.py +74 -63
- claude_mpm/core/base_service.py +116 -87
- claude_mpm/core/cache.py +119 -153
- claude_mpm/core/claude_runner.py +425 -1120
- claude_mpm/core/config.py +263 -168
- claude_mpm/core/config_aliases.py +69 -61
- claude_mpm/core/config_constants.py +292 -0
- claude_mpm/core/constants.py +57 -99
- claude_mpm/core/container.py +211 -178
- claude_mpm/core/exceptions.py +233 -89
- claude_mpm/core/factories.py +92 -54
- claude_mpm/core/framework_loader.py +378 -220
- claude_mpm/core/hook_manager.py +198 -83
- claude_mpm/core/hook_performance_config.py +136 -0
- claude_mpm/core/injectable_service.py +61 -55
- claude_mpm/core/interactive_session.py +165 -155
- claude_mpm/core/interfaces.py +221 -195
- claude_mpm/core/lazy.py +96 -96
- claude_mpm/core/logger.py +133 -107
- claude_mpm/core/logging_config.py +185 -157
- claude_mpm/core/minimal_framework_loader.py +20 -15
- claude_mpm/core/mixins.py +30 -29
- claude_mpm/core/oneshot_session.py +215 -181
- claude_mpm/core/optimized_agent_loader.py +134 -138
- claude_mpm/core/optimized_startup.py +159 -157
- claude_mpm/core/pm_hook_interceptor.py +85 -72
- claude_mpm/core/service_registry.py +103 -101
- claude_mpm/core/session_manager.py +97 -87
- claude_mpm/core/socketio_pool.py +212 -158
- claude_mpm/core/tool_access_control.py +58 -51
- claude_mpm/core/types.py +46 -24
- claude_mpm/core/typing_utils.py +166 -82
- claude_mpm/core/unified_agent_registry.py +721 -0
- claude_mpm/core/unified_config.py +550 -0
- claude_mpm/core/unified_paths.py +549 -0
- claude_mpm/dashboard/index.html +1 -1
- claude_mpm/dashboard/open_dashboard.py +51 -17
- claude_mpm/dashboard/static/css/dashboard.css +27 -8
- claude_mpm/dashboard/static/dist/components/agent-inference.js +2 -0
- claude_mpm/dashboard/static/dist/components/event-processor.js +2 -0
- claude_mpm/dashboard/static/dist/components/event-viewer.js +2 -0
- claude_mpm/dashboard/static/dist/components/export-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-library-loader.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-visualizer.js +2 -0
- claude_mpm/dashboard/static/dist/components/module-viewer.js +2 -0
- claude_mpm/dashboard/static/dist/components/session-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/socket-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/ui-state-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/working-directory.js +2 -0
- claude_mpm/dashboard/static/dist/dashboard.js +2 -0
- claude_mpm/dashboard/static/dist/socket-client.js +2 -0
- claude_mpm/dashboard/static/js/components/agent-inference.js +80 -76
- claude_mpm/dashboard/static/js/components/event-processor.js +71 -67
- claude_mpm/dashboard/static/js/components/event-viewer.js +74 -70
- claude_mpm/dashboard/static/js/components/export-manager.js +31 -28
- claude_mpm/dashboard/static/js/components/file-tool-tracker.js +106 -92
- claude_mpm/dashboard/static/js/components/hud-library-loader.js +11 -11
- claude_mpm/dashboard/static/js/components/hud-manager.js +73 -73
- claude_mpm/dashboard/static/js/components/hud-visualizer.js +163 -163
- claude_mpm/dashboard/static/js/components/module-viewer.js +305 -233
- claude_mpm/dashboard/static/js/components/session-manager.js +32 -29
- claude_mpm/dashboard/static/js/components/socket-manager.js +27 -20
- claude_mpm/dashboard/static/js/components/ui-state-manager.js +21 -18
- claude_mpm/dashboard/static/js/components/working-directory.js +74 -71
- claude_mpm/dashboard/static/js/dashboard.js +178 -453
- claude_mpm/dashboard/static/js/extension-error-handler.js +164 -0
- claude_mpm/dashboard/static/js/socket-client.js +120 -54
- claude_mpm/dashboard/templates/index.html +40 -50
- claude_mpm/experimental/cli_enhancements.py +60 -58
- claude_mpm/generators/__init__.py +1 -1
- claude_mpm/generators/agent_profile_generator.py +75 -65
- claude_mpm/hooks/__init__.py +1 -1
- claude_mpm/hooks/base_hook.py +33 -28
- claude_mpm/hooks/claude_hooks/__init__.py +1 -1
- claude_mpm/hooks/claude_hooks/connection_pool.py +120 -0
- claude_mpm/hooks/claude_hooks/event_handlers.py +743 -0
- claude_mpm/hooks/claude_hooks/hook_handler.py +415 -1331
- claude_mpm/hooks/claude_hooks/hook_wrapper.sh +4 -4
- claude_mpm/hooks/claude_hooks/memory_integration.py +221 -0
- claude_mpm/hooks/claude_hooks/response_tracking.py +348 -0
- claude_mpm/hooks/claude_hooks/tool_analysis.py +230 -0
- claude_mpm/hooks/memory_integration_hook.py +140 -100
- claude_mpm/hooks/tool_call_interceptor.py +89 -76
- claude_mpm/hooks/validation_hooks.py +57 -49
- claude_mpm/init.py +145 -121
- claude_mpm/models/__init__.py +9 -9
- claude_mpm/models/agent_definition.py +33 -23
- claude_mpm/models/agent_session.py +228 -200
- claude_mpm/scripts/__init__.py +1 -1
- claude_mpm/scripts/socketio_daemon.py +192 -75
- claude_mpm/scripts/socketio_server_manager.py +328 -0
- claude_mpm/scripts/start_activity_logging.py +25 -22
- claude_mpm/services/__init__.py +68 -43
- claude_mpm/services/agent_capabilities_service.py +271 -0
- claude_mpm/services/agents/__init__.py +23 -32
- claude_mpm/services/agents/deployment/__init__.py +3 -3
- claude_mpm/services/agents/deployment/agent_config_provider.py +310 -0
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +359 -0
- claude_mpm/services/agents/deployment/agent_definition_factory.py +84 -0
- claude_mpm/services/agents/deployment/agent_deployment.py +415 -2113
- claude_mpm/services/agents/deployment/agent_discovery_service.py +387 -0
- claude_mpm/services/agents/deployment/agent_environment_manager.py +293 -0
- claude_mpm/services/agents/deployment/agent_filesystem_manager.py +387 -0
- claude_mpm/services/agents/deployment/agent_format_converter.py +453 -0
- claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +161 -0
- claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +345 -495
- claude_mpm/services/agents/deployment/agent_metrics_collector.py +279 -0
- claude_mpm/services/agents/deployment/agent_restore_handler.py +88 -0
- claude_mpm/services/agents/deployment/agent_template_builder.py +406 -0
- claude_mpm/services/agents/deployment/agent_validator.py +352 -0
- claude_mpm/services/agents/deployment/agent_version_manager.py +313 -0
- claude_mpm/services/agents/deployment/agent_versioning.py +6 -9
- claude_mpm/services/agents/deployment/agents_directory_resolver.py +79 -0
- claude_mpm/services/agents/deployment/async_agent_deployment.py +298 -234
- claude_mpm/services/agents/deployment/config/__init__.py +13 -0
- claude_mpm/services/agents/deployment/config/deployment_config.py +182 -0
- claude_mpm/services/agents/deployment/config/deployment_config_manager.py +200 -0
- claude_mpm/services/agents/deployment/deployment_config_loader.py +54 -0
- claude_mpm/services/agents/deployment/deployment_type_detector.py +124 -0
- claude_mpm/services/agents/deployment/facade/__init__.py +18 -0
- claude_mpm/services/agents/deployment/facade/async_deployment_executor.py +159 -0
- claude_mpm/services/agents/deployment/facade/deployment_executor.py +73 -0
- claude_mpm/services/agents/deployment/facade/deployment_facade.py +270 -0
- claude_mpm/services/agents/deployment/facade/sync_deployment_executor.py +178 -0
- claude_mpm/services/agents/deployment/interface_adapter.py +227 -0
- claude_mpm/services/agents/deployment/lifecycle_health_checker.py +85 -0
- claude_mpm/services/agents/deployment/lifecycle_performance_tracker.py +100 -0
- claude_mpm/services/agents/deployment/pipeline/__init__.py +32 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +158 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +159 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +169 -0
- claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +19 -0
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +195 -0
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +119 -0
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +79 -0
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +90 -0
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +100 -0
- claude_mpm/services/agents/deployment/processors/__init__.py +15 -0
- claude_mpm/services/agents/deployment/processors/agent_deployment_context.py +98 -0
- claude_mpm/services/agents/deployment/processors/agent_deployment_result.py +235 -0
- claude_mpm/services/agents/deployment/processors/agent_processor.py +258 -0
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +318 -0
- claude_mpm/services/agents/deployment/results/__init__.py +13 -0
- claude_mpm/services/agents/deployment/results/deployment_metrics.py +200 -0
- claude_mpm/services/agents/deployment/results/deployment_result_builder.py +249 -0
- claude_mpm/services/agents/deployment/strategies/__init__.py +25 -0
- claude_mpm/services/agents/deployment/strategies/base_strategy.py +119 -0
- claude_mpm/services/agents/deployment/strategies/project_strategy.py +150 -0
- claude_mpm/services/agents/deployment/strategies/strategy_selector.py +117 -0
- claude_mpm/services/agents/deployment/strategies/system_strategy.py +116 -0
- claude_mpm/services/agents/deployment/strategies/user_strategy.py +137 -0
- claude_mpm/services/agents/deployment/system_instructions_deployer.py +108 -0
- claude_mpm/services/agents/deployment/validation/__init__.py +19 -0
- claude_mpm/services/agents/deployment/validation/agent_validator.py +323 -0
- claude_mpm/services/agents/deployment/validation/deployment_validator.py +238 -0
- claude_mpm/services/agents/deployment/validation/template_validator.py +299 -0
- claude_mpm/services/agents/deployment/validation/validation_result.py +226 -0
- claude_mpm/services/agents/loading/__init__.py +2 -2
- claude_mpm/services/agents/loading/agent_profile_loader.py +259 -229
- claude_mpm/services/agents/loading/base_agent_manager.py +90 -81
- claude_mpm/services/agents/loading/framework_agent_loader.py +154 -129
- claude_mpm/services/agents/management/__init__.py +2 -2
- claude_mpm/services/agents/management/agent_capabilities_generator.py +72 -58
- claude_mpm/services/agents/management/agent_management_service.py +209 -156
- claude_mpm/services/agents/memory/__init__.py +9 -6
- claude_mpm/services/agents/memory/agent_memory_manager.py +218 -1152
- claude_mpm/services/agents/memory/agent_persistence_service.py +20 -16
- claude_mpm/services/agents/memory/analyzer.py +430 -0
- claude_mpm/services/agents/memory/content_manager.py +376 -0
- claude_mpm/services/agents/memory/template_generator.py +468 -0
- claude_mpm/services/agents/registry/__init__.py +7 -10
- claude_mpm/services/agents/registry/deployed_agent_discovery.py +122 -97
- claude_mpm/services/agents/registry/modification_tracker.py +351 -285
- claude_mpm/services/async_session_logger.py +187 -153
- claude_mpm/services/claude_session_logger.py +87 -72
- claude_mpm/services/command_handler_service.py +217 -0
- claude_mpm/services/communication/__init__.py +3 -2
- claude_mpm/services/core/__init__.py +50 -97
- claude_mpm/services/core/base.py +60 -53
- claude_mpm/services/core/interfaces/__init__.py +188 -0
- claude_mpm/services/core/interfaces/agent.py +351 -0
- claude_mpm/services/core/interfaces/communication.py +343 -0
- claude_mpm/services/core/interfaces/infrastructure.py +413 -0
- claude_mpm/services/core/interfaces/service.py +434 -0
- claude_mpm/services/core/interfaces.py +19 -944
- claude_mpm/services/event_aggregator.py +208 -170
- claude_mpm/services/exceptions.py +387 -308
- claude_mpm/services/framework_claude_md_generator/__init__.py +75 -79
- claude_mpm/services/framework_claude_md_generator/content_assembler.py +69 -60
- claude_mpm/services/framework_claude_md_generator/content_validator.py +65 -61
- claude_mpm/services/framework_claude_md_generator/deployment_manager.py +68 -49
- claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +34 -34
- claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +25 -22
- claude_mpm/services/framework_claude_md_generator/section_generators/claude_pm_init.py +10 -10
- claude_mpm/services/framework_claude_md_generator/section_generators/core_responsibilities.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/delegation_constraints.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/environment_config.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/footer.py +6 -5
- claude_mpm/services/framework_claude_md_generator/section_generators/header.py +8 -7
- claude_mpm/services/framework_claude_md_generator/section_generators/orchestration_principles.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/role_designation.py +6 -5
- claude_mpm/services/framework_claude_md_generator/section_generators/subprocess_validation.py +9 -8
- claude_mpm/services/framework_claude_md_generator/section_generators/todo_task_tools.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/troubleshooting.py +5 -4
- claude_mpm/services/framework_claude_md_generator/section_manager.py +28 -27
- claude_mpm/services/framework_claude_md_generator/version_manager.py +30 -28
- claude_mpm/services/hook_service.py +106 -114
- claude_mpm/services/infrastructure/__init__.py +7 -5
- claude_mpm/services/infrastructure/context_preservation.py +233 -199
- claude_mpm/services/infrastructure/daemon_manager.py +279 -0
- claude_mpm/services/infrastructure/logging.py +83 -76
- claude_mpm/services/infrastructure/monitoring.py +547 -404
- claude_mpm/services/mcp_gateway/__init__.py +30 -13
- claude_mpm/services/mcp_gateway/config/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/config/config_loader.py +61 -56
- claude_mpm/services/mcp_gateway/config/config_schema.py +50 -41
- claude_mpm/services/mcp_gateway/config/configuration.py +82 -75
- claude_mpm/services/mcp_gateway/core/__init__.py +13 -20
- claude_mpm/services/mcp_gateway/core/base.py +80 -67
- claude_mpm/services/mcp_gateway/core/exceptions.py +60 -46
- claude_mpm/services/mcp_gateway/core/interfaces.py +87 -84
- claude_mpm/services/mcp_gateway/main.py +287 -137
- claude_mpm/services/mcp_gateway/registry/__init__.py +1 -1
- claude_mpm/services/mcp_gateway/registry/service_registry.py +97 -94
- claude_mpm/services/mcp_gateway/registry/tool_registry.py +135 -126
- claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/server/mcp_gateway.py +105 -110
- claude_mpm/services/mcp_gateway/server/stdio_handler.py +105 -107
- claude_mpm/services/mcp_gateway/server/stdio_server.py +691 -0
- claude_mpm/services/mcp_gateway/tools/__init__.py +4 -2
- claude_mpm/services/mcp_gateway/tools/base_adapter.py +109 -119
- claude_mpm/services/mcp_gateway/tools/document_summarizer.py +283 -215
- claude_mpm/services/mcp_gateway/tools/hello_world.py +122 -120
- claude_mpm/services/mcp_gateway/tools/ticket_tools.py +652 -0
- claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +606 -0
- claude_mpm/services/memory/__init__.py +2 -2
- claude_mpm/services/memory/builder.py +451 -362
- claude_mpm/services/memory/cache/__init__.py +2 -2
- claude_mpm/services/memory/cache/shared_prompt_cache.py +232 -194
- claude_mpm/services/memory/cache/simple_cache.py +107 -93
- claude_mpm/services/memory/indexed_memory.py +195 -193
- claude_mpm/services/memory/optimizer.py +267 -234
- claude_mpm/services/memory/router.py +571 -263
- claude_mpm/services/memory_hook_service.py +237 -0
- claude_mpm/services/port_manager.py +223 -0
- claude_mpm/services/project/__init__.py +3 -3
- claude_mpm/services/project/analyzer.py +451 -305
- claude_mpm/services/project/registry.py +262 -240
- claude_mpm/services/recovery_manager.py +287 -231
- claude_mpm/services/response_tracker.py +87 -67
- claude_mpm/services/runner_configuration_service.py +587 -0
- claude_mpm/services/session_management_service.py +304 -0
- claude_mpm/services/socketio/__init__.py +4 -4
- claude_mpm/services/socketio/client_proxy.py +174 -0
- claude_mpm/services/socketio/handlers/__init__.py +3 -3
- claude_mpm/services/socketio/handlers/base.py +44 -30
- claude_mpm/services/socketio/handlers/connection.py +145 -65
- claude_mpm/services/socketio/handlers/file.py +123 -108
- claude_mpm/services/socketio/handlers/git.py +607 -373
- claude_mpm/services/socketio/handlers/hook.py +170 -0
- claude_mpm/services/socketio/handlers/memory.py +4 -4
- claude_mpm/services/socketio/handlers/project.py +4 -4
- claude_mpm/services/socketio/handlers/registry.py +53 -38
- claude_mpm/services/socketio/server/__init__.py +18 -0
- claude_mpm/services/socketio/server/broadcaster.py +252 -0
- claude_mpm/services/socketio/server/core.py +399 -0
- claude_mpm/services/socketio/server/main.py +323 -0
- claude_mpm/services/socketio_client_manager.py +160 -133
- claude_mpm/services/socketio_server.py +36 -1885
- claude_mpm/services/subprocess_launcher_service.py +316 -0
- claude_mpm/services/system_instructions_service.py +258 -0
- claude_mpm/services/ticket_manager.py +19 -533
- claude_mpm/services/utility_service.py +285 -0
- claude_mpm/services/version_control/__init__.py +18 -21
- claude_mpm/services/version_control/branch_strategy.py +20 -10
- claude_mpm/services/version_control/conflict_resolution.py +37 -13
- claude_mpm/services/version_control/git_operations.py +52 -21
- claude_mpm/services/version_control/semantic_versioning.py +92 -53
- claude_mpm/services/version_control/version_parser.py +145 -125
- claude_mpm/services/version_service.py +270 -0
- claude_mpm/storage/__init__.py +2 -2
- claude_mpm/storage/state_storage.py +177 -181
- claude_mpm/ticket_wrapper.py +2 -2
- claude_mpm/utils/__init__.py +2 -2
- claude_mpm/utils/agent_dependency_loader.py +453 -243
- claude_mpm/utils/config_manager.py +157 -118
- claude_mpm/utils/console.py +1 -1
- claude_mpm/utils/dependency_cache.py +102 -107
- claude_mpm/utils/dependency_manager.py +52 -47
- claude_mpm/utils/dependency_strategies.py +131 -96
- claude_mpm/utils/environment_context.py +110 -102
- claude_mpm/utils/error_handler.py +75 -55
- claude_mpm/utils/file_utils.py +80 -67
- claude_mpm/utils/framework_detection.py +12 -11
- claude_mpm/utils/import_migration_example.py +12 -60
- claude_mpm/utils/imports.py +48 -45
- claude_mpm/utils/path_operations.py +100 -93
- claude_mpm/utils/robust_installer.py +172 -164
- claude_mpm/utils/session_logging.py +30 -23
- claude_mpm/utils/subprocess_utils.py +99 -61
- claude_mpm/validation/__init__.py +1 -1
- claude_mpm/validation/agent_validator.py +151 -111
- claude_mpm/validation/frontmatter_validator.py +92 -71
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/METADATA +27 -1
- claude_mpm-4.0.3.dist-info/RECORD +402 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/entry_points.txt +1 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/licenses/LICENSE +1 -1
- claude_mpm/cli/commands/run_guarded.py +0 -511
- claude_mpm/config/memory_guardian_config.py +0 -325
- claude_mpm/config/memory_guardian_yaml.py +0 -335
- claude_mpm/core/config_paths.py +0 -150
- claude_mpm/core/memory_aware_runner.py +0 -353
- claude_mpm/dashboard/static/js/dashboard-original.js +0 -4134
- claude_mpm/deployment_paths.py +0 -261
- claude_mpm/hooks/claude_hooks/hook_handler_fixed.py +0 -454
- claude_mpm/models/state_models.py +0 -433
- claude_mpm/services/agent/__init__.py +0 -24
- claude_mpm/services/agent/deployment.py +0 -2548
- claude_mpm/services/agent/management.py +0 -598
- claude_mpm/services/agent/registry.py +0 -813
- claude_mpm/services/agents/registry/agent_registry.py +0 -813
- claude_mpm/services/communication/socketio.py +0 -1935
- claude_mpm/services/communication/websocket.py +0 -479
- claude_mpm/services/framework_claude_md_generator.py +0 -624
- claude_mpm/services/health_monitor.py +0 -893
- claude_mpm/services/infrastructure/graceful_degradation.py +0 -616
- claude_mpm/services/infrastructure/health_monitor.py +0 -775
- claude_mpm/services/infrastructure/memory_dashboard.py +0 -479
- claude_mpm/services/infrastructure/memory_guardian.py +0 -944
- claude_mpm/services/infrastructure/restart_protection.py +0 -642
- claude_mpm/services/infrastructure/state_manager.py +0 -774
- claude_mpm/services/mcp_gateway/manager.py +0 -334
- claude_mpm/services/optimized_hook_service.py +0 -542
- claude_mpm/services/project_analyzer.py +0 -864
- claude_mpm/services/project_registry.py +0 -608
- claude_mpm/services/standalone_socketio_server.py +0 -1300
- claude_mpm/services/ticket_manager_di.py +0 -318
- claude_mpm/services/ticketing_service_original.py +0 -510
- claude_mpm/utils/paths.py +0 -395
- claude_mpm/utils/platform_memory.py +0 -524
- claude_mpm-3.9.11.dist-info/RECORD +0 -306
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/WHEEL +0 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/top_level.txt +0 -0
| @@ -17,28 +17,25 @@ Design Principles: | |
| 17 17 |  | 
| 18 18 | 
             
            import asyncio
         | 
| 19 19 | 
             
            import logging
         | 
| 20 | 
            -
            import time
         | 
| 21 | 
            -
            import signal
         | 
| 22 20 | 
             
            import os
         | 
| 21 | 
            +
            import signal
         | 
| 23 22 | 
             
            import threading
         | 
| 23 | 
            +
            import time
         | 
| 24 24 | 
             
            from abc import ABC, abstractmethod
         | 
| 25 25 | 
             
            from collections import deque
         | 
| 26 26 | 
             
            from dataclasses import dataclass
         | 
| 27 27 | 
             
            from datetime import datetime, timezone
         | 
| 28 28 | 
             
            from enum import Enum
         | 
| 29 | 
            -
            from typing import Any, Dict, List, Optional,  | 
| 30 | 
            -
             | 
| 31 | 
            -
            from claude_mpm.core.constants import  | 
| 32 | 
            -
                RetryConfig,
         | 
| 33 | 
            -
                TimeoutConfig,
         | 
| 34 | 
            -
                PerformanceConfig
         | 
| 35 | 
            -
            )
         | 
| 29 | 
            +
            from typing import Any, Callable, Dict, List, Optional, Union
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            from claude_mpm.core.constants import PerformanceConfig, RetryConfig, TimeoutConfig
         | 
| 36 32 |  | 
| 37 | 
            -
            from . | 
| 33 | 
            +
            from .infrastructure.monitoring import HealthCheckResult, HealthStatus
         | 
| 38 34 |  | 
| 39 35 |  | 
| 40 36 | 
             
            class RecoveryAction(Enum):
         | 
| 41 37 | 
             
                """Types of recovery actions that can be performed."""
         | 
| 38 | 
            +
             | 
| 42 39 | 
             
                NONE = "none"
         | 
| 43 40 | 
             
                LOG_WARNING = "log_warning"
         | 
| 44 41 | 
             
                CLEAR_CONNECTIONS = "clear_connections"
         | 
| @@ -48,14 +45,16 @@ class RecoveryAction(Enum): | |
| 48 45 |  | 
| 49 46 | 
             
            class CircuitState(Enum):
         | 
| 50 47 | 
             
                """Circuit breaker states."""
         | 
| 51 | 
            -
             | 
| 52 | 
            -
                 | 
| 48 | 
            +
             | 
| 49 | 
            +
                CLOSED = "closed"  # Normal operation
         | 
| 50 | 
            +
                OPEN = "open"  # Recovery blocked due to failures
         | 
| 53 51 | 
             
                HALF_OPEN = "half_open"  # Testing if recovery is working
         | 
| 54 52 |  | 
| 55 53 |  | 
| 56 54 | 
             
            @dataclass
         | 
| 57 55 | 
             
            class RecoveryEvent:
         | 
| 58 56 | 
             
                """Recovery event record."""
         | 
| 57 | 
            +
             | 
| 59 58 | 
             
                timestamp: float
         | 
| 60 59 | 
             
                action: RecoveryAction
         | 
| 61 60 | 
             
                trigger: str
         | 
| @@ -63,34 +62,36 @@ class RecoveryEvent: | |
| 63 62 | 
             
                success: bool
         | 
| 64 63 | 
             
                duration_ms: float
         | 
| 65 64 | 
             
                error_message: Optional[str] = None
         | 
| 66 | 
            -
             | 
| 65 | 
            +
             | 
| 67 66 | 
             
                def to_dict(self) -> Dict[str, Any]:
         | 
| 68 67 | 
             
                    """Convert recovery event to dictionary."""
         | 
| 69 68 | 
             
                    return {
         | 
| 70 | 
            -
                         | 
| 71 | 
            -
                         | 
| 72 | 
            -
             | 
| 73 | 
            -
                         | 
| 74 | 
            -
                         | 
| 75 | 
            -
                         | 
| 76 | 
            -
                         | 
| 77 | 
            -
                         | 
| 69 | 
            +
                        "timestamp": self.timestamp,
         | 
| 70 | 
            +
                        "timestamp_iso": datetime.fromtimestamp(
         | 
| 71 | 
            +
                            self.timestamp, timezone.utc
         | 
| 72 | 
            +
                        ).isoformat(),
         | 
| 73 | 
            +
                        "action": self.action.value,
         | 
| 74 | 
            +
                        "trigger": self.trigger,
         | 
| 75 | 
            +
                        "health_status": self.health_status.value,
         | 
| 76 | 
            +
                        "success": self.success,
         | 
| 77 | 
            +
                        "duration_ms": self.duration_ms,
         | 
| 78 | 
            +
                        "error_message": self.error_message,
         | 
| 78 79 | 
             
                    }
         | 
| 79 80 |  | 
| 80 81 |  | 
| 81 82 | 
             
            class RecoveryStrategy(ABC):
         | 
| 82 83 | 
             
                """Abstract base class for recovery strategies."""
         | 
| 83 | 
            -
             | 
| 84 | 
            +
             | 
| 84 85 | 
             
                @abstractmethod
         | 
| 85 86 | 
             
                def should_recover(self, health_result: HealthCheckResult) -> bool:
         | 
| 86 87 | 
             
                    """Determine if recovery should be triggered based on health result."""
         | 
| 87 88 | 
             
                    pass
         | 
| 88 | 
            -
             | 
| 89 | 
            +
             | 
| 89 90 | 
             
                @abstractmethod
         | 
| 90 91 | 
             
                def get_recovery_action(self, health_result: HealthCheckResult) -> RecoveryAction:
         | 
| 91 92 | 
             
                    """Determine the appropriate recovery action."""
         | 
| 92 93 | 
             
                    pass
         | 
| 93 | 
            -
             | 
| 94 | 
            +
             | 
| 94 95 | 
             
                @abstractmethod
         | 
| 95 96 | 
             
                def get_name(self) -> str:
         | 
| 96 97 | 
             
                    """Get the name of this recovery strategy."""
         | 
| @@ -99,78 +100,85 @@ class RecoveryStrategy(ABC): | |
| 99 100 |  | 
| 100 101 | 
             
            class GradedRecoveryStrategy(RecoveryStrategy):
         | 
| 101 102 | 
             
                """Recovery strategy with graduated response based on health status and history.
         | 
| 102 | 
            -
             | 
| 103 | 
            +
             | 
| 103 104 | 
             
                Recovery actions are escalated based on:
         | 
| 104 105 | 
             
                - Current health status severity
         | 
| 105 106 | 
             
                - Number of recent failures
         | 
| 106 107 | 
             
                - Time since last recovery attempt
         | 
| 107 108 | 
             
                """
         | 
| 108 | 
            -
             | 
| 109 | 
            +
             | 
| 109 110 | 
             
                def __init__(self, config: Optional[Dict[str, Any]] = None):
         | 
| 110 111 | 
             
                    """Initialize graded recovery strategy.
         | 
| 111 | 
            -
             | 
| 112 | 
            +
             | 
| 112 113 | 
             
                    Args:
         | 
| 113 114 | 
             
                        config: Configuration dictionary for recovery thresholds
         | 
| 114 115 | 
             
                    """
         | 
| 115 116 | 
             
                    self.config = config or {}
         | 
| 116 117 | 
             
                    self.logger = logging.getLogger(f"{__name__}.GradedRecoveryStrategy")
         | 
| 117 | 
            -
             | 
| 118 | 
            +
             | 
| 118 119 | 
             
                    # Configuration with defaults
         | 
| 119 | 
            -
                    self.warning_threshold = self.config.get( | 
| 120 | 
            -
                    self.critical_threshold = self.config.get( | 
| 121 | 
            -
             | 
| 122 | 
            -
                     | 
| 123 | 
            -
                    
         | 
| 120 | 
            +
                    self.warning_threshold = self.config.get("warning_threshold", 2)
         | 
| 121 | 
            +
                    self.critical_threshold = self.config.get(
         | 
| 122 | 
            +
                        "critical_threshold", RetryConfig.CRITICAL_THRESHOLD
         | 
| 123 | 
            +
                    )
         | 
| 124 | 
            +
                    self.failure_window_seconds = self.config.get(
         | 
| 125 | 
            +
                        "failure_window_seconds", RetryConfig.FAILURE_WINDOW
         | 
| 126 | 
            +
                    )
         | 
| 127 | 
            +
                    self.min_recovery_interval = self.config.get(
         | 
| 128 | 
            +
                        "min_recovery_interval", RetryConfig.MIN_RECOVERY_INTERVAL
         | 
| 129 | 
            +
                    )
         | 
| 130 | 
            +
             | 
| 124 131 | 
             
                    # Track recent failures
         | 
| 125 132 | 
             
                    self.recent_failures: deque = deque(maxlen=10)
         | 
| 126 133 | 
             
                    self.last_recovery_time = 0
         | 
| 127 | 
            -
             | 
| 134 | 
            +
             | 
| 128 135 | 
             
                def get_name(self) -> str:
         | 
| 129 136 | 
             
                    return "graded_recovery"
         | 
| 130 | 
            -
             | 
| 137 | 
            +
             | 
| 131 138 | 
             
                def should_recover(self, health_result: HealthCheckResult) -> bool:
         | 
| 132 139 | 
             
                    """Determine if recovery should be triggered."""
         | 
| 133 140 | 
             
                    current_time = time.time()
         | 
| 134 | 
            -
             | 
| 141 | 
            +
             | 
| 135 142 | 
             
                    # Don't trigger recovery too frequently
         | 
| 136 143 | 
             
                    if current_time - self.last_recovery_time < self.min_recovery_interval:
         | 
| 137 144 | 
             
                        self.logger.debug("Recovery suppressed due to min interval")
         | 
| 138 145 | 
             
                        return False
         | 
| 139 | 
            -
             | 
| 146 | 
            +
             | 
| 140 147 | 
             
                    # Check current health status
         | 
| 141 148 | 
             
                    if health_result.overall_status in [HealthStatus.CRITICAL]:
         | 
| 142 149 | 
             
                        return True
         | 
| 143 | 
            -
             | 
| 150 | 
            +
             | 
| 144 151 | 
             
                    if health_result.overall_status == HealthStatus.WARNING:
         | 
| 145 152 | 
             
                        # Count recent warnings in time window
         | 
| 146 153 | 
             
                        cutoff_time = current_time - self.failure_window_seconds
         | 
| 147 154 | 
             
                        recent_warnings = [
         | 
| 148 | 
            -
                            event for event in self.recent_failures
         | 
| 149 | 
            -
                            if event >= cutoff_time
         | 
| 155 | 
            +
                            event for event in self.recent_failures if event >= cutoff_time
         | 
| 150 156 | 
             
                        ]
         | 
| 151 | 
            -
             | 
| 157 | 
            +
             | 
| 152 158 | 
             
                        if len(recent_warnings) >= self.warning_threshold:
         | 
| 153 159 | 
             
                            return True
         | 
| 154 | 
            -
             | 
| 160 | 
            +
             | 
| 155 161 | 
             
                    return False
         | 
| 156 | 
            -
             | 
| 162 | 
            +
             | 
| 157 163 | 
             
                def get_recovery_action(self, health_result: HealthCheckResult) -> RecoveryAction:
         | 
| 158 164 | 
             
                    """Determine the appropriate recovery action based on health status."""
         | 
| 159 165 | 
             
                    current_time = time.time()
         | 
| 160 | 
            -
             | 
| 166 | 
            +
             | 
| 161 167 | 
             
                    # Count recent failures
         | 
| 162 168 | 
             
                    cutoff_time = current_time - self.failure_window_seconds
         | 
| 163 169 | 
             
                    recent_failures = [
         | 
| 164 | 
            -
                        event for event in self.recent_failures
         | 
| 165 | 
            -
                        if event >= cutoff_time
         | 
| 170 | 
            +
                        event for event in self.recent_failures if event >= cutoff_time
         | 
| 166 171 | 
             
                    ]
         | 
| 167 | 
            -
             | 
| 172 | 
            +
             | 
| 168 173 | 
             
                    failure_count = len(recent_failures)
         | 
| 169 | 
            -
             | 
| 174 | 
            +
             | 
| 170 175 | 
             
                    # Record this failure
         | 
| 171 | 
            -
                    if health_result.overall_status in [ | 
| 176 | 
            +
                    if health_result.overall_status in [
         | 
| 177 | 
            +
                        HealthStatus.WARNING,
         | 
| 178 | 
            +
                        HealthStatus.CRITICAL,
         | 
| 179 | 
            +
                    ]:
         | 
| 172 180 | 
             
                        self.recent_failures.append(current_time)
         | 
| 173 | 
            -
             | 
| 181 | 
            +
             | 
| 174 182 | 
             
                    # Determine action based on status and failure history
         | 
| 175 183 | 
             
                    if health_result.overall_status == HealthStatus.CRITICAL:
         | 
| 176 184 | 
             
                        if failure_count >= 3:
         | 
| @@ -179,30 +187,33 @@ class GradedRecoveryStrategy(RecoveryStrategy): | |
| 179 187 | 
             
                            return RecoveryAction.RESTART_SERVICE
         | 
| 180 188 | 
             
                        else:
         | 
| 181 189 | 
             
                            return RecoveryAction.CLEAR_CONNECTIONS
         | 
| 182 | 
            -
             | 
| 190 | 
            +
             | 
| 183 191 | 
             
                    elif health_result.overall_status == HealthStatus.WARNING:
         | 
| 184 192 | 
             
                        if failure_count >= self.warning_threshold:
         | 
| 185 193 | 
             
                            return RecoveryAction.CLEAR_CONNECTIONS
         | 
| 186 194 | 
             
                        else:
         | 
| 187 195 | 
             
                            return RecoveryAction.LOG_WARNING
         | 
| 188 | 
            -
             | 
| 196 | 
            +
             | 
| 189 197 | 
             
                    return RecoveryAction.NONE
         | 
| 190 198 |  | 
| 191 199 |  | 
| 192 200 | 
             
            class CircuitBreaker:
         | 
| 193 201 | 
             
                """Circuit breaker to prevent recovery loops and cascading failures.
         | 
| 194 | 
            -
             | 
| 202 | 
            +
             | 
| 195 203 | 
             
                Implements the circuit breaker pattern to:
         | 
| 196 204 | 
             
                - Prevent excessive recovery attempts
         | 
| 197 205 | 
             
                - Allow time for systems to stabilize
         | 
| 198 206 | 
             
                - Gradually re-enable recovery after failures
         | 
| 199 207 | 
             
                """
         | 
| 200 | 
            -
             | 
| 201 | 
            -
                def __init__( | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 208 | 
            +
             | 
| 209 | 
            +
                def __init__(
         | 
| 210 | 
            +
                    self,
         | 
| 211 | 
            +
                    failure_threshold: int = RetryConfig.FAILURE_THRESHOLD,
         | 
| 212 | 
            +
                    timeout_seconds: int = RetryConfig.CIRCUIT_TIMEOUT,
         | 
| 213 | 
            +
                    success_threshold: int = RetryConfig.SUCCESS_THRESHOLD,
         | 
| 214 | 
            +
                ):
         | 
| 204 215 | 
             
                    """Initialize circuit breaker.
         | 
| 205 | 
            -
             | 
| 216 | 
            +
             | 
| 206 217 | 
             
                    Args:
         | 
| 207 218 | 
             
                        failure_threshold: Number of failures before opening circuit
         | 
| 208 219 | 
             
                        timeout_seconds: Time to wait in OPEN state before trying HALF_OPEN
         | 
| @@ -211,80 +222,90 @@ class CircuitBreaker: | |
| 211 222 | 
             
                    self.failure_threshold = failure_threshold
         | 
| 212 223 | 
             
                    self.timeout_seconds = timeout_seconds
         | 
| 213 224 | 
             
                    self.success_threshold = success_threshold
         | 
| 214 | 
            -
             | 
| 225 | 
            +
             | 
| 215 226 | 
             
                    self.state = CircuitState.CLOSED
         | 
| 216 227 | 
             
                    self.failure_count = 0
         | 
| 217 228 | 
             
                    self.success_count = 0
         | 
| 218 229 | 
             
                    self.last_failure_time = 0
         | 
| 219 230 | 
             
                    self.state_change_time = time.time()
         | 
| 220 | 
            -
             | 
| 231 | 
            +
             | 
| 221 232 | 
             
                    self.logger = logging.getLogger(f"{__name__}.CircuitBreaker")
         | 
| 222 | 
            -
                    self.logger.info( | 
| 223 | 
            -
             | 
| 224 | 
            -
             | 
| 233 | 
            +
                    self.logger.info(
         | 
| 234 | 
            +
                        f"Circuit breaker initialized: failure_threshold={failure_threshold}, "
         | 
| 235 | 
            +
                        f"timeout={timeout_seconds}s, success_threshold={success_threshold}"
         | 
| 236 | 
            +
                    )
         | 
| 237 | 
            +
             | 
| 225 238 | 
             
                def can_proceed(self) -> bool:
         | 
| 226 239 | 
             
                    """Check if recovery operations can proceed."""
         | 
| 227 240 | 
             
                    current_time = time.time()
         | 
| 228 | 
            -
             | 
| 241 | 
            +
             | 
| 229 242 | 
             
                    if self.state == CircuitState.CLOSED:
         | 
| 230 243 | 
             
                        return True
         | 
| 231 | 
            -
             | 
| 244 | 
            +
             | 
| 232 245 | 
             
                    elif self.state == CircuitState.OPEN:
         | 
| 233 246 | 
             
                        # Check if timeout has elapsed
         | 
| 234 247 | 
             
                        if current_time - self.last_failure_time >= self.timeout_seconds:
         | 
| 235 248 | 
             
                            self._transition_to_half_open()
         | 
| 236 249 | 
             
                            return True
         | 
| 237 250 | 
             
                        return False
         | 
| 238 | 
            -
             | 
| 251 | 
            +
             | 
| 239 252 | 
             
                    elif self.state == CircuitState.HALF_OPEN:
         | 
| 240 253 | 
             
                        return True
         | 
| 241 | 
            -
             | 
| 254 | 
            +
             | 
| 242 255 | 
             
                    return False
         | 
| 243 | 
            -
             | 
| 256 | 
            +
             | 
| 244 257 | 
             
                def record_success(self) -> None:
         | 
| 245 258 | 
             
                    """Record a successful recovery operation."""
         | 
| 246 259 | 
             
                    if self.state == CircuitState.CLOSED:
         | 
| 247 260 | 
             
                        # Reset failure count on success in normal state
         | 
| 248 261 | 
             
                        self.failure_count = 0
         | 
| 249 | 
            -
             | 
| 262 | 
            +
             | 
| 250 263 | 
             
                    elif self.state == CircuitState.HALF_OPEN:
         | 
| 251 264 | 
             
                        self.success_count += 1
         | 
| 252 | 
            -
                        self.logger.debug( | 
| 253 | 
            -
             | 
| 265 | 
            +
                        self.logger.debug(
         | 
| 266 | 
            +
                            f"Circuit breaker success count: {self.success_count}/{self.success_threshold}"
         | 
| 267 | 
            +
                        )
         | 
| 268 | 
            +
             | 
| 254 269 | 
             
                        if self.success_count >= self.success_threshold:
         | 
| 255 270 | 
             
                            self._transition_to_closed()
         | 
| 256 | 
            -
             | 
| 271 | 
            +
             | 
| 257 272 | 
             
                def record_failure(self) -> None:
         | 
| 258 273 | 
             
                    """Record a failed recovery operation."""
         | 
| 259 274 | 
             
                    current_time = time.time()
         | 
| 260 275 | 
             
                    self.last_failure_time = current_time
         | 
| 261 | 
            -
             | 
| 276 | 
            +
             | 
| 262 277 | 
             
                    if self.state == CircuitState.CLOSED:
         | 
| 263 278 | 
             
                        self.failure_count += 1
         | 
| 264 | 
            -
                        self.logger.warning( | 
| 265 | 
            -
             | 
| 279 | 
            +
                        self.logger.warning(
         | 
| 280 | 
            +
                            f"Circuit breaker failure count: {self.failure_count}/{self.failure_threshold}"
         | 
| 281 | 
            +
                        )
         | 
| 282 | 
            +
             | 
| 266 283 | 
             
                        if self.failure_count >= self.failure_threshold:
         | 
| 267 284 | 
             
                            self._transition_to_open()
         | 
| 268 | 
            -
             | 
| 285 | 
            +
             | 
| 269 286 | 
             
                    elif self.state == CircuitState.HALF_OPEN:
         | 
| 270 287 | 
             
                        # Failure in half-open state goes back to open
         | 
| 271 288 | 
             
                        self._transition_to_open()
         | 
| 272 | 
            -
             | 
| 289 | 
            +
             | 
| 273 290 | 
             
                def _transition_to_open(self) -> None:
         | 
| 274 291 | 
             
                    """Transition circuit to OPEN state."""
         | 
| 275 292 | 
             
                    self.state = CircuitState.OPEN
         | 
| 276 293 | 
             
                    self.state_change_time = time.time()
         | 
| 277 294 | 
             
                    self.success_count = 0
         | 
| 278 | 
            -
                    self.logger.warning( | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 295 | 
            +
                    self.logger.warning(
         | 
| 296 | 
            +
                        f"Circuit breaker OPENED due to {self.failure_count} failures. "
         | 
| 297 | 
            +
                        f"Recovery blocked for {self.timeout_seconds} seconds."
         | 
| 298 | 
            +
                    )
         | 
| 299 | 
            +
             | 
| 281 300 | 
             
                def _transition_to_half_open(self) -> None:
         | 
| 282 301 | 
             
                    """Transition circuit to HALF_OPEN state."""
         | 
| 283 302 | 
             
                    self.state = CircuitState.HALF_OPEN
         | 
| 284 303 | 
             
                    self.state_change_time = time.time()
         | 
| 285 304 | 
             
                    self.success_count = 0
         | 
| 286 | 
            -
                    self.logger.info( | 
| 287 | 
            -
             | 
| 305 | 
            +
                    self.logger.info(
         | 
| 306 | 
            +
                        "Circuit breaker transitioned to HALF_OPEN. Testing recovery..."
         | 
| 307 | 
            +
                    )
         | 
| 308 | 
            +
             | 
| 288 309 | 
             
                def _transition_to_closed(self) -> None:
         | 
| 289 310 | 
             
                    """Transition circuit to CLOSED state."""
         | 
| 290 311 | 
             
                    self.state = CircuitState.CLOSED
         | 
| @@ -292,29 +313,29 @@ class CircuitBreaker: | |
| 292 313 | 
             
                    self.failure_count = 0
         | 
| 293 314 | 
             
                    self.success_count = 0
         | 
| 294 315 | 
             
                    self.logger.info("Circuit breaker CLOSED. Normal recovery operations resumed.")
         | 
| 295 | 
            -
             | 
| 316 | 
            +
             | 
| 296 317 | 
             
                def get_status(self) -> Dict[str, Any]:
         | 
| 297 318 | 
             
                    """Get current circuit breaker status."""
         | 
| 298 319 | 
             
                    current_time = time.time()
         | 
| 299 320 | 
             
                    return {
         | 
| 300 | 
            -
                         | 
| 301 | 
            -
                         | 
| 302 | 
            -
                         | 
| 303 | 
            -
                         | 
| 304 | 
            -
                         | 
| 305 | 
            -
                         | 
| 306 | 
            -
                         | 
| 307 | 
            -
                         | 
| 308 | 
            -
                             | 
| 309 | 
            -
                             | 
| 310 | 
            -
                             | 
| 311 | 
            -
                        }
         | 
| 321 | 
            +
                        "state": self.state.value,
         | 
| 322 | 
            +
                        "failure_count": self.failure_count,
         | 
| 323 | 
            +
                        "success_count": self.success_count,
         | 
| 324 | 
            +
                        "last_failure_time": self.last_failure_time,
         | 
| 325 | 
            +
                        "state_change_time": self.state_change_time,
         | 
| 326 | 
            +
                        "time_in_current_state": current_time - self.state_change_time,
         | 
| 327 | 
            +
                        "can_proceed": self.can_proceed(),
         | 
| 328 | 
            +
                        "config": {
         | 
| 329 | 
            +
                            "failure_threshold": self.failure_threshold,
         | 
| 330 | 
            +
                            "timeout_seconds": self.timeout_seconds,
         | 
| 331 | 
            +
                            "success_threshold": self.success_threshold,
         | 
| 332 | 
            +
                        },
         | 
| 312 333 | 
             
                    }
         | 
| 313 334 |  | 
| 314 335 |  | 
| 315 336 | 
             
            class RecoveryManager:
         | 
| 316 337 | 
             
                """Advanced recovery manager with circuit breaker and configurable strategies.
         | 
| 317 | 
            -
             | 
| 338 | 
            +
             | 
| 318 339 | 
             
                Provides comprehensive recovery capabilities including:
         | 
| 319 340 | 
             
                - Health-based recovery triggering
         | 
| 320 341 | 
             
                - Circuit breaker protection
         | 
| @@ -322,11 +343,10 @@ class RecoveryManager: | |
| 322 343 | 
             
                - Recovery event logging and history
         | 
| 323 344 | 
             
                - Integration with service lifecycle
         | 
| 324 345 | 
             
                """
         | 
| 325 | 
            -
             | 
| 326 | 
            -
                def __init__(self, config: Optional[Dict[str, Any]] = None, 
         | 
| 327 | 
            -
                             server_instance=None):
         | 
| 346 | 
            +
             | 
| 347 | 
            +
                def __init__(self, config: Optional[Dict[str, Any]] = None, server_instance=None):
         | 
| 328 348 | 
             
                    """Initialize recovery manager.
         | 
| 329 | 
            -
             | 
| 349 | 
            +
             | 
| 330 350 | 
             
                    Args:
         | 
| 331 351 | 
             
                        config: Configuration dictionary for recovery settings
         | 
| 332 352 | 
             
                        server_instance: Reference to the Socket.IO server instance
         | 
| @@ -334,110 +354,124 @@ class RecoveryManager: | |
| 334 354 | 
             
                    self.config = config or {}
         | 
| 335 355 | 
             
                    self.server_instance = server_instance
         | 
| 336 356 | 
             
                    self.logger = logging.getLogger(f"{__name__}.RecoveryManager")
         | 
| 337 | 
            -
             | 
| 357 | 
            +
             | 
| 338 358 | 
             
                    # Configuration with defaults
         | 
| 339 | 
            -
                    self.enabled = self.config.get( | 
| 340 | 
            -
                    self.check_interval = self.config.get( | 
| 341 | 
            -
                    self.max_recovery_attempts = self.config.get( | 
| 342 | 
            -
                    self.recovery_timeout = self.config.get( | 
| 343 | 
            -
             | 
| 359 | 
            +
                    self.enabled = self.config.get("enabled", True)
         | 
| 360 | 
            +
                    self.check_interval = self.config.get("check_interval", 60)
         | 
| 361 | 
            +
                    self.max_recovery_attempts = self.config.get("max_recovery_attempts", 5)
         | 
| 362 | 
            +
                    self.recovery_timeout = self.config.get("recovery_timeout", 30)
         | 
| 363 | 
            +
             | 
| 344 364 | 
             
                    # Initialize circuit breaker
         | 
| 345 | 
            -
                    circuit_config = self.config.get( | 
| 365 | 
            +
                    circuit_config = self.config.get("circuit_breaker", {})
         | 
| 346 366 | 
             
                    self.circuit_breaker = CircuitBreaker(
         | 
| 347 | 
            -
                        failure_threshold=circuit_config.get( | 
| 348 | 
            -
             | 
| 349 | 
            -
                         | 
| 367 | 
            +
                        failure_threshold=circuit_config.get(
         | 
| 368 | 
            +
                            "failure_threshold", RetryConfig.FAILURE_THRESHOLD
         | 
| 369 | 
            +
                        ),
         | 
| 370 | 
            +
                        timeout_seconds=circuit_config.get(
         | 
| 371 | 
            +
                            "timeout_seconds", RetryConfig.CIRCUIT_TIMEOUT
         | 
| 372 | 
            +
                        ),
         | 
| 373 | 
            +
                        success_threshold=circuit_config.get(
         | 
| 374 | 
            +
                            "success_threshold", RetryConfig.SUCCESS_THRESHOLD
         | 
| 375 | 
            +
                        ),
         | 
| 350 376 | 
             
                    )
         | 
| 351 | 
            -
             | 
| 377 | 
            +
             | 
| 352 378 | 
             
                    # Initialize recovery strategy
         | 
| 353 | 
            -
                    strategy_config = self.config.get( | 
| 379 | 
            +
                    strategy_config = self.config.get("strategy", {})
         | 
| 354 380 | 
             
                    self.recovery_strategy = GradedRecoveryStrategy(strategy_config)
         | 
| 355 | 
            -
             | 
| 381 | 
            +
             | 
| 356 382 | 
             
                    # Recovery event history
         | 
| 357 383 | 
             
                    self.recovery_history: deque = deque(maxlen=100)
         | 
| 358 | 
            -
             | 
| 384 | 
            +
             | 
| 359 385 | 
             
                    # Recovery state
         | 
| 360 386 | 
             
                    self.recovery_in_progress = False
         | 
| 361 387 | 
             
                    self.last_recovery_time = 0
         | 
| 362 388 | 
             
                    self.recovery_count = 0
         | 
| 363 | 
            -
             | 
| 389 | 
            +
             | 
| 364 390 | 
             
                    # Recovery callbacks
         | 
| 365 391 | 
             
                    self.recovery_callbacks: List[Callable[[RecoveryEvent], None]] = []
         | 
| 366 | 
            -
             | 
| 392 | 
            +
             | 
| 367 393 | 
             
                    # Statistics
         | 
| 368 394 | 
             
                    self.recovery_stats = {
         | 
| 369 | 
            -
                         | 
| 370 | 
            -
                         | 
| 371 | 
            -
                         | 
| 372 | 
            -
                         | 
| 373 | 
            -
                         | 
| 395 | 
            +
                        "total_recoveries": 0,
         | 
| 396 | 
            +
                        "successful_recoveries": 0,
         | 
| 397 | 
            +
                        "failed_recoveries": 0,
         | 
| 398 | 
            +
                        "actions_performed": {action.value: 0 for action in RecoveryAction},
         | 
| 399 | 
            +
                        "average_recovery_duration_ms": 0,
         | 
| 374 400 | 
             
                    }
         | 
| 375 | 
            -
             | 
| 376 | 
            -
                    self.logger.info( | 
| 377 | 
            -
             | 
| 401 | 
            +
             | 
| 402 | 
            +
                    self.logger.info(
         | 
| 403 | 
            +
                        f"Recovery manager initialized with strategy: {self.recovery_strategy.get_name()}"
         | 
| 404 | 
            +
                    )
         | 
| 405 | 
            +
             | 
| 378 406 | 
             
                def add_recovery_callback(self, callback: Callable[[RecoveryEvent], None]) -> None:
         | 
| 379 407 | 
             
                    """Add a callback to be notified of recovery events."""
         | 
| 380 408 | 
             
                    self.recovery_callbacks.append(callback)
         | 
| 381 409 | 
             
                    self.logger.debug(f"Added recovery callback: {callback.__name__}")
         | 
| 382 | 
            -
             | 
| 383 | 
            -
                def handle_health_result( | 
| 410 | 
            +
             | 
| 411 | 
            +
                def handle_health_result(
         | 
| 412 | 
            +
                    self, health_result: HealthCheckResult
         | 
| 413 | 
            +
                ) -> Optional[RecoveryEvent]:
         | 
| 384 414 | 
             
                    """Handle health check result and trigger recovery if needed.
         | 
| 385 | 
            -
             | 
| 415 | 
            +
             | 
| 386 416 | 
             
                    Args:
         | 
| 387 417 | 
             
                        health_result: Health check result to evaluate
         | 
| 388 | 
            -
             | 
| 418 | 
            +
             | 
| 389 419 | 
             
                    Returns:
         | 
| 390 420 | 
             
                        RecoveryEvent if recovery was triggered, None otherwise
         | 
| 391 421 | 
             
                    """
         | 
| 392 422 | 
             
                    if not self.enabled:
         | 
| 393 423 | 
             
                        return None
         | 
| 394 | 
            -
             | 
| 424 | 
            +
             | 
| 395 425 | 
             
                    if self.recovery_in_progress:
         | 
| 396 426 | 
             
                        self.logger.debug("Recovery already in progress, skipping")
         | 
| 397 427 | 
             
                        return None
         | 
| 398 | 
            -
             | 
| 428 | 
            +
             | 
| 399 429 | 
             
                    # Check if recovery should be triggered
         | 
| 400 430 | 
             
                    if not self.recovery_strategy.should_recover(health_result):
         | 
| 401 431 | 
             
                        return None
         | 
| 402 | 
            -
             | 
| 432 | 
            +
             | 
| 403 433 | 
             
                    # Check circuit breaker
         | 
| 404 434 | 
             
                    if not self.circuit_breaker.can_proceed():
         | 
| 405 435 | 
             
                        self.logger.warning("Recovery suppressed by circuit breaker")
         | 
| 406 436 | 
             
                        return None
         | 
| 407 | 
            -
             | 
| 437 | 
            +
             | 
| 408 438 | 
             
                    # Determine recovery action
         | 
| 409 439 | 
             
                    action = self.recovery_strategy.get_recovery_action(health_result)
         | 
| 410 | 
            -
             | 
| 440 | 
            +
             | 
| 411 441 | 
             
                    if action == RecoveryAction.NONE:
         | 
| 412 442 | 
             
                        return None
         | 
| 413 | 
            -
             | 
| 443 | 
            +
             | 
| 414 444 | 
             
                    # Trigger recovery
         | 
| 415 | 
            -
                    return asyncio.create_task( | 
| 416 | 
            -
             | 
| 417 | 
            -
             | 
| 418 | 
            -
             | 
| 419 | 
            -
             | 
| 445 | 
            +
                    return asyncio.create_task(
         | 
| 446 | 
            +
                        self._perform_recovery(action, health_result, "health_check")
         | 
| 447 | 
            +
                    )
         | 
| 448 | 
            +
             | 
| 449 | 
            +
                async def _perform_recovery(
         | 
| 450 | 
            +
                    self, action: RecoveryAction, health_result: HealthCheckResult, trigger: str
         | 
| 451 | 
            +
                ) -> RecoveryEvent:
         | 
| 420 452 | 
             
                    """Perform recovery action and record the event.
         | 
| 421 | 
            -
             | 
| 453 | 
            +
             | 
| 422 454 | 
             
                    Args:
         | 
| 423 455 | 
             
                        action: Recovery action to perform
         | 
| 424 456 | 
             
                        health_result: Health result that triggered recovery
         | 
| 425 457 | 
             
                        trigger: Description of what triggered the recovery
         | 
| 426 | 
            -
             | 
| 458 | 
            +
             | 
| 427 459 | 
             
                    Returns:
         | 
| 428 460 | 
             
                        RecoveryEvent record of the recovery attempt
         | 
| 429 461 | 
             
                    """
         | 
| 430 462 | 
             
                    if self.recovery_in_progress:
         | 
| 431 463 | 
             
                        raise RuntimeError("Recovery already in progress")
         | 
| 432 | 
            -
             | 
| 464 | 
            +
             | 
| 433 465 | 
             
                    self.recovery_in_progress = True
         | 
| 434 466 | 
             
                    start_time = time.time()
         | 
| 435 467 | 
             
                    success = False
         | 
| 436 468 | 
             
                    error_message = None
         | 
| 437 | 
            -
             | 
| 469 | 
            +
             | 
| 438 470 | 
             
                    try:
         | 
| 439 | 
            -
                        self.logger.info( | 
| 440 | 
            -
             | 
| 471 | 
            +
                        self.logger.info(
         | 
| 472 | 
            +
                            f"Starting recovery action: {action.value} (trigger: {trigger})"
         | 
| 473 | 
            +
                        )
         | 
| 474 | 
            +
             | 
| 441 475 | 
             
                        if action == RecoveryAction.LOG_WARNING:
         | 
| 442 476 | 
             
                            success = await self._log_warning(health_result)
         | 
| 443 477 | 
             
                        elif action == RecoveryAction.CLEAR_CONNECTIONS:
         | 
| @@ -449,16 +483,16 @@ class RecoveryManager: | |
| 449 483 | 
             
                        else:
         | 
| 450 484 | 
             
                            error_message = f"Unknown recovery action: {action}"
         | 
| 451 485 | 
             
                            self.logger.error(error_message)
         | 
| 452 | 
            -
             | 
| 486 | 
            +
             | 
| 453 487 | 
             
                    except Exception as e:
         | 
| 454 488 | 
             
                        error_message = f"Recovery action failed: {e}"
         | 
| 455 489 | 
             
                        self.logger.error(error_message)
         | 
| 456 490 | 
             
                        success = False
         | 
| 457 | 
            -
             | 
| 491 | 
            +
             | 
| 458 492 | 
             
                    finally:
         | 
| 459 493 | 
             
                        self.recovery_in_progress = False
         | 
| 460 494 | 
             
                        duration_ms = (time.time() - start_time) * PerformanceConfig.SECONDS_TO_MS
         | 
| 461 | 
            -
             | 
| 495 | 
            +
             | 
| 462 496 | 
             
                        # Create recovery event
         | 
| 463 497 | 
             
                        event = RecoveryEvent(
         | 
| 464 498 | 
             
                            timestamp=start_time,
         | 
| @@ -467,210 +501,232 @@ class RecoveryManager: | |
| 467 501 | 
             
                            health_status=health_result.overall_status,
         | 
| 468 502 | 
             
                            success=success,
         | 
| 469 503 | 
             
                            duration_ms=duration_ms,
         | 
| 470 | 
            -
                            error_message=error_message
         | 
| 504 | 
            +
                            error_message=error_message,
         | 
| 471 505 | 
             
                        )
         | 
| 472 | 
            -
             | 
| 506 | 
            +
             | 
| 473 507 | 
             
                        # Update statistics
         | 
| 474 508 | 
             
                        self._update_recovery_stats(event)
         | 
| 475 | 
            -
             | 
| 509 | 
            +
             | 
| 476 510 | 
             
                        # Record in circuit breaker
         | 
| 477 511 | 
             
                        if success:
         | 
| 478 512 | 
             
                            self.circuit_breaker.record_success()
         | 
| 479 513 | 
             
                        else:
         | 
| 480 514 | 
             
                            self.circuit_breaker.record_failure()
         | 
| 481 | 
            -
             | 
| 515 | 
            +
             | 
| 482 516 | 
             
                        # Store event
         | 
| 483 517 | 
             
                        self.recovery_history.append(event)
         | 
| 484 518 | 
             
                        self.last_recovery_time = start_time
         | 
| 485 519 | 
             
                        self.recovery_count += 1
         | 
| 486 | 
            -
             | 
| 520 | 
            +
             | 
| 487 521 | 
             
                        # Notify callbacks
         | 
| 488 522 | 
             
                        for callback in self.recovery_callbacks:
         | 
| 489 523 | 
             
                            try:
         | 
| 490 524 | 
             
                                callback(event)
         | 
| 491 525 | 
             
                            except Exception as e:
         | 
| 492 | 
            -
                                self.logger.error( | 
| 493 | 
            -
             | 
| 526 | 
            +
                                self.logger.error(
         | 
| 527 | 
            +
                                    f"Recovery callback {callback.__name__} failed: {e}"
         | 
| 528 | 
            +
                                )
         | 
| 529 | 
            +
             | 
| 494 530 | 
             
                        result_msg = "succeeded" if success else "failed"
         | 
| 495 | 
            -
                        self.logger.info( | 
| 496 | 
            -
             | 
| 531 | 
            +
                        self.logger.info(
         | 
| 532 | 
            +
                            f"Recovery action {action.value} {result_msg} in {duration_ms:.2f}ms"
         | 
| 533 | 
            +
                        )
         | 
| 534 | 
            +
             | 
| 497 535 | 
             
                        return event
         | 
| 498 | 
            -
             | 
| 536 | 
            +
             | 
| 499 537 | 
             
                async def _log_warning(self, health_result: HealthCheckResult) -> bool:
         | 
| 500 538 | 
             
                    """Log a warning about health issues."""
         | 
| 501 539 | 
             
                    try:
         | 
| 502 | 
            -
                        warning_metrics = [ | 
| 503 | 
            -
             | 
| 504 | 
            -
                        
         | 
| 505 | 
            -
                         | 
| 506 | 
            -
             | 
| 507 | 
            -
                        
         | 
| 540 | 
            +
                        warning_metrics = [
         | 
| 541 | 
            +
                            m for m in health_result.metrics if m.status == HealthStatus.WARNING
         | 
| 542 | 
            +
                        ]
         | 
| 543 | 
            +
                        critical_metrics = [
         | 
| 544 | 
            +
                            m for m in health_result.metrics if m.status == HealthStatus.CRITICAL
         | 
| 545 | 
            +
                        ]
         | 
| 546 | 
            +
             | 
| 547 | 
            +
                        self.logger.warning(
         | 
| 548 | 
            +
                            f"Health warning detected: {len(warning_metrics)} warning metrics, "
         | 
| 549 | 
            +
                            f"{len(critical_metrics)} critical metrics"
         | 
| 550 | 
            +
                        )
         | 
| 551 | 
            +
             | 
| 508 552 | 
             
                        for metric in warning_metrics + critical_metrics:
         | 
| 509 | 
            -
                            self.logger.warning( | 
| 510 | 
            -
             | 
| 553 | 
            +
                            self.logger.warning(
         | 
| 554 | 
            +
                                f"  {metric.name}: {metric.value} ({metric.status.value}) - {metric.message}"
         | 
| 555 | 
            +
                            )
         | 
| 556 | 
            +
             | 
| 511 557 | 
             
                        return True
         | 
| 512 558 | 
             
                    except Exception as e:
         | 
| 513 559 | 
             
                        self.logger.error(f"Failed to log warning: {e}")
         | 
| 514 560 | 
             
                        return False
         | 
| 515 | 
            -
             | 
| 561 | 
            +
             | 
| 516 562 | 
             
                async def _clear_connections(self) -> bool:
         | 
| 517 563 | 
             
                    """Clear all client connections to reset connection state."""
         | 
| 518 564 | 
             
                    try:
         | 
| 519 | 
            -
                        if not self.server_instance or not hasattr(self.server_instance,  | 
| 520 | 
            -
                            self.logger.warning( | 
| 565 | 
            +
                        if not self.server_instance or not hasattr(self.server_instance, "sio"):
         | 
| 566 | 
            +
                            self.logger.warning(
         | 
| 567 | 
            +
                                "No server instance available for connection clearing"
         | 
| 568 | 
            +
                            )
         | 
| 521 569 | 
             
                            return False
         | 
| 522 | 
            -
             | 
| 570 | 
            +
             | 
| 523 571 | 
             
                        sio = self.server_instance.sio
         | 
| 524 572 | 
             
                        if not sio:
         | 
| 525 573 | 
             
                            self.logger.warning("Socket.IO instance not available")
         | 
| 526 574 | 
             
                            return False
         | 
| 527 | 
            -
             | 
| 575 | 
            +
             | 
| 528 576 | 
             
                        # Get current clients
         | 
| 529 | 
            -
                        clients =  | 
| 530 | 
            -
             | 
| 577 | 
            +
                        clients = (
         | 
| 578 | 
            +
                            list(self.server_instance.clients)
         | 
| 579 | 
            +
                            if hasattr(self.server_instance, "clients")
         | 
| 580 | 
            +
                            else []
         | 
| 581 | 
            +
                        )
         | 
| 582 | 
            +
             | 
| 531 583 | 
             
                        self.logger.info(f"Clearing {len(clients)} client connections")
         | 
| 532 | 
            -
             | 
| 584 | 
            +
             | 
| 533 585 | 
             
                        # Disconnect all clients
         | 
| 534 586 | 
             
                        for client_id in clients:
         | 
| 535 587 | 
             
                            try:
         | 
| 536 588 | 
             
                                await sio.disconnect(client_id)
         | 
| 537 589 | 
             
                            except Exception as e:
         | 
| 538 590 | 
             
                                self.logger.warning(f"Failed to disconnect client {client_id}: {e}")
         | 
| 539 | 
            -
             | 
| 591 | 
            +
             | 
| 540 592 | 
             
                        # Clear client tracking
         | 
| 541 | 
            -
                        if hasattr(self.server_instance,  | 
| 593 | 
            +
                        if hasattr(self.server_instance, "clients"):
         | 
| 542 594 | 
             
                            self.server_instance.clients.clear()
         | 
| 543 | 
            -
                        if hasattr(self.server_instance,  | 
| 595 | 
            +
                        if hasattr(self.server_instance, "client_versions"):
         | 
| 544 596 | 
             
                            self.server_instance.client_versions.clear()
         | 
| 545 | 
            -
             | 
| 597 | 
            +
             | 
| 546 598 | 
             
                        self.logger.info("Client connections cleared successfully")
         | 
| 547 599 | 
             
                        return True
         | 
| 548 | 
            -
             | 
| 600 | 
            +
             | 
| 549 601 | 
             
                    except Exception as e:
         | 
| 550 602 | 
             
                        self.logger.error(f"Failed to clear connections: {e}")
         | 
| 551 603 | 
             
                        return False
         | 
| 552 | 
            -
             | 
| 604 | 
            +
             | 
| 553 605 | 
             
                async def _restart_service(self) -> bool:
         | 
| 554 606 | 
             
                    """Restart the Socket.IO service."""
         | 
| 555 607 | 
             
                    try:
         | 
| 556 608 | 
             
                        if not self.server_instance:
         | 
| 557 609 | 
             
                            self.logger.error("No server instance available for restart")
         | 
| 558 610 | 
             
                            return False
         | 
| 559 | 
            -
             | 
| 611 | 
            +
             | 
| 560 612 | 
             
                        self.logger.info("Attempting graceful service restart")
         | 
| 561 | 
            -
             | 
| 613 | 
            +
             | 
| 562 614 | 
             
                        # Save current configuration
         | 
| 563 | 
            -
                        host = getattr(self.server_instance,  | 
| 564 | 
            -
                        port = getattr(self.server_instance,  | 
| 565 | 
            -
             | 
| 615 | 
            +
                        host = getattr(self.server_instance, "host", "localhost")
         | 
| 616 | 
            +
                        port = getattr(self.server_instance, "port", 8765)
         | 
| 617 | 
            +
             | 
| 566 618 | 
             
                        # Stop current server
         | 
| 567 619 | 
             
                        try:
         | 
| 568 620 | 
             
                            await self.server_instance._shutdown_async()
         | 
| 569 621 | 
             
                            self.logger.info("Server shutdown completed")
         | 
| 570 622 | 
             
                        except Exception as e:
         | 
| 571 623 | 
             
                            self.logger.warning(f"Error during shutdown: {e}")
         | 
| 572 | 
            -
             | 
| 624 | 
            +
             | 
| 573 625 | 
             
                        # Wait a moment for cleanup
         | 
| 574 626 | 
             
                        await asyncio.sleep(1)
         | 
| 575 | 
            -
             | 
| 627 | 
            +
             | 
| 576 628 | 
             
                        # Restart server
         | 
| 577 629 | 
             
                        await self.server_instance.start_async()
         | 
| 578 630 | 
             
                        self.logger.info("Server restart completed successfully")
         | 
| 579 | 
            -
             | 
| 631 | 
            +
             | 
| 580 632 | 
             
                        return True
         | 
| 581 | 
            -
             | 
| 633 | 
            +
             | 
| 582 634 | 
             
                    except Exception as e:
         | 
| 583 635 | 
             
                        self.logger.error(f"Failed to restart service: {e}")
         | 
| 584 636 | 
             
                        return False
         | 
| 585 | 
            -
             | 
| 637 | 
            +
             | 
| 586 638 | 
             
                async def _emergency_stop(self) -> bool:
         | 
| 587 639 | 
             
                    """Perform emergency stop of the service."""
         | 
| 588 640 | 
             
                    try:
         | 
| 589 | 
            -
                        self.logger.critical( | 
| 590 | 
            -
             | 
| 641 | 
            +
                        self.logger.critical(
         | 
| 642 | 
            +
                            "Performing emergency stop due to critical health issues"
         | 
| 643 | 
            +
                        )
         | 
| 644 | 
            +
             | 
| 591 645 | 
             
                        if self.server_instance:
         | 
| 592 646 | 
             
                            try:
         | 
| 593 647 | 
             
                                # Force immediate shutdown
         | 
| 594 648 | 
             
                                await self.server_instance._shutdown_async()
         | 
| 595 649 | 
             
                            except Exception as e:
         | 
| 596 650 | 
             
                                self.logger.error(f"Error during emergency shutdown: {e}")
         | 
| 597 | 
            -
             | 
| 651 | 
            +
             | 
| 598 652 | 
             
                        # Send termination signal to process
         | 
| 599 653 | 
             
                        try:
         | 
| 600 654 | 
             
                            os.kill(os.getpid(), signal.SIGTERM)
         | 
| 601 655 | 
             
                        except Exception as e:
         | 
| 602 656 | 
             
                            self.logger.error(f"Failed to send termination signal: {e}")
         | 
| 603 657 | 
             
                            return False
         | 
| 604 | 
            -
             | 
| 658 | 
            +
             | 
| 605 659 | 
             
                        return True
         | 
| 606 | 
            -
             | 
| 660 | 
            +
             | 
| 607 661 | 
             
                    except Exception as e:
         | 
| 608 662 | 
             
                        self.logger.error(f"Emergency stop failed: {e}")
         | 
| 609 663 | 
             
                        return False
         | 
| 610 | 
            -
             | 
| 664 | 
            +
             | 
| 611 665 | 
             
                def _update_recovery_stats(self, event: RecoveryEvent) -> None:
         | 
| 612 666 | 
             
                    """Update recovery statistics with new event."""
         | 
| 613 | 
            -
                    self.recovery_stats[ | 
| 614 | 
            -
             | 
| 667 | 
            +
                    self.recovery_stats["total_recoveries"] += 1
         | 
| 668 | 
            +
             | 
| 615 669 | 
             
                    if event.success:
         | 
| 616 | 
            -
                        self.recovery_stats[ | 
| 670 | 
            +
                        self.recovery_stats["successful_recoveries"] += 1
         | 
| 617 671 | 
             
                    else:
         | 
| 618 | 
            -
                        self.recovery_stats[ | 
| 619 | 
            -
             | 
| 620 | 
            -
                    self.recovery_stats[ | 
| 621 | 
            -
             | 
| 672 | 
            +
                        self.recovery_stats["failed_recoveries"] += 1
         | 
| 673 | 
            +
             | 
| 674 | 
            +
                    self.recovery_stats["actions_performed"][event.action.value] += 1
         | 
| 675 | 
            +
             | 
| 622 676 | 
             
                    # Update average duration
         | 
| 623 | 
            -
                    total_recoveries = self.recovery_stats[ | 
| 624 | 
            -
                    current_avg = self.recovery_stats[ | 
| 625 | 
            -
                    self.recovery_stats[ | 
| 626 | 
            -
                         | 
| 627 | 
            -
                    )
         | 
| 628 | 
            -
             | 
| 677 | 
            +
                    total_recoveries = self.recovery_stats["total_recoveries"]
         | 
| 678 | 
            +
                    current_avg = self.recovery_stats["average_recovery_duration_ms"]
         | 
| 679 | 
            +
                    self.recovery_stats["average_recovery_duration_ms"] = (
         | 
| 680 | 
            +
                        current_avg * (total_recoveries - 1) + event.duration_ms
         | 
| 681 | 
            +
                    ) / total_recoveries
         | 
| 682 | 
            +
             | 
| 629 683 | 
             
                def get_recovery_status(self) -> Dict[str, Any]:
         | 
| 630 684 | 
             
                    """Get comprehensive recovery manager status."""
         | 
| 631 685 | 
             
                    return {
         | 
| 632 | 
            -
                         | 
| 633 | 
            -
                         | 
| 634 | 
            -
                         | 
| 635 | 
            -
                         | 
| 636 | 
            -
                         | 
| 637 | 
            -
                         | 
| 638 | 
            -
                         | 
| 639 | 
            -
                         | 
| 640 | 
            -
             | 
| 641 | 
            -
             | 
| 642 | 
            -
             | 
| 643 | 
            -
                             | 
| 644 | 
            -
             | 
| 686 | 
            +
                        "enabled": self.enabled,
         | 
| 687 | 
            +
                        "recovery_in_progress": self.recovery_in_progress,
         | 
| 688 | 
            +
                        "last_recovery_time": self.last_recovery_time,
         | 
| 689 | 
            +
                        "recovery_count": self.recovery_count,
         | 
| 690 | 
            +
                        "strategy": self.recovery_strategy.get_name(),
         | 
| 691 | 
            +
                        "circuit_breaker": self.circuit_breaker.get_status(),
         | 
| 692 | 
            +
                        "recovery_stats": dict(self.recovery_stats),
         | 
| 693 | 
            +
                        "recent_recoveries": [
         | 
| 694 | 
            +
                            event.to_dict() for event in list(self.recovery_history)[-10:]
         | 
| 695 | 
            +
                        ],
         | 
| 696 | 
            +
                        "config": {
         | 
| 697 | 
            +
                            "check_interval": self.check_interval,
         | 
| 698 | 
            +
                            "max_recovery_attempts": self.max_recovery_attempts,
         | 
| 699 | 
            +
                            "recovery_timeout": self.recovery_timeout,
         | 
| 700 | 
            +
                        },
         | 
| 645 701 | 
             
                    }
         | 
| 646 | 
            -
             | 
| 702 | 
            +
             | 
| 647 703 | 
             
                def get_recovery_history(self, limit: Optional[int] = None) -> List[RecoveryEvent]:
         | 
| 648 704 | 
             
                    """Get recovery event history.
         | 
| 649 | 
            -
             | 
| 705 | 
            +
             | 
| 650 706 | 
             
                    Args:
         | 
| 651 707 | 
             
                        limit: Maximum number of events to return
         | 
| 652 | 
            -
             | 
| 708 | 
            +
             | 
| 653 709 | 
             
                    Returns:
         | 
| 654 710 | 
             
                        List of recovery events, newest first
         | 
| 655 711 | 
             
                    """
         | 
| 656 712 | 
             
                    history = list(self.recovery_history)
         | 
| 657 713 | 
             
                    history.reverse()  # Newest first
         | 
| 658 | 
            -
             | 
| 714 | 
            +
             | 
| 659 715 | 
             
                    if limit:
         | 
| 660 716 | 
             
                        history = history[:limit]
         | 
| 661 | 
            -
             | 
| 717 | 
            +
             | 
| 662 718 | 
             
                    return history
         | 
| 663 | 
            -
             | 
| 719 | 
            +
             | 
| 664 720 | 
             
                def is_enabled(self) -> bool:
         | 
| 665 721 | 
             
                    """Check if recovery manager is enabled."""
         | 
| 666 722 | 
             
                    return self.enabled
         | 
| 667 | 
            -
             | 
| 723 | 
            +
             | 
| 668 724 | 
             
                def enable(self) -> None:
         | 
| 669 725 | 
             
                    """Enable recovery manager."""
         | 
| 670 726 | 
             
                    self.enabled = True
         | 
| 671 727 | 
             
                    self.logger.info("Recovery manager enabled")
         | 
| 672 | 
            -
             | 
| 728 | 
            +
             | 
| 673 729 | 
             
                def disable(self) -> None:
         | 
| 674 730 | 
             
                    """Disable recovery manager."""
         | 
| 675 731 | 
             
                    self.enabled = False
         | 
| 676 | 
            -
                    self.logger.info("Recovery manager disabled")
         | 
| 732 | 
            +
                    self.logger.info("Recovery manager disabled")
         |