claude-mpm 3.9.11__py3-none-any.whl → 4.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/VERSION +1 -1
- claude_mpm/__init__.py +2 -2
- claude_mpm/__main__.py +3 -2
- claude_mpm/agents/__init__.py +85 -79
- claude_mpm/agents/agent_loader.py +464 -1003
- claude_mpm/agents/agent_loader_integration.py +45 -45
- claude_mpm/agents/agents_metadata.py +29 -30
- claude_mpm/agents/async_agent_loader.py +156 -138
- claude_mpm/agents/base_agent.json +1 -1
- claude_mpm/agents/base_agent_loader.py +179 -151
- claude_mpm/agents/frontmatter_validator.py +229 -130
- claude_mpm/agents/schema/agent_schema.json +1 -1
- claude_mpm/agents/system_agent_config.py +213 -147
- claude_mpm/agents/templates/__init__.py +13 -13
- claude_mpm/agents/templates/code_analyzer.json +2 -2
- claude_mpm/agents/templates/data_engineer.json +1 -1
- claude_mpm/agents/templates/documentation.json +23 -11
- claude_mpm/agents/templates/engineer.json +22 -6
- claude_mpm/agents/templates/memory_manager.json +1 -1
- claude_mpm/agents/templates/ops.json +2 -2
- claude_mpm/agents/templates/project_organizer.json +1 -1
- claude_mpm/agents/templates/qa.json +1 -1
- claude_mpm/agents/templates/refactoring_engineer.json +222 -0
- claude_mpm/agents/templates/research.json +20 -14
- claude_mpm/agents/templates/security.json +1 -1
- claude_mpm/agents/templates/ticketing.json +1 -1
- claude_mpm/agents/templates/version_control.json +1 -1
- claude_mpm/agents/templates/web_qa.json +3 -1
- claude_mpm/agents/templates/web_ui.json +2 -2
- claude_mpm/cli/__init__.py +79 -51
- claude_mpm/cli/__main__.py +3 -2
- claude_mpm/cli/commands/__init__.py +20 -20
- claude_mpm/cli/commands/agents.py +279 -247
- claude_mpm/cli/commands/aggregate.py +138 -157
- claude_mpm/cli/commands/cleanup.py +147 -147
- claude_mpm/cli/commands/config.py +93 -76
- claude_mpm/cli/commands/info.py +17 -16
- claude_mpm/cli/commands/mcp.py +140 -905
- claude_mpm/cli/commands/mcp_command_router.py +139 -0
- claude_mpm/cli/commands/mcp_config_commands.py +20 -0
- claude_mpm/cli/commands/mcp_install_commands.py +20 -0
- claude_mpm/cli/commands/mcp_server_commands.py +175 -0
- claude_mpm/cli/commands/mcp_tool_commands.py +34 -0
- claude_mpm/cli/commands/memory.py +239 -203
- claude_mpm/cli/commands/monitor.py +203 -81
- claude_mpm/cli/commands/run.py +380 -429
- claude_mpm/cli/commands/run_config_checker.py +160 -0
- claude_mpm/cli/commands/socketio_monitor.py +235 -0
- claude_mpm/cli/commands/tickets.py +305 -197
- claude_mpm/cli/parser.py +24 -1156
- claude_mpm/cli/parsers/__init__.py +29 -0
- claude_mpm/cli/parsers/agents_parser.py +136 -0
- claude_mpm/cli/parsers/base_parser.py +331 -0
- claude_mpm/cli/parsers/config_parser.py +85 -0
- claude_mpm/cli/parsers/mcp_parser.py +152 -0
- claude_mpm/cli/parsers/memory_parser.py +138 -0
- claude_mpm/cli/parsers/monitor_parser.py +104 -0
- claude_mpm/cli/parsers/run_parser.py +147 -0
- claude_mpm/cli/parsers/tickets_parser.py +203 -0
- claude_mpm/cli/ticket_cli.py +7 -3
- claude_mpm/cli/utils.py +55 -37
- claude_mpm/cli_module/__init__.py +6 -6
- claude_mpm/cli_module/args.py +188 -140
- claude_mpm/cli_module/commands.py +79 -70
- claude_mpm/cli_module/migration_example.py +38 -60
- claude_mpm/config/__init__.py +32 -25
- claude_mpm/config/agent_config.py +151 -119
- claude_mpm/config/experimental_features.py +71 -73
- claude_mpm/config/paths.py +94 -208
- claude_mpm/config/socketio_config.py +84 -73
- claude_mpm/constants.py +35 -18
- claude_mpm/core/__init__.py +9 -6
- claude_mpm/core/agent_name_normalizer.py +68 -71
- claude_mpm/core/agent_registry.py +372 -521
- claude_mpm/core/agent_session_manager.py +74 -63
- claude_mpm/core/base_service.py +116 -87
- claude_mpm/core/cache.py +119 -153
- claude_mpm/core/claude_runner.py +425 -1120
- claude_mpm/core/config.py +263 -168
- claude_mpm/core/config_aliases.py +69 -61
- claude_mpm/core/config_constants.py +292 -0
- claude_mpm/core/constants.py +57 -99
- claude_mpm/core/container.py +211 -178
- claude_mpm/core/exceptions.py +233 -89
- claude_mpm/core/factories.py +92 -54
- claude_mpm/core/framework_loader.py +378 -220
- claude_mpm/core/hook_manager.py +198 -83
- claude_mpm/core/hook_performance_config.py +136 -0
- claude_mpm/core/injectable_service.py +61 -55
- claude_mpm/core/interactive_session.py +165 -155
- claude_mpm/core/interfaces.py +221 -195
- claude_mpm/core/lazy.py +96 -96
- claude_mpm/core/logger.py +133 -107
- claude_mpm/core/logging_config.py +185 -157
- claude_mpm/core/minimal_framework_loader.py +20 -15
- claude_mpm/core/mixins.py +30 -29
- claude_mpm/core/oneshot_session.py +215 -181
- claude_mpm/core/optimized_agent_loader.py +134 -138
- claude_mpm/core/optimized_startup.py +159 -157
- claude_mpm/core/pm_hook_interceptor.py +85 -72
- claude_mpm/core/service_registry.py +103 -101
- claude_mpm/core/session_manager.py +97 -87
- claude_mpm/core/socketio_pool.py +212 -158
- claude_mpm/core/tool_access_control.py +58 -51
- claude_mpm/core/types.py +46 -24
- claude_mpm/core/typing_utils.py +166 -82
- claude_mpm/core/unified_agent_registry.py +721 -0
- claude_mpm/core/unified_config.py +550 -0
- claude_mpm/core/unified_paths.py +549 -0
- claude_mpm/dashboard/index.html +1 -1
- claude_mpm/dashboard/open_dashboard.py +51 -17
- claude_mpm/dashboard/static/css/dashboard.css +27 -8
- claude_mpm/dashboard/static/dist/components/agent-inference.js +2 -0
- claude_mpm/dashboard/static/dist/components/event-processor.js +2 -0
- claude_mpm/dashboard/static/dist/components/event-viewer.js +2 -0
- claude_mpm/dashboard/static/dist/components/export-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/file-tool-tracker.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-library-loader.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/hud-visualizer.js +2 -0
- claude_mpm/dashboard/static/dist/components/module-viewer.js +2 -0
- claude_mpm/dashboard/static/dist/components/session-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/socket-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/ui-state-manager.js +2 -0
- claude_mpm/dashboard/static/dist/components/working-directory.js +2 -0
- claude_mpm/dashboard/static/dist/dashboard.js +2 -0
- claude_mpm/dashboard/static/dist/socket-client.js +2 -0
- claude_mpm/dashboard/static/js/components/agent-inference.js +80 -76
- claude_mpm/dashboard/static/js/components/event-processor.js +71 -67
- claude_mpm/dashboard/static/js/components/event-viewer.js +74 -70
- claude_mpm/dashboard/static/js/components/export-manager.js +31 -28
- claude_mpm/dashboard/static/js/components/file-tool-tracker.js +106 -92
- claude_mpm/dashboard/static/js/components/hud-library-loader.js +11 -11
- claude_mpm/dashboard/static/js/components/hud-manager.js +73 -73
- claude_mpm/dashboard/static/js/components/hud-visualizer.js +163 -163
- claude_mpm/dashboard/static/js/components/module-viewer.js +305 -233
- claude_mpm/dashboard/static/js/components/session-manager.js +32 -29
- claude_mpm/dashboard/static/js/components/socket-manager.js +27 -20
- claude_mpm/dashboard/static/js/components/ui-state-manager.js +21 -18
- claude_mpm/dashboard/static/js/components/working-directory.js +74 -71
- claude_mpm/dashboard/static/js/dashboard.js +178 -453
- claude_mpm/dashboard/static/js/extension-error-handler.js +164 -0
- claude_mpm/dashboard/static/js/socket-client.js +120 -54
- claude_mpm/dashboard/templates/index.html +40 -50
- claude_mpm/experimental/cli_enhancements.py +60 -58
- claude_mpm/generators/__init__.py +1 -1
- claude_mpm/generators/agent_profile_generator.py +75 -65
- claude_mpm/hooks/__init__.py +1 -1
- claude_mpm/hooks/base_hook.py +33 -28
- claude_mpm/hooks/claude_hooks/__init__.py +1 -1
- claude_mpm/hooks/claude_hooks/connection_pool.py +120 -0
- claude_mpm/hooks/claude_hooks/event_handlers.py +743 -0
- claude_mpm/hooks/claude_hooks/hook_handler.py +415 -1331
- claude_mpm/hooks/claude_hooks/hook_wrapper.sh +4 -4
- claude_mpm/hooks/claude_hooks/memory_integration.py +221 -0
- claude_mpm/hooks/claude_hooks/response_tracking.py +348 -0
- claude_mpm/hooks/claude_hooks/tool_analysis.py +230 -0
- claude_mpm/hooks/memory_integration_hook.py +140 -100
- claude_mpm/hooks/tool_call_interceptor.py +89 -76
- claude_mpm/hooks/validation_hooks.py +57 -49
- claude_mpm/init.py +145 -121
- claude_mpm/models/__init__.py +9 -9
- claude_mpm/models/agent_definition.py +33 -23
- claude_mpm/models/agent_session.py +228 -200
- claude_mpm/scripts/__init__.py +1 -1
- claude_mpm/scripts/socketio_daemon.py +192 -75
- claude_mpm/scripts/socketio_server_manager.py +328 -0
- claude_mpm/scripts/start_activity_logging.py +25 -22
- claude_mpm/services/__init__.py +68 -43
- claude_mpm/services/agent_capabilities_service.py +271 -0
- claude_mpm/services/agents/__init__.py +23 -32
- claude_mpm/services/agents/deployment/__init__.py +3 -3
- claude_mpm/services/agents/deployment/agent_config_provider.py +310 -0
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +359 -0
- claude_mpm/services/agents/deployment/agent_definition_factory.py +84 -0
- claude_mpm/services/agents/deployment/agent_deployment.py +415 -2113
- claude_mpm/services/agents/deployment/agent_discovery_service.py +387 -0
- claude_mpm/services/agents/deployment/agent_environment_manager.py +293 -0
- claude_mpm/services/agents/deployment/agent_filesystem_manager.py +387 -0
- claude_mpm/services/agents/deployment/agent_format_converter.py +453 -0
- claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +161 -0
- claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +345 -495
- claude_mpm/services/agents/deployment/agent_metrics_collector.py +279 -0
- claude_mpm/services/agents/deployment/agent_restore_handler.py +88 -0
- claude_mpm/services/agents/deployment/agent_template_builder.py +406 -0
- claude_mpm/services/agents/deployment/agent_validator.py +352 -0
- claude_mpm/services/agents/deployment/agent_version_manager.py +313 -0
- claude_mpm/services/agents/deployment/agent_versioning.py +6 -9
- claude_mpm/services/agents/deployment/agents_directory_resolver.py +79 -0
- claude_mpm/services/agents/deployment/async_agent_deployment.py +298 -234
- claude_mpm/services/agents/deployment/config/__init__.py +13 -0
- claude_mpm/services/agents/deployment/config/deployment_config.py +182 -0
- claude_mpm/services/agents/deployment/config/deployment_config_manager.py +200 -0
- claude_mpm/services/agents/deployment/deployment_config_loader.py +54 -0
- claude_mpm/services/agents/deployment/deployment_type_detector.py +124 -0
- claude_mpm/services/agents/deployment/facade/__init__.py +18 -0
- claude_mpm/services/agents/deployment/facade/async_deployment_executor.py +159 -0
- claude_mpm/services/agents/deployment/facade/deployment_executor.py +73 -0
- claude_mpm/services/agents/deployment/facade/deployment_facade.py +270 -0
- claude_mpm/services/agents/deployment/facade/sync_deployment_executor.py +178 -0
- claude_mpm/services/agents/deployment/interface_adapter.py +227 -0
- claude_mpm/services/agents/deployment/lifecycle_health_checker.py +85 -0
- claude_mpm/services/agents/deployment/lifecycle_performance_tracker.py +100 -0
- claude_mpm/services/agents/deployment/pipeline/__init__.py +32 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +158 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +159 -0
- claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +169 -0
- claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +19 -0
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +195 -0
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +119 -0
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +79 -0
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +90 -0
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +100 -0
- claude_mpm/services/agents/deployment/processors/__init__.py +15 -0
- claude_mpm/services/agents/deployment/processors/agent_deployment_context.py +98 -0
- claude_mpm/services/agents/deployment/processors/agent_deployment_result.py +235 -0
- claude_mpm/services/agents/deployment/processors/agent_processor.py +258 -0
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +318 -0
- claude_mpm/services/agents/deployment/results/__init__.py +13 -0
- claude_mpm/services/agents/deployment/results/deployment_metrics.py +200 -0
- claude_mpm/services/agents/deployment/results/deployment_result_builder.py +249 -0
- claude_mpm/services/agents/deployment/strategies/__init__.py +25 -0
- claude_mpm/services/agents/deployment/strategies/base_strategy.py +119 -0
- claude_mpm/services/agents/deployment/strategies/project_strategy.py +150 -0
- claude_mpm/services/agents/deployment/strategies/strategy_selector.py +117 -0
- claude_mpm/services/agents/deployment/strategies/system_strategy.py +116 -0
- claude_mpm/services/agents/deployment/strategies/user_strategy.py +137 -0
- claude_mpm/services/agents/deployment/system_instructions_deployer.py +108 -0
- claude_mpm/services/agents/deployment/validation/__init__.py +19 -0
- claude_mpm/services/agents/deployment/validation/agent_validator.py +323 -0
- claude_mpm/services/agents/deployment/validation/deployment_validator.py +238 -0
- claude_mpm/services/agents/deployment/validation/template_validator.py +299 -0
- claude_mpm/services/agents/deployment/validation/validation_result.py +226 -0
- claude_mpm/services/agents/loading/__init__.py +2 -2
- claude_mpm/services/agents/loading/agent_profile_loader.py +259 -229
- claude_mpm/services/agents/loading/base_agent_manager.py +90 -81
- claude_mpm/services/agents/loading/framework_agent_loader.py +154 -129
- claude_mpm/services/agents/management/__init__.py +2 -2
- claude_mpm/services/agents/management/agent_capabilities_generator.py +72 -58
- claude_mpm/services/agents/management/agent_management_service.py +209 -156
- claude_mpm/services/agents/memory/__init__.py +9 -6
- claude_mpm/services/agents/memory/agent_memory_manager.py +218 -1152
- claude_mpm/services/agents/memory/agent_persistence_service.py +20 -16
- claude_mpm/services/agents/memory/analyzer.py +430 -0
- claude_mpm/services/agents/memory/content_manager.py +376 -0
- claude_mpm/services/agents/memory/template_generator.py +468 -0
- claude_mpm/services/agents/registry/__init__.py +7 -10
- claude_mpm/services/agents/registry/deployed_agent_discovery.py +122 -97
- claude_mpm/services/agents/registry/modification_tracker.py +351 -285
- claude_mpm/services/async_session_logger.py +187 -153
- claude_mpm/services/claude_session_logger.py +87 -72
- claude_mpm/services/command_handler_service.py +217 -0
- claude_mpm/services/communication/__init__.py +3 -2
- claude_mpm/services/core/__init__.py +50 -97
- claude_mpm/services/core/base.py +60 -53
- claude_mpm/services/core/interfaces/__init__.py +188 -0
- claude_mpm/services/core/interfaces/agent.py +351 -0
- claude_mpm/services/core/interfaces/communication.py +343 -0
- claude_mpm/services/core/interfaces/infrastructure.py +413 -0
- claude_mpm/services/core/interfaces/service.py +434 -0
- claude_mpm/services/core/interfaces.py +19 -944
- claude_mpm/services/event_aggregator.py +208 -170
- claude_mpm/services/exceptions.py +387 -308
- claude_mpm/services/framework_claude_md_generator/__init__.py +75 -79
- claude_mpm/services/framework_claude_md_generator/content_assembler.py +69 -60
- claude_mpm/services/framework_claude_md_generator/content_validator.py +65 -61
- claude_mpm/services/framework_claude_md_generator/deployment_manager.py +68 -49
- claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +34 -34
- claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +25 -22
- claude_mpm/services/framework_claude_md_generator/section_generators/claude_pm_init.py +10 -10
- claude_mpm/services/framework_claude_md_generator/section_generators/core_responsibilities.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/delegation_constraints.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/environment_config.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/footer.py +6 -5
- claude_mpm/services/framework_claude_md_generator/section_generators/header.py +8 -7
- claude_mpm/services/framework_claude_md_generator/section_generators/orchestration_principles.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/role_designation.py +6 -5
- claude_mpm/services/framework_claude_md_generator/section_generators/subprocess_validation.py +9 -8
- claude_mpm/services/framework_claude_md_generator/section_generators/todo_task_tools.py +4 -3
- claude_mpm/services/framework_claude_md_generator/section_generators/troubleshooting.py +5 -4
- claude_mpm/services/framework_claude_md_generator/section_manager.py +28 -27
- claude_mpm/services/framework_claude_md_generator/version_manager.py +30 -28
- claude_mpm/services/hook_service.py +106 -114
- claude_mpm/services/infrastructure/__init__.py +7 -5
- claude_mpm/services/infrastructure/context_preservation.py +233 -199
- claude_mpm/services/infrastructure/daemon_manager.py +279 -0
- claude_mpm/services/infrastructure/logging.py +83 -76
- claude_mpm/services/infrastructure/monitoring.py +547 -404
- claude_mpm/services/mcp_gateway/__init__.py +30 -13
- claude_mpm/services/mcp_gateway/config/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/config/config_loader.py +61 -56
- claude_mpm/services/mcp_gateway/config/config_schema.py +50 -41
- claude_mpm/services/mcp_gateway/config/configuration.py +82 -75
- claude_mpm/services/mcp_gateway/core/__init__.py +13 -20
- claude_mpm/services/mcp_gateway/core/base.py +80 -67
- claude_mpm/services/mcp_gateway/core/exceptions.py +60 -46
- claude_mpm/services/mcp_gateway/core/interfaces.py +87 -84
- claude_mpm/services/mcp_gateway/main.py +287 -137
- claude_mpm/services/mcp_gateway/registry/__init__.py +1 -1
- claude_mpm/services/mcp_gateway/registry/service_registry.py +97 -94
- claude_mpm/services/mcp_gateway/registry/tool_registry.py +135 -126
- claude_mpm/services/mcp_gateway/server/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/server/mcp_gateway.py +105 -110
- claude_mpm/services/mcp_gateway/server/stdio_handler.py +105 -107
- claude_mpm/services/mcp_gateway/server/stdio_server.py +691 -0
- claude_mpm/services/mcp_gateway/tools/__init__.py +4 -2
- claude_mpm/services/mcp_gateway/tools/base_adapter.py +109 -119
- claude_mpm/services/mcp_gateway/tools/document_summarizer.py +283 -215
- claude_mpm/services/mcp_gateway/tools/hello_world.py +122 -120
- claude_mpm/services/mcp_gateway/tools/ticket_tools.py +652 -0
- claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +606 -0
- claude_mpm/services/memory/__init__.py +2 -2
- claude_mpm/services/memory/builder.py +451 -362
- claude_mpm/services/memory/cache/__init__.py +2 -2
- claude_mpm/services/memory/cache/shared_prompt_cache.py +232 -194
- claude_mpm/services/memory/cache/simple_cache.py +107 -93
- claude_mpm/services/memory/indexed_memory.py +195 -193
- claude_mpm/services/memory/optimizer.py +267 -234
- claude_mpm/services/memory/router.py +571 -263
- claude_mpm/services/memory_hook_service.py +237 -0
- claude_mpm/services/port_manager.py +223 -0
- claude_mpm/services/project/__init__.py +3 -3
- claude_mpm/services/project/analyzer.py +451 -305
- claude_mpm/services/project/registry.py +262 -240
- claude_mpm/services/recovery_manager.py +287 -231
- claude_mpm/services/response_tracker.py +87 -67
- claude_mpm/services/runner_configuration_service.py +587 -0
- claude_mpm/services/session_management_service.py +304 -0
- claude_mpm/services/socketio/__init__.py +4 -4
- claude_mpm/services/socketio/client_proxy.py +174 -0
- claude_mpm/services/socketio/handlers/__init__.py +3 -3
- claude_mpm/services/socketio/handlers/base.py +44 -30
- claude_mpm/services/socketio/handlers/connection.py +145 -65
- claude_mpm/services/socketio/handlers/file.py +123 -108
- claude_mpm/services/socketio/handlers/git.py +607 -373
- claude_mpm/services/socketio/handlers/hook.py +170 -0
- claude_mpm/services/socketio/handlers/memory.py +4 -4
- claude_mpm/services/socketio/handlers/project.py +4 -4
- claude_mpm/services/socketio/handlers/registry.py +53 -38
- claude_mpm/services/socketio/server/__init__.py +18 -0
- claude_mpm/services/socketio/server/broadcaster.py +252 -0
- claude_mpm/services/socketio/server/core.py +399 -0
- claude_mpm/services/socketio/server/main.py +323 -0
- claude_mpm/services/socketio_client_manager.py +160 -133
- claude_mpm/services/socketio_server.py +36 -1885
- claude_mpm/services/subprocess_launcher_service.py +316 -0
- claude_mpm/services/system_instructions_service.py +258 -0
- claude_mpm/services/ticket_manager.py +19 -533
- claude_mpm/services/utility_service.py +285 -0
- claude_mpm/services/version_control/__init__.py +18 -21
- claude_mpm/services/version_control/branch_strategy.py +20 -10
- claude_mpm/services/version_control/conflict_resolution.py +37 -13
- claude_mpm/services/version_control/git_operations.py +52 -21
- claude_mpm/services/version_control/semantic_versioning.py +92 -53
- claude_mpm/services/version_control/version_parser.py +145 -125
- claude_mpm/services/version_service.py +270 -0
- claude_mpm/storage/__init__.py +2 -2
- claude_mpm/storage/state_storage.py +177 -181
- claude_mpm/ticket_wrapper.py +2 -2
- claude_mpm/utils/__init__.py +2 -2
- claude_mpm/utils/agent_dependency_loader.py +453 -243
- claude_mpm/utils/config_manager.py +157 -118
- claude_mpm/utils/console.py +1 -1
- claude_mpm/utils/dependency_cache.py +102 -107
- claude_mpm/utils/dependency_manager.py +52 -47
- claude_mpm/utils/dependency_strategies.py +131 -96
- claude_mpm/utils/environment_context.py +110 -102
- claude_mpm/utils/error_handler.py +75 -55
- claude_mpm/utils/file_utils.py +80 -67
- claude_mpm/utils/framework_detection.py +12 -11
- claude_mpm/utils/import_migration_example.py +12 -60
- claude_mpm/utils/imports.py +48 -45
- claude_mpm/utils/path_operations.py +100 -93
- claude_mpm/utils/robust_installer.py +172 -164
- claude_mpm/utils/session_logging.py +30 -23
- claude_mpm/utils/subprocess_utils.py +99 -61
- claude_mpm/validation/__init__.py +1 -1
- claude_mpm/validation/agent_validator.py +151 -111
- claude_mpm/validation/frontmatter_validator.py +92 -71
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/METADATA +27 -1
- claude_mpm-4.0.3.dist-info/RECORD +402 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/entry_points.txt +1 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/licenses/LICENSE +1 -1
- claude_mpm/cli/commands/run_guarded.py +0 -511
- claude_mpm/config/memory_guardian_config.py +0 -325
- claude_mpm/config/memory_guardian_yaml.py +0 -335
- claude_mpm/core/config_paths.py +0 -150
- claude_mpm/core/memory_aware_runner.py +0 -353
- claude_mpm/dashboard/static/js/dashboard-original.js +0 -4134
- claude_mpm/deployment_paths.py +0 -261
- claude_mpm/hooks/claude_hooks/hook_handler_fixed.py +0 -454
- claude_mpm/models/state_models.py +0 -433
- claude_mpm/services/agent/__init__.py +0 -24
- claude_mpm/services/agent/deployment.py +0 -2548
- claude_mpm/services/agent/management.py +0 -598
- claude_mpm/services/agent/registry.py +0 -813
- claude_mpm/services/agents/registry/agent_registry.py +0 -813
- claude_mpm/services/communication/socketio.py +0 -1935
- claude_mpm/services/communication/websocket.py +0 -479
- claude_mpm/services/framework_claude_md_generator.py +0 -624
- claude_mpm/services/health_monitor.py +0 -893
- claude_mpm/services/infrastructure/graceful_degradation.py +0 -616
- claude_mpm/services/infrastructure/health_monitor.py +0 -775
- claude_mpm/services/infrastructure/memory_dashboard.py +0 -479
- claude_mpm/services/infrastructure/memory_guardian.py +0 -944
- claude_mpm/services/infrastructure/restart_protection.py +0 -642
- claude_mpm/services/infrastructure/state_manager.py +0 -774
- claude_mpm/services/mcp_gateway/manager.py +0 -334
- claude_mpm/services/optimized_hook_service.py +0 -542
- claude_mpm/services/project_analyzer.py +0 -864
- claude_mpm/services/project_registry.py +0 -608
- claude_mpm/services/standalone_socketio_server.py +0 -1300
- claude_mpm/services/ticket_manager_di.py +0 -318
- claude_mpm/services/ticketing_service_original.py +0 -510
- claude_mpm/utils/paths.py +0 -395
- claude_mpm/utils/platform_memory.py +0 -524
- claude_mpm-3.9.11.dist-info/RECORD +0 -306
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/WHEEL +0 -0
- {claude_mpm-3.9.11.dist-info → claude_mpm-4.0.3.dist-info}/top_level.txt +0 -0
| @@ -16,21 +16,21 @@ Design Principles: | |
| 16 16 |  | 
| 17 17 | 
             
            import asyncio
         | 
| 18 18 | 
             
            import logging
         | 
| 19 | 
            -
            import  | 
| 19 | 
            +
            import socket
         | 
| 20 20 | 
             
            import threading
         | 
| 21 | 
            +
            import time
         | 
| 21 22 | 
             
            from abc import ABC, abstractmethod
         | 
| 22 23 | 
             
            from collections import deque
         | 
| 23 | 
            -
            from dataclasses import  | 
| 24 | 
            +
            from dataclasses import asdict, dataclass
         | 
| 24 25 | 
             
            from datetime import datetime, timezone
         | 
| 25 26 | 
             
            from enum import Enum
         | 
| 26 | 
            -
            from  | 
| 27 | 
            -
             | 
| 28 | 
            -
            import json
         | 
| 29 | 
            -
            import socket
         | 
| 27 | 
            +
            from typing import Any, Callable, Dict, List, Optional, Union
         | 
| 28 | 
            +
             | 
| 30 29 | 
             
            from claude_mpm.core.constants import ResourceLimits, TimeoutConfig
         | 
| 31 30 |  | 
| 32 31 | 
             
            try:
         | 
| 33 32 | 
             
                import psutil
         | 
| 33 | 
            +
             | 
| 34 34 | 
             
                PSUTIL_AVAILABLE = True
         | 
| 35 35 | 
             
            except ImportError:
         | 
| 36 36 | 
             
                PSUTIL_AVAILABLE = False
         | 
| @@ -39,6 +39,7 @@ except ImportError: | |
| 39 39 |  | 
| 40 40 | 
             
            class HealthStatus(Enum):
         | 
| 41 41 | 
             
                """Health status levels for monitoring."""
         | 
| 42 | 
            +
             | 
| 42 43 | 
             
                HEALTHY = "healthy"
         | 
| 43 44 | 
             
                WARNING = "warning"
         | 
| 44 45 | 
             
                CRITICAL = "critical"
         | 
| @@ -48,6 +49,7 @@ class HealthStatus(Enum): | |
| 48 49 | 
             
            @dataclass
         | 
| 49 50 | 
             
            class HealthMetric:
         | 
| 50 51 | 
             
                """Individual health metric data structure."""
         | 
| 52 | 
            +
             | 
| 51 53 | 
             
                name: str
         | 
| 52 54 | 
             
                value: Union[int, float, str, bool]
         | 
| 53 55 | 
             
                status: HealthStatus
         | 
| @@ -55,60 +57,71 @@ class HealthMetric: | |
| 55 57 | 
             
                unit: Optional[str] = None
         | 
| 56 58 | 
             
                timestamp: float = None
         | 
| 57 59 | 
             
                message: Optional[str] = None
         | 
| 58 | 
            -
             | 
| 60 | 
            +
             | 
| 59 61 | 
             
                def __post_init__(self):
         | 
| 60 62 | 
             
                    if self.timestamp is None:
         | 
| 61 63 | 
             
                        self.timestamp = time.time()
         | 
| 62 | 
            -
             | 
| 64 | 
            +
             | 
| 63 65 | 
             
                def to_dict(self) -> Dict[str, Any]:
         | 
| 64 66 | 
             
                    """Convert metric to dictionary format."""
         | 
| 65 67 | 
             
                    result = asdict(self)
         | 
| 66 | 
            -
                    result[ | 
| 67 | 
            -
                    result[ | 
| 68 | 
            +
                    result["status"] = self.status.value
         | 
| 69 | 
            +
                    result["timestamp_iso"] = datetime.fromtimestamp(
         | 
| 70 | 
            +
                        self.timestamp, timezone.utc
         | 
| 71 | 
            +
                    ).isoformat()
         | 
| 68 72 | 
             
                    return result
         | 
| 69 73 |  | 
| 70 74 |  | 
| 71 75 | 
             
            @dataclass
         | 
| 72 76 | 
             
            class HealthCheckResult:
         | 
| 73 77 | 
             
                """Result of a health check operation."""
         | 
| 78 | 
            +
             | 
| 74 79 | 
             
                overall_status: HealthStatus
         | 
| 75 80 | 
             
                metrics: List[HealthMetric]
         | 
| 76 81 | 
             
                timestamp: float
         | 
| 77 82 | 
             
                duration_ms: float
         | 
| 78 83 | 
             
                errors: List[str]
         | 
| 79 | 
            -
             | 
| 84 | 
            +
             | 
| 80 85 | 
             
                def __post_init__(self):
         | 
| 81 | 
            -
                    if not hasattr(self,  | 
| 86 | 
            +
                    if not hasattr(self, "timestamp") or self.timestamp is None:
         | 
| 82 87 | 
             
                        self.timestamp = time.time()
         | 
| 83 | 
            -
             | 
| 88 | 
            +
             | 
| 84 89 | 
             
                def to_dict(self) -> Dict[str, Any]:
         | 
| 85 90 | 
             
                    """Convert health check result to dictionary format."""
         | 
| 86 91 | 
             
                    return {
         | 
| 87 | 
            -
                         | 
| 88 | 
            -
                         | 
| 89 | 
            -
                         | 
| 90 | 
            -
                         | 
| 91 | 
            -
             | 
| 92 | 
            -
                         | 
| 93 | 
            -
                         | 
| 94 | 
            -
                         | 
| 95 | 
            -
                         | 
| 96 | 
            -
                         | 
| 92 | 
            +
                        "overall_status": self.overall_status.value,
         | 
| 93 | 
            +
                        "metrics": [metric.to_dict() for metric in self.metrics],
         | 
| 94 | 
            +
                        "timestamp": self.timestamp,
         | 
| 95 | 
            +
                        "timestamp_iso": datetime.fromtimestamp(
         | 
| 96 | 
            +
                            self.timestamp, timezone.utc
         | 
| 97 | 
            +
                        ).isoformat(),
         | 
| 98 | 
            +
                        "duration_ms": self.duration_ms,
         | 
| 99 | 
            +
                        "errors": self.errors,
         | 
| 100 | 
            +
                        "metric_count": len(self.metrics),
         | 
| 101 | 
            +
                        "healthy_metrics": len(
         | 
| 102 | 
            +
                            [m for m in self.metrics if m.status == HealthStatus.HEALTHY]
         | 
| 103 | 
            +
                        ),
         | 
| 104 | 
            +
                        "warning_metrics": len(
         | 
| 105 | 
            +
                            [m for m in self.metrics if m.status == HealthStatus.WARNING]
         | 
| 106 | 
            +
                        ),
         | 
| 107 | 
            +
                        "critical_metrics": len(
         | 
| 108 | 
            +
                            [m for m in self.metrics if m.status == HealthStatus.CRITICAL]
         | 
| 109 | 
            +
                        ),
         | 
| 97 110 | 
             
                    }
         | 
| 98 111 |  | 
| 99 112 |  | 
| 100 113 | 
             
            class HealthChecker(ABC):
         | 
| 101 114 | 
             
                """Abstract base class for health checkers.
         | 
| 102 | 
            -
             | 
| 115 | 
            +
             | 
| 103 116 | 
             
                Health checkers implement specific monitoring logic for different aspects
         | 
| 104 117 | 
             
                of the system (process resources, network connectivity, service health, etc.).
         | 
| 105 118 | 
             
                """
         | 
| 106 | 
            -
             | 
| 119 | 
            +
             | 
| 107 120 | 
             
                @abstractmethod
         | 
| 108 121 | 
             
                def get_name(self) -> str:
         | 
| 109 122 | 
             
                    """Get the name of this health checker."""
         | 
| 110 123 | 
             
                    pass
         | 
| 111 | 
            -
             | 
| 124 | 
            +
             | 
| 112 125 | 
             
                @abstractmethod
         | 
| 113 126 | 
             
                async def check_health(self) -> List[HealthMetric]:
         | 
| 114 127 | 
             
                    """Perform health check and return metrics."""
         | 
| @@ -117,7 +130,7 @@ class HealthChecker(ABC): | |
| 117 130 |  | 
| 118 131 | 
             
            class ProcessResourceChecker(HealthChecker):
         | 
| 119 132 | 
             
                """Health checker for process resource usage.
         | 
| 120 | 
            -
             | 
| 133 | 
            +
             | 
| 121 134 | 
             
                Monitors:
         | 
| 122 135 | 
             
                - CPU usage percentage
         | 
| 123 136 | 
             
                - Memory usage (RSS, VMS)
         | 
| @@ -125,11 +138,16 @@ class ProcessResourceChecker(HealthChecker): | |
| 125 138 | 
             
                - Thread count
         | 
| 126 139 | 
             
                - Process status
         | 
| 127 140 | 
             
                """
         | 
| 128 | 
            -
             | 
| 129 | 
            -
                def __init__( | 
| 130 | 
            -
             | 
| 141 | 
            +
             | 
| 142 | 
            +
                def __init__(
         | 
| 143 | 
            +
                    self,
         | 
| 144 | 
            +
                    pid: int,
         | 
| 145 | 
            +
                    cpu_threshold: float = 80.0,
         | 
| 146 | 
            +
                    memory_threshold_mb: int = 500,
         | 
| 147 | 
            +
                    fd_threshold: int = 1000,
         | 
| 148 | 
            +
                ):
         | 
| 131 149 | 
             
                    """Initialize process resource checker.
         | 
| 132 | 
            -
             | 
| 150 | 
            +
             | 
| 133 151 | 
             
                    Args:
         | 
| 134 152 | 
             
                        pid: Process ID to monitor
         | 
| 135 153 | 
             
                        cpu_threshold: CPU usage threshold as percentage
         | 
| @@ -142,197 +160,251 @@ class ProcessResourceChecker(HealthChecker): | |
| 142 160 | 
             
                    self.fd_threshold = fd_threshold
         | 
| 143 161 | 
             
                    self.process = None
         | 
| 144 162 | 
             
                    self.logger = logging.getLogger(f"{__name__}.ProcessResourceChecker")
         | 
| 145 | 
            -
             | 
| 163 | 
            +
             | 
| 146 164 | 
             
                    if PSUTIL_AVAILABLE:
         | 
| 147 165 | 
             
                        try:
         | 
| 148 166 | 
             
                            self.process = psutil.Process(pid)
         | 
| 149 167 | 
             
                        except psutil.NoSuchProcess:
         | 
| 150 168 | 
             
                            self.logger.warning(f"Process {pid} not found for monitoring")
         | 
| 151 | 
            -
             | 
| 169 | 
            +
             | 
| 152 170 | 
             
                def get_name(self) -> str:
         | 
| 153 171 | 
             
                    return f"process_resources_{self.pid}"
         | 
| 154 | 
            -
             | 
| 172 | 
            +
             | 
| 155 173 | 
             
                async def check_health(self) -> List[HealthMetric]:
         | 
| 156 174 | 
             
                    """Check process resource usage."""
         | 
| 157 175 | 
             
                    metrics = []
         | 
| 158 | 
            -
             | 
| 176 | 
            +
             | 
| 159 177 | 
             
                    if not PSUTIL_AVAILABLE:
         | 
| 160 | 
            -
                        metrics.append( | 
| 161 | 
            -
                             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 178 | 
            +
                        metrics.append(
         | 
| 179 | 
            +
                            HealthMetric(
         | 
| 180 | 
            +
                                name="psutil_availability",
         | 
| 181 | 
            +
                                value=False,
         | 
| 182 | 
            +
                                status=HealthStatus.WARNING,
         | 
| 183 | 
            +
                                message="psutil not available for enhanced monitoring",
         | 
| 184 | 
            +
                            )
         | 
| 185 | 
            +
                        )
         | 
| 166 186 | 
             
                        return metrics
         | 
| 167 | 
            -
             | 
| 187 | 
            +
             | 
| 168 188 | 
             
                    if not self.process:
         | 
| 169 | 
            -
                        metrics.append( | 
| 170 | 
            -
                             | 
| 171 | 
            -
             | 
| 172 | 
            -
             | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 189 | 
            +
                        metrics.append(
         | 
| 190 | 
            +
                            HealthMetric(
         | 
| 191 | 
            +
                                name="process_exists",
         | 
| 192 | 
            +
                                value=False,
         | 
| 193 | 
            +
                                status=HealthStatus.CRITICAL,
         | 
| 194 | 
            +
                                message=f"Process {self.pid} not found",
         | 
| 195 | 
            +
                            )
         | 
| 196 | 
            +
                        )
         | 
| 175 197 | 
             
                        return metrics
         | 
| 176 | 
            -
             | 
| 198 | 
            +
             | 
| 177 199 | 
             
                    try:
         | 
| 178 200 | 
             
                        # Check if process still exists
         | 
| 179 201 | 
             
                        if not self.process.is_running():
         | 
| 180 | 
            -
                            metrics.append( | 
| 181 | 
            -
                                 | 
| 182 | 
            -
             | 
| 183 | 
            -
             | 
| 184 | 
            -
             | 
| 185 | 
            -
             | 
| 202 | 
            +
                            metrics.append(
         | 
| 203 | 
            +
                                HealthMetric(
         | 
| 204 | 
            +
                                    name="process_exists",
         | 
| 205 | 
            +
                                    value=False,
         | 
| 206 | 
            +
                                    status=HealthStatus.CRITICAL,
         | 
| 207 | 
            +
                                    message=f"Process {self.pid} is no longer running",
         | 
| 208 | 
            +
                                )
         | 
| 209 | 
            +
                            )
         | 
| 186 210 | 
             
                            return metrics
         | 
| 187 | 
            -
             | 
| 211 | 
            +
             | 
| 188 212 | 
             
                        # Process status
         | 
| 189 213 | 
             
                        status = self.process.status()
         | 
| 190 | 
            -
                        process_healthy = status not in [ | 
| 191 | 
            -
             | 
| 192 | 
            -
                             | 
| 193 | 
            -
                             | 
| 194 | 
            -
             | 
| 195 | 
            -
             | 
| 196 | 
            -
             | 
| 197 | 
            -
             | 
| 214 | 
            +
                        process_healthy = status not in [
         | 
| 215 | 
            +
                            psutil.STATUS_ZOMBIE,
         | 
| 216 | 
            +
                            psutil.STATUS_DEAD,
         | 
| 217 | 
            +
                            psutil.STATUS_STOPPED,
         | 
| 218 | 
            +
                        ]
         | 
| 219 | 
            +
                        metrics.append(
         | 
| 220 | 
            +
                            HealthMetric(
         | 
| 221 | 
            +
                                name="process_status",
         | 
| 222 | 
            +
                                value=status,
         | 
| 223 | 
            +
                                status=HealthStatus.HEALTHY
         | 
| 224 | 
            +
                                if process_healthy
         | 
| 225 | 
            +
                                else HealthStatus.CRITICAL,
         | 
| 226 | 
            +
                                message=f"Process status: {status}",
         | 
| 227 | 
            +
                            )
         | 
| 228 | 
            +
                        )
         | 
| 229 | 
            +
             | 
| 198 230 | 
             
                        # CPU usage
         | 
| 199 231 | 
             
                        try:
         | 
| 200 | 
            -
                            cpu_percent = self.process.cpu_percent( | 
| 232 | 
            +
                            cpu_percent = self.process.cpu_percent(
         | 
| 233 | 
            +
                                interval=TimeoutConfig.CPU_SAMPLE_INTERVAL
         | 
| 234 | 
            +
                            )
         | 
| 201 235 | 
             
                            cpu_status = HealthStatus.HEALTHY
         | 
| 202 236 | 
             
                            if cpu_percent > self.cpu_threshold:
         | 
| 203 | 
            -
                                cpu_status =  | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
             | 
| 207 | 
            -
                                 | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
                                 | 
| 211 | 
            -
             | 
| 237 | 
            +
                                cpu_status = (
         | 
| 238 | 
            +
                                    HealthStatus.WARNING
         | 
| 239 | 
            +
                                    if cpu_percent < self.cpu_threshold * 1.2
         | 
| 240 | 
            +
                                    else HealthStatus.CRITICAL
         | 
| 241 | 
            +
                                )
         | 
| 242 | 
            +
             | 
| 243 | 
            +
                            metrics.append(
         | 
| 244 | 
            +
                                HealthMetric(
         | 
| 245 | 
            +
                                    name="cpu_usage_percent",
         | 
| 246 | 
            +
                                    value=round(cpu_percent, 2),
         | 
| 247 | 
            +
                                    status=cpu_status,
         | 
| 248 | 
            +
                                    threshold=self.cpu_threshold,
         | 
| 249 | 
            +
                                    unit="%",
         | 
| 250 | 
            +
                                )
         | 
| 251 | 
            +
                            )
         | 
| 212 252 | 
             
                        except Exception as e:
         | 
| 213 | 
            -
                            metrics.append( | 
| 214 | 
            -
                                 | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 218 | 
            -
             | 
| 219 | 
            -
             | 
| 253 | 
            +
                            metrics.append(
         | 
| 254 | 
            +
                                HealthMetric(
         | 
| 255 | 
            +
                                    name="cpu_usage_percent",
         | 
| 256 | 
            +
                                    value=-1,
         | 
| 257 | 
            +
                                    status=HealthStatus.UNKNOWN,
         | 
| 258 | 
            +
                                    message=f"Failed to get CPU usage: {e}",
         | 
| 259 | 
            +
                                )
         | 
| 260 | 
            +
                            )
         | 
| 261 | 
            +
             | 
| 220 262 | 
             
                        # Memory usage
         | 
| 221 263 | 
             
                        try:
         | 
| 222 264 | 
             
                            memory_info = self.process.memory_info()
         | 
| 223 265 | 
             
                            memory_mb = memory_info.rss / ResourceLimits.BYTES_TO_MB
         | 
| 224 266 | 
             
                            memory_status = HealthStatus.HEALTHY
         | 
| 225 267 | 
             
                            if memory_mb > self.memory_threshold_mb:
         | 
| 226 | 
            -
                                memory_status =  | 
| 227 | 
            -
             | 
| 228 | 
            -
             | 
| 229 | 
            -
             | 
| 230 | 
            -
                                 | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
                                 | 
| 234 | 
            -
             | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
                                 | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 268 | 
            +
                                memory_status = (
         | 
| 269 | 
            +
                                    HealthStatus.WARNING
         | 
| 270 | 
            +
                                    if memory_mb < self.memory_threshold_mb * 1.2
         | 
| 271 | 
            +
                                    else HealthStatus.CRITICAL
         | 
| 272 | 
            +
                                )
         | 
| 273 | 
            +
             | 
| 274 | 
            +
                            metrics.append(
         | 
| 275 | 
            +
                                HealthMetric(
         | 
| 276 | 
            +
                                    name="memory_usage_mb",
         | 
| 277 | 
            +
                                    value=round(memory_mb, 2),
         | 
| 278 | 
            +
                                    status=memory_status,
         | 
| 279 | 
            +
                                    threshold=self.memory_threshold_mb,
         | 
| 280 | 
            +
                                    unit="MB",
         | 
| 281 | 
            +
                                )
         | 
| 282 | 
            +
                            )
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                            metrics.append(
         | 
| 285 | 
            +
                                HealthMetric(
         | 
| 286 | 
            +
                                    name="memory_vms_mb",
         | 
| 287 | 
            +
                                    value=round(memory_info.vms / ResourceLimits.BYTES_TO_MB, 2),
         | 
| 288 | 
            +
                                    status=HealthStatus.HEALTHY,
         | 
| 289 | 
            +
                                    unit="MB",
         | 
| 290 | 
            +
                                )
         | 
| 291 | 
            +
                            )
         | 
| 242 292 | 
             
                        except Exception as e:
         | 
| 243 | 
            -
                            metrics.append( | 
| 244 | 
            -
                                 | 
| 245 | 
            -
             | 
| 246 | 
            -
             | 
| 247 | 
            -
             | 
| 248 | 
            -
             | 
| 249 | 
            -
             | 
| 293 | 
            +
                            metrics.append(
         | 
| 294 | 
            +
                                HealthMetric(
         | 
| 295 | 
            +
                                    name="memory_usage_mb",
         | 
| 296 | 
            +
                                    value=-1,
         | 
| 297 | 
            +
                                    status=HealthStatus.UNKNOWN,
         | 
| 298 | 
            +
                                    message=f"Failed to get memory usage: {e}",
         | 
| 299 | 
            +
                                )
         | 
| 300 | 
            +
                            )
         | 
| 301 | 
            +
             | 
| 250 302 | 
             
                        # File descriptors (Unix only)
         | 
| 251 | 
            -
                        if hasattr(self.process,  | 
| 303 | 
            +
                        if hasattr(self.process, "num_fds"):
         | 
| 252 304 | 
             
                            try:
         | 
| 253 305 | 
             
                                fd_count = self.process.num_fds()
         | 
| 254 306 | 
             
                                fd_status = HealthStatus.HEALTHY
         | 
| 255 307 | 
             
                                if fd_count > self.fd_threshold:
         | 
| 256 | 
            -
                                    fd_status =  | 
| 257 | 
            -
             | 
| 258 | 
            -
             | 
| 259 | 
            -
             | 
| 260 | 
            -
                                     | 
| 261 | 
            -
             | 
| 262 | 
            -
             | 
| 263 | 
            -
             | 
| 308 | 
            +
                                    fd_status = (
         | 
| 309 | 
            +
                                        HealthStatus.WARNING
         | 
| 310 | 
            +
                                        if fd_count < self.fd_threshold * 1.2
         | 
| 311 | 
            +
                                        else HealthStatus.CRITICAL
         | 
| 312 | 
            +
                                    )
         | 
| 313 | 
            +
             | 
| 314 | 
            +
                                metrics.append(
         | 
| 315 | 
            +
                                    HealthMetric(
         | 
| 316 | 
            +
                                        name="file_descriptors",
         | 
| 317 | 
            +
                                        value=fd_count,
         | 
| 318 | 
            +
                                        status=fd_status,
         | 
| 319 | 
            +
                                        threshold=self.fd_threshold,
         | 
| 320 | 
            +
                                    )
         | 
| 321 | 
            +
                                )
         | 
| 264 322 | 
             
                            except Exception as e:
         | 
| 265 | 
            -
                                metrics.append( | 
| 266 | 
            -
                                     | 
| 267 | 
            -
             | 
| 268 | 
            -
             | 
| 269 | 
            -
             | 
| 270 | 
            -
             | 
| 271 | 
            -
             | 
| 323 | 
            +
                                metrics.append(
         | 
| 324 | 
            +
                                    HealthMetric(
         | 
| 325 | 
            +
                                        name="file_descriptors",
         | 
| 326 | 
            +
                                        value=-1,
         | 
| 327 | 
            +
                                        status=HealthStatus.UNKNOWN,
         | 
| 328 | 
            +
                                        message=f"Failed to get file descriptor count: {e}",
         | 
| 329 | 
            +
                                    )
         | 
| 330 | 
            +
                                )
         | 
| 331 | 
            +
             | 
| 272 332 | 
             
                        # Thread count
         | 
| 273 333 | 
             
                        try:
         | 
| 274 334 | 
             
                            thread_count = self.process.num_threads()
         | 
| 275 | 
            -
                            metrics.append( | 
| 276 | 
            -
                                 | 
| 277 | 
            -
             | 
| 278 | 
            -
             | 
| 279 | 
            -
             | 
| 335 | 
            +
                            metrics.append(
         | 
| 336 | 
            +
                                HealthMetric(
         | 
| 337 | 
            +
                                    name="thread_count",
         | 
| 338 | 
            +
                                    value=thread_count,
         | 
| 339 | 
            +
                                    status=HealthStatus.HEALTHY,
         | 
| 340 | 
            +
                                )
         | 
| 341 | 
            +
                            )
         | 
| 280 342 | 
             
                        except Exception as e:
         | 
| 281 | 
            -
                            metrics.append( | 
| 282 | 
            -
                                 | 
| 283 | 
            -
             | 
| 284 | 
            -
             | 
| 285 | 
            -
             | 
| 286 | 
            -
             | 
| 287 | 
            -
             | 
| 343 | 
            +
                            metrics.append(
         | 
| 344 | 
            +
                                HealthMetric(
         | 
| 345 | 
            +
                                    name="thread_count",
         | 
| 346 | 
            +
                                    value=-1,
         | 
| 347 | 
            +
                                    status=HealthStatus.UNKNOWN,
         | 
| 348 | 
            +
                                    message=f"Failed to get thread count: {e}",
         | 
| 349 | 
            +
                                )
         | 
| 350 | 
            +
                            )
         | 
| 351 | 
            +
             | 
| 288 352 | 
             
                        # Process create time (for validation)
         | 
| 289 353 | 
             
                        try:
         | 
| 290 354 | 
             
                            create_time = self.process.create_time()
         | 
| 291 | 
            -
                            metrics.append( | 
| 292 | 
            -
                                 | 
| 293 | 
            -
             | 
| 294 | 
            -
             | 
| 295 | 
            -
             | 
| 296 | 
            -
             | 
| 355 | 
            +
                            metrics.append(
         | 
| 356 | 
            +
                                HealthMetric(
         | 
| 357 | 
            +
                                    name="process_start_time",
         | 
| 358 | 
            +
                                    value=create_time,
         | 
| 359 | 
            +
                                    status=HealthStatus.HEALTHY,
         | 
| 360 | 
            +
                                    unit="timestamp",
         | 
| 361 | 
            +
                                )
         | 
| 362 | 
            +
                            )
         | 
| 297 363 | 
             
                        except Exception as e:
         | 
| 298 | 
            -
                            metrics.append( | 
| 299 | 
            -
                                 | 
| 300 | 
            -
             | 
| 301 | 
            -
             | 
| 302 | 
            -
             | 
| 303 | 
            -
             | 
| 304 | 
            -
             | 
| 364 | 
            +
                            metrics.append(
         | 
| 365 | 
            +
                                HealthMetric(
         | 
| 366 | 
            +
                                    name="process_start_time",
         | 
| 367 | 
            +
                                    value=-1,
         | 
| 368 | 
            +
                                    status=HealthStatus.UNKNOWN,
         | 
| 369 | 
            +
                                    message=f"Failed to get process start time: {e}",
         | 
| 370 | 
            +
                                )
         | 
| 371 | 
            +
                            )
         | 
| 372 | 
            +
             | 
| 305 373 | 
             
                    except psutil.NoSuchProcess:
         | 
| 306 | 
            -
                        metrics.append( | 
| 307 | 
            -
                             | 
| 308 | 
            -
             | 
| 309 | 
            -
             | 
| 310 | 
            -
             | 
| 311 | 
            -
             | 
| 374 | 
            +
                        metrics.append(
         | 
| 375 | 
            +
                            HealthMetric(
         | 
| 376 | 
            +
                                name="process_exists",
         | 
| 377 | 
            +
                                value=False,
         | 
| 378 | 
            +
                                status=HealthStatus.CRITICAL,
         | 
| 379 | 
            +
                                message=f"Process {self.pid} no longer exists",
         | 
| 380 | 
            +
                            )
         | 
| 381 | 
            +
                        )
         | 
| 312 382 | 
             
                    except Exception as e:
         | 
| 313 383 | 
             
                        self.logger.error(f"Error checking process health: {e}")
         | 
| 314 | 
            -
                        metrics.append( | 
| 315 | 
            -
                             | 
| 316 | 
            -
             | 
| 317 | 
            -
             | 
| 318 | 
            -
             | 
| 319 | 
            -
             | 
| 320 | 
            -
             | 
| 384 | 
            +
                        metrics.append(
         | 
| 385 | 
            +
                            HealthMetric(
         | 
| 386 | 
            +
                                name="process_check_error",
         | 
| 387 | 
            +
                                value=str(e),
         | 
| 388 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 389 | 
            +
                                message=f"Unexpected error during process health check: {e}",
         | 
| 390 | 
            +
                            )
         | 
| 391 | 
            +
                        )
         | 
| 392 | 
            +
             | 
| 321 393 | 
             
                    return metrics
         | 
| 322 394 |  | 
| 323 395 |  | 
| 324 396 | 
             
            class NetworkConnectivityChecker(HealthChecker):
         | 
| 325 397 | 
             
                """Health checker for network connectivity.
         | 
| 326 | 
            -
             | 
| 398 | 
            +
             | 
| 327 399 | 
             
                Monitors:
         | 
| 328 400 | 
             
                - Port availability and binding status
         | 
| 329 401 | 
             
                - Socket connection health
         | 
| 330 402 | 
             
                - Network interface status
         | 
| 331 403 | 
             
                """
         | 
| 332 | 
            -
             | 
| 404 | 
            +
             | 
| 333 405 | 
             
                def __init__(self, host: str, port: int, timeout: float = 1.0):
         | 
| 334 406 | 
             
                    """Initialize network connectivity checker.
         | 
| 335 | 
            -
             | 
| 407 | 
            +
             | 
| 336 408 | 
             
                    Args:
         | 
| 337 409 | 
             
                        host: Host address to check
         | 
| 338 410 | 
             
                        port: Port number to check
         | 
| @@ -342,14 +414,14 @@ class NetworkConnectivityChecker(HealthChecker): | |
| 342 414 | 
             
                    self.port = port
         | 
| 343 415 | 
             
                    self.timeout = timeout
         | 
| 344 416 | 
             
                    self.logger = logging.getLogger(f"{__name__}.NetworkConnectivityChecker")
         | 
| 345 | 
            -
             | 
| 417 | 
            +
             | 
| 346 418 | 
             
                def get_name(self) -> str:
         | 
| 347 419 | 
             
                    return f"network_connectivity_{self.host}_{self.port}"
         | 
| 348 | 
            -
             | 
| 420 | 
            +
             | 
| 349 421 | 
             
                async def check_health(self) -> List[HealthMetric]:
         | 
| 350 422 | 
             
                    """Check network connectivity."""
         | 
| 351 423 | 
             
                    metrics = []
         | 
| 352 | 
            -
             | 
| 424 | 
            +
             | 
| 353 425 | 
             
                    # Check port binding
         | 
| 354 426 | 
             
                    try:
         | 
| 355 427 | 
             
                        # Try to connect to the port
         | 
| @@ -357,64 +429,78 @@ class NetworkConnectivityChecker(HealthChecker): | |
| 357 429 | 
             
                        sock.settimeout(self.timeout)
         | 
| 358 430 | 
             
                        result = sock.connect_ex((self.host, self.port))
         | 
| 359 431 | 
             
                        sock.close()
         | 
| 360 | 
            -
             | 
| 432 | 
            +
             | 
| 361 433 | 
             
                        if result == 0:
         | 
| 362 | 
            -
                            metrics.append( | 
| 363 | 
            -
                                 | 
| 364 | 
            -
             | 
| 365 | 
            -
             | 
| 366 | 
            -
             | 
| 367 | 
            -
             | 
| 434 | 
            +
                            metrics.append(
         | 
| 435 | 
            +
                                HealthMetric(
         | 
| 436 | 
            +
                                    name="port_accessible",
         | 
| 437 | 
            +
                                    value=True,
         | 
| 438 | 
            +
                                    status=HealthStatus.HEALTHY,
         | 
| 439 | 
            +
                                    message=f"Port {self.port} is accessible on {self.host}",
         | 
| 440 | 
            +
                                )
         | 
| 441 | 
            +
                            )
         | 
| 368 442 | 
             
                        else:
         | 
| 369 | 
            -
                            metrics.append( | 
| 443 | 
            +
                            metrics.append(
         | 
| 444 | 
            +
                                HealthMetric(
         | 
| 445 | 
            +
                                    name="port_accessible",
         | 
| 446 | 
            +
                                    value=False,
         | 
| 447 | 
            +
                                    status=HealthStatus.CRITICAL,
         | 
| 448 | 
            +
                                    message=f"Port {self.port} is not accessible on {self.host}",
         | 
| 449 | 
            +
                                )
         | 
| 450 | 
            +
                            )
         | 
| 451 | 
            +
                    except Exception as e:
         | 
| 452 | 
            +
                        metrics.append(
         | 
| 453 | 
            +
                            HealthMetric(
         | 
| 370 454 | 
             
                                name="port_accessible",
         | 
| 371 455 | 
             
                                value=False,
         | 
| 372 | 
            -
                                status=HealthStatus. | 
| 373 | 
            -
                                message=f" | 
| 374 | 
            -
                            ) | 
| 375 | 
            -
             | 
| 376 | 
            -
             | 
| 377 | 
            -
                            name="port_accessible",
         | 
| 378 | 
            -
                            value=False,
         | 
| 379 | 
            -
                            status=HealthStatus.UNKNOWN,
         | 
| 380 | 
            -
                            message=f"Error checking port accessibility: {e}"
         | 
| 381 | 
            -
                        ))
         | 
| 382 | 
            -
                    
         | 
| 456 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 457 | 
            +
                                message=f"Error checking port accessibility: {e}",
         | 
| 458 | 
            +
                            )
         | 
| 459 | 
            +
                        )
         | 
| 460 | 
            +
             | 
| 383 461 | 
             
                    # Check if we can create a socket (resource availability)
         | 
| 384 462 | 
             
                    try:
         | 
| 385 463 | 
             
                        test_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
         | 
| 386 464 | 
             
                        test_sock.close()
         | 
| 387 | 
            -
                        metrics.append( | 
| 388 | 
            -
                             | 
| 389 | 
            -
             | 
| 390 | 
            -
             | 
| 391 | 
            -
             | 
| 392 | 
            -
             | 
| 465 | 
            +
                        metrics.append(
         | 
| 466 | 
            +
                            HealthMetric(
         | 
| 467 | 
            +
                                name="socket_creation",
         | 
| 468 | 
            +
                                value=True,
         | 
| 469 | 
            +
                                status=HealthStatus.HEALTHY,
         | 
| 470 | 
            +
                                message="Socket creation successful",
         | 
| 471 | 
            +
                            )
         | 
| 472 | 
            +
                        )
         | 
| 393 473 | 
             
                    except Exception as e:
         | 
| 394 | 
            -
                        metrics.append( | 
| 395 | 
            -
                             | 
| 396 | 
            -
             | 
| 397 | 
            -
             | 
| 398 | 
            -
             | 
| 399 | 
            -
             | 
| 400 | 
            -
             | 
| 474 | 
            +
                        metrics.append(
         | 
| 475 | 
            +
                            HealthMetric(
         | 
| 476 | 
            +
                                name="socket_creation",
         | 
| 477 | 
            +
                                value=False,
         | 
| 478 | 
            +
                                status=HealthStatus.CRITICAL,
         | 
| 479 | 
            +
                                message=f"Failed to create socket: {e}",
         | 
| 480 | 
            +
                            )
         | 
| 481 | 
            +
                        )
         | 
| 482 | 
            +
             | 
| 401 483 | 
             
                    return metrics
         | 
| 402 484 |  | 
| 403 485 |  | 
| 404 486 | 
             
            class ServiceHealthChecker(HealthChecker):
         | 
| 405 487 | 
             
                """Health checker for service-specific metrics.
         | 
| 406 | 
            -
             | 
| 488 | 
            +
             | 
| 407 489 | 
             
                Monitors:
         | 
| 408 490 | 
             
                - Connected clients count
         | 
| 409 491 | 
             
                - Event processing rate
         | 
| 410 492 | 
             
                - Error rates
         | 
| 411 493 | 
             
                - Response times
         | 
| 412 494 | 
             
                """
         | 
| 413 | 
            -
             | 
| 414 | 
            -
                def __init__( | 
| 415 | 
            -
             | 
| 495 | 
            +
             | 
| 496 | 
            +
                def __init__(
         | 
| 497 | 
            +
                    self,
         | 
| 498 | 
            +
                    service_stats: Dict[str, Any],
         | 
| 499 | 
            +
                    max_clients: int = 1000,
         | 
| 500 | 
            +
                    max_error_rate: float = 0.1,
         | 
| 501 | 
            +
                ):
         | 
| 416 502 | 
             
                    """Initialize service health checker.
         | 
| 417 | 
            -
             | 
| 503 | 
            +
             | 
| 418 504 | 
             
                    Args:
         | 
| 419 505 | 
             
                        service_stats: Reference to service statistics dictionary
         | 
| 420 506 | 
             
                        max_clients: Maximum allowed connected clients
         | 
| @@ -426,15 +512,15 @@ class ServiceHealthChecker(HealthChecker): | |
| 426 512 | 
             
                    self.last_check_time = time.time()
         | 
| 427 513 | 
             
                    self.last_events_processed = 0
         | 
| 428 514 | 
             
                    self.logger = logging.getLogger(f"{__name__}.ServiceHealthChecker")
         | 
| 429 | 
            -
             | 
| 515 | 
            +
             | 
| 430 516 | 
             
                def get_name(self) -> str:
         | 
| 431 517 | 
             
                    return "service_health"
         | 
| 432 | 
            -
             | 
| 518 | 
            +
             | 
| 433 519 | 
             
                async def check_health(self) -> List[HealthMetric]:
         | 
| 434 520 | 
             
                    """Check service-specific health metrics."""
         | 
| 435 521 | 
             
                    metrics = []
         | 
| 436 522 | 
             
                    current_time = time.time()
         | 
| 437 | 
            -
             | 
| 523 | 
            +
             | 
| 438 524 | 
             
                    # Connected clients
         | 
| 439 525 | 
             
                    try:
         | 
| 440 526 | 
             
                        client_count = self.service_stats.get("clients_connected", 0)
         | 
| @@ -443,84 +529,104 @@ class ServiceHealthChecker(HealthChecker): | |
| 443 529 | 
             
                            client_status = HealthStatus.WARNING
         | 
| 444 530 | 
             
                        if client_count > self.max_clients:
         | 
| 445 531 | 
             
                            client_status = HealthStatus.CRITICAL
         | 
| 446 | 
            -
             | 
| 447 | 
            -
                        metrics.append( | 
| 448 | 
            -
                             | 
| 449 | 
            -
             | 
| 450 | 
            -
             | 
| 451 | 
            -
             | 
| 452 | 
            -
             | 
| 532 | 
            +
             | 
| 533 | 
            +
                        metrics.append(
         | 
| 534 | 
            +
                            HealthMetric(
         | 
| 535 | 
            +
                                name="connected_clients",
         | 
| 536 | 
            +
                                value=client_count,
         | 
| 537 | 
            +
                                status=client_status,
         | 
| 538 | 
            +
                                threshold=self.max_clients,
         | 
| 539 | 
            +
                            )
         | 
| 540 | 
            +
                        )
         | 
| 453 541 | 
             
                    except Exception as e:
         | 
| 454 | 
            -
                        metrics.append( | 
| 455 | 
            -
                             | 
| 456 | 
            -
             | 
| 457 | 
            -
             | 
| 458 | 
            -
             | 
| 459 | 
            -
             | 
| 460 | 
            -
             | 
| 542 | 
            +
                        metrics.append(
         | 
| 543 | 
            +
                            HealthMetric(
         | 
| 544 | 
            +
                                name="connected_clients",
         | 
| 545 | 
            +
                                value=-1,
         | 
| 546 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 547 | 
            +
                                message=f"Failed to get client count: {e}",
         | 
| 548 | 
            +
                            )
         | 
| 549 | 
            +
                        )
         | 
| 550 | 
            +
             | 
| 461 551 | 
             
                    # Event processing rate
         | 
| 462 552 | 
             
                    try:
         | 
| 463 553 | 
             
                        events_processed = self.service_stats.get("events_processed", 0)
         | 
| 464 554 | 
             
                        time_diff = current_time - self.last_check_time
         | 
| 465 | 
            -
             | 
| 555 | 
            +
             | 
| 466 556 | 
             
                        if time_diff > 0 and self.last_events_processed > 0:
         | 
| 467 557 | 
             
                            event_rate = (events_processed - self.last_events_processed) / time_diff
         | 
| 468 | 
            -
                            metrics.append( | 
| 469 | 
            -
                                 | 
| 470 | 
            -
             | 
| 471 | 
            -
             | 
| 472 | 
            -
             | 
| 473 | 
            -
             | 
| 474 | 
            -
             | 
| 558 | 
            +
                            metrics.append(
         | 
| 559 | 
            +
                                HealthMetric(
         | 
| 560 | 
            +
                                    name="event_processing_rate",
         | 
| 561 | 
            +
                                    value=round(event_rate, 2),
         | 
| 562 | 
            +
                                    status=HealthStatus.HEALTHY,
         | 
| 563 | 
            +
                                    unit="events/sec",
         | 
| 564 | 
            +
                                )
         | 
| 565 | 
            +
                            )
         | 
| 566 | 
            +
             | 
| 475 567 | 
             
                        self.last_events_processed = events_processed
         | 
| 476 | 
            -
             | 
| 568 | 
            +
             | 
| 477 569 | 
             
                        # Total events processed
         | 
| 478 | 
            -
                        metrics.append( | 
| 479 | 
            -
                             | 
| 480 | 
            -
             | 
| 481 | 
            -
             | 
| 482 | 
            -
             | 
| 570 | 
            +
                        metrics.append(
         | 
| 571 | 
            +
                            HealthMetric(
         | 
| 572 | 
            +
                                name="total_events_processed",
         | 
| 573 | 
            +
                                value=events_processed,
         | 
| 574 | 
            +
                                status=HealthStatus.HEALTHY,
         | 
| 575 | 
            +
                            )
         | 
| 576 | 
            +
                        )
         | 
| 483 577 | 
             
                    except Exception as e:
         | 
| 484 | 
            -
                        metrics.append( | 
| 485 | 
            -
                             | 
| 486 | 
            -
             | 
| 487 | 
            -
             | 
| 488 | 
            -
             | 
| 489 | 
            -
             | 
| 490 | 
            -
             | 
| 578 | 
            +
                        metrics.append(
         | 
| 579 | 
            +
                            HealthMetric(
         | 
| 580 | 
            +
                                name="event_processing_rate",
         | 
| 581 | 
            +
                                value=-1,
         | 
| 582 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 583 | 
            +
                                message=f"Failed to calculate event rate: {e}",
         | 
| 584 | 
            +
                            )
         | 
| 585 | 
            +
                        )
         | 
| 586 | 
            +
             | 
| 491 587 | 
             
                    # Error rate
         | 
| 492 588 | 
             
                    try:
         | 
| 493 589 | 
             
                        errors = self.service_stats.get("errors", 0)
         | 
| 494 | 
            -
                        total_events = self.service_stats.get( | 
| 590 | 
            +
                        total_events = self.service_stats.get(
         | 
| 591 | 
            +
                            "events_processed", 1
         | 
| 592 | 
            +
                        )  # Avoid division by zero
         | 
| 495 593 | 
             
                        error_rate = errors / max(total_events, 1)
         | 
| 496 | 
            -
             | 
| 594 | 
            +
             | 
| 497 595 | 
             
                        error_status = HealthStatus.HEALTHY
         | 
| 498 596 | 
             
                        if error_rate > self.max_error_rate * 0.5:
         | 
| 499 597 | 
             
                            error_status = HealthStatus.WARNING
         | 
| 500 598 | 
             
                        if error_rate > self.max_error_rate:
         | 
| 501 599 | 
             
                            error_status = HealthStatus.CRITICAL
         | 
| 502 | 
            -
             | 
| 503 | 
            -
                        metrics.append( | 
| 504 | 
            -
                             | 
| 505 | 
            -
             | 
| 506 | 
            -
             | 
| 507 | 
            -
             | 
| 508 | 
            -
             | 
| 509 | 
            -
             | 
| 510 | 
            -
             | 
| 511 | 
            -
                         | 
| 512 | 
            -
             | 
| 513 | 
            -
             | 
| 514 | 
            -
                             | 
| 515 | 
            -
             | 
| 600 | 
            +
             | 
| 601 | 
            +
                        metrics.append(
         | 
| 602 | 
            +
                            HealthMetric(
         | 
| 603 | 
            +
                                name="error_rate",
         | 
| 604 | 
            +
                                value=round(error_rate, 4),
         | 
| 605 | 
            +
                                status=error_status,
         | 
| 606 | 
            +
                                threshold=self.max_error_rate,
         | 
| 607 | 
            +
                                unit="ratio",
         | 
| 608 | 
            +
                            )
         | 
| 609 | 
            +
                        )
         | 
| 610 | 
            +
             | 
| 611 | 
            +
                        metrics.append(
         | 
| 612 | 
            +
                            HealthMetric(
         | 
| 613 | 
            +
                                name="total_errors",
         | 
| 614 | 
            +
                                value=errors,
         | 
| 615 | 
            +
                                status=HealthStatus.HEALTHY
         | 
| 616 | 
            +
                                if errors == 0
         | 
| 617 | 
            +
                                else HealthStatus.WARNING,
         | 
| 618 | 
            +
                            )
         | 
| 619 | 
            +
                        )
         | 
| 516 620 | 
             
                    except Exception as e:
         | 
| 517 | 
            -
                        metrics.append( | 
| 518 | 
            -
                             | 
| 519 | 
            -
             | 
| 520 | 
            -
             | 
| 521 | 
            -
             | 
| 522 | 
            -
             | 
| 523 | 
            -
             | 
| 621 | 
            +
                        metrics.append(
         | 
| 622 | 
            +
                            HealthMetric(
         | 
| 623 | 
            +
                                name="error_rate",
         | 
| 624 | 
            +
                                value=-1,
         | 
| 625 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 626 | 
            +
                                message=f"Failed to calculate error rate: {e}",
         | 
| 627 | 
            +
                            )
         | 
| 628 | 
            +
                        )
         | 
| 629 | 
            +
             | 
| 524 630 | 
             
                    # Last activity timestamp
         | 
| 525 631 | 
             
                    try:
         | 
| 526 632 | 
             
                        last_activity = self.service_stats.get("last_activity")
         | 
| @@ -529,56 +635,66 @@ class ServiceHealthChecker(HealthChecker): | |
| 529 635 | 
             
                            if isinstance(last_activity, str):
         | 
| 530 636 | 
             
                                try:
         | 
| 531 637 | 
             
                                    from dateutil.parser import parse
         | 
| 638 | 
            +
             | 
| 532 639 | 
             
                                    last_activity_dt = parse(last_activity)
         | 
| 533 640 | 
             
                                    last_activity_timestamp = last_activity_dt.timestamp()
         | 
| 534 641 | 
             
                                except ImportError:
         | 
| 535 642 | 
             
                                    # Fallback: try to parse ISO format manually
         | 
| 536 643 | 
             
                                    try:
         | 
| 537 644 | 
             
                                        from datetime import datetime
         | 
| 538 | 
            -
             | 
| 539 | 
            -
                                         | 
| 645 | 
            +
             | 
| 646 | 
            +
                                        clean_timestamp = last_activity.rstrip("Z")
         | 
| 647 | 
            +
                                        last_activity_dt = datetime.fromisoformat(
         | 
| 648 | 
            +
                                            clean_timestamp.replace("T", " ")
         | 
| 649 | 
            +
                                        )
         | 
| 540 650 | 
             
                                        last_activity_timestamp = last_activity_dt.timestamp()
         | 
| 541 651 | 
             
                                    except Exception:
         | 
| 542 652 | 
             
                                        # Final fallback: treat as current time
         | 
| 543 653 | 
             
                                        last_activity_timestamp = current_time
         | 
| 544 654 | 
             
                            else:
         | 
| 545 655 | 
             
                                last_activity_timestamp = float(last_activity)
         | 
| 546 | 
            -
             | 
| 656 | 
            +
             | 
| 547 657 | 
             
                            time_since_activity = current_time - last_activity_timestamp
         | 
| 548 658 | 
             
                            activity_status = HealthStatus.HEALTHY
         | 
| 549 659 | 
             
                            if time_since_activity > 300:  # 5 minutes
         | 
| 550 660 | 
             
                                activity_status = HealthStatus.WARNING
         | 
| 551 661 | 
             
                            if time_since_activity > 1800:  # 30 minutes
         | 
| 552 662 | 
             
                                activity_status = HealthStatus.CRITICAL
         | 
| 553 | 
            -
             | 
| 554 | 
            -
                            metrics.append( | 
| 555 | 
            -
                                 | 
| 556 | 
            -
             | 
| 557 | 
            -
             | 
| 558 | 
            -
             | 
| 559 | 
            -
             | 
| 663 | 
            +
             | 
| 664 | 
            +
                            metrics.append(
         | 
| 665 | 
            +
                                HealthMetric(
         | 
| 666 | 
            +
                                    name="time_since_last_activity",
         | 
| 667 | 
            +
                                    value=round(time_since_activity, 2),
         | 
| 668 | 
            +
                                    status=activity_status,
         | 
| 669 | 
            +
                                    unit="seconds",
         | 
| 670 | 
            +
                                )
         | 
| 671 | 
            +
                            )
         | 
| 560 672 | 
             
                        else:
         | 
| 561 | 
            -
                            metrics.append( | 
| 673 | 
            +
                            metrics.append(
         | 
| 674 | 
            +
                                HealthMetric(
         | 
| 675 | 
            +
                                    name="time_since_last_activity",
         | 
| 676 | 
            +
                                    value=-1,
         | 
| 677 | 
            +
                                    status=HealthStatus.WARNING,
         | 
| 678 | 
            +
                                    message="No last activity recorded",
         | 
| 679 | 
            +
                                )
         | 
| 680 | 
            +
                            )
         | 
| 681 | 
            +
                    except Exception as e:
         | 
| 682 | 
            +
                        metrics.append(
         | 
| 683 | 
            +
                            HealthMetric(
         | 
| 562 684 | 
             
                                name="time_since_last_activity",
         | 
| 563 685 | 
             
                                value=-1,
         | 
| 564 | 
            -
                                status=HealthStatus. | 
| 565 | 
            -
                                message=" | 
| 566 | 
            -
                            ) | 
| 567 | 
            -
             | 
| 568 | 
            -
             | 
| 569 | 
            -
                            name="time_since_last_activity",
         | 
| 570 | 
            -
                            value=-1,
         | 
| 571 | 
            -
                            status=HealthStatus.UNKNOWN,
         | 
| 572 | 
            -
                            message=f"Failed to parse last activity: {e}"
         | 
| 573 | 
            -
                        ))
         | 
| 574 | 
            -
                    
         | 
| 686 | 
            +
                                status=HealthStatus.UNKNOWN,
         | 
| 687 | 
            +
                                message=f"Failed to parse last activity: {e}",
         | 
| 688 | 
            +
                            )
         | 
| 689 | 
            +
                        )
         | 
| 690 | 
            +
             | 
| 575 691 | 
             
                    self.last_check_time = current_time
         | 
| 576 692 | 
             
                    return metrics
         | 
| 577 693 |  | 
| 578 694 |  | 
| 579 695 | 
             
            class AdvancedHealthMonitor:
         | 
| 580 696 | 
             
                """Advanced health monitoring system with configurable checks and thresholds.
         | 
| 581 | 
            -
             | 
| 697 | 
            +
             | 
| 582 698 | 
             
                Provides comprehensive health monitoring including:
         | 
| 583 699 | 
             
                - Multiple health checker integration
         | 
| 584 700 | 
             
                - Configurable check intervals and thresholds
         | 
| @@ -586,91 +702,99 @@ class AdvancedHealthMonitor: | |
| 586 702 | 
             
                - Status aggregation and reporting
         | 
| 587 703 | 
             
                - Integration with recovery systems
         | 
| 588 704 | 
             
                """
         | 
| 589 | 
            -
             | 
| 705 | 
            +
             | 
| 590 706 | 
             
                def __init__(self, config: Optional[Dict[str, Any]] = None):
         | 
| 591 707 | 
             
                    """Initialize advanced health monitor.
         | 
| 592 | 
            -
             | 
| 708 | 
            +
             | 
| 593 709 | 
             
                    Args:
         | 
| 594 710 | 
             
                        config: Configuration dictionary for health monitoring
         | 
| 595 711 | 
             
                    """
         | 
| 596 712 | 
             
                    self.config = config or {}
         | 
| 597 713 | 
             
                    self.logger = logging.getLogger(f"{__name__}.AdvancedHealthMonitor")
         | 
| 598 | 
            -
             | 
| 714 | 
            +
             | 
| 599 715 | 
             
                    # Configuration with defaults
         | 
| 600 | 
            -
                    self.check_interval = self.config.get( | 
| 601 | 
            -
                    self.history_size = self.config.get( | 
| 602 | 
            -
                    self.aggregation_window = self.config.get( | 
| 603 | 
            -
             | 
| 716 | 
            +
                    self.check_interval = self.config.get("check_interval", 30)
         | 
| 717 | 
            +
                    self.history_size = self.config.get("history_size", 100)
         | 
| 718 | 
            +
                    self.aggregation_window = self.config.get(
         | 
| 719 | 
            +
                        "aggregation_window", 300
         | 
| 720 | 
            +
                    )  # 5 minutes
         | 
| 721 | 
            +
             | 
| 604 722 | 
             
                    # Health checkers
         | 
| 605 723 | 
             
                    self.checkers: List[HealthChecker] = []
         | 
| 606 | 
            -
             | 
| 724 | 
            +
             | 
| 607 725 | 
             
                    # Health history
         | 
| 608 726 | 
             
                    self.health_history: deque = deque(maxlen=self.history_size)
         | 
| 609 | 
            -
             | 
| 727 | 
            +
             | 
| 610 728 | 
             
                    # Monitoring state
         | 
| 611 729 | 
             
                    self.monitoring = False
         | 
| 612 730 | 
             
                    self.monitor_task: Optional[asyncio.Task] = None
         | 
| 613 731 | 
             
                    self.last_check_result: Optional[HealthCheckResult] = None
         | 
| 614 | 
            -
             | 
| 732 | 
            +
             | 
| 615 733 | 
             
                    # Health callbacks for recovery integration
         | 
| 616 734 | 
             
                    self.health_callbacks: List[Callable[[HealthCheckResult], None]] = []
         | 
| 617 | 
            -
             | 
| 735 | 
            +
             | 
| 618 736 | 
             
                    # Initialize metrics
         | 
| 619 737 | 
             
                    self.monitoring_stats = {
         | 
| 620 | 
            -
                         | 
| 621 | 
            -
                         | 
| 622 | 
            -
                         | 
| 623 | 
            -
                         | 
| 738 | 
            +
                        "checks_performed": 0,
         | 
| 739 | 
            +
                        "checks_failed": 0,
         | 
| 740 | 
            +
                        "average_check_duration_ms": 0,
         | 
| 741 | 
            +
                        "last_check_timestamp": None,
         | 
| 624 742 | 
             
                    }
         | 
| 625 | 
            -
             | 
| 743 | 
            +
             | 
| 626 744 | 
             
                    self.logger.info("Advanced health monitor initialized")
         | 
| 627 | 
            -
             | 
| 745 | 
            +
             | 
| 628 746 | 
             
                def add_checker(self, checker: HealthChecker) -> None:
         | 
| 629 747 | 
             
                    """Add a health checker to the monitoring system."""
         | 
| 630 748 | 
             
                    self.checkers.append(checker)
         | 
| 631 749 | 
             
                    self.logger.info(f"Added health checker: {checker.get_name()}")
         | 
| 632 | 
            -
             | 
| 633 | 
            -
                def add_health_callback( | 
| 750 | 
            +
             | 
| 751 | 
            +
                def add_health_callback(
         | 
| 752 | 
            +
                    self, callback: Callable[[HealthCheckResult], None]
         | 
| 753 | 
            +
                ) -> None:
         | 
| 634 754 | 
             
                    """Add a callback to be called when health checks complete.
         | 
| 635 | 
            -
             | 
| 755 | 
            +
             | 
| 636 756 | 
             
                    Args:
         | 
| 637 757 | 
             
                        callback: Function to call with HealthCheckResult
         | 
| 638 758 | 
             
                    """
         | 
| 639 759 | 
             
                    self.health_callbacks.append(callback)
         | 
| 640 760 | 
             
                    self.logger.debug(f"Added health callback: {callback.__name__}")
         | 
| 641 | 
            -
             | 
| 761 | 
            +
             | 
| 642 762 | 
             
                async def perform_health_check(self) -> HealthCheckResult:
         | 
| 643 763 | 
             
                    """Perform comprehensive health check using all registered checkers."""
         | 
| 644 764 | 
             
                    start_time = time.time()
         | 
| 645 765 | 
             
                    all_metrics = []
         | 
| 646 766 | 
             
                    errors = []
         | 
| 647 | 
            -
             | 
| 767 | 
            +
             | 
| 648 768 | 
             
                    # Run all health checkers
         | 
| 649 769 | 
             
                    for checker in self.checkers:
         | 
| 650 770 | 
             
                        try:
         | 
| 651 771 | 
             
                            checker_start = time.time()
         | 
| 652 772 | 
             
                            metrics = await checker.check_health()
         | 
| 653 773 | 
             
                            checker_duration = (time.time() - checker_start) * 1000
         | 
| 654 | 
            -
             | 
| 774 | 
            +
             | 
| 655 775 | 
             
                            all_metrics.extend(metrics)
         | 
| 656 | 
            -
                            self.logger.debug( | 
| 657 | 
            -
             | 
| 776 | 
            +
                            self.logger.debug(
         | 
| 777 | 
            +
                                f"Health checker {checker.get_name()} completed in {checker_duration:.2f}ms"
         | 
| 778 | 
            +
                            )
         | 
| 779 | 
            +
             | 
| 658 780 | 
             
                        except Exception as e:
         | 
| 659 781 | 
             
                            error_msg = f"Health checker {checker.get_name()} failed: {e}"
         | 
| 660 782 | 
             
                            errors.append(error_msg)
         | 
| 661 783 | 
             
                            self.logger.error(error_msg)
         | 
| 662 | 
            -
             | 
| 784 | 
            +
             | 
| 663 785 | 
             
                            # Add error metric
         | 
| 664 | 
            -
                            all_metrics.append( | 
| 665 | 
            -
                                 | 
| 666 | 
            -
             | 
| 667 | 
            -
             | 
| 668 | 
            -
             | 
| 669 | 
            -
             | 
| 670 | 
            -
             | 
| 786 | 
            +
                            all_metrics.append(
         | 
| 787 | 
            +
                                HealthMetric(
         | 
| 788 | 
            +
                                    name=f"{checker.get_name()}_error",
         | 
| 789 | 
            +
                                    value=str(e),
         | 
| 790 | 
            +
                                    status=HealthStatus.UNKNOWN,
         | 
| 791 | 
            +
                                    message=error_msg,
         | 
| 792 | 
            +
                                )
         | 
| 793 | 
            +
                            )
         | 
| 794 | 
            +
             | 
| 671 795 | 
             
                    # Determine overall status
         | 
| 672 796 | 
             
                    overall_status = self._determine_overall_status(all_metrics)
         | 
| 673 | 
            -
             | 
| 797 | 
            +
             | 
| 674 798 | 
             
                    # Create result
         | 
| 675 799 | 
             
                    duration_ms = (time.time() - start_time) * 1000
         | 
| 676 800 | 
             
                    result = HealthCheckResult(
         | 
| @@ -678,87 +802,91 @@ class AdvancedHealthMonitor: | |
| 678 802 | 
             
                        metrics=all_metrics,
         | 
| 679 803 | 
             
                        timestamp=start_time,
         | 
| 680 804 | 
             
                        duration_ms=duration_ms,
         | 
| 681 | 
            -
                        errors=errors
         | 
| 805 | 
            +
                        errors=errors,
         | 
| 682 806 | 
             
                    )
         | 
| 683 | 
            -
             | 
| 807 | 
            +
             | 
| 684 808 | 
             
                    # Update statistics
         | 
| 685 | 
            -
                    self.monitoring_stats[ | 
| 809 | 
            +
                    self.monitoring_stats["checks_performed"] += 1
         | 
| 686 810 | 
             
                    if errors:
         | 
| 687 | 
            -
                        self.monitoring_stats[ | 
| 688 | 
            -
             | 
| 811 | 
            +
                        self.monitoring_stats["checks_failed"] += 1
         | 
| 812 | 
            +
             | 
| 689 813 | 
             
                    # Update average duration
         | 
| 690 | 
            -
                    current_avg = self.monitoring_stats[ | 
| 691 | 
            -
                    checks_count = self.monitoring_stats[ | 
| 692 | 
            -
                    self.monitoring_stats[ | 
| 693 | 
            -
                         | 
| 694 | 
            -
                    )
         | 
| 695 | 
            -
                    self.monitoring_stats[ | 
| 696 | 
            -
             | 
| 814 | 
            +
                    current_avg = self.monitoring_stats["average_check_duration_ms"]
         | 
| 815 | 
            +
                    checks_count = self.monitoring_stats["checks_performed"]
         | 
| 816 | 
            +
                    self.monitoring_stats["average_check_duration_ms"] = (
         | 
| 817 | 
            +
                        current_avg * (checks_count - 1) + duration_ms
         | 
| 818 | 
            +
                    ) / checks_count
         | 
| 819 | 
            +
                    self.monitoring_stats["last_check_timestamp"] = time.time()
         | 
| 820 | 
            +
             | 
| 697 821 | 
             
                    # Store in history
         | 
| 698 822 | 
             
                    self.health_history.append(result)
         | 
| 699 823 | 
             
                    self.last_check_result = result
         | 
| 700 | 
            -
             | 
| 824 | 
            +
             | 
| 701 825 | 
             
                    # Notify callbacks
         | 
| 702 826 | 
             
                    for callback in self.health_callbacks:
         | 
| 703 827 | 
             
                        try:
         | 
| 704 828 | 
             
                            callback(result)
         | 
| 705 829 | 
             
                        except Exception as e:
         | 
| 706 830 | 
             
                            self.logger.error(f"Health callback {callback.__name__} failed: {e}")
         | 
| 707 | 
            -
             | 
| 708 | 
            -
                    self.logger.debug( | 
| 709 | 
            -
             | 
| 710 | 
            -
             | 
| 711 | 
            -
             | 
| 831 | 
            +
             | 
| 832 | 
            +
                    self.logger.debug(
         | 
| 833 | 
            +
                        f"Health check completed: {overall_status.value} "
         | 
| 834 | 
            +
                        f"({len(all_metrics)} metrics, {len(errors)} errors, "
         | 
| 835 | 
            +
                        f"{duration_ms:.2f}ms)"
         | 
| 836 | 
            +
                    )
         | 
| 837 | 
            +
             | 
| 712 838 | 
             
                    return result
         | 
| 713 | 
            -
             | 
| 839 | 
            +
             | 
| 714 840 | 
             
                def _determine_overall_status(self, metrics: List[HealthMetric]) -> HealthStatus:
         | 
| 715 841 | 
             
                    """Determine overall health status from individual metrics."""
         | 
| 716 842 | 
             
                    if not metrics:
         | 
| 717 843 | 
             
                        return HealthStatus.UNKNOWN
         | 
| 718 | 
            -
             | 
| 844 | 
            +
             | 
| 719 845 | 
             
                    # Count metrics by status
         | 
| 720 846 | 
             
                    status_counts = {status: 0 for status in HealthStatus}
         | 
| 721 847 | 
             
                    for metric in metrics:
         | 
| 722 848 | 
             
                        status_counts[metric.status] += 1
         | 
| 723 | 
            -
             | 
| 849 | 
            +
             | 
| 724 850 | 
             
                    # Determine overall status based on counts
         | 
| 725 851 | 
             
                    total_metrics = len(metrics)
         | 
| 726 | 
            -
             | 
| 852 | 
            +
             | 
| 727 853 | 
             
                    # If any critical metrics, overall is critical
         | 
| 728 854 | 
             
                    if status_counts[HealthStatus.CRITICAL] > 0:
         | 
| 729 855 | 
             
                        return HealthStatus.CRITICAL
         | 
| 730 | 
            -
             | 
| 856 | 
            +
             | 
| 731 857 | 
             
                    # If more than 30% warning metrics, overall is warning
         | 
| 732 858 | 
             
                    warning_ratio = status_counts[HealthStatus.WARNING] / total_metrics
         | 
| 733 859 | 
             
                    if warning_ratio > 0.3:
         | 
| 734 860 | 
             
                        return HealthStatus.WARNING
         | 
| 735 | 
            -
             | 
| 861 | 
            +
             | 
| 736 862 | 
             
                    # If any warning metrics but less than 30%, still healthy
         | 
| 737 863 | 
             
                    if status_counts[HealthStatus.WARNING] > 0:
         | 
| 738 864 | 
             
                        return HealthStatus.HEALTHY
         | 
| 739 | 
            -
             | 
| 865 | 
            +
             | 
| 740 866 | 
             
                    # If any unknown metrics, overall is unknown
         | 
| 741 867 | 
             
                    if status_counts[HealthStatus.UNKNOWN] > 0:
         | 
| 742 868 | 
             
                        return HealthStatus.UNKNOWN
         | 
| 743 | 
            -
             | 
| 869 | 
            +
             | 
| 744 870 | 
             
                    # All metrics healthy
         | 
| 745 871 | 
             
                    return HealthStatus.HEALTHY
         | 
| 746 | 
            -
             | 
| 872 | 
            +
             | 
| 747 873 | 
             
                def start_monitoring(self) -> None:
         | 
| 748 874 | 
             
                    """Start continuous health monitoring."""
         | 
| 749 875 | 
             
                    if self.monitoring:
         | 
| 750 876 | 
             
                        self.logger.warning("Health monitoring is already running")
         | 
| 751 877 | 
             
                        return
         | 
| 752 | 
            -
             | 
| 878 | 
            +
             | 
| 753 879 | 
             
                    self.monitoring = True
         | 
| 754 880 | 
             
                    self.monitor_task = asyncio.create_task(self._monitoring_loop())
         | 
| 755 | 
            -
                    self.logger.info( | 
| 756 | 
            -
             | 
| 881 | 
            +
                    self.logger.info(
         | 
| 882 | 
            +
                        f"Started health monitoring with {self.check_interval}s interval"
         | 
| 883 | 
            +
                    )
         | 
| 884 | 
            +
             | 
| 757 885 | 
             
                async def stop_monitoring(self) -> None:
         | 
| 758 886 | 
             
                    """Stop continuous health monitoring."""
         | 
| 759 887 | 
             
                    if not self.monitoring:
         | 
| 760 888 | 
             
                        return
         | 
| 761 | 
            -
             | 
| 889 | 
            +
             | 
| 762 890 | 
             
                    self.monitoring = False
         | 
| 763 891 | 
             
                    if self.monitor_task:
         | 
| 764 892 | 
             
                        self.monitor_task.cancel()
         | 
| @@ -767,9 +895,9 @@ class AdvancedHealthMonitor: | |
| 767 895 | 
             
                        except asyncio.CancelledError:
         | 
| 768 896 | 
             
                            pass
         | 
| 769 897 | 
             
                        self.monitor_task = None
         | 
| 770 | 
            -
             | 
| 898 | 
            +
             | 
| 771 899 | 
             
                    self.logger.info("Stopped health monitoring")
         | 
| 772 | 
            -
             | 
| 900 | 
            +
             | 
| 773 901 | 
             
                async def _monitoring_loop(self) -> None:
         | 
| 774 902 | 
             
                    """Continuous health monitoring loop."""
         | 
| 775 903 | 
             
                    try:
         | 
| @@ -778,76 +906,79 @@ class AdvancedHealthMonitor: | |
| 778 906 | 
             
                                await self.perform_health_check()
         | 
| 779 907 | 
             
                            except Exception as e:
         | 
| 780 908 | 
             
                                self.logger.error(f"Error during health check: {e}")
         | 
| 781 | 
            -
             | 
| 909 | 
            +
             | 
| 782 910 | 
             
                            # Wait for next check
         | 
| 783 911 | 
             
                            await asyncio.sleep(self.check_interval)
         | 
| 784 912 | 
             
                    except asyncio.CancelledError:
         | 
| 785 913 | 
             
                        self.logger.debug("Health monitoring loop cancelled")
         | 
| 786 914 | 
             
                    except Exception as e:
         | 
| 787 915 | 
             
                        self.logger.error(f"Health monitoring loop error: {e}")
         | 
| 788 | 
            -
             | 
| 916 | 
            +
             | 
| 789 917 | 
             
                def get_current_status(self) -> Optional[HealthCheckResult]:
         | 
| 790 918 | 
             
                    """Get the most recent health check result."""
         | 
| 791 919 | 
             
                    return self.last_check_result
         | 
| 792 | 
            -
             | 
| 793 | 
            -
                def get_health_history( | 
| 920 | 
            +
             | 
| 921 | 
            +
                def get_health_history(
         | 
| 922 | 
            +
                    self, limit: Optional[int] = None
         | 
| 923 | 
            +
                ) -> List[HealthCheckResult]:
         | 
| 794 924 | 
             
                    """Get health check history.
         | 
| 795 | 
            -
             | 
| 925 | 
            +
             | 
| 796 926 | 
             
                    Args:
         | 
| 797 927 | 
             
                        limit: Maximum number of results to return
         | 
| 798 | 
            -
             | 
| 928 | 
            +
             | 
| 799 929 | 
             
                    Returns:
         | 
| 800 930 | 
             
                        List of health check results, newest first
         | 
| 801 931 | 
             
                    """
         | 
| 802 932 | 
             
                    history = list(self.health_history)
         | 
| 803 933 | 
             
                    history.reverse()  # Newest first
         | 
| 804 | 
            -
             | 
| 934 | 
            +
             | 
| 805 935 | 
             
                    if limit:
         | 
| 806 936 | 
             
                        history = history[:limit]
         | 
| 807 | 
            -
             | 
| 937 | 
            +
             | 
| 808 938 | 
             
                    return history
         | 
| 809 | 
            -
             | 
| 810 | 
            -
                def get_aggregated_status( | 
| 939 | 
            +
             | 
| 940 | 
            +
                def get_aggregated_status(
         | 
| 941 | 
            +
                    self, window_seconds: Optional[int] = None
         | 
| 942 | 
            +
                ) -> Dict[str, Any]:
         | 
| 811 943 | 
             
                    """Get aggregated health status over a time window.
         | 
| 812 | 
            -
             | 
| 944 | 
            +
             | 
| 813 945 | 
             
                    Args:
         | 
| 814 946 | 
             
                        window_seconds: Time window for aggregation (defaults to configured window)
         | 
| 815 | 
            -
             | 
| 947 | 
            +
             | 
| 816 948 | 
             
                    Returns:
         | 
| 817 949 | 
             
                        Dictionary with aggregated health statistics
         | 
| 818 950 | 
             
                    """
         | 
| 819 951 | 
             
                    window_seconds = window_seconds or self.aggregation_window
         | 
| 820 952 | 
             
                    current_time = time.time()
         | 
| 821 953 | 
             
                    cutoff_time = current_time - window_seconds
         | 
| 822 | 
            -
             | 
| 954 | 
            +
             | 
| 823 955 | 
             
                    # Filter history to time window
         | 
| 824 956 | 
             
                    recent_results = [
         | 
| 825 | 
            -
                        result for result in self.health_history
         | 
| 826 | 
            -
                        if result.timestamp >= cutoff_time
         | 
| 957 | 
            +
                        result for result in self.health_history if result.timestamp >= cutoff_time
         | 
| 827 958 | 
             
                    ]
         | 
| 828 | 
            -
             | 
| 959 | 
            +
             | 
| 829 960 | 
             
                    if not recent_results:
         | 
| 830 961 | 
             
                        return {
         | 
| 831 | 
            -
                             | 
| 832 | 
            -
                             | 
| 833 | 
            -
                             | 
| 834 | 
            -
                             | 
| 962 | 
            +
                            "period": "no_data",
         | 
| 963 | 
            +
                            "window_seconds": window_seconds,
         | 
| 964 | 
            +
                            "checks_count": 0,
         | 
| 965 | 
            +
                            "overall_status": HealthStatus.UNKNOWN.value,
         | 
| 835 966 | 
             
                        }
         | 
| 836 | 
            -
             | 
| 967 | 
            +
             | 
| 837 968 | 
             
                    # Aggregate statistics
         | 
| 838 969 | 
             
                    status_counts = {status: 0 for status in HealthStatus}
         | 
| 839 970 | 
             
                    total_metrics = 0
         | 
| 840 971 | 
             
                    total_errors = 0
         | 
| 841 972 | 
             
                    total_duration_ms = 0
         | 
| 842 | 
            -
             | 
| 973 | 
            +
             | 
| 843 974 | 
             
                    for result in recent_results:
         | 
| 844 975 | 
             
                        status_counts[result.overall_status] += 1
         | 
| 845 976 | 
             
                        total_metrics += len(result.metrics)
         | 
| 846 977 | 
             
                        total_errors += len(result.errors)
         | 
| 847 978 | 
             
                        total_duration_ms += result.duration_ms
         | 
| 848 | 
            -
             | 
| 979 | 
            +
             | 
| 849 980 | 
             
                    checks_count = len(recent_results)
         | 
| 850 | 
            -
             | 
| 981 | 
            +
             | 
| 851 982 | 
             
                    # Determine aggregated status
         | 
| 852 983 | 
             
                    if status_counts[HealthStatus.CRITICAL] > 0:
         | 
| 853 984 | 
             
                        aggregated_status = HealthStatus.CRITICAL
         | 
| @@ -857,37 +988,49 @@ class AdvancedHealthMonitor: | |
| 857 988 | 
             
                        aggregated_status = HealthStatus.UNKNOWN
         | 
| 858 989 | 
             
                    else:
         | 
| 859 990 | 
             
                        aggregated_status = HealthStatus.HEALTHY
         | 
| 860 | 
            -
             | 
| 991 | 
            +
             | 
| 861 992 | 
             
                    return {
         | 
| 862 | 
            -
                         | 
| 863 | 
            -
                         | 
| 864 | 
            -
                         | 
| 865 | 
            -
                         | 
| 866 | 
            -
                         | 
| 867 | 
            -
             | 
| 868 | 
            -
                         | 
| 869 | 
            -
                         | 
| 870 | 
            -
             | 
| 993 | 
            +
                        "period": f"last_{window_seconds}_seconds",
         | 
| 994 | 
            +
                        "window_seconds": window_seconds,
         | 
| 995 | 
            +
                        "checks_count": checks_count,
         | 
| 996 | 
            +
                        "overall_status": aggregated_status.value,
         | 
| 997 | 
            +
                        "status_distribution": {
         | 
| 998 | 
            +
                            status.value: count for status, count in status_counts.items()
         | 
| 999 | 
            +
                        },
         | 
| 1000 | 
            +
                        "average_metrics_per_check": (
         | 
| 1001 | 
            +
                            round(total_metrics / checks_count, 2) if checks_count > 0 else 0
         | 
| 1002 | 
            +
                        ),
         | 
| 1003 | 
            +
                        "total_errors": total_errors,
         | 
| 1004 | 
            +
                        "average_duration_ms": (
         | 
| 1005 | 
            +
                            round(total_duration_ms / checks_count, 2) if checks_count > 0 else 0
         | 
| 1006 | 
            +
                        ),
         | 
| 1007 | 
            +
                        "monitoring_stats": dict(self.monitoring_stats),
         | 
| 871 1008 | 
             
                    }
         | 
| 872 | 
            -
             | 
| 1009 | 
            +
             | 
| 873 1010 | 
             
                def export_diagnostics(self) -> Dict[str, Any]:
         | 
| 874 1011 | 
             
                    """Export comprehensive diagnostics information."""
         | 
| 875 1012 | 
             
                    return {
         | 
| 876 | 
            -
                         | 
| 877 | 
            -
                             | 
| 878 | 
            -
                             | 
| 879 | 
            -
                             | 
| 880 | 
            -
                             | 
| 881 | 
            -
                             | 
| 882 | 
            -
                             | 
| 1013 | 
            +
                        "monitor_info": {
         | 
| 1014 | 
            +
                            "check_interval": self.check_interval,
         | 
| 1015 | 
            +
                            "history_size": self.history_size,
         | 
| 1016 | 
            +
                            "aggregation_window": self.aggregation_window,
         | 
| 1017 | 
            +
                            "monitoring_active": self.monitoring,
         | 
| 1018 | 
            +
                            "checkers_count": len(self.checkers),
         | 
| 1019 | 
            +
                            "callbacks_count": len(self.health_callbacks),
         | 
| 883 1020 | 
             
                        },
         | 
| 884 | 
            -
                         | 
| 885 | 
            -
                         | 
| 886 | 
            -
                         | 
| 887 | 
            -
                         | 
| 888 | 
            -
                         | 
| 889 | 
            -
             | 
| 890 | 
            -
             | 
| 891 | 
            -
                             | 
| 892 | 
            -
             | 
| 893 | 
            -
             | 
| 1021 | 
            +
                        "checkers": [checker.get_name() for checker in self.checkers],
         | 
| 1022 | 
            +
                        "current_status": self.last_check_result.to_dict()
         | 
| 1023 | 
            +
                        if self.last_check_result
         | 
| 1024 | 
            +
                        else None,
         | 
| 1025 | 
            +
                        "aggregated_status": self.get_aggregated_status(),
         | 
| 1026 | 
            +
                        "monitoring_stats": dict(self.monitoring_stats),
         | 
| 1027 | 
            +
                        "history_summary": {
         | 
| 1028 | 
            +
                            "total_checks": len(self.health_history),
         | 
| 1029 | 
            +
                            "oldest_check": self.health_history[0].timestamp
         | 
| 1030 | 
            +
                            if self.health_history
         | 
| 1031 | 
            +
                            else None,
         | 
| 1032 | 
            +
                            "newest_check": self.health_history[-1].timestamp
         | 
| 1033 | 
            +
                            if self.health_history
         | 
| 1034 | 
            +
                            else None,
         | 
| 1035 | 
            +
                        },
         | 
| 1036 | 
            +
                    }
         |