claude-mpm 4.1.0__py3-none-any.whl → 4.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_mpm/BUILD_NUMBER +1 -1
- claude_mpm/VERSION +1 -1
- claude_mpm/__main__.py +1 -1
- claude_mpm/agents/BASE_PM.md +74 -46
- claude_mpm/agents/INSTRUCTIONS.md +11 -153
- claude_mpm/agents/WORKFLOW.md +61 -321
- claude_mpm/agents/__init__.py +11 -11
- claude_mpm/agents/agent_loader.py +23 -20
- claude_mpm/agents/agent_loader_integration.py +1 -1
- claude_mpm/agents/agents_metadata.py +27 -0
- claude_mpm/agents/async_agent_loader.py +5 -8
- claude_mpm/agents/base_agent_loader.py +36 -25
- claude_mpm/agents/frontmatter_validator.py +6 -6
- claude_mpm/agents/schema/agent_schema.json +1 -1
- claude_mpm/agents/system_agent_config.py +9 -9
- claude_mpm/agents/templates/api_qa.json +47 -2
- claude_mpm/agents/templates/imagemagick.json +256 -0
- claude_mpm/agents/templates/qa.json +41 -2
- claude_mpm/agents/templates/ticketing.json +5 -5
- claude_mpm/agents/templates/web_qa.json +133 -58
- claude_mpm/agents/templates/web_ui.json +3 -3
- claude_mpm/cli/__init__.py +51 -46
- claude_mpm/cli/__main__.py +1 -1
- claude_mpm/cli/commands/__init__.py +10 -12
- claude_mpm/cli/commands/agent_manager.py +186 -181
- claude_mpm/cli/commands/agents.py +271 -268
- claude_mpm/cli/commands/aggregate.py +30 -29
- claude_mpm/cli/commands/cleanup.py +50 -44
- claude_mpm/cli/commands/cleanup_orphaned_agents.py +25 -25
- claude_mpm/cli/commands/config.py +162 -127
- claude_mpm/cli/commands/doctor.py +52 -62
- claude_mpm/cli/commands/info.py +37 -25
- claude_mpm/cli/commands/mcp.py +3 -7
- claude_mpm/cli/commands/mcp_command_router.py +14 -18
- claude_mpm/cli/commands/mcp_install_commands.py +28 -23
- claude_mpm/cli/commands/mcp_pipx_config.py +58 -49
- claude_mpm/cli/commands/mcp_server_commands.py +23 -17
- claude_mpm/cli/commands/memory.py +192 -141
- claude_mpm/cli/commands/monitor.py +117 -88
- claude_mpm/cli/commands/run.py +120 -84
- claude_mpm/cli/commands/run_config_checker.py +4 -5
- claude_mpm/cli/commands/socketio_monitor.py +17 -19
- claude_mpm/cli/commands/tickets.py +92 -92
- claude_mpm/cli/parser.py +1 -5
- claude_mpm/cli/parsers/__init__.py +1 -1
- claude_mpm/cli/parsers/agent_manager_parser.py +50 -98
- claude_mpm/cli/parsers/agents_parser.py +2 -3
- claude_mpm/cli/parsers/base_parser.py +7 -5
- claude_mpm/cli/parsers/mcp_parser.py +4 -2
- claude_mpm/cli/parsers/monitor_parser.py +26 -18
- claude_mpm/cli/shared/__init__.py +10 -10
- claude_mpm/cli/shared/argument_patterns.py +57 -71
- claude_mpm/cli/shared/base_command.py +61 -53
- claude_mpm/cli/shared/error_handling.py +62 -58
- claude_mpm/cli/shared/output_formatters.py +78 -77
- claude_mpm/cli/startup_logging.py +204 -172
- claude_mpm/cli/utils.py +10 -11
- claude_mpm/cli_module/__init__.py +1 -1
- claude_mpm/cli_module/args.py +1 -1
- claude_mpm/cli_module/migration_example.py +5 -5
- claude_mpm/config/__init__.py +9 -9
- claude_mpm/config/agent_config.py +15 -14
- claude_mpm/config/experimental_features.py +4 -4
- claude_mpm/config/paths.py +0 -1
- claude_mpm/config/socketio_config.py +5 -6
- claude_mpm/constants.py +1 -2
- claude_mpm/core/__init__.py +8 -8
- claude_mpm/core/agent_name_normalizer.py +1 -1
- claude_mpm/core/agent_registry.py +20 -23
- claude_mpm/core/agent_session_manager.py +3 -3
- claude_mpm/core/base_service.py +7 -15
- claude_mpm/core/cache.py +4 -6
- claude_mpm/core/claude_runner.py +85 -113
- claude_mpm/core/config.py +43 -28
- claude_mpm/core/config_aliases.py +0 -9
- claude_mpm/core/config_constants.py +52 -30
- claude_mpm/core/constants.py +0 -1
- claude_mpm/core/container.py +18 -27
- claude_mpm/core/exceptions.py +2 -2
- claude_mpm/core/factories.py +10 -12
- claude_mpm/core/framework_loader.py +581 -280
- claude_mpm/core/hook_manager.py +26 -22
- claude_mpm/core/hook_performance_config.py +58 -47
- claude_mpm/core/injectable_service.py +1 -1
- claude_mpm/core/interactive_session.py +61 -152
- claude_mpm/core/interfaces.py +1 -100
- claude_mpm/core/lazy.py +5 -5
- claude_mpm/core/log_manager.py +587 -0
- claude_mpm/core/logger.py +125 -8
- claude_mpm/core/logging_config.py +15 -15
- claude_mpm/core/minimal_framework_loader.py +5 -8
- claude_mpm/core/oneshot_session.py +15 -33
- claude_mpm/core/optimized_agent_loader.py +4 -6
- claude_mpm/core/optimized_startup.py +2 -1
- claude_mpm/core/output_style_manager.py +147 -106
- claude_mpm/core/pm_hook_interceptor.py +0 -1
- claude_mpm/core/service_registry.py +11 -8
- claude_mpm/core/session_manager.py +1 -2
- claude_mpm/core/shared/__init__.py +1 -1
- claude_mpm/core/shared/config_loader.py +101 -97
- claude_mpm/core/shared/path_resolver.py +72 -68
- claude_mpm/core/shared/singleton_manager.py +56 -50
- claude_mpm/core/socketio_pool.py +26 -6
- claude_mpm/core/tool_access_control.py +4 -5
- claude_mpm/core/typing_utils.py +50 -59
- claude_mpm/core/unified_agent_registry.py +14 -19
- claude_mpm/core/unified_config.py +4 -6
- claude_mpm/core/unified_paths.py +197 -109
- claude_mpm/dashboard/open_dashboard.py +2 -4
- claude_mpm/experimental/cli_enhancements.py +51 -36
- claude_mpm/generators/agent_profile_generator.py +2 -4
- claude_mpm/hooks/base_hook.py +1 -2
- claude_mpm/hooks/claude_hooks/connection_pool.py +72 -26
- claude_mpm/hooks/claude_hooks/event_handlers.py +93 -38
- claude_mpm/hooks/claude_hooks/hook_handler.py +130 -76
- claude_mpm/hooks/claude_hooks/hook_handler_eventbus.py +104 -77
- claude_mpm/hooks/claude_hooks/memory_integration.py +2 -4
- claude_mpm/hooks/claude_hooks/response_tracking.py +15 -11
- claude_mpm/hooks/claude_hooks/tool_analysis.py +12 -18
- claude_mpm/hooks/memory_integration_hook.py +5 -5
- claude_mpm/hooks/tool_call_interceptor.py +1 -1
- claude_mpm/hooks/validation_hooks.py +4 -4
- claude_mpm/init.py +4 -9
- claude_mpm/models/__init__.py +2 -2
- claude_mpm/models/agent_session.py +11 -14
- claude_mpm/scripts/mcp_server.py +20 -11
- claude_mpm/scripts/mcp_wrapper.py +5 -5
- claude_mpm/scripts/mpm_doctor.py +321 -0
- claude_mpm/scripts/socketio_daemon.py +28 -25
- claude_mpm/scripts/socketio_daemon_hardened.py +298 -258
- claude_mpm/scripts/socketio_server_manager.py +116 -95
- claude_mpm/services/__init__.py +49 -49
- claude_mpm/services/agent_capabilities_service.py +12 -18
- claude_mpm/services/agents/__init__.py +22 -22
- claude_mpm/services/agents/agent_builder.py +140 -119
- claude_mpm/services/agents/deployment/__init__.py +3 -3
- claude_mpm/services/agents/deployment/agent_config_provider.py +9 -9
- claude_mpm/services/agents/deployment/agent_configuration_manager.py +19 -20
- claude_mpm/services/agents/deployment/agent_definition_factory.py +1 -5
- claude_mpm/services/agents/deployment/agent_deployment.py +136 -106
- claude_mpm/services/agents/deployment/agent_discovery_service.py +4 -8
- claude_mpm/services/agents/deployment/agent_environment_manager.py +2 -7
- claude_mpm/services/agents/deployment/agent_filesystem_manager.py +6 -10
- claude_mpm/services/agents/deployment/agent_format_converter.py +11 -15
- claude_mpm/services/agents/deployment/agent_frontmatter_validator.py +2 -3
- claude_mpm/services/agents/deployment/agent_lifecycle_manager.py +5 -5
- claude_mpm/services/agents/deployment/agent_metrics_collector.py +13 -19
- claude_mpm/services/agents/deployment/agent_restore_handler.py +0 -1
- claude_mpm/services/agents/deployment/agent_template_builder.py +26 -35
- claude_mpm/services/agents/deployment/agent_validator.py +0 -1
- claude_mpm/services/agents/deployment/agent_version_manager.py +7 -9
- claude_mpm/services/agents/deployment/agent_versioning.py +3 -3
- claude_mpm/services/agents/deployment/agents_directory_resolver.py +6 -7
- claude_mpm/services/agents/deployment/async_agent_deployment.py +51 -38
- claude_mpm/services/agents/deployment/config/__init__.py +1 -1
- claude_mpm/services/agents/deployment/config/deployment_config.py +7 -8
- claude_mpm/services/agents/deployment/deployment_type_detector.py +1 -1
- claude_mpm/services/agents/deployment/deployment_wrapper.py +18 -18
- claude_mpm/services/agents/deployment/facade/__init__.py +1 -1
- claude_mpm/services/agents/deployment/facade/deployment_executor.py +0 -3
- claude_mpm/services/agents/deployment/facade/deployment_facade.py +3 -4
- claude_mpm/services/agents/deployment/interface_adapter.py +5 -7
- claude_mpm/services/agents/deployment/multi_source_deployment_service.py +345 -276
- claude_mpm/services/agents/deployment/pipeline/__init__.py +2 -2
- claude_mpm/services/agents/deployment/pipeline/pipeline_builder.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/pipeline_context.py +6 -4
- claude_mpm/services/agents/deployment/pipeline/pipeline_executor.py +3 -3
- claude_mpm/services/agents/deployment/pipeline/steps/__init__.py +2 -2
- claude_mpm/services/agents/deployment/pipeline/steps/agent_processing_step.py +14 -13
- claude_mpm/services/agents/deployment/pipeline/steps/base_step.py +0 -1
- claude_mpm/services/agents/deployment/pipeline/steps/configuration_step.py +1 -1
- claude_mpm/services/agents/deployment/pipeline/steps/target_directory_step.py +8 -9
- claude_mpm/services/agents/deployment/pipeline/steps/validation_step.py +1 -1
- claude_mpm/services/agents/deployment/processors/__init__.py +1 -1
- claude_mpm/services/agents/deployment/processors/agent_processor.py +20 -16
- claude_mpm/services/agents/deployment/refactored_agent_deployment_service.py +5 -12
- claude_mpm/services/agents/deployment/results/__init__.py +1 -1
- claude_mpm/services/agents/deployment/results/deployment_result_builder.py +1 -1
- claude_mpm/services/agents/deployment/strategies/__init__.py +2 -2
- claude_mpm/services/agents/deployment/strategies/base_strategy.py +1 -7
- claude_mpm/services/agents/deployment/strategies/project_strategy.py +1 -4
- claude_mpm/services/agents/deployment/strategies/system_strategy.py +2 -3
- claude_mpm/services/agents/deployment/strategies/user_strategy.py +3 -7
- claude_mpm/services/agents/deployment/validation/__init__.py +1 -1
- claude_mpm/services/agents/deployment/validation/agent_validator.py +1 -1
- claude_mpm/services/agents/deployment/validation/template_validator.py +2 -2
- claude_mpm/services/agents/deployment/validation/validation_result.py +2 -6
- claude_mpm/services/agents/loading/__init__.py +1 -1
- claude_mpm/services/agents/loading/agent_profile_loader.py +6 -12
- claude_mpm/services/agents/loading/base_agent_manager.py +5 -5
- claude_mpm/services/agents/loading/framework_agent_loader.py +2 -4
- claude_mpm/services/agents/management/__init__.py +1 -1
- claude_mpm/services/agents/management/agent_capabilities_generator.py +1 -3
- claude_mpm/services/agents/management/agent_management_service.py +5 -9
- claude_mpm/services/agents/memory/__init__.py +4 -4
- claude_mpm/services/agents/memory/agent_memory_manager.py +280 -160
- claude_mpm/services/agents/memory/agent_persistence_service.py +0 -2
- claude_mpm/services/agents/memory/content_manager.py +44 -38
- claude_mpm/services/agents/memory/template_generator.py +4 -6
- claude_mpm/services/agents/registry/__init__.py +10 -6
- claude_mpm/services/agents/registry/deployed_agent_discovery.py +30 -27
- claude_mpm/services/agents/registry/modification_tracker.py +3 -6
- claude_mpm/services/async_session_logger.py +1 -2
- claude_mpm/services/claude_session_logger.py +1 -2
- claude_mpm/services/command_deployment_service.py +173 -0
- claude_mpm/services/command_handler_service.py +20 -22
- claude_mpm/services/core/__init__.py +25 -25
- claude_mpm/services/core/base.py +0 -5
- claude_mpm/services/core/interfaces/__init__.py +32 -32
- claude_mpm/services/core/interfaces/agent.py +0 -21
- claude_mpm/services/core/interfaces/communication.py +0 -27
- claude_mpm/services/core/interfaces/infrastructure.py +0 -56
- claude_mpm/services/core/interfaces/service.py +0 -29
- claude_mpm/services/diagnostics/__init__.py +1 -1
- claude_mpm/services/diagnostics/checks/__init__.py +6 -6
- claude_mpm/services/diagnostics/checks/agent_check.py +89 -80
- claude_mpm/services/diagnostics/checks/base_check.py +12 -16
- claude_mpm/services/diagnostics/checks/claude_desktop_check.py +84 -81
- claude_mpm/services/diagnostics/checks/common_issues_check.py +99 -91
- claude_mpm/services/diagnostics/checks/configuration_check.py +82 -77
- claude_mpm/services/diagnostics/checks/filesystem_check.py +67 -68
- claude_mpm/services/diagnostics/checks/installation_check.py +254 -94
- claude_mpm/services/diagnostics/checks/mcp_check.py +90 -88
- claude_mpm/services/diagnostics/checks/monitor_check.py +75 -76
- claude_mpm/services/diagnostics/checks/startup_log_check.py +67 -73
- claude_mpm/services/diagnostics/diagnostic_runner.py +67 -59
- claude_mpm/services/diagnostics/doctor_reporter.py +107 -70
- claude_mpm/services/diagnostics/models.py +21 -19
- claude_mpm/services/event_aggregator.py +10 -17
- claude_mpm/services/event_bus/__init__.py +1 -1
- claude_mpm/services/event_bus/config.py +54 -35
- claude_mpm/services/event_bus/event_bus.py +76 -71
- claude_mpm/services/event_bus/relay.py +74 -64
- claude_mpm/services/events/__init__.py +11 -11
- claude_mpm/services/events/consumers/__init__.py +3 -3
- claude_mpm/services/events/consumers/dead_letter.py +71 -63
- claude_mpm/services/events/consumers/logging.py +39 -37
- claude_mpm/services/events/consumers/metrics.py +56 -57
- claude_mpm/services/events/consumers/socketio.py +82 -81
- claude_mpm/services/events/core.py +110 -99
- claude_mpm/services/events/interfaces.py +56 -72
- claude_mpm/services/events/producers/__init__.py +1 -1
- claude_mpm/services/events/producers/hook.py +38 -38
- claude_mpm/services/events/producers/system.py +46 -44
- claude_mpm/services/exceptions.py +81 -80
- claude_mpm/services/framework_claude_md_generator/__init__.py +2 -4
- claude_mpm/services/framework_claude_md_generator/content_assembler.py +3 -5
- claude_mpm/services/framework_claude_md_generator/content_validator.py +1 -1
- claude_mpm/services/framework_claude_md_generator/deployment_manager.py +4 -4
- claude_mpm/services/framework_claude_md_generator/section_generators/__init__.py +0 -1
- claude_mpm/services/framework_claude_md_generator/section_generators/agents.py +0 -2
- claude_mpm/services/framework_claude_md_generator/version_manager.py +4 -5
- claude_mpm/services/hook_service.py +6 -9
- claude_mpm/services/infrastructure/__init__.py +1 -1
- claude_mpm/services/infrastructure/context_preservation.py +8 -12
- claude_mpm/services/infrastructure/monitoring.py +21 -23
- claude_mpm/services/mcp_gateway/__init__.py +37 -37
- claude_mpm/services/mcp_gateway/auto_configure.py +95 -103
- claude_mpm/services/mcp_gateway/config/__init__.py +1 -1
- claude_mpm/services/mcp_gateway/config/config_loader.py +23 -25
- claude_mpm/services/mcp_gateway/config/config_schema.py +5 -5
- claude_mpm/services/mcp_gateway/config/configuration.py +9 -6
- claude_mpm/services/mcp_gateway/core/__init__.py +10 -10
- claude_mpm/services/mcp_gateway/core/base.py +0 -3
- claude_mpm/services/mcp_gateway/core/interfaces.py +1 -38
- claude_mpm/services/mcp_gateway/core/process_pool.py +99 -93
- claude_mpm/services/mcp_gateway/core/singleton_manager.py +65 -62
- claude_mpm/services/mcp_gateway/core/startup_verification.py +75 -74
- claude_mpm/services/mcp_gateway/main.py +2 -1
- claude_mpm/services/mcp_gateway/registry/service_registry.py +5 -8
- claude_mpm/services/mcp_gateway/registry/tool_registry.py +1 -1
- claude_mpm/services/mcp_gateway/server/__init__.py +1 -1
- claude_mpm/services/mcp_gateway/server/mcp_gateway.py +12 -19
- claude_mpm/services/mcp_gateway/server/stdio_handler.py +4 -3
- claude_mpm/services/mcp_gateway/server/stdio_server.py +79 -71
- claude_mpm/services/mcp_gateway/tools/__init__.py +2 -2
- claude_mpm/services/mcp_gateway/tools/base_adapter.py +5 -6
- claude_mpm/services/mcp_gateway/tools/document_summarizer.py +13 -22
- claude_mpm/services/mcp_gateway/tools/health_check_tool.py +79 -78
- claude_mpm/services/mcp_gateway/tools/hello_world.py +12 -14
- claude_mpm/services/mcp_gateway/tools/ticket_tools.py +42 -49
- claude_mpm/services/mcp_gateway/tools/unified_ticket_tool.py +51 -55
- claude_mpm/services/memory/__init__.py +3 -3
- claude_mpm/services/memory/builder.py +3 -6
- claude_mpm/services/memory/cache/__init__.py +1 -1
- claude_mpm/services/memory/cache/shared_prompt_cache.py +3 -5
- claude_mpm/services/memory/cache/simple_cache.py +1 -1
- claude_mpm/services/memory/indexed_memory.py +5 -7
- claude_mpm/services/memory/optimizer.py +7 -10
- claude_mpm/services/memory/router.py +8 -9
- claude_mpm/services/memory_hook_service.py +48 -34
- claude_mpm/services/monitor_build_service.py +77 -73
- claude_mpm/services/port_manager.py +130 -108
- claude_mpm/services/project/analyzer.py +12 -10
- claude_mpm/services/project/registry.py +11 -11
- claude_mpm/services/recovery_manager.py +10 -19
- claude_mpm/services/response_tracker.py +0 -1
- claude_mpm/services/runner_configuration_service.py +19 -20
- claude_mpm/services/session_management_service.py +7 -11
- claude_mpm/services/shared/__init__.py +1 -1
- claude_mpm/services/shared/async_service_base.py +58 -50
- claude_mpm/services/shared/config_service_base.py +73 -67
- claude_mpm/services/shared/lifecycle_service_base.py +82 -78
- claude_mpm/services/shared/manager_base.py +94 -82
- claude_mpm/services/shared/service_factory.py +96 -98
- claude_mpm/services/socketio/__init__.py +3 -3
- claude_mpm/services/socketio/client_proxy.py +5 -5
- claude_mpm/services/socketio/event_normalizer.py +199 -181
- claude_mpm/services/socketio/handlers/__init__.py +3 -3
- claude_mpm/services/socketio/handlers/base.py +5 -4
- claude_mpm/services/socketio/handlers/connection.py +163 -136
- claude_mpm/services/socketio/handlers/file.py +13 -14
- claude_mpm/services/socketio/handlers/git.py +12 -7
- claude_mpm/services/socketio/handlers/hook.py +49 -44
- claude_mpm/services/socketio/handlers/memory.py +0 -1
- claude_mpm/services/socketio/handlers/project.py +0 -1
- claude_mpm/services/socketio/handlers/registry.py +37 -19
- claude_mpm/services/socketio/migration_utils.py +98 -84
- claude_mpm/services/socketio/server/__init__.py +1 -1
- claude_mpm/services/socketio/server/broadcaster.py +81 -87
- claude_mpm/services/socketio/server/core.py +65 -54
- claude_mpm/services/socketio/server/eventbus_integration.py +95 -56
- claude_mpm/services/socketio/server/main.py +64 -38
- claude_mpm/services/socketio_client_manager.py +10 -12
- claude_mpm/services/subprocess_launcher_service.py +4 -7
- claude_mpm/services/system_instructions_service.py +13 -14
- claude_mpm/services/ticket_manager.py +2 -2
- claude_mpm/services/utility_service.py +5 -13
- claude_mpm/services/version_control/__init__.py +16 -16
- claude_mpm/services/version_control/branch_strategy.py +5 -8
- claude_mpm/services/version_control/conflict_resolution.py +9 -23
- claude_mpm/services/version_control/git_operations.py +5 -7
- claude_mpm/services/version_control/semantic_versioning.py +16 -17
- claude_mpm/services/version_control/version_parser.py +13 -18
- claude_mpm/services/version_service.py +10 -11
- claude_mpm/storage/__init__.py +1 -1
- claude_mpm/storage/state_storage.py +22 -28
- claude_mpm/utils/__init__.py +6 -6
- claude_mpm/utils/agent_dependency_loader.py +47 -33
- claude_mpm/utils/config_manager.py +11 -14
- claude_mpm/utils/dependency_cache.py +1 -1
- claude_mpm/utils/dependency_manager.py +13 -17
- claude_mpm/utils/dependency_strategies.py +8 -10
- claude_mpm/utils/environment_context.py +3 -9
- claude_mpm/utils/error_handler.py +3 -13
- claude_mpm/utils/file_utils.py +1 -1
- claude_mpm/utils/path_operations.py +8 -12
- claude_mpm/utils/robust_installer.py +110 -33
- claude_mpm/utils/subprocess_utils.py +5 -6
- claude_mpm/validation/agent_validator.py +3 -6
- claude_mpm/validation/frontmatter_validator.py +1 -1
- {claude_mpm-4.1.0.dist-info → claude_mpm-4.1.2.dist-info}/METADATA +1 -1
- claude_mpm-4.1.2.dist-info/RECORD +498 -0
- claude_mpm-4.1.0.dist-info/RECORD +0 -494
- {claude_mpm-4.1.0.dist-info → claude_mpm-4.1.2.dist-info}/WHEEL +0 -0
- {claude_mpm-4.1.0.dist-info → claude_mpm-4.1.2.dist-info}/entry_points.txt +0 -0
- {claude_mpm-4.1.0.dist-info → claude_mpm-4.1.2.dist-info}/licenses/LICENSE +0 -0
- {claude_mpm-4.1.0.dist-info → claude_mpm-4.1.2.dist-info}/top_level.txt +0 -0
|
@@ -21,89 +21,99 @@ import os
|
|
|
21
21
|
import signal
|
|
22
22
|
import subprocess
|
|
23
23
|
import sys
|
|
24
|
+
import threading
|
|
24
25
|
import time
|
|
25
26
|
import traceback
|
|
26
|
-
from pathlib import Path
|
|
27
27
|
from datetime import datetime
|
|
28
|
-
from
|
|
29
|
-
import
|
|
30
|
-
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from typing import Optional
|
|
30
|
+
|
|
31
31
|
|
|
32
32
|
# Detect and use virtual environment Python if available
|
|
33
33
|
def get_python_executable():
|
|
34
34
|
"""Get the appropriate Python executable, preferring virtual environment."""
|
|
35
|
-
if hasattr(sys,
|
|
35
|
+
if hasattr(sys, "real_prefix") or (
|
|
36
|
+
hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
|
|
37
|
+
):
|
|
36
38
|
return sys.executable
|
|
37
|
-
|
|
38
|
-
venv_path = os.environ.get(
|
|
39
|
+
|
|
40
|
+
venv_path = os.environ.get("VIRTUAL_ENV")
|
|
39
41
|
if venv_path:
|
|
40
|
-
venv_python = Path(venv_path) /
|
|
42
|
+
venv_python = Path(venv_path) / "bin" / "python"
|
|
41
43
|
if venv_python.exists():
|
|
42
44
|
return str(venv_python)
|
|
43
|
-
|
|
45
|
+
|
|
44
46
|
exe_path = Path(sys.executable).resolve()
|
|
45
47
|
for parent in exe_path.parents:
|
|
46
|
-
if parent.name in (
|
|
48
|
+
if parent.name in ("venv", ".venv", "env", ".env"):
|
|
47
49
|
return sys.executable
|
|
48
|
-
if parent.name ==
|
|
50
|
+
if parent.name == "bin" and (parent.parent / "pyvenv.cfg").exists():
|
|
49
51
|
return sys.executable
|
|
50
|
-
if parent.name ==
|
|
52
|
+
if parent.name == "Scripts" and (parent.parent / "pyvenv.cfg").exists():
|
|
51
53
|
return sys.executable
|
|
52
|
-
|
|
54
|
+
|
|
53
55
|
script_path = Path(__file__).resolve()
|
|
54
56
|
for parent in script_path.parents:
|
|
55
|
-
if parent.name ==
|
|
56
|
-
for venv_name in (
|
|
57
|
+
if parent.name == "src" or not (parent / "src").exists():
|
|
58
|
+
for venv_name in ("venv", ".venv", "env", ".env"):
|
|
57
59
|
venv_dir = parent / venv_name
|
|
58
60
|
if venv_dir.exists():
|
|
59
|
-
venv_python = venv_dir /
|
|
61
|
+
venv_python = venv_dir / "bin" / "python"
|
|
60
62
|
if venv_python.exists():
|
|
61
63
|
return str(venv_python)
|
|
62
64
|
break
|
|
63
|
-
|
|
65
|
+
|
|
64
66
|
return sys.executable
|
|
65
67
|
|
|
68
|
+
|
|
66
69
|
PYTHON_EXECUTABLE = get_python_executable()
|
|
67
70
|
|
|
71
|
+
|
|
68
72
|
# Configuration from environment variables
|
|
69
73
|
class Config:
|
|
70
74
|
"""Centralized configuration with environment variable support."""
|
|
71
|
-
|
|
75
|
+
|
|
72
76
|
# Retry configuration
|
|
73
|
-
MAX_RETRIES = int(os.environ.get(
|
|
74
|
-
INITIAL_RETRY_DELAY = float(os.environ.get(
|
|
75
|
-
MAX_RETRY_DELAY = float(os.environ.get(
|
|
76
|
-
BACKOFF_FACTOR = float(os.environ.get(
|
|
77
|
-
|
|
77
|
+
MAX_RETRIES = int(os.environ.get("SOCKETIO_MAX_RETRIES", "10"))
|
|
78
|
+
INITIAL_RETRY_DELAY = float(os.environ.get("SOCKETIO_INITIAL_RETRY_DELAY", "1.0"))
|
|
79
|
+
MAX_RETRY_DELAY = float(os.environ.get("SOCKETIO_MAX_RETRY_DELAY", "60.0"))
|
|
80
|
+
BACKOFF_FACTOR = float(os.environ.get("SOCKETIO_BACKOFF_FACTOR", "2.0"))
|
|
81
|
+
|
|
78
82
|
# Health check configuration
|
|
79
|
-
HEALTH_CHECK_INTERVAL = float(
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
+
HEALTH_CHECK_INTERVAL = float(
|
|
84
|
+
os.environ.get("SOCKETIO_HEALTH_CHECK_INTERVAL", "30.0")
|
|
85
|
+
)
|
|
86
|
+
HEALTH_CHECK_TIMEOUT = float(os.environ.get("SOCKETIO_HEALTH_CHECK_TIMEOUT", "5.0"))
|
|
87
|
+
UNHEALTHY_THRESHOLD = int(os.environ.get("SOCKETIO_UNHEALTHY_THRESHOLD", "3"))
|
|
88
|
+
|
|
83
89
|
# Process management
|
|
84
|
-
STARTUP_TIMEOUT = float(os.environ.get(
|
|
85
|
-
SHUTDOWN_TIMEOUT = float(os.environ.get(
|
|
86
|
-
FORCE_KILL_TIMEOUT = float(os.environ.get(
|
|
87
|
-
|
|
90
|
+
STARTUP_TIMEOUT = float(os.environ.get("SOCKETIO_STARTUP_TIMEOUT", "30.0"))
|
|
91
|
+
SHUTDOWN_TIMEOUT = float(os.environ.get("SOCKETIO_SHUTDOWN_TIMEOUT", "10.0"))
|
|
92
|
+
FORCE_KILL_TIMEOUT = float(os.environ.get("SOCKETIO_FORCE_KILL_TIMEOUT", "5.0"))
|
|
93
|
+
|
|
88
94
|
# Port configuration
|
|
89
|
-
PORT_RANGE_START = int(os.environ.get(
|
|
90
|
-
PORT_RANGE_END = int(os.environ.get(
|
|
91
|
-
|
|
95
|
+
PORT_RANGE_START = int(os.environ.get("SOCKETIO_PORT_START", "8765"))
|
|
96
|
+
PORT_RANGE_END = int(os.environ.get("SOCKETIO_PORT_END", "8785"))
|
|
97
|
+
|
|
92
98
|
# Logging
|
|
93
|
-
LOG_LEVEL = os.environ.get(
|
|
94
|
-
LOG_FORMAT =
|
|
95
|
-
|
|
99
|
+
LOG_LEVEL = os.environ.get("SOCKETIO_LOG_LEVEL", "INFO")
|
|
100
|
+
LOG_FORMAT = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
|
|
101
|
+
|
|
96
102
|
# Monitoring
|
|
97
|
-
METRICS_ENABLED =
|
|
98
|
-
|
|
103
|
+
METRICS_ENABLED = (
|
|
104
|
+
os.environ.get("SOCKETIO_METRICS_ENABLED", "true").lower() == "true"
|
|
105
|
+
)
|
|
106
|
+
METRICS_FILE = os.environ.get(
|
|
107
|
+
"SOCKETIO_METRICS_FILE", ".claude-mpm/socketio-metrics.json"
|
|
108
|
+
)
|
|
109
|
+
|
|
99
110
|
|
|
100
111
|
# Setup structured logging
|
|
112
|
+
import contextlib
|
|
101
113
|
import logging
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
)
|
|
106
|
-
logger = logging.getLogger('socketio-daemon')
|
|
114
|
+
|
|
115
|
+
logging.basicConfig(level=getattr(logging, Config.LOG_LEVEL), format=Config.LOG_FORMAT)
|
|
116
|
+
logger = logging.getLogger("socketio-daemon")
|
|
107
117
|
|
|
108
118
|
try:
|
|
109
119
|
import psutil
|
|
@@ -114,75 +124,82 @@ except ImportError:
|
|
|
114
124
|
|
|
115
125
|
# Import project modules
|
|
116
126
|
try:
|
|
127
|
+
from claude_mpm.core.unified_paths import get_project_root
|
|
117
128
|
from claude_mpm.services.port_manager import PortManager
|
|
118
129
|
from claude_mpm.services.socketio.server.main import SocketIOServer
|
|
119
|
-
from claude_mpm.core.unified_paths import get_project_root
|
|
120
130
|
except ImportError:
|
|
121
131
|
script_path = Path(__file__).resolve()
|
|
122
132
|
if "site-packages" in str(script_path):
|
|
123
133
|
parts = script_path.parts
|
|
124
|
-
site_packages_idx = next(
|
|
134
|
+
site_packages_idx = next(
|
|
135
|
+
i for i, part in enumerate(parts) if part == "site-packages"
|
|
136
|
+
)
|
|
125
137
|
site_packages_path = Path(*parts[: site_packages_idx + 1])
|
|
126
138
|
if site_packages_path.exists() and str(site_packages_path) not in sys.path:
|
|
127
139
|
sys.path.insert(0, str(site_packages_path))
|
|
128
140
|
else:
|
|
129
141
|
src_path = script_path.parent.parent.parent
|
|
130
|
-
if
|
|
142
|
+
if (
|
|
143
|
+
src_path.exists()
|
|
144
|
+
and (src_path / "claude_mpm").exists()
|
|
145
|
+
and str(src_path) not in sys.path
|
|
146
|
+
):
|
|
131
147
|
sys.path.insert(0, str(src_path))
|
|
132
|
-
|
|
148
|
+
|
|
149
|
+
from claude_mpm.core.unified_paths import get_project_root
|
|
133
150
|
from claude_mpm.services.port_manager import PortManager
|
|
134
151
|
from claude_mpm.services.socketio.server.main import SocketIOServer
|
|
135
|
-
from claude_mpm.core.unified_paths import get_project_root
|
|
136
152
|
|
|
137
153
|
|
|
138
154
|
class DaemonMetrics:
|
|
139
155
|
"""Track and persist daemon metrics for monitoring."""
|
|
140
|
-
|
|
156
|
+
|
|
141
157
|
def __init__(self, metrics_file: Path):
|
|
142
158
|
self.metrics_file = metrics_file
|
|
143
159
|
self.metrics = {
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
160
|
+
"start_time": None,
|
|
161
|
+
"restarts": 0,
|
|
162
|
+
"total_failures": 0,
|
|
163
|
+
"last_failure": None,
|
|
164
|
+
"health_checks_passed": 0,
|
|
165
|
+
"health_checks_failed": 0,
|
|
166
|
+
"uptime_seconds": 0,
|
|
167
|
+
"last_health_check": None,
|
|
168
|
+
"status": "initializing",
|
|
153
169
|
}
|
|
154
170
|
self.lock = threading.Lock()
|
|
155
171
|
self.load()
|
|
156
|
-
|
|
172
|
+
|
|
157
173
|
def load(self):
|
|
158
174
|
"""Load metrics from file if exists."""
|
|
159
175
|
if self.metrics_file.exists():
|
|
160
176
|
try:
|
|
161
|
-
with open(self.metrics_file
|
|
177
|
+
with open(self.metrics_file) as f:
|
|
162
178
|
saved = json.load(f)
|
|
163
179
|
self.metrics.update(saved)
|
|
164
180
|
except Exception as e:
|
|
165
181
|
logger.warning(f"Could not load metrics: {e}")
|
|
166
|
-
|
|
182
|
+
|
|
167
183
|
def save(self):
|
|
168
184
|
"""Persist metrics to file."""
|
|
169
185
|
try:
|
|
170
186
|
self.metrics_file.parent.mkdir(parents=True, exist_ok=True)
|
|
171
|
-
with self.lock:
|
|
172
|
-
|
|
173
|
-
json.dump(self.metrics, f, indent=2, default=str)
|
|
187
|
+
with self.lock, open(self.metrics_file, "w") as f:
|
|
188
|
+
json.dump(self.metrics, f, indent=2, default=str)
|
|
174
189
|
except Exception as e:
|
|
175
190
|
logger.error(f"Could not save metrics: {e}")
|
|
176
|
-
|
|
191
|
+
|
|
177
192
|
def update(self, **kwargs):
|
|
178
193
|
"""Update metrics atomically."""
|
|
179
194
|
with self.lock:
|
|
180
195
|
self.metrics.update(kwargs)
|
|
181
|
-
if self.metrics[
|
|
182
|
-
start = datetime.fromisoformat(str(self.metrics[
|
|
183
|
-
self.metrics[
|
|
196
|
+
if self.metrics["start_time"]:
|
|
197
|
+
start = datetime.fromisoformat(str(self.metrics["start_time"]))
|
|
198
|
+
self.metrics["uptime_seconds"] = int(
|
|
199
|
+
(datetime.now() - start).total_seconds()
|
|
200
|
+
)
|
|
184
201
|
self.save()
|
|
185
|
-
|
|
202
|
+
|
|
186
203
|
def increment(self, key: str, amount: int = 1):
|
|
187
204
|
"""Increment a counter metric."""
|
|
188
205
|
with self.lock:
|
|
@@ -192,29 +209,32 @@ class DaemonMetrics:
|
|
|
192
209
|
|
|
193
210
|
class ExponentialBackoff:
|
|
194
211
|
"""Implement exponential backoff with jitter for retry logic."""
|
|
195
|
-
|
|
196
|
-
def __init__(
|
|
212
|
+
|
|
213
|
+
def __init__(
|
|
214
|
+
self, initial_delay: float = 1.0, max_delay: float = 60.0, factor: float = 2.0
|
|
215
|
+
):
|
|
197
216
|
self.initial_delay = initial_delay
|
|
198
217
|
self.max_delay = max_delay
|
|
199
218
|
self.factor = factor
|
|
200
219
|
self.current_delay = initial_delay
|
|
201
220
|
self.attempt = 0
|
|
202
|
-
|
|
221
|
+
|
|
203
222
|
def next_delay(self) -> float:
|
|
204
223
|
"""Get the next delay with jitter."""
|
|
205
224
|
import random
|
|
225
|
+
|
|
206
226
|
self.attempt += 1
|
|
207
|
-
|
|
227
|
+
|
|
208
228
|
# Calculate exponential delay
|
|
209
|
-
delay = min(self.initial_delay * (self.factor
|
|
210
|
-
|
|
229
|
+
delay = min(self.initial_delay * (self.factor**self.attempt), self.max_delay)
|
|
230
|
+
|
|
211
231
|
# Add jitter (±25% randomization)
|
|
212
232
|
jitter = delay * 0.25 * (2 * random.random() - 1)
|
|
213
233
|
actual_delay = max(0.1, delay + jitter)
|
|
214
|
-
|
|
234
|
+
|
|
215
235
|
logger.debug(f"Backoff attempt {self.attempt}: {actual_delay:.2f}s")
|
|
216
236
|
return actual_delay
|
|
217
|
-
|
|
237
|
+
|
|
218
238
|
def reset(self):
|
|
219
239
|
"""Reset the backoff counter."""
|
|
220
240
|
self.attempt = 0
|
|
@@ -223,82 +243,92 @@ class ExponentialBackoff:
|
|
|
223
243
|
|
|
224
244
|
class HealthMonitor:
|
|
225
245
|
"""Monitor daemon health and trigger recovery if needed."""
|
|
226
|
-
|
|
246
|
+
|
|
227
247
|
def __init__(self, port: int, metrics: DaemonMetrics):
|
|
228
248
|
self.port = port
|
|
229
249
|
self.metrics = metrics
|
|
230
250
|
self.consecutive_failures = 0
|
|
231
251
|
self.running = False
|
|
232
252
|
self.thread = None
|
|
233
|
-
|
|
253
|
+
|
|
234
254
|
def start(self):
|
|
235
255
|
"""Start health monitoring in background thread."""
|
|
236
256
|
if self.running:
|
|
237
257
|
return
|
|
238
|
-
|
|
258
|
+
|
|
239
259
|
self.running = True
|
|
240
260
|
self.thread = threading.Thread(target=self._monitor_loop, daemon=True)
|
|
241
261
|
self.thread.start()
|
|
242
262
|
logger.info("Health monitor started")
|
|
243
|
-
|
|
263
|
+
|
|
244
264
|
def stop(self):
|
|
245
265
|
"""Stop health monitoring."""
|
|
246
266
|
self.running = False
|
|
247
267
|
if self.thread:
|
|
248
268
|
self.thread.join(timeout=5)
|
|
249
269
|
logger.info("Health monitor stopped")
|
|
250
|
-
|
|
270
|
+
|
|
251
271
|
def _monitor_loop(self):
|
|
252
272
|
"""Main health check loop."""
|
|
253
273
|
while self.running:
|
|
254
274
|
try:
|
|
255
275
|
time.sleep(Config.HEALTH_CHECK_INTERVAL)
|
|
256
|
-
|
|
276
|
+
|
|
257
277
|
if self._check_health():
|
|
258
278
|
self.consecutive_failures = 0
|
|
259
|
-
self.metrics.increment(
|
|
260
|
-
self.metrics.update(
|
|
279
|
+
self.metrics.increment("health_checks_passed")
|
|
280
|
+
self.metrics.update(
|
|
281
|
+
last_health_check=datetime.now(), status="healthy"
|
|
282
|
+
)
|
|
261
283
|
else:
|
|
262
284
|
self.consecutive_failures += 1
|
|
263
|
-
self.metrics.increment(
|
|
264
|
-
self.metrics.update(
|
|
265
|
-
|
|
285
|
+
self.metrics.increment("health_checks_failed")
|
|
286
|
+
self.metrics.update(
|
|
287
|
+
last_health_check=datetime.now(), status="unhealthy"
|
|
288
|
+
)
|
|
289
|
+
|
|
266
290
|
if self.consecutive_failures >= Config.UNHEALTHY_THRESHOLD:
|
|
267
|
-
logger.error(
|
|
291
|
+
logger.error(
|
|
292
|
+
f"Health check failed {self.consecutive_failures} times - daemon unhealthy"
|
|
293
|
+
)
|
|
268
294
|
# Supervisor will handle restart
|
|
269
|
-
|
|
295
|
+
|
|
270
296
|
except Exception as e:
|
|
271
297
|
logger.error(f"Health monitor error: {e}")
|
|
272
|
-
|
|
298
|
+
|
|
273
299
|
def _check_health(self) -> bool:
|
|
274
300
|
"""Perform health check on the daemon."""
|
|
275
301
|
try:
|
|
276
302
|
import socket
|
|
277
|
-
|
|
278
|
-
|
|
303
|
+
|
|
279
304
|
# Try to connect to the socket
|
|
280
305
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
281
306
|
sock.settimeout(Config.HEALTH_CHECK_TIMEOUT)
|
|
282
|
-
result = sock.connect_ex((
|
|
307
|
+
result = sock.connect_ex(("localhost", self.port))
|
|
283
308
|
sock.close()
|
|
284
|
-
|
|
309
|
+
|
|
285
310
|
if result != 0:
|
|
286
|
-
logger.warning(
|
|
311
|
+
logger.warning(
|
|
312
|
+
f"Health check failed: cannot connect to port {self.port}"
|
|
313
|
+
)
|
|
287
314
|
return False
|
|
288
|
-
|
|
315
|
+
|
|
289
316
|
# Try to make an HTTP health request if possible
|
|
290
317
|
try:
|
|
291
318
|
import urllib.request
|
|
292
|
-
|
|
293
|
-
|
|
319
|
+
|
|
320
|
+
url = f"http://localhost:{self.port}/health"
|
|
321
|
+
with urllib.request.urlopen(
|
|
322
|
+
url, timeout=Config.HEALTH_CHECK_TIMEOUT
|
|
323
|
+
) as response:
|
|
294
324
|
if response.status == 200:
|
|
295
325
|
return True
|
|
296
326
|
except:
|
|
297
327
|
# Fall back to simple port check
|
|
298
328
|
pass
|
|
299
|
-
|
|
329
|
+
|
|
300
330
|
return True
|
|
301
|
-
|
|
331
|
+
|
|
302
332
|
except Exception as e:
|
|
303
333
|
logger.error(f"Health check error: {e}")
|
|
304
334
|
return False
|
|
@@ -306,92 +336,95 @@ class HealthMonitor:
|
|
|
306
336
|
|
|
307
337
|
class DaemonSupervisor:
|
|
308
338
|
"""Supervise the daemon process and handle automatic recovery."""
|
|
309
|
-
|
|
339
|
+
|
|
310
340
|
def __init__(self):
|
|
311
341
|
self.deployment_root = get_project_root()
|
|
312
342
|
self.pid_file = self.deployment_root / ".claude-mpm" / "socketio-server.pid"
|
|
313
343
|
self.log_file = self.deployment_root / ".claude-mpm" / "socketio-server.log"
|
|
314
344
|
self.lock_file = self.deployment_root / ".claude-mpm" / "socketio-server.lock"
|
|
315
|
-
self.supervisor_pid_file =
|
|
316
|
-
|
|
345
|
+
self.supervisor_pid_file = (
|
|
346
|
+
self.deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
|
|
347
|
+
)
|
|
348
|
+
|
|
317
349
|
# Metrics tracking
|
|
318
350
|
metrics_file = self.deployment_root / ".claude-mpm" / Config.METRICS_FILE
|
|
319
351
|
self.metrics = DaemonMetrics(metrics_file)
|
|
320
|
-
|
|
352
|
+
|
|
321
353
|
# Recovery state
|
|
322
354
|
self.backoff = ExponentialBackoff(
|
|
323
|
-
Config.INITIAL_RETRY_DELAY,
|
|
324
|
-
Config.MAX_RETRY_DELAY,
|
|
325
|
-
Config.BACKOFF_FACTOR
|
|
355
|
+
Config.INITIAL_RETRY_DELAY, Config.MAX_RETRY_DELAY, Config.BACKOFF_FACTOR
|
|
326
356
|
)
|
|
327
|
-
|
|
357
|
+
|
|
328
358
|
self.port_manager = PortManager()
|
|
329
359
|
self.server_process = None
|
|
330
360
|
self.selected_port = None
|
|
331
361
|
self.health_monitor = None
|
|
332
362
|
self.shutdown_requested = False
|
|
333
|
-
|
|
363
|
+
|
|
334
364
|
def ensure_dirs(self):
|
|
335
365
|
"""Ensure required directories exist."""
|
|
336
366
|
self.pid_file.parent.mkdir(parents=True, exist_ok=True)
|
|
337
|
-
|
|
367
|
+
|
|
338
368
|
def acquire_lock(self) -> bool:
|
|
339
369
|
"""Acquire exclusive lock to prevent multiple instances."""
|
|
340
370
|
try:
|
|
341
371
|
self.ensure_dirs()
|
|
342
|
-
|
|
372
|
+
|
|
343
373
|
# Check for existing lock
|
|
344
374
|
if self.lock_file.exists():
|
|
345
375
|
try:
|
|
346
|
-
with open(self.lock_file
|
|
376
|
+
with open(self.lock_file) as f:
|
|
347
377
|
old_pid = int(f.read().strip())
|
|
348
|
-
|
|
378
|
+
|
|
349
379
|
# Check if old process is still running
|
|
350
380
|
if psutil.pid_exists(old_pid):
|
|
351
381
|
process = psutil.Process(old_pid)
|
|
352
382
|
if process.is_running():
|
|
353
|
-
logger.warning(
|
|
383
|
+
logger.warning(
|
|
384
|
+
f"Another supervisor is running (PID: {old_pid})"
|
|
385
|
+
)
|
|
354
386
|
return False
|
|
355
387
|
except:
|
|
356
388
|
pass
|
|
357
|
-
|
|
389
|
+
|
|
358
390
|
# Clean up stale lock
|
|
359
391
|
self.lock_file.unlink(missing_ok=True)
|
|
360
|
-
|
|
392
|
+
|
|
361
393
|
# Create new lock
|
|
362
|
-
with open(self.lock_file,
|
|
394
|
+
with open(self.lock_file, "w") as f:
|
|
363
395
|
f.write(str(os.getpid()))
|
|
364
|
-
|
|
396
|
+
|
|
365
397
|
return True
|
|
366
|
-
|
|
398
|
+
|
|
367
399
|
except Exception as e:
|
|
368
400
|
logger.error(f"Could not acquire lock: {e}")
|
|
369
401
|
return False
|
|
370
|
-
|
|
402
|
+
|
|
371
403
|
def release_lock(self):
|
|
372
404
|
"""Release the exclusive lock."""
|
|
373
405
|
self.lock_file.unlink(missing_ok=True)
|
|
374
|
-
|
|
406
|
+
|
|
375
407
|
def find_available_port(self) -> Optional[int]:
|
|
376
408
|
"""Find an available port for the server."""
|
|
377
409
|
self.port_manager.cleanup_dead_instances()
|
|
378
410
|
port = self.port_manager.find_available_port()
|
|
379
|
-
|
|
411
|
+
|
|
380
412
|
if not port:
|
|
381
413
|
# Try extended range if configured
|
|
382
414
|
for p in range(Config.PORT_RANGE_START, Config.PORT_RANGE_END + 1):
|
|
383
415
|
import socket
|
|
416
|
+
|
|
384
417
|
try:
|
|
385
418
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
386
|
-
result = sock.connect_ex((
|
|
419
|
+
result = sock.connect_ex(("localhost", p))
|
|
387
420
|
sock.close()
|
|
388
421
|
if result != 0:
|
|
389
422
|
return p
|
|
390
423
|
except:
|
|
391
424
|
pass
|
|
392
|
-
|
|
425
|
+
|
|
393
426
|
return port
|
|
394
|
-
|
|
427
|
+
|
|
395
428
|
def start_server_process(self) -> bool:
|
|
396
429
|
"""Start the actual Socket.IO server process."""
|
|
397
430
|
try:
|
|
@@ -400,95 +433,92 @@ class DaemonSupervisor:
|
|
|
400
433
|
if not self.selected_port:
|
|
401
434
|
logger.error("No available ports")
|
|
402
435
|
return False
|
|
403
|
-
|
|
436
|
+
|
|
404
437
|
logger.info(f"Starting server on port {self.selected_port}")
|
|
405
|
-
|
|
438
|
+
|
|
406
439
|
# Fork to create daemon process
|
|
407
440
|
pid = os.fork()
|
|
408
441
|
if pid > 0:
|
|
409
442
|
# Parent process - supervisor
|
|
410
443
|
self.server_process = pid
|
|
411
|
-
|
|
444
|
+
|
|
412
445
|
# Save PID files
|
|
413
|
-
with open(self.pid_file,
|
|
446
|
+
with open(self.pid_file, "w") as f:
|
|
414
447
|
f.write(str(pid))
|
|
415
|
-
|
|
416
|
-
with open(self.supervisor_pid_file,
|
|
448
|
+
|
|
449
|
+
with open(self.supervisor_pid_file, "w") as f:
|
|
417
450
|
f.write(str(os.getpid()))
|
|
418
|
-
|
|
451
|
+
|
|
419
452
|
# Save port info
|
|
420
453
|
port_file = self.pid_file.parent / "socketio-port"
|
|
421
|
-
with open(port_file,
|
|
454
|
+
with open(port_file, "w") as f:
|
|
422
455
|
f.write(str(self.selected_port))
|
|
423
|
-
|
|
456
|
+
|
|
424
457
|
# Register with port manager
|
|
425
458
|
self.port_manager.register_instance(self.selected_port, pid)
|
|
426
|
-
|
|
459
|
+
|
|
427
460
|
# Wait for server to start
|
|
428
461
|
if self._wait_for_server_start():
|
|
429
462
|
logger.info(f"Server started successfully (PID: {pid})")
|
|
430
|
-
self.metrics.update(
|
|
431
|
-
start_time=datetime.now(),
|
|
432
|
-
status='running'
|
|
433
|
-
)
|
|
463
|
+
self.metrics.update(start_time=datetime.now(), status="running")
|
|
434
464
|
self.backoff.reset()
|
|
435
465
|
return True
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
self._run_server_process()
|
|
444
|
-
|
|
466
|
+
logger.error("Server failed to start within timeout")
|
|
467
|
+
self._cleanup_failed_server(pid)
|
|
468
|
+
return False
|
|
469
|
+
|
|
470
|
+
# Child process - actual server
|
|
471
|
+
self._run_server_process()
|
|
472
|
+
|
|
445
473
|
except Exception as e:
|
|
446
474
|
logger.error(f"Failed to start server: {e}")
|
|
447
475
|
logger.debug(traceback.format_exc())
|
|
448
476
|
return False
|
|
449
|
-
|
|
477
|
+
|
|
450
478
|
def _run_server_process(self):
|
|
451
479
|
"""Run the Socket.IO server in the child process."""
|
|
452
480
|
try:
|
|
453
481
|
# Become a proper daemon
|
|
454
482
|
os.setsid()
|
|
455
483
|
os.umask(0)
|
|
456
|
-
|
|
484
|
+
|
|
457
485
|
# Redirect output to log file
|
|
458
|
-
with open(self.log_file,
|
|
486
|
+
with open(self.log_file, "a") as log:
|
|
459
487
|
os.dup2(log.fileno(), sys.stdout.fileno())
|
|
460
488
|
os.dup2(log.fileno(), sys.stderr.fileno())
|
|
461
|
-
|
|
489
|
+
|
|
462
490
|
# Log startup info
|
|
463
|
-
print(
|
|
491
|
+
print(
|
|
492
|
+
f"[{datetime.now()}] Starting Socket.IO server on port {self.selected_port}"
|
|
493
|
+
)
|
|
464
494
|
print(f"[{datetime.now()}] Python: {sys.executable}")
|
|
465
495
|
print(f"[{datetime.now()}] Version: {sys.version}")
|
|
466
|
-
|
|
496
|
+
|
|
467
497
|
# Create and start server with error handling
|
|
468
498
|
server = None
|
|
469
499
|
try:
|
|
470
500
|
server = SocketIOServer(host="localhost", port=self.selected_port)
|
|
471
|
-
|
|
501
|
+
|
|
472
502
|
# Setup signal handlers
|
|
473
503
|
def signal_handler(signum, frame):
|
|
474
|
-
print(
|
|
504
|
+
print(
|
|
505
|
+
f"[{datetime.now()}] Received signal {signum}, shutting down..."
|
|
506
|
+
)
|
|
475
507
|
if server:
|
|
476
|
-
|
|
508
|
+
with contextlib.suppress(Exception):
|
|
477
509
|
server.stop_sync()
|
|
478
|
-
except:
|
|
479
|
-
pass
|
|
480
510
|
sys.exit(0)
|
|
481
|
-
|
|
511
|
+
|
|
482
512
|
signal.signal(signal.SIGTERM, signal_handler)
|
|
483
513
|
signal.signal(signal.SIGINT, signal_handler)
|
|
484
|
-
|
|
514
|
+
|
|
485
515
|
# Start server
|
|
486
516
|
server.start_sync()
|
|
487
|
-
|
|
517
|
+
|
|
488
518
|
# Keep running
|
|
489
519
|
while True:
|
|
490
520
|
time.sleep(1)
|
|
491
|
-
|
|
521
|
+
|
|
492
522
|
except KeyboardInterrupt:
|
|
493
523
|
if server:
|
|
494
524
|
server.stop_sync()
|
|
@@ -497,36 +527,36 @@ class DaemonSupervisor:
|
|
|
497
527
|
print(f"[{datetime.now()}] Server error: {e}")
|
|
498
528
|
print(traceback.format_exc())
|
|
499
529
|
sys.exit(1)
|
|
500
|
-
|
|
530
|
+
|
|
501
531
|
except Exception as e:
|
|
502
532
|
print(f"[{datetime.now()}] Fatal error: {e}")
|
|
503
533
|
sys.exit(1)
|
|
504
|
-
|
|
534
|
+
|
|
505
535
|
def _wait_for_server_start(self) -> bool:
|
|
506
536
|
"""Wait for the server to become responsive."""
|
|
507
537
|
import socket
|
|
508
|
-
|
|
538
|
+
|
|
509
539
|
start_time = time.time()
|
|
510
540
|
while time.time() - start_time < Config.STARTUP_TIMEOUT:
|
|
511
541
|
# Check if process is still alive
|
|
512
542
|
if not self._is_process_alive(self.server_process):
|
|
513
543
|
return False
|
|
514
|
-
|
|
544
|
+
|
|
515
545
|
# Try to connect
|
|
516
546
|
try:
|
|
517
547
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
518
|
-
result = sock.connect_ex((
|
|
548
|
+
result = sock.connect_ex(("localhost", self.selected_port))
|
|
519
549
|
sock.close()
|
|
520
|
-
|
|
550
|
+
|
|
521
551
|
if result == 0:
|
|
522
552
|
return True
|
|
523
553
|
except:
|
|
524
554
|
pass
|
|
525
|
-
|
|
555
|
+
|
|
526
556
|
time.sleep(0.5)
|
|
527
|
-
|
|
557
|
+
|
|
528
558
|
return False
|
|
529
|
-
|
|
559
|
+
|
|
530
560
|
def _is_process_alive(self, pid: int) -> bool:
|
|
531
561
|
"""Check if a process is alive."""
|
|
532
562
|
try:
|
|
@@ -534,7 +564,7 @@ class DaemonSupervisor:
|
|
|
534
564
|
return process.is_running()
|
|
535
565
|
except (psutil.NoSuchProcess, psutil.AccessDenied):
|
|
536
566
|
return False
|
|
537
|
-
|
|
567
|
+
|
|
538
568
|
def _cleanup_failed_server(self, pid: int):
|
|
539
569
|
"""Clean up after a failed server start."""
|
|
540
570
|
try:
|
|
@@ -542,71 +572,80 @@ class DaemonSupervisor:
|
|
|
542
572
|
os.kill(pid, signal.SIGKILL)
|
|
543
573
|
except:
|
|
544
574
|
pass
|
|
545
|
-
|
|
575
|
+
|
|
546
576
|
self.pid_file.unlink(missing_ok=True)
|
|
547
|
-
|
|
577
|
+
|
|
548
578
|
if self.selected_port:
|
|
549
579
|
instances = self.port_manager.load_instances()
|
|
550
580
|
for instance_id, info in instances.items():
|
|
551
|
-
if info.get(
|
|
581
|
+
if info.get("pid") == pid:
|
|
552
582
|
self.port_manager.remove_instance(instance_id)
|
|
553
583
|
break
|
|
554
|
-
|
|
584
|
+
|
|
555
585
|
def monitor_and_restart(self):
|
|
556
586
|
"""Monitor the server and restart if it crashes."""
|
|
557
587
|
retry_count = 0
|
|
558
|
-
|
|
588
|
+
|
|
559
589
|
while retry_count < Config.MAX_RETRIES and not self.shutdown_requested:
|
|
560
590
|
try:
|
|
561
591
|
# Start the server
|
|
562
592
|
if self.start_server_process():
|
|
563
593
|
# Start health monitoring
|
|
564
594
|
if Config.METRICS_ENABLED and self.selected_port:
|
|
565
|
-
self.health_monitor = HealthMonitor(
|
|
595
|
+
self.health_monitor = HealthMonitor(
|
|
596
|
+
self.selected_port, self.metrics
|
|
597
|
+
)
|
|
566
598
|
self.health_monitor.start()
|
|
567
|
-
|
|
599
|
+
|
|
568
600
|
# Monitor the process
|
|
569
601
|
while not self.shutdown_requested:
|
|
570
602
|
time.sleep(5)
|
|
571
|
-
|
|
603
|
+
|
|
572
604
|
# Check if process is still alive
|
|
573
605
|
if not self._is_process_alive(self.server_process):
|
|
574
606
|
logger.error("Server process died unexpectedly")
|
|
575
|
-
self.metrics.increment(
|
|
607
|
+
self.metrics.increment("total_failures")
|
|
576
608
|
self.metrics.update(
|
|
577
|
-
last_failure=datetime.now(),
|
|
578
|
-
status='crashed'
|
|
609
|
+
last_failure=datetime.now(), status="crashed"
|
|
579
610
|
)
|
|
580
611
|
break
|
|
581
|
-
|
|
612
|
+
|
|
582
613
|
# Check health status
|
|
583
|
-
if
|
|
614
|
+
if (
|
|
615
|
+
self.health_monitor
|
|
616
|
+
and self.health_monitor.consecutive_failures
|
|
617
|
+
>= Config.UNHEALTHY_THRESHOLD
|
|
618
|
+
):
|
|
584
619
|
logger.error("Server is unhealthy, restarting...")
|
|
585
620
|
self._stop_server_process()
|
|
586
621
|
break
|
|
587
|
-
|
|
622
|
+
|
|
588
623
|
if self.shutdown_requested:
|
|
589
624
|
break
|
|
590
|
-
|
|
625
|
+
|
|
591
626
|
# Stop health monitor before restart
|
|
592
627
|
if self.health_monitor:
|
|
593
628
|
self.health_monitor.stop()
|
|
594
629
|
self.health_monitor = None
|
|
595
|
-
|
|
630
|
+
|
|
596
631
|
# Server crashed, apply backoff before restart
|
|
597
632
|
retry_count += 1
|
|
598
633
|
delay = self.backoff.next_delay()
|
|
599
|
-
logger.info(
|
|
634
|
+
logger.info(
|
|
635
|
+
f"Restarting in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})"
|
|
636
|
+
)
|
|
600
637
|
time.sleep(delay)
|
|
601
|
-
self.metrics.increment(
|
|
602
|
-
|
|
638
|
+
self.metrics.increment("restarts")
|
|
639
|
+
|
|
603
640
|
else:
|
|
604
641
|
# Failed to start
|
|
605
642
|
retry_count += 1
|
|
606
643
|
delay = self.backoff.next_delay()
|
|
607
|
-
logger.error(
|
|
644
|
+
logger.error(
|
|
645
|
+
f"Failed to start, retrying in {delay:.1f}s (attempt {retry_count}/{Config.MAX_RETRIES})"
|
|
646
|
+
)
|
|
608
647
|
time.sleep(delay)
|
|
609
|
-
|
|
648
|
+
|
|
610
649
|
except KeyboardInterrupt:
|
|
611
650
|
logger.info("Supervisor interrupted")
|
|
612
651
|
break
|
|
@@ -615,22 +654,22 @@ class DaemonSupervisor:
|
|
|
615
654
|
logger.debug(traceback.format_exc())
|
|
616
655
|
retry_count += 1
|
|
617
656
|
time.sleep(self.backoff.next_delay())
|
|
618
|
-
|
|
657
|
+
|
|
619
658
|
if retry_count >= Config.MAX_RETRIES:
|
|
620
659
|
logger.error(f"Max retries ({Config.MAX_RETRIES}) exceeded, giving up")
|
|
621
|
-
self.metrics.update(status=
|
|
622
|
-
|
|
660
|
+
self.metrics.update(status="failed")
|
|
661
|
+
|
|
623
662
|
self.cleanup()
|
|
624
|
-
|
|
663
|
+
|
|
625
664
|
def _stop_server_process(self):
|
|
626
665
|
"""Stop the server process gracefully."""
|
|
627
666
|
if not self.server_process:
|
|
628
667
|
return
|
|
629
|
-
|
|
668
|
+
|
|
630
669
|
try:
|
|
631
670
|
# Try graceful shutdown
|
|
632
671
|
os.kill(self.server_process, signal.SIGTERM)
|
|
633
|
-
|
|
672
|
+
|
|
634
673
|
# Wait for shutdown
|
|
635
674
|
start_time = time.time()
|
|
636
675
|
while time.time() - start_time < Config.SHUTDOWN_TIMEOUT:
|
|
@@ -638,45 +677,45 @@ class DaemonSupervisor:
|
|
|
638
677
|
logger.info("Server stopped gracefully")
|
|
639
678
|
return
|
|
640
679
|
time.sleep(0.5)
|
|
641
|
-
|
|
680
|
+
|
|
642
681
|
# Force kill if still running
|
|
643
682
|
logger.warning("Server didn't stop gracefully, forcing...")
|
|
644
683
|
os.kill(self.server_process, signal.SIGKILL)
|
|
645
684
|
time.sleep(Config.FORCE_KILL_TIMEOUT)
|
|
646
|
-
|
|
685
|
+
|
|
647
686
|
except Exception as e:
|
|
648
687
|
logger.error(f"Error stopping server: {e}")
|
|
649
|
-
|
|
688
|
+
|
|
650
689
|
def cleanup(self):
|
|
651
690
|
"""Clean up resources on shutdown."""
|
|
652
691
|
logger.info("Cleaning up supervisor resources")
|
|
653
|
-
|
|
692
|
+
|
|
654
693
|
# Stop health monitor
|
|
655
694
|
if self.health_monitor:
|
|
656
695
|
self.health_monitor.stop()
|
|
657
|
-
|
|
696
|
+
|
|
658
697
|
# Stop server process
|
|
659
698
|
if self.server_process:
|
|
660
699
|
self._stop_server_process()
|
|
661
|
-
|
|
700
|
+
|
|
662
701
|
# Clean up port registration
|
|
663
702
|
if self.selected_port:
|
|
664
703
|
instances = self.port_manager.load_instances()
|
|
665
704
|
for instance_id, info in instances.items():
|
|
666
|
-
if info.get(
|
|
705
|
+
if info.get("pid") == self.server_process:
|
|
667
706
|
self.port_manager.remove_instance(instance_id)
|
|
668
707
|
break
|
|
669
|
-
|
|
708
|
+
|
|
670
709
|
# Remove PID files
|
|
671
710
|
self.pid_file.unlink(missing_ok=True)
|
|
672
711
|
self.supervisor_pid_file.unlink(missing_ok=True)
|
|
673
|
-
|
|
712
|
+
|
|
674
713
|
# Update metrics
|
|
675
|
-
self.metrics.update(status=
|
|
676
|
-
|
|
714
|
+
self.metrics.update(status="stopped")
|
|
715
|
+
|
|
677
716
|
# Release lock
|
|
678
717
|
self.release_lock()
|
|
679
|
-
|
|
718
|
+
|
|
680
719
|
def handle_shutdown(self, signum, frame):
|
|
681
720
|
"""Handle shutdown signals."""
|
|
682
721
|
logger.info(f"Received signal {signum}, initiating shutdown...")
|
|
@@ -686,42 +725,42 @@ class DaemonSupervisor:
|
|
|
686
725
|
def start_daemon():
|
|
687
726
|
"""Start the hardened daemon with supervisor."""
|
|
688
727
|
supervisor = DaemonSupervisor()
|
|
689
|
-
|
|
728
|
+
|
|
690
729
|
# Check if already running
|
|
691
730
|
if supervisor.pid_file.exists():
|
|
692
731
|
try:
|
|
693
|
-
with open(supervisor.pid_file
|
|
732
|
+
with open(supervisor.pid_file) as f:
|
|
694
733
|
old_pid = int(f.read().strip())
|
|
695
|
-
|
|
734
|
+
|
|
696
735
|
if supervisor._is_process_alive(old_pid):
|
|
697
736
|
print(f"Socket.IO daemon is already running (PID: {old_pid})")
|
|
698
737
|
return
|
|
699
738
|
except:
|
|
700
739
|
pass
|
|
701
|
-
|
|
740
|
+
|
|
702
741
|
# Clean up stale PID file
|
|
703
742
|
supervisor.pid_file.unlink(missing_ok=True)
|
|
704
|
-
|
|
743
|
+
|
|
705
744
|
# Acquire lock
|
|
706
745
|
if not supervisor.acquire_lock():
|
|
707
746
|
print("Could not acquire lock - another instance may be running")
|
|
708
747
|
return
|
|
709
|
-
|
|
748
|
+
|
|
710
749
|
print("Starting hardened Socket.IO daemon with supervisor...")
|
|
711
750
|
print(f"Python: {PYTHON_EXECUTABLE}")
|
|
712
751
|
print(f"Max retries: {Config.MAX_RETRIES}")
|
|
713
752
|
print(f"Health checks: {'enabled' if Config.METRICS_ENABLED else 'disabled'}")
|
|
714
|
-
|
|
753
|
+
|
|
715
754
|
# Setup signal handlers
|
|
716
755
|
signal.signal(signal.SIGTERM, supervisor.handle_shutdown)
|
|
717
756
|
signal.signal(signal.SIGINT, supervisor.handle_shutdown)
|
|
718
|
-
|
|
757
|
+
|
|
719
758
|
try:
|
|
720
759
|
# Start monitoring and auto-restart loop
|
|
721
760
|
supervisor.monitor_and_restart()
|
|
722
761
|
finally:
|
|
723
762
|
supervisor.cleanup()
|
|
724
|
-
|
|
763
|
+
|
|
725
764
|
print("Socket.IO daemon stopped")
|
|
726
765
|
|
|
727
766
|
|
|
@@ -730,16 +769,16 @@ def stop_daemon():
|
|
|
730
769
|
deployment_root = get_project_root()
|
|
731
770
|
pid_file = deployment_root / ".claude-mpm" / "socketio-server.pid"
|
|
732
771
|
supervisor_pid_file = deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
|
|
733
|
-
|
|
772
|
+
|
|
734
773
|
# Try to stop supervisor first
|
|
735
774
|
if supervisor_pid_file.exists():
|
|
736
775
|
try:
|
|
737
|
-
with open(supervisor_pid_file
|
|
776
|
+
with open(supervisor_pid_file) as f:
|
|
738
777
|
supervisor_pid = int(f.read().strip())
|
|
739
|
-
|
|
778
|
+
|
|
740
779
|
print(f"Stopping supervisor (PID: {supervisor_pid})...")
|
|
741
780
|
os.kill(supervisor_pid, signal.SIGTERM)
|
|
742
|
-
|
|
781
|
+
|
|
743
782
|
# Wait for supervisor to stop
|
|
744
783
|
for _ in range(20):
|
|
745
784
|
if not psutil.pid_exists(supervisor_pid):
|
|
@@ -747,31 +786,31 @@ def stop_daemon():
|
|
|
747
786
|
supervisor_pid_file.unlink(missing_ok=True)
|
|
748
787
|
return
|
|
749
788
|
time.sleep(0.5)
|
|
750
|
-
|
|
789
|
+
|
|
751
790
|
# Force kill if needed
|
|
752
791
|
print("Supervisor didn't stop gracefully, forcing...")
|
|
753
792
|
os.kill(supervisor_pid, signal.SIGKILL)
|
|
754
793
|
supervisor_pid_file.unlink(missing_ok=True)
|
|
755
|
-
|
|
794
|
+
|
|
756
795
|
except Exception as e:
|
|
757
796
|
print(f"Error stopping supervisor: {e}")
|
|
758
|
-
|
|
797
|
+
|
|
759
798
|
# Also try to stop server directly if supervisor failed
|
|
760
799
|
if pid_file.exists():
|
|
761
800
|
try:
|
|
762
|
-
with open(pid_file
|
|
801
|
+
with open(pid_file) as f:
|
|
763
802
|
server_pid = int(f.read().strip())
|
|
764
|
-
|
|
803
|
+
|
|
765
804
|
if psutil.pid_exists(server_pid):
|
|
766
805
|
print(f"Stopping server (PID: {server_pid})...")
|
|
767
806
|
os.kill(server_pid, signal.SIGTERM)
|
|
768
807
|
time.sleep(2)
|
|
769
|
-
|
|
808
|
+
|
|
770
809
|
if psutil.pid_exists(server_pid):
|
|
771
810
|
os.kill(server_pid, signal.SIGKILL)
|
|
772
|
-
|
|
811
|
+
|
|
773
812
|
pid_file.unlink(missing_ok=True)
|
|
774
|
-
|
|
813
|
+
|
|
775
814
|
except Exception as e:
|
|
776
815
|
print(f"Error stopping server: {e}")
|
|
777
816
|
|
|
@@ -782,16 +821,16 @@ def status_daemon():
|
|
|
782
821
|
pid_file = deployment_root / ".claude-mpm" / "socketio-server.pid"
|
|
783
822
|
supervisor_pid_file = deployment_root / ".claude-mpm" / "socketio-supervisor.pid"
|
|
784
823
|
metrics_file = deployment_root / ".claude-mpm" / Config.METRICS_FILE
|
|
785
|
-
|
|
824
|
+
|
|
786
825
|
print("Socket.IO Daemon Status")
|
|
787
826
|
print("=" * 50)
|
|
788
|
-
|
|
827
|
+
|
|
789
828
|
# Check supervisor
|
|
790
829
|
if supervisor_pid_file.exists():
|
|
791
830
|
try:
|
|
792
|
-
with open(supervisor_pid_file
|
|
831
|
+
with open(supervisor_pid_file) as f:
|
|
793
832
|
supervisor_pid = int(f.read().strip())
|
|
794
|
-
|
|
833
|
+
|
|
795
834
|
if psutil.pid_exists(supervisor_pid):
|
|
796
835
|
process = psutil.Process(supervisor_pid)
|
|
797
836
|
print(f"✅ Supervisor: RUNNING (PID: {supervisor_pid})")
|
|
@@ -803,32 +842,33 @@ def status_daemon():
|
|
|
803
842
|
print("❌ Supervisor: ERROR reading status")
|
|
804
843
|
else:
|
|
805
844
|
print("❌ Supervisor: NOT RUNNING")
|
|
806
|
-
|
|
845
|
+
|
|
807
846
|
# Check server
|
|
808
847
|
if pid_file.exists():
|
|
809
848
|
try:
|
|
810
|
-
with open(pid_file
|
|
849
|
+
with open(pid_file) as f:
|
|
811
850
|
server_pid = int(f.read().strip())
|
|
812
|
-
|
|
851
|
+
|
|
813
852
|
if psutil.pid_exists(server_pid):
|
|
814
853
|
process = psutil.Process(server_pid)
|
|
815
854
|
print(f"✅ Server: RUNNING (PID: {server_pid})")
|
|
816
855
|
print(f" Memory: {process.memory_info().rss / 1024 / 1024:.1f} MB")
|
|
817
856
|
print(f" CPU: {process.cpu_percent()}%")
|
|
818
|
-
|
|
857
|
+
|
|
819
858
|
# Check port
|
|
820
859
|
port_file = deployment_root / ".claude-mpm" / "socketio-port"
|
|
821
860
|
if port_file.exists():
|
|
822
|
-
with open(port_file
|
|
861
|
+
with open(port_file) as f:
|
|
823
862
|
port = int(f.read().strip())
|
|
824
863
|
print(f" Port: {port}")
|
|
825
|
-
|
|
864
|
+
|
|
826
865
|
# Test connection
|
|
827
866
|
import socket
|
|
867
|
+
|
|
828
868
|
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
829
|
-
result = sock.connect_ex((
|
|
869
|
+
result = sock.connect_ex(("localhost", port))
|
|
830
870
|
sock.close()
|
|
831
|
-
|
|
871
|
+
|
|
832
872
|
if result == 0:
|
|
833
873
|
print(f" ✅ Listening on port {port}")
|
|
834
874
|
else:
|
|
@@ -839,13 +879,13 @@ def status_daemon():
|
|
|
839
879
|
print("❌ Server: ERROR reading status")
|
|
840
880
|
else:
|
|
841
881
|
print("❌ Server: NOT RUNNING")
|
|
842
|
-
|
|
882
|
+
|
|
843
883
|
# Show metrics
|
|
844
884
|
if metrics_file.exists():
|
|
845
885
|
try:
|
|
846
|
-
with open(metrics_file
|
|
886
|
+
with open(metrics_file) as f:
|
|
847
887
|
metrics = json.load(f)
|
|
848
|
-
|
|
888
|
+
|
|
849
889
|
print("\n📊 Metrics:")
|
|
850
890
|
print(f" Status: {metrics.get('status', 'unknown')}")
|
|
851
891
|
print(f" Uptime: {metrics.get('uptime_seconds', 0)} seconds")
|
|
@@ -853,15 +893,15 @@ def status_daemon():
|
|
|
853
893
|
print(f" Failures: {metrics.get('total_failures', 0)}")
|
|
854
894
|
print(f" Health Checks Passed: {metrics.get('health_checks_passed', 0)}")
|
|
855
895
|
print(f" Health Checks Failed: {metrics.get('health_checks_failed', 0)}")
|
|
856
|
-
|
|
857
|
-
if metrics.get(
|
|
896
|
+
|
|
897
|
+
if metrics.get("last_failure"):
|
|
858
898
|
print(f" Last Failure: {metrics['last_failure']}")
|
|
859
|
-
if metrics.get(
|
|
899
|
+
if metrics.get("last_health_check"):
|
|
860
900
|
print(f" Last Health Check: {metrics['last_health_check']}")
|
|
861
|
-
|
|
901
|
+
|
|
862
902
|
except Exception as e:
|
|
863
903
|
print(f"\n❌ Could not read metrics: {e}")
|
|
864
|
-
|
|
904
|
+
|
|
865
905
|
print("\n🔧 Configuration:")
|
|
866
906
|
print(f" Max Retries: {Config.MAX_RETRIES}")
|
|
867
907
|
print(f" Health Check Interval: {Config.HEALTH_CHECK_INTERVAL}s")
|
|
@@ -874,9 +914,9 @@ def main():
|
|
|
874
914
|
if len(sys.argv) < 2:
|
|
875
915
|
print("Usage: socketio-daemon-hardened.py {start|stop|restart|status}")
|
|
876
916
|
sys.exit(1)
|
|
877
|
-
|
|
917
|
+
|
|
878
918
|
command = sys.argv[1]
|
|
879
|
-
|
|
919
|
+
|
|
880
920
|
if command == "start":
|
|
881
921
|
start_daemon()
|
|
882
922
|
elif command == "stop":
|
|
@@ -894,4 +934,4 @@ def main():
|
|
|
894
934
|
|
|
895
935
|
|
|
896
936
|
if __name__ == "__main__":
|
|
897
|
-
main()
|
|
937
|
+
main()
|