@agentunion/kite 1.3.2 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +302 -0
- package/cli.js +119 -4
- package/core/dependency_checker.py +250 -0
- package/core/env_checker.py +490 -0
- package/dependencies_lock.json +128 -0
- package/extensions/agents/assistant/entry.py +111 -1
- package/extensions/agents/assistant/server.py +279 -215
- package/extensions/channels/acp_channel/entry.py +111 -1
- package/extensions/channels/acp_channel/module.md +23 -22
- package/extensions/channels/acp_channel/server.py +279 -215
- package/extensions/event_hub_bench/entry.py +107 -1
- package/extensions/services/backup/entry.py +306 -21
- package/extensions/services/backup/module.md +24 -22
- package/extensions/services/evol/auth_manager.py +443 -0
- package/extensions/services/evol/config.yaml +149 -0
- package/extensions/services/evol/config_loader.py +117 -0
- package/extensions/services/evol/entry.py +406 -0
- package/extensions/services/evol/evol_api.py +173 -0
- package/extensions/services/evol/evol_config.json5 +29 -0
- package/extensions/services/evol/migrate_tokens.py +122 -0
- package/extensions/services/evol/module.md +32 -0
- package/extensions/services/evol/pairing.py +250 -0
- package/extensions/services/evol/pairing_codes.jsonl +1 -0
- package/extensions/services/evol/relay.py +682 -0
- package/extensions/services/evol/relay_config.json5 +67 -0
- package/extensions/services/evol/routes/__init__.py +1 -0
- package/extensions/services/evol/routes/routes_management_ws.py +127 -0
- package/extensions/services/evol/routes/routes_rpc.py +89 -0
- package/extensions/services/evol/routes/routes_test.py +61 -0
- package/extensions/services/evol/server.py +875 -0
- package/extensions/services/evol/static/css/style.css +1200 -0
- package/extensions/services/evol/static/index.html +781 -0
- package/extensions/services/evol/static/index_evol.html +14 -0
- package/extensions/services/evol/static/js/app.js +6304 -0
- package/extensions/services/evol/static/js/auth.js +326 -0
- package/extensions/services/evol/static/js/dialog.js +285 -0
- package/extensions/services/evol/static/js/evol-app-fixed.js +50 -0
- package/extensions/services/evol/static/js/evol-app.js +1949 -0
- package/extensions/services/evol/static/js/evol-app.js.bak +1800 -0
- package/extensions/services/evol/static/js/kernel-client-example.js +228 -0
- package/extensions/services/evol/static/js/kernel-client.js +396 -0
- package/extensions/services/evol/static/js/main.js +141 -0
- package/extensions/services/evol/static/js/registry-tests.js +585 -0
- package/extensions/services/evol/static/js/stats.js +217 -0
- package/extensions/services/evol/static/js/token-manager.js +175 -0
- package/extensions/services/evol/static/pairing.html +248 -0
- package/extensions/services/evol/static/test_registry.html +262 -0
- package/extensions/services/evol/static/test_relay.html +462 -0
- package/extensions/services/evol/stats_manager.py +240 -0
- package/extensions/services/model_service/entry.py +167 -19
- package/extensions/services/model_service/module.md +21 -22
- package/extensions/services/proxy/.claude/settings.local.json +13 -0
- package/extensions/services/proxy/CHANGELOG_20260308.md +258 -0
- package/extensions/services/proxy/_fix_prints.py +133 -0
- package/extensions/services/proxy/_fix_prints2.py +87 -0
- package/extensions/services/proxy/agentcp/LICENCE +178 -0
- package/extensions/services/proxy/agentcp/README copy.md +85 -0
- package/extensions/services/proxy/agentcp/README.md +260 -0
- package/extensions/services/proxy/agentcp/__init__.py +16 -0
- package/extensions/services/proxy/agentcp/agent.py +4 -0
- package/extensions/services/proxy/agentcp/agentcp.py +2494 -0
- package/extensions/services/proxy/agentcp/agentprofile.json +89 -0
- package/extensions/services/proxy/agentcp/ap/__init__.py +16 -0
- package/extensions/services/proxy/agentcp/ap/ap_client.py +316 -0
- package/extensions/services/proxy/agentcp/assets/images/wechat_qr.png +0 -0
- package/extensions/services/proxy/agentcp/backup/metrics.json +31 -0
- package/extensions/services/proxy/agentcp/base/__init__.py +20 -0
- package/extensions/services/proxy/agentcp/base/auth_client.py +257 -0
- package/extensions/services/proxy/agentcp/base/client.py +112 -0
- package/extensions/services/proxy/agentcp/base/env.py +34 -0
- package/extensions/services/proxy/agentcp/base/html_util.py +336 -0
- package/extensions/services/proxy/agentcp/base/log.py +98 -0
- package/extensions/services/proxy/agentcp/ca/__init__.py +17 -0
- package/extensions/services/proxy/agentcp/ca/ca_client.py +414 -0
- package/extensions/services/proxy/agentcp/ca/ca_root.py +74 -0
- package/extensions/services/proxy/agentcp/context/__init__.py +20 -0
- package/extensions/services/proxy/agentcp/context/context.py +73 -0
- package/extensions/services/proxy/agentcp/context/exceptions.py +114 -0
- package/extensions/services/proxy/agentcp/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/create_profile_weather.py +125 -0
- package/extensions/services/proxy/agentcp/db/__init__.py +15 -0
- package/extensions/services/proxy/agentcp/db/db_mananger.py +550 -0
- package/extensions/services/proxy/agentcp/docs/UDP_HEARTBEAT_FIX_REPORT.md +265 -0
- package/extensions/services/proxy/agentcp/docs/heartbeat_issue_analysis.md +291 -0
- package/extensions/services/proxy/agentcp/file/__init__.py +16 -0
- package/extensions/services/proxy/agentcp/file/file_client.py +141 -0
- package/extensions/services/proxy/agentcp/file/wss_binary_message.py +137 -0
- package/extensions/services/proxy/agentcp/hcp.py +299 -0
- package/extensions/services/proxy/agentcp/heartbeat/__init__.py +16 -0
- package/extensions/services/proxy/agentcp/heartbeat/heartbeat_client.py +360 -0
- package/extensions/services/proxy/agentcp/improved_scheduler.py +498 -0
- package/extensions/services/proxy/agentcp/llm_agent_utils.py +249 -0
- package/extensions/services/proxy/agentcp/llm_server.py +172 -0
- package/extensions/services/proxy/agentcp/mermaid.py +210 -0
- package/extensions/services/proxy/agentcp/message.py +149 -0
- package/extensions/services/proxy/agentcp/metrics.py +256 -0
- package/extensions/services/proxy/agentcp/monitoring/__init__.py +20 -0
- package/extensions/services/proxy/agentcp/monitoring/global_monitor.py +27 -0
- package/extensions/services/proxy/agentcp/monitoring/metrics_store.py +325 -0
- package/extensions/services/proxy/agentcp/monitoring/monitoring_service.py +269 -0
- package/extensions/services/proxy/agentcp/monitoring/sliding_window.py +222 -0
- package/extensions/services/proxy/agentcp/monitoring/standalone_reader.py +224 -0
- package/extensions/services/proxy/agentcp/msg/__init__.py +21 -0
- package/extensions/services/proxy/agentcp/msg/connection_manager.py +456 -0
- package/extensions/services/proxy/agentcp/msg/message_client.py +2058 -0
- package/extensions/services/proxy/agentcp/msg/message_serialize.py +263 -0
- package/extensions/services/proxy/agentcp/msg/open_ai_message.py +88 -0
- package/extensions/services/proxy/agentcp/msg/session_manager.py +1062 -0
- package/extensions/services/proxy/agentcp/msg/stream_client.py +267 -0
- package/extensions/services/proxy/agentcp/msg/websocket_file_receiver.py +89 -0
- package/extensions/services/proxy/agentcp/msg/ws_logger.py +685 -0
- package/extensions/services/proxy/agentcp/msg/wss_binary_message.py +137 -0
- package/extensions/services/proxy/agentcp/requirements.txt +7 -0
- package/extensions/services/proxy/agentcp/samples/agent_graph/README.md +37 -0
- package/extensions/services/proxy/agentcp/samples/agent_graph/agentprofile.json +89 -0
- package/extensions/services/proxy/agentcp/samples/agent_graph/create_profile.py +138 -0
- package/extensions/services/proxy/agentcp/samples/agent_graph/main.py +164 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/create_profile.py +123 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/llm/create_profile.py +129 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/llm/env.json +5 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/llm/main.py +146 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/main.py +123 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/readme.md +379 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/search/create_profile.py +129 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/search/main.py +28 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/tool/create_profile.py +129 -0
- package/extensions/services/proxy/agentcp/samples/agent_use/tool/main.py +20 -0
- package/extensions/services/proxy/agentcp/samples/ali_amap/README.md +97 -0
- package/extensions/services/proxy/agentcp/samples/ali_amap/amap_agent.py +88 -0
- package/extensions/services/proxy/agentcp/samples/ali_amap/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/compute_agent/agent/powershell.py +228 -0
- package/extensions/services/proxy/agentcp/samples/compute_agent/agent/software.py +63 -0
- package/extensions/services/proxy/agentcp/samples/compute_agent/agent/tools.py +36 -0
- package/extensions/services/proxy/agentcp/samples/compute_agent/browser_user.py +41 -0
- package/extensions/services/proxy/agentcp/samples/deepseek/README.md +79 -0
- package/extensions/services/proxy/agentcp/samples/deepseek/create_profile.py +126 -0
- package/extensions/services/proxy/agentcp/samples/deepseek/deepseek.py +42 -0
- package/extensions/services/proxy/agentcp/samples/dify_chat/README.md +78 -0
- package/extensions/services/proxy/agentcp/samples/dify_chat/create_profile.py +126 -0
- package/extensions/services/proxy/agentcp/samples/dify_chat/dify_chat.py +47 -0
- package/extensions/services/proxy/agentcp/samples/dify_workflow/README.md +78 -0
- package/extensions/services/proxy/agentcp/samples/dify_workflow/create_profile.py +126 -0
- package/extensions/services/proxy/agentcp/samples/dify_workflow/dify_workflow.py +46 -0
- package/extensions/services/proxy/agentcp/samples/executor/README.md +44 -0
- package/extensions/services/proxy/agentcp/samples/executor/agentprofile.json +89 -0
- package/extensions/services/proxy/agentcp/samples/executor/create_profile.py +139 -0
- package/extensions/services/proxy/agentcp/samples/executor/main.py +160 -0
- package/extensions/services/proxy/agentcp/samples/filereader/README.md +45 -0
- package/extensions/services/proxy/agentcp/samples/filereader/agentprofile.json +90 -0
- package/extensions/services/proxy/agentcp/samples/filereader/create_profile.py +137 -0
- package/extensions/services/proxy/agentcp/samples/filereader/main.py +253 -0
- package/extensions/services/proxy/agentcp/samples/filewriter/README.md +38 -0
- package/extensions/services/proxy/agentcp/samples/filewriter/agentprofile.json +91 -0
- package/extensions/services/proxy/agentcp/samples/filewriter/create_profile.py +138 -0
- package/extensions/services/proxy/agentcp/samples/filewriter/main.py +289 -0
- package/extensions/services/proxy/agentcp/samples/hcp/README.md +85 -0
- package/extensions/services/proxy/agentcp/samples/hcp/acp_weather_agent.zip +0 -0
- package/extensions/services/proxy/agentcp/samples/hcp/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/hcp/hcp.py +237 -0
- package/extensions/services/proxy/agentcp/samples/helloworld/README.md +68 -0
- package/extensions/services/proxy/agentcp/samples/helloworld/hello_world.py +40 -0
- package/extensions/services/proxy/agentcp/samples/llm_agent/MEADME.md +117 -0
- package/extensions/services/proxy/agentcp/samples/llm_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/llm_agent/qwen_agent.py +136 -0
- package/extensions/services/proxy/agentcp/samples/local_llm_agent/README.md +90 -0
- package/extensions/services/proxy/agentcp/samples/local_llm_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/local_llm_agent/main.py +49 -0
- package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/README.md +55 -0
- package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/query_llm_from_agent/main.py +23 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/README.md +103 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_api_agent/main.py +69 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/README.md +58 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/query_weather_from_agent/main.py +25 -0
- package/extensions/services/proxy/agentcp/samples/qwen3/README.md +71 -0
- package/extensions/services/proxy/agentcp/samples/qwen3/create_profile.py +126 -0
- package/extensions/services/proxy/agentcp/samples/qwen3/qwen3.py +37 -0
- package/extensions/services/proxy/agentcp/samples/qwen3_tools/README.md +133 -0
- package/extensions/services/proxy/agentcp/samples/qwen3_tools/create_profile.py +126 -0
- package/extensions/services/proxy/agentcp/samples/qwen3_tools/qwen3_tools.py +98 -0
- package/extensions/services/proxy/agentcp/samples/search/create_profile_qwen.py +125 -0
- package/extensions/services/proxy/agentcp/samples/search/create_profile_search.py +125 -0
- package/extensions/services/proxy/agentcp/samples/search/qwen_agent.py +136 -0
- package/extensions/services/proxy/agentcp/samples/search/search_agent.py +170 -0
- package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/README.md +89 -0
- package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/create_profile.py +125 -0
- package/extensions/services/proxy/agentcp/samples/wrapper_agently_to_agent/main.py +44 -0
- package/extensions/services/proxy/agentcp/utils/__init__.py +15 -0
- package/extensions/services/proxy/agentcp/utils/file_util.py +117 -0
- package/extensions/services/proxy/agentcp/utils/proxy_bypass.py +99 -0
- package/extensions/services/proxy/agentcp/workflow.py +203 -0
- package/extensions/services/proxy/console_auth.py +109 -0
- package/extensions/services/proxy/evol/__init__.py +1 -0
- package/extensions/services/proxy/evol/config.py +37 -0
- package/extensions/services/proxy/evol/http/__init__.py +1 -0
- package/extensions/services/proxy/evol/http/async_http.py +551 -0
- package/extensions/services/proxy/evol/log.py +28 -0
- package/extensions/services/proxy/evol/presenter/__init__.py +2 -0
- package/extensions/services/proxy/evol/presenter/agentIdPresenter.py +1031 -0
- package/extensions/services/proxy/evol/presenter/apikeyPresenter.py +106 -0
- package/extensions/services/proxy/evol/presenter/configPresenter.py +1281 -0
- package/extensions/services/proxy/evol/presenter/userPresenter.py +477 -0
- package/extensions/services/proxy/evol/server/__init__.py +1 -0
- package/extensions/services/proxy/evol/server/claude_proxy_async.py +3430 -0
- package/extensions/services/proxy/evol/server/openclaw_proxy.py +1861 -0
- package/extensions/services/proxy/evol/server/proxy_config.py +15 -0
- package/extensions/services/proxy/evol/server/proxy_engine.py +501 -0
- package/extensions/services/proxy/evol/version.py +24 -0
- package/extensions/services/proxy/logs/websocket.log +260 -0
- package/extensions/services/proxy/main.py +240 -0
- package/extensions/services/proxy/requirements.txt +13 -0
- package/extensions/services/proxy/server.py +271 -0
- package/extensions/services/watchdog/entry.py +215 -26
- package/extensions/services/watchdog/module.md +1 -0
- package/extensions/services/watchdog/monitor.py +178 -38
- package/extensions/services/web/WEBSOCKET_STATUS.md +143 -0
- package/extensions/services/web/config_example.py +35 -0
- package/extensions/services/web/config_loader.py +110 -0
- package/extensions/services/web/entry.py +114 -26
- package/extensions/services/web/module.md +35 -24
- package/extensions/services/web/pairing.py +250 -0
- package/extensions/services/web/pairing_codes.jsonl +16 -0
- package/extensions/services/web/relay.py +643 -0
- package/extensions/services/web/relay_config.json5 +67 -0
- package/extensions/services/web/routes/routes_management_ws.py +127 -0
- package/extensions/services/web/routes/routes_rpc.py +89 -0
- package/extensions/services/web/routes/routes_test.py +61 -0
- package/extensions/services/web/routes/schemas.py +0 -22
- package/extensions/services/web/server.py +434 -99
- package/extensions/services/web/static/css/style.css +67 -28
- package/extensions/services/web/static/index.html +234 -44
- package/extensions/services/web/static/js/app.js +1335 -48
- package/extensions/services/web/static/js/kernel-client-example.js +161 -0
- package/extensions/services/web/static/js/kernel-client.js +383 -0
- package/extensions/services/web/static/js/registry-tests.js +558 -0
- package/extensions/services/web/static/js/token-manager.js +175 -0
- package/extensions/services/web/static/pairing.html +248 -0
- package/extensions/services/web/static/test_registry.html +262 -0
- package/extensions/services/web/web_config.json5 +29 -0
- package/kernel/entry.py +120 -32
- package/kernel/event_hub.py +141 -16
- package/kernel/module.md +60 -33
- package/kernel/registry_store.py +45 -36
- package/kernel/rpc_router.py +152 -59
- package/kernel/server.py +322 -26
- package/kite_cli/__init__.py +3 -0
- package/kite_cli/__main__.py +5 -0
- package/kite_cli/commands/__init__.py +1 -0
- package/kite_cli/commands/clean.py +101 -0
- package/kite_cli/commands/deps_install.py +67 -0
- package/kite_cli/commands/doctor.py +35 -0
- package/kite_cli/commands/env_check.py +45 -0
- package/kite_cli/commands/history.py +111 -0
- package/kite_cli/commands/info.py +96 -0
- package/kite_cli/commands/install.py +313 -0
- package/kite_cli/commands/list.py +143 -0
- package/kite_cli/commands/log.py +81 -0
- package/kite_cli/commands/prepare.py +49 -0
- package/kite_cli/commands/rollback.py +88 -0
- package/kite_cli/commands/search.py +73 -0
- package/kite_cli/commands/uninstall.py +85 -0
- package/kite_cli/commands/update.py +118 -0
- package/kite_cli/commands/venv_setup.py +56 -0
- package/kite_cli/core/__init__.py +1 -0
- package/kite_cli/core/checker.py +142 -0
- package/kite_cli/core/dependency.py +229 -0
- package/kite_cli/core/downloader.py +209 -0
- package/kite_cli/core/install_info.py +40 -0
- package/kite_cli/core/tool_installer.py +397 -0
- package/kite_cli/core/validator.py +78 -0
- package/kite_cli/main.py +317 -0
- package/kite_cli/utils/__init__.py +1 -0
- package/kite_cli/utils/i18n.py +252 -0
- package/kite_cli/utils/interactive.py +63 -0
- package/kite_cli/utils/operation_log.py +77 -0
- package/kite_cli/utils/paths.py +34 -0
- package/kite_cli/utils/version.py +308 -0
- package/launcher/entry.py +1124 -178
- package/launcher/logging_setup.py +104 -0
- package/launcher/module.md +46 -37
- package/launcher/module_scanner.py +11 -1
- package/main.py +4 -1
- package/package.json +9 -1
- package/python_version.json +4 -0
- package/requirements.txt +38 -0
- package/scripts/env-manager.js +328 -0
- package/scripts/plan_manager.py +315 -0
- package/scripts/python-env.js +79 -0
- package/scripts/scan_dependencies.py +461 -0
- package/scripts/setup-python-env.js +191 -0
- package/extensions/services/web/routes/routes_modules.py +0 -249
package/launcher/entry.py
CHANGED
|
@@ -29,10 +29,21 @@ from .process_manager import ProcessManager
|
|
|
29
29
|
IS_WINDOWS = sys.platform == "win32"
|
|
30
30
|
|
|
31
31
|
# Shutdown timeout constants (seconds)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
32
|
+
|
|
33
|
+
# 不支持优雅关闭
|
|
34
|
+
SHUTDOWN_TIMEOUT_NON_GRACEFUL = 0.3 # SIGTERM 后等待时间
|
|
35
|
+
|
|
36
|
+
# 支持优雅关闭 - 等待响应
|
|
37
|
+
SHUTDOWN_TIMEOUT_ACK = 3.0 # 等待 shutdown.ack
|
|
38
|
+
SHUTDOWN_TIMEOUT_EXITING = 3.0 # 等待 module.exiting
|
|
39
|
+
|
|
40
|
+
# 清理超时(从 exiting 事件获取)
|
|
41
|
+
CLEANUP_TIMEOUT_DEFAULT = 5.0 # 默认清理时间
|
|
42
|
+
CLEANUP_TIMEOUT_MIN = 0.0 # 最小清理时间
|
|
43
|
+
CLEANUP_TIMEOUT_MAX = 30.0 # 最大清理时间
|
|
44
|
+
|
|
45
|
+
# 批量关闭安全网
|
|
46
|
+
SHUTDOWN_TIMEOUT_BULK = 3.0
|
|
36
47
|
|
|
37
48
|
# Core module names that are started in Phase 1 (not Phase 2)
|
|
38
49
|
CORE_MODULE_NAMES = {"kernel"}
|
|
@@ -62,16 +73,25 @@ class Launcher:
|
|
|
62
73
|
discovery=self._load_discovery(),
|
|
63
74
|
)
|
|
64
75
|
|
|
76
|
+
# Load relay configuration
|
|
77
|
+
relay_config = self._load_relay_config()
|
|
78
|
+
self._relay_modules = relay_config.get("modules", [])
|
|
79
|
+
self._relay_token_limits = relay_config.get("token_limits", {})
|
|
80
|
+
|
|
65
81
|
self.kernel_port: int = 0
|
|
66
82
|
self.modules: dict[str, ModuleInfo] = {}
|
|
67
83
|
self._shutdown_event = asyncio.Event()
|
|
68
84
|
self._thread_shutdown = threading.Event()
|
|
69
85
|
self._shutdown_complete = threading.Event() # Set when normal shutdown finishes
|
|
70
86
|
self._module_tokens: dict[str, str] = {} # module_name -> per-module token
|
|
87
|
+
self._client_tokens: dict[str, str] = {} # virtual module_id -> kernel_token (for relay modules)
|
|
71
88
|
|
|
72
89
|
# Three-layer state model: desired_state per module
|
|
73
90
|
self._desired_states: dict[str, str] = {} # module_name -> "running" | "stopped"
|
|
74
91
|
|
|
92
|
+
# Relay module configuration (already loaded above, don't reinitialize)
|
|
93
|
+
# self._relay_modules and self._relay_token_limits are set in lines 78-79
|
|
94
|
+
|
|
75
95
|
# Kernel WebSocket client
|
|
76
96
|
self._ws: object | None = None
|
|
77
97
|
self._ws_task: asyncio.Task | None = None
|
|
@@ -101,6 +121,9 @@ class Launcher:
|
|
|
101
121
|
# System-wide shutdown flag: prevents Watchdog restart during shutdown
|
|
102
122
|
self._system_shutting_down = False
|
|
103
123
|
|
|
124
|
+
# 模块退出状态跟踪(防止 stopped 事件重复发送)
|
|
125
|
+
self._module_states: dict[str, dict] = {}
|
|
126
|
+
|
|
104
127
|
# Kite stdout message waiters: {waiter_key: (threading.Event, data_dict)}
|
|
105
128
|
# Used by ProcessManager stdout callback (cross-thread)
|
|
106
129
|
self._msg_waiters: dict[str, tuple[threading.Event, dict]] = {}
|
|
@@ -117,6 +140,9 @@ class Launcher:
|
|
|
117
140
|
pass
|
|
118
141
|
os.environ["KITE_INSTANCE_SUFFIX"] = suffix
|
|
119
142
|
|
|
143
|
+
# Record launcher startup
|
|
144
|
+
self._record_launcher_startup()
|
|
145
|
+
|
|
120
146
|
@staticmethod
|
|
121
147
|
def _fmt_elapsed(seconds: float) -> str:
|
|
122
148
|
"""Format elapsed seconds: <1s → 'NNNms', >=1s → 'N.Ns', >=10s → 'NNs'."""
|
|
@@ -509,10 +535,14 @@ class Launcher:
|
|
|
509
535
|
ready = await self._wait_event("module.ready", "kernel", timeout=15)
|
|
510
536
|
if ready:
|
|
511
537
|
self._graceful_modules["kernel"] = bool(ready.get("graceful_shutdown"))
|
|
512
|
-
|
|
538
|
+
# Use startup_time from module.ready event
|
|
539
|
+
startup_time = ready.get("startup_time", time.monotonic() - t_ws)
|
|
540
|
+
self._ready_times["kernel"] = startup_time
|
|
541
|
+
startup_str = f"{startup_time:.3f}s" if startup_time < 10 else f"{startup_time:.2f}s"
|
|
542
|
+
print(f"[launcher] Kernel 已就绪 ({startup_str})")
|
|
513
543
|
else:
|
|
514
544
|
print("\033[91m[launcher] 警告: Kernel 在 15s 内未发送 module.ready\033[0m")
|
|
515
|
-
|
|
545
|
+
self._ready_times["kernel"] = time.monotonic() - t_ws
|
|
516
546
|
|
|
517
547
|
await asyncio.gather(
|
|
518
548
|
_scan_and_generate_tokens(),
|
|
@@ -597,6 +627,11 @@ class Launcher:
|
|
|
597
627
|
print(f"[launcher] Kernel 重连失败 {max_retries} 次,退出")
|
|
598
628
|
sys.exit(1)
|
|
599
629
|
print(f"[launcher] Kernel 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
|
|
630
|
+
if attempt == 5:
|
|
631
|
+
print(f"\033[33m[launcher] 提示: 已连续 {attempt} 次无法连接 Kernel (端口 {self.kernel_port})")
|
|
632
|
+
if self.kernel_port < 1024:
|
|
633
|
+
print(f"[launcher] ⚠ 端口 {self.kernel_port} 异常偏低,可能是 Kernel 端口绑定失败或配置错误")
|
|
634
|
+
print(f"[launcher] 请检查: 1) Kernel 进程是否存活 2) kernel/module.md 中 preferred_port 配置是否正确\033[0m")
|
|
600
635
|
self._ws = None
|
|
601
636
|
if self._thread_shutdown.is_set():
|
|
602
637
|
return
|
|
@@ -608,7 +643,7 @@ class Launcher:
|
|
|
608
643
|
launcher_token = self._module_tokens.get("launcher", "")
|
|
609
644
|
ws_url = f"ws://127.0.0.1:{self.kernel_port}/ws?token={launcher_token}&id=launcher"
|
|
610
645
|
t_ws_connect = time.monotonic()
|
|
611
|
-
async with websockets.connect(ws_url, open_timeout=3, ping_interval=None,
|
|
646
|
+
async with websockets.connect(ws_url, open_timeout=3, ping_interval=None, close_timeout=10) as ws:
|
|
612
647
|
self._ws = ws
|
|
613
648
|
_ws_s = time.monotonic() - t_ws_connect
|
|
614
649
|
print(f"[launcher] 已连接到 Kernel ({self._fmt_elapsed(_ws_s)})")
|
|
@@ -632,20 +667,53 @@ class Launcher:
|
|
|
632
667
|
await self._rpc_call(ws, "registry.register", {
|
|
633
668
|
"module_id": "launcher",
|
|
634
669
|
"module_type": "infrastructure",
|
|
670
|
+
"tools": {
|
|
671
|
+
"rpc": {
|
|
672
|
+
"launcher": {
|
|
673
|
+
"list_modules": {"method": "list_modules", "description": "列出所有模块"},
|
|
674
|
+
"start_module": {"method": "start_module", "description": "启动模块"},
|
|
675
|
+
"stop_module": {"method": "stop_module", "description": "停止模块"},
|
|
676
|
+
"restart_module": {"method": "restart_module", "description": "重启模块"},
|
|
677
|
+
"restart_launcher": {"method": "restart_launcher", "description": "重启 Launcher"},
|
|
678
|
+
"rescan": {"method": "rescan", "description": "重新扫描模块"},
|
|
679
|
+
"shutdown": {"method": "shutdown", "description": "关闭系统"},
|
|
680
|
+
"request_client_token": {"method": "request_client_token", "description": "为 Web 客户端申请 Kernel Token"},
|
|
681
|
+
},
|
|
682
|
+
"module": {
|
|
683
|
+
"config": {
|
|
684
|
+
"get": {"method": "get_module_config", "description": "获取模块配置"},
|
|
685
|
+
"update": {"method": "update_module_config", "description": "更新模块配置"},
|
|
686
|
+
"reset": {"method": "reset_module_config", "description": "恢复默认配置"},
|
|
687
|
+
}
|
|
688
|
+
}
|
|
689
|
+
}
|
|
690
|
+
},
|
|
635
691
|
"events_publish": {
|
|
636
|
-
"
|
|
637
|
-
|
|
638
|
-
|
|
692
|
+
"system": {
|
|
693
|
+
"ready": {"description": "系统启动完成"}
|
|
694
|
+
},
|
|
695
|
+
"module": {
|
|
696
|
+
"starting": {"description": "模块启动中"},
|
|
697
|
+
"started": {"description": "模块已启动"},
|
|
698
|
+
"ready": {"description": "模块就绪"},
|
|
699
|
+
"stopped": {"description": "模块已停止"},
|
|
700
|
+
"exiting": {"description": "模块退出中"},
|
|
701
|
+
"shutdown": {"description": "模块关闭"}
|
|
702
|
+
}
|
|
639
703
|
},
|
|
640
704
|
"events_subscribe": [">"],
|
|
641
705
|
})
|
|
642
706
|
print("[launcher] 已注册到 Kernel")
|
|
643
707
|
|
|
644
708
|
# Publish module.ready for Launcher itself (every reconnect)
|
|
709
|
+
startup_time = time.monotonic() - self._t_start
|
|
645
710
|
await self._publish_event("module.ready", {
|
|
646
711
|
"module_id": "launcher",
|
|
647
712
|
"graceful_shutdown": True,
|
|
713
|
+
"startup_time": startup_time,
|
|
648
714
|
})
|
|
715
|
+
# Record launcher's own startup time
|
|
716
|
+
self._ready_times["launcher"] = startup_time
|
|
649
717
|
|
|
650
718
|
# Signal that connection is ready (after subscription and registration)
|
|
651
719
|
if self._ws_connected:
|
|
@@ -658,7 +726,14 @@ class Launcher:
|
|
|
658
726
|
raise
|
|
659
727
|
|
|
660
728
|
async def _ws_receiver(self, ws):
|
|
661
|
-
"""Receive loop: classify incoming messages.
|
|
729
|
+
"""Receive loop: classify incoming messages.
|
|
730
|
+
|
|
731
|
+
CRITICAL: RPC 死锁防范
|
|
732
|
+
- 入站 RPC 请求必须用 create_task() 异步执行,不可 await
|
|
733
|
+
- 原因:如果 handler 内部调用 rpc_call() 发出站请求,出站响应需要本接收循环来分发
|
|
734
|
+
- 如果接收循环被 await handler 阻塞,出站响应永远收不到 → 超时死锁
|
|
735
|
+
- 事件通知和 RPC 响应可以同步处理(它们不会反向调用 rpc_call)
|
|
736
|
+
"""
|
|
662
737
|
try:
|
|
663
738
|
async for raw in ws:
|
|
664
739
|
try:
|
|
@@ -676,7 +751,8 @@ class Launcher:
|
|
|
676
751
|
await self._handle_event_notification(msg)
|
|
677
752
|
elif has_method and has_id:
|
|
678
753
|
# Incoming RPC request (forwarded by Kernel)
|
|
679
|
-
|
|
754
|
+
# Run in background so receiver loop continues processing responses
|
|
755
|
+
asyncio.create_task(self._handle_rpc_request(ws, msg))
|
|
680
756
|
elif has_id and (has_result or has_error):
|
|
681
757
|
# RPC response (to our own call)
|
|
682
758
|
self._handle_rpc_response(msg)
|
|
@@ -717,6 +793,17 @@ class Launcher:
|
|
|
717
793
|
self._rpc_results[rpc_id] = msg
|
|
718
794
|
waiter.set()
|
|
719
795
|
|
|
796
|
+
async def _handle_ping_event(self, data: dict):
|
|
797
|
+
"""Handle system.ping event and reply with system.pong."""
|
|
798
|
+
t1 = data.get("ping_time")
|
|
799
|
+
t2 = time.time()
|
|
800
|
+
|
|
801
|
+
await self._publish_event("system.pong", {
|
|
802
|
+
"module_id": "launcher",
|
|
803
|
+
"ping_time": t1,
|
|
804
|
+
"pong_time": t2,
|
|
805
|
+
})
|
|
806
|
+
|
|
720
807
|
async def _handle_event_notification(self, msg: dict):
|
|
721
808
|
"""Handle an event notification (JSON-RPC 2.0 Notification with method='event')."""
|
|
722
809
|
params = msg.get("params", {})
|
|
@@ -725,9 +812,15 @@ class Launcher:
|
|
|
725
812
|
data = params.get("data") if isinstance(params.get("data"), dict) else {}
|
|
726
813
|
ts = params.get("timestamp", "")
|
|
727
814
|
|
|
815
|
+
# Handle system.ping event
|
|
816
|
+
if event == "system.ping":
|
|
817
|
+
await self._handle_ping_event(data)
|
|
818
|
+
return
|
|
819
|
+
|
|
728
820
|
# Trigger event waiters
|
|
729
821
|
module_id = data.get("module_id", "")
|
|
730
822
|
waiter_key = f"{event}:{module_id}"
|
|
823
|
+
|
|
731
824
|
waiter = self._event_waiters.get(waiter_key)
|
|
732
825
|
if waiter:
|
|
733
826
|
waiter[1].update(data)
|
|
@@ -735,6 +828,14 @@ class Launcher:
|
|
|
735
828
|
|
|
736
829
|
# module.exiting also wakes module.ready waiter
|
|
737
830
|
if event == "module.exiting" and module_id:
|
|
831
|
+
# 处理 token 释放(如果是虚拟模块且标记了 token_revoked)
|
|
832
|
+
token_revoked = data.get("token_revoked", False)
|
|
833
|
+
if token_revoked and module_id in self._client_tokens:
|
|
834
|
+
del self._client_tokens[module_id]
|
|
835
|
+
print(f"[launcher] Token revoked for {module_id}")
|
|
836
|
+
# 记录审计日志
|
|
837
|
+
self._log_token_request("system", module_id, "revoke", True)
|
|
838
|
+
|
|
738
839
|
ready_key = f"module.ready:{module_id}"
|
|
739
840
|
ready_waiter = self._event_waiters.get(ready_key)
|
|
740
841
|
if ready_waiter:
|
|
@@ -742,6 +843,42 @@ class Launcher:
|
|
|
742
843
|
ready_waiter[1]["_exited"] = True
|
|
743
844
|
ready_waiter[0].set()
|
|
744
845
|
|
|
846
|
+
# 处理主动退出场景(没有 shutdown 的情况)
|
|
847
|
+
if module_id not in self._module_states:
|
|
848
|
+
self._init_module_state(module_id)
|
|
849
|
+
state = self._module_states[module_id]
|
|
850
|
+
|
|
851
|
+
if not state.get("shutdown_sent"):
|
|
852
|
+
# 主动退出:记录信息
|
|
853
|
+
if not state.get("exiting_received"):
|
|
854
|
+
state["exiting_received"] = True
|
|
855
|
+
state["reason"] = data.get("reason", "active_exit")
|
|
856
|
+
state["restart"] = data.get("restart", False)
|
|
857
|
+
cleanup_timeout = data.get("cleanup_timeout", CLEANUP_TIMEOUT_DEFAULT)
|
|
858
|
+
cleanup_timeout = max(CLEANUP_TIMEOUT_MIN, min(cleanup_timeout, CLEANUP_TIMEOUT_MAX))
|
|
859
|
+
state["cleanup_timeout"] = cleanup_timeout
|
|
860
|
+
|
|
861
|
+
# 启动清理超时任务
|
|
862
|
+
async def cleanup_timeout_handler():
|
|
863
|
+
await asyncio.sleep(state["cleanup_timeout"])
|
|
864
|
+
if not state.get("stopped_sent"):
|
|
865
|
+
state["stopped_sent"] = True
|
|
866
|
+
self._kill_process(module_id)
|
|
867
|
+
|
|
868
|
+
# 发送 stopped 事件
|
|
869
|
+
await self._publish_event("module.stopped", {
|
|
870
|
+
"module_id": module_id,
|
|
871
|
+
"exit_code": -1, # 超时强制终止,退出码未知
|
|
872
|
+
"exit_type": "timeout",
|
|
873
|
+
"reason": state.get("reason", "cleanup_timeout"),
|
|
874
|
+
"restart": state.get("restart", False),
|
|
875
|
+
"ready_received": False,
|
|
876
|
+
})
|
|
877
|
+
|
|
878
|
+
self._log_lifecycle("stopped", module_id, reason=state["reason"])
|
|
879
|
+
|
|
880
|
+
state["cleanup_task"] = asyncio.create_task(cleanup_timeout_handler())
|
|
881
|
+
|
|
745
882
|
# module.crash → print red crash summary
|
|
746
883
|
if event == "module.crash" and module_id:
|
|
747
884
|
RED = "\033[91m"
|
|
@@ -756,6 +893,45 @@ class Launcher:
|
|
|
756
893
|
)
|
|
757
894
|
print(f"[launcher] 崩溃日志: {crash_log}")
|
|
758
895
|
|
|
896
|
+
# pairing.status → handle all pairing flow events
|
|
897
|
+
if event == "pairing.status":
|
|
898
|
+
GREEN = "\033[92m"
|
|
899
|
+
RED = "\033[91m"
|
|
900
|
+
RESET = "\033[0m"
|
|
901
|
+
|
|
902
|
+
step = data.get("step", "")
|
|
903
|
+
success = data.get("success", True)
|
|
904
|
+
|
|
905
|
+
if step == "code_generated":
|
|
906
|
+
code = data.get("code", "")
|
|
907
|
+
expires_in = data.get("expires_in", 300)
|
|
908
|
+
module_id = data.get("module_id", "unknown")
|
|
909
|
+
if code:
|
|
910
|
+
print(f"[launcher] {GREEN}配对码: {code}{RESET}")
|
|
911
|
+
print(f"[launcher] {GREEN}有效期: {expires_in} 秒{RESET}")
|
|
912
|
+
print(f"[launcher] {GREEN}来源模块: {module_id}{RESET}")
|
|
913
|
+
print(f"[launcher] {GREEN}访问 Web 界面时使用此配对码进行配对{RESET}")
|
|
914
|
+
|
|
915
|
+
elif step == "pairing":
|
|
916
|
+
if success:
|
|
917
|
+
print(f"[launcher] {GREEN}正在配对...{RESET}")
|
|
918
|
+
else:
|
|
919
|
+
reason = data.get("reason", "Unknown error")
|
|
920
|
+
print(f"[launcher] {RED}✗ 配对失败: {reason}{RESET}")
|
|
921
|
+
|
|
922
|
+
elif step == "completed":
|
|
923
|
+
if success:
|
|
924
|
+
module_id = data.get("module_id", "")
|
|
925
|
+
role = data.get("role", "")
|
|
926
|
+
print(f"[launcher] {GREEN}✓ 配对成功!{RESET}")
|
|
927
|
+
print(f"[launcher] {GREEN} 模块 ID: {module_id}{RESET}")
|
|
928
|
+
print(f"[launcher] {GREEN} 角色: {role}{RESET}")
|
|
929
|
+
else:
|
|
930
|
+
reason = data.get("reason", "Unknown error")
|
|
931
|
+
print(f"[launcher] {RED}✗ 配对失败: {reason}{RESET}")
|
|
932
|
+
|
|
933
|
+
return
|
|
934
|
+
|
|
759
935
|
# Only log system events (module.*, watchdog.*) to avoid flooding
|
|
760
936
|
if not (event.startswith("module.") or event.startswith("watchdog.")):
|
|
761
937
|
return
|
|
@@ -780,12 +956,18 @@ class Launcher:
|
|
|
780
956
|
params = msg.get("params", {})
|
|
781
957
|
|
|
782
958
|
handlers = {
|
|
783
|
-
"list_modules":
|
|
784
|
-
"start_module":
|
|
785
|
-
"stop_module":
|
|
786
|
-
"restart_module":
|
|
787
|
-
"
|
|
788
|
-
"
|
|
959
|
+
"list_modules": self._rpc_list_modules,
|
|
960
|
+
"start_module": self._rpc_start_module,
|
|
961
|
+
"stop_module": self._rpc_stop_module,
|
|
962
|
+
"restart_module": self._rpc_restart_module,
|
|
963
|
+
"restart_launcher": self._rpc_restart_launcher,
|
|
964
|
+
"rescan": self._rpc_rescan,
|
|
965
|
+
"shutdown": self._rpc_shutdown,
|
|
966
|
+
"get_module_config": self._rpc_get_module_config,
|
|
967
|
+
"update_module_config": self._rpc_update_module_config,
|
|
968
|
+
"reset_module_config": self._rpc_reset_module_config,
|
|
969
|
+
"request_client_token": self._rpc_request_client_token,
|
|
970
|
+
"release_client_token": self._rpc_release_client_token,
|
|
789
971
|
}
|
|
790
972
|
handler = handlers.get(method)
|
|
791
973
|
if handler:
|
|
@@ -807,20 +989,53 @@ class Launcher:
|
|
|
807
989
|
|
|
808
990
|
async def _rpc_list_modules(self, params: dict) -> dict:
|
|
809
991
|
"""List all modules and their current status."""
|
|
992
|
+
# Get ping/pong latencies from Kernel
|
|
993
|
+
latencies = {}
|
|
994
|
+
try:
|
|
995
|
+
latencies_resp = await self._rpc_call(self._ws, "kernel.latencies", {}, timeout=2)
|
|
996
|
+
if "result" in latencies_resp:
|
|
997
|
+
latencies = latencies_resp["result"].get("latencies", {})
|
|
998
|
+
except Exception as e:
|
|
999
|
+
print(f"[launcher] Failed to get latencies: {e}")
|
|
1000
|
+
|
|
1001
|
+
current_time = time.time()
|
|
810
1002
|
result = []
|
|
811
1003
|
for name, info in self.modules.items():
|
|
812
1004
|
running = self.process_manager.is_running(name)
|
|
813
1005
|
rec = self.process_manager.get_record(name)
|
|
1006
|
+
|
|
1007
|
+
# Get latency info for this module
|
|
1008
|
+
latency_info = latencies.get(name, {})
|
|
1009
|
+
ping_status = latency_info.get("status", "never")
|
|
1010
|
+
|
|
1011
|
+
# Calculate uptime (running time in seconds)
|
|
1012
|
+
uptime_seconds = None
|
|
1013
|
+
if running and rec and rec.started_at:
|
|
1014
|
+
uptime_seconds = current_time - rec.started_at
|
|
1015
|
+
|
|
814
1016
|
result.append({
|
|
815
1017
|
"name": name,
|
|
816
1018
|
"display_name": info.display_name,
|
|
817
1019
|
"type": info.type,
|
|
818
|
-
"
|
|
1020
|
+
"state": info.state, # 改名为 state(与 /api/modules 一致)
|
|
1021
|
+
"version": info.version,
|
|
1022
|
+
"runtime": info.runtime,
|
|
1023
|
+
"preferred_port": info.preferred_port,
|
|
1024
|
+
"monitor": info.monitor,
|
|
1025
|
+
"display_order": info.display_order,
|
|
819
1026
|
"desired_state": self._desired_states.get(name, "stopped"),
|
|
820
1027
|
"actual_state": f"running({rec.pid})" if running and rec else "stopped",
|
|
821
1028
|
"pid": rec.pid if running and rec else None,
|
|
822
|
-
"
|
|
1029
|
+
"startup_time": self._ready_times.get(name), # Module startup time in seconds
|
|
1030
|
+
"uptime_seconds": uptime_seconds, # Running time in seconds
|
|
1031
|
+
"ping_status": ping_status,
|
|
1032
|
+
"ping_outbound_ms": latency_info.get("outbound"),
|
|
1033
|
+
"ping_inbound_ms": latency_info.get("inbound"),
|
|
823
1034
|
})
|
|
1035
|
+
|
|
1036
|
+
# Sort by display_order (descending), then by name (ascending)
|
|
1037
|
+
result.sort(key=lambda m: (-m["display_order"], m["name"]))
|
|
1038
|
+
|
|
824
1039
|
return {"modules": result}
|
|
825
1040
|
|
|
826
1041
|
async def _rpc_start_module(self, params: dict) -> dict:
|
|
@@ -911,30 +1126,243 @@ class Launcher:
|
|
|
911
1126
|
self._request_shutdown(f"RPC shutdown request: {reason}")
|
|
912
1127
|
return {"status": "shutting_down", "reason": reason}
|
|
913
1128
|
|
|
1129
|
+
async def _rpc_get_module_config(self, params: dict) -> dict:
|
|
1130
|
+
"""获取指定模块的配置(通用降级方案)"""
|
|
1131
|
+
import re
|
|
1132
|
+
import yaml
|
|
1133
|
+
from pathlib import Path
|
|
1134
|
+
|
|
1135
|
+
module_name = params.get("module_name")
|
|
1136
|
+
if not module_name:
|
|
1137
|
+
raise ValueError("module_name required")
|
|
1138
|
+
|
|
1139
|
+
# 查找模块信息
|
|
1140
|
+
info = self.modules.get(module_name)
|
|
1141
|
+
if not info:
|
|
1142
|
+
raise RuntimeError(f"Module '{module_name}' not found")
|
|
1143
|
+
|
|
1144
|
+
# 读取 module.md
|
|
1145
|
+
md_path = Path(info.module_dir) / "module.md"
|
|
1146
|
+
if not md_path.exists():
|
|
1147
|
+
raise RuntimeError(f"module.md not found for '{module_name}'")
|
|
1148
|
+
|
|
1149
|
+
text = md_path.read_text(encoding="utf-8")
|
|
1150
|
+
m = re.match(r'^---\s*\n(.*?)\n---\s*\n?(.*)', text, re.DOTALL)
|
|
1151
|
+
if not m:
|
|
1152
|
+
frontmatter = {}
|
|
1153
|
+
else:
|
|
1154
|
+
frontmatter = yaml.safe_load(m.group(1)) or {}
|
|
1155
|
+
|
|
1156
|
+
# 读取 config.yaml(如果存在)
|
|
1157
|
+
config_path = Path(info.module_dir) / "config.yaml"
|
|
1158
|
+
config = None
|
|
1159
|
+
if config_path.exists():
|
|
1160
|
+
config = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
|
|
1161
|
+
|
|
1162
|
+
return {
|
|
1163
|
+
"name": frontmatter.get("name", module_name),
|
|
1164
|
+
"display_name": frontmatter.get("display_name", ""),
|
|
1165
|
+
"type": frontmatter.get("type", ""),
|
|
1166
|
+
"state": frontmatter.get("state", "enabled"),
|
|
1167
|
+
"version": frontmatter.get("version", ""),
|
|
1168
|
+
"runtime": frontmatter.get("runtime", ""),
|
|
1169
|
+
"entry": frontmatter.get("entry", ""),
|
|
1170
|
+
"preferred_port": frontmatter.get("preferred_port"),
|
|
1171
|
+
"advertise_ip": frontmatter.get("advertise_ip"),
|
|
1172
|
+
"monitor": frontmatter.get("monitor"),
|
|
1173
|
+
"events": frontmatter.get("events"),
|
|
1174
|
+
"subscriptions": frontmatter.get("subscriptions"),
|
|
1175
|
+
"depends_on": frontmatter.get("depends_on"),
|
|
1176
|
+
"source_path": str(info.module_dir), # 添加模块路径
|
|
1177
|
+
"has_config": config is not None,
|
|
1178
|
+
"config": config,
|
|
1179
|
+
}
|
|
1180
|
+
|
|
1181
|
+
async def _rpc_update_module_config(self, params: dict) -> dict:
|
|
1182
|
+
"""更新指定模块的配置(通用降级方案)"""
|
|
1183
|
+
import yaml
|
|
1184
|
+
from pathlib import Path
|
|
1185
|
+
|
|
1186
|
+
module_name = params.get("module_name")
|
|
1187
|
+
metadata = params.get("metadata", {})
|
|
1188
|
+
config = params.get("config", {})
|
|
1189
|
+
|
|
1190
|
+
if not module_name:
|
|
1191
|
+
raise ValueError("module_name required")
|
|
1192
|
+
|
|
1193
|
+
info = self.modules.get(module_name)
|
|
1194
|
+
if not info:
|
|
1195
|
+
raise RuntimeError(f"Module '{module_name}' not found")
|
|
1196
|
+
|
|
1197
|
+
md_path = Path(info.module_dir) / "module.md"
|
|
1198
|
+
if not md_path.exists():
|
|
1199
|
+
raise RuntimeError(f"module.md not found for '{module_name}'")
|
|
1200
|
+
|
|
1201
|
+
# 更新 module.md frontmatter
|
|
1202
|
+
if metadata:
|
|
1203
|
+
frontmatter, body = _parse_frontmatter(md_path.read_text(encoding="utf-8"))
|
|
1204
|
+
for key, value in metadata.items():
|
|
1205
|
+
frontmatter[key] = value
|
|
1206
|
+
fm_str = yaml.dump(frontmatter, allow_unicode=True, sort_keys=False, default_flow_style=False).rstrip()
|
|
1207
|
+
content = f"---\n{fm_str}\n---\n{body}"
|
|
1208
|
+
md_path.write_text(content, encoding="utf-8")
|
|
1209
|
+
|
|
1210
|
+
# 更新 config.yaml
|
|
1211
|
+
if config:
|
|
1212
|
+
config_path = Path(info.module_dir) / "config.yaml"
|
|
1213
|
+
existing = {}
|
|
1214
|
+
if config_path.exists():
|
|
1215
|
+
existing = yaml.safe_load(config_path.read_text(encoding="utf-8")) or {}
|
|
1216
|
+
# Deep merge
|
|
1217
|
+
self._deep_merge(existing, config)
|
|
1218
|
+
config_path.write_text(
|
|
1219
|
+
yaml.dump(existing, allow_unicode=True, sort_keys=False, default_flow_style=False),
|
|
1220
|
+
encoding="utf-8"
|
|
1221
|
+
)
|
|
1222
|
+
|
|
1223
|
+
# 重新扫描以更新缓存
|
|
1224
|
+
await self._rescan_modules()
|
|
1225
|
+
|
|
1226
|
+
# 返回更新后的配置
|
|
1227
|
+
return await self._rpc_get_module_config({"module_name": module_name})
|
|
1228
|
+
|
|
1229
|
+
async def _rpc_reset_module_config(self, params: dict) -> dict:
|
|
1230
|
+
"""恢复指定模块的默认配置(通用降级方案)"""
|
|
1231
|
+
import yaml
|
|
1232
|
+
from pathlib import Path
|
|
1233
|
+
|
|
1234
|
+
module_name = params.get("module_name")
|
|
1235
|
+
fields = params.get("fields", [])
|
|
1236
|
+
reset_all = params.get("all", False)
|
|
1237
|
+
|
|
1238
|
+
if not module_name:
|
|
1239
|
+
raise ValueError("module_name required")
|
|
1240
|
+
|
|
1241
|
+
info = self.modules.get(module_name)
|
|
1242
|
+
if not info:
|
|
1243
|
+
raise RuntimeError(f"Module '{module_name}' not found")
|
|
1244
|
+
|
|
1245
|
+
md_path = Path(info.module_dir) / "module.md"
|
|
1246
|
+
if not md_path.exists():
|
|
1247
|
+
raise RuntimeError(f"module.md not found for '{module_name}'")
|
|
1248
|
+
|
|
1249
|
+
# 默认值定义(通用)
|
|
1250
|
+
defaults = {
|
|
1251
|
+
"state": "enabled",
|
|
1252
|
+
"monitor": True,
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
frontmatter, body = _parse_frontmatter(md_path.read_text(encoding="utf-8"))
|
|
1256
|
+
|
|
1257
|
+
if reset_all:
|
|
1258
|
+
for key, value in defaults.items():
|
|
1259
|
+
frontmatter[key] = value
|
|
1260
|
+
else:
|
|
1261
|
+
for field in fields:
|
|
1262
|
+
if field in defaults:
|
|
1263
|
+
frontmatter[field] = defaults[field]
|
|
1264
|
+
elif field == "preferred_port":
|
|
1265
|
+
frontmatter.pop(field, None) # 恢复为 null
|
|
1266
|
+
elif field == "advertise_ip":
|
|
1267
|
+
frontmatter[field] = "127.0.0.1"
|
|
1268
|
+
|
|
1269
|
+
fm_str = yaml.dump(frontmatter, allow_unicode=True, sort_keys=False, default_flow_style=False).rstrip()
|
|
1270
|
+
content = f"---\n{fm_str}\n---\n{body}"
|
|
1271
|
+
md_path.write_text(content, encoding="utf-8")
|
|
1272
|
+
|
|
1273
|
+
# 重新扫描以更新缓存
|
|
1274
|
+
await self._rescan_modules()
|
|
1275
|
+
|
|
1276
|
+
return await self._rpc_get_module_config({"module_name": module_name})
|
|
1277
|
+
|
|
1278
|
+
@staticmethod
|
|
1279
|
+
def _deep_merge(base: dict, overlay: dict) -> dict:
|
|
1280
|
+
"""递归合并字典"""
|
|
1281
|
+
for k, v in overlay.items():
|
|
1282
|
+
if k in base and isinstance(base[k], dict) and isinstance(v, dict):
|
|
1283
|
+
Launcher._deep_merge(base[k], v)
|
|
1284
|
+
else:
|
|
1285
|
+
base[k] = v
|
|
1286
|
+
return base
|
|
1287
|
+
|
|
1288
|
+
|
|
1289
|
+
async def _rpc_restart_launcher(self, params: dict) -> dict:
|
|
1290
|
+
"""Restart Launcher process via Watchdog.
|
|
1291
|
+
|
|
1292
|
+
Simply notify watchdog and exit. Watchdog will start a new instance.
|
|
1293
|
+
|
|
1294
|
+
Args:
|
|
1295
|
+
params: {
|
|
1296
|
+
"reason": str (optional) - Restart reason
|
|
1297
|
+
}
|
|
1298
|
+
|
|
1299
|
+
Returns:
|
|
1300
|
+
{"status": "restarting", "reason": str}
|
|
1301
|
+
or {"error": "watchdog offline"}
|
|
1302
|
+
"""
|
|
1303
|
+
reason = params.get("reason", "user_request")
|
|
1304
|
+
O = "\033[33m" # orange/yellow
|
|
1305
|
+
R = "\033[0m" # reset
|
|
1306
|
+
print(f"{O}[launcher] 收到 Launcher 重启请求{R}")
|
|
1307
|
+
print(f"[launcher] 原因: {reason}")
|
|
1308
|
+
|
|
1309
|
+
# Check if watchdog is running
|
|
1310
|
+
watchdog_running = self.process_manager.is_running("watchdog")
|
|
1311
|
+
print(f"[launcher] 检查 watchdog 状态: {'running' if watchdog_running else 'stopped'}")
|
|
1312
|
+
|
|
1313
|
+
if not watchdog_running:
|
|
1314
|
+
error_msg = "watchdog 未运行, 无法重启"
|
|
1315
|
+
print(f"[launcher] ❌ {error_msg}")
|
|
1316
|
+
return {"error": error_msg}
|
|
1317
|
+
|
|
1318
|
+
print(f"[launcher] ✓ watchdog 状态正常,准备重启流程")
|
|
1319
|
+
|
|
1320
|
+
# Schedule restart in background (don't block RPC response)
|
|
1321
|
+
async def _do_restart():
|
|
1322
|
+
await asyncio.sleep(0.3) # 确保 RPC 响应已发送
|
|
1323
|
+
|
|
1324
|
+
print(f"[launcher] 发送 module.exiting 事件给 watchdog...")
|
|
1325
|
+
|
|
1326
|
+
# Collect startup info for watchdog to restart with same environment
|
|
1327
|
+
startup_info = {
|
|
1328
|
+
"python": sys.executable,
|
|
1329
|
+
"argv": sys.argv,
|
|
1330
|
+
"cwd": os.getcwd(),
|
|
1331
|
+
"env": dict(os.environ), # 所有环境变量
|
|
1332
|
+
}
|
|
1333
|
+
|
|
1334
|
+
# Notify watchdog: this is a planned restart, not a crash
|
|
1335
|
+
await self._publish_event("module.exiting", {
|
|
1336
|
+
"module_id": "launcher",
|
|
1337
|
+
"action": "restart_launcher",
|
|
1338
|
+
"reason": reason,
|
|
1339
|
+
"startup_info": startup_info,
|
|
1340
|
+
})
|
|
1341
|
+
|
|
1342
|
+
print(f"[launcher] 已通知 watchdog 计划内重启")
|
|
1343
|
+
print(f"[launcher] 退出进程,等待 watchdog 重启")
|
|
1344
|
+
print(f"[launcher] 原因: {reason}")
|
|
1345
|
+
|
|
1346
|
+
os._exit(0)
|
|
1347
|
+
|
|
1348
|
+
asyncio.create_task(_do_restart())
|
|
1349
|
+
|
|
1350
|
+
return {"status": "restarting", "reason": reason}
|
|
1351
|
+
|
|
914
1352
|
# ── Event publishing via RPC ──
|
|
915
1353
|
|
|
916
1354
|
async def _publish_event(self, event_type: str, data: dict):
|
|
917
1355
|
"""Publish an event via RPC event.publish through Kernel WS."""
|
|
918
1356
|
if not self._ws:
|
|
919
1357
|
return
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
"id": str(uuid.uuid4()),
|
|
923
|
-
"method": "event.publish",
|
|
924
|
-
"params": {
|
|
1358
|
+
try:
|
|
1359
|
+
await self._rpc_call(self._ws, "event.publish", {
|
|
925
1360
|
"event_id": str(uuid.uuid4()),
|
|
926
1361
|
"event": event_type,
|
|
927
1362
|
"data": data,
|
|
928
|
-
},
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
async def _send():
|
|
932
|
-
try:
|
|
933
|
-
await self._ws.send(msg)
|
|
934
|
-
except Exception as e:
|
|
935
|
-
print(f"[launcher] 发布事件失败: {e}")
|
|
936
|
-
|
|
937
|
-
asyncio.create_task(_send())
|
|
1363
|
+
}, timeout=2.0)
|
|
1364
|
+
except Exception as e:
|
|
1365
|
+
print(f"[launcher] 发布事件失败 ({event_type}): {e}")
|
|
938
1366
|
|
|
939
1367
|
async def _wait_event(self, event_type: str, module_id: str, timeout: float) -> dict | None:
|
|
940
1368
|
"""Wait for a specific event from a module. Returns data dict or None on timeout."""
|
|
@@ -956,90 +1384,299 @@ class Launcher:
|
|
|
956
1384
|
finally:
|
|
957
1385
|
self._event_waiters.pop(key, None)
|
|
958
1386
|
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
"""
|
|
963
|
-
self.
|
|
1387
|
+
# ── 退出机制辅助方法 ──
|
|
1388
|
+
|
|
1389
|
+
def _init_module_state(self, name: str):
|
|
1390
|
+
"""初始化模块状态跟踪字典"""
|
|
1391
|
+
self._module_states[name] = {
|
|
1392
|
+
"shutdown_sent": False,
|
|
1393
|
+
"ack_received": False,
|
|
1394
|
+
"exiting_received": False,
|
|
1395
|
+
"ready_received": False,
|
|
1396
|
+
"stopped_sent": False,
|
|
1397
|
+
"exit_type": None, # "graceful" | "non_graceful" | "active"
|
|
1398
|
+
"reason": None,
|
|
1399
|
+
"restart": None,
|
|
1400
|
+
"cleanup_timeout": None,
|
|
1401
|
+
"cleanup_task": None,
|
|
1402
|
+
}
|
|
964
1403
|
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
1404
|
+
def _kill_process(self, name: str):
|
|
1405
|
+
"""统一的进程杀死方法"""
|
|
1406
|
+
record = self.process_manager.get_record(name)
|
|
1407
|
+
if record and record.proc and record.proc.poll() is None:
|
|
1408
|
+
print(f"[launcher] 强制终止 {name} (PID {record.proc.pid})")
|
|
1409
|
+
self.process_manager.kill_process(name)
|
|
1410
|
+
elif record:
|
|
1411
|
+
# 进程已经退出,只是清理记录
|
|
1412
|
+
pass
|
|
1413
|
+
else:
|
|
1414
|
+
# 没有记录,可能已经被清理
|
|
1415
|
+
pass
|
|
1416
|
+
|
|
1417
|
+
def _determine_exit_type(self, name: str) -> str:
|
|
1418
|
+
"""判断退出类型: graceful | non_graceful | active"""
|
|
1419
|
+
state = self._module_states.get(name, {})
|
|
1420
|
+
if state.get("exiting_received"):
|
|
1421
|
+
return "graceful"
|
|
1422
|
+
elif state.get("shutdown_sent"):
|
|
1423
|
+
return "non_graceful"
|
|
1424
|
+
else:
|
|
1425
|
+
return "active"
|
|
1426
|
+
|
|
1427
|
+
def _resolve_reason(self, name: str) -> str:
|
|
1428
|
+
"""解析最终原因(优先级:exiting > shutdown > 默认)"""
|
|
1429
|
+
state = self._module_states.get(name, {})
|
|
1430
|
+
if state.get("reason"):
|
|
1431
|
+
return state["reason"]
|
|
1432
|
+
return "unknown"
|
|
1433
|
+
|
|
1434
|
+
def _resolve_restart(self, name: str) -> bool:
|
|
1435
|
+
"""解析重启决策(优先级:exiting > shutdown > 默认)"""
|
|
1436
|
+
state = self._module_states.get(name, {})
|
|
1437
|
+
if state.get("restart") is not None:
|
|
1438
|
+
return state["restart"]
|
|
1439
|
+
# 默认:主动退出不重启,被动关闭看 desired_state
|
|
1440
|
+
if self._determine_exit_type(name) == "active":
|
|
1441
|
+
return False
|
|
1442
|
+
return self._desired_states.get(name) == "running"
|
|
1443
|
+
|
|
1444
|
+
async def _send_stopped_event(self, name: str, exit_code: int):
|
|
1445
|
+
"""发送 module.stopped 事件(防重复)"""
|
|
1446
|
+
state = self._module_states.get(name, {})
|
|
1447
|
+
if state.get("stopped_sent"):
|
|
972
1448
|
return
|
|
973
1449
|
|
|
974
|
-
#
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
ack_data = {}
|
|
978
|
-
self._event_waiters[ack_key] = (ack_evt, ack_data)
|
|
1450
|
+
# 立即设置标记(防止竞态条件)
|
|
1451
|
+
if name in self._module_states:
|
|
1452
|
+
self._module_states[name]["stopped_sent"] = True
|
|
979
1453
|
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
self._event_waiters[ready_key] = (ready_evt, ready_data)
|
|
1454
|
+
exit_type = self._determine_exit_type(name)
|
|
1455
|
+
reason = self._resolve_reason(name)
|
|
1456
|
+
restart = self._resolve_restart(name)
|
|
984
1457
|
|
|
985
|
-
await self._publish_event("module.
|
|
986
|
-
"module_id": name,
|
|
1458
|
+
await self._publish_event("module.stopped", {
|
|
1459
|
+
"module_id": name,
|
|
1460
|
+
"exit_code": exit_code,
|
|
1461
|
+
"exit_type": exit_type,
|
|
1462
|
+
"reason": reason,
|
|
1463
|
+
"restart": restart,
|
|
1464
|
+
"ready_received": state.get("ready_received", False),
|
|
987
1465
|
})
|
|
988
1466
|
|
|
989
|
-
|
|
1467
|
+
# ── 优雅关闭 ──
|
|
1468
|
+
|
|
1469
|
+
async def _graceful_stop(self, name: str, reason: str = "stop_requested", timeout: float = 10):
|
|
1470
|
+
"""优雅关闭单个模块:
|
|
1471
|
+
1. 初始化状态跟踪
|
|
1472
|
+
2. 非优雅模块直接 SIGTERM
|
|
1473
|
+
3. 优雅模块:发送 shutdown → 等待 ack → 等待 exiting → 启动清理超时 → 杀死
|
|
1474
|
+
"""
|
|
990
1475
|
try:
|
|
991
|
-
|
|
992
|
-
ack = ack_data
|
|
993
|
-
except asyncio.TimeoutError:
|
|
994
|
-
ack = None
|
|
995
|
-
finally:
|
|
996
|
-
self._event_waiters.pop(ack_key, None)
|
|
1476
|
+
self._log_lifecycle("stopping", name, reason=reason)
|
|
997
1477
|
|
|
998
|
-
|
|
999
|
-
self.
|
|
1000
|
-
self.
|
|
1001
|
-
|
|
1478
|
+
# 初始化状态
|
|
1479
|
+
self._init_module_state(name)
|
|
1480
|
+
state = self._module_states[name]
|
|
1481
|
+
|
|
1482
|
+
# 非优雅模块:直接 SIGTERM
|
|
1483
|
+
if not self._graceful_modules.get(name):
|
|
1484
|
+
state["shutdown_sent"] = True # 标记:Launcher 主动关闭
|
|
1485
|
+
state["stopped_sent"] = True # 防重复标记
|
|
1486
|
+
state["reason"] = reason
|
|
1487
|
+
state["restart"] = self._desired_states.get(name) == "running"
|
|
1488
|
+
|
|
1489
|
+
self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
|
|
1490
|
+
|
|
1491
|
+
# 发送 stopped 事件
|
|
1492
|
+
await self._publish_event("module.stopped", {
|
|
1493
|
+
"module_id": name,
|
|
1494
|
+
"exit_code": 0,
|
|
1495
|
+
"exit_type": "non_graceful",
|
|
1496
|
+
"reason": reason,
|
|
1497
|
+
"restart": state["restart"],
|
|
1498
|
+
"ready_received": False,
|
|
1499
|
+
})
|
|
1500
|
+
|
|
1501
|
+
self._log_lifecycle("stopped", name, reason=reason)
|
|
1502
|
+
return
|
|
1503
|
+
|
|
1504
|
+
# 优雅模块:提前注册所有三个 waiter(ack、exiting、ready)
|
|
1505
|
+
# 这样可以避免事件到达时 waiter 还没注册的竞争条件
|
|
1506
|
+
ack_key = f"module.shutdown.ack:{name}"
|
|
1507
|
+
ack_evt = asyncio.Event()
|
|
1508
|
+
ack_data = {}
|
|
1509
|
+
self._event_waiters[ack_key] = (ack_evt, ack_data)
|
|
1510
|
+
|
|
1511
|
+
exiting_key = f"module.exiting:{name}"
|
|
1512
|
+
exiting_evt = asyncio.Event()
|
|
1513
|
+
exiting_data = {}
|
|
1514
|
+
self._event_waiters[exiting_key] = (exiting_evt, exiting_data)
|
|
1515
|
+
|
|
1516
|
+
ready_key = f"module.shutdown.ready:{name}"
|
|
1517
|
+
ready_evt = asyncio.Event()
|
|
1518
|
+
ready_data = {}
|
|
1519
|
+
self._event_waiters[ready_key] = (ready_evt, ready_data)
|
|
1520
|
+
|
|
1521
|
+
# 发送 shutdown 事件
|
|
1522
|
+
state["shutdown_sent"] = True
|
|
1523
|
+
state["reason"] = reason
|
|
1524
|
+
state["restart"] = self._desired_states.get(name) == "running"
|
|
1525
|
+
|
|
1526
|
+
await self._publish_event("module.shutdown", {
|
|
1002
1527
|
"module_id": name,
|
|
1003
|
-
"
|
|
1528
|
+
"reason": reason,
|
|
1529
|
+
"timeout": timeout,
|
|
1530
|
+
"restart": state["restart"],
|
|
1004
1531
|
})
|
|
1005
|
-
return
|
|
1006
1532
|
|
|
1007
|
-
|
|
1533
|
+
# 等待 ack
|
|
1534
|
+
try:
|
|
1535
|
+
await asyncio.wait_for(ack_evt.wait(), timeout=SHUTDOWN_TIMEOUT_ACK)
|
|
1536
|
+
state["ack_received"] = True
|
|
1537
|
+
except asyncio.TimeoutError:
|
|
1538
|
+
pass
|
|
1539
|
+
finally:
|
|
1540
|
+
self._event_waiters.pop(ack_key, None)
|
|
1008
1541
|
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
finally:
|
|
1016
|
-
self._event_waiters.pop(ready_key, None)
|
|
1017
|
-
if ready:
|
|
1018
|
-
self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_READY)
|
|
1019
|
-
else:
|
|
1020
|
-
self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_PARTIAL)
|
|
1542
|
+
if not state["ack_received"]:
|
|
1543
|
+
# 没有 ack,直接杀死
|
|
1544
|
+
self._event_waiters.pop(exiting_key, None)
|
|
1545
|
+
self._event_waiters.pop(ready_key, None)
|
|
1546
|
+
state["stopped_sent"] = True
|
|
1547
|
+
self._kill_process(name)
|
|
1021
1548
|
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1549
|
+
# 发送 stopped 事件
|
|
1550
|
+
await self._publish_event("module.stopped", {
|
|
1551
|
+
"module_id": name,
|
|
1552
|
+
"exit_code": -1, # 未收到 ack,退出码未知
|
|
1553
|
+
"exit_type": "timeout",
|
|
1554
|
+
"reason": state["reason"],
|
|
1555
|
+
"restart": state["restart"],
|
|
1556
|
+
"ready_received": False,
|
|
1557
|
+
})
|
|
1558
|
+
|
|
1559
|
+
self._log_lifecycle("stopped", name, reason=reason)
|
|
1560
|
+
return
|
|
1561
|
+
|
|
1562
|
+
# 等待 exiting 事件
|
|
1563
|
+
try:
|
|
1564
|
+
await asyncio.wait_for(exiting_evt.wait(), timeout=SHUTDOWN_TIMEOUT_EXITING)
|
|
1565
|
+
state["exiting_received"] = True
|
|
1566
|
+
# 从 exiting 事件中提取信息
|
|
1567
|
+
if exiting_data.get("reason"):
|
|
1568
|
+
state["reason"] = exiting_data["reason"]
|
|
1569
|
+
if "restart" in exiting_data:
|
|
1570
|
+
state["restart"] = exiting_data["restart"]
|
|
1571
|
+
cleanup_timeout = exiting_data.get("cleanup_timeout", CLEANUP_TIMEOUT_DEFAULT)
|
|
1572
|
+
cleanup_timeout = max(CLEANUP_TIMEOUT_MIN, min(cleanup_timeout, CLEANUP_TIMEOUT_MAX))
|
|
1573
|
+
state["cleanup_timeout"] = cleanup_timeout
|
|
1574
|
+
except asyncio.TimeoutError:
|
|
1575
|
+
pass
|
|
1576
|
+
finally:
|
|
1577
|
+
self._event_waiters.pop(exiting_key, None)
|
|
1578
|
+
|
|
1579
|
+
if not state["exiting_received"]:
|
|
1580
|
+
# 没有 exiting,直接杀死
|
|
1581
|
+
self._event_waiters.pop(ready_key, None)
|
|
1582
|
+
state["stopped_sent"] = True
|
|
1583
|
+
self._kill_process(name)
|
|
1584
|
+
|
|
1585
|
+
# 发送 stopped 事件
|
|
1586
|
+
await self._publish_event("module.stopped", {
|
|
1587
|
+
"module_id": name,
|
|
1588
|
+
"exit_code": -1, # 未收到 exiting,退出码未知
|
|
1589
|
+
"exit_type": "timeout",
|
|
1590
|
+
"reason": state["reason"],
|
|
1591
|
+
"restart": state["restart"],
|
|
1592
|
+
"ready_received": False,
|
|
1593
|
+
})
|
|
1594
|
+
|
|
1595
|
+
self._log_lifecycle("stopped", name, reason=state["reason"])
|
|
1596
|
+
return
|
|
1597
|
+
|
|
1598
|
+
# ready waiter 已经在前面注册好了,直接启动清理超时任务
|
|
1599
|
+
# 启动清理超时任务(兜底机制)
|
|
1600
|
+
async def cleanup_timeout_handler():
|
|
1601
|
+
await asyncio.sleep(state["cleanup_timeout"])
|
|
1602
|
+
if not state.get("stopped_sent"):
|
|
1603
|
+
print(f"[launcher] {name} 清理超时 ({state['cleanup_timeout']}s),强制终止")
|
|
1604
|
+
state["stopped_sent"] = True
|
|
1605
|
+
self._kill_process(name)
|
|
1606
|
+
|
|
1607
|
+
# 发送 stopped 事件
|
|
1608
|
+
await self._publish_event("module.stopped", {
|
|
1609
|
+
"module_id": name,
|
|
1610
|
+
"exit_code": -1, # 清理超时,退出码未知
|
|
1611
|
+
"exit_type": "timeout",
|
|
1612
|
+
"reason": state["reason"],
|
|
1613
|
+
"restart": state["restart"],
|
|
1614
|
+
"ready_received": False,
|
|
1615
|
+
})
|
|
1616
|
+
|
|
1617
|
+
self._log_lifecycle("stopped", name, reason=state["reason"])
|
|
1618
|
+
|
|
1619
|
+
state["cleanup_task"] = asyncio.create_task(cleanup_timeout_handler())
|
|
1620
|
+
|
|
1621
|
+
# 等待 ready 事件(主路径)
|
|
1622
|
+
try:
|
|
1623
|
+
await asyncio.wait_for(ready_evt.wait(), timeout=state["cleanup_timeout"])
|
|
1624
|
+
state["ready_received"] = True
|
|
1625
|
+
print(f"[launcher] {name} 清理完成,准备退出")
|
|
1626
|
+
except asyncio.TimeoutError:
|
|
1627
|
+
# 超时由 cleanup_timeout_handler 处理
|
|
1628
|
+
pass
|
|
1629
|
+
finally:
|
|
1630
|
+
self._event_waiters.pop(ready_key, None)
|
|
1631
|
+
|
|
1632
|
+
# 取消清理超时任务(如果 ready 先到达)
|
|
1633
|
+
if state.get("ready_received") and state["cleanup_task"] and not state["cleanup_task"].done():
|
|
1634
|
+
state["cleanup_task"].cancel()
|
|
1635
|
+
|
|
1636
|
+
# 如果收到 ready,立即杀死进程
|
|
1637
|
+
if state.get("ready_received") and not state.get("stopped_sent"):
|
|
1638
|
+
state["stopped_sent"] = True
|
|
1639
|
+
self._kill_process(name)
|
|
1640
|
+
|
|
1641
|
+
# 发送 stopped 事件
|
|
1642
|
+
await self._publish_event("module.stopped", {
|
|
1643
|
+
"module_id": name,
|
|
1644
|
+
"exit_code": 0, # 正常退出
|
|
1645
|
+
"exit_type": "graceful",
|
|
1646
|
+
"reason": state["reason"],
|
|
1647
|
+
"restart": state["restart"],
|
|
1648
|
+
"ready_received": True,
|
|
1649
|
+
})
|
|
1650
|
+
|
|
1651
|
+
self._log_lifecycle("stopped", name, reason=state["reason"])
|
|
1652
|
+
|
|
1653
|
+
except Exception as e:
|
|
1654
|
+
# 优雅关闭出错,强制终止进程
|
|
1655
|
+
print(f"[launcher] 优雅关闭出错: {e}")
|
|
1656
|
+
if not state.get("stopped_sent"):
|
|
1657
|
+
state["stopped_sent"] = True
|
|
1658
|
+
self._kill_process(name)
|
|
1659
|
+
# 清理所有 waiters
|
|
1660
|
+
self._event_waiters.pop(f"module.shutdown.ack:{name}", None)
|
|
1661
|
+
self._event_waiters.pop(f"module.exiting:{name}", None)
|
|
1662
|
+
self._event_waiters.pop(f"module.shutdown.ready:{name}", None)
|
|
1027
1663
|
|
|
1028
1664
|
async def _graceful_shutdown_all(self):
|
|
1029
|
-
"""
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
5. Shut down Kernel last (keeps event routing alive throughout)
|
|
1665
|
+
"""全量优雅退出:三阶段关闭
|
|
1666
|
+
|
|
1667
|
+
Phase 1: 先关闭 Watchdog(防止它监控到其他模块退出后触发重启)
|
|
1668
|
+
Phase 2: 关闭其他所有模块(除 Kernel)
|
|
1669
|
+
Phase 3: 最后关闭 Kernel(保证事件路由畅通)
|
|
1035
1670
|
"""
|
|
1036
1671
|
self._system_shutting_down = True
|
|
1037
1672
|
|
|
1038
|
-
#
|
|
1673
|
+
# 发送 Launcher 自己的 exiting 事件
|
|
1039
1674
|
await self._publish_event("module.exiting", {
|
|
1040
1675
|
"module_id": "launcher",
|
|
1676
|
+
"type": "active",
|
|
1041
1677
|
"reason": "system_shutdown",
|
|
1042
1678
|
"action": "none",
|
|
1679
|
+
"timeout": 0,
|
|
1043
1680
|
})
|
|
1044
1681
|
|
|
1045
1682
|
running = [n for n in self.modules if self.process_manager.is_running(n)]
|
|
@@ -1047,88 +1684,144 @@ class Launcher:
|
|
|
1047
1684
|
for cn in CORE_MODULE_NAMES:
|
|
1048
1685
|
if self.process_manager.is_running(cn) and cn not in running:
|
|
1049
1686
|
running.append(cn)
|
|
1687
|
+
|
|
1050
1688
|
if not running:
|
|
1051
1689
|
print("[launcher] 没有运行中的模块需要关闭")
|
|
1052
1690
|
return
|
|
1053
1691
|
|
|
1054
|
-
|
|
1055
|
-
|
|
1692
|
+
# 分组:Watchdog、Kernel、其他模块
|
|
1693
|
+
watchdog_running = WATCHDOG_MODULE_NAME in running
|
|
1694
|
+
kernel_running = "kernel" in running
|
|
1695
|
+
other_modules = [n for n in running if n not in (WATCHDOG_MODULE_NAME, "kernel")]
|
|
1696
|
+
|
|
1697
|
+
graceful_others = [n for n in other_modules if self._graceful_modules.get(n)]
|
|
1698
|
+
non_graceful_others = [n for n in other_modules if not self._graceful_modules.get(n)]
|
|
1699
|
+
|
|
1700
|
+
print(f"[launcher] 正在关闭 {len(running)} 个模块(三阶段)")
|
|
1701
|
+
|
|
1702
|
+
# ═══════════════════════════════════════════════════════════
|
|
1703
|
+
# Phase 1: 先关闭 Watchdog(防止重启其他模块)
|
|
1704
|
+
# ═══════════════════════════════════════════════════════════
|
|
1705
|
+
if watchdog_running and self.process_manager.is_running(WATCHDOG_MODULE_NAME):
|
|
1706
|
+
print(f"[launcher] Phase 1: 通知 Watchdog 退出(防止重启其他模块)")
|
|
1707
|
+
|
|
1708
|
+
if self._graceful_modules.get(WATCHDOG_MODULE_NAME):
|
|
1709
|
+
# Watchdog 支持优雅退出
|
|
1710
|
+
self._init_module_state(WATCHDOG_MODULE_NAME)
|
|
1711
|
+
state = self._module_states[WATCHDOG_MODULE_NAME]
|
|
1712
|
+
state["shutdown_sent"] = True
|
|
1713
|
+
state["reason"] = "system_shutdown"
|
|
1714
|
+
state["restart"] = False
|
|
1715
|
+
self._log_lifecycle("stopping", WATCHDOG_MODULE_NAME, reason="system_shutdown")
|
|
1716
|
+
|
|
1717
|
+
await self._publish_event("module.shutdown", {
|
|
1718
|
+
"module_id": WATCHDOG_MODULE_NAME,
|
|
1719
|
+
"reason": "system_shutdown",
|
|
1720
|
+
"timeout": 5,
|
|
1721
|
+
"restart": False,
|
|
1722
|
+
})
|
|
1723
|
+
|
|
1724
|
+
# 等待 0.2 秒确保事件送达(不需要等待进程退出)
|
|
1725
|
+
await asyncio.sleep(0.2)
|
|
1726
|
+
print(f"[launcher] Watchdog shutdown 事件已发送")
|
|
1727
|
+
else:
|
|
1728
|
+
# 直接终止
|
|
1729
|
+
self._init_module_state(WATCHDOG_MODULE_NAME)
|
|
1730
|
+
state = self._module_states[WATCHDOG_MODULE_NAME]
|
|
1731
|
+
state["shutdown_sent"] = True
|
|
1732
|
+
state["stopped_sent"] = True
|
|
1733
|
+
state["reason"] = "system_shutdown"
|
|
1734
|
+
state["restart"] = False
|
|
1735
|
+
self._log_lifecycle("stopping", WATCHDOG_MODULE_NAME, reason="system_shutdown")
|
|
1056
1736
|
|
|
1057
|
-
|
|
1058
|
-
kernel_deferred = "kernel" in graceful
|
|
1059
|
-
graceful_batch = [n for n in graceful if n != "kernel"] if kernel_deferred else graceful
|
|
1737
|
+
self.process_manager.stop_module(WATCHDOG_MODULE_NAME, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
|
|
1060
1738
|
|
|
1061
|
-
|
|
1739
|
+
await self._publish_event("module.stopped", {
|
|
1740
|
+
"module_id": WATCHDOG_MODULE_NAME,
|
|
1741
|
+
"exit_code": 0,
|
|
1742
|
+
"exit_type": "non_graceful",
|
|
1743
|
+
"reason": "system_shutdown",
|
|
1744
|
+
"restart": False,
|
|
1745
|
+
"ready_received": False,
|
|
1746
|
+
})
|
|
1062
1747
|
|
|
1063
|
-
|
|
1064
|
-
|
|
1748
|
+
self._log_lifecycle("stopped", WATCHDOG_MODULE_NAME, reason="system_shutdown")
|
|
1749
|
+
|
|
1750
|
+
# ═══════════════════════════════════════════════════════════
|
|
1751
|
+
# Phase 2: 关闭其他所有模块(除 Kernel)
|
|
1752
|
+
# ═══════════════════════════════════════════════════════════
|
|
1753
|
+
if graceful_others or non_graceful_others:
|
|
1754
|
+
print(f"[launcher] Phase 2: 关闭其他模块({len(graceful_others)} 优雅 + {len(non_graceful_others)} 非优雅)")
|
|
1755
|
+
|
|
1756
|
+
# 通知优雅模块
|
|
1757
|
+
for name in graceful_others:
|
|
1758
|
+
self._init_module_state(name)
|
|
1759
|
+
state = self._module_states[name]
|
|
1760
|
+
state["shutdown_sent"] = True
|
|
1761
|
+
state["reason"] = "system_shutdown"
|
|
1762
|
+
state["restart"] = False
|
|
1065
1763
|
self._log_lifecycle("stopping", name, reason="system_shutdown")
|
|
1066
1764
|
await self._publish_event("module.shutdown", {
|
|
1067
|
-
"module_id": name,
|
|
1765
|
+
"module_id": name,
|
|
1766
|
+
"reason": "system_shutdown",
|
|
1767
|
+
"timeout": 5,
|
|
1768
|
+
"restart": False,
|
|
1068
1769
|
})
|
|
1069
1770
|
|
|
1070
|
-
#
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1771
|
+
# 终止非优雅模块
|
|
1772
|
+
for name in non_graceful_others:
|
|
1773
|
+
self._init_module_state(name)
|
|
1774
|
+
state = self._module_states[name]
|
|
1775
|
+
state["shutdown_sent"] = True
|
|
1776
|
+
state["stopped_sent"] = True
|
|
1777
|
+
state["reason"] = "system_shutdown"
|
|
1778
|
+
state["restart"] = False
|
|
1074
1779
|
self._log_lifecycle("stopping", name, reason="system_shutdown")
|
|
1075
|
-
|
|
1780
|
+
|
|
1781
|
+
self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
|
|
1782
|
+
|
|
1783
|
+
await self._publish_event("module.stopped", {
|
|
1784
|
+
"module_id": name,
|
|
1785
|
+
"exit_code": 0,
|
|
1786
|
+
"exit_type": "non_graceful",
|
|
1787
|
+
"reason": "system_shutdown",
|
|
1788
|
+
"restart": False,
|
|
1789
|
+
"ready_received": False,
|
|
1790
|
+
})
|
|
1791
|
+
|
|
1076
1792
|
self._log_lifecycle("stopped", name, reason="system_shutdown")
|
|
1077
1793
|
|
|
1078
|
-
#
|
|
1079
|
-
if
|
|
1794
|
+
# 等待优雅模块退出(包括 Watchdog)
|
|
1795
|
+
all_graceful = graceful_others + ([WATCHDOG_MODULE_NAME] if watchdog_running and self._graceful_modules.get(WATCHDOG_MODULE_NAME) else [])
|
|
1796
|
+
if all_graceful:
|
|
1080
1797
|
deadline = time.time() + 5
|
|
1081
1798
|
while time.time() < deadline:
|
|
1082
|
-
still_running = [n for n in
|
|
1799
|
+
still_running = [n for n in all_graceful if self.process_manager.is_running(n)]
|
|
1083
1800
|
if not still_running:
|
|
1084
|
-
print("[launcher]
|
|
1801
|
+
print("[launcher] 所有其他模块已退出")
|
|
1085
1802
|
break
|
|
1086
1803
|
remaining = max(0, deadline - time.time())
|
|
1087
1804
|
print(f"[launcher] 等待 {len(still_running)} 个模块退出 ({remaining:.0f}s): {', '.join(still_running)}")
|
|
1088
1805
|
await asyncio.sleep(1)
|
|
1089
|
-
|
|
1090
|
-
|
|
1806
|
+
|
|
1807
|
+
# 强杀未退出的
|
|
1808
|
+
for name in all_graceful:
|
|
1091
1809
|
if self.process_manager.is_running(name):
|
|
1092
|
-
|
|
1093
|
-
self.
|
|
1810
|
+
print(f"[launcher] {name} 超时,强制终止")
|
|
1811
|
+
self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
|
|
1812
|
+
self._log_lifecycle("stopped", name, reason="system_shutdown_timeout")
|
|
1094
1813
|
|
|
1095
|
-
#
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1814
|
+
# ═══════════════════════════════════════════════════════════
|
|
1815
|
+
# Phase 3: 最后关闭 Kernel(使用标准优雅退出流程)
|
|
1816
|
+
# ═══════════════════════════════════════════════════════════
|
|
1817
|
+
if kernel_running and self.process_manager.is_running("kernel"):
|
|
1818
|
+
print("[launcher] Phase 3: 关闭 Kernel(所有其他模块已退出)")
|
|
1099
1819
|
|
|
1100
|
-
#
|
|
1101
|
-
|
|
1102
|
-
try:
|
|
1103
|
-
if self._ws:
|
|
1104
|
-
await self._rpc_call(self._ws, "kernel.shutdown", {})
|
|
1105
|
-
print("[launcher] Kernel shutdown RPC 已发送")
|
|
1106
|
-
rpc_sent = True
|
|
1107
|
-
else:
|
|
1108
|
-
print("[launcher] WebSocket 未连接,跳过 RPC 调用")
|
|
1109
|
-
except Exception as e:
|
|
1110
|
-
print(f"[launcher] Kernel shutdown RPC 失败: {e}")
|
|
1111
|
-
|
|
1112
|
-
# Wait for kernel to exit
|
|
1113
|
-
if rpc_sent:
|
|
1114
|
-
# RPC sent: wait up to 5s for graceful exit
|
|
1115
|
-
proc = self.process_manager._processes.get("kernel")
|
|
1116
|
-
if proc:
|
|
1117
|
-
try:
|
|
1118
|
-
loop = asyncio.get_event_loop()
|
|
1119
|
-
await asyncio.wait_for(
|
|
1120
|
-
loop.run_in_executor(None, proc.wait),
|
|
1121
|
-
timeout=5
|
|
1122
|
-
)
|
|
1123
|
-
print("[launcher] Kernel 已退出")
|
|
1124
|
-
except asyncio.TimeoutError:
|
|
1125
|
-
print("[launcher] Kernel 5秒内未退出,强制停止")
|
|
1126
|
-
self.process_manager.stop_module("kernel", timeout=SHUTDOWN_TIMEOUT_PARTIAL)
|
|
1127
|
-
else:
|
|
1128
|
-
# No RPC (WS not connected): use shorter timeout for terminate
|
|
1129
|
-
self.process_manager.stop_module("kernel", timeout=2)
|
|
1820
|
+
# 明确标记不重启
|
|
1821
|
+
self._desired_states["kernel"] = "stopped"
|
|
1130
1822
|
|
|
1131
|
-
|
|
1823
|
+
# 使用标准优雅退出流程(内含等待 ack → exiting → ready → kill 完整逻辑)
|
|
1824
|
+
await self._graceful_stop("kernel", reason="system_shutdown", timeout=5)
|
|
1132
1825
|
|
|
1133
1826
|
# Final safety net
|
|
1134
1827
|
try:
|
|
@@ -1247,8 +1940,11 @@ class Launcher:
|
|
|
1247
1940
|
print(f"[launcher] 模块 '{info.name}' 主动退出: {reason} ({elapsed:.2f}s)")
|
|
1248
1941
|
elif ready:
|
|
1249
1942
|
self._graceful_modules[info.name] = bool(ready.get("graceful_shutdown"))
|
|
1250
|
-
|
|
1251
|
-
|
|
1943
|
+
# Use startup_time from module.ready event (module's self-reported startup time)
|
|
1944
|
+
startup_time = ready.get("startup_time", elapsed)
|
|
1945
|
+
self._ready_times[info.name] = startup_time
|
|
1946
|
+
startup_str = f"{startup_time:.3f}s" if startup_time < 10 else f"{startup_time:.2f}s"
|
|
1947
|
+
print(f"[launcher] 模块 '{info.name}' 已就绪 ({startup_str})")
|
|
1252
1948
|
else:
|
|
1253
1949
|
print(f"\033[91m[launcher] 警告: '{info.name}' 在 {timeout}s 内未发送 module.ready\033[0m")
|
|
1254
1950
|
|
|
@@ -1282,7 +1978,7 @@ class Launcher:
|
|
|
1282
1978
|
# Call Kernel RPC to generate tokens
|
|
1283
1979
|
try:
|
|
1284
1980
|
result = await self._rpc_call(self._ws, "kernel.generate_tokens", {"modules": module_names})
|
|
1285
|
-
if
|
|
1981
|
+
if "result" in result:
|
|
1286
1982
|
tokens = result["result"].get("tokens", {})
|
|
1287
1983
|
self._module_tokens.update(tokens)
|
|
1288
1984
|
print(f"[launcher] Kernel 已生成 {len(tokens)} 个模块令牌")
|
|
@@ -1297,7 +1993,7 @@ class Launcher:
|
|
|
1297
1993
|
return
|
|
1298
1994
|
try:
|
|
1299
1995
|
result = await self._rpc_call(self._ws, "kernel.register_tokens", tokens)
|
|
1300
|
-
if
|
|
1996
|
+
if "result" in result:
|
|
1301
1997
|
print(f"[launcher] 已注册 {len(tokens)} 个模块令牌")
|
|
1302
1998
|
elif "error" in result:
|
|
1303
1999
|
print(f"[launcher] 警告: 令牌注册失败: {result['error'].get('message', '')}")
|
|
@@ -1381,10 +2077,19 @@ class Launcher:
|
|
|
1381
2077
|
if rc != 0:
|
|
1382
2078
|
self._print_module_crash_summary(name)
|
|
1383
2079
|
self._log_lifecycle("exited", name, exit_code=rc)
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
2080
|
+
|
|
2081
|
+
# 检查是否已发送 stopped 事件
|
|
2082
|
+
state = self._module_states.get(name, {})
|
|
2083
|
+
if not state.get("stopped_sent"):
|
|
2084
|
+
# 取消清理超时任务(如果有)
|
|
2085
|
+
if state.get("cleanup_task"):
|
|
2086
|
+
state["cleanup_task"].cancel()
|
|
2087
|
+
# 发送 stopped 事件
|
|
2088
|
+
await self._send_stopped_event(name, rc)
|
|
2089
|
+
|
|
2090
|
+
# 无论是否发送,都清理状态(防止内存泄漏)
|
|
2091
|
+
self._module_states.pop(name, None)
|
|
2092
|
+
|
|
1388
2093
|
info = self.modules.get(name)
|
|
1389
2094
|
|
|
1390
2095
|
# 1) Core module crash → full restart
|
|
@@ -1524,9 +2229,6 @@ class Launcher:
|
|
|
1524
2229
|
started_at=self._start_unix,
|
|
1525
2230
|
)
|
|
1526
2231
|
running.append(("launcher", launcher_info, launcher_rec))
|
|
1527
|
-
# Launcher is ready immediately (ready_time = 0)
|
|
1528
|
-
if "launcher" not in self._ready_times:
|
|
1529
|
-
self._ready_times["launcher"] = 0.0
|
|
1530
2232
|
|
|
1531
2233
|
for name, info in self.modules.items():
|
|
1532
2234
|
rec = self.process_manager.get_record(name)
|
|
@@ -1597,7 +2299,16 @@ class Launcher:
|
|
|
1597
2299
|
for name, info, rec in running_sorted:
|
|
1598
2300
|
label = info.display_name or name
|
|
1599
2301
|
ready_t = self._ready_times.get(name)
|
|
1600
|
-
|
|
2302
|
+
# Format startup time with ms/s auto-switch
|
|
2303
|
+
if ready_t is not None:
|
|
2304
|
+
if ready_t < 1:
|
|
2305
|
+
time_str = f"{ready_t * 1000:.0f}ms"
|
|
2306
|
+
elif ready_t < 10:
|
|
2307
|
+
time_str = f"{ready_t:.2f}s"
|
|
2308
|
+
else:
|
|
2309
|
+
time_str = f"{ready_t:.1f}s"
|
|
2310
|
+
else:
|
|
2311
|
+
time_str = "—"
|
|
1601
2312
|
|
|
1602
2313
|
# Calculate elapsed from start
|
|
1603
2314
|
if ready_t is not None and hasattr(self, '_start_unix'):
|
|
@@ -1669,18 +2380,53 @@ class Launcher:
|
|
|
1669
2380
|
|
|
1670
2381
|
lines.append(f"{G} Kernel WS: ws://127.0.0.1:{self.kernel_port}/ws 实例: {self.instance_id}{R}")
|
|
1671
2382
|
|
|
1672
|
-
#
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
2383
|
+
# Check if web or evol modules are running and display their URLs
|
|
2384
|
+
RED = "\033[91m"
|
|
2385
|
+
running_names = {name for name, _, _ in running}
|
|
2386
|
+
|
|
2387
|
+
# Check web module
|
|
2388
|
+
if "web" in running_names:
|
|
2389
|
+
web_url = ""
|
|
2390
|
+
web_error = ""
|
|
2391
|
+
if self._ws:
|
|
2392
|
+
try:
|
|
2393
|
+
resp = await self._rpc_call(self._ws, "registry.get", {"path": "web.api_endpoint"}, timeout=3)
|
|
2394
|
+
val = resp.get("result", {}).get("value")
|
|
2395
|
+
if val and isinstance(val, str):
|
|
2396
|
+
web_url = val.replace("://127.0.0.1:", "://localhost:")
|
|
2397
|
+
else:
|
|
2398
|
+
web_error = "未注册到 Kernel Registry"
|
|
2399
|
+
except Exception as e:
|
|
2400
|
+
web_error = f"查询失败: {str(e)}"
|
|
2401
|
+
else:
|
|
2402
|
+
web_error = "Kernel 连接不可用"
|
|
2403
|
+
|
|
2404
|
+
if web_url:
|
|
2405
|
+
lines.append(f"{B} Web 管理后台: {web_url}{R}")
|
|
2406
|
+
else:
|
|
2407
|
+
lines.append(f"{RED} Web 管理后台: {web_error}{R}")
|
|
2408
|
+
|
|
2409
|
+
# Check evol module
|
|
2410
|
+
if "evol" in running_names:
|
|
2411
|
+
evol_url = ""
|
|
2412
|
+
evol_error = ""
|
|
2413
|
+
if self._ws:
|
|
2414
|
+
try:
|
|
2415
|
+
resp = await self._rpc_call(self._ws, "registry.get", {"path": "evol.api_endpoint"}, timeout=3)
|
|
2416
|
+
val = resp.get("result", {}).get("value")
|
|
2417
|
+
if val and isinstance(val, str):
|
|
2418
|
+
evol_url = val.replace("://127.0.0.1:", "://localhost:")
|
|
2419
|
+
else:
|
|
2420
|
+
evol_error = "未注册到 Kernel Registry"
|
|
2421
|
+
except Exception as e:
|
|
2422
|
+
evol_error = f"查询失败: {str(e)}"
|
|
2423
|
+
else:
|
|
2424
|
+
evol_error = "Kernel 连接不可用"
|
|
2425
|
+
|
|
2426
|
+
if evol_url:
|
|
2427
|
+
lines.append(f"{B} Evol: {evol_url}{R}")
|
|
2428
|
+
else:
|
|
2429
|
+
lines.append(f"{RED} Evol: {evol_error}{R}")
|
|
1684
2430
|
|
|
1685
2431
|
# Instance info
|
|
1686
2432
|
instances = self.process_manager.get_alive_instances()
|
|
@@ -1691,7 +2437,18 @@ class Launcher:
|
|
|
1691
2437
|
debug_flag = " [DEBUG]" if os.environ.get("KITE_DEBUG") == "1" else ""
|
|
1692
2438
|
lines.append(f"{G} 当前实例: #{inst_num} 后缀: {suffix_display} PID: {os.getpid()}{debug_flag}{R}")
|
|
1693
2439
|
lines.append(f"{G} 实例目录: {inst_dir}{R}")
|
|
1694
|
-
|
|
2440
|
+
|
|
2441
|
+
# Check for abnormal working directory
|
|
2442
|
+
cwd_lower = cwd.lower()
|
|
2443
|
+
is_abnormal_cwd = (
|
|
2444
|
+
"windowsapps" in cwd_lower or
|
|
2445
|
+
"appdata\\local\\temp" in cwd_lower or
|
|
2446
|
+
not os.path.exists(os.path.join(cwd, "main.py"))
|
|
2447
|
+
)
|
|
2448
|
+
if is_abnormal_cwd:
|
|
2449
|
+
lines.append(f"\033[91m 工作目录: {cwd} ⚠️ 异常路径{R}")
|
|
2450
|
+
else:
|
|
2451
|
+
lines.append(f"{G} 工作目录: {cwd}{R}")
|
|
1695
2452
|
if len(instances) > 1:
|
|
1696
2453
|
lines.append(f"{G} 所有实例:{R}")
|
|
1697
2454
|
for i in instances:
|
|
@@ -1733,6 +2490,155 @@ class Launcher:
|
|
|
1733
2490
|
|
|
1734
2491
|
print("\n".join(lines))
|
|
1735
2492
|
|
|
2493
|
+
async def _rpc_request_client_token(self, params: dict) -> dict:
|
|
2494
|
+
"""为 relay 模块申请虚拟客户端 Token.
|
|
2495
|
+
|
|
2496
|
+
Args:
|
|
2497
|
+
params: {
|
|
2498
|
+
"module_id": str - 虚拟模块 ID (如 "web-client-a3f9e2")
|
|
2499
|
+
"_caller_id": str - 调用者模块 ID (由 Kernel 注入)
|
|
2500
|
+
}
|
|
2501
|
+
|
|
2502
|
+
Returns:
|
|
2503
|
+
{
|
|
2504
|
+
"token": str - Kernel Token (64 字符 hex)
|
|
2505
|
+
"module_id": str - 虚拟模块 ID
|
|
2506
|
+
}
|
|
2507
|
+
|
|
2508
|
+
Raises:
|
|
2509
|
+
PermissionError: 调用者不在白名单中
|
|
2510
|
+
ValueError: module_id 格式不合法或已存在
|
|
2511
|
+
RuntimeError: Token 限额已满
|
|
2512
|
+
"""
|
|
2513
|
+
import re
|
|
2514
|
+
|
|
2515
|
+
# 1. 获取调用者 ID
|
|
2516
|
+
caller_id = params.get("_caller_id")
|
|
2517
|
+
if not caller_id:
|
|
2518
|
+
raise PermissionError("Missing _caller_id in params")
|
|
2519
|
+
|
|
2520
|
+
# 2. 权限检查
|
|
2521
|
+
if caller_id not in self._relay_modules:
|
|
2522
|
+
print(f"[launcher] DEBUG: 权限检查失败 - caller_id={caller_id}, relay_modules={self._relay_modules}")
|
|
2523
|
+
raise PermissionError(f"Permission denied: {caller_id} not in relay_modules whitelist")
|
|
2524
|
+
|
|
2525
|
+
# 3. 参数验证
|
|
2526
|
+
module_id = params.get("module_id")
|
|
2527
|
+
if not module_id:
|
|
2528
|
+
raise ValueError("module_id is required")
|
|
2529
|
+
|
|
2530
|
+
# 4. 命名规范验证
|
|
2531
|
+
expected_prefix = f"{caller_id}-client-"
|
|
2532
|
+
if not module_id.startswith(expected_prefix):
|
|
2533
|
+
raise ValueError(f"module_id must start with {expected_prefix}")
|
|
2534
|
+
|
|
2535
|
+
suffix = module_id[len(expected_prefix):]
|
|
2536
|
+
if not re.match(r'^[a-zA-Z0-9_-]+$', suffix):
|
|
2537
|
+
raise ValueError("Invalid module_id suffix")
|
|
2538
|
+
|
|
2539
|
+
# 5. 检查是否已存在(幂等)
|
|
2540
|
+
if module_id in self._client_tokens:
|
|
2541
|
+
print(f"[launcher] Token already exists for {module_id}, returning existing token")
|
|
2542
|
+
return {
|
|
2543
|
+
"token": self._client_tokens[module_id],
|
|
2544
|
+
"module_id": module_id
|
|
2545
|
+
}
|
|
2546
|
+
|
|
2547
|
+
# 6. 检查限额
|
|
2548
|
+
limit = self._relay_token_limits.get(caller_id, 100)
|
|
2549
|
+
current_count = sum(1 for mid in self._client_tokens if mid.startswith(expected_prefix))
|
|
2550
|
+
if current_count >= limit:
|
|
2551
|
+
raise RuntimeError(f"Token limit reached: {current_count}/{limit}")
|
|
2552
|
+
|
|
2553
|
+
# 7. 生成 token
|
|
2554
|
+
token = secrets.token_hex(32)
|
|
2555
|
+
|
|
2556
|
+
# 8. 注册到 Kernel
|
|
2557
|
+
try:
|
|
2558
|
+
result = await self._rpc_call(self._ws, "kernel.register_tokens", {module_id: token})
|
|
2559
|
+
if "error" in result:
|
|
2560
|
+
raise RuntimeError(f"Failed to register token: {result['error'].get('message', '')}")
|
|
2561
|
+
except Exception as e:
|
|
2562
|
+
raise RuntimeError(f"Failed to register token to Kernel: {e}")
|
|
2563
|
+
|
|
2564
|
+
# 9. 保存到本地映射
|
|
2565
|
+
self._client_tokens[module_id] = token
|
|
2566
|
+
|
|
2567
|
+
# 10. 记录审计日志
|
|
2568
|
+
self._log_token_request(caller_id, module_id, "request", True)
|
|
2569
|
+
|
|
2570
|
+
print(f"[launcher] Token generated for {module_id} (caller: {caller_id})")
|
|
2571
|
+
|
|
2572
|
+
return {
|
|
2573
|
+
"token": token,
|
|
2574
|
+
"module_id": module_id
|
|
2575
|
+
}
|
|
2576
|
+
|
|
2577
|
+
async def _rpc_release_client_token(self, params: dict) -> dict:
|
|
2578
|
+
"""释放虚拟客户端 Token (可选).
|
|
2579
|
+
|
|
2580
|
+
Args:
|
|
2581
|
+
params: {
|
|
2582
|
+
"module_id": str - 虚拟模块 ID
|
|
2583
|
+
"_caller_id": str - 调用者模块 ID (由 Kernel 注入)
|
|
2584
|
+
}
|
|
2585
|
+
|
|
2586
|
+
Returns:
|
|
2587
|
+
{}
|
|
2588
|
+
|
|
2589
|
+
Raises:
|
|
2590
|
+
PermissionError: 调用者不在白名单中或 module_id 不属于调用者
|
|
2591
|
+
ValueError: module_id 格式不合法
|
|
2592
|
+
"""
|
|
2593
|
+
# 1. 获取调用者 ID
|
|
2594
|
+
caller_id = params.get("_caller_id")
|
|
2595
|
+
if not caller_id:
|
|
2596
|
+
raise PermissionError("Missing _caller_id in params")
|
|
2597
|
+
|
|
2598
|
+
# 2. 权限检查
|
|
2599
|
+
if caller_id not in self._relay_modules:
|
|
2600
|
+
print(f"[launcher] DEBUG: 权限检查失败 - caller_id={caller_id}, relay_modules={self._relay_modules}")
|
|
2601
|
+
raise PermissionError(f"Permission denied: {caller_id} not in relay_modules whitelist")
|
|
2602
|
+
|
|
2603
|
+
# 3. 参数验证
|
|
2604
|
+
module_id = params.get("module_id")
|
|
2605
|
+
if not module_id:
|
|
2606
|
+
raise ValueError("module_id is required")
|
|
2607
|
+
|
|
2608
|
+
# 4. 验证所有权
|
|
2609
|
+
expected_prefix = f"{caller_id}-client-"
|
|
2610
|
+
if not module_id.startswith(expected_prefix):
|
|
2611
|
+
raise ValueError(f"module_id does not belong to {caller_id}")
|
|
2612
|
+
|
|
2613
|
+
# 5. 删除 token
|
|
2614
|
+
if module_id in self._client_tokens:
|
|
2615
|
+
del self._client_tokens[module_id]
|
|
2616
|
+
print(f"[launcher] Token released for {module_id} (caller: {caller_id})")
|
|
2617
|
+
|
|
2618
|
+
# 6. 记录审计日志
|
|
2619
|
+
self._log_token_request(caller_id, module_id, "release", True)
|
|
2620
|
+
|
|
2621
|
+
return {}
|
|
2622
|
+
|
|
2623
|
+
def _log_token_request(self, caller_id: str, module_id: str, action: str, success: bool):
|
|
2624
|
+
"""记录 Token 申请/释放审计日志."""
|
|
2625
|
+
from datetime import datetime, timezone
|
|
2626
|
+
record = {
|
|
2627
|
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
2628
|
+
"caller_id": caller_id,
|
|
2629
|
+
"module_id": module_id,
|
|
2630
|
+
"action": action,
|
|
2631
|
+
"success": success
|
|
2632
|
+
}
|
|
2633
|
+
try:
|
|
2634
|
+
log_dir = os.path.join(os.environ.get("KITE_MODULE_DATA", ""), "log")
|
|
2635
|
+
os.makedirs(log_dir, exist_ok=True)
|
|
2636
|
+
log_file = os.path.join(log_dir, "token_requests.jsonl")
|
|
2637
|
+
with open(log_file, "a", encoding="utf-8") as f:
|
|
2638
|
+
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
|
2639
|
+
except Exception as e:
|
|
2640
|
+
print(f"[launcher] 警告: 写入 token 审计日志失败: {e}")
|
|
2641
|
+
|
|
1736
2642
|
# ── Utilities ──
|
|
1737
2643
|
|
|
1738
2644
|
def _load_discovery(self) -> dict | None:
|
|
@@ -1748,6 +2654,22 @@ class Launcher:
|
|
|
1748
2654
|
print(f"[launcher] 警告: 读取发现配置失败: {e}")
|
|
1749
2655
|
return None
|
|
1750
2656
|
|
|
2657
|
+
def _load_relay_config(self) -> dict:
|
|
2658
|
+
"""Read relay config from launcher's own module.md."""
|
|
2659
|
+
md_path = os.path.join(os.environ["KITE_PROJECT"], "launcher", "module.md")
|
|
2660
|
+
try:
|
|
2661
|
+
with open(md_path, "r", encoding="utf-8") as f:
|
|
2662
|
+
fm = _parse_frontmatter(f.read())
|
|
2663
|
+
relay = fm.get("relay")
|
|
2664
|
+
if isinstance(relay, dict) and relay:
|
|
2665
|
+
print(f"[launcher] Relay 配置已加载: modules={relay.get('modules')}, token_limits={relay.get('token_limits')}")
|
|
2666
|
+
return relay
|
|
2667
|
+
else:
|
|
2668
|
+
print(f"[launcher] 警告: relay 配置为空或格式错误")
|
|
2669
|
+
except Exception as e:
|
|
2670
|
+
print(f"[launcher] 警告: 读取 relay 配置失败: {e}")
|
|
2671
|
+
return {}
|
|
2672
|
+
|
|
1751
2673
|
def _log_lifecycle(self, event: str, module: str, **extra):
|
|
1752
2674
|
"""Append one JSONL line to lifecycle.jsonl."""
|
|
1753
2675
|
from datetime import datetime, timezone
|
|
@@ -1760,6 +2682,30 @@ class Launcher:
|
|
|
1760
2682
|
except Exception:
|
|
1761
2683
|
pass
|
|
1762
2684
|
|
|
2685
|
+
def _record_launcher_startup(self):
|
|
2686
|
+
"""Record launcher startup information to lifecycle.jsonl."""
|
|
2687
|
+
import sys
|
|
2688
|
+
from datetime import datetime, timezone
|
|
2689
|
+
|
|
2690
|
+
record = {
|
|
2691
|
+
"ts": datetime.now(timezone.utc).isoformat(),
|
|
2692
|
+
"event": "launcher_startup",
|
|
2693
|
+
"module": "launcher",
|
|
2694
|
+
"pid": os.getpid(),
|
|
2695
|
+
"cwd": os.getcwd(),
|
|
2696
|
+
"argv": sys.argv,
|
|
2697
|
+
"instance_dir": os.environ.get("KITE_INSTANCE_DIR", ""),
|
|
2698
|
+
"instance_suffix": self.process_manager.instance_suffix,
|
|
2699
|
+
"python": sys.executable,
|
|
2700
|
+
}
|
|
2701
|
+
|
|
2702
|
+
try:
|
|
2703
|
+
os.makedirs(os.path.dirname(self._lifecycle_log), exist_ok=True)
|
|
2704
|
+
with open(self._lifecycle_log, "a", encoding="utf-8") as f:
|
|
2705
|
+
f.write(json.dumps(record, ensure_ascii=False) + "\n")
|
|
2706
|
+
except Exception:
|
|
2707
|
+
pass
|
|
2708
|
+
|
|
1763
2709
|
|
|
1764
2710
|
|
|
1765
2711
|
def _update_module_md_state(module_dir: str, new_state: str):
|