npm - @agentunion/kite - Versions diffs - 1.0.7 → 1.2.0 - Mend

@agentunion/kite 1.0.7 → 1.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (77)

package/core/event_hub/entry.py +305 -26
package/core/event_hub/hub.py +8 -0
package/core/event_hub/server.py +80 -17
package/core/kite_log.py +241 -0
package/core/launcher/entry.py +978 -284
package/core/launcher/process_manager.py +456 -46
package/core/registry/entry.py +272 -3
package/core/registry/server.py +339 -289
package/core/registry/store.py +10 -4
package/extensions/agents/__init__.py +1 -0
package/extensions/agents/assistant/__init__.py +1 -0
package/extensions/agents/assistant/entry.py +380 -0
package/extensions/agents/assistant/module.md +22 -0
package/extensions/agents/assistant/server.py +236 -0
package/extensions/channels/__init__.py +1 -0
package/extensions/channels/acp_channel/__init__.py +1 -0
package/extensions/channels/acp_channel/entry.py +380 -0
package/extensions/channels/acp_channel/module.md +22 -0
package/extensions/channels/acp_channel/server.py +236 -0
package/extensions/event_hub_bench/entry.py +664 -379
package/extensions/event_hub_bench/module.md +2 -1
package/extensions/services/backup/__init__.py +1 -0
package/extensions/services/backup/entry.py +380 -0
package/extensions/services/backup/module.md +22 -0
package/extensions/services/backup/server.py +244 -0
package/extensions/services/model_service/__init__.py +1 -0
package/extensions/services/model_service/entry.py +380 -0
package/extensions/services/model_service/module.md +22 -0
package/extensions/services/model_service/server.py +236 -0
package/extensions/services/watchdog/entry.py +460 -147
package/extensions/services/watchdog/module.md +3 -0
package/extensions/services/watchdog/monitor.py +128 -13
package/extensions/services/watchdog/server.py +75 -13
package/extensions/services/web/__init__.py +1 -0
package/extensions/services/web/config.yaml +149 -0
package/extensions/services/web/entry.py +487 -0
package/extensions/services/web/module.md +24 -0
package/extensions/services/web/routes/__init__.py +1 -0
package/extensions/services/web/routes/routes_call.py +189 -0
package/extensions/services/web/routes/routes_config.py +512 -0
package/extensions/services/web/routes/routes_contacts.py +98 -0
package/extensions/services/web/routes/routes_devlog.py +99 -0
package/extensions/services/web/routes/routes_phone.py +81 -0
package/extensions/services/web/routes/routes_sms.py +48 -0
package/extensions/services/web/routes/routes_stats.py +17 -0
package/extensions/services/web/routes/routes_voicechat.py +554 -0
package/extensions/services/web/routes/schemas.py +216 -0
package/extensions/services/web/server.py +332 -0
package/extensions/services/web/static/css/style.css +1064 -0
package/extensions/services/web/static/index.html +1445 -0
package/extensions/services/web/static/js/app.js +4671 -0
package/extensions/services/web/vendor/__init__.py +1 -0
package/extensions/services/web/vendor/bluetooth/__init__.py +0 -0
package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
package/extensions/services/web/vendor/config.py +139 -0
package/extensions/services/web/vendor/conversation/__init__.py +0 -0
package/extensions/services/web/vendor/conversation/asr.py +936 -0
package/extensions/services/web/vendor/conversation/engine.py +548 -0
package/extensions/services/web/vendor/conversation/llm.py +534 -0
package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
package/extensions/services/web/vendor/conversation/tts.py +322 -0
package/extensions/services/web/vendor/conversation/vad.py +138 -0
package/extensions/services/web/vendor/storage/__init__.py +1 -0
package/extensions/services/web/vendor/storage/identity.py +312 -0
package/extensions/services/web/vendor/storage/store.py +507 -0
package/extensions/services/web/vendor/task/__init__.py +0 -0
package/extensions/services/web/vendor/task/manager.py +864 -0
package/extensions/services/web/vendor/task/models.py +45 -0
package/extensions/services/web/vendor/task/webhook.py +263 -0
package/extensions/services/web/vendor/tools/__init__.py +0 -0
package/extensions/services/web/vendor/tools/registry.py +321 -0
package/main.py +230 -90
package/package.json +1 -1

package/core/launcher/entry.py CHANGED Viewed

@@ -8,9 +8,11 @@ Thread model:
 - (Windows) keyboard listener thread: polls for 'q' key
 4-Phase startup:
-  Phase 1: Registry → stdout port → KITE_REGISTRY_PORT → API → register self + tokens
-  Phase 2: Event Hub → stdin launcher_ws_token → stdout ws_endpoint → WS connect → module.ready
-  Phase 3: Event Hub → Registry → Registry → Event Hub WS → module.ready
+  Phase 1: Registry + Event Hub (parallel start) → Registry stdout port → stdin broadcast port to Event Hub
+           → API → register self + tokens → stdin launcher_ws_token to Event Hub
+           → stdout ws_endpoint → WS connect → module.ready
+  Phase 2: (reserved — Event Hub ready handled in Phase 1)
+  Phase 3: Registry delayed ready (Event Hub → Registry → Event Hub WS → module.ready)
   Phase 4: start remaining enabled modules in topo order
 """
@@ -34,9 +36,17 @@ from .process_manager import ProcessManager
 IS_WINDOWS = sys.platform == "win32"
+# Shutdown timeout constants (seconds)
+SHUTDOWN_TIMEOUT_NON_GRACEFUL = 5  # Non-graceful modules or no ack response
+SHUTDOWN_TIMEOUT_PARTIAL = 3       # Graceful module ack'd but no ready
+SHUTDOWN_TIMEOUT_READY = 1         # Graceful module sent ready (cleanup done)
+SHUTDOWN_TIMEOUT_BULK = 3          # Bulk stop_all() safety net
 # Core module names that are started in Phase 1-2 (not Phase 4)
 CORE_MODULE_NAMES = {"registry", "event_hub"}
+WATCHDOG_MODULE_NAME = "watchdog"
 class Launcher:
     """Kite system entry point. Starts Registry, manages modules, exposes API."""
@@ -65,9 +75,9 @@ class Launcher:
         self.modules: dict[str, ModuleInfo] = {}
         self._shutdown_event = asyncio.Event()
         self._thread_shutdown = threading.Event()
+        self._shutdown_complete = threading.Event()  # Set when normal shutdown finishes
         self._api_server: uvicorn.Server | None = None
         self._api_ready = threading.Event()
-        self._fail_counts: dict[str, int] = {}  # module_name -> consecutive failure count
         self._module_tokens: dict[str, str] = {}  # module_name -> per-module token
         # Three-layer state model: desired_state per module
@@ -83,15 +93,48 @@ class Launcher:
         # Event waiters: {event_key: (asyncio.Event, data_dict)}
         self._event_waiters: dict[str, tuple[asyncio.Event, dict]] = {}
+        # Module ready times: module_name -> seconds from start to ready
+        self._ready_times: dict[str, float] = {}
+        # Shared HTTP client for Registry communication (lazy-init, reuses TCP connections)
+        self._http: httpx.AsyncClient | None = None
+        # Module exit reasons: module_name -> reason string (for modules that sent module.exiting)
+        self._exit_reasons: dict[str, str] = {}
+        # Graceful shutdown capability: module_name -> True if module declared support
+        # Registry and Event Hub default to True (they start before Watchdog can observe)
+        self._graceful_modules: dict[str, bool] = {"registry": True, "event_hub": True}
+        # System-wide shutdown flag: prevents Watchdog restart during shutdown
+        self._system_shutting_down = False
         # Kite stdout message waiters: {waiter_key: (threading.Event, data_dict)}
         # Used by ProcessManager stdout callback (cross-thread)
         self._msg_waiters: dict[str, tuple[threading.Event, dict]] = {}
-        self._lifecycle_log = os.path.join(
-            os.environ["KITE_INSTANCE_DIR"], "launcher", "lifecycle.jsonl",
-        )
+        suffix = self.process_manager.instance_suffix
+        state_dir = os.path.join(os.environ["KITE_INSTANCE_DIR"], "launcher", "state")
+        os.makedirs(state_dir, exist_ok=True)
+        self._lifecycle_log = os.path.join(state_dir, f"lifecycle{suffix}.jsonl")
+        # Clear lifecycle log on startup (like latest.log)
+        try:
+            with open(self._lifecycle_log, "w", encoding="utf-8") as f:
+                pass
+        except Exception:
+            pass
+        os.environ["KITE_INSTANCE_SUFFIX"] = suffix
         self._app = self._create_api_app()
+    @staticmethod
+    def _fmt_elapsed(seconds: float) -> str:
+        """Format elapsed seconds: <1s → 'NNNms', >=1s → 'N.Ns', >=10s → 'NNs'."""
+        if seconds < 1:
+            return f"{seconds * 1000:.0f}ms"
+        if seconds < 10:
+            return f"{seconds:.1f}s"
+        return f"{seconds:.0f}s"
     # ── Instance workspace resolution ──
     @staticmethod
@@ -124,7 +167,6 @@ class Launcher:
                 with open(cwd_file, "w", encoding="utf-8") as f:
                     f.write(cwd)
                 os.environ["KITE_INSTANCE_DIR"] = candidate
-                print(f"[launcher] 实例工作区已创建: {candidate}")
                 return
             if os.path.isfile(cwd_file):
@@ -132,7 +174,6 @@ class Launcher:
                     with open(cwd_file, "r", encoding="utf-8") as f:
                         if f.read().strip() == cwd:
                             os.environ["KITE_INSTANCE_DIR"] = candidate
-                            print(f"[launcher] 实例工作区已找到: {candidate}")
                             return
                 except Exception:
                     pass
@@ -180,8 +221,7 @@ class Launcher:
     def run(self):
         """Synchronous entry point. Sets up signals, runs the async main loop."""
-        print("[launcher] Kite 启动中...")
-        print("[launcher] ── 环境变量 ──")
+        print("[launcher] ── 环境 ──")
         for key in sorted(k for k in os.environ if k.startswith("KITE_")):
             print(f"[launcher]   {key} = {os.environ[key]}")
         print(f"[launcher]   PID = {os.getpid()}")
@@ -197,6 +237,8 @@ class Launcher:
             asyncio.run(self._async_main())
         except KeyboardInterrupt:
             pass
+        except RuntimeError as e:
+            print(f"[launcher] 启动失败: {e}")
         finally:
             self._final_cleanup()
@@ -204,7 +246,7 @@ class Launcher:
         """Request graceful shutdown. Thread-safe — can be called from signal handler or any thread."""
         if self._thread_shutdown.is_set():
             return  # already shutting down
-        print(f"\n[launcher] {reason or '收到关闭请求'}")
+        print(f"[launcher] {reason or '收到关闭请求'}")
         self._thread_shutdown.set()
         # Wake up asyncio event loop immediately (so _monitor_loop / wait_for exits)
         loop = self._loop
@@ -213,9 +255,19 @@ class Launcher:
                 loop.call_soon_threadsafe(self._shutdown_event.set)
             except RuntimeError:
                 pass
-        # Safety net: force exit after 15s no matter what
+        # Safety net: force exit after 10s only if normal shutdown hasn't completed
         def _force():
-            time.sleep(15)
+            if self._shutdown_complete.wait(timeout=10):
+                return  # Normal shutdown completed — no need to force
+            try:
+                pm = self.process_manager
+                still = [n for n in pm._processes if pm.is_running(n)]
+            except Exception:
+                still = []
+            if still:
+                print(f"[launcher] 关闭超时，以下模块仍在运行: {', '.join(still)}，强制退出")
+            else:
+                print("[launcher] 关闭超时，强制退出")
             os._exit(1)
         threading.Thread(target=_force, daemon=True).start()
@@ -266,62 +318,145 @@ class Launcher:
     async def _async_main(self):
         """Full 4-phase startup sequence, then monitor loop."""
         self._loop = asyncio.get_running_loop()
+        t_start = time.monotonic()
+        self._start_unix = time.time()
+        phase_times = {}
+        G = "\033[32m"
+        R = "\033[0m"
         # Validate core modules exist (mechanism 12)
         self._validate_core_modules()
-        # Cleanup leftovers from previous instances
-        self.process_manager.cleanup_leftovers()
-        # Phase 1: Registry bootstrap
-        await self._phase1_registry()
-        if self._shutdown_event.is_set(): return
+        # Cleanup leftovers from previous instances (current instance dir)
+        local_cleaned = self.process_manager.cleanup_leftovers()
-        # Scan modules (can happen before Phase 2)
-        self.modules = self.module_scanner.scan()
-        for name, info in self.modules.items():
-            self._log_lifecycle("scanned", name, state=info.state, module_dir=info.module_dir)
-        print(f"[launcher] 发现 {len(self.modules)} 个模块: {', '.join(self.modules.keys()) or '(无)'}")
-        # Generate per-module tokens (including event_hub and registry)
-        await self._register_module_tokens()
-        if self._shutdown_event.is_set(): return
+        # Cross-directory leftover cleanup (background, non-blocking)
+        # run_in_executor returns a Future (not coroutine), so use ensure_future
+        self._global_cleanup_task = asyncio.ensure_future(
+            asyncio.get_running_loop().run_in_executor(
+                None, self.process_manager.cleanup_global_leftovers
+            )
+        )
-        # Phase 2: Event Hub bootstrap
-        await self._phase2_event_hub()
-        if self._shutdown_event.is_set(): return
+        try:
+            # Phase 1+2: Registry + Event Hub parallel bootstrap
+            t0 = time.monotonic()
+            await self._phase1_parallel_bootstrap()
+            elapsed_p1 = time.monotonic() - t0
+            phase_times["Phase 1+2: Registry + Event Hub (并行)"] = elapsed_p1
+            print(f"{G}[launcher] ✓ Phase 1+2 完成: Registry + Event Hub 已就绪 ({elapsed_p1:.2f}s){R}")
+            if self._shutdown_event.is_set(): return
+            # Phase 3: Wait for Registry delayed ready
+            t0 = time.monotonic()
+            await self._phase3_registry_ready()
+            elapsed = time.monotonic() - t0
+            phase_times["Phase 3: Registry 事件总线"] = elapsed
+            print(f"{G}[launcher] ✓ Phase 3 完成: Registry 已连接事件总线 ({elapsed:.2f}s){R}")
+            if self._shutdown_event.is_set(): return
+            # Initialize desired_state from config_state (needed before Phase 3.5)
+            for name, info in self.modules.items():
+                if info.state == "enabled":
+                    self._desired_states[name] = "running"
+                else:  # manual, disabled
+                    self._desired_states[name] = "stopped"
+            # Core modules are already running
+            for cn in CORE_MODULE_NAMES:
+                self._desired_states[cn] = "running"
+            # Phase 3.5: Watchdog ready
+            # If started in parallel (Phase 1), just wait for module.ready
+            # Otherwise start it now (fallback)
+            watchdog_info = self.modules.get(WATCHDOG_MODULE_NAME)
+            if watchdog_info and self._desired_states.get(WATCHDOG_MODULE_NAME) == "running":
+                t0 = time.monotonic()
+                if getattr(self, '_watchdog_parallel', False):
+                    print(f"[launcher] Phase 3.5: Watchdog 已并行启动，等待就绪...")
+                    ready = await self._wait_event("module.ready", "watchdog", timeout=15)
+                    elapsed = time.monotonic() - t0
+                    if ready and not ready.get("_exited"):
+                        self._graceful_modules["watchdog"] = bool(ready.get("graceful_shutdown"))
+                        self._ready_times["watchdog"] = elapsed
+                        print(f"[launcher] Watchdog 已就绪")
+                        self._log_lifecycle("started", "watchdog")
+                        await self._publish_event("module.started", {"module_id": "watchdog"})
+                        self.process_manager.close_stdio("watchdog")
+                    else:
+                        print(f"[launcher] 警告: Watchdog 在 15s 内未就绪")
+                else:
+                    print(f"[launcher] Phase 3.5: 启动 Watchdog...")
+                    await self._start_one_module(watchdog_info)
+                    elapsed = time.monotonic() - t0
+                print(f"{G}[launcher] ✓ Phase 3.5 完成: Watchdog ({elapsed:.2f}s){R}")
+                if self._shutdown_event.is_set(): return
+            # Phase 4: Start remaining enabled modules
+            t0 = time.monotonic()
+            await self._phase4_start_modules()
+            elapsed = time.monotonic() - t0
+            phase_times["Phase 4: Extensions"] = elapsed
+            print(f"{G}[launcher] ✓ Phase 4 完成: 扩展模块已启动 ({elapsed:.2f}s){R}")
+            if self._shutdown_event.is_set(): return
-        # Phase 3: Wait for Registry delayed ready
-        await self._phase3_registry_ready()
-        if self._shutdown_event.is_set(): return
+            # Post-startup
+            self.process_manager.persist_records()
+            self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
-        # Phase 4: Start remaining enabled modules
-        # Initialize desired_state from config_state
-        for name, info in self.modules.items():
-            if info.state == "enabled":
-                self._desired_states[name] = "running"
-            else:  # manual, disabled
-                self._desired_states[name] = "stopped"
-        # Core modules are already running
-        for cn in CORE_MODULE_NAMES:
-            self._desired_states[cn] = "running"
+            # Wait for global leftover cleanup to finish (non-blocking with timeout)
+            global_cleaned = {}
+            if hasattr(self, '_global_cleanup_task'):
+                try:
+                    global_cleaned = await asyncio.wait_for(self._global_cleanup_task, timeout=5) or {}
+                except asyncio.TimeoutError:
+                    print("[launcher] 警告: 全局遗留清理超时 (5s)，跳过")
+                except Exception as e:
+                    print(f"[launcher] 警告: 全局遗留清理出错: {e}")
+            # Merge local + global cleanup stats
+            cleaned_stats: dict[str, int] = {}
+            for d in (local_cleaned, global_cleaned):
+                for k, v in d.items():
+                    cleaned_stats[k] = cleaned_stats.get(k, 0) + v
+            # Global instance scan (via executor to avoid blocking)
+            global_instances = await asyncio.get_running_loop().run_in_executor(
+                None, self.process_manager.get_global_instances
+            )
-        await self._phase4_start_modules()
-        if self._shutdown_event.is_set(): return
+            # ── Startup report ──
+            total_time = time.monotonic() - t_start
+            await self._print_startup_report(total_time, phase_times,
+                                             global_instances=global_instances,
+                                             cleaned_stats=cleaned_stats)
+            # Notify all modules that system startup is complete
+            await self._publish_event("system.ready", {
+                "startup_time": round(total_time, 2),
+            })
-        # Post-startup
-        self.process_manager.persist_records()
-        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
+            print("[launcher] 进入监控循环 (按 Ctrl+C 或 'q' 退出)")
+            await self._monitor_loop()
+        finally:
+            try:
+                await self._graceful_shutdown_all()
+            except Exception as e:
+                print(f"[launcher] 优雅关闭出错: {e}")
-        print("[launcher] 进入监控循环 (按 Ctrl+C 或 'q' 退出)")
-        await self._monitor_loop()
+    # ── Phase 1+2: Parallel bootstrap (Registry + Event Hub) ──
-        await self._graceful_shutdown_all()
+    async def _phase1_parallel_bootstrap(self):
+        """Start Registry + Event Hub processes in parallel to overlap cold-start time.
-    # ── Phase 1: Registry ──
+        Flow:
+        1. Start Registry + Event Hub processes simultaneously
+        2. Wait for Registry to report port via stdout
+        3. Set KITE_REGISTRY_PORT env (for Phase 3.5/4 modules) + start API
+        4. Scan modules + register self & tokens (parallel)
+        5. Send launcher_ws_token + registry_port to Event Hub via stdin
+        6. Wait for Event Hub ws_endpoint → WS connect → module.ready
+        """
+        t_registry = time.monotonic()
-    async def _phase1_registry(self):
-        """Start Registry → capture port from stdout → set env → start API → register self."""
+        # ── Step 1: Start both processes ──
         registry_dir = os.path.join(os.environ["KITE_PROJECT"], "core", "registry")
         registry_info = ModuleInfo(
             name="registry",
@@ -332,30 +467,186 @@ class Launcher:
             entry="entry.py",
             module_dir=registry_dir,
         )
-        boot_info = {"token": self.kite_token}
+        boot_info_registry = {"token": self.kite_token}
         self._log_lifecycle("starting", "registry")
-        ok = self.process_manager.start_module(registry_info, boot_info=boot_info)
+        ok = self.process_manager.start_module(registry_info, boot_info=boot_info_registry)
         if not ok:
             self._log_lifecycle("start_failed", "registry")
             raise RuntimeError("启动 Registry 失败")
-        # Wait for Registry to output port via stdout (mechanism 2)
-        print("[launcher] 等待 Registry 端口...")
+        # Start Event Hub in parallel (before Registry port is known)
+        eh_dir = os.path.join(os.environ["KITE_PROJECT"], "core", "event_hub")
+        eh_info = ModuleInfo(
+            name="event_hub",
+            display_name="Event Hub",
+            type="infrastructure",
+            state="enabled",
+            runtime="python",
+            entry="entry.py",
+            module_dir=eh_dir,
+        )
+        # Generate Event Hub token early (will register to Registry once it's up)
+        eh_token = secrets.token_hex(32)
+        self._module_tokens["event_hub"] = eh_token
+        boot_info_eh = {"token": eh_token}
+        self._log_lifecycle("starting", "event_hub")
+        ok = self.process_manager.start_module(eh_info, boot_info=boot_info_eh)
+        if not ok:
+            self._log_lifecycle("start_failed", "event_hub")
+            raise RuntimeError("启动 Event Hub 失败")
+        # Start Watchdog in parallel (before Registry port is known)
+        # Watchdog will block on stdin waiting for registry_port
+        watchdog_dir = os.path.join(os.environ["KITE_PROJECT"], "extensions", "services", "watchdog")
+        watchdog_md = os.path.join(watchdog_dir, "module.md")
+        self._watchdog_parallel = False  # track whether watchdog was started in parallel
+        if os.path.isfile(watchdog_md):
+            wd_token = secrets.token_hex(32)
+            self._module_tokens["watchdog"] = wd_token
+            # Parse watchdog module.md for ModuleInfo
+            try:
+                with open(watchdog_md, "r", encoding="utf-8") as f:
+                    wd_fm = _parse_frontmatter(f.read())
+                wd_info = ModuleInfo(
+                    name="watchdog",
+                    display_name=wd_fm.get("display_name", "Watchdog"),
+                    type=wd_fm.get("type", "service"),
+                    state="enabled",
+                    runtime=wd_fm.get("runtime", "python"),
+                    entry=wd_fm.get("entry", "entry.py"),
+                    module_dir=watchdog_dir,
+                )
+                boot_info_wd = {"token": wd_token}
+                self._log_lifecycle("starting", "watchdog")
+                ok = self.process_manager.start_module(wd_info, boot_info=boot_info_wd)
+                if ok:
+                    self._watchdog_parallel = True
+                else:
+                    self._log_lifecycle("start_failed", "watchdog")
+                    print("[launcher] 警告: Watchdog 并行启动失败，将在 Phase 3.5 重试")
+            except Exception as e:
+                print(f"[launcher] 警告: Watchdog module.md 解析失败: {e}")
+        parallel_modules = "Registry + Event Hub" + (" + Watchdog" if self._watchdog_parallel else "")
+        print(f"[launcher] {parallel_modules} 进程已同时启动，等待 Registry 端口...")
+        # Persist immediately after starting core processes
+        self.process_manager.persist_records()
+        # ── Step 2: Wait for Registry port ──
         msg = await self._wait_kite_message("registry", "port", timeout=6)
         if not msg or not msg.get("port"):
             raise RuntimeError("致命错误: Registry 在 6s 内未报告端口")
         self.registry_port = int(msg["port"])
-        print(f"[launcher] Registry 端口: {self.registry_port}")
+        self._ready_times["registry"] = time.monotonic() - t_registry
+        _wait_s = time.monotonic() - t_registry
+        print(f"[launcher] Registry 端口: {self.registry_port} (等待 {self._fmt_elapsed(_wait_s)})")
-        # Set KITE_REGISTRY_PORT for all subsequent child processes
+        # ── Step 3: Set env + start API + immediately unblock Event Hub ──
         os.environ["KITE_REGISTRY_PORT"] = str(self.registry_port)
-        # Start Launcher API in a separate thread
         self._start_api_thread()
-        # Register Launcher itself to Registry
-        await self._register_self()
+        # Send launcher_ws_token + registry_port to Event Hub ASAP (unblock it)
+        self._launcher_ws_token = secrets.token_hex(32)
+        self.process_manager.write_stdin("event_hub", {
+            "kite": "launcher_ws_token",
+            "launcher_ws_token": self._launcher_ws_token,
+        })
+        self.process_manager.write_stdin("event_hub", {
+            "kite": "registry_port",
+            "registry_port": self.registry_port,
+        })
+        # Send registry_port to Watchdog via stdin (if started in parallel)
+        # Watchdog will retry querying launcher.api_endpoint until it's available
+        if self.process_manager.is_running("watchdog"):
+            self.process_manager.write_stdin("watchdog", {
+                "kite": "registry_port",
+                "registry_port": self.registry_port,
+            })
+        # ── Step 4: Scan + register tokens ‖ wait for Event Hub ws_endpoint (parallel) ──
+        # Pre-register ws_endpoint waiter BEFORE gather to avoid race condition:
+        # module_scanner.scan() is synchronous and blocks the event loop,
+        # so the _wait_event_hub_endpoint coroutine wouldn't register its waiter in time.
+        ws_waiter_key = "event_hub:ws_endpoint"
+        ws_evt = threading.Event()
+        ws_data: dict = {}
+        self._msg_waiters[ws_waiter_key] = (ws_evt, ws_data)
+        async def _scan_and_register_tokens():
+            t_scan = time.monotonic()
+            self.modules = self.module_scanner.scan()
+            for name, info in self.modules.items():
+                self._log_lifecycle("scanned", name, state=info.state, module_dir=info.module_dir)
+            _scan_s = time.monotonic() - t_scan
+            print(f"[launcher] 发现 {len(self.modules)} 个模块: {', '.join(self.modules.keys()) or '(无)'} (扫描 {self._fmt_elapsed(_scan_s)})")
+            t_reg = time.monotonic()
+            await self._register_module_tokens()
+            _reg_s = time.monotonic() - t_reg
+            print(f"[launcher] 令牌注册完成 ({self._fmt_elapsed(_reg_s)})")
+        async def _wait_event_hub_endpoint():
+            t_wait_eh = time.monotonic()
+            print("[launcher] 等待 Event Hub ws_endpoint...")
+            shutdown = self._thread_shutdown
+            def _wait():
+                deadline = time.monotonic() + 10
+                while time.monotonic() < deadline:
+                    if ws_evt.wait(timeout=0.5):
+                        return True
+                    if shutdown.is_set():
+                        return False
+                return False
+            got = await asyncio.get_running_loop().run_in_executor(None, _wait)
+            self._msg_waiters.pop(ws_waiter_key, None)
+            if not got or not ws_data.get("ws_endpoint"):
+                raise RuntimeError("致命错误: Event Hub 在 10s 内未报告 ws_endpoint")
+            self._event_hub_ws_url = ws_data["ws_endpoint"]
+            _eh_s = time.monotonic() - t_wait_eh
+            print(f"[launcher] Event Hub 已发现: {self._event_hub_ws_url} (等待 {self._fmt_elapsed(_eh_s)})")
+        # Run all three in parallel: register_self + scan_tokens + wait_event_hub
+        await asyncio.gather(
+            self._register_self(),
+            _scan_and_register_tokens(),
+            _wait_event_hub_endpoint(),
+        )
+        if self._shutdown_event.is_set():
+            return
+        # ── Step 5: WS connect → module.ready ──
+        t_eh = time.monotonic()
+        self._ws_task = asyncio.create_task(self._ws_loop())
+        # Wait for Event Hub module.ready (sent when Launcher connects)
+        ready = await self._wait_event("module.ready", "event_hub", timeout=15)
+        if ready:
+            self._graceful_modules["event_hub"] = bool(ready.get("graceful_shutdown"))
+            print("[launcher] Event Hub 已就绪")
+        else:
+            print("[launcher] 警告: Event Hub 在 15s 内未发送 module.ready")
+        self._ready_times["event_hub"] = time.monotonic() - t_eh
+        self._log_lifecycle("started", "event_hub")
+        await self._publish_event("module.started", {"module_id": "event_hub"})
+        self.process_manager.close_stdio("event_hub")
+        # Store eh_info in modules dict if not already present (from scan)
+        if "event_hub" not in self.modules:
+            self.modules["event_hub"] = eh_info
+    def _get_http(self) -> httpx.AsyncClient:
+        """Get shared HTTP client (lazy-init, reuses TCP connections to Registry)."""
+        if self._http is None or self._http.is_closed:
+            self._http = httpx.AsyncClient(timeout=5)
+        return self._http
+    async def _close_http(self):
+        """Close shared HTTP client."""
+        if self._http and not self._http.is_closed:
+            await self._http.aclose()
+            self._http = None
     async def _register_self(self):
         """Register Launcher itself to Registry."""
@@ -376,86 +667,29 @@ class Launcher:
             "events_subscribe": [">"],
         }
         try:
-            async with httpx.AsyncClient() as client:
-                resp = await client.post(url, json=payload, headers=headers, timeout=5)
-                if resp.status_code == 200:
-                    print("[launcher] 已注册到 Registry")
-                else:
-                    print(f"[launcher] 警告: Registry 注册返回 {resp.status_code}")
+            client = self._get_http()
+            resp = await client.post(url, json=payload, headers=headers)
+            if resp.status_code == 200:
+                print("[launcher] 已注册到 Registry")
+            else:
+                print(f"[launcher] 警告: Registry 注册返回 {resp.status_code}")
         except Exception as e:
             print(f"[launcher] 警告: 注册到 Registry 失败: {e}")
-    # ── Phase 2: Event Hub ──
-    async def _phase2_event_hub(self):
-        """Start Event Hub → stdin launcher_ws_token → stdout ws_endpoint → WS connect → module.ready."""
-        # Find event_hub in scanned modules or build manually
-        eh_info = self.modules.get("event_hub")
-        if not eh_info:
-            eh_dir = os.path.join(os.environ["KITE_PROJECT"], "core", "event_hub")
-            eh_info = ModuleInfo(
-                name="event_hub",
-                display_name="Event Hub",
-                type="infrastructure",
-                state="enabled",
-                runtime="python",
-                entry="entry.py",
-                module_dir=eh_dir,
-            )
-        token = self._module_tokens.get("event_hub", "")
-        if not token:
-            token = secrets.token_hex(32)
-            self._module_tokens["event_hub"] = token
-            await self._register_tokens_to_registry({"event_hub": token})
-        boot_info = {"token": token}
-        self._log_lifecycle("starting", "event_hub")
-        ok = self.process_manager.start_module(eh_info, boot_info=boot_info)
-        if not ok:
-            self._log_lifecycle("start_failed", "event_hub")
-            raise RuntimeError("启动 Event Hub 失败")
-        # Send launcher_ws_token via stdin (mechanism 6)
-        self._launcher_ws_token = secrets.token_hex(32)
-        self.process_manager.write_stdin("event_hub", {
-            "kite": "launcher_ws_token",
-            "launcher_ws_token": self._launcher_ws_token,
-        })
-        # Wait for ws_endpoint from stdout (mechanism 5)
-        print("[launcher] 等待 Event Hub ws_endpoint...")
-        msg = await self._wait_kite_message("event_hub", "ws_endpoint", timeout=6)
-        if not msg or not msg.get("ws_endpoint"):
-            raise RuntimeError("致命错误: Event Hub 在 6s 内未报告 ws_endpoint")
-        self._event_hub_ws_url = msg["ws_endpoint"]
-        print(f"[launcher] Event Hub 已发现: {self._event_hub_ws_url}")
-        # Connect to Event Hub WebSocket with launcher_ws_token
-        self._ws_task = asyncio.create_task(self._ws_loop())
-        # Wait for Event Hub module.ready (sent when Launcher connects)
-        ready = await self._wait_event("module.ready", "event_hub", timeout=15)
-        if ready:
-            print("[launcher] Event Hub 已就绪")
-        else:
-            print("[launcher] 警告: Event Hub 在 15s 内未发送 module.ready")
-        self._log_lifecycle("started", "event_hub")
-        await self._publish_event("module.started", {"module_id": "event_hub"})
-        self.process_manager.close_stdio("event_hub")
+    # ── (Phase 2 merged into _phase1_parallel_bootstrap) ──
     # ── Phase 3: Registry delayed ready ──
     async def _phase3_registry_ready(self):
         """Wait for Registry module.ready (triggered after Event Hub registers to Registry
         and Registry connects to Event Hub WS)."""
-        print("[launcher] 等待 Registry 延迟就绪...")
+        print("[launcher] 等待 Registry 连接 Event Hub...")
         ready = await self._wait_event("module.ready", "registry", timeout=12)
         if ready:
-            print("[launcher] Registry 已就绪")
+            self._graceful_modules["registry"] = bool(ready.get("graceful_shutdown"))
+            print("[launcher] Registry 事件总线连接完成")
         else:
-            print("[launcher] 警告: Registry 在 12s 内未发送 module.ready (降级运行)")
+            print("[launcher] 警告: Registry 在 12s 内未连接事件总线 (降级运行)")
         self._log_lifecycle("started", "registry")
         await self._publish_event("module.started", {"module_id": "registry"})
@@ -467,7 +701,8 @@ class Launcher:
         """Start enabled modules (excluding core) in dependency order."""
         to_start = [m for m in self.modules.values()
                      if self._desired_states.get(m.name) == "running"
-                     and m.name not in CORE_MODULE_NAMES]
+                     and m.name not in CORE_MODULE_NAMES
+                     and m.name != WATCHDOG_MODULE_NAME]
         if not to_start:
             print("[launcher] 没有额外模块需要启动")
             return
@@ -487,14 +722,18 @@ class Launcher:
                         print(f"[launcher] 错误: '{m.name}' 依赖已禁用的模块 '{dep}'")
         try:
-            sorted_modules = self._topo_sort(to_start)
+            layers = self._topo_layers(to_start)
         except RuntimeError as e:
             print(f"[launcher] 错误: {e}")
             return
-        print(f"[launcher] 正在启动 {len(sorted_modules)} 个模块...")
-        for info in sorted_modules:
-            await self._start_one_module(info)
+        total = sum(len(layer) for layer in layers)
+        print(f"[launcher] 正在启动 {total} 个模块...")
+        for layer in layers:
+            if len(layer) == 1:
+                await self._start_one_module(layer[0])
+            else:
+                await asyncio.gather(*(self._start_one_module(info) for info in layer))
     # ── Event Hub WebSocket connection ──
@@ -506,16 +745,19 @@ class Launcher:
             except asyncio.CancelledError:
                 return
             except Exception as e:
-                print(f"[launcher] Event Hub 连接错误: {e}")
+                if not self._system_shutting_down:
+                    print(f"[launcher] Event Hub 连接错误: {e}")
             self._ws = None
             await asyncio.sleep(5)
     async def _ws_connect(self):
         """Single WebSocket session with launcher_ws_token auth."""
-        ws_url = f"{self._event_hub_ws_url}?token={self._launcher_ws_token}"
-        async with websockets.connect(ws_url, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
+        ws_url = f"{self._event_hub_ws_url}?token={self._launcher_ws_token}&id=launcher"
+        t_ws_connect = time.monotonic()
+        async with websockets.connect(ws_url, open_timeout=3, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
             self._ws = ws
-            print("[launcher] 已连接到 Event Hub")
+            _ws_s = time.monotonic() - t_ws_connect
+            print(f"[launcher] 已连接到 Event Hub ({self._fmt_elapsed(_ws_s)})")
             # Subscribe to all events
             await ws.send(json.dumps({
@@ -529,52 +771,89 @@ class Launcher:
                     msg = json.loads(raw)
                 except (json.JSONDecodeError, TypeError):
                     continue
-                msg_type = msg.get("type", "")
-                if msg_type == "event":
-                    source = msg.get("source", "unknown")
-                    event = msg.get("event", "")
-                    data = msg.get("data", {})
-                    # Trigger event waiters
-                    module_id = data.get("module_id", "")
-                    waiter_key = f"{event}:{module_id}"
-                    waiter = self._event_waiters.get(waiter_key)
-                    if waiter:
-                        waiter[1].update(data)
-                        waiter[0].set()
-                    ts = msg.get("timestamp", "")
-                    latency_str = ""
-                    if ts:
-                        try:
-                            from datetime import datetime, timezone
-                            sent = datetime.fromisoformat(ts)
-                            delay_ms = (datetime.now(timezone.utc) - sent).total_seconds() * 1000
-                            latency_str = f" ({delay_ms:.1f}ms)"
-                            local_ts = sent.astimezone().strftime("%H:%M:%S")
-                        except Exception:
-                            local_ts = ts[11:19] if len(ts) >= 19 else ts
-                        print(f"[{source}] {local_ts} {event}{latency_str}: {json.dumps(data, ensure_ascii=False)}")
-                    else:
-                        print(f"[{source}] {event}: {json.dumps(data, ensure_ascii=False)}")
-                elif msg_type == "error":
-                    print(f"[launcher] Event Hub 错误: {msg.get('message')}")
+                try:
+                    msg_type = msg.get("type", "")
+                    if msg_type == "event":
+                        source = msg.get("source", "unknown")
+                        event = msg.get("event", "")
+                        data = msg.get("data") if isinstance(msg.get("data"), dict) else {}
+                        # Trigger event waiters
+                        module_id = data.get("module_id", "")
+                        waiter_key = f"{event}:{module_id}"
+                        waiter = self._event_waiters.get(waiter_key)
+                        if waiter:
+                            waiter[1].update(data)
+                            waiter[0].set()
+                        # module.exiting also wakes module.ready waiter
+                        # (module won't send ready — no point waiting)
+                        if event == "module.exiting" and module_id:
+                            ready_key = f"module.ready:{module_id}"
+                            ready_waiter = self._event_waiters.get(ready_key)
+                            if ready_waiter:
+                                ready_waiter[1].update(data)
+                                ready_waiter[1]["_exited"] = True
+                                ready_waiter[0].set()
+                        # module.crash → print red crash summary (real-time notification)
+                        if event == "module.crash" and module_id:
+                            RED = "\033[91m"
+                            RESET = "\033[0m"
+                            exc_type = data.get("exception_type", "Unknown")
+                            preview = data.get("traceback_preview", "")
+                            severity = data.get("severity", "error")
+                            print(f"[launcher] {RED}模块 '{module_id}' 崩溃: "
+                                  f"{exc_type} — {preview}{RESET}")
+                            _suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")
+                            crash_log = os.path.join(
+                                os.environ.get("KITE_INSTANCE_DIR", ""),
+                                module_id, "log", f"crashes{_suffix}.jsonl"
+                            )
+                            print(f"[launcher] 崩溃日志: {crash_log}")
+                        ts = msg.get("timestamp", "")
+                        # Only log system events (module.*, watchdog.*) to avoid flooding
+                        # from benchmark/test traffic
+                        if not (event.startswith("module.") or event.startswith("watchdog.")):
+                            continue
+                        latency_str = ""
+                        if ts:
+                            try:
+                                from datetime import datetime, timezone
+                                sent = datetime.fromisoformat(ts)
+                                delay_ms = (datetime.now(timezone.utc) - sent).total_seconds() * 1000
+                                latency_str = f" ({delay_ms:.1f}ms)"
+                                local_ts = sent.astimezone().strftime("%H:%M:%S")
+                            except Exception:
+                                local_ts = ts[11:19] if len(ts) >= 19 else ts
+                            print(f"[{source}] {local_ts} {event}{latency_str}: {json.dumps(data, ensure_ascii=False)}")
+                        else:
+                            print(f"[{source}] {event}: {json.dumps(data, ensure_ascii=False)}")
+                    elif msg_type == "error":
+                        print(f"[launcher] Event Hub 错误: {msg.get('message')}")
+                except Exception as e:
+                    print(f"[launcher] 事件处理异常（已忽略）: {e}")
     async def _publish_event(self, event_type: str, data: dict):
-        """Publish an event to Event Hub via WebSocket."""
+        """Publish an event to Event Hub via WebSocket. Uses create_task to avoid
+        deadlock with _ws_connect recv loop (websockets 15.x send can block when
+        incoming frames are pending and recv is held by async-for)."""
         if not self._ws:
             return
         from datetime import datetime, timezone
-        msg = {
+        msg = json.dumps({
             "type": "event",
             "event_id": str(uuid.uuid4()),
             "event": event_type,
             "source": "launcher",
             "timestamp": datetime.now(timezone.utc).isoformat(),
             "data": data,
-        }
-        try:
-            await self._ws.send(json.dumps(msg))
-        except Exception as e:
-            print(f"[launcher] 发布事件失败: {e}")
+        })
+        async def _send():
+            try:
+                await self._ws.send(msg)
+            except Exception as e:
+                print(f"[launcher] 发布事件失败: {e}")
+        asyncio.create_task(_send())
     def _publish_event_threadsafe(self, event_type: str, data: dict):
         """Publish event from non-async context (API thread). Fire-and-forget."""
@@ -599,53 +878,127 @@ class Launcher:
             self._event_waiters.pop(key, None)
     async def _graceful_stop(self, name: str, reason: str = "stop_requested", timeout: float = 10):
-        """Graceful shutdown: send event → wait ack → wait ready → kill."""
+        """Graceful shutdown: check capability → send event → wait ack → wait ready → kill.
+        Modules that did not declare graceful_shutdown in module.ready are terminated directly.
+        """
         self._log_lifecycle("stopping", name, reason=reason)
+        if not self._graceful_modules.get(name):
+            self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
+            self._log_lifecycle("stopped", name, reason=reason)
+            await self._publish_event("module.stopped", {
+                "module_id": name,
+                "graceful_shutdown": False,
+            })
+            return
         await self._publish_event("module.shutdown", {
             "module_id": name, "reason": reason, "timeout": timeout,
         })
         ack = await self._wait_event("module.shutdown.ack", name, timeout=3)
         if not ack:
-            self.process_manager.stop_module(name, timeout=5)
-            await self._publish_event("module.stopped", {"module_id": name})
+            self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_NON_GRACEFUL)
+            await self._publish_event("module.stopped", {
+                "module_id": name,
+                "graceful_shutdown": self._graceful_modules.get(name, False),
+            })
             return
         estimated = min(ack.get("estimated_cleanup", timeout), timeout)
         ready = await self._wait_event("module.shutdown.ready", name, timeout=estimated)
         if ready:
-            self.process_manager.stop_module(name, timeout=1)
+            self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_READY)
         else:
-            self.process_manager.stop_module(name, timeout=3)
+            self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_PARTIAL)
         self._log_lifecycle("stopped", name, reason=reason)
-        await self._publish_event("module.stopped", {"module_id": name})
+        await self._publish_event("module.stopped", {
+            "module_id": name,
+            "graceful_shutdown": self._graceful_modules.get(name, False),
+        })
     async def _graceful_shutdown_all(self):
-        """Broadcast module.shutdown to all running modules, then force-kill survivors."""
+        """Shut down all modules. Order:
+        1. Send shutdown to graceful modules (excl. Event Hub) — let them start cleanup
+        2. Terminate non-graceful modules (fast, runs during graceful cleanup)
+        3. Wait for graceful modules to exit (process monitoring)
+        4. Shut down Event Hub last (keeps event routing alive throughout)
+        """
+        self._system_shutting_down = True
         running = [n for n in self.modules if self.process_manager.is_running(n)]
         # Also check core modules
         for cn in CORE_MODULE_NAMES:
             if self.process_manager.is_running(cn) and cn not in running:
                 running.append(cn)
         if not running:
+            print("[launcher] 没有运行中的模块需要关闭")
             return
-        print(f"[launcher] 优雅关闭: {', '.join(running)}")
-        for name in running:
+        graceful = [n for n in running if self._graceful_modules.get(n)]
+        non_graceful = [n for n in running if not self._graceful_modules.get(n)]
+        # Defer Event Hub — it must stay alive to route shutdown events
+        hub_deferred = "event_hub" in graceful
+        graceful_batch = [n for n in graceful if n != "event_hub"] if hub_deferred else graceful
+        print(f"[launcher] 正在关闭 {len(running)} 个模块: {', '.join(running)}")
+        # Phase 1: Notify graceful modules first (they start cleanup immediately)
+        for name in graceful_batch:
             self._log_lifecycle("stopping", name, reason="system_shutdown")
             await self._publish_event("module.shutdown", {
-                "module_id": name, "reason": "system_shutdown", "timeout": 10,
+                "module_id": name, "reason": "system_shutdown", "timeout": 5,
             })
-        deadline = time.time() + 10
-        while time.time() < deadline:
-            still_running = [n for n in running if self.process_manager.is_running(n)]
-            if not still_running:
-                break
-            await asyncio.sleep(0.5)
-        self.process_manager.stop_all(timeout=3)
-        for name in running:
+        # Phase 2: While graceful modules are cleaning up, terminate non-graceful ones
+        if non_graceful:
+            print(f"[launcher] 直接终止 {len(non_graceful)} 个不支持优雅退出的模块: {', '.join(non_graceful)}")
+        for name in non_graceful:
+            self._log_lifecycle("stopping", name, reason="system_shutdown")
+            self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_PARTIAL)
             self._log_lifecycle("stopped", name, reason="system_shutdown")
+        # Phase 3: Wait for graceful modules to exit (process monitoring)
+        if graceful_batch:
+            deadline = time.time() + 5
+            while time.time() < deadline:
+                still_running = [n for n in graceful_batch if self.process_manager.is_running(n)]
+                if not still_running:
+                    print("[launcher] 所有优雅退出模块已自行退出")
+                    break
+                remaining = max(0, deadline - time.time())
+                print(f"[launcher] 等待 {len(still_running)} 个模块退出 ({remaining:.0f}s): {', '.join(still_running)}")
+                await asyncio.sleep(1)
+            # Force kill survivors
+            for name in graceful_batch:
+                if self.process_manager.is_running(name):
+                    self.process_manager.stop_module(name, timeout=SHUTDOWN_TIMEOUT_PARTIAL)
+                    self._log_lifecycle("stopped", name, reason="system_shutdown")
+        # Phase 4: All other modules exited — now shut down Event Hub
+        if hub_deferred and self.process_manager.is_running("event_hub"):
+            self._log_lifecycle("stopping", "event_hub", reason="system_shutdown")
+            await self._publish_event("module.shutdown", {
+                "module_id": "event_hub", "reason": "system_shutdown", "timeout": 5,
+            })
+            deadline = time.time() + 5
+            while time.time() < deadline:
+                if not self.process_manager.is_running("event_hub"):
+                    print("[launcher] Event Hub 已退出")
+                    break
+                await asyncio.sleep(0.5)
+            if self.process_manager.is_running("event_hub"):
+                self.process_manager.stop_module("event_hub", timeout=SHUTDOWN_TIMEOUT_PARTIAL)
+            self._log_lifecycle("stopped", "event_hub", reason="system_shutdown")
+        # Final safety net
+        try:
+            self.process_manager.stop_all(timeout=SHUTDOWN_TIMEOUT_BULK)
+        except Exception as e:
+            print(f"[launcher] stop_all 出错: {e}")
+        await self._close_http()
     # ── Heartbeat to Registry ──
     async def _heartbeat_loop(self):
@@ -653,13 +1006,12 @@ class Launcher:
         while not self._thread_shutdown.is_set():
             await asyncio.sleep(30)
             try:
-                async with httpx.AsyncClient() as client:
-                    await client.post(
-                        f"http://127.0.0.1:{self.registry_port}/modules",
-                        json={"action": "heartbeat", "module_id": "launcher"},
-                        headers={"Authorization": f"Bearer {self.kite_token}"},
-                        timeout=5,
-                    )
+                client = self._get_http()
+                await client.post(
+                    f"http://127.0.0.1:{self.registry_port}/modules",
+                    json={"action": "heartbeat", "module_id": "launcher"},
+                    headers={"Authorization": f"Bearer {self.kite_token}"},
+                )
             except Exception:
                 pass
@@ -691,6 +1043,42 @@ class Launcher:
             visit(m.name)
         return order
+    def _topo_layers(self, modules: list[ModuleInfo]) -> list[list[ModuleInfo]]:
+        """Group modules into dependency layers that can be started in parallel.
+        A module's layer index is the length of the longest dependency chain
+        below it, counting only dependencies that are themselves in ``modules``;
+        names in ``depends_on`` that are not in ``modules`` are ignored.
+        Modules inside each layer keep the order they have in ``modules``, so
+        the result is reproducible across runs (iterating a ``set`` of names
+        would make it depend on string hash randomization).
+        Args:
+            modules: Modules to sort; each is read for ``name`` and ``depends_on``.
+        Returns:
+            ``layers[0]`` holds modules with no in-set dependencies, ``layers[k]``
+            those whose longest chain has length ``k``. Empty input returns ``[]``.
+        Raises:
+            RuntimeError: If the dependency graph contains a cycle.
+        """
+        # dict preserves insertion order, so it doubles as the ordered name set
+        name_map = {m.name: m for m in modules}
+        if not name_map:
+            return []
+        # Compute depth (longest path from root) for each module
+        depth: dict[str, int] = {}
+        in_stack: set[str] = set()
+        def get_depth(name: str) -> int:
+            if name in depth:
+                return depth[name]
+            # Re-entering a name that is still being resolved means a cycle
+            if name in in_stack:
+                raise RuntimeError(f"Circular dependency detected involving '{name}'")
+            in_stack.add(name)
+            d = 0
+            for dep in name_map[name].depends_on:
+                if dep in name_map:
+                    d = max(d, get_depth(dep) + 1)
+            in_stack.remove(name)
+            depth[name] = d
+            return d
+        for name in name_map:
+            get_depth(name)
+        # Group by depth, walking modules in input order for stable layers
+        layers: list[list[ModuleInfo]] = [[] for _ in range(max(depth.values()) + 1)]
+        for name, info in name_map.items():
+            layers[depth[name]].append(info)
+        return layers
     async def _start_one_module(self, info: ModuleInfo):
         """Start a single module: publish starting → start process → wait ready → started → close stdio."""
         self._log_lifecycle("starting", info.name)
@@ -698,16 +1086,29 @@ class Launcher:
         token = self._module_tokens.get(info.name, "")
         boot_info = {"token": token}
+        t0 = time.monotonic()
         ok = self.process_manager.start_module(info, boot_info=boot_info)
         if not ok:
             self._log_lifecycle("start_failed", info.name)
             return
-        # Wait for module.ready (configurable timeout, degrade on timeout)
+        # Persist immediately after starting to ensure PID is recorded
+        # (in case launcher crashes before Phase 4 completes)
+        self.process_manager.persist_records()
+        # Wait for module.ready or module.exiting (whichever comes first)
         timeout = info.launch.timeout
         ready = await self._wait_event("module.ready", info.name, timeout=timeout)
-        if ready:
-            print(f"[launcher] 模块 '{info.name}' 已就绪")
+        elapsed = time.monotonic() - t0
+        if ready and ready.get("_exited"):
+            # Module sent module.exiting before ready — it chose to quit
+            reason = ready.get("reason", "unknown")
+            self._exit_reasons[info.name] = reason
+            print(f"[launcher] 模块 '{info.name}' 主动退出: {reason} ({elapsed:.2f}s)")
+        elif ready:
+            self._graceful_modules[info.name] = bool(ready.get("graceful_shutdown"))
+            self._ready_times[info.name] = elapsed
+            print(f"[launcher] 模块 '{info.name}' 已就绪 ({elapsed:.2f}s)")
         else:
             print(f"[launcher] 警告: '{info.name}' 在 {timeout}s 内未发送 module.ready")
@@ -736,12 +1137,12 @@ class Launcher:
         url = f"http://127.0.0.1:{self.registry_port}/tokens"
         headers = {"Authorization": f"Bearer {self.kite_token}"}
         try:
-            async with httpx.AsyncClient() as client:
-                resp = await client.post(url, json=tokens, headers=headers, timeout=5)
-                if resp.status_code == 200:
-                    print(f"[launcher] 已注册 {len(tokens)} 个模块令牌")
-                else:
-                    print(f"[launcher] 警告: 令牌注册返回 {resp.status_code}")
+            client = self._get_http()
+            resp = await client.post(url, json=tokens, headers=headers)
+            if resp.status_code == 200:
+                print(f"[launcher] 已注册 {len(tokens)} 个模块令牌")
+            else:
+                print(f"[launcher] 警告: 令牌注册返回 {resp.status_code}")
         except Exception as e:
             print(f"[launcher] 警告: 注册模块令牌失败: {e}")
@@ -799,49 +1200,90 @@ class Launcher:
         print(f"[launcher] API 服务器已启动，端口 {self.api_port}")
+    # ── Module crash summary ──
+    def _print_module_crash_summary(self, name: str):
+        """Print a red one-line summary of the last record in a module's crash log.
+        The log path is ``$KITE_INSTANCE_DIR/<name>/log/crashes<suffix>.jsonl``,
+        where ``<suffix>`` comes from ``KITE_INSTANCE_SUFFIX``. Only the last
+        4096 bytes of the file are read and the final line is parsed as JSON.
+        Complement to module.crash event — reliable even if event was never sent.
+        Best-effort: a missing or empty file, or any read/parse error, prints
+        nothing and never raises.
+        Args:
+            name: Module name, used as the directory component of the log path.
+        """
+        RED = "\033[91m"
+        RESET = "\033[0m"
+        _suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")
+        crash_log = os.path.join(
+            os.environ.get("KITE_INSTANCE_DIR", ""), name, "log", f"crashes{_suffix}.jsonl"
+        )
+        if not os.path.isfile(crash_log):
+            return
+        try:
+            with open(crash_log, "rb") as f:
+                f.seek(0, 2)
+                size = f.tell()
+                if size == 0:
+                    return
+                f.seek(max(0, size - 4096))
+                tail = f.read()
+            # The seek can land in the middle of a multi-byte UTF-8 character;
+            # errors="replace" keeps that from discarding the whole tail. Only
+            # the first (already partial) line can be affected.
+            lines = tail.decode("utf-8", errors="replace").strip().split("\n")
+            last = json.loads(lines[-1])
+            if not isinstance(last, dict):
+                return
+            exc_type = last.get("exception_type", "Unknown")
+            ctx = last.get("context")
+            # Tolerate a missing or null context instead of dropping the summary
+            if not isinstance(ctx, dict):
+                ctx = {}
+            file_name = ctx.get("file", "unknown")
+            line_no = ctx.get("line", "?")
+            print(f"[launcher] {RED}崩溃: "
+                  f"{exc_type} in {file_name}:{line_no}{RESET}")
+            print(f"[launcher] 崩溃日志: {crash_log}")
+        except Exception:
+            # Deliberately best-effort: this runs inside the monitor loop and
+            # must not take it down because of a malformed or unreadable log.
+            pass
     # ── Monitor loop ──
     async def _monitor_loop(self):
         """Check child processes every second. Handle crashes.
         Uses _shutdown_event (asyncio.Event) so Ctrl+C wakes us immediately.
+        Responsibility split:
+        - Core module crash → full restart (Launcher handles)
+        - Watchdog crash → Launcher restarts directly (up to 3 times)
+        - Other module exit → publish module.stopped event only; Watchdog decides restart
         """
-        MAX_FAIL = 3
-        MAX_FAILED_MODULES = 3
+        WATCHDOG_MAX_FAIL = 3
+        watchdog_fail_count = 0
         while not self._shutdown_event.is_set():
             exited = self.process_manager.check_exited()
             for name, rc in exited:
                 print(f"[launcher] 模块 '{name}' 退出，返回码 {rc}")
+                if rc != 0:
+                    self._print_module_crash_summary(name)
                 self._log_lifecycle("exited", name, exit_code=rc)
                 await self._publish_event("module.stopped", {
                     "module_id": name, "exit_code": rc,
+                    "graceful_shutdown": self._graceful_modules.get(name, False),
                 })
                 info = self.modules.get(name)
-                # Core module crash → full restart
+                # 1) Core module crash → full restart
                 if name in CORE_MODULE_NAMES or (info and info.is_core()):
                     print(f"[launcher] 严重: 核心模块 '{name}' 崩溃，正在全部重启...")
                     self._log_lifecycle("core_crash", name, exit_code=rc)
                     await self._full_restart()
                     return
-                # Non-core: attempt restart if desired_state is "running"
-                self._fail_counts[name] = self._fail_counts.get(name, 0) + 1
-                count = self._fail_counts[name]
-                if count < MAX_FAIL and self._desired_states.get(name) == "running" and info:
-                    print(f"[launcher] 正在重启 '{name}' (第 {count}/{MAX_FAIL} 次)...")
-                    await self._start_one_module(info)
-                elif count >= MAX_FAIL:
-                    self._desired_states[name] = "stopped"
-                    self._log_lifecycle("failed", name, reason=f"exceeded {MAX_FAIL} retries")
-                    print(f"[launcher] 模块 '{name}' 失败 {MAX_FAIL} 次，已放弃")
+                # 2) Watchdog crash → Launcher restarts directly
+                if name == WATCHDOG_MODULE_NAME:
+                    if self._system_shutting_down:
+                        print(f"[launcher] Watchdog 退出（系统关闭中），跳过重启")
+                        continue
+                    watchdog_fail_count += 1
+                    if watchdog_fail_count <= WATCHDOG_MAX_FAIL and info:
+                        print(f"[launcher] Watchdog 崩溃，正在重启 (第 {watchdog_fail_count}/{WATCHDOG_MAX_FAIL} 次)...")
+                        await self._start_one_module(info)
+                    else:
+                        self._desired_states[name] = "stopped"
+                        self._log_lifecycle("failed", name, reason=f"exceeded {WATCHDOG_MAX_FAIL} retries")
+                        print(f"[launcher] Watchdog 失败 {WATCHDOG_MAX_FAIL} 次，已放弃")
+                    continue
-            failed_count = sum(1 for c in self._fail_counts.values() if c >= MAX_FAIL)
-            if failed_count >= MAX_FAILED_MODULES:
-                print(f"[launcher] {failed_count} 个模块永久失败，启动器退出")
-                return
+                # 3) Other modules → event already published above; Watchdog decides restart
+                #    (no restart logic here — Watchdog handles it via module.stopped event)
             if exited:
                 self.process_manager.persist_records()
@@ -857,6 +1299,9 @@ class Launcher:
         """Stop all modules, regenerate tokens, re-run Phase 1-4 (mechanism 10)."""
         print("[launcher] 全量重启: 正在停止所有模块...")
+        # Persist records before shutdown so cleanup_leftovers can find survivors
+        self.process_manager.persist_records()
         # Disconnect Event Hub WS
         if self._ws_task:
             self._ws_task.cancel()
@@ -869,7 +1314,13 @@ class Launcher:
         self._launcher_ws_token = ""
         await self._graceful_shutdown_all()
-        self._fail_counts.clear()
+        # Cleanup any leftover processes that survived graceful shutdown.
+        # Note: _graceful_shutdown_all() clears _processes/_records dicts, but
+        # cleanup_leftovers() reads from processes.json (persisted above), so it can
+        # still find and kill survivors.
+        self.process_manager.cleanup_leftovers()
         self._module_tokens.clear()
         # Regenerate kite_token
@@ -878,12 +1329,7 @@ class Launcher:
         print("[launcher] 全量重启: 重新执行 Phase 1-4...")
         try:
-            await self._phase1_registry()
-            self.modules = self.module_scanner.scan()
-            for n, info in self.modules.items():
-                self._log_lifecycle("scanned", n, state=info.state, module_dir=info.module_dir)
-            await self._register_module_tokens()
-            await self._phase2_event_hub()
+            await self._phase1_parallel_bootstrap()
             await self._phase3_registry_ready()
             await self._phase4_start_modules()
             self.process_manager.persist_records()
@@ -897,28 +1343,252 @@ class Launcher:
     def _final_cleanup(self):
         """Called on exit — stop all processes, stop API, clear records."""
-        print("[launcher] 正在关闭...")
+        # The whole cleanup runs inside try/except/finally: a failure in any step is
+        # only printed, and the shutdown-complete signal in `finally` is always sent.
+        try:
+            print("[launcher] 正在执行最终清理...")
+            # Cancel the background tasks if they were created
+            if self._ws_task:
+                self._ws_task.cancel()
+            if hasattr(self, '_heartbeat_task') and self._heartbeat_task:
+                self._heartbeat_task.cancel()
+            # Note: _graceful_shutdown_all() already called stop_all() in _async_main finally block.
+            # This is just a safety check — should normally find nothing.
+            remaining = [n for n in self.process_manager._processes
+                         if self.process_manager.is_running(n)]
+            if remaining:
+                print(f"[launcher] 警告: 仍有残留进程 (不应出现): {', '.join(remaining)}")
+                self.process_manager.stop_all(timeout=SHUTDOWN_TIMEOUT_BULK)
+            else:
+                print("[launcher] 无残留进程")
-        if self._ws_task:
-            self._ws_task.cancel()
-        if hasattr(self, '_heartbeat_task') and self._heartbeat_task:
-            self._heartbeat_task.cancel()
+            # Ask the API server to exit by setting its should_exit flag
+            if self._api_server:
+                self._api_server.should_exit = True
-        self.process_manager.stop_all(timeout=10)
+            # Clear instance runtime files
+            try:
+                os.remove(self.process_manager.records_path)
+            except OSError:
+                # Best effort: a failed removal (e.g. file already gone) is ignored
+                pass
+        except Exception as e:
+            print(f"[launcher] 最终清理出错: {e}")
+        finally:
+            # Signal the safety-net thread that normal shutdown has completed
+            self._shutdown_complete.set()
+            print("[launcher] 再见。")
+            # os._exit() terminates the process immediately, skipping normal interpreter
+            # teardown. NOTE(review): presumably needed on Windows to avoid a hang — confirm.
+            if IS_WINDOWS:
+                os._exit(0)
+    # ── Startup report ──
+    async def _print_startup_report(self, total_time: float, phase_times: dict[str, float], *,
+                                     global_instances=None, cleaned_stats: dict[str, int] | None = None):
+        """Print a colored startup summary with module list and timing.
+        Args:
+            total_time: Total startup time in seconds, shown in the banner.
+            phase_times: Maps a phase label to its elapsed seconds; only the known
+                kernel labels and "Phase 4: Extensions" are printed.
+            global_instances: Optional list of dicts describing instances of other
+                projects (keys read: is_self, instance_dir, num, launcher_pid,
+                module_count, cwd and optionally debug).
+            cleaned_stats: Optional mapping of instance name to the number of
+                leftover processes that were cleaned up.
+        The report is assembled as a list of lines and written with a single print().
+        """
+        import unicodedata  # local import: only needed for display-width calculation
+        G = "\033[32m"  # green
+        Y = "\033[33m"  # yellow
+        R = "\033[0m"   # reset
+        B = "\033[1;32m"  # bold green
+        DIM = "\033[90m"
+        # Labels under which the kernel phases (Phase 1+2+3) are stored in phase_times
+        kernel_phases = ("Phase 1+2: Registry + Event Hub (并行)", "Phase 3: Registry 事件总线")
+        # Display-width helpers. str.format pads by code points, which misaligns
+        # columns as soon as a cell contains double-width (CJK) characters.
+        def _dw(s):
+            """Display width: East Asian wide/fullwidth chars count as 2, others as 1."""
+            return sum(2 if unicodedata.east_asian_width(c) in ("W", "F") else 1 for c in str(s))
+        def _rpad(s, width):
+            """Left-align s in a field of given display width."""
+            return str(s) + ' ' * max(0, width - _dw(s))
+        def _lpad(s, width):
+            """Right-align s in a field of given display width."""
+            return ' ' * max(0, width - _dw(s)) + str(s)
+        # Classify every known module as running / exited / never started
+        running = []
+        exited = []
+        stopped = []
+        for name, info in self.modules.items():
+            rec = self.process_manager.get_record(name)
+            is_running = self.process_manager.is_running(name)
+            if is_running and rec:
+                running.append((name, info, rec))
+            elif self._desired_states.get(name) == "running" and not is_running:
+                # Was started but already exited (e.g. module.exiting)
+                exited.append((name, info))
+            else:
+                stopped.append((name, info))
+        # Kernel startup time = sum of the kernel phases that were actually recorded
+        kernel_time = sum(phase_times[p] for p in kernel_phases if p in phase_times)
+        lines = [
+            "",
+            f"{B}{'=' * 60}",
+            f"  Kite 内核启动完成   耗时 {kernel_time:.2f}s",
+            f"  Kite 全部模块启动完成   总耗时 {total_time:.2f}s",
+            f"{'=' * 60}{R}",
+        ]
+        # Phase breakdown
+        lines.append(f"{G}  阶段耗时:{R}")
+        # Kernel modules section
+        lines.append(f"{G}    内核模块:{R}")
+        for phase_name in kernel_phases:
+            if phase_name in phase_times:
+                elapsed = phase_times[phase_name]
+                lines.append(f"{G}      {_rpad(phase_name, 26)} {elapsed:>6.2f}s{R}")
+        # Extension modules section
+        lines.append(f"{G}    扩展模块:{R}")
+        if "Phase 4: Extensions" in phase_times:
+            elapsed = phase_times["Phase 4: Extensions"]
+            lines.append(f"{G}      {_rpad('Phase 4: Extensions', 26)} {elapsed:>6.2f}s{R}")
+        # Sort running modules by ready time (modules without one go last)
+        running_sorted = sorted(running, key=lambda x: self._ready_times.get(x[0], float('inf')))
+        # Running modules with ready time and elapsed from Kite start
+        lines.append(f"{G}  运行中 ({len(running)}):{R}")
+        # Column headers and per-column alignment
+        headers = ['模块', 'PID', '启动耗时', '进程启动时长', '类型']
+        aligns = ['left', 'right', 'right', 'right', 'left']  # alignment per column
+        # Build data rows first to calculate column widths
+        rows = []
+        for name, info, rec in running_sorted:
+            label = info.display_name or name
+            ready_t = self._ready_times.get(name)
+            time_str = f"{ready_t:.2f}s" if ready_t is not None else "—"
+            if ready_t is not None and hasattr(self, '_start_unix'):
+                # Moment the module became ready, relative to the launcher start time
+                elapsed_from_start = (rec.started_at + ready_t) - self._start_unix
+                es_str = f"{elapsed_from_start:.2f}s"
+            else:
+                es_str = "—"
+            rows.append([label, str(rec.pid), time_str, es_str, f"[{info.type}]"])
+        # Calculate column widths: max of header and all data display widths
+        col_widths = [_dw(h) for h in headers]
+        for row in rows:
+            for i, cell in enumerate(row):
+                col_widths[i] = max(col_widths[i], _dw(cell))
+        def _fmt_row(cells):
+            """Pad each cell to its column width/alignment and join with two spaces."""
+            padded = [(_rpad if aligns[i] == 'left' else _lpad)(cell, col_widths[i])
+                      for i, cell in enumerate(cells)]
+            return '  '.join(padded)
+        # Render header, then data rows
+        lines.append(f"{DIM}      {_fmt_row(headers)}{R}")
+        for row in rows:
+            lines.append(f"{G}    ✓ {_fmt_row(row)}{R}")
+        # Exited modules (started but already quit)
+        if exited:
+            lines.append(f"{Y}  已退出 ({len(exited)}):{R}")
+            for name, info in exited:
+                label = info.display_name or name
+                reason = self._exit_reasons.get(name, "")
+                reason_str = f": {reason}" if reason else ""
+                lines.append(f"{Y}    ↗ {_rpad(label, 20)} (主动退出{reason_str}){R}")
+        # Stopped modules
+        if stopped:
+            lines.append(f"{G}  未启动 ({len(stopped)}):{R}")
+            for name, info in stopped:
+                label = info.display_name or name
+                lines.append(f"{G}    - {_rpad(label, 20)} ({info.state}){R}")
+        lines.append(f"{G}  Launcher API: http://127.0.0.1:{self.api_port}   实例: {self.instance_id}{R}")
+        # Query Registry for web module's access URL
+        web_url = await self._get_web_url()
+        if web_url:
+            lines.append(f"{B}  Web 管理后台: {web_url}{R}")
+        # Instance info
+        instances = self.process_manager.get_alive_instances()
+        inst_num = self.process_manager.instance_num
+        suffix_display = self.process_manager.instance_suffix or "(无)"
+        inst_dir = os.environ.get("KITE_INSTANCE_DIR", "")
+        cwd = os.environ.get("KITE_CWD", "")
+        debug_flag = " [DEBUG]" if os.environ.get("KITE_DEBUG") == "1" else ""
+        lines.append(f"{G}  当前实例: #{inst_num}  后缀: {suffix_display}  PID: {os.getpid()}{debug_flag}{R}")
+        lines.append(f"{G}  实例目录: {inst_dir}{R}")
+        lines.append(f"{G}  工作目录: {cwd}{R}")
+        # Only list all instances when there is more than one alive
+        if len(instances) > 1:
+            lines.append(f"{G}  所有实例:{R}")
+            for entry in instances:
+                s = "" if entry["num"] == 1 else f"~{entry['num']}"
+                debug_tag = " [DEBUG]" if entry.get("debug", False) else ""
+                current_tag = " (当前)" if entry["is_self"] else ""
+                lines.append(f"{G}    #{entry['num']}  PID {entry['launcher_pid']}  "
+                             f"模块数 {entry['module_count']}  (processes{s}.json){debug_tag}{current_tag}{R}")
+        # Cross-directory instances from other projects
+        if global_instances:
+            my_inst_basename = os.path.basename(inst_dir)
+            other_instances = [g for g in global_instances
+                               if not g["is_self"] and g["instance_dir"] != my_inst_basename]
+            if other_instances:
+                lines.append(f"{G}  其他项目实例:{R}")
+                for other in other_instances:
+                    debug_tag = " [DEBUG]" if other.get("debug", False) else ""
+                    cwd_display = f"  {other['cwd']}" if other["cwd"] else ""
+                    lines.append(
+                        f"{G}    {_rpad(other['instance_dir'], 20)}  "
+                        f"#{other['num']}  PID {other['launcher_pid']}  "
+                        f"模块数 {other['module_count']}"
+                        f"{cwd_display}{debug_tag}{R}"
+                    )
-        if self._api_server:
-            self._api_server.should_exit = True
+        # Leftover-process cleanup summary: one line for a single instance, a list otherwise
+        if cleaned_stats:
+            total = sum(cleaned_stats.values())
+            if len(cleaned_stats) == 1:
+                inst, count = next(iter(cleaned_stats.items()))
+                lines.append(f"{Y}  已清理残留进程: {inst} ({count} 个){R}")
+            else:
+                lines.append(f"{Y}  已清理残留进程 (共 {total} 个):{R}")
+                for inst, count in cleaned_stats.items():
+                    lines.append(f"{Y}    {inst}: {count} 个{R}")
-        # Clear instance runtime files
-        self.process_manager._write_records_file([])
+        lines.append(f"{B}{'=' * 60}{R}")
+        lines.append("")
+        print("\n".join(lines))
+    async def _get_web_url(self) -> str:
+        """Query Registry for the web module's api_endpoint. Returns URL or empty string.
+        Best effort: a non-200 response, an empty or non-string payload, or any
+        exception results in "" rather than an error.
+        """
         try:
-            os.remove(self.process_manager.records_path)
-        except OSError:
+            # Authenticated GET against the local Registry with a short timeout (timeout=3)
+            client = self._get_http()
+            resp = await client.get(
+                f"http://127.0.0.1:{self.registry_port}/get/web.api_endpoint",
+                headers={"Authorization": f"Bearer {self.kite_token}"},
+                timeout=3,
+            )
+            if resp.status_code == 200:
+                val = resp.json()
+                # Only a non-empty string is accepted as an endpoint
+                if val and isinstance(val, str):
+                    # Show localhost instead of 127.0.0.1 for friendliness
+                    return val.replace("://127.0.0.1:", "://localhost:")
+        except Exception:
             pass
-        print("[launcher] 再见。")
-        if IS_WINDOWS:
-            os._exit(0)
+        # Fallback for every failure path above
+        return ""
     # ── Utilities ──
@@ -930,7 +1600,6 @@ class Launcher:
                 fm = _parse_frontmatter(f.read())
             discovery = fm.get("discovery")
             if isinstance(discovery, dict) and discovery:
-                print(f"[launcher] 发现来源: {', '.join(discovery.keys())}")
                 return discovery
         except Exception as e:
             print(f"[launcher] 警告: 读取发现配置失败: {e}")
@@ -960,12 +1629,29 @@ class Launcher:
     def _create_api_app(self) -> FastAPI:
         """Create the FastAPI app with Launcher management routes."""
+        from fastapi import Request, HTTPException
         app = FastAPI(title="Kite Launcher", docs_url=None, redoc_url=None)
         launcher = self
+        def _require_auth(request: Request):
+            """Authorize a Launcher API request; raise HTTPException on failure.
+            Checks, in order:
+              1. The peer address must be a loopback address, otherwise 403.
+              2. The Authorization header must be "Bearer <kite_token>", otherwise 401.
+            Returns None when both checks pass.
+            """
+            # Loopback whitelist: IPv4 and IPv6 loopback addresses, plus the literal name
+            client_host = request.client.host if request.client else None
+            if client_host not in ("127.0.0.1", "::1", "localhost"):
+                raise HTTPException(status_code=403, detail="Access denied: only localhost allowed")
+            # Bearer token verification
+            auth = request.headers.get("Authorization", "")
+            if not auth.startswith("Bearer "):
+                raise HTTPException(status_code=401, detail="Missing or invalid Authorization header")
+            token = auth[7:].strip()
+            expected = launcher.kite_token or ""
+            # An unset/empty kite_token must never match an empty bearer value.
+            # Compare as bytes in constant time: avoids a timing side channel, and
+            # compare_digest() on str would raise TypeError for non-ASCII header content.
+            if not expected or not secrets.compare_digest(token.encode("utf-8"), expected.encode("utf-8")):
+                raise HTTPException(status_code=401, detail="Invalid token")
         @app.get("/launcher/modules")
-        async def list_modules():
+        async def list_modules(request: Request):
             """List all modules and their current status."""
+            _require_auth(request)
             result = []
             for name, info in launcher.modules.items():
                 running = launcher.process_manager.is_running(name)
@@ -983,8 +1669,9 @@ class Launcher:
             return result
         @app.post("/launcher/modules/{name}/start")
-        async def start_module(name: str):
+        async def start_module(name: str, request: Request):
             """Start a module by name."""
+            _require_auth(request)
             info = launcher.modules.get(name)
             if not info:
                 raise HTTPException(404, f"Module '{name}' not found")
@@ -994,13 +1681,12 @@ class Launcher:
             if name not in launcher._module_tokens:
                 launcher._module_tokens[name] = secrets.token_hex(32)
                 try:
-                    async with httpx.AsyncClient() as client:
-                        await client.post(
-                            f"http://127.0.0.1:{launcher.registry_port}/tokens",
-                            json={name: launcher._module_tokens[name]},
-                            headers={"Authorization": f"Bearer {launcher.kite_token}"},
-                            timeout=5,
-                        )
+                    client = launcher._get_http()
+                    await client.post(
+                        f"http://127.0.0.1:{launcher.registry_port}/tokens",
+                        json={name: launcher._module_tokens[name]},
+                        headers={"Authorization": f"Bearer {launcher.kite_token}"},
+                    )
                 except Exception as e:
                     print(f"[launcher] 警告: 注册 {name} 的令牌失败: {e}")
@@ -1009,7 +1695,6 @@ class Launcher:
             ok = launcher.process_manager.start_module(info, boot_info=boot_info)
             if ok:
                 launcher._desired_states[name] = "running"
-                launcher._fail_counts.pop(name, None)
                 launcher.process_manager.persist_records()
                 rec = launcher.process_manager.get_record(name)
                 launcher._log_lifecycle("started", name, pid=rec.pid if rec else None, via="api")
@@ -1019,8 +1704,9 @@ class Launcher:
             raise HTTPException(500, f"Failed to start '{name}'")
         @app.post("/launcher/modules/{name}/stop")
-        async def stop_module(name: str, body: dict = None):
+        async def stop_module(name: str, request: Request, body: dict = None):
             """Stop a module with graceful shutdown."""
+            _require_auth(request)
             info = launcher.modules.get(name)
             if not info:
                 raise HTTPException(404, f"Module '{name}' not found")
@@ -1031,8 +1717,9 @@ class Launcher:
             return {"status": "stopped", "name": name}
         @app.post("/launcher/modules/{name}/restart")
-        async def restart_module(name: str, body: dict = None):
+        async def restart_module(name: str, request: Request, body: dict = None):
             """Restart a module (stop + start)."""
+            _require_auth(request)
             info = launcher.modules.get(name)
             if not info:
                 raise HTTPException(404, f"Module '{name}' not found")
@@ -1042,13 +1729,12 @@ class Launcher:
             await launcher._graceful_stop(name, reason)
             launcher._module_tokens[name] = secrets.token_hex(32)
             try:
-                async with httpx.AsyncClient() as client:
-                    await client.post(
-                        f"http://127.0.0.1:{launcher.registry_port}/tokens",
-                        json={name: launcher._module_tokens[name]},
-                        headers={"Authorization": f"Bearer {launcher.kite_token}"},
-                        timeout=5,
-                    )
+                client = launcher._get_http()
+                await client.post(
+                    f"http://127.0.0.1:{launcher.registry_port}/tokens",
+                    json={name: launcher._module_tokens[name]},
+                    headers={"Authorization": f"Bearer {launcher.kite_token}"},
+                )
             except Exception:
                 pass
             token = launcher._module_tokens[name]
@@ -1056,7 +1742,6 @@ class Launcher:
             ok = launcher.process_manager.start_module(info, boot_info=boot_info)
             if ok:
                 launcher._desired_states[name] = "running"
-                launcher._fail_counts.pop(name, None)
                 launcher.process_manager.persist_records()
                 rec = launcher.process_manager.get_record(name)
                 launcher._log_lifecycle("started", name, pid=rec.pid if rec else None, via="restart_api")
@@ -1066,8 +1751,9 @@ class Launcher:
             raise HTTPException(500, f"Failed to restart '{name}'")
         @app.post("/launcher/rescan")
-        async def rescan_modules():
+        async def rescan_modules(request: Request):
             """Rescan module directories for new/removed modules."""
+            _require_auth(request)
             old_names = set(launcher.modules.keys())
             launcher.modules = launcher.module_scanner.scan()
             new_names = set(launcher.modules.keys())
@@ -1085,20 +1771,28 @@ class Launcher:
                     launcher._module_tokens[name] = secrets.token_hex(32)
                     new_tokens[name] = launcher._module_tokens[name]
                 try:
-                    async with httpx.AsyncClient() as client:
-                        await client.post(
-                            f"http://127.0.0.1:{launcher.registry_port}/tokens",
-                            json=new_tokens,
-                            headers={"Authorization": f"Bearer {launcher.kite_token}"},
-                            timeout=5,
-                        )
+                    client = launcher._get_http()
+                    await client.post(
+                        f"http://127.0.0.1:{launcher.registry_port}/tokens",
+                        json=new_tokens,
+                        headers={"Authorization": f"Bearer {launcher.kite_token}"},
+                    )
                 except Exception:
                     pass
             return {"added": added, "removed": removed, "total": len(launcher.modules)}
+        @app.post("/launcher/shutdown")
+        async def shutdown_launcher(request: Request, body: dict = None):
+            """Shut down the whole Kite system, equivalent to pressing Ctrl+C.
+            The optional body may carry a "reason"; it defaults to "api_request".
+            """
+            _require_auth(request)
+            payload = body if body else {}
+            reason = payload.get("reason", "api_request")
+            launcher._request_shutdown(f"API shutdown request: {reason}")
+            return {"status": "shutting_down", "reason": reason}
         @app.put("/launcher/modules/{name}/state")
-        async def update_state(name: str, body: dict):
+        async def update_state(name: str, request: Request, body: dict):
             """Update module state (enabled/manual/disabled). Writes to module.md."""
+            _require_auth(request)
             info = launcher.modules.get(name)
             if not info:
                 raise HTTPException(404, f"Module '{name}' not found")