@agentunion/kite 1.0.7 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/CHANGELOG.md +208 -0
  2. package/README.md +48 -0
  3. package/cli.js +1 -1
  4. package/extensions/agents/__init__.py +1 -0
  5. package/extensions/agents/assistant/__init__.py +1 -0
  6. package/extensions/agents/assistant/entry.py +329 -0
  7. package/extensions/agents/assistant/module.md +22 -0
  8. package/extensions/agents/assistant/server.py +197 -0
  9. package/extensions/channels/__init__.py +1 -0
  10. package/extensions/channels/acp_channel/__init__.py +1 -0
  11. package/extensions/channels/acp_channel/entry.py +329 -0
  12. package/extensions/channels/acp_channel/module.md +22 -0
  13. package/extensions/channels/acp_channel/server.py +197 -0
  14. package/extensions/event_hub_bench/entry.py +624 -379
  15. package/extensions/event_hub_bench/module.md +2 -1
  16. package/extensions/services/backup/__init__.py +1 -0
  17. package/extensions/services/backup/entry.py +508 -0
  18. package/extensions/services/backup/module.md +22 -0
  19. package/extensions/services/model_service/__init__.py +1 -0
  20. package/extensions/services/model_service/entry.py +508 -0
  21. package/extensions/services/model_service/module.md +22 -0
  22. package/extensions/services/watchdog/entry.py +468 -102
  23. package/extensions/services/watchdog/module.md +3 -0
  24. package/extensions/services/watchdog/monitor.py +170 -69
  25. package/extensions/services/web/__init__.py +1 -0
  26. package/extensions/services/web/config.yaml +149 -0
  27. package/extensions/services/web/entry.py +390 -0
  28. package/extensions/services/web/module.md +24 -0
  29. package/extensions/services/web/routes/__init__.py +1 -0
  30. package/extensions/services/web/routes/routes_call.py +189 -0
  31. package/extensions/services/web/routes/routes_config.py +512 -0
  32. package/extensions/services/web/routes/routes_contacts.py +98 -0
  33. package/extensions/services/web/routes/routes_devlog.py +99 -0
  34. package/extensions/services/web/routes/routes_phone.py +81 -0
  35. package/extensions/services/web/routes/routes_sms.py +48 -0
  36. package/extensions/services/web/routes/routes_stats.py +17 -0
  37. package/extensions/services/web/routes/routes_voicechat.py +554 -0
  38. package/extensions/services/web/routes/schemas.py +216 -0
  39. package/extensions/services/web/server.py +375 -0
  40. package/extensions/services/web/static/css/style.css +1064 -0
  41. package/extensions/services/web/static/index.html +1445 -0
  42. package/extensions/services/web/static/js/app.js +4671 -0
  43. package/extensions/services/web/vendor/__init__.py +1 -0
  44. package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
  45. package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
  46. package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
  47. package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
  48. package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
  49. package/extensions/services/web/vendor/config.py +139 -0
  50. package/extensions/services/web/vendor/conversation/asr.py +936 -0
  51. package/extensions/services/web/vendor/conversation/engine.py +548 -0
  52. package/extensions/services/web/vendor/conversation/llm.py +534 -0
  53. package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
  54. package/extensions/services/web/vendor/conversation/tts.py +322 -0
  55. package/extensions/services/web/vendor/conversation/vad.py +138 -0
  56. package/extensions/services/web/vendor/storage/__init__.py +1 -0
  57. package/extensions/services/web/vendor/storage/identity.py +312 -0
  58. package/extensions/services/web/vendor/storage/store.py +507 -0
  59. package/extensions/services/web/vendor/task/manager.py +864 -0
  60. package/extensions/services/web/vendor/task/models.py +45 -0
  61. package/extensions/services/web/vendor/task/webhook.py +263 -0
  62. package/extensions/services/web/vendor/tools/registry.py +321 -0
  63. package/kernel/__init__.py +0 -0
  64. package/kernel/entry.py +407 -0
  65. package/{core/event_hub/hub.py → kernel/event_hub.py} +62 -74
  66. package/kernel/module.md +33 -0
  67. package/{core/registry/store.py → kernel/registry_store.py} +23 -8
  68. package/kernel/rpc_router.py +388 -0
  69. package/kernel/server.py +267 -0
  70. package/launcher/__init__.py +10 -0
  71. package/launcher/__main__.py +6 -0
  72. package/launcher/count_lines.py +258 -0
  73. package/launcher/entry.py +1778 -0
  74. package/launcher/logging_setup.py +289 -0
  75. package/{core/launcher → launcher}/module_scanner.py +11 -6
  76. package/launcher/process_manager.py +880 -0
  77. package/main.py +11 -210
  78. package/package.json +6 -9
  79. package/__init__.py +0 -1
  80. package/__main__.py +0 -15
  81. package/core/event_hub/BENCHMARK.md +0 -94
  82. package/core/event_hub/bench.py +0 -459
  83. package/core/event_hub/bench_extreme.py +0 -308
  84. package/core/event_hub/bench_perf.py +0 -350
  85. package/core/event_hub/entry.py +0 -157
  86. package/core/event_hub/module.md +0 -20
  87. package/core/event_hub/server.py +0 -206
  88. package/core/launcher/entry.py +0 -1158
  89. package/core/launcher/process_manager.py +0 -470
  90. package/core/registry/entry.py +0 -110
  91. package/core/registry/module.md +0 -30
  92. package/core/registry/server.py +0 -289
  93. package/extensions/services/watchdog/server.py +0 -167
  94. /package/{core → extensions/services/web/vendor/bluetooth}/__init__.py +0 -0
  95. /package/{core/event_hub → extensions/services/web/vendor/conversation}/__init__.py +0 -0
  96. /package/{core/launcher → extensions/services/web/vendor/task}/__init__.py +0 -0
  97. /package/{core/registry → extensions/services/web/vendor/tools}/__init__.py +0 -0
  98. /package/{core/event_hub → kernel}/dedup.py +0 -0
  99. /package/{core/event_hub → kernel}/router.py +0 -0
  100. /package/{core/launcher → launcher}/module.md +0 -0
@@ -1,1158 +0,0 @@
1
- """
2
- Launcher — the core of Kite. Manages module lifecycle, exposes API, monitors processes.
3
-
4
- Thread model:
5
- - Main thread: asyncio event loop (process management + monitor loop)
6
- - API thread: independent thread running uvicorn + FastAPI
7
- - stdout threads: one daemon thread per child process (ProcessManager)
8
- - (Windows) keyboard listener thread: polls for 'q' key
9
-
10
- 4-Phase startup:
11
- Phase 1: Registry → stdout port → KITE_REGISTRY_PORT → API → register self + tokens
12
- Phase 2: Event Hub → stdin launcher_ws_token → stdout ws_endpoint → WS connect → module.ready
13
- Phase 3: Event Hub registers to Registry → Registry connects to Event Hub WS → Registry module.ready
14
- Phase 4: start remaining enabled modules in topo order
15
- """
16
-
17
- import asyncio
18
- import json
19
- import os
20
- import secrets
21
- import signal
22
- import sys
23
- import threading
24
- import time
25
- import uuid
26
-
27
- import httpx
28
- import uvicorn
29
- import websockets
30
- from fastapi import FastAPI, HTTPException
31
-
32
- from .module_scanner import ModuleScanner, ModuleInfo, LaunchConfig, _parse_frontmatter
33
- from .process_manager import ProcessManager
34
-
35
- IS_WINDOWS = sys.platform == "win32"
36
-
37
- # Core module names that are started in Phase 1-2 (not Phase 4)
38
- CORE_MODULE_NAMES = {"registry", "event_hub"}
39
-
40
-
41
- class Launcher:
42
- """Kite system entry point. Starts Registry, manages modules, exposes API."""
43
-
44
def __init__(self, kite_token: str):
    """Build launcher state; no child process is started here.

    Args:
        kite_token: Token stored on the launcher and handed to ProcessManager.

    The statement order is significant: the instance workspace is resolved
    (populating KITE_INSTANCE_DIR) before ProcessManager is constructed.
    """
    self.kite_token = kite_token

    # The launcher PID doubles as the instance id and is published via env.
    pid_text = str(os.getpid())
    self.instance_id = pid_text
    os.environ["KITE_INSTANCE"] = pid_text

    # Workspace resolution must precede ProcessManager construction.
    self._resolve_instance_dir()
    launcher_data_dir = os.path.join(os.environ["KITE_INSTANCE_DIR"], "launcher")
    os.environ["KITE_MODULE_DATA"] = launcher_data_dir

    self.process_manager = ProcessManager(
        kite_token,
        pid_text,
        on_kite_message=self._on_kite_message,
    )
    discovery_config = self._load_discovery()
    self.module_scanner = ModuleScanner(discovery=discovery_config)

    # Network ports; registry_port is filled in during Phase 1.
    self.registry_port: int = 0
    self.api_port: int = 0
    self.modules: dict[str, ModuleInfo] = {}

    # Two shutdown flags: one for the asyncio loop, one usable from any thread.
    self._shutdown_event = asyncio.Event()
    self._thread_shutdown = threading.Event()

    self._api_server: uvicorn.Server | None = None
    self._api_ready = threading.Event()

    # module_name -> consecutive failure count
    self._fail_counts: dict[str, int] = {}
    # module_name -> per-module token
    self._module_tokens: dict[str, str] = {}

    # Three-layer state model: desired state per module
    # (module_name -> "running" | "stopped").
    self._desired_states: dict[str, str] = {}

    # Event Hub WebSocket client state.
    self._event_hub_ws_url: str = ""
    self._launcher_ws_token: str = ""
    self._ws: object | None = None
    self._ws_task: asyncio.Task | None = None
    self._loop: asyncio.AbstractEventLoop | None = None

    # Event waiters: {event_key: (asyncio.Event, data_dict)}
    self._event_waiters: dict[str, tuple[asyncio.Event, dict]] = {}

    # Kite stdout message waiters: {waiter_key: (threading.Event, data_dict)}.
    # Filled from the ProcessManager stdout callback, i.e. across threads.
    self._msg_waiters: dict[str, tuple[threading.Event, dict]] = {}

    self._lifecycle_log = os.path.join(
        os.environ["KITE_INSTANCE_DIR"],
        "launcher",
        "lifecycle.jsonl",
    )
    self._app = self._create_api_app()
94
-
95
- # ── Instance workspace resolution ──
96
-
97
- @staticmethod
98
- def _resolve_instance_dir():
99
- """Resolve KITE_INSTANCE_DIR from KITE_WORKSPACE + KITE_CWD.
100
- Algorithm: take CWD basename, find matching dir in workspace via .cwd file,
101
- or create new one. Sets KITE_INSTANCE_DIR env var.
102
- """
103
- if os.environ.get("KITE_INSTANCE_DIR"):
104
- return # already set (e.g. by tests or parent)
105
-
106
- cwd = os.environ.get("KITE_CWD", os.getcwd())
107
- workspace = os.environ.get("KITE_WORKSPACE", "")
108
- if not workspace:
109
- home = os.environ.get("HOME") or os.environ.get("USERPROFILE") or os.path.expanduser("~")
110
- workspace = os.path.join(home, ".kite", "workspace")
111
- os.environ["KITE_WORKSPACE"] = workspace
112
-
113
- basename = os.path.basename(cwd.rstrip(os.sep)) or "default"
114
- suffix = 0
115
-
116
- while True:
117
- name = basename if suffix == 0 else f"{basename}~{suffix}"
118
- candidate = os.path.join(workspace, name)
119
- cwd_file = os.path.join(candidate, ".cwd")
120
-
121
- if not os.path.exists(candidate):
122
- # Empty slot — create new workspace
123
- os.makedirs(candidate, exist_ok=True)
124
- with open(cwd_file, "w", encoding="utf-8") as f:
125
- f.write(cwd)
126
- os.environ["KITE_INSTANCE_DIR"] = candidate
127
- print(f"[launcher] 实例工作区已创建: {candidate}")
128
- return
129
-
130
- if os.path.isfile(cwd_file):
131
- try:
132
- with open(cwd_file, "r", encoding="utf-8") as f:
133
- if f.read().strip() == cwd:
134
- os.environ["KITE_INSTANCE_DIR"] = candidate
135
- print(f"[launcher] 实例工作区已找到: {candidate}")
136
- return
137
- except Exception:
138
- pass
139
-
140
- suffix += 1
141
-
142
- # ── Kite stdout message callback ──
143
-
144
- def _on_kite_message(self, module_name: str, msg: dict):
145
- """Called by ProcessManager stdout reader thread when a kite message is detected.
146
- Thread-safe: only touches _msg_waiters (dict + threading.Event).
147
- """
148
- kite_type = msg.get("kite", "")
149
- key = f"{module_name}:{kite_type}"
150
- waiter = self._msg_waiters.get(key)
151
- if waiter:
152
- waiter[1].update(msg)
153
- waiter[0].set()
154
-
155
- async def _wait_kite_message(self, module_name: str, kite_type: str,
156
- timeout: float) -> dict | None:
157
- """Wait for a kite stdout message from a module. Returns msg dict or None on timeout.
158
- Checks shutdown flag every 0.5s so Ctrl+C is responsive even during Phase 1-2 waits.
159
- """
160
- key = f"{module_name}:{kite_type}"
161
- evt = threading.Event()
162
- data = {}
163
- self._msg_waiters[key] = (evt, data)
164
- shutdown = self._thread_shutdown
165
- try:
166
- def _wait():
167
- deadline = time.monotonic() + timeout
168
- while time.monotonic() < deadline:
169
- if evt.wait(timeout=0.5):
170
- return True
171
- if shutdown.is_set():
172
- return False
173
- return False
174
- got = await asyncio.get_running_loop().run_in_executor(None, _wait)
175
- return data if got else None
176
- finally:
177
- self._msg_waiters.pop(key, None)
178
-
179
- # ── Public entry ──
180
-
181
def run(self):
    """Blocking entry point: print diagnostics, install exit hooks, run the loop.

    Prints every ``KITE_*`` environment variable plus PID, interpreter and
    platform, installs the platform-specific exit handling, then drives
    ``_async_main`` with ``asyncio.run``. ``_final_cleanup`` always runs
    afterwards; KeyboardInterrupt is swallowed.
    """
    print("[launcher] Kite 启动中...")
    print("[launcher] ── 环境变量 ──")
    kite_vars = sorted(name for name in os.environ if name.startswith("KITE_"))
    for name in kite_vars:
        print(f"[launcher] {name} = {os.environ[name]}")
    print(f"[launcher] PID = {os.getpid()}")
    print(f"[launcher] PYTHON = {sys.executable}")
    print(f"[launcher] PLATFORM = {sys.platform}")

    install_exit_hooks = (
        self._setup_windows_exit if IS_WINDOWS else self._setup_unix_signals
    )
    install_exit_hooks()

    try:
        asyncio.run(self._async_main())
    except KeyboardInterrupt:
        pass
    finally:
        self._final_cleanup()
202
-
203
- def _request_shutdown(self, reason: str = ""):
204
- """Request graceful shutdown. Thread-safe — can be called from signal handler or any thread."""
205
- if self._thread_shutdown.is_set():
206
- return # already shutting down
207
- print(f"\n[launcher] {reason or '收到关闭请求'}")
208
- self._thread_shutdown.set()
209
- # Wake up asyncio event loop immediately (so _monitor_loop / wait_for exits)
210
- loop = self._loop
211
- if loop and not loop.is_closed():
212
- try:
213
- loop.call_soon_threadsafe(self._shutdown_event.set)
214
- except RuntimeError:
215
- pass
216
- # Safety net: force exit after 15s no matter what
217
- def _force():
218
- time.sleep(15)
219
- os._exit(1)
220
- threading.Thread(target=_force, daemon=True).start()
221
-
222
def _setup_unix_signals(self):
    """Install one SIGTERM/SIGINT handler that requests a graceful shutdown."""

    def _on_signal(signum, frame):
        self._request_shutdown(f"收到信号 {signum},正在关闭...")

    for sig in (signal.SIGTERM, signal.SIGINT):
        signal.signal(sig, _on_signal)
228
-
229
def _setup_windows_exit(self):
    """Install Windows exit handling: console control handler plus a 'q' key poller.

    ``signal.signal(SIGINT)`` is not used here because Python only delivers
    signals while the main thread is executing bytecode. When the main thread
    is blocked in C code (asyncio ProactorEventLoop →
    GetQueuedCompletionStatus), SIGINT is never delivered.
    SetConsoleCtrlHandler invokes its callback on a separate OS thread, so it
    works regardless of what the main thread is doing.
    """
    import ctypes

    handled_events = (0, 1)  # CTRL_C_EVENT, CTRL_BREAK_EVENT

    @ctypes.WINFUNCTYPE(ctypes.c_int, ctypes.c_uint)
    def _console_ctrl(ctrl_type):
        if ctrl_type not in handled_events:
            return 0
        self._request_shutdown("收到 Ctrl+C,正在关闭...")
        # Report "handled" so the default action (killing the process) is skipped.
        return 1

    # Keep a reference on self so the C callback is not garbage-collected.
    self._ctrl_handler_ref = _console_ctrl
    ctypes.windll.kernel32.SetConsoleCtrlHandler(_console_ctrl, 1)

    def _poll_keyboard():
        # 'q' / 'Q' key: polled through msvcrt every 0.1s until shutdown.
        import msvcrt
        while not self._thread_shutdown.is_set():
            if msvcrt.kbhit() and msvcrt.getch() in (b'q', b'Q'):
                self._request_shutdown("收到退出请求,正在关闭...")
                return
            time.sleep(0.1)

    threading.Thread(target=_poll_keyboard, daemon=True).start()
263
-
264
- # ── Async main (4-Phase startup) ──
265
-
266
async def _async_main(self):
    """Run the 4-phase startup, then the monitor loop, then graceful shutdown.

    After every awaited phase the asyncio shutdown event is checked and the
    coroutine returns early if a shutdown was requested meanwhile.
    """
    self._loop = asyncio.get_running_loop()
    stop_requested = self._shutdown_event.is_set

    # Validate core modules exist (mechanism 12)
    self._validate_core_modules()

    # Clean up leftovers from previous instances
    self.process_manager.cleanup_leftovers()

    # Phase 1: Registry bootstrap
    await self._phase1_registry()
    if stop_requested():
        return

    # Module scan (does not need Phase 2)
    self.modules = self.module_scanner.scan()
    for mod_name, mod in self.modules.items():
        self._log_lifecycle("scanned", mod_name, state=mod.state, module_dir=mod.module_dir)
    found_names = ', '.join(self.modules.keys()) or '(无)'
    print(f"[launcher] 发现 {len(self.modules)} 个模块: {found_names}")

    # Per-module tokens
    await self._register_module_tokens()
    if stop_requested():
        return

    # Phase 2: Event Hub bootstrap
    await self._phase2_event_hub()
    if stop_requested():
        return

    # Phase 3: Registry delayed ready
    await self._phase3_registry_ready()
    if stop_requested():
        return

    # Phase 4: derive desired_state from the configured state first.
    # "enabled" -> running; anything else (manual, disabled) -> stopped.
    for mod_name, mod in self.modules.items():
        self._desired_states[mod_name] = "running" if mod.state == "enabled" else "stopped"
    # Core modules are already running at this point.
    for core_name in CORE_MODULE_NAMES:
        self._desired_states[core_name] = "running"

    await self._phase4_start_modules()
    if stop_requested():
        return

    # Post-startup
    self.process_manager.persist_records()
    self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())

    print("[launcher] 进入监控循环 (按 Ctrl+C 或 'q' 退出)")
    await self._monitor_loop()

    await self._graceful_shutdown_all()
320
-
321
- # ── Phase 1: Registry ──
322
-
323
async def _phase1_registry(self):
    """Phase 1: start Registry, learn its port, start the API, register self.

    Raises:
        RuntimeError: if the Registry process cannot be started, or if it does
            not report a port on stdout within 6 seconds.
    """
    registry_info = ModuleInfo(
        name="registry",
        display_name="Registry",
        type="infrastructure",
        state="enabled",
        runtime="python",
        entry="entry.py",
        module_dir=os.path.join(os.environ["KITE_PROJECT"], "core", "registry"),
    )

    boot_info = {"token": self.kite_token}
    self._log_lifecycle("starting", "registry")
    if not self.process_manager.start_module(registry_info, boot_info=boot_info):
        self._log_lifecycle("start_failed", "registry")
        raise RuntimeError("启动 Registry 失败")

    # Registry announces its port through a kite stdout message (mechanism 2).
    print("[launcher] 等待 Registry 端口...")
    port_msg = await self._wait_kite_message("registry", "port", timeout=6)
    if not (port_msg and port_msg.get("port")):
        raise RuntimeError("致命错误: Registry 在 6s 内未报告端口")
    self.registry_port = int(port_msg["port"])
    print(f"[launcher] Registry 端口: {self.registry_port}")

    # Exported so all subsequently started child processes can read it.
    os.environ["KITE_REGISTRY_PORT"] = str(self.registry_port)

    # The Launcher API runs in its own thread.
    self._start_api_thread()

    # Finally register the Launcher itself with Registry.
    await self._register_self()
359
-
360
async def _register_self(self):
    """POST the Launcher's own registration record to Registry (best effort).

    A non-200 response or any exception is only printed as a warning; nothing
    is raised to the caller.
    """
    registration = {
        "action": "register",
        "module_id": "launcher",
        "module_type": "infrastructure",
        "name": "Launcher",
        "api_endpoint": f"http://127.0.0.1:{self.api_port}",
        "health_endpoint": "/launcher/modules",
        "events_publish": {
            "module.started": {},
            "module.stopped": {},
            "module.state_changed": {},
        },
        "events_subscribe": [">"],
    }
    endpoint = f"http://127.0.0.1:{self.registry_port}/modules"
    auth_headers = {"Authorization": f"Bearer {self.kite_token}"}
    try:
        async with httpx.AsyncClient() as client:
            resp = await client.post(endpoint, json=registration, headers=auth_headers, timeout=5)
            if resp.status_code != 200:
                print(f"[launcher] 警告: Registry 注册返回 {resp.status_code}")
            else:
                print("[launcher] 已注册到 Registry")
    except Exception as exc:
        print(f"[launcher] 警告: 注册到 Registry 失败: {exc}")
387
-
388
- # ── Phase 2: Event Hub ──
389
-
390
async def _phase2_event_hub(self):
    """Phase 2: start Event Hub, exchange the WS token, connect, await ready.

    Sequence: start process → send ``launcher_ws_token`` over stdin → read
    ``ws_endpoint`` from stdout → open the WebSocket loop → wait up to 15s for
    Event Hub's ``module.ready`` (a timeout only prints a warning).

    Raises:
        RuntimeError: if Event Hub cannot be started, or does not report
            ``ws_endpoint`` within 6 seconds.
    """
    # Prefer the scanned module description; otherwise build one by hand.
    hub_info = self.modules.get("event_hub")
    if not hub_info:
        hub_info = ModuleInfo(
            name="event_hub",
            display_name="Event Hub",
            type="infrastructure",
            state="enabled",
            runtime="python",
            entry="entry.py",
            module_dir=os.path.join(os.environ["KITE_PROJECT"], "core", "event_hub"),
        )

    # Make sure Event Hub has a token, registering a fresh one if needed.
    hub_token = self._module_tokens.get("event_hub", "")
    if not hub_token:
        hub_token = secrets.token_hex(32)
        self._module_tokens["event_hub"] = hub_token
        await self._register_tokens_to_registry({"event_hub": hub_token})

    self._log_lifecycle("starting", "event_hub")
    if not self.process_manager.start_module(hub_info, boot_info={"token": hub_token}):
        self._log_lifecycle("start_failed", "event_hub")
        raise RuntimeError("启动 Event Hub 失败")

    # Hand the launcher's WS token to Event Hub via stdin (mechanism 6).
    self._launcher_ws_token = secrets.token_hex(32)
    self.process_manager.write_stdin("event_hub", {
        "kite": "launcher_ws_token",
        "launcher_ws_token": self._launcher_ws_token,
    })

    # Event Hub reports its endpoint via stdout (mechanism 5).
    print("[launcher] 等待 Event Hub ws_endpoint...")
    endpoint_msg = await self._wait_kite_message("event_hub", "ws_endpoint", timeout=6)
    if not (endpoint_msg and endpoint_msg.get("ws_endpoint")):
        raise RuntimeError("致命错误: Event Hub 在 6s 内未报告 ws_endpoint")
    self._event_hub_ws_url = endpoint_msg["ws_endpoint"]
    print(f"[launcher] Event Hub 已发现: {self._event_hub_ws_url}")

    # Background task that connects (and reconnects) with launcher_ws_token.
    self._ws_task = asyncio.create_task(self._ws_loop())

    # Event Hub sends module.ready once the Launcher has connected.
    hub_ready = await self._wait_event("module.ready", "event_hub", timeout=15)
    if not hub_ready:
        print("[launcher] 警告: Event Hub 在 15s 内未发送 module.ready")
    else:
        print("[launcher] Event Hub 已就绪")

    self._log_lifecycle("started", "event_hub")
    await self._publish_event("module.started", {"module_id": "event_hub"})
    self.process_manager.close_stdio("event_hub")
447
-
448
- # ── Phase 3: Registry delayed ready ──
449
-
450
async def _phase3_registry_ready(self):
    """Phase 3: wait up to 12s for Registry's delayed ``module.ready``.

    That event is triggered after Event Hub registers to Registry and Registry
    connects to the Event Hub WebSocket. A timeout only prints a warning;
    startup continues either way.
    """
    print("[launcher] 等待 Registry 延迟就绪...")
    registry_ready = await self._wait_event("module.ready", "registry", timeout=12)
    if not registry_ready:
        print("[launcher] 警告: Registry 在 12s 内未发送 module.ready (降级运行)")
    else:
        print("[launcher] Registry 已就绪")

    self._log_lifecycle("started", "registry")
    await self._publish_event("module.started", {"module_id": "registry"})
    self.process_manager.close_stdio("registry")
463
-
464
- # ── Phase 4: Start remaining modules ──
465
-
466
async def _phase4_start_modules(self):
    """Phase 4: start every non-core module whose desired state is "running".

    Dependencies that are not disabled are pulled in automatically, and their
    desired state is set to "running". The resolution is transitive: modules
    added as dependencies have their own ``depends_on`` examined as well.
    Previously only the initially selected modules were scanned, so a
    dependency of an auto-started module was silently never started.

    A dependency on a disabled module is reported but not started. A
    dependency cycle is reported and aborts Phase 4 without starting anything.
    """
    to_start = [m for m in self.modules.values()
                if self._desired_states.get(m.name) == "running"
                and m.name not in CORE_MODULE_NAMES]
    if not to_start:
        print("[launcher] 没有额外模块需要启动")
        return

    # Worklist: ``to_start`` grows while it is walked, so modules appended as
    # dependencies are visited too.
    needed = {m.name for m in to_start}
    cursor = 0
    while cursor < len(to_start):
        m = to_start[cursor]
        cursor += 1
        for dep in m.depends_on:
            if dep in needed or dep in CORE_MODULE_NAMES:
                continue
            dep_info = self.modules.get(dep)
            if not dep_info:
                continue  # unknown dependency: nothing to start
            if dep_info.state == "disabled":
                print(f"[launcher] 错误: '{m.name}' 依赖已禁用的模块 '{dep}'")
                continue
            needed.add(dep)
            to_start.append(dep_info)
            self._desired_states[dep] = "running"
            print(f"[launcher] 自动启动 '{dep}' (被依赖)")

    try:
        sorted_modules = self._topo_sort(to_start)
    except RuntimeError as e:
        print(f"[launcher] 错误: {e}")
        return

    print(f"[launcher] 正在启动 {len(sorted_modules)} 个模块...")
    for info in sorted_modules:
        await self._start_one_module(info)
498
-
499
- # ── Event Hub WebSocket connection ──
500
-
501
- async def _ws_loop(self):
502
- """Connect to Event Hub, reconnect on failure."""
503
- while not self._thread_shutdown.is_set():
504
- try:
505
- await self._ws_connect()
506
- except asyncio.CancelledError:
507
- return
508
- except Exception as e:
509
- print(f"[launcher] Event Hub 连接错误: {e}")
510
- self._ws = None
511
- await asyncio.sleep(5)
512
-
513
async def _ws_connect(self):
    """Run a single Event Hub WebSocket session, authenticated by launcher_ws_token.

    Subscribes to all events (``">"``), then for every received frame:

    * ``type == "event"``: wakes the waiter registered under
      ``"<event>:<data.module_id>"`` (if any) and prints the event. When a
      timestamp is present, the local time and delivery latency are included.
    * ``type == "error"``: prints the error message.

    Frames that are not JSON objects are ignored. Event payloads that are not
    dicts are printed but never matched against waiters. Previously either
    case raised and tore down the whole session.
    """
    from datetime import datetime, timezone

    ws_url = f"{self._event_hub_ws_url}?token={self._launcher_ws_token}"
    async with websockets.connect(ws_url, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
        self._ws = ws
        print("[launcher] 已连接到 Event Hub")

        # Subscribe to all events
        await ws.send(json.dumps({
            "type": "subscribe",
            "events": [">"],
        }))

        # Receive loop
        async for raw in ws:
            try:
                msg = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                continue
            if not isinstance(msg, dict):
                continue  # valid JSON but not an object: nothing to dispatch

            msg_type = msg.get("type", "")
            if msg_type == "error":
                print(f"[launcher] Event Hub 错误: {msg.get('message')}")
                continue
            if msg_type != "event":
                continue

            source = msg.get("source", "unknown")
            event = msg.get("event", "")
            data = msg.get("data", {})

            # Trigger event waiters (only meaningful for dict payloads).
            if isinstance(data, dict):
                module_id = data.get("module_id", "")
                waiter = self._event_waiters.get(f"{event}:{module_id}")
                if waiter:
                    waiter[1].update(data)
                    waiter[0].set()

            payload_text = json.dumps(data, ensure_ascii=False)
            ts = msg.get("timestamp", "")
            if not ts:
                print(f"[{source}] {event}: {payload_text}")
                continue

            latency_str = ""
            try:
                sent = datetime.fromisoformat(ts)
                delay_ms = (datetime.now(timezone.utc) - sent).total_seconds() * 1000
                latency_str = f" ({delay_ms:.1f}ms)"
                local_ts = sent.astimezone().strftime("%H:%M:%S")
            except Exception:
                # Unparseable, naive or non-string timestamp: show the raw
                # HH:MM:SS slice when it looks like ISO text, else the value.
                ts_text = ts if isinstance(ts, str) else str(ts)
                local_ts = ts_text[11:19] if len(ts_text) >= 19 else ts_text
            print(f"[{source}] {local_ts} {event}{latency_str}: {payload_text}")
560
-
561
- async def _publish_event(self, event_type: str, data: dict):
562
- """Publish an event to Event Hub via WebSocket."""
563
- if not self._ws:
564
- return
565
- from datetime import datetime, timezone
566
- msg = {
567
- "type": "event",
568
- "event_id": str(uuid.uuid4()),
569
- "event": event_type,
570
- "source": "launcher",
571
- "timestamp": datetime.now(timezone.utc).isoformat(),
572
- "data": data,
573
- }
574
- try:
575
- await self._ws.send(json.dumps(msg))
576
- except Exception as e:
577
- print(f"[launcher] 发布事件失败: {e}")
578
-
579
- def _publish_event_threadsafe(self, event_type: str, data: dict):
580
- """Publish event from non-async context (API thread). Fire-and-forget."""
581
- if not self._ws or not self._loop:
582
- return
583
- asyncio.run_coroutine_threadsafe(
584
- self._publish_event(event_type, data), self._loop,
585
- )
586
-
587
- async def _wait_event(self, event_type: str, module_id: str, timeout: float) -> dict | None:
588
- """Wait for a specific event from a module. Returns data dict or None on timeout."""
589
- key = f"{event_type}:{module_id}"
590
- evt = asyncio.Event()
591
- data = {}
592
- self._event_waiters[key] = (evt, data)
593
- try:
594
- await asyncio.wait_for(evt.wait(), timeout=timeout)
595
- return data
596
- except asyncio.TimeoutError:
597
- return None
598
- finally:
599
- self._event_waiters.pop(key, None)
600
-
601
- async def _graceful_stop(self, name: str, reason: str = "stop_requested", timeout: float = 10):
602
- """Graceful shutdown: send event → wait ack → wait ready → kill."""
603
- self._log_lifecycle("stopping", name, reason=reason)
604
- await self._publish_event("module.shutdown", {
605
- "module_id": name, "reason": reason, "timeout": timeout,
606
- })
607
-
608
- ack = await self._wait_event("module.shutdown.ack", name, timeout=3)
609
- if not ack:
610
- self.process_manager.stop_module(name, timeout=5)
611
- await self._publish_event("module.stopped", {"module_id": name})
612
- return
613
-
614
- estimated = min(ack.get("estimated_cleanup", timeout), timeout)
615
- ready = await self._wait_event("module.shutdown.ready", name, timeout=estimated)
616
- if ready:
617
- self.process_manager.stop_module(name, timeout=1)
618
- else:
619
- self.process_manager.stop_module(name, timeout=3)
620
-
621
- self._log_lifecycle("stopped", name, reason=reason)
622
- await self._publish_event("module.stopped", {"module_id": name})
623
-
624
async def _graceful_shutdown_all(self):
    """Ask every running module to shut down, wait up to 10s, then stop the rest.

    ``module.shutdown`` is published for each running module (scanned modules
    plus core modules). Process state is then polled every 0.5s until all have
    exited or 10 seconds have passed, after which ``stop_all`` is called with a
    3s timeout. The deadline uses the monotonic clock so wall-clock
    adjustments cannot stretch or cut the grace period.
    """
    pm = self.process_manager
    running = [n for n in self.modules if pm.is_running(n)]
    # Core modules may not be part of the scanned set; include them as well.
    for cn in CORE_MODULE_NAMES:
        if pm.is_running(cn) and cn not in running:
            running.append(cn)
    if not running:
        return

    print(f"[launcher] 优雅关闭: {', '.join(running)}")
    for name in running:
        self._log_lifecycle("stopping", name, reason="system_shutdown")
        await self._publish_event("module.shutdown", {
            "module_id": name, "reason": "system_shutdown", "timeout": 10,
        })

    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        if not any(pm.is_running(n) for n in running):
            break
        await asyncio.sleep(0.5)

    pm.stop_all(timeout=3)
    for name in running:
        self._log_lifecycle("stopped", name, reason="system_shutdown")
648
-
649
- # ── Heartbeat to Registry ──
650
-
651
async def _heartbeat_loop(self):
    """Post a launcher heartbeat to Registry every 30 seconds until shutdown.

    The first heartbeat is sent after the initial 30s sleep. Failures are
    deliberately ignored (best effort).
    """
    shutting_down = self._thread_shutdown.is_set
    while not shutting_down():
        await asyncio.sleep(30)
        try:
            async with httpx.AsyncClient() as client:
                endpoint = f"http://127.0.0.1:{self.registry_port}/modules"
                beat = {"action": "heartbeat", "module_id": "launcher"}
                auth_headers = {"Authorization": f"Bearer {self.kite_token}"}
                await client.post(endpoint, json=beat, headers=auth_headers, timeout=5)
        except Exception:
            pass
665
-
666
- # ── Module startup ──
667
-
668
- def _topo_sort(self, modules: list[ModuleInfo]) -> list[ModuleInfo]:
669
- """Topological sort by depends_on. Raises RuntimeError on cycle."""
670
- name_map = {m.name: m for m in modules}
671
- visited = set()
672
- in_stack = set()
673
- order = []
674
-
675
- def visit(name):
676
- if name in in_stack:
677
- raise RuntimeError(f"Circular dependency detected involving '{name}'")
678
- if name in visited:
679
- return
680
- in_stack.add(name)
681
- info = name_map.get(name)
682
- if info:
683
- for dep in info.depends_on:
684
- visit(dep)
685
- in_stack.remove(name)
686
- visited.add(name)
687
- if info:
688
- order.append(info)
689
-
690
- for m in modules:
691
- visit(m.name)
692
- return order
693
-
694
async def _start_one_module(self, info: ModuleInfo):
    """Start one module and walk it through starting → ready → started.

    Publishes ``module.starting``, launches the process with the module's
    token, and waits for ``module.ready`` for ``info.launch.timeout`` seconds.
    A ready timeout only prints a warning: the module is still logged and
    published as started, and its stdio is closed. If the process cannot be
    started, a ``start_failed`` lifecycle entry is written and nothing else
    happens.
    """
    module_name = info.name
    self._log_lifecycle("starting", module_name)
    await self._publish_event("module.starting", {"module_id": module_name})

    module_token = self._module_tokens.get(module_name, "")
    launched = self.process_manager.start_module(info, boot_info={"token": module_token})
    if not launched:
        self._log_lifecycle("start_failed", module_name)
        return

    # Configurable per-module timeout; degrade (warn only) when it expires.
    ready_timeout = info.launch.timeout
    became_ready = await self._wait_event("module.ready", module_name, timeout=ready_timeout)
    if not became_ready:
        print(f"[launcher] 警告: '{module_name}' 在 {ready_timeout}s 内未发送 module.ready")
    else:
        print(f"[launcher] 模块 '{module_name}' 已就绪")

    record = self.process_manager.get_record(module_name)
    self._log_lifecycle("started", module_name, pid=record.pid if record else None)
    await self._publish_event("module.started", {"module_id": module_name})
    self.process_manager.close_stdio(module_name)
718
-
719
- async def _register_module_tokens(self):
720
- """Generate per-module tokens and register the mapping to Registry."""
721
- # Include all scanned modules + core modules
722
- for name in self.modules:
723
- if name not in self._module_tokens:
724
- self._module_tokens[name] = secrets.token_hex(32)
725
- # Ensure registry has a token
726
- if "registry" not in self._module_tokens:
727
- self._module_tokens["registry"] = secrets.token_hex(32)
728
-
729
- if not self._module_tokens:
730
- return
731
-
732
- await self._register_tokens_to_registry(self._module_tokens)
733
-
734
async def _register_tokens_to_registry(self, tokens: dict):
    """Send a module-name → token mapping to the Registry (POST /tokens).

    Never raises for transport problems: a non-200 response or any
    exception during the request is reported as a console warning.
    """
    endpoint = f"http://127.0.0.1:{self.registry_port}/tokens"
    auth = {"Authorization": f"Bearer {self.kite_token}"}
    try:
        async with httpx.AsyncClient() as http:
            reply = await http.post(endpoint, json=tokens, headers=auth, timeout=5)
            if reply.status_code != 200:
                print(f"[launcher] 警告: 令牌注册返回 {reply.status_code}")
            else:
                print(f"[launcher] 已注册 {len(tokens)} 个模块令牌")
    except Exception as e:
        print(f"[launcher] 警告: 注册模块令牌失败: {e}")
747
-
748
- # ── Validation ──
749
-
750
def _validate_core_modules(self):
    """Abort startup unless the core modules are present and well-formed.

    For "registry" and "event_hub" under ``$KITE_PROJECT/core`` this checks
    that the directory exists, that it contains ``module.md``, and that the
    file yields non-empty frontmatter. Any failure prints a fatal message
    and exits the process with status 1.
    """
    def fatal(message):
        print(message)
        sys.exit(1)

    core_root = os.path.join(os.environ["KITE_PROJECT"], "core")
    for name in ("registry", "event_hub"):
        mod_dir = os.path.join(core_root, name)
        md_path = os.path.join(mod_dir, "module.md")

        if not os.path.isdir(mod_dir):
            fatal(f"[launcher] 致命: 核心模块 '{name}' 目录未找到: {mod_dir}")
        if not os.path.isfile(md_path):
            fatal(f"[launcher] 致命: 核心模块 '{name}' 缺少 module.md: {md_path}")

        # Try to parse frontmatter. sys.exit() raises SystemExit, which is
        # not an Exception subclass, so the handler below does not catch it.
        try:
            with open(md_path, "r", encoding="utf-8") as f:
                fm = _parse_frontmatter(f.read())
            if not fm:
                fatal(f"[launcher] 致命: 核心模块 '{name}' module.md 没有有效的 frontmatter")
        except Exception as e:
            fatal(f"[launcher] 致命: 核心模块 '{name}' module.md 解析错误: {e}")
772
-
773
- # ── API thread ──
774
-
775
def _start_api_thread(self):
    """Run the Launcher API server on a daemon thread.

    A free port is obtained from ``_get_free_port`` and stored in
    ``self.api_port``; the uvicorn server object is kept in
    ``self._api_server``. The call blocks for at most ~5 seconds waiting
    for the server to report that it has started, warning if it has not.
    """
    self.api_port = self._get_free_port()
    self._api_server = uvicorn.Server(
        uvicorn.Config(
            self._app,
            host="127.0.0.1",
            port=self.api_port,
            log_level="warning",
        )
    )

    worker = threading.Thread(target=lambda: self._api_server.run(), daemon=True)
    worker.start()

    # Poll the server's `started` flag until it is set or 5 s have elapsed.
    came_up = False
    give_up_at = time.time() + 5
    while time.time() < give_up_at:
        if self._api_server.started:
            came_up = True
            break
        time.sleep(0.05)
    if not came_up:
        print("[launcher] 警告: API 服务器可能尚未完全就绪")

    print(f"[launcher] API 服务器已启动,端口 {self.api_port}")
801
-
802
- # ── Monitor loop ──
803
-
804
async def _monitor_loop(self):
    """Poll child processes about once a second and react to exits.

    For every exited module the exit is logged and ``module.stopped`` is
    published. A core module exit triggers ``_full_restart`` and ends this
    loop. A non-core module is restarted while it has failed fewer than
    MAX_FAIL times and its desired state is "running"; at MAX_FAIL it is
    marked "stopped" and abandoned. The loop also ends once
    MAX_FAILED_MODULES modules have been abandoned, or as soon as
    ``self._shutdown_event`` is set (the wait wakes immediately on it).
    """
    MAX_FAIL = 3
    MAX_FAILED_MODULES = 3

    while not self._shutdown_event.is_set():
        exited = self.process_manager.check_exited()

        for name, rc in exited:
            print(f"[launcher] 模块 '{name}' 退出,返回码 {rc}")
            self._log_lifecycle("exited", name, exit_code=rc)
            await self._publish_event("module.stopped", {
                "module_id": name, "exit_code": rc,
            })
            info = self.modules.get(name)

            # Core module crash → full restart
            if name in CORE_MODULE_NAMES or (info and info.is_core()):
                print(f"[launcher] 严重: 核心模块 '{name}' 崩溃,正在全部重启...")
                self._log_lifecycle("core_crash", name, exit_code=rc)
                await self._full_restart()
                return

            # Non-core: count the failure, then restart or give up.
            failures = self._fail_counts.get(name, 0) + 1
            self._fail_counts[name] = failures

            if failures >= MAX_FAIL:
                self._desired_states[name] = "stopped"
                self._log_lifecycle("failed", name, reason=f"exceeded {MAX_FAIL} retries")
                print(f"[launcher] 模块 '{name}' 失败 {MAX_FAIL} 次,已放弃")
            elif self._desired_states.get(name) == "running" and info:
                print(f"[launcher] 正在重启 '{name}' (第 {failures}/{MAX_FAIL} 次)...")
                await self._start_one_module(info)

        abandoned = [c for c in self._fail_counts.values() if c >= MAX_FAIL]
        failed_count = len(abandoned)
        if failed_count >= MAX_FAILED_MODULES:
            print(f"[launcher] {failed_count} 个模块永久失败,启动器退出")
            return

        if exited:
            self.process_manager.persist_records()

        # Wait 1s but wake immediately on shutdown signal
        try:
            await asyncio.wait_for(self._shutdown_event.wait(), timeout=1)
        except asyncio.TimeoutError:
            continue
        return  # shutdown requested
855
-
856
async def _full_restart(self):
    """Tear everything down and bring the whole system back up.

    Cancels the Event Hub WebSocket and heartbeat tasks, gracefully stops
    all modules, clears failure counters and module tokens, generates a new
    kite token, then re-runs startup phases 1-4 and re-enters the monitor
    loop. Any exception during the restart sequence is printed, not raised.
    """
    print("[launcher] 全量重启: 正在停止所有模块...")

    # Disconnect Event Hub WS
    if self._ws_task:
        self._ws_task.cancel()
        self._ws_task = None
    if getattr(self, '_heartbeat_task', None):
        self._heartbeat_task.cancel()
        self._heartbeat_task = None
    self._ws = None
    self._event_hub_ws_url = ""
    self._launcher_ws_token = ""

    await self._graceful_shutdown_all()
    self._fail_counts.clear()
    self._module_tokens.clear()

    # Regenerate kite_token and hand the same value to the process manager.
    fresh_token = secrets.token_hex(32)
    self.kite_token = fresh_token
    self.process_manager.kite_token = fresh_token

    print("[launcher] 全量重启: 重新执行 Phase 1-4...")
    try:
        await self._phase1_registry()
        self.modules = self.module_scanner.scan()
        for mod_name, mod_info in self.modules.items():
            self._log_lifecycle(
                "scanned", mod_name,
                state=mod_info.state, module_dir=mod_info.module_dir,
            )
        await self._register_module_tokens()
        await self._phase2_event_hub()
        await self._phase3_registry_ready()
        await self._phase4_start_modules()
        self.process_manager.persist_records()
        self._heartbeat_task = asyncio.create_task(self._heartbeat_loop())
        print("[launcher] 全量重启完成,恢复监控循环")
        await self._monitor_loop()
    except Exception as e:
        print(f"[launcher] 全量重启失败: {e}")
895
-
896
- # ── Shutdown ──
897
-
898
def _final_cleanup(self):
    """Last-step teardown run when the launcher exits.

    Cancels background tasks, stops all child processes, asks the API
    server to exit, and empties then deletes the process records file.
    On Windows the interpreter is terminated immediately afterwards with
    ``os._exit(0)``.
    """
    print("[launcher] 正在关闭...")

    ws_task = self._ws_task
    if ws_task:
        ws_task.cancel()
    heartbeat = getattr(self, '_heartbeat_task', None)
    if heartbeat:
        heartbeat.cancel()

    manager = self.process_manager
    manager.stop_all(timeout=10)

    if self._api_server:
        self._api_server.should_exit = True

    # Clear instance runtime files: blank the records, then remove the file.
    manager._write_records_file([])
    try:
        os.remove(manager.records_path)
    except OSError:
        pass
    print("[launcher] 再见。")

    if IS_WINDOWS:
        os._exit(0)
922
-
923
- # ── Utilities ──
924
-
925
def _load_discovery(self) -> dict | None:
    """Return the ``discovery`` mapping from the launcher's own module.md.

    The file is ``$KITE_PROJECT/core/launcher/module.md``. The mapping is
    returned only when it is a non-empty dict; otherwise, or when reading
    or parsing fails (a warning is printed), the result is ``None``.
    """
    md_path = os.path.join(os.environ["KITE_PROJECT"], "core", "launcher", "module.md")
    try:
        with open(md_path, "r", encoding="utf-8") as f:
            text = f.read()
        discovery = _parse_frontmatter(text).get("discovery")
        if discovery and isinstance(discovery, dict):
            print(f"[launcher] 发现来源: {', '.join(discovery.keys())}")
            return discovery
    except Exception as e:
        print(f"[launcher] 警告: 读取发现配置失败: {e}")
    return None
938
-
939
- def _log_lifecycle(self, event: str, module: str, **extra):
940
- """Append one JSONL line to lifecycle.jsonl."""
941
- from datetime import datetime, timezone
942
- record = {"ts": datetime.now(timezone.utc).isoformat(), "event": event, "module": module}
943
- record.update(extra)
944
- try:
945
- os.makedirs(os.path.dirname(self._lifecycle_log), exist_ok=True)
946
- with open(self._lifecycle_log, "a", encoding="utf-8") as f:
947
- f.write(json.dumps(record, ensure_ascii=False) + "\n")
948
- except Exception:
949
- pass
950
-
951
- @staticmethod
952
- def _get_free_port() -> int:
953
- """Get a free port assigned by the OS (bind to port 0)."""
954
- import socket
955
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
956
- s.bind(("127.0.0.1", 0))
957
- return s.getsockname()[1]
958
-
959
- # ── API app ──
960
-
961
def _create_api_app(self) -> FastAPI:
    """Build the FastAPI application exposing the Launcher management API.

    Routes:
        GET  /launcher/modules                 list modules and their status
        POST /launcher/modules/{name}/start    start a module
        POST /launcher/modules/{name}/stop     gracefully stop a module
        POST /launcher/modules/{name}/restart  stop, rotate token, start again
        POST /launcher/rescan                  rescan module directories
        PUT  /launcher/modules/{name}/state    set enabled/manual/disabled

    Returns:
        The configured FastAPI app (interactive docs are disabled).
    """
    app = FastAPI(title="Kite Launcher", docs_url=None, redoc_url=None)
    launcher = self

    def find_module(name: str, *, reject_disabled: bool = False):
        """Return the module's info or raise 404 (unknown) / 403 (disabled)."""
        info = launcher.modules.get(name)
        if not info:
            raise HTTPException(404, f"Module '{name}' not found")
        if reject_disabled and info.state == "disabled":
            raise HTTPException(403, f"Module '{name}' is disabled")
        return info

    async def push_tokens(tokens: dict) -> None:
        """POST a name → token mapping to the Registry; warn on failure, never raise."""
        try:
            async with httpx.AsyncClient() as client:
                await client.post(
                    f"http://127.0.0.1:{launcher.registry_port}/tokens",
                    json=tokens,
                    headers={"Authorization": f"Bearer {launcher.kite_token}"},
                    timeout=5,
                )
        except Exception as e:
            # Best-effort, but always reported: restart and rescan used to
            # swallow this silently while start printed a warning.
            print(f"[launcher] 警告: 注册 {', '.join(tokens)} 的令牌失败: {e}")

    def spawn(name: str, info, *, via: str, status: str, verb: str) -> dict:
        """Start the process with its current token and record the outcome."""
        boot_info = {"token": launcher._module_tokens[name]}
        if not launcher.process_manager.start_module(info, boot_info=boot_info):
            launcher._log_lifecycle("start_failed", name, via=via)
            raise HTTPException(500, f"Failed to {verb} '{name}'")
        launcher._desired_states[name] = "running"
        # A successful manual (re)start resets the crash counter.
        launcher._fail_counts.pop(name, None)
        launcher.process_manager.persist_records()
        rec = launcher.process_manager.get_record(name)
        launcher._log_lifecycle("started", name, pid=rec.pid if rec else None, via=via)
        launcher._publish_event_threadsafe("module.started", {"module_id": name})
        return {"status": status, "name": name}

    @app.get("/launcher/modules")
    async def list_modules():
        """List all modules and their current status."""
        result = []
        for name, info in launcher.modules.items():
            running = launcher.process_manager.is_running(name)
            rec = launcher.process_manager.get_record(name)
            live = bool(running and rec)
            result.append({
                "name": name,
                "display_name": info.display_name,
                "type": info.type,
                "config_state": info.state,
                "desired_state": launcher._desired_states.get(name, "stopped"),
                "actual_state": f"running({rec.pid})" if live else "stopped",
                "pid": rec.pid if live else None,
                "monitor": info.monitor,
            })
        return result

    @app.post("/launcher/modules/{name}/start")
    async def start_module(name: str):
        """Start a module by name."""
        info = find_module(name, reject_disabled=True)
        # Only mint and register a token if the module has none yet.
        if name not in launcher._module_tokens:
            launcher._module_tokens[name] = secrets.token_hex(32)
            await push_tokens({name: launcher._module_tokens[name]})
        return spawn(name, info, via="api", status="started", verb="start")

    @app.post("/launcher/modules/{name}/stop")
    async def stop_module(name: str, body: dict = None):
        """Stop a module with graceful shutdown."""
        find_module(name)
        reason = (body or {}).get("reason", "stop_requested")
        launcher._desired_states[name] = "stopped"
        await launcher._graceful_stop(name, reason)
        launcher.process_manager.persist_records()
        return {"status": "stopped", "name": name}

    @app.post("/launcher/modules/{name}/restart")
    async def restart_module(name: str, body: dict = None):
        """Restart a module (stop + start)."""
        info = find_module(name, reject_disabled=True)
        reason = (body or {}).get("reason", "restart")
        await launcher._graceful_stop(name, reason)
        # A restart always rotates the module's token.
        launcher._module_tokens[name] = secrets.token_hex(32)
        await push_tokens({name: launcher._module_tokens[name]})
        return spawn(name, info, via="restart_api", status="restarted", verb="restart")

    @app.post("/launcher/rescan")
    async def rescan_modules():
        """Rescan module directories for new/removed modules."""
        old_names = set(launcher.modules.keys())
        launcher.modules = launcher.module_scanner.scan()
        new_names = set(launcher.modules.keys())
        added = list(new_names - old_names)
        removed = list(old_names - new_names)

        new_tokens = {}
        for name in added:
            info = launcher.modules[name]
            launcher._log_lifecycle("scanned", name, state=info.state, module_dir=info.module_dir)
            launcher._desired_states[name] = "running" if info.state == "enabled" else "stopped"
            launcher._module_tokens[name] = secrets.token_hex(32)
            new_tokens[name] = launcher._module_tokens[name]
        if new_tokens:
            await push_tokens(new_tokens)
        return {"added": added, "removed": removed, "total": len(launcher.modules)}

    @app.put("/launcher/modules/{name}/state")
    async def update_state(name: str, body: dict):
        """Update module state (enabled/manual/disabled). Writes to module.md."""
        info = find_module(name)

        new_state = body.get("state", "")
        if new_state not in ("enabled", "manual", "disabled"):
            raise HTTPException(400, "state must be enabled, manual, or disabled")

        if info.is_core() and new_state == "disabled":
            raise HTTPException(403, "Core modules cannot be disabled")

        old_state = info.state
        info.state = new_state
        # Only "enabled" implies the module should be kept running.
        launcher._desired_states[name] = "running" if new_state == "enabled" else "stopped"

        _update_module_md_state(info.module_dir, new_state)
        launcher._publish_event_threadsafe("module.state_changed", {
            "module_id": name,
            "old_state": old_state,
            "new_state": new_state,
        })
        return {
            "name": name,
            "old_state": old_state,
            "new_state": new_state,
        }

    return app
1134
-
1135
-
1136
- def _update_module_md_state(module_dir: str, new_state: str):
1137
- """Update the state field in a module's module.md frontmatter."""
1138
- import re
1139
- md_path = os.path.join(module_dir, "module.md")
1140
- if not os.path.isfile(md_path):
1141
- return
1142
-
1143
- try:
1144
- with open(md_path, "r", encoding="utf-8") as f:
1145
- content = f.read()
1146
-
1147
- updated = re.sub(
1148
- r'^(state:\s*)(\S+)',
1149
- rf'\g<1>{new_state}',
1150
- content,
1151
- count=1,
1152
- flags=re.MULTILINE,
1153
- )
1154
-
1155
- with open(md_path, "w", encoding="utf-8") as f:
1156
- f.write(updated)
1157
- except Exception as e:
1158
- print(f"[launcher] 警告: 更新 module.md 状态失败: {e}")