@agentunion/kite 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/cli.js +127 -25
  2. package/core/event_hub/entry.py +384 -61
  3. package/core/event_hub/hub.py +8 -0
  4. package/core/event_hub/module.md +0 -1
  5. package/core/event_hub/server.py +169 -38
  6. package/core/kite_log.py +241 -0
  7. package/core/launcher/entry.py +1306 -425
  8. package/core/launcher/module_scanner.py +10 -9
  9. package/core/launcher/process_manager.py +555 -121
  10. package/core/registry/entry.py +335 -30
  11. package/core/registry/server.py +339 -256
  12. package/core/registry/store.py +13 -2
  13. package/extensions/agents/__init__.py +1 -0
  14. package/extensions/agents/assistant/__init__.py +1 -0
  15. package/extensions/agents/assistant/entry.py +380 -0
  16. package/extensions/agents/assistant/module.md +22 -0
  17. package/extensions/agents/assistant/server.py +236 -0
  18. package/extensions/channels/__init__.py +1 -0
  19. package/extensions/channels/acp_channel/__init__.py +1 -0
  20. package/extensions/channels/acp_channel/entry.py +380 -0
  21. package/extensions/channels/acp_channel/module.md +22 -0
  22. package/extensions/channels/acp_channel/server.py +236 -0
  23. package/{core → extensions}/event_hub_bench/entry.py +664 -371
  24. package/{core → extensions}/event_hub_bench/module.md +4 -2
  25. package/extensions/services/backup/__init__.py +1 -0
  26. package/extensions/services/backup/entry.py +380 -0
  27. package/extensions/services/backup/module.md +22 -0
  28. package/extensions/services/backup/server.py +244 -0
  29. package/extensions/services/model_service/__init__.py +1 -0
  30. package/extensions/services/model_service/entry.py +380 -0
  31. package/extensions/services/model_service/module.md +22 -0
  32. package/extensions/services/model_service/server.py +236 -0
  33. package/extensions/services/watchdog/entry.py +460 -143
  34. package/extensions/services/watchdog/module.md +3 -0
  35. package/extensions/services/watchdog/monitor.py +128 -13
  36. package/extensions/services/watchdog/server.py +75 -13
  37. package/extensions/services/web/__init__.py +1 -0
  38. package/extensions/services/web/config.yaml +149 -0
  39. package/extensions/services/web/entry.py +487 -0
  40. package/extensions/services/web/module.md +24 -0
  41. package/extensions/services/web/routes/__init__.py +1 -0
  42. package/extensions/services/web/routes/routes_call.py +189 -0
  43. package/extensions/services/web/routes/routes_config.py +512 -0
  44. package/extensions/services/web/routes/routes_contacts.py +98 -0
  45. package/extensions/services/web/routes/routes_devlog.py +99 -0
  46. package/extensions/services/web/routes/routes_phone.py +81 -0
  47. package/extensions/services/web/routes/routes_sms.py +48 -0
  48. package/extensions/services/web/routes/routes_stats.py +17 -0
  49. package/extensions/services/web/routes/routes_voicechat.py +554 -0
  50. package/extensions/services/web/routes/schemas.py +216 -0
  51. package/extensions/services/web/server.py +332 -0
  52. package/extensions/services/web/static/css/style.css +1064 -0
  53. package/extensions/services/web/static/index.html +1445 -0
  54. package/extensions/services/web/static/js/app.js +4671 -0
  55. package/extensions/services/web/vendor/__init__.py +1 -0
  56. package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
  57. package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
  58. package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
  59. package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
  60. package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
  61. package/extensions/services/web/vendor/config.py +139 -0
  62. package/extensions/services/web/vendor/conversation/__init__.py +0 -0
  63. package/extensions/services/web/vendor/conversation/asr.py +936 -0
  64. package/extensions/services/web/vendor/conversation/engine.py +548 -0
  65. package/extensions/services/web/vendor/conversation/llm.py +534 -0
  66. package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
  67. package/extensions/services/web/vendor/conversation/tts.py +322 -0
  68. package/extensions/services/web/vendor/conversation/vad.py +138 -0
  69. package/extensions/services/web/vendor/storage/__init__.py +1 -0
  70. package/extensions/services/web/vendor/storage/identity.py +312 -0
  71. package/extensions/services/web/vendor/storage/store.py +507 -0
  72. package/extensions/services/web/vendor/task/__init__.py +0 -0
  73. package/extensions/services/web/vendor/task/manager.py +864 -0
  74. package/extensions/services/web/vendor/task/models.py +45 -0
  75. package/extensions/services/web/vendor/task/webhook.py +263 -0
  76. package/extensions/services/web/vendor/tools/__init__.py +0 -0
  77. package/extensions/services/web/vendor/tools/registry.py +321 -0
  78. package/main.py +344 -4
  79. package/package.json +11 -2
  80. package/core/__pycache__/__init__.cpython-313.pyc +0 -0
  81. package/core/__pycache__/data_dir.cpython-313.pyc +0 -0
  82. package/core/data_dir.py +0 -62
  83. package/core/event_hub/__pycache__/__init__.cpython-313.pyc +0 -0
  84. package/core/event_hub/__pycache__/bench.cpython-313.pyc +0 -0
  85. package/core/event_hub/__pycache__/bench_perf.cpython-313.pyc +0 -0
  86. package/core/event_hub/__pycache__/dedup.cpython-313.pyc +0 -0
  87. package/core/event_hub/__pycache__/entry.cpython-313.pyc +0 -0
  88. package/core/event_hub/__pycache__/hub.cpython-313.pyc +0 -0
  89. package/core/event_hub/__pycache__/router.cpython-313.pyc +0 -0
  90. package/core/event_hub/__pycache__/server.cpython-313.pyc +0 -0
  91. package/core/event_hub/bench_results/2026-02-28_13-26-48.json +0 -51
  92. package/core/event_hub/bench_results/2026-02-28_13-44-45.json +0 -51
  93. package/core/event_hub/bench_results/2026-02-28_13-45-39.json +0 -51
  94. package/core/launcher/__pycache__/__init__.cpython-313.pyc +0 -0
  95. package/core/launcher/__pycache__/entry.cpython-313.pyc +0 -0
  96. package/core/launcher/__pycache__/module_scanner.cpython-313.pyc +0 -0
  97. package/core/launcher/__pycache__/process_manager.cpython-313.pyc +0 -0
  98. package/core/launcher/data/log/lifecycle.jsonl +0 -1158
  99. package/core/launcher/data/token.txt +0 -1
  100. package/core/registry/__pycache__/__init__.cpython-313.pyc +0 -0
  101. package/core/registry/__pycache__/entry.cpython-313.pyc +0 -0
  102. package/core/registry/__pycache__/server.cpython-313.pyc +0 -0
  103. package/core/registry/__pycache__/store.cpython-313.pyc +0 -0
  104. package/core/registry/data/port.txt +0 -1
  105. package/core/registry/data/port_484.txt +0 -1
  106. package/extensions/__pycache__/__init__.cpython-313.pyc +0 -0
  107. package/extensions/services/__pycache__/__init__.cpython-313.pyc +0 -0
  108. package/extensions/services/watchdog/__pycache__/__init__.cpython-313.pyc +0 -0
  109. package/extensions/services/watchdog/__pycache__/entry.cpython-313.pyc +0 -0
  110. package/extensions/services/watchdog/__pycache__/monitor.cpython-313.pyc +0 -0
  111. package/extensions/services/watchdog/__pycache__/server.cpython-313.pyc +0 -0
  112. /package/{core/event_hub/bench_results/.gitkeep → extensions/services/web/vendor/bluetooth/__init__.py} +0 -0
@@ -1,143 +1,460 @@
1
- """
2
- Watchdog entry point.
3
- Reads boot_info from stdin, registers to Registry, starts health monitor.
4
- """
5
-
6
- import json
7
- import os
8
- import socket
9
- import sys
10
-
11
- import httpx
12
- import uvicorn
13
-
14
- # Ensure project root is on sys.path
15
- _this_dir = os.path.dirname(os.path.abspath(__file__))
16
- _project_root = os.path.dirname(os.path.dirname(os.path.dirname(_this_dir)))
17
- if _project_root not in sys.path:
18
- sys.path.insert(0, _project_root)
19
-
20
- from extensions.services.watchdog.monitor import HealthMonitor
21
- from extensions.services.watchdog.server import WatchdogServer
22
-
23
-
24
- def _get_free_port() -> int:
25
- with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
26
- s.bind(("127.0.0.1", 0))
27
- return s.getsockname()[1]
28
-
29
-
30
- def _register_to_registry(token: str, registry_url: str, port: int):
31
- payload = {
32
- "action": "register",
33
- "module_id": "watchdog",
34
- "module_type": "service",
35
- "name": "Watchdog",
36
- "api_endpoint": f"http://127.0.0.1:{port}",
37
- "health_endpoint": "/health",
38
- "events_publish": {
39
- "watchdog.module.unhealthy": {"description": "Module failed health check"},
40
- "watchdog.module.recovered": {"description": "Module recovered from unhealthy"},
41
- "watchdog.alert": {"description": "Module restarted too many times"},
42
- },
43
- "events_subscribe": [
44
- "module.started",
45
- "module.stopped",
46
- ],
47
- }
48
- headers = {"Authorization": f"Bearer {token}"}
49
- resp = httpx.post(
50
- f"{registry_url}/modules",
51
- json=payload, headers=headers, timeout=5,
52
- )
53
- if resp.status_code == 200:
54
- print("[watchdog] Registered to Registry")
55
- else:
56
- print(f"[watchdog] WARNING: Registry returned {resp.status_code}")
57
-
58
-
59
- def _get_launcher_url(token: str, registry_url: str) -> str:
60
- """Discover Launcher API endpoint from Registry."""
61
- headers = {"Authorization": f"Bearer {token}"}
62
- try:
63
- resp = httpx.get(
64
- f"{registry_url}/get/launcher.api_endpoint",
65
- headers=headers, timeout=5,
66
- )
67
- if resp.status_code == 200:
68
- return resp.json()
69
- except Exception:
70
- pass
71
- return ""
72
-
73
-
74
- def _get_event_hub_ws(token: str, registry_url: str) -> str:
75
- """Discover Event Hub WebSocket endpoint from Registry, with retry."""
76
- import time
77
- headers = {"Authorization": f"Bearer {token}"}
78
- deadline = time.time() + 10
79
- while time.time() < deadline:
80
- try:
81
- resp = httpx.get(
82
- f"{registry_url}/get/event_hub.metadata.ws_endpoint",
83
- headers=headers, timeout=3,
84
- )
85
- if resp.status_code == 200:
86
- val = resp.json()
87
- if val:
88
- return val
89
- except Exception:
90
- pass
91
- time.sleep(1)
92
- return ""
93
-
94
-
95
- def main():
96
- # Read boot_info from stdin
97
- token = ""
98
- registry_port = 0
99
- try:
100
- line = sys.stdin.readline().strip()
101
- if line:
102
- boot_info = json.loads(line)
103
- token = boot_info.get("token", "")
104
- registry_port = boot_info.get("registry_port", 0)
105
- except Exception:
106
- pass
107
-
108
- if not token or not registry_port:
109
- print("[watchdog] ERROR: Missing token or registry_port in boot_info")
110
- sys.exit(1)
111
-
112
- print(f"[watchdog] Token received ({len(token)} chars), registry port: {registry_port}")
113
-
114
- registry_url = f"http://127.0.0.1:{registry_port}"
115
- port = _get_free_port()
116
-
117
- # Register to Registry
118
- _register_to_registry(token, registry_url, port)
119
-
120
- # Discover Launcher URL
121
- launcher_url = _get_launcher_url(token, registry_url)
122
- if not launcher_url:
123
- print("[watchdog] WARNING: Could not discover Launcher URL, restart disabled")
124
-
125
- # Discover Event Hub WebSocket URL
126
- event_hub_ws = _get_event_hub_ws(token, registry_url)
127
- if not event_hub_ws:
128
- print("[watchdog] WARNING: Could not discover Event Hub WS, events disabled")
129
-
130
- # Create monitor and server
131
- monitor = HealthMonitor(
132
- own_token=token,
133
- registry_url=registry_url,
134
- launcher_url=launcher_url,
135
- )
136
- server = WatchdogServer(monitor, token=token, event_hub_ws=event_hub_ws)
137
-
138
- print(f"[watchdog] Starting on port {port}")
139
- uvicorn.run(server.app, host="127.0.0.1", port=port, log_level="warning")
140
-
141
-
142
- if __name__ == "__main__":
143
- main()
1
+ """
2
+ Watchdog entry point.
3
+ Reads boot_info from stdin, registers to Registry, starts health monitor.
4
+ Registry port: env KITE_REGISTRY_PORT (fast path) or stdin kite message (parallel start).
5
+ """
6
+
7
+ import builtins
8
+ import json
9
+ import os
10
+ import re
11
+ import socket
12
+ import sys
13
+ import threading
14
+ import time
15
+ import traceback
16
+ from datetime import datetime, timezone
17
+
18
+ import httpx
19
+ import uvicorn
20
+
21
+
22
+
23
+ # ── Module configuration ──
24
+ MODULE_NAME = "watchdog"
25
+
26
+
27
def _fmt_elapsed(t0: float) -> str:
    """Render the time elapsed since ``t0`` as a short human-readable string.

    ``t0`` is an earlier ``time.monotonic()`` reading. Below one second the
    result is whole milliseconds (``'NNNms'``); from one up to ten seconds it
    has one decimal (``'N.Ns'``); from ten seconds on it is whole seconds
    (``'NNs'``).
    """
    seconds = time.monotonic() - t0
    if seconds < 1:
        return f"{seconds * 1000:.0f}ms"
    # One decimal place only while the value is still a single digit.
    precision = 1 if seconds < 10 else 0
    return f"{seconds:.{precision}f}s"
35
+
36
+
37
+ # ── Safe stdout/stderr: ignore BrokenPipeError after Launcher closes stdio ──
38
+
39
class _SafeWriter:
    """Proxy around a text stream whose ``write``/``flush`` swallow I/O errors.

    Intended for ``sys.stdout``/``sys.stderr`` once the parent process has
    closed its end of the pipe: a failed write is dropped silently instead of
    raising ``BrokenPipeError`` (or any other ``OSError``) into the caller.
    Every other attribute is forwarded to the wrapped stream unchanged.
    """

    def __init__(self, stream):
        self._stream = stream

    def write(self, s):
        """Write ``s`` to the wrapped stream and return the character count.

        Follows the text-stream ``write`` contract of returning the number of
        characters written. When the underlying write fails, ``len(s)`` is
        reported so callers treat the data as consumed and do not retry.
        """
        try:
            written = self._stream.write(s)
        except OSError:  # BrokenPipeError is a subclass of OSError
            return len(s)
        return len(s) if written is None else written

    def flush(self):
        """Flush the wrapped stream, ignoring any ``OSError``."""
        try:
            self._stream.flush()
        except OSError:
            pass

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If ``_stream`` itself
        # is missing (e.g. on a half-constructed copy), delegating would
        # recurse forever, so fail explicitly instead.
        if name == "_stream":
            raise AttributeError(name)
        return getattr(self._stream, name)
58
+
59
+ sys.stdout = _SafeWriter(sys.stdout)
60
+ sys.stderr = _SafeWriter(sys.stderr)
61
+
62
+
63
+ # ── Timestamped print + log file writer ──
64
+ # Independent implementation per module (no shared code dependency)
65
+
66
+ _builtin_print = builtins.print
67
+ _start_ts = time.monotonic()
68
+ _last_ts = time.monotonic()
69
+ _ANSI_RE = re.compile(r"\033\[[0-9;]*m")
70
+ _log_lock = threading.Lock()
71
+ _log_latest_path = None
72
+ _log_daily_path = None
73
+ _log_daily_date = ""
74
+ _log_dir = None
75
+ _crash_log_path = None
76
+
77
def _strip_ansi(s: str) -> str:
    """Return ``s`` with every match of the module-level ``_ANSI_RE`` removed."""
    return re.sub(_ANSI_RE, "", s)
79
+
80
def _resolve_daily_log_path():
    """Point ``_log_daily_path`` at today's log file under ``_log_dir``.

    The file lives at ``<_log_dir>/<YYYY-MM>/<YYYY-MM-DD>.log``; the month
    directory is created on demand. Nothing happens when ``_log_dir`` is unset
    or the path for today's date has already been resolved.

    If the month directory cannot be created, the previous path and date are
    left untouched so the next call retries. The error is not raised, because
    this function runs on the ``print`` path and a logging problem must not
    break the caller.
    """
    global _log_daily_path, _log_daily_date
    if not _log_dir:
        return
    today = datetime.now().strftime("%Y-%m-%d")
    if today == _log_daily_date and _log_daily_path:
        return
    month_dir = os.path.join(_log_dir, today[:7])  # "YYYY-MM"
    try:
        os.makedirs(month_dir, exist_ok=True)
    except OSError:
        return
    _log_daily_path = os.path.join(month_dir, f"{today}.log")
    _log_daily_date = today
92
+
93
def _write_log(plain_line: str):
    """Append ``plain_line`` to the latest log and to today's daily log.

    Both appends happen while holding ``_log_lock``. The daily path is
    re-resolved between the two writes. A failure to write either file is
    ignored.
    """
    def _append(path):
        # Best effort: an unset path or a failed write is skipped silently.
        if not path:
            return
        try:
            with open(path, "a", encoding="utf-8") as fh:
                fh.write(plain_line)
        except Exception:
            pass

    with _log_lock:
        _append(_log_latest_path)
        _resolve_daily_log_path()
        _append(_log_daily_path)
109
+
110
def _write_crash(exc_type, exc_value, exc_tb, thread_name=None, severity="critical", handled=False):
    """Serialize one exception as a JSON line and append it to the crash logs.

    The record is appended to ``_crash_log_path`` (when set) and to a daily
    archive at ``<_log_dir>/crashes/<YYYY-MM>/<YYYY-MM-DD>.jsonl`` (when
    ``_log_dir`` is set). Failures while writing either file are ignored.

    Args:
        exc_type: Exception class, or a falsy value (recorded as "Unknown").
        exc_value: Exception instance; its ``str()`` becomes the message.
        exc_tb: Traceback object, or ``None``.
        thread_name: Thread name to record; defaults to the current thread.
        severity: Free-form severity label stored in the record.
        handled: Whether the caller caught the exception itself.
    """
    entry = {"timestamp": datetime.now(timezone.utc).isoformat()}
    entry["module"] = MODULE_NAME
    entry["thread"] = thread_name or threading.current_thread().name
    entry["exception_type"] = exc_type.__name__ if exc_type else "Unknown"
    entry["exception_message"] = str(exc_value)
    entry["traceback"] = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
    entry["severity"] = severity
    entry["handled"] = handled
    entry["process_id"] = os.getpid()
    entry["platform"] = sys.platform
    entry["runtime_version"] = f"Python {sys.version.split()[0]}"

    # Describe the innermost frame, i.e. where the exception was raised.
    frames = traceback.extract_tb(exc_tb) if exc_tb else None
    if frames:
        innermost = frames[-1]
        entry["context"] = {
            "function": innermost.name,
            "file": os.path.basename(innermost.filename),
            "line": innermost.lineno,
        }

    serialized = json.dumps(entry, ensure_ascii=False) + "\n"

    if _crash_log_path:
        try:
            with open(_crash_log_path, "a", encoding="utf-8") as fh:
                fh.write(serialized)
        except Exception:
            pass

    if not _log_dir:
        return
    try:
        day = datetime.now().strftime("%Y-%m-%d")
        month_dir = os.path.join(_log_dir, "crashes", day[:7])
        os.makedirs(month_dir, exist_ok=True)
        with open(os.path.join(month_dir, f"{day}.jsonl"), "a", encoding="utf-8") as fh:
            fh.write(serialized)
    except Exception:
        pass
155
+
156
def _print_crash_summary(exc_type, exc_tb, thread_name=None):
    """Print a one-line, red-highlighted crash summary to the console.

    The summary names the exception type and the ``file:line`` of the
    innermost traceback frame ("unknown" when no frame is available). With
    ``thread_name`` the message says which thread crashed. A second line
    gives the crash-log path when ``_crash_log_path`` is set. Output goes
    through the original built-in ``print``.
    """
    RED, RESET = "\033[91m", "\033[0m"

    location = "unknown"
    if exc_tb:
        frames = traceback.extract_tb(exc_tb)
        if frames:
            innermost = frames[-1]
            location = f"{os.path.basename(innermost.filename)}:{innermost.lineno}"

    tag = f"[{MODULE_NAME}]"
    if thread_name:
        # "Thread <name> crashed: ..."
        headline = f"{tag} {RED}线程 {thread_name} 崩溃: {exc_type.__name__} in {location}{RESET}"
    else:
        # "Crashed: ..."
        headline = f"{tag} {RED}崩溃: {exc_type.__name__} in {location}{RESET}"
    _builtin_print(headline)

    if _crash_log_path:
        # "Crash log: <path>"
        _builtin_print(f"{tag} 崩溃日志: {_crash_log_path}")
179
+
180
def _setup_exception_hooks():
    """Install process-wide hooks that record uncaught exceptions.

    ``sys.excepthook`` is wrapped: the crash is written via ``_write_crash``,
    summarized on the console, and then passed to the previously installed
    hook. When ``threading.excepthook`` exists it is replaced with a hook that
    records and summarizes uncaught thread exceptions (severity "error").

    ``SystemExit`` raised in a thread is skipped: a custom
    ``threading.excepthook`` still receives it, but it signals a deliberate
    thread exit rather than a crash.
    """
    previous_hook = sys.excepthook

    def _excepthook(exc_type, exc_value, exc_tb):
        _write_crash(exc_type, exc_value, exc_tb, severity="critical", handled=False)
        _print_crash_summary(exc_type, exc_tb)
        previous_hook(exc_type, exc_value, exc_tb)

    sys.excepthook = _excepthook

    if hasattr(threading, "excepthook"):
        def _thread_excepthook(args):
            if args.exc_type is not None and issubclass(args.exc_type, SystemExit):
                return
            thread = args.thread
            _write_crash(args.exc_type, args.exc_value, args.exc_traceback,
                         thread_name=thread.name if thread else "unknown",
                         severity="error", handled=False)
            _print_crash_summary(args.exc_type, args.exc_traceback,
                                 thread_name=thread.name if thread else None)

        threading.excepthook = _thread_excepthook
200
+
201
def _tprint(*args, **kwargs):
    """Replacement for ``print`` that also mirrors each call into the log files.

    The console output is produced by the original built-in ``print`` with the
    arguments untouched. When a log file is configured, the same text (ANSI
    sequences stripped) is appended to the logs with a prefix of the form
    ``[elapsed] HH:MM:SS.mmm +delta``, where *elapsed* is the time since module
    start and *delta* the time since the previous call.

    ``sep=None`` and ``end=None`` are treated as the ``print`` defaults
    (``" "`` and ``"\\n"``), so calls that are valid for ``print`` do not fail
    while building the log line.
    """
    global _last_ts
    now = time.monotonic()
    elapsed = now - _start_ts
    delta = now - _last_ts
    _last_ts = now

    def _span(seconds: float) -> str:
        # ms below 1s, one decimal below 100s, whole seconds above.
        if seconds < 1:
            return f"{seconds * 1000:.0f}ms"
        if seconds < 100:
            return f"{seconds:.1f}s"
        return f"{seconds:.0f}s"

    elapsed_str = _span(elapsed)
    # Sub-millisecond gaps are shown as an empty delta column.
    delta_str = "" if delta < 0.001 else f"+{_span(delta)}"

    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]  # trim to milliseconds

    _builtin_print(*args, **kwargs)

    if _log_latest_path or _log_daily_path:
        sep = kwargs.get("sep")
        if sep is None:
            sep = " "
        end = kwargs.get("end")
        if end is None:
            end = "\n"
        text = sep.join(str(a) for a in args)
        prefix = f"[{elapsed_str:>6}] {ts} {delta_str:>8} "
        _write_log(prefix + _strip_ansi(text) + end)
235
+
236
+ builtins.print = _tprint
237
+
238
+ # Ensure project root is on sys.path (set by main.py or cli.js)
239
+ _project_root = os.environ.get("KITE_PROJECT") or os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
240
+ if _project_root not in sys.path:
241
+ sys.path.insert(0, _project_root)
242
+
243
+ from extensions.services.watchdog.monitor import HealthMonitor
244
+ from extensions.services.watchdog.server import WatchdogServer
245
+
246
+
247
def _get_free_port() -> int:
    """Return a TCP port number that was free on 127.0.0.1 when probed.

    A socket is bound to port 0 so the OS chooses the port; the socket is
    closed again before returning.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("127.0.0.1", 0))
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()
251
+
252
+
253
def _register_to_registry(client: httpx.Client, token: str, registry_url: str, port: int, _t0: float):
    """Register the watchdog module with the Registry, retrying for up to 10s.

    POSTs the module description to ``{registry_url}/modules`` with a bearer
    token. A 200 response ends the call with a success message. A 401 or a
    raised exception is retried every 0.3s until the deadline, because the
    watchdog starts in parallel with token registration. Any other status, or
    a failure after the deadline, prints a warning and returns.

    Args:
        client: HTTP client used for the request.
        token: Bearer token sent in the ``Authorization`` header.
        registry_url: Base URL of the Registry.
        port: Local port advertised as this module's API endpoint.
        _t0: ``time.monotonic()`` start reference for the elapsed-time message.
    """
    published_events = {
        "watchdog.module.unhealthy": {"description": "Module failed health check"},
        "watchdog.module.recovered": {"description": "Module recovered from unhealthy"},
        "watchdog.alert": {"description": "Module restarted too many times"},
    }
    subscribed_events = [
        "module.started",
        "module.stopped",
        "module.exiting",
        "module.ready",
        "module.shutdown",
    ]
    body = {
        "action": "register",
        "module_id": "watchdog",
        "module_type": "service",
        "name": "Watchdog",
        "api_endpoint": f"http://127.0.0.1:{port}",
        "health_endpoint": "/health",
        "events_publish": published_events,
        "events_subscribe": subscribed_events,
    }
    auth = {"Authorization": f"Bearer {token}"}
    give_up_at = time.monotonic() + 10

    while True:
        try:
            resp = client.post(f"{registry_url}/modules", json=body, headers=auth)
            status = resp.status_code
            if status == 200:
                print(f"[watchdog] Registered to Registry ({_fmt_elapsed(_t0)})")
                return
            if status != 401 or time.monotonic() >= give_up_at:
                print(f"[watchdog] WARNING: Registry returned {status}")
                return
        except Exception as e:
            if time.monotonic() >= give_up_at:
                print(f"[watchdog] WARNING: Registry registration failed: {e}")
                return
        # 401 or transient error with time left: wait briefly, then retry.
        time.sleep(0.3)
297
+
298
+
299
def _get_launcher_url(client: httpx.Client, token: str, registry_url: str) -> str:
    """Look up the Launcher API endpoint in the Registry, retrying for up to 5s.

    Polls ``{registry_url}/get/launcher.api_endpoint`` every 100ms and returns
    the first non-empty JSON value from a 200 response. Request errors and
    undecodable bodies are treated as "not available yet". Returns ``""`` when
    nothing was found before the deadline.

    The deadline uses the monotonic clock so a wall-clock adjustment cannot
    shorten or extend the wait.
    """
    headers = {"Authorization": f"Bearer {token}"}
    deadline = time.monotonic() + 5  # Launcher registers quickly
    while time.monotonic() < deadline:
        try:
            resp = client.get(
                f"{registry_url}/get/launcher.api_endpoint",
                headers=headers,
            )
            if resp.status_code == 200:
                val = resp.json()
                if val:
                    return val
        except Exception:
            # Registry not reachable or value not published yet: keep polling.
            pass
        time.sleep(0.1)
    return ""
318
+
319
+
320
def _get_event_hub_ws(client: httpx.Client, token: str, registry_url: str) -> str:
    """Look up the Event Hub WebSocket endpoint in the Registry, retrying for up to 10s.

    Polls ``{registry_url}/get/event_hub.metadata.ws_endpoint`` every 200ms
    and returns the first non-empty JSON value from a 200 response. Request
    errors and undecodable bodies are treated as "not available yet". Returns
    ``""`` when nothing was found before the deadline.

    The deadline uses the monotonic clock so a wall-clock adjustment cannot
    shorten or extend the wait.
    """
    headers = {"Authorization": f"Bearer {token}"}
    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        try:
            resp = client.get(
                f"{registry_url}/get/event_hub.metadata.ws_endpoint",
                headers=headers,
            )
            if resp.status_code == 200:
                val = resp.json()
                if val:
                    return val
        except Exception:
            # Registry not reachable or value not published yet: keep polling.
            pass
        time.sleep(0.2)
    return ""
339
+
340
+
341
+ def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict | None:
342
+ """Read a single kite message of expected type from stdin with timeout."""
343
+ result = [None]
344
+
345
+ def _read():
346
+ try:
347
+ line = sys.stdin.readline().strip()
348
+ if line:
349
+ msg = json.loads(line)
350
+ if isinstance(msg, dict) and msg.get("kite") == expected_type:
351
+ result[0] = msg
352
+ except Exception:
353
+ pass
354
+
355
+ t = threading.Thread(target=_read, daemon=True)
356
+ t.start()
357
+ t.join(timeout=timeout)
358
+ return result[0]
359
+
360
+
361
def _parse_port(raw) -> int:
    """Convert ``raw`` to a TCP port number, returning 0 when it is not one."""
    try:
        port = int(raw)
    except (TypeError, ValueError):
        return 0
    return port if 0 < port < 65536 else 0


def main():
    """Start the watchdog: set up logging, register, discover peers, serve.

    Steps, in order:
      1. When ``KITE_MODULE_DATA`` is set, create ``<data>/log`` and start
         fresh ``latest<suffix>.log`` / ``crashes<suffix>.jsonl`` files. A
         failure here disables the affected log target instead of aborting.
      2. Install the global exception hooks.
      3. Read the boot-info JSON line from stdin for the token.
      4. Take the Registry port from ``KITE_REGISTRY_PORT``, falling back to a
         ``registry_port`` kite message on stdin (10s timeout).
      5. Register with the Registry and look up the Launcher URL and the
         Event Hub WebSocket endpoint.
      6. Run the watchdog HTTP server on a free local port.

    Exits with status 1 when the token or Registry port is missing, or when
    the server raises.
    """
    global _log_dir, _log_latest_path, _crash_log_path

    module_data = os.environ.get("KITE_MODULE_DATA")
    if module_data:
        _log_dir = os.path.join(module_data, "log")
        try:
            os.makedirs(_log_dir, exist_ok=True)
        except OSError:
            # Logging is best effort: run without log files rather than abort.
            _log_dir = None

    if _log_dir:
        suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")

        # Opening with "w" truncates, so both files start empty for this run.
        _log_latest_path = os.path.join(_log_dir, f"latest{suffix}.log")
        try:
            with open(_log_latest_path, "w", encoding="utf-8"):
                pass
        except Exception:
            _log_latest_path = None

        _crash_log_path = os.path.join(_log_dir, f"crashes{suffix}.jsonl")
        try:
            with open(_crash_log_path, "w", encoding="utf-8"):
                pass
        except Exception:
            _crash_log_path = None

        try:
            _resolve_daily_log_path()
        except OSError:
            pass

    _setup_exception_hooks()

    _t0 = time.monotonic()

    # Read boot_info from stdin (only token)
    token = ""
    try:
        line = sys.stdin.readline().strip()
        if line:
            boot_info = json.loads(line)
            token = boot_info.get("token", "")
    except Exception:
        pass
    if not isinstance(token, str):
        # len(token) below requires a string; anything else counts as missing.
        token = ""

    # Read registry_port: env first (fast path), stdin fallback (parallel start)
    registry_port = _parse_port(os.environ.get("KITE_REGISTRY_PORT", "0"))
    if not registry_port:
        msg = _read_stdin_kite_message("registry_port", timeout=10)
        if msg:
            registry_port = _parse_port(msg.get("registry_port", 0))

    if not token or not registry_port:
        print("[watchdog] ERROR: Missing token or registry_port")
        sys.exit(1)

    print(f"[watchdog] Token received ({len(token)} chars), registry port: {registry_port} ({_fmt_elapsed(_t0)})")

    registry_url = f"http://127.0.0.1:{registry_port}"
    port = _get_free_port()

    # The context manager closes the client even if a step below raises.
    with httpx.Client(timeout=5) as client:
        # Register to Registry
        _register_to_registry(client, token, registry_url, port, _t0)

        # Discover Launcher URL
        launcher_url = _get_launcher_url(client, token, registry_url)
        if not launcher_url:
            print("[watchdog] WARNING: Could not discover Launcher URL, restart disabled")

        # Discover Event Hub WebSocket URL
        event_hub_ws = _get_event_hub_ws(client, token, registry_url)
        if not event_hub_ws:
            print("[watchdog] WARNING: Could not discover Event Hub WS, events disabled")
        else:
            print(f"[watchdog] Discovered Event Hub: {event_hub_ws}")

    # Create monitor and server
    monitor = HealthMonitor(
        own_token=token,
        registry_url=registry_url,
        launcher_url=launcher_url,
    )
    server = WatchdogServer(monitor, token=token, event_hub_ws=event_hub_ws)

    print(f"[watchdog] Starting on port {port} ({_fmt_elapsed(_t0)})")
    try:
        config = uvicorn.Config(server.app, host="127.0.0.1", port=port, log_level="warning")
        uvi_server = uvicorn.Server(config)
        # Hand the uvicorn server object to the WatchdogServer instance.
        server._uvicorn_server = uvi_server
        uvi_server.run()
    except Exception as e:
        _write_crash(type(e), e, e.__traceback__, severity="critical", handled=True)
        _print_crash_summary(type(e), e.__traceback__)
        sys.exit(1)
457
+
458
+
459
+ if __name__ == "__main__":
460
+ main()
@@ -13,6 +13,9 @@ events:
13
13
  subscriptions:
14
14
  - module.started
15
15
  - module.stopped
16
+ - module.exiting
17
+ - module.ready
18
+ - module.shutdown
16
19
  ---
17
20
 
18
21
  # Watchdog(保活模块)