@agentunion/kite 1.0.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +127 -25
- package/core/event_hub/entry.py +384 -61
- package/core/event_hub/hub.py +8 -0
- package/core/event_hub/module.md +0 -1
- package/core/event_hub/server.py +169 -38
- package/core/kite_log.py +241 -0
- package/core/launcher/entry.py +1306 -425
- package/core/launcher/module_scanner.py +10 -9
- package/core/launcher/process_manager.py +555 -121
- package/core/registry/entry.py +335 -30
- package/core/registry/server.py +339 -256
- package/core/registry/store.py +13 -2
- package/extensions/agents/__init__.py +1 -0
- package/extensions/agents/assistant/__init__.py +1 -0
- package/extensions/agents/assistant/entry.py +380 -0
- package/extensions/agents/assistant/module.md +22 -0
- package/extensions/agents/assistant/server.py +236 -0
- package/extensions/channels/__init__.py +1 -0
- package/extensions/channels/acp_channel/__init__.py +1 -0
- package/extensions/channels/acp_channel/entry.py +380 -0
- package/extensions/channels/acp_channel/module.md +22 -0
- package/extensions/channels/acp_channel/server.py +236 -0
- package/{core → extensions}/event_hub_bench/entry.py +664 -371
- package/{core → extensions}/event_hub_bench/module.md +4 -2
- package/extensions/services/backup/__init__.py +1 -0
- package/extensions/services/backup/entry.py +380 -0
- package/extensions/services/backup/module.md +22 -0
- package/extensions/services/backup/server.py +244 -0
- package/extensions/services/model_service/__init__.py +1 -0
- package/extensions/services/model_service/entry.py +380 -0
- package/extensions/services/model_service/module.md +22 -0
- package/extensions/services/model_service/server.py +236 -0
- package/extensions/services/watchdog/entry.py +460 -143
- package/extensions/services/watchdog/module.md +3 -0
- package/extensions/services/watchdog/monitor.py +128 -13
- package/extensions/services/watchdog/server.py +75 -13
- package/extensions/services/web/__init__.py +1 -0
- package/extensions/services/web/config.yaml +149 -0
- package/extensions/services/web/entry.py +487 -0
- package/extensions/services/web/module.md +24 -0
- package/extensions/services/web/routes/__init__.py +1 -0
- package/extensions/services/web/routes/routes_call.py +189 -0
- package/extensions/services/web/routes/routes_config.py +512 -0
- package/extensions/services/web/routes/routes_contacts.py +98 -0
- package/extensions/services/web/routes/routes_devlog.py +99 -0
- package/extensions/services/web/routes/routes_phone.py +81 -0
- package/extensions/services/web/routes/routes_sms.py +48 -0
- package/extensions/services/web/routes/routes_stats.py +17 -0
- package/extensions/services/web/routes/routes_voicechat.py +554 -0
- package/extensions/services/web/routes/schemas.py +216 -0
- package/extensions/services/web/server.py +332 -0
- package/extensions/services/web/static/css/style.css +1064 -0
- package/extensions/services/web/static/index.html +1445 -0
- package/extensions/services/web/static/js/app.js +4671 -0
- package/extensions/services/web/vendor/__init__.py +1 -0
- package/extensions/services/web/vendor/bluetooth/audio.py +348 -0
- package/extensions/services/web/vendor/bluetooth/contacts.py +251 -0
- package/extensions/services/web/vendor/bluetooth/manager.py +395 -0
- package/extensions/services/web/vendor/bluetooth/sms.py +290 -0
- package/extensions/services/web/vendor/bluetooth/telephony.py +274 -0
- package/extensions/services/web/vendor/config.py +139 -0
- package/extensions/services/web/vendor/conversation/__init__.py +0 -0
- package/extensions/services/web/vendor/conversation/asr.py +936 -0
- package/extensions/services/web/vendor/conversation/engine.py +548 -0
- package/extensions/services/web/vendor/conversation/llm.py +534 -0
- package/extensions/services/web/vendor/conversation/mcp_tools.py +190 -0
- package/extensions/services/web/vendor/conversation/tts.py +322 -0
- package/extensions/services/web/vendor/conversation/vad.py +138 -0
- package/extensions/services/web/vendor/storage/__init__.py +1 -0
- package/extensions/services/web/vendor/storage/identity.py +312 -0
- package/extensions/services/web/vendor/storage/store.py +507 -0
- package/extensions/services/web/vendor/task/__init__.py +0 -0
- package/extensions/services/web/vendor/task/manager.py +864 -0
- package/extensions/services/web/vendor/task/models.py +45 -0
- package/extensions/services/web/vendor/task/webhook.py +263 -0
- package/extensions/services/web/vendor/tools/__init__.py +0 -0
- package/extensions/services/web/vendor/tools/registry.py +321 -0
- package/main.py +344 -4
- package/package.json +11 -2
- package/core/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/__pycache__/data_dir.cpython-313.pyc +0 -0
- package/core/data_dir.py +0 -62
- package/core/event_hub/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/bench.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/bench_perf.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/dedup.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/hub.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/router.cpython-313.pyc +0 -0
- package/core/event_hub/__pycache__/server.cpython-313.pyc +0 -0
- package/core/event_hub/bench_results/2026-02-28_13-26-48.json +0 -51
- package/core/event_hub/bench_results/2026-02-28_13-44-45.json +0 -51
- package/core/event_hub/bench_results/2026-02-28_13-45-39.json +0 -51
- package/core/launcher/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/module_scanner.cpython-313.pyc +0 -0
- package/core/launcher/__pycache__/process_manager.cpython-313.pyc +0 -0
- package/core/launcher/data/log/lifecycle.jsonl +0 -1158
- package/core/launcher/data/token.txt +0 -1
- package/core/registry/__pycache__/__init__.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/entry.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/server.cpython-313.pyc +0 -0
- package/core/registry/__pycache__/store.cpython-313.pyc +0 -0
- package/core/registry/data/port.txt +0 -1
- package/core/registry/data/port_484.txt +0 -1
- package/extensions/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/__init__.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/entry.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/monitor.cpython-313.pyc +0 -0
- package/extensions/services/watchdog/__pycache__/server.cpython-313.pyc +0 -0
- /package/{core/event_hub/bench_results/.gitkeep → extensions/services/web/vendor/bluetooth/__init__.py} +0 -0
|
@@ -1,143 +1,460 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Watchdog entry point.
|
|
3
|
-
Reads boot_info from stdin, registers to Registry, starts health monitor.
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
import
|
|
8
|
-
import
|
|
9
|
-
import
|
|
10
|
-
|
|
11
|
-
import
|
|
12
|
-
import
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
1
|
+
"""
|
|
2
|
+
Watchdog entry point.
|
|
3
|
+
Reads boot_info from stdin, registers to Registry, starts health monitor.
|
|
4
|
+
Registry port: env KITE_REGISTRY_PORT (fast path) or stdin kite message (parallel start).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import builtins
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import socket
|
|
12
|
+
import sys
|
|
13
|
+
import threading
|
|
14
|
+
import time
|
|
15
|
+
import traceback
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
import uvicorn
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# ── Module configuration ──
|
|
24
|
+
MODULE_NAME = "watchdog"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _fmt_elapsed(t0: float) -> str:
    """Render the time elapsed since ``t0`` as a short human-readable string.

    ``t0`` is an earlier ``time.monotonic()`` reading. Durations under one
    second are shown in whole milliseconds ('NNNms'), durations under ten
    seconds with one decimal ('N.Ns'), and anything longer in whole
    seconds ('NNs').
    """
    seconds = time.monotonic() - t0
    if seconds < 1:
        millis = seconds * 1000
        return f"{millis:.0f}ms"
    # One decimal place below ten seconds, none from ten seconds upward.
    precision = 1 if seconds < 10 else 0
    return f"{seconds:.{precision}f}s"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Safe stdout/stderr: ignore BrokenPipeError after Launcher closes stdio ──
|
|
38
|
+
|
|
39
|
+
class _SafeWriter:
    """Stream proxy whose ``write``/``flush`` never raise on I/O failure.

    Used to wrap stdout/stderr so that output attempted after the Launcher
    has closed the stdio pipes is dropped instead of raising. Any ``OSError``
    (``BrokenPipeError`` is a subclass) raised by the wrapped stream is
    swallowed. Every other attribute is delegated to the wrapped stream.
    """

    def __init__(self, stream):
        self._stream = stream

    def write(self, s):
        """Write ``s`` to the wrapped stream and return the character count.

        When the wrapped stream fails with ``OSError`` the text is discarded
        and ``len(s)`` is returned, so callers that inspect the count see the
        write as completed rather than as a short write.
        """
        try:
            written = self._stream.write(s)
        except OSError:
            return len(s)
        # Some stream-like objects return None from write(); report a full write.
        return len(s) if written is None else written

    def flush(self):
        """Flush the wrapped stream, ignoring ``OSError``."""
        try:
            self._stream.flush()
        except OSError:
            pass

    def __getattr__(self, name):
        # Only reached for attributes not defined on the proxy itself.
        return getattr(self._stream, name)
|
|
58
|
+
|
|
59
|
+
sys.stdout = _SafeWriter(sys.stdout)
|
|
60
|
+
sys.stderr = _SafeWriter(sys.stderr)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ── Timestamped print + log file writer ──
|
|
64
|
+
# Independent implementation per module (no shared code dependency)
|
|
65
|
+
|
|
66
|
+
_builtin_print = builtins.print
|
|
67
|
+
_start_ts = time.monotonic()
|
|
68
|
+
_last_ts = time.monotonic()
|
|
69
|
+
_ANSI_RE = re.compile(r"\033\[[0-9;]*m")
|
|
70
|
+
_log_lock = threading.Lock()
|
|
71
|
+
_log_latest_path = None
|
|
72
|
+
_log_daily_path = None
|
|
73
|
+
_log_daily_date = ""
|
|
74
|
+
_log_dir = None
|
|
75
|
+
_crash_log_path = None
|
|
76
|
+
|
|
77
|
+
def _strip_ansi(s: str) -> str:
|
|
78
|
+
return _ANSI_RE.sub("", s)
|
|
79
|
+
|
|
80
|
+
def _resolve_daily_log_path():
    """Point ``_log_daily_path`` at today's log file, creating its month folder.

    The daily log lives at ``<_log_dir>/<YYYY-MM>/<YYYY-MM-DD>.log``. Nothing
    happens when file logging is not configured (``_log_dir`` unset) or when
    the cached path already belongs to today's date.

    If the month directory cannot be created, the daily log is disabled
    (path reset to ``None``) instead of raising: this function is called from
    the print/log path, where an exception would break every ``print()``.
    Because the cached date is cleared as well, the next call retries.
    """
    global _log_daily_path, _log_daily_date
    if not _log_dir:
        return
    today = datetime.now().strftime("%Y-%m-%d")
    if today == _log_daily_date and _log_daily_path:
        return
    month_dir = os.path.join(_log_dir, today[:7])  # "YYYY-MM"
    try:
        os.makedirs(month_dir, exist_ok=True)
    except OSError:
        # Logging is best-effort; never let it take the process down.
        _log_daily_path = None
        _log_daily_date = ""
        return
    _log_daily_path = os.path.join(month_dir, f"{today}.log")
    _log_daily_date = today
|
|
92
|
+
|
|
93
|
+
def _write_log(plain_line: str):
    """Append ``plain_line`` to latest.log and then to the current daily log.

    Both appends happen while holding ``_log_lock``. A target whose path is
    unset is skipped, and any error while opening or writing a file is
    ignored so that logging never interferes with the caller.
    """

    def _append(path):
        if not path:
            return
        try:
            with open(path, "a", encoding="utf-8") as fh:
                fh.write(plain_line)
        except Exception:
            pass

    with _log_lock:
        _append(_log_latest_path)
        # Refresh the daily path first so a date rollover lands in the new file.
        _resolve_daily_log_path()
        _append(_log_daily_path)
|
|
109
|
+
|
|
110
|
+
def _write_crash(exc_type, exc_value, exc_tb, thread_name=None, severity="critical", handled=False):
    """Serialise one crash as a JSON line and append it to the crash logs.

    The record is appended to ``_crash_log_path`` (when set) and to the daily
    archive ``<_log_dir>/crashes/<YYYY-MM>/<YYYY-MM-DD>.jsonl`` (when
    ``_log_dir`` is set). Failures while writing either file are ignored.

    ``thread_name`` falls back to the current thread's name. When a traceback
    is available, the innermost frame is recorded under ``"context"``.
    """
    formatted_tb = "".join(traceback.format_exception(exc_type, exc_value, exc_tb))
    record = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "module": MODULE_NAME,
        "thread": thread_name or threading.current_thread().name,
        "exception_type": exc_type.__name__ if exc_type else "Unknown",
        "exception_message": str(exc_value),
        "traceback": formatted_tb,
        "severity": severity,
        "handled": handled,
        "process_id": os.getpid(),
        "platform": sys.platform,
        "runtime_version": f"Python {sys.version.split()[0]}",
    }

    frames = traceback.extract_tb(exc_tb) if exc_tb else None
    if frames:
        innermost = frames[-1]
        record["context"] = {
            "function": innermost.name,
            "file": os.path.basename(innermost.filename),
            "line": innermost.lineno,
        }

    serialized = json.dumps(record, ensure_ascii=False) + "\n"

    def _append(path):
        with open(path, "a", encoding="utf-8") as fh:
            fh.write(serialized)

    # Per-run crash file.
    if _crash_log_path:
        try:
            _append(_crash_log_path)
        except Exception:
            pass

    # Daily archive, grouped into one folder per month.
    if _log_dir:
        try:
            day = datetime.now().strftime("%Y-%m-%d")
            month_dir = os.path.join(_log_dir, "crashes", day[:7])
            os.makedirs(month_dir, exist_ok=True)
            _append(os.path.join(month_dir, f"{day}.jsonl"))
        except Exception:
            pass
|
|
155
|
+
|
|
156
|
+
def _print_crash_summary(exc_type, exc_tb, thread_name=None):
    """Print a one-line crash summary to the console, highlighted in red.

    Args:
        exc_type: Exception class, or ``None`` when unknown (reported as
            "Unknown", matching the crash record written by ``_write_crash``).
        exc_tb: Traceback used to locate the innermost frame; the location is
            reported as "unknown" when it is missing or empty.
        thread_name: When given, the summary names the crashed thread.

    A second line with the crash log path is printed when one is configured.
    Output goes through the original ``print`` so the summary is not mirrored
    into the log files.
    """
    RED = "\033[91m"
    RESET = "\033[0m"

    location = "unknown"
    if exc_tb:
        tb_entries = traceback.extract_tb(exc_tb)
        if tb_entries:
            last = tb_entries[-1]
            location = f"{os.path.basename(last.filename)}:{last.lineno}"

    # Tolerate a missing exception type instead of raising AttributeError
    # from inside an exception hook.
    exc_name = exc_type.__name__ if exc_type else "Unknown"

    prefix = f"[{MODULE_NAME}]"
    if thread_name:
        _builtin_print(f"{prefix} {RED}线程 {thread_name} 崩溃: "
                       f"{exc_name} in {location}{RESET}")
    else:
        _builtin_print(f"{prefix} {RED}崩溃: {exc_name} in {location}{RESET}")
    if _crash_log_path:
        _builtin_print(f"{prefix} 崩溃日志: {_crash_log_path}")
|
|
179
|
+
|
|
180
|
+
def _setup_exception_hooks():
    """Install process-wide hooks that record uncaught exceptions as crashes.

    * ``sys.excepthook``: writes a "critical" crash record, prints the
      console summary, then chains to the previously installed hook.
    * ``threading.excepthook`` (when the interpreter provides it): writes an
      "error" crash record and prints the console summary naming the thread.
      ``SystemExit`` raised in a thread is ignored, as the default
      ``threading.excepthook`` does, so that a thread ending through
      ``sys.exit()`` is not reported as a crash.
    """
    previous_hook = sys.excepthook

    def _excepthook(exc_type, exc_value, exc_tb):
        _write_crash(exc_type, exc_value, exc_tb, severity="critical", handled=False)
        _print_crash_summary(exc_type, exc_tb)
        previous_hook(exc_type, exc_value, exc_tb)

    sys.excepthook = _excepthook

    if hasattr(threading, "excepthook"):
        def _thread_excepthook(args):
            if args.exc_type is SystemExit:
                # A thread exiting deliberately is not a crash.
                return
            _write_crash(args.exc_type, args.exc_value, args.exc_traceback,
                         thread_name=args.thread.name if args.thread else "unknown",
                         severity="error", handled=False)
            _print_crash_summary(args.exc_type, args.exc_traceback,
                                 thread_name=args.thread.name if args.thread else None)

        threading.excepthook = _thread_excepthook
|
|
200
|
+
|
|
201
|
+
def _tprint(*args, **kwargs):
    """Drop-in ``print`` replacement that also mirrors output to the log files.

    The console output is passed unchanged to the original ``print``. When a
    log file is configured, the same text (ANSI colours stripped) is appended
    to the logs with a ``[elapsed] HH:MM:SS.mmm +delta`` prefix, where
    *elapsed* is the time since module load and *delta* the time since the
    previous call.

    ``sep=None`` and ``end=None`` are accepted and mean the defaults
    (``" "`` and ``"\\n"``), exactly as for the built-in ``print``.
    """
    global _last_ts
    now = time.monotonic()
    elapsed = now - _start_ts
    delta = now - _last_ts
    _last_ts = now

    # Wall-clock stamp taken before printing so it reflects the call time.
    ts = datetime.now().strftime("%H:%M:%S.%f")[:-3]

    _builtin_print(*args, **kwargs)

    if not (_log_latest_path or _log_daily_path):
        return

    if elapsed < 1:
        elapsed_str = f"{elapsed * 1000:.0f}ms"
    elif elapsed < 100:
        elapsed_str = f"{elapsed:.1f}s"
    else:
        elapsed_str = f"{elapsed:.0f}s"

    if delta < 0.001:
        delta_str = ""  # effectively simultaneous with the previous line
    elif delta < 1:
        delta_str = f"+{delta * 1000:.0f}ms"
    elif delta < 100:
        delta_str = f"+{delta:.1f}s"
    else:
        delta_str = f"+{delta:.0f}s"

    # print() treats None as "use the default"; mirror that here so that
    # print(..., sep=None) / print(..., end=None) do not crash the logger.
    sep = kwargs.get("sep")
    if sep is None:
        sep = " "
    end = kwargs.get("end")
    if end is None:
        end = "\n"

    text = sep.join(str(a) for a in args)
    prefix = f"[{elapsed_str:>6}] {ts} {delta_str:>8} "
    _write_log(prefix + _strip_ansi(text) + end)
|
|
235
|
+
|
|
236
|
+
builtins.print = _tprint
|
|
237
|
+
|
|
238
|
+
# Ensure project root is on sys.path (set by main.py or cli.js)
|
|
239
|
+
_project_root = os.environ.get("KITE_PROJECT") or os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
|
240
|
+
if _project_root not in sys.path:
|
|
241
|
+
sys.path.insert(0, _project_root)
|
|
242
|
+
|
|
243
|
+
from extensions.services.watchdog.monitor import HealthMonitor
|
|
244
|
+
from extensions.services.watchdog.server import WatchdogServer
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _get_free_port() -> int:
    """Return a TCP port number that was free on 127.0.0.1 a moment ago.

    Binds a throwaway socket to port 0 so the OS picks an unused port, reads
    the assigned number and closes the socket again.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("127.0.0.1", 0))
        _host, assigned_port = probe.getsockname()
        return assigned_port
    finally:
        probe.close()
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def _register_to_registry(client: httpx.Client, token: str, registry_url: str, port: int, _t0: float):
    """Register the watchdog module with the Registry, retrying for up to 10 s.

    POSTs the module description to ``{registry_url}/modules`` using a Bearer
    token. A 200 response ends the attempt successfully. A 401 response or
    any exception is retried every 0.3 s until the deadline, because the
    watchdog starts in parallel with token registration. Any other status,
    or running out of time, prints a warning and returns; nothing is raised.

    ``_t0`` is the startup ``time.monotonic()`` reading used for the elapsed
    time shown in the success message.
    """
    registration = {
        "action": "register",
        "module_id": "watchdog",
        "module_type": "service",
        "name": "Watchdog",
        "api_endpoint": f"http://127.0.0.1:{port}",
        "health_endpoint": "/health",
        "events_publish": {
            "watchdog.module.unhealthy": {"description": "Module failed health check"},
            "watchdog.module.recovered": {"description": "Module recovered from unhealthy"},
            "watchdog.alert": {"description": "Module restarted too many times"},
        },
        "events_subscribe": [
            "module.started",
            "module.stopped",
            "module.exiting",
            "module.ready",
            "module.shutdown",
        ],
    }
    auth = {"Authorization": f"Bearer {token}"}
    modules_url = f"{registry_url}/modules"
    give_up_at = time.monotonic() + 10

    while True:
        try:
            resp = client.post(modules_url, json=registration, headers=auth)
            status = resp.status_code
            if status == 200:
                print(f"[watchdog] Registered to Registry ({_fmt_elapsed(_t0)})")
                return
            # Only an unauthorised answer inside the retry window is retried.
            if status != 401 or time.monotonic() >= give_up_at:
                print(f"[watchdog] WARNING: Registry returned {status}")
                return
            time.sleep(0.3)
        except Exception as err:
            if time.monotonic() >= give_up_at:
                print(f"[watchdog] WARNING: Registry registration failed: {err}")
                return
            time.sleep(0.3)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def _get_launcher_url(client: httpx.Client, token: str, registry_url: str) -> str:
    """Discover the Launcher API endpoint from the Registry, with retry.

    Polls ``GET {registry_url}/get/launcher.api_endpoint`` roughly every
    100 ms for up to 5 s (the Launcher registers quickly).

    Returns:
        The first truthy JSON body received with a 200 response, or ``""``
        when nothing was found before the deadline. Request and decoding
        errors are treated as "not available yet" and retried.
    """
    headers = {"Authorization": f"Bearer {token}"}
    url = f"{registry_url}/get/launcher.api_endpoint"
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted (same approach as _register_to_registry).
    deadline = time.monotonic() + 5
    while time.monotonic() < deadline:
        try:
            resp = client.get(url, headers=headers)
            if resp.status_code == 200:
                val = resp.json()
                if val:
                    return val
        except Exception:
            # Registry unreachable or answer not decodable yet: keep polling.
            pass
        time.sleep(0.1)  # Retry every 100ms
    return ""
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _get_event_hub_ws(client: httpx.Client, token: str, registry_url: str) -> str:
    """Discover the Event Hub WebSocket endpoint from the Registry, with retry.

    Polls ``GET {registry_url}/get/event_hub.metadata.ws_endpoint`` roughly
    every 200 ms for up to 10 s.

    Returns:
        The first truthy JSON body received with a 200 response, or ``""``
        when nothing was found before the deadline. Request and decoding
        errors are treated as "not available yet" and retried.
    """
    headers = {"Authorization": f"Bearer {token}"}
    url = f"{registry_url}/get/event_hub.metadata.ws_endpoint"
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted (same approach as _register_to_registry).
    deadline = time.monotonic() + 10
    while time.monotonic() < deadline:
        try:
            resp = client.get(url, headers=headers)
            if resp.status_code == 200:
                val = resp.json()
                if val:
                    return val
        except Exception:
            # Registry unreachable or answer not decodable yet: keep polling.
            pass
        time.sleep(0.2)
    return ""
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict | None:
|
|
342
|
+
"""Read a single kite message of expected type from stdin with timeout."""
|
|
343
|
+
result = [None]
|
|
344
|
+
|
|
345
|
+
def _read():
|
|
346
|
+
try:
|
|
347
|
+
line = sys.stdin.readline().strip()
|
|
348
|
+
if line:
|
|
349
|
+
msg = json.loads(line)
|
|
350
|
+
if isinstance(msg, dict) and msg.get("kite") == expected_type:
|
|
351
|
+
result[0] = msg
|
|
352
|
+
except Exception:
|
|
353
|
+
pass
|
|
354
|
+
|
|
355
|
+
t = threading.Thread(target=_read, daemon=True)
|
|
356
|
+
t.start()
|
|
357
|
+
t.join(timeout=timeout)
|
|
358
|
+
return result[0]
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _init_log_files(module_data):
    """Set up file logging under ``<module_data>/log`` for this run.

    Creates the directory, truncates ``latest<suffix>.log`` and
    ``crashes<suffix>.jsonl`` (suffix from ``KITE_INSTANCE_SUFFIX``) and
    resolves today's daily log path. Logging is best-effort: a target that
    cannot be created is left disabled instead of aborting startup.
    """
    global _log_dir, _log_latest_path, _crash_log_path
    log_dir = os.path.join(module_data, "log")
    try:
        os.makedirs(log_dir, exist_ok=True)
    except OSError:
        return  # no usable log directory: run without file logging
    _log_dir = log_dir
    suffix = os.environ.get("KITE_INSTANCE_SUFFIX", "")

    def _truncate(path):
        # Start the file empty; return None when it cannot be opened.
        try:
            with open(path, "w", encoding="utf-8"):
                pass
        except (OSError, ValueError):
            return None
        return path

    _log_latest_path = _truncate(os.path.join(_log_dir, f"latest{suffix}.log"))
    _crash_log_path = _truncate(os.path.join(_log_dir, f"crashes{suffix}.jsonl"))

    try:
        _resolve_daily_log_path()
    except OSError:
        pass


def _parse_port(raw):
    """Return ``raw`` as a TCP port number, or 0 when it is not a valid one."""
    try:
        port = int(raw)
    except (TypeError, ValueError):
        return 0
    return port if 0 < port < 65536 else 0


def main():
    """Start the watchdog: set up logging, register, then serve until shutdown.

    Steps:
      1. Configure file logging under ``KITE_MODULE_DATA`` (when set) and
         install the crash hooks.
      2. Read the boot_info JSON line from stdin to obtain the token.
      3. Obtain the Registry port from ``KITE_REGISTRY_PORT`` or, failing
         that, from a ``registry_port`` kite message on stdin.
      4. Register with the Registry and discover the Launcher URL and the
         Event Hub WebSocket endpoint (both optional; a warning is printed
         when one is missing).
      5. Run the watchdog HTTP server with uvicorn on a free loopback port.

    Exits with status 1 when the token or Registry port is missing or
    invalid, or when the server fails with an exception.
    """
    module_data = os.environ.get("KITE_MODULE_DATA")
    if module_data:
        _init_log_files(module_data)

    _setup_exception_hooks()

    _t0 = time.monotonic()

    # Read boot_info from stdin (only token)
    token = ""
    try:
        line = sys.stdin.readline().strip()
        if line:
            boot_info = json.loads(line)
            if isinstance(boot_info, dict):
                candidate = boot_info.get("token", "")
                if isinstance(candidate, str):
                    token = candidate
    except Exception:
        # An unreadable or malformed boot line is reported below as a
        # missing token.
        pass

    # Read registry_port: env first (fast path), stdin fallback (parallel start)
    registry_port = _parse_port(os.environ.get("KITE_REGISTRY_PORT", "0"))
    if not registry_port:
        msg = _read_stdin_kite_message("registry_port", timeout=10)
        if msg:
            registry_port = _parse_port(msg.get("registry_port", 0))

    if not token or not registry_port:
        print("[watchdog] ERROR: Missing token or registry_port")
        sys.exit(1)

    print(f"[watchdog] Token received ({len(token)} chars), registry port: {registry_port} ({_fmt_elapsed(_t0)})")

    registry_url = f"http://127.0.0.1:{registry_port}"
    port = _get_free_port()

    # The context manager closes the client even if discovery raises.
    with httpx.Client(timeout=5) as client:
        # Register to Registry
        _register_to_registry(client, token, registry_url, port, _t0)

        # Discover Launcher URL
        launcher_url = _get_launcher_url(client, token, registry_url)
        if not launcher_url:
            print("[watchdog] WARNING: Could not discover Launcher URL, restart disabled")

        # Discover Event Hub WebSocket URL
        event_hub_ws = _get_event_hub_ws(client, token, registry_url)
        if not event_hub_ws:
            print("[watchdog] WARNING: Could not discover Event Hub WS, events disabled")
        else:
            print(f"[watchdog] Discovered Event Hub: {event_hub_ws}")

    # Create monitor and server
    monitor = HealthMonitor(
        own_token=token,
        registry_url=registry_url,
        launcher_url=launcher_url,
    )
    server = WatchdogServer(monitor, token=token, event_hub_ws=event_hub_ws)

    print(f"[watchdog] Starting on port {port} ({_fmt_elapsed(_t0)})")
    try:
        config = uvicorn.Config(server.app, host="127.0.0.1", port=port, log_level="warning")
        uvi_server = uvicorn.Server(config)
        # Hand the uvicorn instance to the server object.
        server._uvicorn_server = uvi_server
        uvi_server.run()
    except Exception as e:
        _write_crash(type(e), e, e.__traceback__, severity="critical", handled=True)
        _print_crash_summary(type(e), e.__traceback__)
        sys.exit(1)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
if __name__ == "__main__":
|
|
460
|
+
main()
|