@agentunion/kite 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +208 -0
- package/README.md +48 -0
- package/cli.js +1 -1
- package/extensions/agents/assistant/entry.py +30 -81
- package/extensions/agents/assistant/module.md +1 -1
- package/extensions/agents/assistant/server.py +83 -122
- package/extensions/channels/acp_channel/entry.py +30 -81
- package/extensions/channels/acp_channel/module.md +1 -1
- package/extensions/channels/acp_channel/server.py +83 -122
- package/extensions/event_hub_bench/entry.py +81 -121
- package/extensions/services/backup/entry.py +213 -85
- package/extensions/services/model_service/entry.py +213 -85
- package/extensions/services/watchdog/entry.py +513 -460
- package/extensions/services/watchdog/monitor.py +55 -69
- package/extensions/services/web/entry.py +11 -108
- package/extensions/services/web/server.py +120 -77
- package/{core/registry → kernel}/entry.py +65 -37
- package/{core/event_hub/hub.py → kernel/event_hub.py} +61 -81
- package/kernel/module.md +33 -0
- package/{core/registry/store.py → kernel/registry_store.py} +13 -4
- package/kernel/rpc_router.py +388 -0
- package/kernel/server.py +267 -0
- package/launcher/__init__.py +10 -0
- package/launcher/__main__.py +6 -0
- package/launcher/count_lines.py +258 -0
- package/{core/launcher → launcher}/entry.py +693 -767
- package/launcher/logging_setup.py +289 -0
- package/{core/launcher → launcher}/module_scanner.py +11 -6
- package/main.py +11 -350
- package/package.json +6 -9
- package/__init__.py +0 -1
- package/__main__.py +0 -15
- package/core/event_hub/BENCHMARK.md +0 -94
- package/core/event_hub/__init__.py +0 -0
- package/core/event_hub/bench.py +0 -459
- package/core/event_hub/bench_extreme.py +0 -308
- package/core/event_hub/bench_perf.py +0 -350
- package/core/event_hub/entry.py +0 -436
- package/core/event_hub/module.md +0 -20
- package/core/event_hub/server.py +0 -269
- package/core/kite_log.py +0 -241
- package/core/launcher/__init__.py +0 -0
- package/core/registry/__init__.py +0 -0
- package/core/registry/module.md +0 -30
- package/core/registry/server.py +0 -339
- package/extensions/services/backup/server.py +0 -244
- package/extensions/services/model_service/server.py +0 -236
- package/extensions/services/watchdog/server.py +0 -229
- /package/{core → kernel}/__init__.py +0 -0
- /package/{core/event_hub → kernel}/dedup.py +0 -0
- /package/{core/event_hub → kernel}/router.py +0 -0
- /package/{core/launcher → launcher}/module.md +0 -0
- /package/{core/launcher → launcher}/process_manager.py +0 -0
|
@@ -9,8 +9,6 @@ import json
|
|
|
9
9
|
import time
|
|
10
10
|
from datetime import datetime, timezone
|
|
11
11
|
|
|
12
|
-
import httpx
|
|
13
|
-
|
|
14
12
|
|
|
15
13
|
# Module health states
|
|
16
14
|
HEALTHY = "healthy"
|
|
@@ -68,12 +66,11 @@ class HealthMonitor:
|
|
|
68
66
|
# Check intervals per resource state
|
|
69
67
|
INTERVALS = {NORMAL: 15, WARNING: 5, CRITICAL: 2}
|
|
70
68
|
|
|
71
|
-
def __init__(self, own_token: str,
|
|
72
|
-
publish_event=None):
|
|
69
|
+
def __init__(self, own_token: str, kernel_port: int, publish_event=None):
|
|
73
70
|
self.own_token = own_token
|
|
74
|
-
self.
|
|
75
|
-
self.launcher_url = launcher_url
|
|
71
|
+
self.kernel_port = kernel_port
|
|
76
72
|
self.publish_event = publish_event # async callable(event_dict)
|
|
73
|
+
self.rpc_call = None # set by entry.py: async callable(method, params)
|
|
77
74
|
self.modules: dict[str, ModuleStatus] = {}
|
|
78
75
|
self._running = False
|
|
79
76
|
self._psutil = None # lazy import
|
|
@@ -81,7 +78,7 @@ class HealthMonitor:
|
|
|
81
78
|
# Restart decision state (module.exiting / module.stopped / module.ready)
|
|
82
79
|
self._exit_intents: dict[str, str] = {} # module_id -> action from module.exiting
|
|
83
80
|
self._graceful_modules: dict[str, bool] = { # module_id -> supports graceful shutdown
|
|
84
|
-
"
|
|
81
|
+
"kernel": True, # started before Watchdog, default True
|
|
85
82
|
}
|
|
86
83
|
self._system_shutting_down = False
|
|
87
84
|
self._system_ready = False
|
|
@@ -91,43 +88,34 @@ class HealthMonitor:
|
|
|
91
88
|
# ── Module discovery ──
|
|
92
89
|
|
|
93
90
|
async def discover_modules(self):
|
|
94
|
-
"""Fetch monitored modules from Launcher
|
|
95
|
-
# Step 1: Get module list with monitor/pid from Launcher
|
|
96
|
-
monitored = {} # name ->
|
|
91
|
+
"""Fetch monitored modules from Launcher + Registry via RPC."""
|
|
92
|
+
# Step 1: Get module list with monitor/pid from Launcher via RPC
|
|
93
|
+
monitored = {} # name -> pid
|
|
97
94
|
try:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
)
|
|
102
|
-
if
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
continue
|
|
107
|
-
if m.get("actual_state", "").startswith("running"):
|
|
108
|
-
monitored[name] = m.get("pid")
|
|
95
|
+
resp = await self.rpc_call("launcher.list_modules", {})
|
|
96
|
+
result = resp.get("result", {})
|
|
97
|
+
for m in result.get("modules", []):
|
|
98
|
+
name = m.get("name", "")
|
|
99
|
+
if name == "watchdog" or not m.get("monitor", True):
|
|
100
|
+
continue
|
|
101
|
+
if m.get("actual_state", "").startswith("running"):
|
|
102
|
+
monitored[name] = m.get("pid")
|
|
109
103
|
except Exception as e:
|
|
110
|
-
print(f"[watchdog] Launcher
|
|
104
|
+
print(f"[watchdog] Launcher RPC failed: {e}")
|
|
111
105
|
return
|
|
112
106
|
|
|
113
|
-
# Step 2: Get health endpoints from Registry
|
|
107
|
+
# Step 2: Get health endpoints from Registry via RPC
|
|
114
108
|
health_map = {} # name -> {api_endpoint, health_endpoint}
|
|
115
|
-
headers = {"Authorization": f"Bearer {self.own_token}"}
|
|
116
109
|
try:
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if mid in monitored:
|
|
127
|
-
health_map[mid] = {
|
|
128
|
-
"api_endpoint": entry.get("api_endpoint", ""),
|
|
129
|
-
"health_endpoint": entry.get("value", "/health"),
|
|
130
|
-
}
|
|
110
|
+
resp = await self.rpc_call("registry.lookup", {"field": "health_endpoint"})
|
|
111
|
+
result = resp.get("result", {})
|
|
112
|
+
for entry in result.get("results", []):
|
|
113
|
+
mid = entry.get("module", "")
|
|
114
|
+
if mid in monitored:
|
|
115
|
+
health_map[mid] = {
|
|
116
|
+
"api_endpoint": entry.get("api_endpoint", ""),
|
|
117
|
+
"health_endpoint": entry.get("value", "/health"),
|
|
118
|
+
}
|
|
131
119
|
except Exception:
|
|
132
120
|
pass
|
|
133
121
|
|
|
@@ -215,28 +203,27 @@ class HealthMonitor:
|
|
|
215
203
|
# ── Restart via Launcher API ──
|
|
216
204
|
|
|
217
205
|
async def _restart_module(self, status: ModuleStatus):
|
|
218
|
-
"""Restart a module via Launcher
|
|
206
|
+
"""Restart a module via Launcher RPC."""
|
|
219
207
|
mid = status.module_id
|
|
220
208
|
print(f"[watchdog] Restarting {mid} (attempt {status.restarted_count + 1}/{self.MAX_RESTARTS})")
|
|
221
209
|
try:
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
print(f"[watchdog] {mid} restart failed: HTTP {resp.status_code}")
|
|
210
|
+
resp = await self.rpc_call("launcher.restart_module", {
|
|
211
|
+
"name": mid,
|
|
212
|
+
"reason": "resource_critical" if status.resource_state == CRITICAL else "restart",
|
|
213
|
+
})
|
|
214
|
+
result = resp.get("result", {})
|
|
215
|
+
if result.get("status") == "restarted":
|
|
216
|
+
status.restarted_count += 1
|
|
217
|
+
status.fail_count = 0
|
|
218
|
+
print(f"[watchdog] {mid} restart requested")
|
|
219
|
+
if status.restarted_count >= self.ALERT_AFTER_RESTARTS:
|
|
220
|
+
await self._publish("watchdog.alert", {
|
|
221
|
+
"module_id": mid,
|
|
222
|
+
"restarted_count": status.restarted_count,
|
|
223
|
+
"message": f"{mid} has been restarted {status.restarted_count} times",
|
|
224
|
+
})
|
|
225
|
+
else:
|
|
226
|
+
print(f"[watchdog] {mid} restart failed: {result}")
|
|
240
227
|
except Exception as e:
|
|
241
228
|
print(f"[watchdog] {mid} restart error: {e}")
|
|
242
229
|
|
|
@@ -366,7 +353,7 @@ class HealthMonitor:
|
|
|
366
353
|
# ── Incoming event handler ──
|
|
367
354
|
|
|
368
355
|
async def handle_event(self, msg: dict):
|
|
369
|
-
"""Handle events from
|
|
356
|
+
"""Handle events from Kernel — restart decisions + health tracking."""
|
|
370
357
|
event_type = msg.get("event", "")
|
|
371
358
|
data = msg.get("data", {})
|
|
372
359
|
module_id = data.get("module_id", "")
|
|
@@ -457,19 +444,18 @@ class HealthMonitor:
|
|
|
457
444
|
})
|
|
458
445
|
|
|
459
446
|
async def _restart_module_by_id(self, module_id: str, reason: str = "restart"):
|
|
460
|
-
"""Restart a module via Launcher
|
|
447
|
+
"""Restart a module via Launcher RPC by module_id."""
|
|
461
448
|
print(f"[watchdog] Requesting restart for {module_id} (reason={reason})")
|
|
462
449
|
try:
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
print(f"[watchdog] {module_id} restart failed: HTTP {resp.status_code}")
|
|
450
|
+
resp = await self.rpc_call("launcher.restart_module", {
|
|
451
|
+
"name": module_id,
|
|
452
|
+
"reason": reason,
|
|
453
|
+
})
|
|
454
|
+
result = resp.get("result", {})
|
|
455
|
+
if result.get("status") == "restarted":
|
|
456
|
+
print(f"[watchdog] {module_id} restart requested successfully")
|
|
457
|
+
else:
|
|
458
|
+
print(f"[watchdog] {module_id} restart failed: {result}")
|
|
473
459
|
except Exception as e:
|
|
474
460
|
print(f"[watchdog] {module_id} restart error: {e}")
|
|
475
461
|
|
|
@@ -17,7 +17,8 @@ import traceback
|
|
|
17
17
|
import uuid
|
|
18
18
|
from datetime import datetime, timezone
|
|
19
19
|
|
|
20
|
-
import
|
|
20
|
+
import asyncio
|
|
21
|
+
import websockets
|
|
21
22
|
import uvicorn
|
|
22
23
|
|
|
23
24
|
|
|
@@ -300,77 +301,6 @@ def _bind_port(preferred: int, host: str, max_attempts: int = 10) -> int | None:
|
|
|
300
301
|
return None
|
|
301
302
|
|
|
302
303
|
|
|
303
|
-
def _register_to_registry(client: httpx.Client, token: str, registry_url: str, host: str, port: int):
|
|
304
|
-
payload = {
|
|
305
|
-
"action": "register",
|
|
306
|
-
"module_id": "web",
|
|
307
|
-
"module_type": "service",
|
|
308
|
-
"name": "Web Management",
|
|
309
|
-
"api_endpoint": f"http://127.0.0.1:{port}",
|
|
310
|
-
"health_endpoint": "/health",
|
|
311
|
-
"events_publish": {
|
|
312
|
-
"web.test": {"description": "Test event from web module"},
|
|
313
|
-
},
|
|
314
|
-
"events_subscribe": [
|
|
315
|
-
"module.started",
|
|
316
|
-
"module.stopped",
|
|
317
|
-
"module.shutdown",
|
|
318
|
-
],
|
|
319
|
-
}
|
|
320
|
-
headers = {"Authorization": f"Bearer {token}"}
|
|
321
|
-
try:
|
|
322
|
-
resp = client.post(f"{registry_url}/modules", json=payload, headers=headers)
|
|
323
|
-
if resp.status_code == 200:
|
|
324
|
-
pass # timing printed in main()
|
|
325
|
-
else:
|
|
326
|
-
print(f"[web] WARNING: Registry returned {resp.status_code}")
|
|
327
|
-
except Exception as e:
|
|
328
|
-
print(f"[web] WARNING: Registry registration failed: {e}")
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
def _get_event_hub_ws(client: httpx.Client, token: str, registry_url: str) -> str:
|
|
332
|
-
"""Discover Event Hub WebSocket endpoint from Registry, with retry."""
|
|
333
|
-
headers = {"Authorization": f"Bearer {token}"}
|
|
334
|
-
deadline = time.time() + 10
|
|
335
|
-
while time.time() < deadline:
|
|
336
|
-
try:
|
|
337
|
-
resp = client.get(
|
|
338
|
-
f"{registry_url}/get/event_hub.metadata.ws_endpoint",
|
|
339
|
-
headers=headers,
|
|
340
|
-
)
|
|
341
|
-
if resp.status_code == 200:
|
|
342
|
-
val = resp.json()
|
|
343
|
-
if val:
|
|
344
|
-
return val
|
|
345
|
-
except Exception:
|
|
346
|
-
pass
|
|
347
|
-
time.sleep(0.2)
|
|
348
|
-
return ""
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
def _send_exiting_event(ws_url: str, token: str, reason: str):
|
|
352
|
-
"""Send module.exiting event to Event Hub before exit. Best-effort, non-blocking."""
|
|
353
|
-
try:
|
|
354
|
-
import websockets.sync.client as ws_sync
|
|
355
|
-
url = f"{ws_url}?token={token}&id=web"
|
|
356
|
-
with ws_sync.connect(url, close_timeout=3) as ws:
|
|
357
|
-
msg = {
|
|
358
|
-
"type": "event",
|
|
359
|
-
"event_id": str(uuid.uuid4()),
|
|
360
|
-
"event": "module.exiting",
|
|
361
|
-
"source": "web",
|
|
362
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
363
|
-
"data": {
|
|
364
|
-
"module_id": "web",
|
|
365
|
-
"reason": reason,
|
|
366
|
-
"action": "none",
|
|
367
|
-
},
|
|
368
|
-
}
|
|
369
|
-
ws.send(json.dumps(msg))
|
|
370
|
-
# Brief wait for delivery
|
|
371
|
-
time.sleep(0.3)
|
|
372
|
-
except Exception as e:
|
|
373
|
-
print(f"[web] WARNING: Could not send module.exiting: {e}")
|
|
374
304
|
|
|
375
305
|
|
|
376
306
|
def main():
|
|
@@ -412,55 +342,28 @@ def main():
|
|
|
412
342
|
except Exception:
|
|
413
343
|
pass
|
|
414
344
|
|
|
415
|
-
# Read
|
|
416
|
-
|
|
345
|
+
# Read kernel_port from environment variable
|
|
346
|
+
kernel_port = int(os.environ.get("KITE_KERNEL_PORT", "0"))
|
|
417
347
|
|
|
418
|
-
if not token or not
|
|
419
|
-
print("[web] ERROR: Missing token or
|
|
348
|
+
if not token or not kernel_port:
|
|
349
|
+
print("[web] ERROR: Missing token or KITE_KERNEL_PORT")
|
|
420
350
|
sys.exit(1)
|
|
421
351
|
|
|
422
|
-
print(f"[web] Token received ({len(token)} chars),
|
|
352
|
+
print(f"[web] Token received ({len(token)} chars), kernel port: {kernel_port} ({_fmt_elapsed(_t0)})")
|
|
423
353
|
|
|
424
354
|
# Read preferred_port from module.md
|
|
425
355
|
md_cfg = _read_module_md()
|
|
426
356
|
host = md_cfg["advertise_ip"]
|
|
427
357
|
port = _bind_port(md_cfg["preferred_port"], host)
|
|
428
358
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
# If port binding failed after 10 attempts, exit gracefully (no watchdog restart)
|
|
359
|
+
# If port binding failed after 10 attempts, exit gracefully
|
|
432
360
|
if port is None:
|
|
433
|
-
print("[web] ERROR: Cannot bind to any port,
|
|
434
|
-
|
|
435
|
-
# Try to discover Event Hub and send module.exiting event
|
|
436
|
-
client = httpx.Client(timeout=5)
|
|
437
|
-
event_hub_ws = _get_event_hub_ws(client, token, registry_url)
|
|
438
|
-
client.close()
|
|
439
|
-
|
|
440
|
-
if event_hub_ws:
|
|
441
|
-
reason = f"Port binding failed after 10 attempts ({md_cfg['preferred_port']}-{md_cfg['preferred_port']+9})"
|
|
442
|
-
_send_exiting_event(event_hub_ws, token, reason)
|
|
443
|
-
print("[web] module.exiting event sent")
|
|
444
|
-
else:
|
|
445
|
-
print("[web] WARNING: Could not discover Event Hub, exiting without event")
|
|
446
|
-
|
|
447
|
-
sys.exit(1) # Exit code 1 = startup failure
|
|
448
|
-
|
|
449
|
-
# Register and discover Event Hub synchronously before starting uvicorn
|
|
450
|
-
client = httpx.Client(timeout=5)
|
|
451
|
-
_register_to_registry(client, token, registry_url, host, port)
|
|
452
|
-
print(f"[web] Registered to Registry ({_fmt_elapsed(_t0)})")
|
|
453
|
-
event_hub_ws = _get_event_hub_ws(client, token, registry_url)
|
|
454
|
-
if not event_hub_ws:
|
|
455
|
-
print("[web] WARNING: Could not discover Event Hub WS, events disabled")
|
|
456
|
-
else:
|
|
457
|
-
print(f"[web] Discovered Event Hub: {event_hub_ws}")
|
|
458
|
-
client.close()
|
|
361
|
+
print("[web] ERROR: Cannot bind to any port, exiting")
|
|
362
|
+
sys.exit(1)
|
|
459
363
|
|
|
460
364
|
server = WebServer(
|
|
461
365
|
token=token,
|
|
462
|
-
|
|
463
|
-
event_hub_ws=event_hub_ws,
|
|
366
|
+
kernel_port=kernel_port,
|
|
464
367
|
host=host,
|
|
465
368
|
port=port,
|
|
466
369
|
boot_t0=_t0,
|
|
@@ -2,8 +2,7 @@
|
|
|
2
2
|
Web Management HTTP server.
|
|
3
3
|
Full web UI with all AI Phone Agent API endpoints.
|
|
4
4
|
Exposes /health, /status, static frontend, and all /api/* routes.
|
|
5
|
-
Connects to
|
|
6
|
-
Sends periodic heartbeat to Registry and test events to Event Hub.
|
|
5
|
+
Connects to Kernel via WebSocket JSON-RPC 2.0 for event publishing and subscription.
|
|
7
6
|
"""
|
|
8
7
|
|
|
9
8
|
import asyncio
|
|
@@ -14,7 +13,6 @@ import uuid
|
|
|
14
13
|
from datetime import datetime, timezone
|
|
15
14
|
from pathlib import Path
|
|
16
15
|
|
|
17
|
-
import httpx
|
|
18
16
|
import websockets
|
|
19
17
|
from fastapi import FastAPI
|
|
20
18
|
from fastapi.staticfiles import StaticFiles
|
|
@@ -38,17 +36,14 @@ logger = logging.getLogger(__name__)
|
|
|
38
36
|
|
|
39
37
|
class WebServer:
|
|
40
38
|
|
|
41
|
-
def __init__(self, token: str = "",
|
|
42
|
-
event_hub_ws: str = "",
|
|
39
|
+
def __init__(self, token: str = "", kernel_port: int = 0,
|
|
43
40
|
host: str = "0.0.0.0", port: int = 0, boot_t0: float = 0):
|
|
44
41
|
self.token = token
|
|
45
|
-
self.
|
|
46
|
-
self.event_hub_ws = event_hub_ws
|
|
42
|
+
self.kernel_port = kernel_port
|
|
47
43
|
self.host = host
|
|
48
44
|
self.port = port
|
|
49
45
|
self.boot_t0 = boot_t0
|
|
50
46
|
self._ws_task: asyncio.Task | None = None
|
|
51
|
-
self._heartbeat_task: asyncio.Task | None = None
|
|
52
47
|
self._test_task: asyncio.Task | None = None
|
|
53
48
|
self._ws: object | None = None
|
|
54
49
|
self._ready_sent = False
|
|
@@ -102,15 +97,12 @@ class WebServer:
|
|
|
102
97
|
logger.info("Web Management: managers initialized")
|
|
103
98
|
|
|
104
99
|
# Start background tasks directly
|
|
105
|
-
|
|
106
|
-
if server.event_hub_ws:
|
|
100
|
+
if server.kernel_port:
|
|
107
101
|
server._ws_task = asyncio.create_task(server._ws_loop())
|
|
108
102
|
server._test_task = asyncio.create_task(server._test_event_loop())
|
|
109
103
|
|
|
110
104
|
@app.on_event("shutdown")
|
|
111
105
|
async def _shutdown():
|
|
112
|
-
if server._heartbeat_task:
|
|
113
|
-
server._heartbeat_task.cancel()
|
|
114
106
|
if server._ws_task:
|
|
115
107
|
server._ws_task.cancel()
|
|
116
108
|
if server._test_task:
|
|
@@ -127,7 +119,7 @@ class WebServer:
|
|
|
127
119
|
return {
|
|
128
120
|
"status": "healthy",
|
|
129
121
|
"details": {
|
|
130
|
-
"
|
|
122
|
+
"kernel_connected": server._ws is not None,
|
|
131
123
|
"uptime_seconds": round(time.time() - server._start_time),
|
|
132
124
|
},
|
|
133
125
|
}
|
|
@@ -158,10 +150,10 @@ class WebServer:
|
|
|
158
150
|
|
|
159
151
|
return app
|
|
160
152
|
|
|
161
|
-
# ──
|
|
153
|
+
# ── Kernel WebSocket client ──
|
|
162
154
|
|
|
163
155
|
async def _ws_loop(self):
|
|
164
|
-
"""Connect to
|
|
156
|
+
"""Connect to Kernel, subscribe, register, and listen. Reconnect on failure."""
|
|
165
157
|
retry_delay = 0.5 # start with 0.5s
|
|
166
158
|
max_delay = 30 # cap at 30s
|
|
167
159
|
while not self._shutting_down:
|
|
@@ -171,7 +163,7 @@ class WebServer:
|
|
|
171
163
|
except asyncio.CancelledError:
|
|
172
164
|
return
|
|
173
165
|
except Exception as e:
|
|
174
|
-
print(f"[web]
|
|
166
|
+
print(f"[web] Kernel connection error: {e}, retrying in {retry_delay:.1f}s")
|
|
175
167
|
self._ws = None
|
|
176
168
|
if self._shutting_down:
|
|
177
169
|
return
|
|
@@ -179,35 +171,52 @@ class WebServer:
|
|
|
179
171
|
retry_delay = min(retry_delay * 2, max_delay) # exponential backoff
|
|
180
172
|
|
|
181
173
|
async def _ws_connect(self):
|
|
182
|
-
"""Single WebSocket session: connect, subscribe, receive loop."""
|
|
183
|
-
url = f"{self.
|
|
184
|
-
print(f"[web] WS connecting to
|
|
185
|
-
async with websockets.connect(url, open_timeout=
|
|
174
|
+
"""Single WebSocket session: connect, register, subscribe, receive loop."""
|
|
175
|
+
url = f"ws://127.0.0.1:{self.kernel_port}/ws?token={self.token}&id=web"
|
|
176
|
+
print(f"[web] WS connecting to Kernel")
|
|
177
|
+
async with websockets.connect(url, open_timeout=5, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
|
|
186
178
|
self._ws = ws
|
|
187
179
|
elapsed = time.monotonic() - self.boot_t0 if self.boot_t0 else 0
|
|
188
180
|
elapsed_str = f" ({elapsed:.1f}s)" if elapsed else ""
|
|
189
|
-
print(f"[web] Connected to
|
|
181
|
+
print(f"[web] Connected to Kernel{elapsed_str}")
|
|
182
|
+
|
|
183
|
+
# Subscribe to events
|
|
184
|
+
await self._rpc_call(ws, "event.subscribe", {
|
|
185
|
+
"events": [
|
|
186
|
+
"module.started",
|
|
187
|
+
"module.stopped",
|
|
188
|
+
"module.shutdown",
|
|
189
|
+
],
|
|
190
|
+
})
|
|
190
191
|
|
|
191
|
-
#
|
|
192
|
-
await
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
|
|
192
|
+
# Register to Kernel Registry via RPC
|
|
193
|
+
await self._rpc_call(ws, "registry.register", {
|
|
194
|
+
"module_id": "web",
|
|
195
|
+
"module_type": "service",
|
|
196
|
+
"api_endpoint": f"http://127.0.0.1:{self.port}",
|
|
197
|
+
"health_endpoint": "/health",
|
|
198
|
+
"events_publish": {
|
|
199
|
+
"web.test": {"description": "Test event from web module"},
|
|
200
|
+
"web.started": {"description": "Web UI started with access URL"},
|
|
201
|
+
},
|
|
202
|
+
"events_subscribe": [
|
|
203
|
+
"module.started",
|
|
204
|
+
"module.stopped",
|
|
205
|
+
"module.shutdown",
|
|
206
|
+
],
|
|
207
|
+
})
|
|
208
|
+
print(f"[web] Registered to Kernel{elapsed_str}")
|
|
196
209
|
|
|
197
210
|
# Send module.ready (once) so Launcher knows we're up
|
|
198
211
|
if not self._ready_sent:
|
|
199
|
-
|
|
200
|
-
"type": "event",
|
|
212
|
+
await self._rpc_call(ws, "event.publish", {
|
|
201
213
|
"event_id": str(uuid.uuid4()),
|
|
202
214
|
"event": "module.ready",
|
|
203
|
-
"source": "web",
|
|
204
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
205
215
|
"data": {
|
|
206
216
|
"module_id": "web",
|
|
207
217
|
"graceful_shutdown": True,
|
|
208
218
|
},
|
|
209
|
-
}
|
|
210
|
-
await ws.send(json.dumps(ready_msg))
|
|
219
|
+
})
|
|
211
220
|
self._ready_sent = True
|
|
212
221
|
elapsed = time.monotonic() - self.boot_t0 if self.boot_t0 else 0
|
|
213
222
|
elapsed_str = f" ({elapsed:.1f}s)" if elapsed else ""
|
|
@@ -225,7 +234,6 @@ class WebServer:
|
|
|
225
234
|
"port": self.port,
|
|
226
235
|
},
|
|
227
236
|
})
|
|
228
|
-
print(f"[web] \033[32m✓ Web UI ready: {access_url}\033[0m")
|
|
229
237
|
|
|
230
238
|
# Receive loop
|
|
231
239
|
async for raw in ws:
|
|
@@ -235,22 +243,84 @@ class WebServer:
|
|
|
235
243
|
continue
|
|
236
244
|
|
|
237
245
|
try:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
elif msg_type == "error":
|
|
249
|
-
print(f"[web] Event Hub error: {msg.get('message')}")
|
|
246
|
+
has_method = "method" in msg
|
|
247
|
+
has_id = "id" in msg
|
|
248
|
+
|
|
249
|
+
if has_method and not has_id:
|
|
250
|
+
# Event Notification
|
|
251
|
+
await self._handle_event_notification(msg)
|
|
252
|
+
elif has_method and has_id:
|
|
253
|
+
# Incoming RPC request
|
|
254
|
+
await self._handle_rpc_request(ws, msg)
|
|
255
|
+
# Ignore RPC responses (we don't await them in this simple impl)
|
|
250
256
|
except Exception as e:
|
|
251
|
-
print(f"[web]
|
|
257
|
+
print(f"[web] 消息处理异常(已忽略): {e}")
|
|
258
|
+
|
|
259
|
+
async def _rpc_call(self, ws, method: str, params: dict = None):
|
|
260
|
+
"""Send a JSON-RPC 2.0 request (fire-and-forget, no response awaited)."""
|
|
261
|
+
msg = {"jsonrpc": "2.0", "id": str(uuid.uuid4()), "method": method}
|
|
262
|
+
if params:
|
|
263
|
+
msg["params"] = params
|
|
264
|
+
await ws.send(json.dumps(msg))
|
|
265
|
+
|
|
266
|
+
async def _handle_event_notification(self, msg: dict):
|
|
267
|
+
"""Handle an event notification (JSON-RPC 2.0 Notification with method='event')."""
|
|
268
|
+
params = msg.get("params", {})
|
|
269
|
+
event_type = params.get("event", "")
|
|
270
|
+
data = params.get("data", {})
|
|
271
|
+
|
|
272
|
+
# Special handling for module.shutdown targeting web
|
|
273
|
+
if event_type == "module.shutdown" and data.get("module_id") == "web":
|
|
274
|
+
await self._handle_shutdown()
|
|
275
|
+
return
|
|
276
|
+
|
|
277
|
+
# Log other events
|
|
278
|
+
print(f"[web] Event received: {event_type}")
|
|
279
|
+
|
|
280
|
+
async def _handle_rpc_request(self, ws, msg: dict):
|
|
281
|
+
"""Handle an incoming RPC request (web.* methods)."""
|
|
282
|
+
rpc_id = msg.get("id", "")
|
|
283
|
+
method = msg.get("method", "")
|
|
284
|
+
params = msg.get("params", {})
|
|
285
|
+
|
|
286
|
+
handlers = {
|
|
287
|
+
"health": lambda p: self._rpc_health(),
|
|
288
|
+
"status": lambda p: self._rpc_status(),
|
|
289
|
+
}
|
|
290
|
+
handler = handlers.get(method)
|
|
291
|
+
if handler:
|
|
292
|
+
try:
|
|
293
|
+
result = await handler(params)
|
|
294
|
+
await ws.send(json.dumps({"jsonrpc": "2.0", "id": rpc_id, "result": result}))
|
|
295
|
+
except Exception as e:
|
|
296
|
+
await ws.send(json.dumps({
|
|
297
|
+
"jsonrpc": "2.0", "id": rpc_id,
|
|
298
|
+
"error": {"code": -32603, "message": str(e)},
|
|
299
|
+
}))
|
|
300
|
+
else:
|
|
301
|
+
await ws.send(json.dumps({
|
|
302
|
+
"jsonrpc": "2.0", "id": rpc_id,
|
|
303
|
+
"error": {"code": -32601, "message": f"Method not found: {method}"},
|
|
304
|
+
}))
|
|
305
|
+
|
|
306
|
+
async def _rpc_health(self) -> dict:
|
|
307
|
+
"""RPC handler for web.health."""
|
|
308
|
+
return {
|
|
309
|
+
"status": "healthy",
|
|
310
|
+
"details": {
|
|
311
|
+
"uptime_seconds": round(time.time() - self._start_time),
|
|
312
|
+
},
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
async def _rpc_status(self) -> dict:
|
|
316
|
+
"""RPC handler for web.status."""
|
|
317
|
+
return {
|
|
318
|
+
"module": "web",
|
|
319
|
+
"status": "running",
|
|
320
|
+
"uptime_seconds": round(time.time() - self._start_time),
|
|
321
|
+
}
|
|
252
322
|
|
|
253
|
-
async def _handle_shutdown(self
|
|
323
|
+
async def _handle_shutdown(self):
|
|
254
324
|
"""Handle module.shutdown: ack → cleanup → ready → exit."""
|
|
255
325
|
print("[web] Received module.shutdown")
|
|
256
326
|
self._shutting_down = True
|
|
@@ -263,8 +333,6 @@ class WebServer:
|
|
|
263
333
|
print("[web] shutdown ack sent")
|
|
264
334
|
|
|
265
335
|
# Step 2: Cleanup (cancel background tasks)
|
|
266
|
-
if self._heartbeat_task:
|
|
267
|
-
self._heartbeat_task.cancel()
|
|
268
336
|
if self._test_task:
|
|
269
337
|
self._test_task.cancel()
|
|
270
338
|
if self.bt_manager:
|
|
@@ -282,39 +350,14 @@ class WebServer:
|
|
|
282
350
|
self._uvicorn_server.should_exit = True
|
|
283
351
|
|
|
284
352
|
async def _publish_event(self, event: dict):
|
|
285
|
-
"""Publish an event
|
|
353
|
+
"""Publish an event via RPC event.publish."""
|
|
286
354
|
if not self._ws:
|
|
287
355
|
return
|
|
288
|
-
|
|
289
|
-
"type": "event",
|
|
356
|
+
await self._rpc_call(self._ws, "event.publish", {
|
|
290
357
|
"event_id": str(uuid.uuid4()),
|
|
291
358
|
"event": event.get("event", ""),
|
|
292
|
-
"source": "web",
|
|
293
|
-
"timestamp": datetime.now(timezone.utc).isoformat(),
|
|
294
359
|
"data": event.get("data", {}),
|
|
295
|
-
}
|
|
296
|
-
try:
|
|
297
|
-
await self._ws.send(json.dumps(msg))
|
|
298
|
-
except Exception as e:
|
|
299
|
-
print(f"[web] Failed to publish event: {e}")
|
|
300
|
-
|
|
301
|
-
# ── Heartbeat to Registry ──
|
|
302
|
-
|
|
303
|
-
async def _heartbeat_loop(self):
|
|
304
|
-
"""Send heartbeat to Registry every 30 seconds."""
|
|
305
|
-
while True:
|
|
306
|
-
await asyncio.sleep(30)
|
|
307
|
-
try:
|
|
308
|
-
async with httpx.AsyncClient() as client:
|
|
309
|
-
await client.post(
|
|
310
|
-
f"{self.registry_url}/modules",
|
|
311
|
-
json={"action": "heartbeat", "module_id": "web"},
|
|
312
|
-
headers={"Authorization": f"Bearer {self.token}"},
|
|
313
|
-
timeout=5,
|
|
314
|
-
)
|
|
315
|
-
print("[web] heartbeat sent")
|
|
316
|
-
except Exception:
|
|
317
|
-
pass
|
|
360
|
+
})
|
|
318
361
|
|
|
319
362
|
# ── Test event loop ──
|
|
320
363
|
|