@agentunion/kite 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -1
- package/extensions/agents/assistant/server.py +30 -12
- package/extensions/channels/acp_channel/server.py +30 -12
- package/extensions/services/backup/entry.py +123 -65
- package/extensions/services/model_service/entry.py +123 -65
- package/extensions/services/watchdog/entry.py +171 -80
- package/extensions/services/watchdog/monitor.py +112 -6
- package/extensions/services/web/routes/routes_modules.py +249 -0
- package/extensions/services/web/routes/schemas.py +22 -0
- package/extensions/services/web/server.py +37 -14
- package/extensions/services/web/static/css/style.css +97 -0
- package/extensions/services/web/static/index.html +105 -2
- package/extensions/services/web/static/js/app.js +288 -1
- package/kernel/event_hub.py +21 -3
- package/kernel/registry_store.py +22 -5
- package/kernel/rpc_router.py +15 -5
- package/kernel/server.py +75 -5
- package/launcher/count_lines.py +34 -0
- package/launcher/entry.py +92 -14
- package/launcher/process_manager.py +12 -1
- package/package.json +1 -1
|
@@ -263,10 +263,19 @@ def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict |
|
|
|
263
263
|
|
|
264
264
|
# Global WS reference for publish_event callback
|
|
265
265
|
_ws_global = None
|
|
266
|
+
_shutting_down = False
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _is_auth_failure(e: Exception) -> bool:
    """Return True when *e* indicates the Kernel rejected our credentials.

    Two exception shapes are recognised:

    * a connection-closed error carrying a received close frame (``e.rcvd``)
      whose ``code`` is 4001 or 4003;
    * a handshake rejection exposing an HTTP status of 401 or 403, either as
      ``e.response.status_code`` or directly as ``e.status_code``.

    Any other exception yields False, so the caller treats it as a
    retryable connection error.
    """
    close_frame = getattr(e, "rcvd", None)
    if close_frame is not None:
        # A close frame without a code is treated as code 0 (not an auth failure).
        return getattr(close_frame, "code", 0) in (4001, 4003)

    # Handshake-time rejection: no close frame exists, only an HTTP status.
    status = getattr(getattr(e, "response", None), "status_code", None)
    if status is None:
        status = getattr(e, "status_code", None)
    return status in (401, 403)
|
|
266
275
|
|
|
267
276
|
|
|
268
277
|
async def main():
|
|
269
|
-
global _ws_global
|
|
278
|
+
global _ws_global, _shutting_down
|
|
270
279
|
# Initialize log file paths
|
|
271
280
|
global _log_dir, _log_latest_path, _crash_log_path
|
|
272
281
|
module_data = os.environ.get("KITE_MODULE_DATA")
|
|
@@ -318,41 +327,84 @@ async def main():
|
|
|
318
327
|
|
|
319
328
|
print(f"[model_service] Token received ({len(token)} chars), kernel port: {kernel_port} ({_fmt_elapsed(_t0)})")
|
|
320
329
|
|
|
321
|
-
#
|
|
330
|
+
# Start reconnect loop
|
|
331
|
+
await _ws_loop(token, kernel_port, _t0)
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
async def _ws_loop(token: str, kernel_port: int, _t0: float):
    """Keep a Kernel WebSocket session running, reconnecting with backoff.

    Calls ``_ws_connect`` repeatedly until ``_shutting_down`` is set or the
    task is cancelled. The delay between attempts starts at 0.3 s, doubles
    after each wait and is capped at 5 s; a session that ends without raising
    resets both the delay and the failure counter.

    Args:
        token: Auth token forwarded to ``_ws_connect``.
        kernel_port: Kernel port forwarded to ``_ws_connect``.
        _t0: Start timestamp forwarded to ``_ws_connect`` for elapsed-time logs.

    Raises:
        SystemExit: With code 1 on an authentication failure, or after
            ``max_retries`` consecutive failed sessions.
    """
    initial_delay = 0.3
    max_delay = 5.0
    max_retries = 10
    retry_delay = initial_delay
    attempt = 0
    while not _shutting_down:
        try:
            await _ws_connect(token, kernel_port, _t0)
            # Session ended without an exception: start the next one fresh.
            retry_delay = initial_delay
            attempt = 0
        except asyncio.CancelledError:
            return
        except Exception as e:
            attempt += 1
            # Record the failure before any exit path so the error that
            # terminates the process is not lost.
            _write_crash(type(e), e, e.__traceback__, severity="error", handled=True)
            if _is_auth_failure(e):
                print("[model_service] Kernel 认证失败,退出")
                sys.exit(1)
            if attempt >= max_retries:
                print(f"[model_service] 重连失败 {max_retries} 次,退出")
                sys.exit(1)
            print(f"[model_service] 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
        # Runs after every session end (clean or failed): drop the stale
        # socket reference, honour a shutdown request, then back off.
        _ws_global_clear()
        if _shutting_down:
            return
        await asyncio.sleep(retry_delay)
        retry_delay = min(retry_delay * 2, max_delay)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def _ws_global_clear() -> None:
    """Reset the module-level ``_ws_global`` WebSocket reference to None."""
    global _ws_global
    _ws_global = None
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
async def _ws_connect(token: str, kernel_port: int, _t0: float):
|
|
371
|
+
"""Single WebSocket session: connect → subscribe → register → ready → receive loop."""
|
|
372
|
+
global _ws_global
|
|
373
|
+
|
|
322
374
|
ws_url = f"ws://127.0.0.1:{kernel_port}/ws?token={token}&id=model_service"
|
|
323
375
|
print(f"[model_service] Connecting to Kernel: {ws_url}")
|
|
324
376
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
"
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
"
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
"
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
print(f"[model_service] Registered to Kernel ({_fmt_elapsed(_t0)})")
|
|
377
|
+
async with websockets.connect(ws_url, open_timeout=5, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
|
|
378
|
+
_ws_global = ws
|
|
379
|
+
print(f"[model_service] Connected to Kernel ({_fmt_elapsed(_t0)})")
|
|
380
|
+
|
|
381
|
+
# Subscribe to events
|
|
382
|
+
await _rpc_call(ws, "event.subscribe", {
|
|
383
|
+
"events": [
|
|
384
|
+
"module.started",
|
|
385
|
+
"module.stopped",
|
|
386
|
+
"module.shutdown",
|
|
387
|
+
],
|
|
388
|
+
})
|
|
389
|
+
print(f"[model_service] Subscribed to events ({_fmt_elapsed(_t0)})")
|
|
390
|
+
|
|
391
|
+
# Register to Kernel Registry via RPC
|
|
392
|
+
await _rpc_call(ws, "registry.register", {
|
|
393
|
+
"module_id": "model_service",
|
|
394
|
+
"module_type": "service",
|
|
395
|
+
"events_publish": {
|
|
396
|
+
"model_service.test": {"description": "Test event from model_service module"},
|
|
397
|
+
},
|
|
398
|
+
"events_subscribe": [
|
|
399
|
+
"module.started",
|
|
400
|
+
"module.stopped",
|
|
401
|
+
"module.shutdown",
|
|
402
|
+
],
|
|
403
|
+
})
|
|
404
|
+
print(f"[model_service] Registered to Kernel ({_fmt_elapsed(_t0)})")
|
|
354
405
|
|
|
355
|
-
|
|
406
|
+
# Publish module.ready (every reconnect)
|
|
407
|
+
if not _shutting_down:
|
|
356
408
|
await _rpc_call(ws, "event.publish", {
|
|
357
409
|
"event_id": str(uuid.uuid4()),
|
|
358
410
|
"event": "module.ready",
|
|
@@ -363,34 +415,29 @@ async def main():
|
|
|
363
415
|
})
|
|
364
416
|
print(f"[model_service] module.ready published ({_fmt_elapsed(_t0)})")
|
|
365
417
|
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
except Exception as e:
|
|
391
|
-
_write_crash(type(e), e, e.__traceback__, severity="critical", handled=True)
|
|
392
|
-
_print_crash_summary(type(e), e.__traceback__)
|
|
393
|
-
sys.exit(1)
|
|
418
|
+
# Start test event loop in background
|
|
419
|
+
test_task = asyncio.create_task(_test_event_loop(ws))
|
|
420
|
+
|
|
421
|
+
# Message loop: handle incoming RPC + events
|
|
422
|
+
async for raw in ws:
|
|
423
|
+
try:
|
|
424
|
+
msg = json.loads(raw)
|
|
425
|
+
except (json.JSONDecodeError, TypeError):
|
|
426
|
+
continue
|
|
427
|
+
|
|
428
|
+
try:
|
|
429
|
+
has_method = "method" in msg
|
|
430
|
+
has_id = "id" in msg
|
|
431
|
+
|
|
432
|
+
if has_method and not has_id:
|
|
433
|
+
# Event Notification
|
|
434
|
+
await _handle_event_notification(msg)
|
|
435
|
+
elif has_method and has_id:
|
|
436
|
+
# Incoming RPC request
|
|
437
|
+
await _handle_rpc_request(ws, msg)
|
|
438
|
+
# Ignore RPC responses (we don't await them in this simple impl)
|
|
439
|
+
except Exception as e:
|
|
440
|
+
print(f"[model_service] 消息处理异常(已忽略): {e}")
|
|
394
441
|
|
|
395
442
|
|
|
396
443
|
async def _rpc_call(ws, method: str, params: dict = None):
|
|
@@ -416,10 +463,14 @@ async def _handle_event_notification(msg: dict):
|
|
|
416
463
|
event_type = params.get("event", "")
|
|
417
464
|
data = params.get("data", {})
|
|
418
465
|
|
|
419
|
-
# Special handling for module.shutdown
|
|
420
|
-
if event_type == "module.shutdown"
|
|
421
|
-
|
|
422
|
-
|
|
466
|
+
# Special handling for module.shutdown
|
|
467
|
+
if event_type == "module.shutdown":
|
|
468
|
+
target = data.get("module_id", "")
|
|
469
|
+
reason = data.get("reason", "")
|
|
470
|
+
# Handle both targeted shutdown (module_id == "model_service") and broadcast shutdown (no module_id or launcher_lost)
|
|
471
|
+
if target == "model_service" or not target or reason == "launcher_lost":
|
|
472
|
+
await _handle_shutdown()
|
|
473
|
+
return
|
|
423
474
|
|
|
424
475
|
# Log other events
|
|
425
476
|
print(f"[model_service] Event received: {event_type}")
|
|
@@ -472,8 +523,15 @@ async def _rpc_status() -> dict:
|
|
|
472
523
|
|
|
473
524
|
|
|
474
525
|
async def _handle_shutdown():
|
|
475
|
-
"""Handle module.shutdown event — ack
|
|
526
|
+
"""Handle module.shutdown event — exiting → ack → cleanup → ready → exit."""
|
|
527
|
+
global _shutting_down
|
|
476
528
|
print("[model_service] Received shutdown request")
|
|
529
|
+
_shutting_down = True
|
|
530
|
+
# Step 0: Send module.exiting
|
|
531
|
+
await _publish_event(_ws_global, {
|
|
532
|
+
"event": "module.exiting",
|
|
533
|
+
"data": {"module_id": "model_service", "action": "none"},
|
|
534
|
+
})
|
|
477
535
|
# Step 1: Send ack
|
|
478
536
|
await _publish_event(_ws_global, {
|
|
479
537
|
"event": "module.shutdown.ack",
|
|
@@ -265,11 +265,25 @@ def _read_stdin_kite_message(expected_type: str, timeout: float = 10) -> dict |
|
|
|
265
265
|
|
|
266
266
|
# Global WS reference for publish_event callback
|
|
267
267
|
_ws_global = None
|
|
268
|
+
_shutting_down = False
|
|
269
|
+
_monitor = None
|
|
270
|
+
_monitor_task = None
|
|
268
271
|
|
|
272
|
+
# RPC request-response infrastructure
|
|
273
|
+
_rpc_waiters: dict[str, asyncio.Event] = {} # rpc_id -> Event
|
|
274
|
+
_rpc_results: dict[str, dict] = {} # rpc_id -> response dict
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _is_auth_failure(e: Exception) -> bool:
    """Return True when *e* indicates the Kernel rejected our credentials.

    Two exception shapes are recognised:

    * a connection-closed error carrying a received close frame (``e.rcvd``)
      whose ``code`` is 4001 or 4003;
    * a handshake rejection exposing an HTTP status of 401 or 403, either as
      ``e.response.status_code`` or directly as ``e.status_code``.

    Any other exception yields False, so the caller treats it as a
    retryable connection error.
    """
    close_frame = getattr(e, "rcvd", None)
    if close_frame is not None:
        # A close frame without a code is treated as code 0 (not an auth failure).
        return getattr(close_frame, "code", 0) in (4001, 4003)

    # Handshake-time rejection: no close frame exists, only an HTTP status.
    status = getattr(getattr(e, "response", None), "status_code", None)
    if status is None:
        status = getattr(e, "status_code", None)
    return status in (401, 403)
|
|
269
283
|
|
|
270
284
|
|
|
271
285
|
async def main():
|
|
272
|
-
global _ws_global
|
|
286
|
+
global _ws_global, _shutting_down, _monitor
|
|
273
287
|
# Initialize log file paths
|
|
274
288
|
global _log_dir, _log_latest_path, _crash_log_path
|
|
275
289
|
module_data = os.environ.get("KITE_MODULE_DATA")
|
|
@@ -321,57 +335,104 @@ async def main():
|
|
|
321
335
|
|
|
322
336
|
print(f"[watchdog] Token received ({len(token)} chars), kernel port: {kernel_port} ({_fmt_elapsed(_t0)})")
|
|
323
337
|
|
|
324
|
-
#
|
|
325
|
-
|
|
326
|
-
|
|
338
|
+
# Create monitor (once, persists across reconnects)
|
|
339
|
+
_monitor = HealthMonitor(
|
|
340
|
+
own_token=token,
|
|
341
|
+
kernel_port=kernel_port,
|
|
342
|
+
)
|
|
327
343
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
_ws_global = ws
|
|
331
|
-
print(f"[watchdog] Connected to Kernel ({_fmt_elapsed(_t0)})")
|
|
332
|
-
|
|
333
|
-
# Subscribe to events
|
|
334
|
-
await _rpc_call(ws, "event.subscribe", {
|
|
335
|
-
"events": [
|
|
336
|
-
"system.ready",
|
|
337
|
-
"module.started",
|
|
338
|
-
"module.stopped",
|
|
339
|
-
"module.exiting",
|
|
340
|
-
"module.ready",
|
|
341
|
-
"module.shutdown",
|
|
342
|
-
],
|
|
343
|
-
})
|
|
344
|
-
print(f"[watchdog] Subscribed to events ({_fmt_elapsed(_t0)})")
|
|
345
|
-
|
|
346
|
-
# Register to Kernel Registry via RPC
|
|
347
|
-
await _rpc_call(ws, "registry.register", {
|
|
348
|
-
"module_id": "watchdog",
|
|
349
|
-
"module_type": "service",
|
|
350
|
-
"events_publish": {
|
|
351
|
-
"watchdog.module.unhealthy": {},
|
|
352
|
-
"watchdog.module.recovered": {},
|
|
353
|
-
"watchdog.alert": {},
|
|
354
|
-
},
|
|
355
|
-
"events_subscribe": [
|
|
356
|
-
"system.ready",
|
|
357
|
-
"module.started",
|
|
358
|
-
"module.stopped",
|
|
359
|
-
"module.exiting",
|
|
360
|
-
"module.ready",
|
|
361
|
-
"module.shutdown",
|
|
362
|
-
],
|
|
363
|
-
})
|
|
364
|
-
print(f"[watchdog] Registered to Kernel ({_fmt_elapsed(_t0)})")
|
|
344
|
+
# Start reconnect loop
|
|
345
|
+
await _ws_loop(token, kernel_port, _t0)
|
|
365
346
|
|
|
366
|
-
# Create monitor with RPC callback
|
|
367
|
-
monitor = HealthMonitor(
|
|
368
|
-
own_token=token,
|
|
369
|
-
kernel_port=kernel_port,
|
|
370
|
-
)
|
|
371
|
-
monitor.publish_event = lambda event: asyncio.create_task(_publish_event(ws, event))
|
|
372
|
-
monitor.rpc_call = lambda method, params: _rpc_call(ws, method, params)
|
|
373
347
|
|
|
374
|
-
|
|
348
|
+
async def _ws_loop(token: str, kernel_port: int, _t0: float):
    """Keep a Kernel WebSocket session running, reconnecting with backoff.

    Calls ``_ws_connect`` repeatedly until ``_shutting_down`` is set or the
    task is cancelled. The delay between attempts starts at 0.3 s, doubles
    after each wait and is capped at 5 s; a session that ends without raising
    resets both the delay and the failure counter.

    Args:
        token: Auth token forwarded to ``_ws_connect``.
        kernel_port: Kernel port forwarded to ``_ws_connect``.
        _t0: Start timestamp forwarded to ``_ws_connect`` for elapsed-time logs.

    Raises:
        SystemExit: With code 1 on an authentication failure, or after
            ``max_retries`` consecutive failed sessions.
    """
    initial_delay = 0.3
    max_delay = 5.0
    max_retries = 10
    retry_delay = initial_delay
    attempt = 0
    while not _shutting_down:
        try:
            await _ws_connect(token, kernel_port, _t0)
            # Session ended without an exception: start the next one fresh.
            retry_delay = initial_delay
            attempt = 0
        except asyncio.CancelledError:
            return
        except Exception as e:
            attempt += 1
            # Record the failure before any exit path so the error that
            # terminates the process is not lost.
            _write_crash(type(e), e, e.__traceback__, severity="error", handled=True)
            if _is_auth_failure(e):
                print("[watchdog] Kernel 认证失败,退出")
                sys.exit(1)
            if attempt >= max_retries:
                print(f"[watchdog] 重连失败 {max_retries} 次,退出")
                sys.exit(1)
            print(f"[watchdog] 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
        # Runs after every session end (clean or failed): drop the stale
        # socket reference, honour a shutdown request, then back off.
        _ws_global_clear()
        if _shutting_down:
            return
        await asyncio.sleep(retry_delay)
        retry_delay = min(retry_delay * 2, max_delay)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _ws_global_clear() -> None:
    """Reset the module-level ``_ws_global`` WebSocket reference to None."""
    global _ws_global
    _ws_global = None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
async def _ws_connect(token: str, kernel_port: int, _t0: float):
|
|
385
|
+
"""Single WebSocket session: connect → subscribe → register → ready → receive loop."""
|
|
386
|
+
global _ws_global, _monitor, _monitor_task
|
|
387
|
+
|
|
388
|
+
ws_url = f"ws://127.0.0.1:{kernel_port}/ws?token={token}&id=watchdog"
|
|
389
|
+
print(f"[watchdog] Connecting to Kernel: {ws_url}")
|
|
390
|
+
|
|
391
|
+
async with websockets.connect(ws_url, open_timeout=5, ping_interval=None, ping_timeout=None, close_timeout=10) as ws:
|
|
392
|
+
_ws_global = ws
|
|
393
|
+
print(f"[watchdog] Connected to Kernel ({_fmt_elapsed(_t0)})")
|
|
394
|
+
|
|
395
|
+
# Subscribe to events
|
|
396
|
+
await _rpc_call(ws, "event.subscribe", {
|
|
397
|
+
"events": [
|
|
398
|
+
"system.ready",
|
|
399
|
+
"module.started",
|
|
400
|
+
"module.stopped",
|
|
401
|
+
"module.exiting",
|
|
402
|
+
"module.ready",
|
|
403
|
+
"module.shutdown",
|
|
404
|
+
"module.offline",
|
|
405
|
+
],
|
|
406
|
+
})
|
|
407
|
+
print(f"[watchdog] Subscribed to events ({_fmt_elapsed(_t0)})")
|
|
408
|
+
|
|
409
|
+
# Register to Kernel Registry via RPC
|
|
410
|
+
await _rpc_call(ws, "registry.register", {
|
|
411
|
+
"module_id": "watchdog",
|
|
412
|
+
"module_type": "service",
|
|
413
|
+
"events_publish": {
|
|
414
|
+
"watchdog.module.unhealthy": {},
|
|
415
|
+
"watchdog.module.recovered": {},
|
|
416
|
+
"watchdog.alert": {},
|
|
417
|
+
},
|
|
418
|
+
"events_subscribe": [
|
|
419
|
+
"system.ready",
|
|
420
|
+
"module.started",
|
|
421
|
+
"module.stopped",
|
|
422
|
+
"module.exiting",
|
|
423
|
+
"module.ready",
|
|
424
|
+
"module.shutdown",
|
|
425
|
+
"module.offline",
|
|
426
|
+
],
|
|
427
|
+
})
|
|
428
|
+
print(f"[watchdog] Registered to Kernel ({_fmt_elapsed(_t0)})")
|
|
429
|
+
|
|
430
|
+
# Set up monitor callbacks (reconnect-safe)
|
|
431
|
+
_monitor.publish_event = lambda event: asyncio.create_task(_publish_event(ws, event))
|
|
432
|
+
_monitor.rpc_call = lambda method, params: _rpc_call_with_response(ws, method, params)
|
|
433
|
+
|
|
434
|
+
# Publish module.ready (every reconnect)
|
|
435
|
+
if not _shutting_down:
|
|
375
436
|
await _rpc_call(ws, "event.publish", {
|
|
376
437
|
"event_id": str(uuid.uuid4()),
|
|
377
438
|
"event": "module.ready",
|
|
@@ -382,34 +443,35 @@ async def main():
|
|
|
382
443
|
})
|
|
383
444
|
print(f"[watchdog] module.ready published ({_fmt_elapsed(_t0)})")
|
|
384
445
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
446
|
+
# Start monitor loop if not already running
|
|
447
|
+
if _monitor_task is None or _monitor_task.done():
|
|
448
|
+
_monitor_task = asyncio.create_task(_monitor.run())
|
|
449
|
+
|
|
450
|
+
# Message loop: handle incoming RPC + events
|
|
451
|
+
async for raw in ws:
|
|
452
|
+
try:
|
|
453
|
+
msg = json.loads(raw)
|
|
454
|
+
except (json.JSONDecodeError, TypeError):
|
|
455
|
+
continue
|
|
456
|
+
|
|
457
|
+
try:
|
|
458
|
+
has_method = "method" in msg
|
|
459
|
+
has_id = "id" in msg
|
|
460
|
+
|
|
461
|
+
if has_method and not has_id:
|
|
462
|
+
# Event Notification
|
|
463
|
+
await _handle_event_notification(msg, _monitor)
|
|
464
|
+
elif has_method and has_id:
|
|
465
|
+
# Incoming RPC request
|
|
466
|
+
await _handle_rpc_request(ws, msg, _monitor)
|
|
467
|
+
elif has_id and not has_method:
|
|
468
|
+
# RPC response — route to waiter
|
|
469
|
+
msg_id = msg["id"]
|
|
470
|
+
if msg_id in _rpc_waiters:
|
|
471
|
+
_rpc_results[msg_id] = msg
|
|
472
|
+
_rpc_waiters[msg_id].set()
|
|
473
|
+
except Exception as e:
|
|
474
|
+
print(f"[watchdog] 消息处理异常(已忽略): {e}")
|
|
413
475
|
|
|
414
476
|
|
|
415
477
|
|
|
@@ -421,6 +483,28 @@ async def _rpc_call(ws, method: str, params: dict = None):
|
|
|
421
483
|
await ws.send(json.dumps(msg))
|
|
422
484
|
|
|
423
485
|
|
|
486
|
+
async def _rpc_call_with_response(ws, method: str, params: dict = None, timeout: float = 5) -> dict:
    """Send a JSON-RPC 2.0 request over *ws* and wait for its response.

    A fresh UUID is used as the request id and an ``asyncio.Event`` is
    registered under it in ``_rpc_waiters``. Whoever receives the response is
    expected to store it in ``_rpc_results`` under the same id and set the
    event.

    Args:
        ws: Object with an awaitable ``send(str)`` method.
        method: JSON-RPC method name.
        params: Optional parameters; omitted from the request when falsy.
        timeout: Seconds to wait for the response.

    Returns:
        The stored response dict (``{}`` if the event fired without a stored
        result), or an error dict with code -32000 if no response arrived
        within *timeout*.

    Raises:
        Whatever ``ws.send`` raises; the waiter is still unregistered.
    """
    rpc_id = str(uuid.uuid4())
    msg = {"jsonrpc": "2.0", "id": rpc_id, "method": method}
    if params:
        msg["params"] = params

    evt = asyncio.Event()
    _rpc_waiters[rpc_id] = evt

    try:
        # The send is inside the try so that a failed send (e.g. a closed
        # connection) still unregisters the waiter instead of leaking it.
        await ws.send(json.dumps(msg))
        await asyncio.wait_for(evt.wait(), timeout=timeout)
        return _rpc_results.pop(rpc_id, {})
    except asyncio.TimeoutError:
        return {"error": {"code": -32000, "message": f"RPC timeout: {method}"}}
    finally:
        _rpc_waiters.pop(rpc_id, None)
        _rpc_results.pop(rpc_id, None)
|
|
506
|
+
|
|
507
|
+
|
|
424
508
|
async def _publish_event(ws, event: dict):
|
|
425
509
|
"""Publish an event via RPC event.publish."""
|
|
426
510
|
await _rpc_call(ws, "event.publish", {
|
|
@@ -441,8 +525,8 @@ async def _handle_event_notification(msg: dict, monitor: HealthMonitor):
|
|
|
441
525
|
await _handle_shutdown(monitor)
|
|
442
526
|
return
|
|
443
527
|
|
|
444
|
-
# Forward to monitor
|
|
445
|
-
await monitor.handle_event(
|
|
528
|
+
# Forward to monitor (extract params from JSON-RPC notification)
|
|
529
|
+
await monitor.handle_event(params)
|
|
446
530
|
|
|
447
531
|
|
|
448
532
|
async def _handle_rpc_request(ws, msg: dict, monitor: HealthMonitor):
|
|
@@ -489,8 +573,15 @@ async def _rpc_status(monitor: HealthMonitor) -> dict:
|
|
|
489
573
|
|
|
490
574
|
|
|
491
575
|
async def _handle_shutdown(monitor: HealthMonitor):
|
|
492
|
-
"""Handle module.shutdown event — ack
|
|
576
|
+
"""Handle module.shutdown event — exiting → ack → cleanup → ready → exit."""
|
|
577
|
+
global _shutting_down
|
|
493
578
|
print("[watchdog] Received shutdown request")
|
|
579
|
+
_shutting_down = True
|
|
580
|
+
# Step 0: Send module.exiting
|
|
581
|
+
await _publish_event(_ws_global, {
|
|
582
|
+
"event": "module.exiting",
|
|
583
|
+
"data": {"module_id": "watchdog", "action": "none"},
|
|
584
|
+
})
|
|
494
585
|
# Step 1: Send ack
|
|
495
586
|
await _publish_event(_ws_global, {
|
|
496
587
|
"event": "module.shutdown.ack",
|