@agentunion/kite 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -1
- package/extensions/agents/assistant/server.py +30 -12
- package/extensions/channels/acp_channel/server.py +30 -12
- package/extensions/services/backup/entry.py +123 -65
- package/extensions/services/model_service/entry.py +123 -65
- package/extensions/services/watchdog/entry.py +171 -80
- package/extensions/services/watchdog/monitor.py +112 -6
- package/extensions/services/web/routes/routes_modules.py +249 -0
- package/extensions/services/web/routes/schemas.py +22 -0
- package/extensions/services/web/server.py +37 -14
- package/extensions/services/web/static/css/style.css +97 -0
- package/extensions/services/web/static/index.html +105 -2
- package/extensions/services/web/static/js/app.js +288 -1
- package/kernel/event_hub.py +21 -3
- package/kernel/registry_store.py +22 -5
- package/kernel/rpc_router.py +15 -5
- package/kernel/server.py +75 -5
- package/launcher/count_lines.py +34 -0
- package/launcher/entry.py +92 -14
- package/launcher/process_manager.py +12 -1
- package/package.json +1 -1
package/kernel/server.py
CHANGED
|
@@ -64,6 +64,11 @@ class KernelServer:
|
|
|
64
64
|
self._launcher_subscribed = False
|
|
65
65
|
self._ready_published = False
|
|
66
66
|
|
|
67
|
+
# Debounce timers for disconnected modules (module_id -> asyncio.Task)
|
|
68
|
+
self._debounce_tasks: dict[str, asyncio.Task] = {}
|
|
69
|
+
# Launcher loss timer (35s after launcher offline)
|
|
70
|
+
self._launcher_loss_task: asyncio.Task | None = None
|
|
71
|
+
|
|
67
72
|
# Build FastAPI app
|
|
68
73
|
self.app = self._create_app()
|
|
69
74
|
|
|
@@ -110,13 +115,27 @@ class KernelServer:
|
|
|
110
115
|
|
|
111
116
|
await ws.accept()
|
|
112
117
|
|
|
118
|
+
# Cancel debounce timer if module is reconnecting within 5s window
|
|
119
|
+
old_debounce = server._debounce_tasks.pop(module_id, None)
|
|
120
|
+
if old_debounce:
|
|
121
|
+
old_debounce.cancel()
|
|
122
|
+
print(f"[kernel] {module_id} reconnected within debounce window")
|
|
123
|
+
|
|
113
124
|
# Register connection in both EventHub and shared connections table
|
|
114
125
|
server.event_hub.add_connection(module_id, ws)
|
|
115
126
|
server.connections[module_id] = ws
|
|
116
127
|
|
|
128
|
+
# Set connected status in registry (if module exists)
|
|
129
|
+
server.registry.set_connected(module_id)
|
|
130
|
+
|
|
117
131
|
# Track Launcher connection
|
|
118
132
|
if module_id == "launcher":
|
|
119
133
|
server._launcher_connected = True
|
|
134
|
+
# Cancel launcher loss timer if reconnecting
|
|
135
|
+
if server._launcher_loss_task:
|
|
136
|
+
server._launcher_loss_task.cancel()
|
|
137
|
+
server._launcher_loss_task = None
|
|
138
|
+
print(f"[kernel] launcher reconnected, cancelled loss timer")
|
|
120
139
|
print(f"[kernel] launcher connected")
|
|
121
140
|
|
|
122
141
|
# Renew heartbeat on connect
|
|
@@ -163,12 +182,19 @@ class KernelServer:
|
|
|
163
182
|
if "not connected" not in err and "closed" not in err:
|
|
164
183
|
print(f"[kernel] WebSocket error for {module_id}: {e}")
|
|
165
184
|
finally:
|
|
166
|
-
# Cleanup
|
|
185
|
+
# Cleanup connection but DON'T immediately set offline — debounce
|
|
167
186
|
server.event_hub.remove_connection(module_id)
|
|
168
187
|
server.connections.pop(module_id, None)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
188
|
+
|
|
189
|
+
# Cancel existing debounce for this module (if reconnecting fast)
|
|
190
|
+
old_task = server._debounce_tasks.pop(module_id, None)
|
|
191
|
+
if old_task:
|
|
192
|
+
old_task.cancel()
|
|
193
|
+
|
|
194
|
+
# Start 5s debounce timer
|
|
195
|
+
server._debounce_tasks[module_id] = asyncio.create_task(
|
|
196
|
+
server._debounce_offline(module_id)
|
|
197
|
+
)
|
|
172
198
|
|
|
173
199
|
# ── HTTP endpoints (debug only) ──
|
|
174
200
|
|
|
@@ -180,7 +206,7 @@ class KernelServer:
|
|
|
180
206
|
"module_count": len(server.registry.modules),
|
|
181
207
|
"online_count": sum(
|
|
182
208
|
1 for m in server.registry.modules.values()
|
|
183
|
-
if m.get("status")
|
|
209
|
+
if m.get("status") in ("registered", "ready")
|
|
184
210
|
),
|
|
185
211
|
"event_stats": eh_health.get("details", {}),
|
|
186
212
|
}
|
|
@@ -227,6 +253,50 @@ class KernelServer:
|
|
|
227
253
|
except Exception as e:
|
|
228
254
|
print(f"[kernel] Dedup cleanup error: {e}")
|
|
229
255
|
|
|
256
|
+
# ── Debounce & Launcher loss ──
|
|
257
|
+
|
|
258
|
+
async def _debounce_offline(self, module_id: str, delay: float = 5.0):
    """Mark a module offline once it has stayed disconnected for ``delay`` seconds.

    Intended to run as a task stored in ``self._debounce_tasks``. If the
    task is cancelled while waiting (the module reconnected in time), it
    returns without touching any state.

    Args:
        module_id: Identifier of the module whose WebSocket dropped.
        delay: Grace period in seconds before the module is declared
            offline. Defaults to 5.0, the previously hard-coded window.
    """
    try:
        await asyncio.sleep(delay)
    except asyncio.CancelledError:
        return  # Reconnected inside the window; leave the status untouched.

    # Grace period elapsed without a reconnect: drop this task's bookkeeping
    # entry, then record and announce the offline transition.
    self._debounce_tasks.pop(module_id, None)
    self.registry.set_offline(module_id)
    self.event_hub.publish_internal("module.offline", {"module_id": module_id})
    # ":g" keeps the default message identical to the old literal "5s".
    print(f"[kernel] {module_id} offline ({delay:g}s debounce expired)")

    # Losing the launcher arms the longer launcher-loss timer, unless one is
    # already pending.
    if module_id == "launcher":
        self._launcher_connected = False
        if not self._launcher_loss_task:
            self._launcher_loss_task = asyncio.create_task(
                self._launcher_loss_timeout()
            )
|
|
278
|
+
|
|
279
|
+
async def _launcher_loss_timeout(self, timeout: float = 30.0, grace: float = 10.0):
    """Shut the kernel down if the launcher stays offline for ``timeout`` seconds.

    This timer is armed only after the 5s disconnect debounce has expired,
    so with the defaults the launcher has been gone for 35s in total when
    shutdown starts. Cancelling the task during the initial wait aborts
    the shutdown.

    Args:
        timeout: Seconds to wait for the launcher to come back.
        grace: Seconds modules are given to clean up after
            ``module.shutdown`` has been published.
    """
    try:
        await asyncio.sleep(timeout)
    except asyncio.CancelledError:
        return  # Launcher reconnected

    # The debounce that armed this timer had already waited 5s; ":g" keeps
    # the default message identical to the old literal "35s".
    print(f"[kernel] Launcher lost for {timeout + 5:g}s, triggering graceful shutdown")
    # From here on a reconnecting launcher finds no task to cancel.
    self._launcher_loss_task = None

    # Tell every module why it is being stopped. This is best effort: a
    # failing publish must not prevent the kernel from shutting down, or it
    # would keep running with no launcher.
    try:
        self.event_hub.publish_internal("module.shutdown", {
            "reason": "launcher_lost",
        })
    except Exception as e:
        print(f"[kernel] Failed to publish module.shutdown: {e}")

    # Give modules time to clean up before the kernel goes away.
    await asyncio.sleep(grace)

    # Shutdown Kernel itself
    await self.shutdown()
|
|
299
|
+
|
|
230
300
|
# ── Self-registration ──
|
|
231
301
|
|
|
232
302
|
def self_register(self):
|
package/launcher/count_lines.py
CHANGED
|
@@ -185,6 +185,40 @@ def show_history(record_file: Path, limit: int = 10):
|
|
|
185
185
|
|
|
186
186
|
print("=" * 80 + "\n")
|
|
187
187
|
|
|
188
|
+
# 计算每日新增
|
|
189
|
+
daily_stats = {}
|
|
190
|
+
for record in records:
|
|
191
|
+
date = record["timestamp"][:10] # YYYY-MM-DD
|
|
192
|
+
total = record["stats"]["total"]
|
|
193
|
+
if date not in daily_stats:
|
|
194
|
+
daily_stats[date] = {"first": total, "last": total}
|
|
195
|
+
else:
|
|
196
|
+
daily_stats[date]["last"] = total
|
|
197
|
+
|
|
198
|
+
# 计算每日增量
|
|
199
|
+
daily_changes = []
|
|
200
|
+
for date in sorted(daily_stats.keys()):
|
|
201
|
+
day_data = daily_stats[date]
|
|
202
|
+
daily_change = day_data["last"] - day_data["first"]
|
|
203
|
+
if daily_change != 0: # 只显示有变化的日期
|
|
204
|
+
daily_changes.append((date, daily_change))
|
|
205
|
+
|
|
206
|
+
if daily_changes:
|
|
207
|
+
print("=" * 80)
|
|
208
|
+
print("每日新增代码行数")
|
|
209
|
+
print("=" * 80)
|
|
210
|
+
print(f"{'日期':<15} {'新增行数':>15}")
|
|
211
|
+
print("-" * 80)
|
|
212
|
+
for date, change in daily_changes[-10:]: # 最近 10 天
|
|
213
|
+
if change > 0:
|
|
214
|
+
change_str = f"{GREEN}+{change:,}{RESET}"
|
|
215
|
+
elif change < 0:
|
|
216
|
+
change_str = f"{RED}{change:,}{RESET}"
|
|
217
|
+
else:
|
|
218
|
+
change_str = "0"
|
|
219
|
+
print(f"{date:<15} {change_str:>15}")
|
|
220
|
+
print("=" * 80 + "\n")
|
|
221
|
+
|
|
188
222
|
|
|
189
223
|
def run_stats():
|
|
190
224
|
"""Run code stats from main.py entry point (simplified output)."""
|
package/launcher/entry.py
CHANGED
|
@@ -303,6 +303,21 @@ class Launcher:
|
|
|
303
303
|
ch = msvcrt.getch()
|
|
304
304
|
if ch == b'\x1b': # ESC - force exit immediately
|
|
305
305
|
print("[launcher] ESC 强制退出")
|
|
306
|
+
# Send module.exiting before exit (best effort)
|
|
307
|
+
try:
|
|
308
|
+
if self._ws and self._loop:
|
|
309
|
+
import concurrent.futures
|
|
310
|
+
fut = asyncio.run_coroutine_threadsafe(
|
|
311
|
+
self._publish_event("module.exiting", {
|
|
312
|
+
"module_id": "launcher",
|
|
313
|
+
"reason": "ESC exit",
|
|
314
|
+
"action": "none",
|
|
315
|
+
}),
|
|
316
|
+
self._loop,
|
|
317
|
+
)
|
|
318
|
+
fut.result(timeout=1) # Wait up to 1s
|
|
319
|
+
except Exception:
|
|
320
|
+
pass
|
|
306
321
|
os._exit(0)
|
|
307
322
|
elif ch in (b'q', b'Q'): # q/Q - graceful shutdown
|
|
308
323
|
self._request_shutdown("收到退出请求,正在关闭...")
|
|
@@ -316,7 +331,7 @@ class Launcher:
|
|
|
316
331
|
"""Full 2-phase startup sequence, then monitor loop."""
|
|
317
332
|
self._loop = asyncio.get_running_loop()
|
|
318
333
|
self._ws_connected = asyncio.Event() # Create event in async context
|
|
319
|
-
|
|
334
|
+
self._t_start = time.monotonic() # Store for launcher ready_time calculation
|
|
320
335
|
self._start_unix = time.time()
|
|
321
336
|
phase_times = {}
|
|
322
337
|
G = "\033[32m"
|
|
@@ -396,7 +411,7 @@ class Launcher:
|
|
|
396
411
|
)
|
|
397
412
|
|
|
398
413
|
# ── Startup report ──
|
|
399
|
-
total_time = time.monotonic() -
|
|
414
|
+
total_time = time.monotonic() - self._t_start
|
|
400
415
|
await self._print_startup_report(total_time, phase_times,
|
|
401
416
|
global_instances=global_instances,
|
|
402
417
|
cleaned_stats=cleaned_stats)
|
|
@@ -557,17 +572,36 @@ class Launcher:
|
|
|
557
572
|
# ── Kernel WebSocket connection (JSON-RPC 2.0) ──
|
|
558
573
|
|
|
559
574
|
async def _ws_loop(self):
|
|
560
|
-
"""Connect to Kernel, reconnect on failure."""
|
|
575
|
+
"""Connect to Kernel, reconnect on failure with exponential backoff."""
|
|
576
|
+
retry_delay = 0.3
|
|
577
|
+
max_delay = 5.0
|
|
578
|
+
max_retries = 10
|
|
579
|
+
attempt = 0
|
|
561
580
|
while not self._thread_shutdown.is_set():
|
|
562
581
|
try:
|
|
563
582
|
await self._ws_connect()
|
|
583
|
+
retry_delay = 0.3 # Reset on successful connection
|
|
584
|
+
attempt = 0
|
|
564
585
|
except asyncio.CancelledError:
|
|
565
586
|
return
|
|
566
587
|
except Exception as e:
|
|
567
588
|
if not self._system_shutting_down:
|
|
568
|
-
|
|
589
|
+
attempt += 1
|
|
590
|
+
# Check for auth failure (don't retry)
|
|
591
|
+
if hasattr(e, 'rcvd') and e.rcvd is not None:
|
|
592
|
+
code = e.rcvd.code if hasattr(e.rcvd, 'code') else 0
|
|
593
|
+
if code in (4001, 4003):
|
|
594
|
+
print(f"[launcher] Kernel 认证失败 (code {code}),退出")
|
|
595
|
+
sys.exit(1)
|
|
596
|
+
if attempt >= max_retries:
|
|
597
|
+
print(f"[launcher] Kernel 重连失败 {max_retries} 次,退出")
|
|
598
|
+
sys.exit(1)
|
|
599
|
+
print(f"[launcher] Kernel 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
|
|
569
600
|
self._ws = None
|
|
570
|
-
|
|
601
|
+
if self._thread_shutdown.is_set():
|
|
602
|
+
return
|
|
603
|
+
await asyncio.sleep(retry_delay)
|
|
604
|
+
retry_delay = min(retry_delay * 2, max_delay)
|
|
571
605
|
|
|
572
606
|
async def _ws_connect(self):
|
|
573
607
|
"""Single WebSocket session with JSON-RPC 2.0 protocol."""
|
|
@@ -607,6 +641,12 @@ class Launcher:
|
|
|
607
641
|
})
|
|
608
642
|
print("[launcher] 已注册到 Kernel")
|
|
609
643
|
|
|
644
|
+
# Publish module.ready for Launcher itself (every reconnect)
|
|
645
|
+
await self._publish_event("module.ready", {
|
|
646
|
+
"module_id": "launcher",
|
|
647
|
+
"graceful_shutdown": True,
|
|
648
|
+
})
|
|
649
|
+
|
|
610
650
|
# Signal that connection is ready (after subscription and registration)
|
|
611
651
|
if self._ws_connected:
|
|
612
652
|
self._ws_connected.set()
|
|
@@ -899,9 +939,15 @@ class Launcher:
|
|
|
899
939
|
async def _wait_event(self, event_type: str, module_id: str, timeout: float) -> dict | None:
|
|
900
940
|
"""Wait for a specific event from a module. Returns data dict or None on timeout."""
|
|
901
941
|
key = f"{event_type}:{module_id}"
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
self._event_waiters
|
|
942
|
+
# Reuse existing waiter if one was pre-registered (e.g. in _ws_connect)
|
|
943
|
+
# This prevents a race where the event arrives before this method is called
|
|
944
|
+
existing = self._event_waiters.get(key)
|
|
945
|
+
if existing:
|
|
946
|
+
evt, data = existing
|
|
947
|
+
else:
|
|
948
|
+
evt = asyncio.Event()
|
|
949
|
+
data = {}
|
|
950
|
+
self._event_waiters[key] = (evt, data)
|
|
905
951
|
try:
|
|
906
952
|
await asyncio.wait_for(evt.wait(), timeout=timeout)
|
|
907
953
|
return data
|
|
@@ -981,12 +1027,21 @@ class Launcher:
|
|
|
981
1027
|
|
|
982
1028
|
async def _graceful_shutdown_all(self):
|
|
983
1029
|
"""Shut down all modules. Order:
|
|
984
|
-
1. Send
|
|
985
|
-
2.
|
|
986
|
-
3.
|
|
987
|
-
4.
|
|
1030
|
+
1. Send module.exiting for Launcher itself (so Watchdog knows it's intentional)
|
|
1031
|
+
2. Send shutdown to graceful modules (excl. Kernel) — let them start cleanup
|
|
1032
|
+
3. Terminate non-graceful modules (fast, runs during graceful cleanup)
|
|
1033
|
+
4. Wait for graceful modules to exit (process monitoring)
|
|
1034
|
+
5. Shut down Kernel last (keeps event routing alive throughout)
|
|
988
1035
|
"""
|
|
989
1036
|
self._system_shutting_down = True
|
|
1037
|
+
|
|
1038
|
+
# Send module.exiting for Launcher before anything else
|
|
1039
|
+
await self._publish_event("module.exiting", {
|
|
1040
|
+
"module_id": "launcher",
|
|
1041
|
+
"reason": "system_shutdown",
|
|
1042
|
+
"action": "none",
|
|
1043
|
+
})
|
|
1044
|
+
|
|
990
1045
|
running = [n for n in self.modules if self.process_manager.is_running(n)]
|
|
991
1046
|
# Also check core modules
|
|
992
1047
|
for cn in CORE_MODULE_NAMES:
|
|
@@ -1457,6 +1512,22 @@ class Launcher:
|
|
|
1457
1512
|
running = []
|
|
1458
1513
|
exited = []
|
|
1459
1514
|
stopped = []
|
|
1515
|
+
|
|
1516
|
+
# Add Launcher itself to running list
|
|
1517
|
+
from types import SimpleNamespace
|
|
1518
|
+
launcher_info = SimpleNamespace(
|
|
1519
|
+
display_name="Launcher",
|
|
1520
|
+
type="infrastructure",
|
|
1521
|
+
)
|
|
1522
|
+
launcher_rec = SimpleNamespace(
|
|
1523
|
+
pid=os.getpid(),
|
|
1524
|
+
started_at=self._start_unix,
|
|
1525
|
+
)
|
|
1526
|
+
running.append(("launcher", launcher_info, launcher_rec))
|
|
1527
|
+
# Launcher is ready immediately (ready_time = 0)
|
|
1528
|
+
if "launcher" not in self._ready_times:
|
|
1529
|
+
self._ready_times["launcher"] = 0.0
|
|
1530
|
+
|
|
1460
1531
|
for name, info in self.modules.items():
|
|
1461
1532
|
rec = self.process_manager.get_record(name)
|
|
1462
1533
|
is_running = self.process_manager.is_running(name)
|
|
@@ -1527,9 +1598,16 @@ class Launcher:
|
|
|
1527
1598
|
label = info.display_name or name
|
|
1528
1599
|
ready_t = self._ready_times.get(name)
|
|
1529
1600
|
time_str = f"{ready_t:.2f}s" if ready_t is not None else "—"
|
|
1601
|
+
|
|
1602
|
+
# Calculate elapsed from start
|
|
1530
1603
|
if ready_t is not None and hasattr(self, '_start_unix'):
|
|
1531
|
-
|
|
1532
|
-
|
|
1604
|
+
if name == "launcher":
|
|
1605
|
+
# Launcher: ready_t is already relative to _start_unix
|
|
1606
|
+
es_str = f"{ready_t:.2f}s"
|
|
1607
|
+
else:
|
|
1608
|
+
# Other modules: rec.started_at is unix timestamp
|
|
1609
|
+
elapsed_from_start = (rec.started_at + ready_t) - self._start_unix
|
|
1610
|
+
es_str = f"{elapsed_from_start:.2f}s"
|
|
1533
1611
|
else:
|
|
1534
1612
|
es_str = "—"
|
|
1535
1613
|
|
|
@@ -327,8 +327,19 @@ class ProcessManager:
|
|
|
327
327
|
return 0
|
|
328
328
|
|
|
329
329
|
# Dead launcher (or old format) — clean up its child processes
|
|
330
|
+
# Sort: watchdog first, kernel last, others in middle (prevents cascading issues)
|
|
331
|
+
def _cleanup_sort_key(entry):
    """Return the sort key that orders cleanup records for termination.

    The key is a ``(priority, name)`` tuple: ``watchdog`` sorts first,
    ``kernel`` last, and every other record in between, ordered by name.
    A missing or ``None`` name is treated as ``""`` and any other
    non-string value is converted with ``str`` so that keys are always
    mutually comparable.
    """
    name = entry.get("name") or ""
    if not isinstance(name, str):
        # A malformed record must not make sorted() raise on a mixed-type
        # comparison.
        name = str(name)
    priority = {"watchdog": 0, "kernel": 2}.get(name, 1)
    return (priority, name)
|
|
338
|
+
|
|
339
|
+
records_sorted = sorted(records, key=_cleanup_sort_key)
|
|
340
|
+
|
|
330
341
|
killed = 0
|
|
331
|
-
for entry in
|
|
342
|
+
for entry in records_sorted:
|
|
332
343
|
pid = entry.get("pid", 0)
|
|
333
344
|
cmd = entry.get("cmd", [])
|
|
334
345
|
name = entry.get("name", "?")
|