@agentunion/kite 1.3.1 → 1.3.2

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/kernel/server.py CHANGED
@@ -64,6 +64,11 @@ class KernelServer:
64
64
  self._launcher_subscribed = False
65
65
  self._ready_published = False
66
66
 
67
+ # Debounce timers for disconnected modules (module_id -> asyncio.Task)
68
+ self._debounce_tasks: dict[str, asyncio.Task] = {}
69
+ # Launcher loss timer (35s after launcher offline)
70
+ self._launcher_loss_task: asyncio.Task | None = None
71
+
67
72
  # Build FastAPI app
68
73
  self.app = self._create_app()
69
74
 
@@ -110,13 +115,27 @@ class KernelServer:
110
115
 
111
116
  await ws.accept()
112
117
 
118
+ # Cancel debounce timer if module is reconnecting within 5s window
119
+ old_debounce = server._debounce_tasks.pop(module_id, None)
120
+ if old_debounce:
121
+ old_debounce.cancel()
122
+ print(f"[kernel] {module_id} reconnected within debounce window")
123
+
113
124
  # Register connection in both EventHub and shared connections table
114
125
  server.event_hub.add_connection(module_id, ws)
115
126
  server.connections[module_id] = ws
116
127
 
128
+ # Set connected status in registry (if module exists)
129
+ server.registry.set_connected(module_id)
130
+
117
131
  # Track Launcher connection
118
132
  if module_id == "launcher":
119
133
  server._launcher_connected = True
134
+ # Cancel launcher loss timer if reconnecting
135
+ if server._launcher_loss_task:
136
+ server._launcher_loss_task.cancel()
137
+ server._launcher_loss_task = None
138
+ print(f"[kernel] launcher reconnected, cancelled loss timer")
120
139
  print(f"[kernel] launcher connected")
121
140
 
122
141
  # Renew heartbeat on connect
@@ -163,12 +182,19 @@ class KernelServer:
163
182
  if "not connected" not in err and "closed" not in err:
164
183
  print(f"[kernel] WebSocket error for {module_id}: {e}")
165
184
  finally:
166
- # Cleanup
185
+ # Cleanup connection but DON'T immediately set offline — debounce
167
186
  server.event_hub.remove_connection(module_id)
168
187
  server.connections.pop(module_id, None)
169
- server.registry.set_offline(module_id)
170
- server.event_hub.publish_internal(
171
- "module.offline", {"module_id": module_id})
188
+
189
+ # Cancel existing debounce for this module (if reconnecting fast)
190
+ old_task = server._debounce_tasks.pop(module_id, None)
191
+ if old_task:
192
+ old_task.cancel()
193
+
194
+ # Start 5s debounce timer
195
+ server._debounce_tasks[module_id] = asyncio.create_task(
196
+ server._debounce_offline(module_id)
197
+ )
172
198
 
173
199
  # ── HTTP endpoints (debug only) ──
174
200
 
@@ -180,7 +206,7 @@ class KernelServer:
180
206
  "module_count": len(server.registry.modules),
181
207
  "online_count": sum(
182
208
  1 for m in server.registry.modules.values()
183
- if m.get("status") == "online"
209
+ if m.get("status") in ("registered", "ready")
184
210
  ),
185
211
  "event_stats": eh_health.get("details", {}),
186
212
  }
@@ -227,6 +253,50 @@ class KernelServer:
227
253
  except Exception as e:
228
254
  print(f"[kernel] Dedup cleanup error: {e}")
229
255
 
256
+ # ── Debounce & Launcher loss ──
257
+
258
+ async def _debounce_offline(self, module_id: str):
259
+ """Wait 5s after WS disconnect. If module doesn't reconnect, mark offline."""
260
+ try:
261
+ await asyncio.sleep(5)
262
+ except asyncio.CancelledError:
263
+ return # Module reconnected within 5s — cancelled by ws_endpoint
264
+
265
+ # 5s elapsed, module did not reconnect — mark offline
266
+ self._debounce_tasks.pop(module_id, None)
267
+ self.registry.set_offline(module_id)
268
+ self.event_hub.publish_internal("module.offline", {"module_id": module_id})
269
+ print(f"[kernel] {module_id} offline (5s debounce expired)")
270
+
271
+ # If launcher went offline, start 35s launcher loss timer
272
+ if module_id == "launcher":
273
+ self._launcher_connected = False
274
+ if not self._launcher_loss_task:
275
+ self._launcher_loss_task = asyncio.create_task(
276
+ self._launcher_loss_timeout()
277
+ )
278
+
279
+ async def _launcher_loss_timeout(self):
280
+ """35s after launcher goes offline (post-debounce). Trigger graceful shutdown."""
281
+ try:
282
+ await asyncio.sleep(30) # 5s debounce already elapsed, total = 35s
283
+ except asyncio.CancelledError:
284
+ return # Launcher reconnected
285
+
286
+ print("[kernel] Launcher lost for 35s, triggering graceful shutdown")
287
+ self._launcher_loss_task = None
288
+
289
+ # Publish module.shutdown with reason launcher_lost to all modules
290
+ self.event_hub.publish_internal("module.shutdown", {
291
+ "reason": "launcher_lost",
292
+ })
293
+
294
+ # Wait for modules to clean up (up to 10s)
295
+ await asyncio.sleep(10)
296
+
297
+ # Shutdown Kernel itself
298
+ await self.shutdown()
299
+
230
300
  # ── Self-registration ──
231
301
 
232
302
  def self_register(self):
@@ -185,6 +185,40 @@ def show_history(record_file: Path, limit: int = 10):
185
185
 
186
186
  print("=" * 80 + "\n")
187
187
 
188
+ # 计算每日新增
189
+ daily_stats = {}
190
+ for record in records:
191
+ date = record["timestamp"][:10] # YYYY-MM-DD
192
+ total = record["stats"]["total"]
193
+ if date not in daily_stats:
194
+ daily_stats[date] = {"first": total, "last": total}
195
+ else:
196
+ daily_stats[date]["last"] = total
197
+
198
+ # 计算每日增量
199
+ daily_changes = []
200
+ for date in sorted(daily_stats.keys()):
201
+ day_data = daily_stats[date]
202
+ daily_change = day_data["last"] - day_data["first"]
203
+ if daily_change != 0: # 只显示有变化的日期
204
+ daily_changes.append((date, daily_change))
205
+
206
+ if daily_changes:
207
+ print("=" * 80)
208
+ print("每日新增代码行数")
209
+ print("=" * 80)
210
+ print(f"{'日期':<15} {'新增行数':>15}")
211
+ print("-" * 80)
212
+ for date, change in daily_changes[-10:]: # 最近 10 天
213
+ if change > 0:
214
+ change_str = f"{GREEN}+{change:,}{RESET}"
215
+ elif change < 0:
216
+ change_str = f"{RED}{change:,}{RESET}"
217
+ else:
218
+ change_str = "0"
219
+ print(f"{date:<15} {change_str:>15}")
220
+ print("=" * 80 + "\n")
221
+
188
222
 
189
223
  def run_stats():
190
224
  """Run code stats from main.py entry point (simplified output)."""
package/launcher/entry.py CHANGED
@@ -303,6 +303,21 @@ class Launcher:
303
303
  ch = msvcrt.getch()
304
304
  if ch == b'\x1b': # ESC - force exit immediately
305
305
  print("[launcher] ESC 强制退出")
306
+ # Send module.exiting before exit (best effort)
307
+ try:
308
+ if self._ws and self._loop:
309
+ import concurrent.futures
310
+ fut = asyncio.run_coroutine_threadsafe(
311
+ self._publish_event("module.exiting", {
312
+ "module_id": "launcher",
313
+ "reason": "ESC exit",
314
+ "action": "none",
315
+ }),
316
+ self._loop,
317
+ )
318
+ fut.result(timeout=1) # Wait up to 1s
319
+ except Exception:
320
+ pass
306
321
  os._exit(0)
307
322
  elif ch in (b'q', b'Q'): # q/Q - graceful shutdown
308
323
  self._request_shutdown("收到退出请求,正在关闭...")
@@ -316,7 +331,7 @@ class Launcher:
316
331
  """Full 2-phase startup sequence, then monitor loop."""
317
332
  self._loop = asyncio.get_running_loop()
318
333
  self._ws_connected = asyncio.Event() # Create event in async context
319
- t_start = time.monotonic()
334
+ self._t_start = time.monotonic() # Store for launcher ready_time calculation
320
335
  self._start_unix = time.time()
321
336
  phase_times = {}
322
337
  G = "\033[32m"
@@ -396,7 +411,7 @@ class Launcher:
396
411
  )
397
412
 
398
413
  # ── Startup report ──
399
- total_time = time.monotonic() - t_start
414
+ total_time = time.monotonic() - self._t_start
400
415
  await self._print_startup_report(total_time, phase_times,
401
416
  global_instances=global_instances,
402
417
  cleaned_stats=cleaned_stats)
@@ -557,17 +572,36 @@ class Launcher:
557
572
  # ── Kernel WebSocket connection (JSON-RPC 2.0) ──
558
573
 
559
574
  async def _ws_loop(self):
560
- """Connect to Kernel, reconnect on failure."""
575
+ """Connect to Kernel, reconnect on failure with exponential backoff."""
576
+ retry_delay = 0.3
577
+ max_delay = 5.0
578
+ max_retries = 10
579
+ attempt = 0
561
580
  while not self._thread_shutdown.is_set():
562
581
  try:
563
582
  await self._ws_connect()
583
+ retry_delay = 0.3 # Reset on successful connection
584
+ attempt = 0
564
585
  except asyncio.CancelledError:
565
586
  return
566
587
  except Exception as e:
567
588
  if not self._system_shutting_down:
568
- print(f"[launcher] Kernel 连接错误: {e}")
589
+ attempt += 1
590
+ # Check for auth failure (don't retry)
591
+ if hasattr(e, 'rcvd') and e.rcvd is not None:
592
+ code = e.rcvd.code if hasattr(e.rcvd, 'code') else 0
593
+ if code in (4001, 4003):
594
+ print(f"[launcher] Kernel 认证失败 (code {code}),退出")
595
+ sys.exit(1)
596
+ if attempt >= max_retries:
597
+ print(f"[launcher] Kernel 重连失败 {max_retries} 次,退出")
598
+ sys.exit(1)
599
+ print(f"[launcher] Kernel 连接错误: {e}, {retry_delay:.1f}s 后重试 ({attempt}/{max_retries})")
569
600
  self._ws = None
570
- await asyncio.sleep(5)
601
+ if self._thread_shutdown.is_set():
602
+ return
603
+ await asyncio.sleep(retry_delay)
604
+ retry_delay = min(retry_delay * 2, max_delay)
571
605
 
572
606
  async def _ws_connect(self):
573
607
  """Single WebSocket session with JSON-RPC 2.0 protocol."""
@@ -607,6 +641,12 @@ class Launcher:
607
641
  })
608
642
  print("[launcher] 已注册到 Kernel")
609
643
 
644
+ # Publish module.ready for Launcher itself (every reconnect)
645
+ await self._publish_event("module.ready", {
646
+ "module_id": "launcher",
647
+ "graceful_shutdown": True,
648
+ })
649
+
610
650
  # Signal that connection is ready (after subscription and registration)
611
651
  if self._ws_connected:
612
652
  self._ws_connected.set()
@@ -899,9 +939,15 @@ class Launcher:
899
939
  async def _wait_event(self, event_type: str, module_id: str, timeout: float) -> dict | None:
900
940
  """Wait for a specific event from a module. Returns data dict or None on timeout."""
901
941
  key = f"{event_type}:{module_id}"
902
- evt = asyncio.Event()
903
- data = {}
904
- self._event_waiters[key] = (evt, data)
942
+ # Reuse existing waiter if one was pre-registered (e.g. in _ws_connect)
943
+ # This prevents a race where the event arrives before this method is called
944
+ existing = self._event_waiters.get(key)
945
+ if existing:
946
+ evt, data = existing
947
+ else:
948
+ evt = asyncio.Event()
949
+ data = {}
950
+ self._event_waiters[key] = (evt, data)
905
951
  try:
906
952
  await asyncio.wait_for(evt.wait(), timeout=timeout)
907
953
  return data
@@ -981,12 +1027,21 @@ class Launcher:
981
1027
 
982
1028
  async def _graceful_shutdown_all(self):
983
1029
  """Shut down all modules. Order:
984
- 1. Send shutdown to graceful modules (excl. Kernel) let them start cleanup
985
- 2. Terminate non-graceful modules (fast, runs during graceful cleanup)
986
- 3. Wait for graceful modules to exit (process monitoring)
987
- 4. Shut down Kernel last (keeps event routing alive throughout)
1030
+ 1. Send module.exiting for Launcher itself (so Watchdog knows it's intentional)
1031
+ 2. Send shutdown to graceful modules (excl. Kernel) let them start cleanup
1032
+ 3. Terminate non-graceful modules (fast, runs during graceful cleanup)
1033
+ 4. Wait for graceful modules to exit (process monitoring)
1034
+ 5. Shut down Kernel last (keeps event routing alive throughout)
988
1035
  """
989
1036
  self._system_shutting_down = True
1037
+
1038
+ # Send module.exiting for Launcher before anything else
1039
+ await self._publish_event("module.exiting", {
1040
+ "module_id": "launcher",
1041
+ "reason": "system_shutdown",
1042
+ "action": "none",
1043
+ })
1044
+
990
1045
  running = [n for n in self.modules if self.process_manager.is_running(n)]
991
1046
  # Also check core modules
992
1047
  for cn in CORE_MODULE_NAMES:
@@ -1457,6 +1512,22 @@ class Launcher:
1457
1512
  running = []
1458
1513
  exited = []
1459
1514
  stopped = []
1515
+
1516
+ # Add Launcher itself to running list
1517
+ from types import SimpleNamespace
1518
+ launcher_info = SimpleNamespace(
1519
+ display_name="Launcher",
1520
+ type="infrastructure",
1521
+ )
1522
+ launcher_rec = SimpleNamespace(
1523
+ pid=os.getpid(),
1524
+ started_at=self._start_unix,
1525
+ )
1526
+ running.append(("launcher", launcher_info, launcher_rec))
1527
+ # Launcher is ready immediately (ready_time = 0)
1528
+ if "launcher" not in self._ready_times:
1529
+ self._ready_times["launcher"] = 0.0
1530
+
1460
1531
  for name, info in self.modules.items():
1461
1532
  rec = self.process_manager.get_record(name)
1462
1533
  is_running = self.process_manager.is_running(name)
@@ -1527,9 +1598,16 @@ class Launcher:
1527
1598
  label = info.display_name or name
1528
1599
  ready_t = self._ready_times.get(name)
1529
1600
  time_str = f"{ready_t:.2f}s" if ready_t is not None else "—"
1601
+
1602
+ # Calculate elapsed from start
1530
1603
  if ready_t is not None and hasattr(self, '_start_unix'):
1531
- elapsed_from_start = (rec.started_at + ready_t) - self._start_unix
1532
- es_str = f"{elapsed_from_start:.2f}s"
1604
+ if name == "launcher":
1605
+ # Launcher: ready_t is already relative to _start_unix
1606
+ es_str = f"{ready_t:.2f}s"
1607
+ else:
1608
+ # Other modules: rec.started_at is unix timestamp
1609
+ elapsed_from_start = (rec.started_at + ready_t) - self._start_unix
1610
+ es_str = f"{elapsed_from_start:.2f}s"
1533
1611
  else:
1534
1612
  es_str = "—"
1535
1613
 
@@ -327,8 +327,19 @@ class ProcessManager:
327
327
  return 0
328
328
 
329
329
  # Dead launcher (or old format) — clean up its child processes
330
+ # Sort: watchdog first, kernel last, others in middle (prevents cascading issues)
331
+ def _cleanup_sort_key(entry):
332
+ name = entry.get("name", "")
333
+ if name == "watchdog":
334
+ return (0, name)
335
+ if name == "kernel":
336
+ return (2, name)
337
+ return (1, name)
338
+
339
+ records_sorted = sorted(records, key=_cleanup_sort_key)
340
+
330
341
  killed = 0
331
- for entry in records:
342
+ for entry in records_sorted:
332
343
  pid = entry.get("pid", 0)
333
344
  cmd = entry.get("cmd", [])
334
345
  name = entry.get("name", "?")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentunion/kite",
3
- "version": "1.3.1",
3
+ "version": "1.3.2",
4
4
  "description": "Kite framework launcher — start Kite from anywhere",
5
5
  "bin": {
6
6
  "kite": "./cli.js"