openclaw-agent-dashboard 1.0.44 → 1.0.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,9 +3,12 @@
3
3
  支持按分钟查看调用详情,便于分析调用瓶颈
4
4
  """
5
5
  from fastapi import APIRouter
6
- from typing import List, Dict, Any, Optional
6
+ from typing import List, Dict, Any, Optional, Tuple
7
+ import copy
7
8
  import json
8
9
  import re
10
+ import asyncio
11
+ import time
9
12
  from pathlib import Path
10
13
  from datetime import datetime, timedelta, timezone
11
14
  from zoneinfo import ZoneInfo
@@ -19,6 +22,31 @@ TZ_DISPLAY = ZoneInfo('Asia/Shanghai')
19
22
 
20
23
  router = APIRouter()
21
24
 
25
+ # 聚合统计多次并发请求(WS + 轮询 + 多标签)共用;TTL 短以保证大致实时
26
+ _perf_stats_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
27
+ _PERF_STATS_CACHE_TTL_SEC = 12.0
28
+
29
+ # 柱体钻取:多次点击 / 并发标签共用短缓存
30
+ _perf_details_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
31
+ _PERF_DETAILS_CACHE_TTL_SEC = 12.0
32
+
33
+ # 轻量解析 envelope ISO 时间,便于跳过明显早于查询窗口的行(避免 json.loads + schema)
34
+ _QUICK_ENV_TS_RE = re.compile(r'"timestamp"\s*:\s*"([^"]+)"')
35
+
36
+
37
+ def _quick_envelope_timestamp_utc(line: str) -> Optional[datetime]:
38
+ m = _QUICK_ENV_TS_RE.search(line)
39
+ if not m:
40
+ return None
41
+ try:
42
+ return datetime.fromisoformat(m.group(1).replace("Z", "+00:00"))
43
+ except ValueError:
44
+ return None
45
+
46
+
47
+ def _perf_cache_key(range_minutes: int, range_hours: int, granularity: str) -> str:
48
+ return f"{range_minutes}:{range_hours}:{granularity}"
49
+
22
50
 
23
51
  def _extract_trigger_text(msg: Dict) -> str:
24
52
  """从消息中提取触发内容(完整展示)"""
@@ -177,11 +205,25 @@ def parse_session_file(session_path: Path, range_hours: int = 1) -> List[Dict]:
177
205
  range_hours: 时间范围(小时),0 表示不限制
178
206
  """
179
207
  messages = []
208
+ now = datetime.now(timezone.utc)
209
+ time_ago = now - timedelta(hours=range_hours) if range_hours > 0 else None
210
+
211
+ # 启发式:窗口内若有 assistant usage,文件通常在窗口内有过写入;过久未修改则可跳过整文件
212
+ if time_ago is not None:
213
+ try:
214
+ if session_path.stat().st_mtime < time_ago.timestamp():
215
+ return []
216
+ except OSError:
217
+ return []
180
218
 
181
219
  try:
182
220
  with open(session_path, 'r', encoding='utf-8') as f:
183
221
  for line in f:
184
222
  try:
223
+ if time_ago is not None:
224
+ qt = _quick_envelope_timestamp_utc(line)
225
+ if qt is not None and qt < time_ago:
226
+ continue
185
227
  envelope, msg = parse_session_jsonl_line(line)
186
228
  if (
187
229
  not envelope
@@ -200,11 +242,8 @@ def parse_session_file(session_path: Path, range_hours: int = 1) -> List[Dict]:
200
242
  str(envelope['timestamp']).replace('Z', '+00:00')
201
243
  )
202
244
 
203
- if range_hours > 0:
204
- now = datetime.now(timezone.utc)
205
- time_ago = now - timedelta(hours=range_hours)
206
- if timestamp < time_ago:
207
- continue
245
+ if time_ago is not None and timestamp < time_ago:
246
+ continue
208
247
 
209
248
  messages.append({
210
249
  'timestamp': timestamp,
@@ -240,14 +279,8 @@ async def get_performance_stats(range: str = "20m"):
240
279
  return stats
241
280
 
242
281
 
243
- async def get_real_stats(range_minutes: int = 20, range_hours: int = 1, granularity: str = "minute") -> Dict:
244
- """获取真实的 TPM/RPM 统计
245
-
246
- Args:
247
- range_minutes: 时间范围(分钟)
248
- range_hours: 用于解析 session 的时间范围(小时)
249
- granularity: 聚合粒度 (minute, hour)
250
- """
282
+ def _compute_real_stats_sync(range_minutes: int = 20, range_hours: int = 1, granularity: str = "minute") -> Dict:
283
+ """同步聚合 TPM/RPM(在线程池中运行,避免阻塞事件循环)。"""
251
284
  stats = {
252
285
  'current': {
253
286
  'tpm': 0,
@@ -381,24 +414,38 @@ async def get_real_stats(range_minutes: int = 20, range_hours: int = 1, granular
381
414
  return stats
382
415
 
383
416
 
384
- async def get_minute_details(
417
async def get_real_stats(range_minutes: int = 20, range_hours: int = 1, granularity: str = "minute") -> Dict:
    """Return real TPM/RPM statistics, served from a short-lived cache when possible.

    On a cache miss the aggregation runs in a worker thread via
    ``asyncio.to_thread`` so the event loop is not blocked, and the result is
    stored under a key built from the three arguments.

    Args:
        range_minutes: Time range in minutes.
        range_hours: Time range in hours used when parsing session files.
        granularity: Aggregation granularity (``"minute"`` or ``"hour"``).

    Returns:
        The statistics dict. Callers always receive their own copy, so
        mutating it cannot alter what later cache hits return.
    """
    key = _perf_cache_key(range_minutes, range_hours, granularity)
    now = time.monotonic()
    hit = _perf_stats_cache.get(key)
    if hit is not None and (now - hit[0]) < _PERF_STATS_CACHE_TTL_SEC:
        # Hand out a copy: the cached dict is shared by every concurrent
        # request, matching how the details cache treats its entries.
        return copy.deepcopy(hit[1])
    data = await asyncio.to_thread(_compute_real_stats_sync, range_minutes, range_hours, granularity)
    # Store a private snapshot so the caller that triggered the computation
    # cannot modify the cached entry through the object it was given.
    _perf_stats_cache[key] = (now, copy.deepcopy(data))
    return data
427
+
428
+
429
+ def _perf_details_cache_key(
430
+ timestamp_ms: int,
431
+ granularity: str,
432
+ agent: str,
433
+ search: str,
434
+ sort: str,
435
+ limit: int,
436
+ ) -> str:
437
+ return f"{timestamp_ms}:{granularity}:{agent}:{search}:{sort}:{limit}"
438
+
439
+
440
+ def _compute_minute_details_sync(
385
441
  timestamp_ms: int,
386
442
  granularity: str = "minute",
387
443
  agent: Optional[str] = None,
388
444
  search: Optional[str] = None,
389
445
  sort: str = "tokens_desc",
390
- limit: int = 50
446
+ limit: int = 50,
391
447
  ) -> Dict[str, Any]:
392
- """获取指定时间窗口的调用详情,用于柱体点击钻取。时间展示使用 Asia/Shanghai 时区
393
-
394
- Args:
395
- timestamp_ms: Unix 毫秒时间戳
396
- granularity: 粒度 (minute, hour)
397
- agent: 筛选指定 Agent
398
- search: 搜索触发内容
399
- sort: 排序方式 (tokens_desc, tokens_asc, time_asc, time_desc)
400
- limit: 返回数量限制
401
- """
448
+ """同步聚合柱体钻取数据(线程池 + 短 TTL 缓存)。"""
402
449
  try:
403
450
  ts = datetime.fromtimestamp(timestamp_ms / 1000, tz=timezone.utc)
404
451
  ts_local = ts.astimezone(TZ_DISPLAY)
@@ -419,6 +466,7 @@ async def get_minute_details(
419
466
 
420
467
  all_calls = []
421
468
  agent_set = set()
469
+ window_start_ts = time_start.timestamp()
422
470
 
423
471
  for agent_dir in agents_path.iterdir():
424
472
  if not agent_dir.is_dir():
@@ -437,6 +485,12 @@ async def get_minute_details(
437
485
  for session_file in sessions_path.glob('*.jsonl'):
438
486
  if 'lock' in session_file.name or 'deleted' in session_file.name:
439
487
  continue
488
+ try:
489
+ # 与 parse_session_file 相同启发式:窗口开始后未修改的文件不可能含该窗内的 assistant 记录
490
+ if session_file.stat().st_mtime < window_start_ts:
491
+ continue
492
+ except OSError:
493
+ continue
440
494
  records = parse_session_file_with_details(session_file, agent_id)
441
495
  for r in records:
442
496
  if time_start <= r['timestamp'] < time_end:
@@ -503,6 +557,44 @@ async def get_minute_details(
503
557
  return {'timeWindow': '', 'calls': [], 'totalCalls': 0, 'totalTokens': 0, 'summary': {'avgTokens': 0}, 'agents': [], 'pagination': {'total': 0, 'limit': limit, 'hasMore': False}}
504
558
 
505
559
 
560
async def get_minute_details(
    timestamp_ms: int,
    granularity: str = "minute",
    agent: Optional[str] = None,
    search: Optional[str] = None,
    sort: str = "tokens_desc",
    limit: int = 50
) -> Dict[str, Any]:
    """Return call details for one time window, used when a chart bar is clicked.

    Times are displayed in the Asia/Shanghai time zone. Results are cached for
    a short TTL, keyed by every argument, and the aggregation itself runs in a
    worker thread via ``asyncio.to_thread``.

    Args:
        timestamp_ms: Unix timestamp in milliseconds.
        granularity: Granularity (``minute`` or ``hour``).
        agent: Restrict results to this agent.
        search: Text to search for in the trigger content.
        sort: Sort order (``tokens_desc``, ``tokens_asc``, ``time_asc``,
            ``time_desc``).
        limit: Maximum number of rows to return.

    Returns:
        The details dict. Cache hits return a deep copy, so callers may
        mutate the result freely.
    """
    key = _perf_details_cache_key(timestamp_ms, granularity, agent or "", search or "", sort, limit)
    now = time.monotonic()
    hit = _perf_details_cache.get(key)
    if hit is not None and (now - hit[0]) < _PERF_DETAILS_CACHE_TTL_SEC:
        return copy.deepcopy(hit[1])
    data = await asyncio.to_thread(
        _compute_minute_details_sync,
        timestamp_ms,
        granularity,
        agent,
        search,
        sort,
        limit,
    )
    # The key contains free-text search input and an arbitrary timestamp, so
    # the key space is unbounded. Drop expired entries on every insert so the
    # cache cannot grow for the life of the process.
    expired = [
        cached_key
        for cached_key, (stamp, _) in _perf_details_cache.items()
        if (now - stamp) >= _PERF_DETAILS_CACHE_TTL_SEC
    ]
    for cached_key in expired:
        _perf_details_cache.pop(cached_key, None)
    _perf_details_cache[key] = (now, copy.deepcopy(data))
    return data
596
+
597
+
506
598
  @router.get("/performance/details")
507
599
  async def get_performance_details(
508
600
  timestamp: int,
@@ -1,11 +1,13 @@
1
1
  """
2
2
  Timeline API 路由 - 实时执行时序图
3
3
  """
4
+ import asyncio
5
+ import copy
4
6
  import logging
5
7
  import time
6
8
  from fastapi import APIRouter, Query, HTTPException
7
9
  from pydantic import BaseModel
8
- from typing import Optional, List, Dict, Any
10
+ from typing import Optional, List, Dict, Any, Tuple
9
11
  import sys
10
12
  from pathlib import Path
11
13
 
@@ -20,6 +22,14 @@ from data.config_reader import get_agent_config
20
22
 
21
23
  router = APIRouter()
22
24
 
25
+ # 切换 agent / 轮询重复命中时减轻重复读盘解析(短时 stale 可接受)
26
+ _timeline_cache: Dict[str, Tuple[float, Dict[str, Any]]] = {}
27
+ _TIMELINE_CACHE_TTL_SEC = 5.0
28
+
29
+
30
+ def _timeline_cache_key(agent_id: str, session_key: Optional[str], limit: int) -> str:
31
+ return f"{agent_id}\x00{session_key or ''}\x00{limit}"
32
+
23
33
 
24
34
  class TimelineStats(BaseModel):
25
35
  totalDuration: int
@@ -80,11 +90,18 @@ async def get_timeline(
80
90
  raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
81
91
 
82
92
  t0 = time.perf_counter()
83
- try:
84
- result = get_timeline_steps(agent_id, session_key, limit)
85
- except Exception as e:
86
- record_error("unknown", str(e), "api:timeline:get", exc=e)
87
- raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
93
+ cache_key = _timeline_cache_key(agent_id, session_key, limit)
94
+ now_mono = time.monotonic()
95
+ hit = _timeline_cache.get(cache_key)
96
+ if hit is not None and (now_mono - hit[0]) < _TIMELINE_CACHE_TTL_SEC:
97
+ result = copy.deepcopy(hit[1])
98
+ else:
99
+ try:
100
+ result = await asyncio.to_thread(get_timeline_steps, agent_id, session_key, limit)
101
+ except Exception as e:
102
+ record_error("unknown", str(e), "api:timeline:get", exc=e)
103
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
104
+ _timeline_cache[cache_key] = (now_mono, copy.deepcopy(result))
88
105
  elapsed_ms = (time.perf_counter() - t0) * 1000
89
106
  steps_count = len(result.get("steps", []))
90
107
  if elapsed_ms >= 100.0:
@@ -127,7 +144,7 @@ async def get_timeline_steps_only(
127
144
  raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
128
145
 
129
146
  try:
130
- result = get_timeline_steps(agent_id, session_key, limit)
147
+ result = await asyncio.to_thread(get_timeline_steps, agent_id, session_key, limit)
131
148
  except Exception as e:
132
149
  record_error("unknown", str(e), "api:timeline:steps", exc=e)
133
150
  raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
@@ -153,7 +170,7 @@ async def get_timeline_summary(agent_id: str, session_key: Optional[str] = Query
153
170
  raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
154
171
 
155
172
  try:
156
- result = get_timeline_steps(agent_id, session_key, limit=10) # 只需基本信息
173
+ result = await asyncio.to_thread(get_timeline_steps, agent_id, session_key, 10) # 只需基本信息
157
174
  except Exception as e:
158
175
  record_error("unknown", str(e), "api:timeline:summary", exc=e)
159
176
  raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
@@ -7,6 +7,7 @@ from typing import Set, List, Dict, Any
7
7
  import json
8
8
  import asyncio
9
9
  import sys
10
+ import time
10
11
  from pathlib import Path
11
12
 
12
13
  sys.path.append(str(Path(__file__).parent.parent))
@@ -18,30 +19,46 @@ router = APIRouter()
18
19
  # 活跃的 WebSocket 连接
19
20
  active_connections: Set[WebSocket] = set()
20
21
 
21
- # 周期性推送间隔(秒)- 优化:从 3 秒缩短到 1 秒
22
- BROADCAST_INTERVAL_SEC = 1
22
+ # 周期性增量检查基准间隔(秒);空闲时会自动退避拉长(见 _periodic_broadcast_loop)
23
+ BROADCAST_INTERVAL_SEC = 5
23
24
  _broadcast_task: asyncio.Task | None = None
25
+ _broadcast_sleep_sec: float = float(BROADCAST_INTERVAL_SEC)
26
+ _broadcast_idle_streak: int = 0
27
+
28
+ # 文件监听等高频触发下合并 full_state,降低前端解析与重绘压力
29
+ FULL_STATE_MIN_INTERVAL_SEC = 2.0
30
+ _last_full_state_monotonic: float = 0.0
24
31
 
25
32
 
26
33
  async def _periodic_broadcast_loop():
27
- """周期性广播状态更新(增量),确保无文件变更时也有更新"""
34
+ """周期性广播状态更新(增量);连续无变更则拉长睡眠间隔,上限 30s。"""
35
+ global _broadcast_sleep_sec, _broadcast_idle_streak
28
36
  while True:
29
- await asyncio.sleep(BROADCAST_INTERVAL_SEC)
37
+ await asyncio.sleep(_broadcast_sleep_sec)
30
38
  if active_connections:
31
- # 只推送状态变化的 Agent
32
39
  try:
33
40
  from status.status_calculator import get_changed_agents
34
41
  changed_agents = await get_changed_agents()
35
42
  if changed_agents:
43
+ _broadcast_idle_streak = 0
44
+ _broadcast_sleep_sec = float(BROADCAST_INTERVAL_SEC)
36
45
  await broadcast_state_update(changed_agents)
46
+ else:
47
+ _broadcast_idle_streak += 1
48
+ if _broadcast_idle_streak >= 3:
49
+ _broadcast_sleep_sec = min(_broadcast_sleep_sec * 2.0, 30.0)
50
+ except asyncio.CancelledError:
51
+ raise
37
52
  except Exception as e:
38
53
  record_error("unknown", str(e), "websocket:periodic_broadcast", exc=e)
39
54
 
40
55
 
41
56
  def _ensure_broadcast_task():
42
57
  """有连接时启动周期性推送"""
43
- global _broadcast_task
58
+ global _broadcast_task, _broadcast_sleep_sec, _broadcast_idle_streak
44
59
  if active_connections and (_broadcast_task is None or _broadcast_task.done()):
60
+ _broadcast_sleep_sec = float(BROADCAST_INTERVAL_SEC)
61
+ _broadcast_idle_streak = 0
45
62
  _broadcast_task = asyncio.create_task(_periodic_broadcast_loop())
46
63
 
47
64
 
@@ -210,9 +227,15 @@ async def broadcast_full_state():
210
227
  优化点:
211
228
  1. 使用 get_collaboration_dynamic() 代替 get_collaboration()
212
229
  2. 只推送动态数据,减少数据量
230
+ 3. 短时间重复调用节流,避免监听线程连震时频繁全量推送
213
231
  """
232
+ global _last_full_state_monotonic
214
233
  if not active_connections:
215
234
  return
235
+ now = time.monotonic()
236
+ if now - _last_full_state_monotonic < FULL_STATE_MIN_INTERVAL_SEC:
237
+ return
238
+ _last_full_state_monotonic = now
216
239
  try:
217
240
  from .agents import get_agents as get_agents_list
218
241
  from .subagents import get_subagents
@@ -25,19 +25,20 @@ class SchemaValidator:
25
25
  self.schema = schema
26
26
  self.strict = strict
27
27
  self._validator = Draft202012Validator(schema)
28
- self._last_errors: List[str] = []
29
28
 
30
29
  def validate(self, data: Any) -> ValidationResult:
31
- self._last_errors = []
30
+ """线程安全:校验结果仅通过返回值给出,实例上不保留最后一次错误(避免并发覆盖)。"""
31
+ errors: List[str] = []
32
32
  if not isinstance(data, (dict, list)) and self.schema.get("type") == "object":
33
- self._last_errors.append("expected object")
34
- return ValidationResult(False, list(self._last_errors))
33
+ errors.append("expected object")
34
+ return ValidationResult(False, errors)
35
35
  try:
36
36
  self._validator.validate(data)
37
37
  return ValidationResult(True, [])
38
38
  except jsonschema.ValidationError as e:
39
- self._last_errors.append(e.message)
40
- return ValidationResult(False, list(self._last_errors))
39
+ errors.append(e.message)
40
+ return ValidationResult(False, errors)
41
41
 
42
42
  def get_error_details(self) -> Dict[str, Any]:
43
- return {"errors": list(self._last_errors)}
43
+ """兼容旧接口;共享校验器实例时不代表「最后一次校验」。请使用 validate() 的返回值。"""
44
+ return {"errors": []}
@@ -17,6 +17,15 @@ from core.schemas.session_schema import session_envelope_schema, session_message
17
17
 
18
18
  _audit_log = logging.getLogger("openclaw.fortify.audit")
19
19
 
20
+ # 模块级复用:避免每条 JSONL 构造 Draft202012Validator(CPU 大户)。实例线程安全见 SchemaValidator.validate。
21
+ _MSG_SCHEMA_OPTIONAL = dict(session_message_schema)
22
+ _MSG_SCHEMA_OPTIONAL.pop("required", None)
23
+ _ENVELOPE_STRICT = SchemaValidator(session_envelope_schema, strict=True)
24
+ _ENVELOPE_LOOSE = SchemaValidator(session_envelope_schema, strict=False)
25
+ _MSG_STRICT_FULL = SchemaValidator(session_message_schema, strict=True)
26
+ _MSG_LOOSE_OPTIONAL = SchemaValidator(_MSG_SCHEMA_OPTIONAL, strict=False)
27
+ _RELAXED_MESSAGE = SchemaValidator(_MSG_SCHEMA_OPTIONAL, strict=False)
28
+
20
29
 
21
30
  def _ensure_audit_logging() -> None:
22
31
  if _audit_log.handlers:
@@ -123,7 +132,7 @@ def parse_session_jsonl_line(
123
132
  record_error("parsing-error", "json_decode session line", "session_jsonl")
124
133
  return None, None
125
134
 
126
- env_validator = SchemaValidator(session_envelope_schema, strict=strict)
135
+ env_validator = _ENVELOPE_STRICT if strict else _ENVELOPE_LOOSE
127
136
  env_res = env_validator.validate(data)
128
137
  if not env_res.is_valid:
129
138
  from core.error_handler import record_error
@@ -144,11 +153,7 @@ def parse_session_jsonl_line(
144
153
  else:
145
154
  return data, None
146
155
 
147
- msg_schema = dict(session_message_schema)
148
- if not strict:
149
- msg_schema = dict(msg_schema)
150
- msg_schema.pop("required", None)
151
- mv = SchemaValidator(msg_schema, strict=strict)
156
+ mv = _MSG_STRICT_FULL if strict else _MSG_LOOSE_OPTIONAL
152
157
  mv_res = mv.validate(msg)
153
158
  if mv_res.is_valid:
154
159
  return data, msg
@@ -157,10 +162,7 @@ def parse_session_jsonl_line(
157
162
  repaired_msg = dict(msg)
158
163
  if "role" not in repaired_msg:
159
164
  repaired_msg["role"] = "assistant"
160
- relaxed = dict(msg_schema)
161
- relaxed.pop("required", None)
162
- mv2 = SchemaValidator(relaxed, strict=False)
163
- if mv2.validate(repaired_msg).is_valid:
165
+ if _RELAXED_MESSAGE.validate(repaired_msg).is_valid:
164
166
  audit_repair("message_schema_repair", json.dumps(msg), json.dumps(repaired_msg))
165
167
  return data, repaired_msg
166
168
 
@@ -174,10 +176,7 @@ def parse_session_jsonl_line(
174
176
 
175
177
  def validate_message_dict(msg: Dict[str, Any]) -> Tuple[bool, List[str]]:
176
178
  cfg = get_fortify_config()
177
- msg_schema = dict(session_message_schema)
178
- if not cfg.json_strict:
179
- msg_schema.pop("required", None)
180
- mv = SchemaValidator(msg_schema, strict=cfg.json_strict)
179
+ mv = _MSG_STRICT_FULL if cfg.json_strict else _MSG_LOOSE_OPTIONAL
181
180
  r = mv.validate(msg)
182
181
  return r.is_valid, r.errors
183
182
 
@@ -17,7 +17,7 @@ _LOG = logging.getLogger("openclaw.fortify.watcher")
17
17
  from core.config_fortify import get_fortify_config
18
18
  from core.error_handler import record_error, record_watcher_failure, record_watcher_recovery
19
19
 
20
- DEBOUNCE_SECONDS = 0.3
20
+ DEBOUNCE_SECONDS = 1.5
21
21
 
22
22
 
23
23
  def _extract_agent_id_from_path(filepath: str) -> Optional[str]:
@@ -260,8 +260,8 @@ def _on_file_changed(filepath: Optional[str] = None) -> None:
260
260
 
261
261
  loop = _event_loop
262
262
  if loop and broadcast_full_state:
263
- future = asyncio.run_coroutine_threadsafe(broadcast_full_state(), loop)
264
- future.result(timeout=10)
263
+ # fire-and-forget:避免阻塞 watchdog 线程;节流由 websocket.broadcast_full_state 负责
264
+ asyncio.run_coroutine_threadsafe(broadcast_full_state(), loop)
265
265
  except Exception as e:
266
266
  _last_error = str(e)
267
267
  record_error("unknown", str(e), "file_watcher_push")