openclaw-agent-dashboard 1.0.39 → 1.0.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/dashboard/api/agent_config_api.py +28 -7
  2. package/dashboard/api/agents.py +48 -10
  3. package/dashboard/api/agents_config.py +5 -1
  4. package/dashboard/api/chains.py +25 -5
  5. package/dashboard/api/collaboration.py +10 -9
  6. package/dashboard/api/debug_paths.py +5 -1
  7. package/dashboard/api/error_analysis.py +29 -11
  8. package/dashboard/api/errors.py +27 -11
  9. package/dashboard/api/fortify_routes.py +80 -0
  10. package/dashboard/api/input_safety.py +60 -0
  11. package/dashboard/api/performance.py +73 -53
  12. package/dashboard/api/subagents.py +95 -99
  13. package/dashboard/api/timeline.py +24 -3
  14. package/dashboard/api/version.py +2 -0
  15. package/dashboard/api/websocket.py +9 -7
  16. package/dashboard/core/__init__.py +1 -0
  17. package/dashboard/core/config_fortify.py +112 -0
  18. package/dashboard/core/error_handler.py +339 -0
  19. package/dashboard/core/fallback_manager.py +70 -0
  20. package/dashboard/core/safe_api_error.py +76 -0
  21. package/dashboard/core/schemas/__init__.py +16 -0
  22. package/dashboard/core/schemas/base.py +43 -0
  23. package/dashboard/core/schemas/session_schema.py +40 -0
  24. package/dashboard/core/schemas/subagent_schema.py +23 -0
  25. package/dashboard/data/agent_config_manager.py +6 -4
  26. package/dashboard/data/chain_reader.py +16 -12
  27. package/dashboard/data/error_analyzer.py +15 -11
  28. package/dashboard/data/session_reader.py +268 -46
  29. package/dashboard/data/subagent_reader.py +74 -49
  30. package/dashboard/data/timeline_reader.py +35 -49
  31. package/dashboard/main.py +24 -2
  32. package/dashboard/mechanism_reader.py +4 -5
  33. package/dashboard/mechanisms.py +2 -2
  34. package/dashboard/pytest.ini +3 -0
  35. package/dashboard/requirements.txt +5 -0
  36. package/dashboard/status/cache_fp_probe.py +40 -0
  37. package/dashboard/status/status_cache.py +199 -72
  38. package/dashboard/status/status_calculator.py +50 -30
  39. package/dashboard/tests/conftest.py +84 -0
  40. package/dashboard/tests/test_api_contracts.py +372 -0
  41. package/dashboard/tests/test_bench_fortify.py +176 -0
  42. package/dashboard/tests/test_fortify.py +741 -0
  43. package/dashboard/utils/__init__.py +1 -0
  44. package/dashboard/utils/data_repair.py +210 -0
  45. package/dashboard/watchers/file_watcher.py +367 -77
  46. package/openclaw.plugin.json +1 -1
  47. package/package.json +1 -1
  48. package/dashboard/agents.py +0 -74
  49. package/dashboard/collaboration.py +0 -407
  50. package/dashboard/errors.py +0 -63
  51. package/dashboard/performance.py +0 -474
  52. package/dashboard/session_reader.py +0 -240
  53. package/dashboard/status_calculator.py +0 -121
  54. package/dashboard/subagent_reader.py +0 -232
@@ -2,113 +2,240 @@
2
2
  状态缓存 - 缓存 Agent 状态计算结果
3
3
  通过缓存减少重复的文件读取操作,提升状态计算性能
4
4
  """
5
+ from __future__ import annotations
6
+
7
+ import sys
5
8
  import threading
6
9
  import time
7
- from typing import Dict, Any, Optional
10
+ from pathlib import Path
11
+ from typing import Any, Dict, Optional
8
12
 
13
+ try:
14
+ import psutil
15
+ except ImportError:
16
+ psutil = None # type: ignore
9
17
 
10
- class StatusCache:
11
- """Agent 状态缓存(线程安全)
12
-
13
- 功能:
14
- - 缓存 Agent 状态计算结果
15
- - TTL 机制自动过期
16
- - 文件变更时主动失效缓存
17
- - 线程安全(使用锁保护)
18
- - 最大条目数限制,超出时清理最旧条目
18
+
19
+ def _estimate_payload_size(data: Dict[str, Any]) -> int:
20
+ n = 256
21
+ for k, v in data.items():
22
+ if str(k).startswith("_"):
23
+ continue
24
+ n += sys.getsizeof(k) + sys.getsizeof(v)
25
+ return n
26
+
27
+
28
def source_mtimes_for_agent_cache(agent_id: str) -> Dict[str, Optional[float]]:
    """Snapshot the mtimes of the source files an agent's status depends on.

    Used for the cache "double check": after the TTL check passes, a change in
    any of these mtimes is treated as a cache miss (REQ_002-SPEC-06).

    Args:
        agent_id: Agent identifier; normalized via
            ``normalize_openclaw_agent_id`` before building paths.

    Returns:
        Mapping with the keys ``"sessions_index"`` and ``"subagent_runs"``.
        Each value is the file's ``st_mtime``, or ``None`` when the path is
        not a regular file or an ``OSError`` is raised while inspecting it.
    """
    from data.config_reader import get_openclaw_root, normalize_openclaw_agent_id

    def _mtime_or_none(path: Path) -> Optional[float]:
        # A file that is missing or unreadable is reported as None, not raised.
        try:
            if path.is_file():
                return path.stat().st_mtime
        except OSError:
            pass
        return None

    normalized_id = normalize_openclaw_agent_id(agent_id)
    openclaw_root = get_openclaw_root()
    tracked = (
        ("sessions_index", openclaw_root / "agents" / normalized_id / "sessions" / "sessions.json"),
        ("subagent_runs", openclaw_root / "subagents" / "runs.json"),
    )
    return {label: _mtime_or_none(path) for label, path in tracked}
48
+
49
+
50
class StatusCache:
    """Agent status cache (thread-safe).

    Entries are keyed by agent id and stored as the payload dict plus
    metadata keys prefixed with ``"_"`` (``_timestamp``, ``_last_access``,
    ``_est_bytes`` and optionally ``_fp``). Metadata keys are stripped from
    everything returned to callers. All access to the entry table goes
    through ``self._lock``.
    """

    def __init__(self, ttl_ms: int = 1000, max_size: int = 100, max_memory_mb: int = 100):
        """Create an empty cache.

        Args:
            ttl_ms: Entry time-to-live in milliseconds.
            max_size: Maximum number of entries before the least recently
                accessed one is evicted.
            max_memory_mb: Cap on the summed estimated entry size, in MiB.
        """
        self.ttl_ms = ttl_ms
        self.max_size = max_size
        self.max_memory_bytes = max_memory_mb * 1024 * 1024
        self._cache: Dict[str, Dict[str, Any]] = {}
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._fp_invalidations = 0
        self._stale_fallback_reads = 0
        self.preload_enabled = True

    def get(self, agent_id: str) -> Optional[Dict[str, Any]]:
        """Return the cached payload for ``agent_id``, or ``None`` on a miss.

        A miss is recorded when there is no entry, when the entry is older
        than ``ttl_ms``, or (with ``cache_double_check`` enabled) when the
        stored mtime fingerprint no longer matches the files on disk.
        """
        with self._lock:
            entry = self._cache.get(agent_id)
            if not entry:
                self._misses += 1
                return None
            now = time.time() * 1000
            if now - entry["_timestamp"] > self.ttl_ms:
                # Logical TTL miss, but the entry is kept so that
                # get_stale_fallback() can serve it on IO degradation
                # (REQ_003-AC-003).
                self._misses += 1
                return None
            from core.config_fortify import get_fortify_config

            if get_fortify_config().cache_double_check:
                fp = entry.get("_fp")
                if fp is not None:
                    current = source_mtimes_for_agent_cache(agent_id)
                    if current != fp:
                        # Source files changed since the entry was stored.
                        del self._cache[agent_id]
                        self._misses += 1
                        self._fp_invalidations += 1
                        return None
            self._hits += 1
            entry["_last_access"] = now
            return {k: v for k, v in entry.items() if not str(k).startswith("_")}

    def get_stale_fallback(self, agent_id: str) -> Optional[Dict[str, Any]]:
        """Return whatever is still cached for ``agent_id`` (degraded read).

        Ignores both the TTL and the mtime double check. Returns ``None``
        only when no entry is resident.
        """
        with self._lock:
            entry = self._cache.get(agent_id)
            if not entry:
                return None
            self._stale_fallback_reads += 1
            now = time.time() * 1000
            entry["_last_access"] = now
            return {k: v for k, v in entry.items() if not str(k).startswith("_")}

    def set(self, agent_id: str, data: Dict[str, Any]) -> None:
        """Store ``data`` for ``agent_id``, evicting other entries if needed.

        When the cache is full and ``agent_id`` is new, the least recently
        accessed entry is evicted first. After insertion the memory cap is
        enforced, never evicting the entry that was just written.
        """
        with self._lock:
            if len(self._cache) >= self.max_size and agent_id not in self._cache:
                self._evict_oldest(exclude=agent_id)

            now = time.time() * 1000
            est = _estimate_payload_size(data)
            from core.config_fortify import get_fortify_config

            fp: Optional[Dict[str, Optional[float]]] = None
            if get_fortify_config().cache_double_check:
                fp = source_mtimes_for_agent_cache(agent_id)
            self._cache[agent_id] = {
                **data,
                "_timestamp": now,
                "_last_access": now,
                "_est_bytes": est,
                **({"_fp": fp} if fp is not None else {}),
            }
            self._enforce_memory(agent_id)

    def _evict_oldest(self, exclude: Optional[str] = None) -> None:
        """Drop the least recently accessed entry, skipping ``exclude``.

        Callers must already hold ``self._lock``. Does nothing when there is
        no candidate other than ``exclude``.
        """
        keys = [k for k in self._cache if k != exclude]
        if not keys:
            return
        oldest_key = min(
            keys,
            key=lambda k: self._cache[k].get("_last_access", self._cache[k].get("_timestamp", 0)),
        )
        del self._cache[oldest_key]
        self._evictions += 1

    def _total_estimated_bytes(self) -> int:
        """Sum the ``_est_bytes`` of all entries (caller holds the lock)."""
        return int(sum(entry.get("_est_bytes", 0) for entry in self._cache.values()))

    def _enforce_memory(self, protect_key: Optional[str] = None) -> None:
        """Evict entries until the estimated total fits ``max_memory_bytes``.

        Stops when a single entry remains, so ``protect_key`` (excluded from
        eviction) is never removed. Callers must already hold ``self._lock``.
        """
        # With more than one entry there is always a candidate other than
        # protect_key, so every iteration removes an entry and the loop ends.
        while self._total_estimated_bytes() > self.max_memory_bytes and len(self._cache) > 1:
            self._evict_oldest(exclude=protect_key)

    def invalidate(self, agent_id: Optional[str] = None) -> None:
        """Invalidate one entry, or the whole cache.

        Args:
            agent_id: Agent whose entry should be dropped. A falsy value
                (``None`` or an empty string) clears every entry.
        """
        with self._lock:
            if agent_id:
                self._cache.pop(agent_id, None)
            else:
                self._cache.clear()

    def invalidate_stale_fp_entries(self) -> int:
        """Background probe: evict in-TTL entries whose fingerprint changed.

        For entries still within the TTL, compares the stored mtime
        fingerprint with the current one and removes the entry on mismatch
        (RISK-004 / NFR-R-004). This mirrors the double check in ``get()``
        and is meant to restore consistency when no request arrives for a
        long time.

        Returns:
            Number of entries removed; 0 when ``cache_double_check`` is off.
        """
        from core.config_fortify import get_fortify_config

        if not get_fortify_config().cache_double_check:
            return 0
        invalidated = 0
        with self._lock:
            agent_ids = list(self._cache.keys())
        now_ms = time.time() * 1000
        for agent_id in agent_ids:
            with self._lock:
                entry = self._cache.get(agent_id)
            if not entry:
                continue
            if now_ms - entry["_timestamp"] > self.ttl_ms:
                continue
            fp = entry.get("_fp")
            if fp is None:
                continue
            # The stat calls run without the lock so readers are not blocked.
            current = source_mtimes_for_agent_cache(agent_id)
            if current == fp:
                continue
            with self._lock:
                # Re-check under the lock: only delete if the entry was not
                # replaced with a fresh fingerprint in the meantime.
                entry2 = self._cache.get(agent_id)
                if entry2 and entry2.get("_fp") == fp:
                    del self._cache[agent_id]
                    self._fp_invalidations += 1
                    invalidated += 1
        return invalidated

    def get_stats(self) -> Dict[str, Any]:
        """Return cache configuration, size, memory and hit/miss counters.

        ``process_rss_mb`` is ``None`` when psutil is unavailable or the
        process query fails.
        """
        from core.config_fortify import get_fortify_config

        cfg = get_fortify_config()
        dbl = cfg.cache_double_check

        # Sample process RSS before taking the lock: it does not touch cache
        # state, so readers and writers need not wait on it.
        rss_mb = None
        if psutil:
            try:
                rss_mb = round(psutil.Process().memory_info().rss / (1024 * 1024), 2)
            except Exception:
                # Best-effort metric; a failing probe must not break stats.
                rss_mb = None

        with self._lock:
            total = self._hits + self._misses
            hit_rate = (self._hits / total) if total else 0.0
            # Computed once; both memory keys report the same estimate.
            estimate_mb = round(self._total_estimated_bytes() / (1024 * 1024), 3)
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "memory_usage_mb": estimate_mb,
                "memory_estimate_mb": estimate_mb,
                "max_memory_mb": round(self.max_memory_bytes / (1024 * 1024), 2),
                "process_rss_mb": rss_mb,
                "hit_rate": round(hit_rate, 4),
                "ttl_seconds": self.ttl_ms / 1000.0,
                "preload_enabled": self.preload_enabled,
                "cache_double_check": dbl,
                "fp_probe_interval_sec": cfg.cache_fp_probe_interval_sec,
                "stats": {
                    "hits": self._hits,
                    "misses": self._misses,
                    "evictions": self._evictions,
                    "fp_invalidations": self._fp_invalidations,
                    "stale_fallback_reads": self._stale_fallback_reads,
                },
            }
106
219
 
107
220
 
108
- # 全局单例
109
- _cache = StatusCache(ttl_ms=1000)
221
# Lazily created process-wide cache instance. Creation and reset are
# serialized by _cache_instance_lock so that concurrent first callers cannot
# build (and populate) two different caches.
_cache_instance: Optional[StatusCache] = None
# Re-entrant, so a nested get_cache() call on the same thread during
# construction cannot deadlock.
_cache_instance_lock = threading.RLock()


def get_cache() -> StatusCache:
    """Return the global StatusCache, creating it on first use.

    The instance is built from the fortify configuration
    (``cache_ttl_seconds``, ``cache_max_entries``, ``cache_max_memory_mb``,
    ``cache_preload``) and is published only after it is fully configured.
    """
    global _cache_instance
    if _cache_instance is None:
        with _cache_instance_lock:
            # Second check: another thread may have created the instance
            # while this one was waiting for the lock.
            if _cache_instance is None:
                from core.config_fortify import get_fortify_config

                c = get_fortify_config()
                instance = StatusCache(
                    ttl_ms=c.cache_ttl_seconds * 1000,
                    max_size=c.cache_max_entries,
                    max_memory_mb=c.cache_max_memory_mb,
                )
                instance.preload_enabled = c.cache_preload
                _cache_instance = instance
    return _cache_instance


def reset_cache_for_tests() -> None:
    """Drop the global cache instance so the next get_cache() rebuilds it."""
    global _cache_instance
    with _cache_instance_lock:
        _cache_instance = None
@@ -77,48 +77,68 @@ def calculate_agent_status(agent_id: str, use_cache: bool = True) -> AgentStatus
77
77
  cached = cache.get(agent_id)
78
78
  if cached and 'status' in cached:
79
79
  return cached['status']
80
-
81
- # 重新计算
82
- # 检查异常
83
- if has_recent_errors(agent_id, minutes=5):
84
- status = 'down'
85
- # 检查工作中:subagent run 未结束(与连线 activePath 同源)
86
- elif is_agent_working(agent_id):
87
- status = 'working'
88
- elif _main_agent_solo_processing(agent_id):
89
- status = 'working'
90
- else:
91
- # 默认空闲
92
- status = 'idle'
93
-
80
+
81
+ try:
82
+ # 重新计算
83
+ if has_recent_errors(agent_id, minutes=5):
84
+ status = 'down'
85
+ elif is_agent_working(agent_id):
86
+ status = 'working'
87
+ elif _main_agent_solo_processing(agent_id):
88
+ status = 'working'
89
+ else:
90
+ status = 'idle'
91
+ except OSError as e:
92
+ from core.error_handler import classify_exception, record_error
93
+ from core.fallback_manager import run_fallback
94
+
95
+ cat = classify_exception(e)
96
+ record_error(cat, str(e), f"status_calculator:calculate:{agent_id}", exc=e)
97
+ fb = run_fallback(cat, agent_id=agent_id)
98
+ if fb is not None:
99
+ return fb # type: ignore[return-value]
100
+ return 'idle'
101
+
94
102
  # 更新缓存(只缓存状态)
95
103
  if use_cache:
96
104
  cache = get_cache()
97
105
  cache.set(agent_id, {'status': status})
98
-
106
+
99
107
  return status
100
108
 
101
109
 
102
110
  def get_agents_with_status() -> list:
103
111
  """获取所有 Agent 及其状态"""
104
- agents = get_agents_list()
112
+ try:
113
+ agents = get_agents_list()
114
+ except OSError as e:
115
+ from core.error_handler import classify_exception, record_error
116
+
117
+ record_error(classify_exception(e), str(e), "get_agents_with_status:list", exc=e)
118
+ return []
119
+
105
120
  result = []
106
-
121
+
107
122
  for agent in agents:
108
123
  agent_id = agent.get('id')
109
- status = calculate_agent_status(agent_id)
110
-
111
- # 获取当前任务(仅工作中展示;空闲时不应残留已结束 run 的文案)
112
- current_task = get_current_task(agent_id)
113
- if status == 'idle':
124
+ try:
125
+ status = calculate_agent_status(agent_id)
126
+ current_task = get_current_task(agent_id)
127
+ if status == 'idle':
128
+ current_task = ''
129
+ last_active = get_last_active_time(agent_id)
130
+ last_error = get_last_error(agent_id) if status == 'down' else None
131
+ except OSError as e:
132
+ from core.error_handler import classify_exception, record_error
133
+ from core.fallback_manager import run_fallback
134
+
135
+ cat = classify_exception(e)
136
+ record_error(cat, str(e), f"get_agents_with_status:{agent_id}", exc=e)
137
+ status = run_fallback(cat, agent_id=agent_id) or 'idle'
114
138
  current_task = ''
115
-
116
- # 获取最后活跃时间
117
- last_active = get_last_active_time(agent_id)
118
-
119
- # 获取错误信息
120
- last_error = get_last_error(agent_id) if status == 'down' else None
121
-
139
+ last_active = 0
140
+ last_error = None
141
+
122
142
  result.append({
123
143
  'id': agent_id,
124
144
  'name': agent.get('name'),
@@ -128,7 +148,7 @@ def get_agents_with_status() -> list:
128
148
  'lastActiveAt': last_active,
129
149
  'error': last_error
130
150
  })
131
-
151
+
132
152
  return result
133
153
 
134
154
 
@@ -0,0 +1,84 @@
1
+ """Shared pytest fixtures for backend tests."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ import pytest
9
+
10
+ BACKEND = Path(__file__).resolve().parent.parent
11
+ sys.path.insert(0, str(BACKEND))
12
+
13
+
14
@pytest.fixture(autouse=True)
def reset_fortify_state():
    """Reset all fortify singletons before and after every test."""
    from core.config_fortify import refresh_fortify_config_cache
    from core.fallback_manager import reset_fallback_handlers_for_tests
    from status.status_cache import reset_cache_for_tests

    def _reset_all():
        # Same order on setup and teardown: cache, fallback handlers, config.
        for reset in (
            reset_cache_for_tests,
            reset_fallback_handlers_for_tests,
            refresh_fortify_config_cache,
        ):
            reset()

    _reset_all()
    yield
    _reset_all()
28
+
29
+
30
@pytest.fixture
def fake_openclaw_root(tmp_path: Path):
    """Build a minimal fake openclaw root and return its path.

    Layout created under ``tmp_path / ".openclaw"``:

    - ``agents/main/sessions/sessions.json``: index with two sessions
    - ``agents/main/sessions/session-001.jsonl``: start + two messages
    - ``agents/main/sessions/session-002.jsonl``: start, message, end

    All files are written as UTF-8 so the fixture does not depend on the
    locale's default encoding.
    """
    root = tmp_path / ".openclaw"
    root.mkdir(parents=True, exist_ok=True)

    agents_dir = root / "agents"
    agents_dir.mkdir(exist_ok=True)

    main_agent = agents_dir / "main"
    main_agent.mkdir(exist_ok=True)

    # sessions/ subdirectory (canonical path: agents/<id>/sessions/)
    sessions_dir = main_agent / "sessions"
    sessions_dir.mkdir(exist_ok=True)

    # sessions.json index
    sessions_index = {
        "sessions": [
            {
                "id": "session-001",
                "status": "active",
                "updatedAt": 1746000000,
                "turns": 3,
            },
            {
                "id": "session-002",
                "status": "completed",
                "updatedAt": 1745900000,
                "turns": 7,
            },
        ]
    }
    sessions_file = sessions_dir / "sessions.json"
    sessions_file.write_text(json.dumps(sessions_index), encoding="utf-8")

    # JSONL session file (in sessions/ subdirectory)
    session_jsonl = sessions_dir / "session-001.jsonl"
    messages = [
        {"type": "start", "sessionId": "session-001", "timestamp": 1746000000},
        {"type": "message", "message": {"role": "user", "content": [{"type": "text", "text": "hello"}]}},
        {"type": "message", "message": {"role": "assistant", "content": [{"type": "text", "text": "hi"}]}},
    ]
    session_jsonl.write_text(
        "\n".join(json.dumps(m) for m in messages) + "\n", encoding="utf-8"
    )

    # Second JSONL session, hand-written as raw lines (CA-003 fixture).
    # NOTE(review): this was originally described as containing a line that
    # needs repair (trailing comma), but every line below is already valid
    # JSON. Confirm whether a malformed line was intended here.
    session_with_bad = sessions_dir / "session-002.jsonl"
    bad_messages = [
        json.dumps({"type": "start", "sessionId": "session-002"}),
        '{"type": "message", "message": {"role": "user", "content": [{"type": "text", "text": "test"}]}}',
        '{"type": "end", "sessionId": "session-002", "status": "ok"}',
    ]
    session_with_bad.write_text("\n".join(bad_messages) + "\n", encoding="utf-8")

    return root