openclaw-agent-dashboard 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dashboard/api/agent_config_api.py +28 -7
  2. package/dashboard/api/agents.py +48 -10
  3. package/dashboard/api/agents_config.py +5 -1
  4. package/dashboard/api/chains.py +25 -5
  5. package/dashboard/api/collaboration.py +10 -9
  6. package/dashboard/api/debug_paths.py +5 -1
  7. package/dashboard/api/error_analysis.py +29 -11
  8. package/dashboard/api/errors.py +37 -11
  9. package/dashboard/api/fortify_routes.py +108 -0
  10. package/dashboard/api/input_safety.py +60 -0
  11. package/dashboard/api/performance.py +73 -53
  12. package/dashboard/api/subagents.py +95 -99
  13. package/dashboard/api/timeline.py +24 -3
  14. package/dashboard/api/version.py +2 -0
  15. package/dashboard/api/websocket.py +9 -7
  16. package/dashboard/core/__init__.py +1 -0
  17. package/dashboard/core/config_fortify.py +125 -0
  18. package/dashboard/core/error_handler.py +488 -0
  19. package/dashboard/core/fallback_manager.py +81 -0
  20. package/dashboard/core/logging_config.py +217 -0
  21. package/dashboard/core/safe_api_error.py +76 -0
  22. package/dashboard/core/schemas/__init__.py +16 -0
  23. package/dashboard/core/schemas/base.py +43 -0
  24. package/dashboard/core/schemas/session_schema.py +40 -0
  25. package/dashboard/core/schemas/subagent_schema.py +23 -0
  26. package/dashboard/data/agent_config_manager.py +6 -4
  27. package/dashboard/data/chain_reader.py +16 -12
  28. package/dashboard/data/error_analyzer.py +15 -11
  29. package/dashboard/data/session_reader.py +268 -46
  30. package/dashboard/data/subagent_reader.py +74 -49
  31. package/dashboard/data/timeline_reader.py +35 -49
  32. package/dashboard/main.py +24 -2
  33. package/dashboard/mechanism_reader.py +4 -5
  34. package/dashboard/mechanisms.py +2 -2
  35. package/dashboard/pytest.ini +3 -0
  36. package/dashboard/requirements.txt +5 -0
  37. package/dashboard/status/cache_fp_probe.py +40 -0
  38. package/dashboard/status/status_cache.py +199 -72
  39. package/dashboard/status/status_calculator.py +50 -30
  40. package/dashboard/tests/conftest.py +87 -0
  41. package/dashboard/tests/test_api_contracts.py +372 -0
  42. package/dashboard/tests/test_bench_fortify.py +176 -0
  43. package/dashboard/tests/test_fortify.py +952 -0
  44. package/dashboard/utils/__init__.py +1 -0
  45. package/dashboard/utils/data_repair.py +210 -0
  46. package/dashboard/watchers/file_watcher.py +380 -77
  47. package/frontend-dist/assets/{index-cYIOn3Wq.css → index-BIZ2xHfw.css} +1 -1
  48. package/frontend-dist/assets/{index-DyRXGevD.js → index-Cnr0b02R.js} +1 -1
  49. package/frontend-dist/index.html +2 -2
  50. package/openclaw.plugin.json +1 -1
  51. package/package.json +1 -1
  52. package/dashboard/agents.py +0 -74
  53. package/dashboard/collaboration.py +0 -407
  54. package/dashboard/errors.py +0 -63
  55. package/dashboard/performance.py +0 -474
  56. package/dashboard/session_reader.py +0 -240
  57. package/dashboard/status_calculator.py +0 -121
  58. package/dashboard/subagent_reader.py +0 -232
@@ -1,7 +1,6 @@
1
1
  """
2
2
  时序数据读取器 - 将 session jsonl 解析为可视化时序步骤
3
3
  """
4
- import json
5
4
  import logging
6
5
  import os
7
6
  from functools import lru_cache
@@ -92,7 +91,13 @@ class TimelineStep:
92
91
 
93
92
 
94
93
  from data.config_reader import get_openclaw_root, normalize_openclaw_agent_id, agent_ids_equal
95
- from data.session_reader import normalize_sessions_index, resolve_session_jsonl_path
94
+ from data.session_reader import (
95
+ normalize_sessions_index,
96
+ resolve_session_jsonl_path,
97
+ _load_sessions_index_file,
98
+ )
99
+ from data.subagent_reader import load_subagent_runs
100
+ from utils.data_repair import parse_session_jsonl_line
96
101
 
97
102
 
98
103
  def _read_session_header_timestamp(path: Path) -> Optional[int]:
@@ -102,10 +107,10 @@ def _read_session_header_timestamp(path: Path) -> Optional[int]:
102
107
  first = f.readline()
103
108
  if not first.strip():
104
109
  return None
105
- data = json.loads(first.strip())
106
- if data.get('type') == 'session':
107
- return _parse_timestamp(data.get('timestamp', 0))
108
- except (json.JSONDecodeError, OSError, IOError):
110
+ envelope, _ = parse_session_jsonl_line(first.strip())
111
+ if envelope and envelope.get('type') == 'session':
112
+ return _parse_timestamp(envelope.get('timestamp', 0))
113
+ except (OSError, IOError):
109
114
  pass
110
115
  return None
111
116
 
@@ -260,17 +265,11 @@ def get_subagent_runs() -> Dict[str, List[Dict]]:
260
265
 
261
266
  @lru_cache(maxsize=16)
262
267
  def _get_subagent_runs_cached(mtime: float) -> Dict[str, List[Dict]]:
263
- runs_file = get_openclaw_root() / "subagents" / "runs.json"
264
- try:
265
- with open(runs_file, 'r', encoding='utf-8') as f:
266
- data = json.load(f)
267
- except (json.JSONDecodeError, IOError, OSError):
268
- return {}
269
268
  runs_by_agent: Dict[str, List[Dict]] = {}
270
- runs = data.get('runs', {})
271
- for run_id, run_info in runs.items():
269
+ for run_info in load_subagent_runs():
272
270
  if not isinstance(run_info, dict):
273
271
  continue
272
+ run_id = str(run_info.get('runId', ''))
274
273
  child_key = run_info.get('childSessionKey', '')
275
274
  if ':' in child_key:
276
275
  parts = child_key.split(':')
@@ -297,9 +296,11 @@ def _get_requester_info_for_session(agent_id: str, session_key: Optional[str]) -
297
296
  sessions_index = get_openclaw_root() / f"agents/{state_id}/sessions/sessions.json"
298
297
  if sessions_index.exists():
299
298
  try:
300
- with open(sessions_index, 'r', encoding='utf-8') as f:
301
- index_data = json.load(f)
302
- index_map = normalize_sessions_index(index_data)
299
+ index_data = _load_sessions_index_file(sessions_index)
300
+ if not index_data:
301
+ index_map = {}
302
+ else:
303
+ index_map = normalize_sessions_index(index_data)
303
304
  if not session_key:
304
305
  entries = list(index_map.items())
305
306
  if entries:
@@ -328,14 +329,8 @@ def _get_requester_info_for_session(agent_id: str, session_key: Optional[str]) -
328
329
  session_key = runs[0].get('childSessionKey')
329
330
  if not session_key:
330
331
  return {}
331
- runs_file = get_openclaw_root() / "subagents" / "runs.json"
332
- if not runs_file.exists():
333
- return {}
334
332
  try:
335
- with open(runs_file, 'r', encoding='utf-8') as f:
336
- data = json.load(f)
337
- runs = data.get('runs', {})
338
- for run_id, run_info in runs.items():
333
+ for run_info in load_subagent_runs():
339
334
  if not isinstance(run_info, dict):
340
335
  continue
341
336
  child_key = run_info.get('childSessionKey', '')
@@ -612,10 +607,10 @@ def resolve_agent_session_jsonl(
612
607
  index_path = sessions_path / "sessions.json"
613
608
  index_map: Dict[str, Dict[str, Any]] = {}
614
609
  if index_path.exists():
615
- try:
616
- with open(index_path, 'r', encoding='utf-8') as f:
617
- index_map = normalize_sessions_index(json.load(f))
618
- except (json.JSONDecodeError, IOError):
610
+ raw = _load_sessions_index_file(index_path)
611
+ if raw:
612
+ index_map = normalize_sessions_index(raw)
613
+ else:
619
614
  index_map = {}
620
615
 
621
616
  prefix = f"agent:{state_id}:"
@@ -895,17 +890,13 @@ def _extract_subagent_steps_from_main_lines(
895
890
  for line in lines:
896
891
  if '"type":"message"' not in line and '"type": "message"' not in line:
897
892
  continue
898
- try:
899
- data = json.loads(line.strip())
900
- except json.JSONDecodeError:
901
- continue
902
- if data.get('type') != 'message':
893
+ envelope, msg = parse_session_jsonl_line(line)
894
+ if envelope is None or envelope.get('type') != 'message' or msg is None:
903
895
  continue
904
- msg = data.get('message', {})
905
896
  role = msg.get('role')
906
897
  if not role:
907
898
  continue
908
- timestamp = _parse_timestamp(msg.get('timestamp') or data.get('timestamp', 0))
899
+ timestamp = _parse_timestamp(msg.get('timestamp') or envelope.get('timestamp', 0))
909
900
  duration = 0
910
901
  if last_timestamp and timestamp:
911
902
  duration = timestamp - last_timestamp
@@ -1112,21 +1103,19 @@ def _parse_session_lines(
1112
1103
  sender_id = requester_info.get('senderId') if requester_info else None
1113
1104
  sender_name = requester_info.get('senderName') if requester_info else None
1114
1105
  for line in lines:
1115
- try:
1116
- data = json.loads(line.strip())
1117
- except json.JSONDecodeError:
1106
+ envelope, msg = parse_session_jsonl_line(line)
1107
+ if envelope is None:
1118
1108
  continue
1119
- msg_type = data.get('type')
1109
+ msg_type = envelope.get('type')
1120
1110
  if msg_type == 'session':
1121
- started_at = _parse_timestamp(data.get('timestamp', 0))
1111
+ started_at = _parse_timestamp(envelope.get('timestamp', 0))
1122
1112
  continue
1123
- if msg_type != 'message':
1113
+ if msg_type != 'message' or msg is None:
1124
1114
  continue
1125
- msg = data.get('message', {})
1126
1115
  role = msg.get('role')
1127
1116
  if not role:
1128
1117
  continue
1129
- timestamp = _parse_timestamp(msg.get('timestamp') or data.get('timestamp', 0))
1118
+ timestamp = _parse_timestamp(msg.get('timestamp') or envelope.get('timestamp', 0))
1130
1119
  duration = 0
1131
1120
  if last_timestamp and timestamp:
1132
1121
  duration = timestamp - last_timestamp
@@ -1295,13 +1284,10 @@ def _line_index_of_first_user_message(path: Path) -> Optional[int]:
1295
1284
  for i, line in enumerate(f):
1296
1285
  if '"role"' not in line or 'user' not in line:
1297
1286
  continue
1298
- try:
1299
- d = json.loads(line.strip())
1300
- except json.JSONDecodeError:
1301
- continue
1302
- if d.get('type') != 'message':
1287
+ env, msg = parse_session_jsonl_line(line)
1288
+ if env is None or env.get('type') != 'message' or msg is None:
1303
1289
  continue
1304
- if (d.get('message') or {}).get('role') == 'user':
1290
+ if msg.get('role') == 'user':
1305
1291
  return i
1306
1292
  except (OSError, IOError):
1307
1293
  pass
package/dashboard/main.py CHANGED
@@ -13,14 +13,35 @@ import asyncio
13
13
  async def lifespan(app: FastAPI):
14
14
  """应用生命周期:启动时启动文件监听,关闭时停止"""
15
15
  loop = asyncio.get_running_loop()
16
+ probe_stop = None
16
17
  try:
17
18
  from watchers.file_watcher import start_file_watcher
19
+ from core.config_fortify import get_fortify_config
20
+
18
21
  start_file_watcher(loop)
22
+ cfg = get_fortify_config()
23
+ if cfg.cache_preload:
24
+ try:
25
+ from status.status_calculator import get_agents_with_status
26
+
27
+ get_agents_with_status()
28
+ except Exception as e:
29
+ from core.error_handler import record_error
30
+
31
+ record_error("unknown", str(e), "main:cache_preload", exc=e)
32
+ from status.cache_fp_probe import start_cache_fp_probe_background
33
+
34
+ probe_stop = start_cache_fp_probe_background()
19
35
  except Exception as e:
20
- print(f"[Main] 文件监听启动失败: {e}")
36
+ from core.error_handler import record_error
37
+
38
+ record_error("unknown", str(e), "main:file_watcher_start", exc=e)
21
39
  yield
22
40
  try:
41
+ if probe_stop is not None:
42
+ probe_stop.set()
23
43
  from watchers.file_watcher import stop_file_watcher
44
+
24
45
  stop_file_watcher()
25
46
  except Exception:
26
47
  pass
@@ -47,11 +68,12 @@ app.add_middleware(
47
68
  import sys
48
69
  sys.path.append(str(Path(__file__).parent))
49
70
 
50
- from api import agents, subagents, websocket, performance, collaboration, agents_config, errors, timeline, chains, agent_config_api, error_analysis, debug_paths, version
71
+ from api import agents, subagents, websocket, performance, collaboration, agents_config, errors, timeline, chains, agent_config_api, error_analysis, debug_paths, version, fortify_routes
51
72
 
52
73
  # 注册 API 路由
53
74
  app.include_router(agents.router, prefix="/api", tags=["agents"])
54
75
  app.include_router(errors.router, prefix="/api", tags=["errors"])
76
+ app.include_router(fortify_routes.router, prefix="/api", tags=["fortify"])
55
77
  app.include_router(agents_config.router, prefix="/api", tags=["agents-config"])
56
78
  app.include_router(subagents.router, prefix="/api", tags=["subagents"])
57
79
  app.include_router(websocket.router, tags=["websocket"])
@@ -7,7 +7,7 @@ from typing import Dict, Any, List
7
7
 
8
8
 
9
9
  from data.config_reader import get_openclaw_root, normalize_openclaw_agent_id
10
- from data.session_reader import normalize_sessions_index
10
+ from data.session_reader import normalize_sessions_index, _load_sessions_index_file
11
11
 
12
12
 
13
13
  def get_agent_mechanisms(agent_id: str) -> Dict[str, Any]:
@@ -30,9 +30,8 @@ def get_agent_mechanisms(agent_id: str) -> Dict[str, Any]:
30
30
  return result
31
31
 
32
32
  try:
33
- with open(sessions_index, 'r', encoding='utf-8') as f:
34
- data = json.load(f)
35
- if not isinstance(data, dict):
33
+ data = _load_sessions_index_file(sessions_index)
34
+ if not data:
36
35
  return result
37
36
 
38
37
  # 取最新 session 的机制信息(兼容 sessions.json 顶层或 entries 嵌套)
@@ -129,6 +128,6 @@ def get_agent_mechanisms(agent_id: str) -> Dict[str, Any]:
129
128
 
130
129
  def get_all_agents_mechanisms() -> List[Dict[str, Any]]:
131
130
  """获取所有 Agent 的机制使用情况"""
132
- from .config_reader import get_agents_list
131
+ from data.config_reader import get_agents_list
133
132
  agents = get_agents_list()
134
133
  return [get_agent_mechanisms(a.get('id', '')) for a in agents if a.get('id')]
@@ -14,14 +14,14 @@ router = APIRouter()
14
14
  @router.get("/mechanisms")
15
15
  async def get_mechanisms():
16
16
  """获取所有 Agent 的机制使用情况"""
17
- from data.mechanism_reader import get_all_agents_mechanisms
17
+ from mechanism_reader import get_all_agents_mechanisms
18
18
  return get_all_agents_mechanisms()
19
19
 
20
20
 
21
21
  @router.get("/mechanisms/{agent_id}")
22
22
  async def get_agent_mechanisms(agent_id: str):
23
23
  """获取单个 Agent 的机制使用情况"""
24
- from data.mechanism_reader import get_agent_mechanisms
24
+ from mechanism_reader import get_agent_mechanisms
25
25
  from data.config_reader import get_agent_config
26
26
 
27
27
  if not get_agent_config(agent_id):
@@ -0,0 +1,3 @@
1
+ [pytest]
2
+ markers =
3
+ benchmark: 可选性能烟测(对负载敏感,默认与主套件一起跑)
@@ -4,3 +4,8 @@ pydantic==2.11.7
4
4
  python-multipart==0.0.20
5
5
  watchdog>=3.0.0
6
6
  tzdata
7
+ jsonschema>=4.0.0
8
+ psutil>=5.9.0
9
+ pytest>=7.0.0
10
+ pytest-asyncio>=0.23.0
11
+ httpx>=0.27.0
@@ -0,0 +1,40 @@
1
+ """可选后台线程:周期性对 StatusCache 做 mtime 双验证剔除(RISK-004)。"""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import threading
6
+ from typing import Optional
7
+
8
+ _LOG = logging.getLogger("openclaw.fortify.cache_probe")
9
+
10
+
11
def start_cache_fp_probe_background() -> Optional[threading.Event]:
    """Start the optional background fingerprint probe for the status cache.

    The probe interval is read from ``cache_fp_probe_interval_sec`` on the
    fortify config.  When it is positive, a daemon thread is started that,
    once per interval, asks the shared status cache to drop entries whose
    source-file fingerprint no longer matches.

    Returns:
        The ``threading.Event`` that stops the probe once set, or ``None``
        when the interval is zero or negative and no thread was started.
    """
    from core.config_fortify import get_fortify_config

    probe_interval = get_fortify_config().cache_fp_probe_interval_sec
    if probe_interval <= 0:
        return None

    stop_event = threading.Event()

    def _probe_forever() -> None:
        from status.status_cache import get_cache

        # Event.wait() doubles as the sleep between passes: it returns True
        # as soon as the stop event is set, which ends the loop immediately.
        while not stop_event.wait(timeout=probe_interval):
            try:
                removed = get_cache().invalidate_stale_fp_entries()
                if removed:
                    _LOG.info("cache_fp_probe removed %s stale cache entries", removed)
            except Exception as e:
                # A failing pass is recorded and the probe keeps running.
                from core.error_handler import record_error

                record_error("unknown", str(e), "cache_fp_probe", exc=e)

    worker = threading.Thread(
        target=_probe_forever,
        daemon=True,
        name="openclaw_cache_fp_probe",
    )
    worker.start()
    return stop_event
@@ -2,113 +2,240 @@
2
2
  状态缓存 - 缓存 Agent 状态计算结果
3
3
  通过缓存减少重复的文件读取操作,提升状态计算性能
4
4
  """
5
+ from __future__ import annotations
6
+
7
+ import sys
5
8
  import threading
6
9
  import time
7
- from typing import Dict, Any, Optional
10
+ from pathlib import Path
11
+ from typing import Any, Dict, Optional
8
12
 
13
+ try:
14
+ import psutil
15
+ except ImportError:
16
+ psutil = None # type: ignore
9
17
 
10
- class StatusCache:
11
- """Agent 状态缓存(线程安全)
12
-
13
- 功能:
14
- - 缓存 Agent 状态计算结果
15
- - TTL 机制自动过期
16
- - 文件变更时主动失效缓存
17
- - 线程安全(使用锁保护)
18
- - 最大条目数限制,超出时清理最旧条目
18
+
19
def _estimate_payload_size(data: Dict[str, Any]) -> int:
    """Estimate the in-memory footprint of a cache payload, in bytes.

    Top-level keys whose string form starts with ``_`` are treated as cache
    metadata and ignored.  Every other key and value is measured *deeply*:
    ``sys.getsizeof`` alone is shallow and reports only the container header
    for a list or dict, which would make the cache's memory budget almost
    meaningless for nested status payloads.

    Args:
        data: The payload about to be cached.

    Returns:
        A fixed 256-byte per-entry overhead plus the summed sizes of all
        reachable keys, values and nested container items.  For a flat
        payload this equals the plain shallow sum.
    """
    total = 256  # fixed allowance for the entry's own bookkeeping
    containers = (dict, list, tuple, set, frozenset)
    descended = set()  # ids of containers already walked (cycle / alias guard)

    pending = []
    for key, value in data.items():
        if str(key).startswith("_"):
            continue
        pending.append(key)
        pending.append(value)

    # Iterative walk so deeply nested payloads cannot hit the recursion limit.
    while pending:
        obj = pending.pop()
        total += sys.getsizeof(obj)
        if not isinstance(obj, containers):
            continue
        if id(obj) in descended:
            # Shared or self-referencing container: its header was counted
            # above, but its contents are only counted once.
            continue
        descended.add(id(obj))
        if isinstance(obj, dict):
            for inner_key, inner_value in obj.items():
                pending.append(inner_key)
                pending.append(inner_value)
        else:
            pending.extend(obj)
    return total
26
+
27
+
28
def source_mtimes_for_agent_cache(agent_id: str) -> Dict[str, Optional[float]]:
    """Collect the mtime fingerprint of the files an agent's status depends on.

    The fingerprint covers the agent's ``sessions.json`` index and the shared
    ``subagents/runs.json`` file under the OpenClaw root.  The cache compares
    fingerprints to detect source changes inside the TTL window.

    Args:
        agent_id: Agent identifier; it is normalized before building paths.

    Returns:
        A dict with the keys ``"sessions_index"`` and ``"subagent_runs"``.
        Each value is the file's ``st_mtime``, or ``None`` when the path is
        not a regular file or cannot be stat'ed.
    """
    from data.config_reader import get_openclaw_root, normalize_openclaw_agent_id

    def _mtime_or_none(candidate: Path) -> Optional[float]:
        # Missing files and stat failures both collapse to None.
        try:
            if candidate.is_file():
                return candidate.stat().st_mtime
            return None
        except OSError:
            return None

    normalized_id = normalize_openclaw_agent_id(agent_id)
    openclaw_root = get_openclaw_root()

    tracked_files: Dict[str, Path] = {
        "sessions_index": openclaw_root / "agents" / normalized_id / "sessions" / "sessions.json",
        "subagent_runs": openclaw_root / "subagents" / "runs.json",
    }
    return {label: _mtime_or_none(location) for label, location in tracked_files.items()}
48
+
49
+
50
class StatusCache:
    """Thread-safe cache of computed agent status payloads.

    Each entry is the caller's payload plus underscore-prefixed metadata
    (``_timestamp``, ``_last_access``, ``_est_bytes`` and, when double
    checking is enabled, ``_fp``).  Metadata never leaves the cache: every
    value handed back to callers has underscore-prefixed keys removed.
    """

    def __init__(self, ttl_ms: int = 1000, max_size: int = 100, max_memory_mb: int = 100):
        """Create an empty cache.

        Args:
            ttl_ms: Entry lifetime in milliseconds.
            max_size: Entry count at which inserting a new key evicts one.
            max_memory_mb: Budget, in MiB, for the summed size estimates.
        """
        self.ttl_ms = ttl_ms
        self.max_size = max_size
        self.max_memory_bytes = max_memory_mb * 1024 * 1024
        self._cache: Dict[str, Dict[str, Any]] = {}
        self._lock = threading.Lock()
        # Counters surfaced through get_stats().
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._fp_invalidations = 0
        self._stale_fallback_reads = 0
        self.preload_enabled = True

    @staticmethod
    def _public_view(entry: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``entry`` without underscore-prefixed metadata keys."""
        return {key: value for key, value in entry.items() if not str(key).startswith("_")}

    def get(self, agent_id: str) -> Optional[Dict[str, Any]]:
        """Return the fresh cached payload for ``agent_id``, or ``None`` on a miss.

        A lookup is a miss when there is no entry, when the entry is older
        than the TTL, or (with double checking enabled) when the stored
        source-file fingerprint differs from the current one.
        """
        with self._lock:
            cached = self._cache.get(agent_id)
            if not cached:
                self._misses += 1
                return None

            now_ms = time.time() * 1000
            if now_ms - cached["_timestamp"] > self.ttl_ms:
                # Expired entries count as a miss but stay resident so that
                # get_stale_fallback() can still serve them.
                self._misses += 1
                return None

            from core.config_fortify import get_fortify_config

            stored_fp = cached.get("_fp") if get_fortify_config().cache_double_check else None
            if stored_fp is not None:
                current_fp = source_mtimes_for_agent_cache(agent_id)
                if current_fp != stored_fp:
                    # A source file changed inside the TTL window: drop it.
                    del self._cache[agent_id]
                    self._misses += 1
                    self._fp_invalidations += 1
                    return None

            self._hits += 1
            cached["_last_access"] = now_ms
            return self._public_view(cached)

    def get_stale_fallback(self, agent_id: str) -> Optional[Dict[str, Any]]:
        """Return whatever is still cached for ``agent_id``, ignoring freshness.

        Neither the TTL nor the fingerprint is checked; this is the degraded
        read path.  Returns ``None`` only when no entry exists.
        """
        with self._lock:
            cached = self._cache.get(agent_id)
            if not cached:
                return None
            self._stale_fallback_reads += 1
            cached["_last_access"] = time.time() * 1000
            return self._public_view(cached)

    def set(self, agent_id: str, data: Dict[str, Any]) -> None:
        """Store ``data`` for ``agent_id``, evicting older entries if needed."""
        with self._lock:
            is_new_key = agent_id not in self._cache
            if len(self._cache) >= self.max_size and is_new_key:
                self._evict_oldest(exclude=agent_id)

            now_ms = time.time() * 1000
            est_bytes = _estimate_payload_size(data)
            from core.config_fortify import get_fortify_config

            fingerprint: Optional[Dict[str, Optional[float]]] = None
            if get_fortify_config().cache_double_check:
                fingerprint = source_mtimes_for_agent_cache(agent_id)

            # Metadata is written after the payload so it always wins over
            # same-named keys in ``data``.
            record = dict(data)
            record["_timestamp"] = now_ms
            record["_last_access"] = now_ms
            record["_est_bytes"] = est_bytes
            if fingerprint is not None:
                record["_fp"] = fingerprint
            self._cache[agent_id] = record
            self._enforce_memory(agent_id)

    def _evict_oldest(self, exclude: Optional[str] = None) -> None:
        """Remove the least recently accessed entry, never touching ``exclude``.

        Does not take the lock itself; within this class it is only invoked
        from code that already holds it.
        """
        candidates = [key for key in self._cache if key != exclude]
        if not candidates:
            return

        def _recency(key: str) -> Any:
            meta = self._cache[key]
            return meta.get("_last_access", meta.get("_timestamp", 0))

        victim = min(candidates, key=_recency)
        del self._cache[victim]
        self._evictions += 1

    def _total_estimated_bytes(self) -> int:
        """Sum the stored ``_est_bytes`` of every entry (0 when absent)."""
        return int(sum(entry.get("_est_bytes", 0) for entry in self._cache.values()))

    def _enforce_memory(self, protect_key: Optional[str] = None) -> None:
        """Evict entries until the estimate fits the budget or one entry remains.

        ``protect_key`` is never evicted.
        """
        while len(self._cache) > 1 and self._total_estimated_bytes() > self.max_memory_bytes:
            self._evict_oldest(exclude=protect_key)
            if protect_key and len(self._cache) == 1:
                break

    def invalidate(self, agent_id: Optional[str] = None) -> None:
        """Drop one agent's entry, or the whole cache when ``agent_id`` is falsy."""
        with self._lock:
            if not agent_id:
                self._cache.clear()
            else:
                self._cache.pop(agent_id, None)

    def invalidate_stale_fp_entries(self) -> int:
        """Drop in-TTL entries whose source-file fingerprint has changed.

        Intended for the background probe.  It applies the same fingerprint
        comparison as get() to every entry that is still inside its TTL, so
        changed sources are noticed even when nobody reads the cache.

        Returns:
            The number of entries removed; 0 when double checking is off.
        """
        from core.config_fortify import get_fortify_config

        if not get_fortify_config().cache_double_check:
            return 0

        with self._lock:
            candidate_ids = list(self._cache.keys())
        started_ms = time.time() * 1000

        removed = 0
        for agent_id in candidate_ids:
            with self._lock:
                snapshot = self._cache.get(agent_id)
                if not snapshot:
                    continue
                if started_ms - snapshot["_timestamp"] > self.ttl_ms:
                    continue
                expected_fp = snapshot.get("_fp")
            if expected_fp is None:
                continue

            # The file stat happens without the lock held.
            if source_mtimes_for_agent_cache(agent_id) == expected_fp:
                continue

            with self._lock:
                # Re-check: the entry may have been replaced while unlocked.
                latest = self._cache.get(agent_id)
                if latest and latest.get("_fp") == expected_fp:
                    del self._cache[agent_id]
                    self._fp_invalidations += 1
                    removed += 1
        return removed

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of cache size, limits, configuration and counters."""
        from core.config_fortify import get_fortify_config

        cfg = get_fortify_config()
        double_check_enabled = cfg.cache_double_check
        with self._lock:
            lookups = self._hits + self._misses
            hit_rate = (self._hits / lookups) if lookups else 0.0

            rss_mb = None
            if psutil:
                try:
                    rss_mb = round(psutil.Process().memory_info().rss / (1024 * 1024), 2)
                except Exception:
                    # Best effort: the RSS figure is optional diagnostics.
                    pass

            estimated_mb = round(self._total_estimated_bytes() / (1024 * 1024), 3)
            counters = {
                "hits": self._hits,
                "misses": self._misses,
                "evictions": self._evictions,
                "fp_invalidations": self._fp_invalidations,
                "stale_fallback_reads": self._stale_fallback_reads,
            }
            return {
                "size": len(self._cache),
                "max_size": self.max_size,
                "memory_usage_mb": estimated_mb,
                "memory_estimate_mb": estimated_mb,
                "max_memory_mb": round(self.max_memory_bytes / (1024 * 1024), 2),
                "process_rss_mb": rss_mb,
                "hit_rate": round(hit_rate, 4),
                "ttl_seconds": self.ttl_ms / 1000.0,
                "preload_enabled": self.preload_enabled,
                "cache_double_check": double_check_enabled,
                "fp_probe_interval_sec": cfg.cache_fp_probe_interval_sec,
                "stats": counters,
            }
106
219
 
107
220
 
108
- # 全局单例
109
- _cache = StatusCache(ttl_ms=1000)
221
# Process-wide StatusCache singleton, created lazily by get_cache().
_cache_instance: Optional["StatusCache"] = None
# Serializes creation and reset of the singleton.  The cache is used from a
# background probe thread as well as from request handling, so unsynchronized
# lazy creation could build two different caches.
_cache_instance_lock = threading.Lock()


def get_cache() -> "StatusCache":
    """Return the shared StatusCache, creating it from the fortify config on first use.

    Creation is guarded by a module-level lock and re-checked inside it, so
    concurrent first callers all receive the same instance.  The instance is
    published only after it is fully configured.
    """
    global _cache_instance
    existing = _cache_instance
    if existing is not None:
        # Fast path: no locking once the singleton exists.
        return existing

    with _cache_instance_lock:
        if _cache_instance is None:
            from core.config_fortify import get_fortify_config

            c = get_fortify_config()
            created = StatusCache(
                ttl_ms=c.cache_ttl_seconds * 1000,
                max_size=c.cache_max_entries,
                max_memory_mb=c.cache_max_memory_mb,
            )
            created.preload_enabled = c.cache_preload
            _cache_instance = created
        return _cache_instance


def reset_cache_for_tests() -> None:
    """Discard the shared cache so the next get_cache() call rebuilds it."""
    global _cache_instance
    with _cache_instance_lock:
        _cache_instance = None