npm - openclaw-agent-dashboard - Versions diffs - 1.0.39 → 1.0.40 - Mend

openclaw-agent-dashboard 1.0.39 → 1.0.40

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (54)

package/dashboard/api/agent_config_api.py +28 -7
package/dashboard/api/agents.py +48 -10
package/dashboard/api/agents_config.py +5 -1
package/dashboard/api/chains.py +25 -5
package/dashboard/api/collaboration.py +10 -9
package/dashboard/api/debug_paths.py +5 -1
package/dashboard/api/error_analysis.py +29 -11
package/dashboard/api/errors.py +27 -11
package/dashboard/api/fortify_routes.py +80 -0
package/dashboard/api/input_safety.py +60 -0
package/dashboard/api/performance.py +73 -53
package/dashboard/api/subagents.py +95 -99
package/dashboard/api/timeline.py +24 -3
package/dashboard/api/version.py +2 -0
package/dashboard/api/websocket.py +9 -7
package/dashboard/core/__init__.py +1 -0
package/dashboard/core/config_fortify.py +112 -0
package/dashboard/core/error_handler.py +339 -0
package/dashboard/core/fallback_manager.py +70 -0
package/dashboard/core/safe_api_error.py +76 -0
package/dashboard/core/schemas/__init__.py +16 -0
package/dashboard/core/schemas/base.py +43 -0
package/dashboard/core/schemas/session_schema.py +40 -0
package/dashboard/core/schemas/subagent_schema.py +23 -0
package/dashboard/data/agent_config_manager.py +6 -4
package/dashboard/data/chain_reader.py +16 -12
package/dashboard/data/error_analyzer.py +15 -11
package/dashboard/data/session_reader.py +268 -46
package/dashboard/data/subagent_reader.py +74 -49
package/dashboard/data/timeline_reader.py +35 -49
package/dashboard/main.py +24 -2
package/dashboard/mechanism_reader.py +4 -5
package/dashboard/mechanisms.py +2 -2
package/dashboard/pytest.ini +3 -0
package/dashboard/requirements.txt +5 -0
package/dashboard/status/cache_fp_probe.py +40 -0
package/dashboard/status/status_cache.py +199 -72
package/dashboard/status/status_calculator.py +50 -30
package/dashboard/tests/conftest.py +84 -0
package/dashboard/tests/test_api_contracts.py +372 -0
package/dashboard/tests/test_bench_fortify.py +176 -0
package/dashboard/tests/test_fortify.py +741 -0
package/dashboard/utils/__init__.py +1 -0
package/dashboard/utils/data_repair.py +210 -0
package/dashboard/watchers/file_watcher.py +367 -77
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/dashboard/agents.py +0 -74
package/dashboard/collaboration.py +0 -407
package/dashboard/errors.py +0 -63
package/dashboard/performance.py +0 -474
package/dashboard/session_reader.py +0 -240
package/dashboard/status_calculator.py +0 -121
package/dashboard/subagent_reader.py +0 -232

package/dashboard/status/status_cache.py CHANGED

@@ -2,113 +2,240 @@
 状态缓存 - 缓存 Agent 状态计算结果
 通过缓存减少重复的文件读取操作，提升状态计算性能
 """
+from __future__ import annotations
+import sys
 import threading
 import time
-from typing import Dict, Any, Optional
+from pathlib import Path
+from typing import Any, Dict, Optional
+try:
+    import psutil
+except ImportError:
+    psutil = None  # type: ignore
-class StatusCache:
-    """Agent 状态缓存（线程安全）
-    功能：
-    - 缓存 Agent 状态计算结果
-    - TTL 机制自动过期
-    - 文件变更时主动失效缓存
-    - 线程安全（使用锁保护）
-    - 最大条目数限制，超出时清理最旧条目
+def _estimate_payload_size(data: Dict[str, Any]) -> int:
+    n = 256
+    for k, v in data.items():
+        if str(k).startswith("_"):
+            continue
+        n += sys.getsizeof(k) + sys.getsizeof(v)
+    return n
+def source_mtimes_for_agent_cache(agent_id: str) -> Dict[str, Optional[float]]:
     """
-    def __init__(self, ttl_ms: int = 1000, max_size: int = 100):
-        """
-        初始化缓存
-        Args:
-            ttl_ms: 缓存过期时间（毫秒），默认 1 秒
-            max_size: 最大条目数，超出时清理最旧条目，默认 100
-        """
+    用于缓存「双验证」：与状态计算强相关的源文件 mtime。
+    TTL 通过后若任一 mtime 变化则视为 miss（REQ_002-SPEC-06）。
+    """
+    from data.config_reader import get_openclaw_root, normalize_openclaw_agent_id
+    aid = normalize_openclaw_agent_id(agent_id)
+    root = get_openclaw_root()
+    out: Dict[str, Optional[float]] = {}
+    paths: Dict[str, Path] = {
+        "sessions_index": root / "agents" / aid / "sessions" / "sessions.json",
+        "subagent_runs": root / "subagents" / "runs.json",
+    }
+    for key, p in paths.items():
+        try:
+            out[key] = p.stat().st_mtime if p.is_file() else None
+        except OSError:
+            out[key] = None
+    return out
+class StatusCache:
+    """Agent 状态缓存（线程安全）"""
+    def __init__(self, ttl_ms: int = 1000, max_size: int = 100, max_memory_mb: int = 100):
         self.ttl_ms = ttl_ms
         self.max_size = max_size
+        self.max_memory_bytes = max_memory_mb * 1024 * 1024
         self._cache: Dict[str, Dict[str, Any]] = {}
         self._lock = threading.Lock()
+        self._hits = 0
+        self._misses = 0
+        self._evictions = 0
+        self._fp_invalidations = 0
+        self._stale_fallback_reads = 0
+        self.preload_enabled = True
     def get(self, agent_id: str) -> Optional[Dict[str, Any]]:
-        """
-        获取缓存的状态
-        Args:
-            agent_id: Agent ID
-        Returns:
-            缓存的状态数据，未命中或已过期返回 None
-        """
         with self._lock:
             entry = self._cache.get(agent_id)
             if not entry:
+                self._misses += 1
                 return None
-            # 检查是否过期
             now = time.time() * 1000
-            if now - entry['_timestamp'] > self.ttl_ms:
-                del self._cache[agent_id]
+            if now - entry["_timestamp"] > self.ttl_ms:
+                # TTL 逻辑 miss，但保留条目供 IO 降级时 get_stale_fallback（REQ_003-AC-003）
+                self._misses += 1
                 return None
-            # 返回状态数据（不包含元数据）
-            return {k: v for k, v in entry.items() if not k.startswith('_')}
+            from core.config_fortify import get_fortify_config
+            if get_fortify_config().cache_double_check:
+                fp = entry.get("_fp")
+                if fp is not None:
+                    current = source_mtimes_for_agent_cache(agent_id)
+                    if current != fp:
+                        del self._cache[agent_id]
+                        self._misses += 1
+                        self._fp_invalidations += 1
+                        return None
+            self._hits += 1
+            entry["_last_access"] = now
+            return {k: v for k, v in entry.items() if not str(k).startswith("_")}
+    def get_stale_fallback(self, agent_id: str) -> Optional[Dict[str, Any]]:
+        """忽略 TTL 与 mtime 双验证，返回仍驻留在缓存中的最近一条数据（降级读）。"""
+        with self._lock:
+            entry = self._cache.get(agent_id)
+            if not entry:
+                return None
+            self._stale_fallback_reads += 1
+            now = time.time() * 1000
+            entry["_last_access"] = now
+            return {k: v for k, v in entry.items() if not str(k).startswith("_")}
     def set(self, agent_id: str, data: Dict[str, Any]) -> None:
-        """
-        设置缓存
-        Args:
-            agent_id: Agent ID
-            data: 状态数据
-        """
         with self._lock:
-            # 限制缓存大小：超出时删除最旧的条目
             if len(self._cache) >= self.max_size and agent_id not in self._cache:
-                oldest_key = min(
-                    self._cache.keys(),
-                    key=lambda k: self._cache[k].get('_timestamp', 0)
-                )
-                del self._cache[oldest_key]
+                self._evict_oldest(exclude=agent_id)
+            now = time.time() * 1000
+            est = _estimate_payload_size(data)
+            from core.config_fortify import get_fortify_config
+            fp: Optional[Dict[str, Optional[float]]] = None
+            if get_fortify_config().cache_double_check:
+                fp = source_mtimes_for_agent_cache(agent_id)
             self._cache[agent_id] = {
                 **data,
-                '_timestamp': time.time() * 1000
+                "_timestamp": now,
+                "_last_access": now,
+                "_est_bytes": est,
+                **({"_fp": fp} if fp is not None else {}),
             }
+            self._enforce_memory(agent_id)
+    def _evict_oldest(self, exclude: Optional[str] = None) -> None:
+        keys = [k for k in self._cache if k != exclude]
+        if not keys:
+            return
+        oldest_key = min(
+            keys,
+            key=lambda k: self._cache[k].get("_last_access", self._cache[k].get("_timestamp", 0)),
+        )
+        del self._cache[oldest_key]
+        self._evictions += 1
+    def _total_estimated_bytes(self) -> int:
+        return int(sum(self._cache[k].get("_est_bytes", 0) for k in self._cache))
+    def _enforce_memory(self, protect_key: Optional[str] = None) -> None:
+        while self._total_estimated_bytes() > self.max_memory_bytes and len(self._cache) > 1:
+            self._evict_oldest(exclude=protect_key)
+            if protect_key and len(self._cache) == 1:
+                break
     def invalidate(self, agent_id: Optional[str] = None) -> None:
-        """
-        失效缓存
-        Args:
-            agent_id: 指定 Agent ID，None 表示清空所有
-        """
         with self._lock:
             if agent_id:
                 self._cache.pop(agent_id, None)
             else:
                 self._cache.clear()
-    def get_stats(self) -> Dict[str, Any]:
+    def invalidate_stale_fp_entries(self) -> int:
         """
-        获取缓存统计信息
-        Returns:
-            {'size': int, 'ttl_ms': int, 'max_size': int}
+        后台探针：对仍在 TTL 内的条目比对 mtime 指纹，不一致则剔除（RISK-004 / NFR-R-004）。
+        与 get() 内双验证逻辑一致，适用于长时间无请求时的最终一致补强。
         """
+        from core.config_fortify import get_fortify_config
+        if not get_fortify_config().cache_double_check:
+            return 0
+        invalidated = 0
         with self._lock:
+            agent_ids = list(self._cache.keys())
+        now_ms = time.time() * 1000
+        for agent_id in agent_ids:
+            with self._lock:
+                entry = self._cache.get(agent_id)
+                if not entry:
+                    continue
+                if now_ms - entry["_timestamp"] > self.ttl_ms:
+                    continue
+                fp = entry.get("_fp")
+                if fp is None:
+                    continue
+            current = source_mtimes_for_agent_cache(agent_id)
+            if current == fp:
+                continue
+            with self._lock:
+                entry2 = self._cache.get(agent_id)
+                if entry2 and entry2.get("_fp") == fp:
+                    del self._cache[agent_id]
+                    self._fp_invalidations += 1
+                    invalidated += 1
+        return invalidated
+    def get_stats(self) -> Dict[str, Any]:
+        from core.config_fortify import get_fortify_config
+        cfg = get_fortify_config()
+        dbl = cfg.cache_double_check
+        with self._lock:
+            total = self._hits + self._misses
+            hit_rate = (self._hits / total) if total else 0.0
+            rss_mb = None
+            if psutil:
+                try:
+                    rss_mb = round(psutil.Process().memory_info().rss / (1024 * 1024), 2)
+                except Exception:
+                    pass
             return {
-                'size': len(self._cache),
-                'ttl_ms': self.ttl_ms,
-                'max_size': self.max_size
+                "size": len(self._cache),
+                "max_size": self.max_size,
+                "memory_usage_mb": round(self._total_estimated_bytes() / (1024 * 1024), 3),
+                "memory_estimate_mb": round(self._total_estimated_bytes() / (1024 * 1024), 3),
+                "max_memory_mb": round(self.max_memory_bytes / (1024 * 1024), 2),
+                "process_rss_mb": rss_mb,
+                "hit_rate": round(hit_rate, 4),
+                "ttl_seconds": self.ttl_ms / 1000.0,
+                "preload_enabled": self.preload_enabled,
+                "cache_double_check": dbl,
+                "fp_probe_interval_sec": cfg.cache_fp_probe_interval_sec,
+                "stats": {
+                    "hits": self._hits,
+                    "misses": self._misses,
+                    "evictions": self._evictions,
+                    "fp_invalidations": self._fp_invalidations,
+                    "stale_fallback_reads": self._stale_fallback_reads,
+                },
             }
-# 全局单例
-_cache = StatusCache(ttl_ms=1000)
+_cache_instance: Optional[StatusCache] = None
 def get_cache() -> StatusCache:
-    """获取全局缓存实例"""
-    return _cache
+    global _cache_instance
+    if _cache_instance is None:
+        from core.config_fortify import get_fortify_config
+        c = get_fortify_config()
+        _cache_instance = StatusCache(
+            ttl_ms=c.cache_ttl_seconds * 1000,
+            max_size=c.cache_max_entries,
+            max_memory_mb=c.cache_max_memory_mb,
+        )
+        _cache_instance.preload_enabled = c.cache_preload
+    return _cache_instance
+def reset_cache_for_tests() -> None:
+    global _cache_instance
+    _cache_instance = None

package/dashboard/status/status_calculator.py CHANGED

@@ -77,48 +77,68 @@ def calculate_agent_status(agent_id: str, use_cache: bool = True) -> AgentStatus
         cached = cache.get(agent_id)
         if cached and 'status' in cached:
             return cached['status']
-    # 重新计算
-    # 检查异常
-    if has_recent_errors(agent_id, minutes=5):
-        status = 'down'
-    # 检查工作中：subagent run 未结束（与连线 activePath 同源）
-    elif is_agent_working(agent_id):
-        status = 'working'
-    elif _main_agent_solo_processing(agent_id):
-        status = 'working'
-    else:
-        # 默认空闲
-        status = 'idle'
+    try:
+        # 重新计算
+        if has_recent_errors(agent_id, minutes=5):
+            status = 'down'
+        elif is_agent_working(agent_id):
+            status = 'working'
+        elif _main_agent_solo_processing(agent_id):
+            status = 'working'
+        else:
+            status = 'idle'
+    except OSError as e:
+        from core.error_handler import classify_exception, record_error
+        from core.fallback_manager import run_fallback
+        cat = classify_exception(e)
+        record_error(cat, str(e), f"status_calculator:calculate:{agent_id}", exc=e)
+        fb = run_fallback(cat, agent_id=agent_id)
+        if fb is not None:
+            return fb  # type: ignore[return-value]
+        return 'idle'
     # 更新缓存（只缓存状态）
     if use_cache:
         cache = get_cache()
         cache.set(agent_id, {'status': status})
     return status
 def get_agents_with_status() -> list:
     """获取所有 Agent 及其状态"""
-    agents = get_agents_list()
+    try:
+        agents = get_agents_list()
+    except OSError as e:
+        from core.error_handler import classify_exception, record_error
+        record_error(classify_exception(e), str(e), "get_agents_with_status:list", exc=e)
+        return []
     result = []
     for agent in agents:
         agent_id = agent.get('id')
-        status = calculate_agent_status(agent_id)
-        # 获取当前任务（仅工作中展示；空闲时不应残留已结束 run 的文案）
-        current_task = get_current_task(agent_id)
-        if status == 'idle':
+        try:
+            status = calculate_agent_status(agent_id)
+            current_task = get_current_task(agent_id)
+            if status == 'idle':
+                current_task = ''
+            last_active = get_last_active_time(agent_id)
+            last_error = get_last_error(agent_id) if status == 'down' else None
+        except OSError as e:
+            from core.error_handler import classify_exception, record_error
+            from core.fallback_manager import run_fallback
+            cat = classify_exception(e)
+            record_error(cat, str(e), f"get_agents_with_status:{agent_id}", exc=e)
+            status = run_fallback(cat, agent_id=agent_id) or 'idle'
             current_task = ''
-        # 获取最后活跃时间
-        last_active = get_last_active_time(agent_id)
-        # 获取错误信息
-        last_error = get_last_error(agent_id) if status == 'down' else None
+            last_active = 0
+            last_error = None
         result.append({
             'id': agent_id,
             'name': agent.get('name'),
@@ -128,7 +148,7 @@ def get_agents_with_status() -> list:
             'lastActiveAt': last_active,
             'error': last_error
         })
     return result

package/dashboard/tests/conftest.py ADDED

@@ -0,0 +1,84 @@
+"""Shared pytest fixtures for backend tests."""
+from __future__ import annotations
+import json
+import sys
+from pathlib import Path
+import pytest
+BACKEND = Path(__file__).resolve().parent.parent
+sys.path.insert(0, str(BACKEND))
+@pytest.fixture(autouse=True)
+def reset_fortify_state():
+    """Reset all fortify singletons between tests."""
+    from core.config_fortify import refresh_fortify_config_cache
+    from core.fallback_manager import reset_fallback_handlers_for_tests
+    from status.status_cache import reset_cache_for_tests
+    reset_cache_for_tests()
+    reset_fallback_handlers_for_tests()
+    refresh_fortify_config_cache()
+    yield
+    reset_cache_for_tests()
+    reset_fallback_handlers_for_tests()
+    refresh_fortify_config_cache()
+@pytest.fixture
+def fake_openclaw_root(tmp_path: Path):
+    """Minimal fake openclaw root with sessions.json index and JSONL fixtures."""
+    root = tmp_path / ".openclaw"
+    root.mkdir(parents=True, exist_ok=True)
+    agents_dir = root / "agents"
+    agents_dir.mkdir(exist_ok=True)
+    main_agent = agents_dir / "main"
+    main_agent.mkdir(exist_ok=True)
+    # sessions/ subdirectory (canonical path: agents/<id>/sessions/)
+    sessions_dir = main_agent / "sessions"
+    sessions_dir.mkdir(exist_ok=True)
+    # sessions.json index
+    sessions_index = {
+        "sessions": [
+            {
+                "id": "session-001",
+                "status": "active",
+                "updatedAt": 1746000000,
+                "turns": 3,
+            },
+            {
+                "id": "session-002",
+                "status": "completed",
+                "updatedAt": 1745900000,
+                "turns": 7,
+            },
+        ]
+    }
+    sessions_file = sessions_dir / "sessions.json"
+    sessions_file.write_text(json.dumps(sessions_index))
+    # JSONL session file (in sessions/ subdirectory)
+    session_jsonl = sessions_dir / "session-001.jsonl"
+    messages = [
+        {"type": "start", "sessionId": "session-001", "timestamp": 1746000000},
+        {"type": "message", "message": {"role": "user", "content": [{"type": "text", "text": "hello"}]}},
+        {"type": "message", "message": {"role": "assistant", "content": [{"type": "text", "text": "hi"}]}},
+    ]
+    session_jsonl.write_text("\n".join(json.dumps(m) for m in messages) + "\n")
+    # JSONL with repaired line (trailing comma) — CA-003 fixture
+    session_with_bad = sessions_dir / "session-002.jsonl"
+    bad_messages = [
+        json.dumps({"type": "start", "sessionId": "session-002"}),
+        '{"type": "message", "message": {"role": "user", "content": [{"type": "text", "text": "test"}]}}',
+        '{"type": "end", "sessionId": "session-002", "status": "ok"}',
+    ]
+    session_with_bad.write_text("\n".join(bad_messages) + "\n")
+    return root