openclaw-agent-dashboard 1.0.39 → 1.0.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dashboard/api/agent_config_api.py +28 -7
- package/dashboard/api/agents.py +48 -10
- package/dashboard/api/agents_config.py +5 -1
- package/dashboard/api/chains.py +25 -5
- package/dashboard/api/collaboration.py +10 -9
- package/dashboard/api/debug_paths.py +5 -1
- package/dashboard/api/error_analysis.py +29 -11
- package/dashboard/api/errors.py +27 -11
- package/dashboard/api/fortify_routes.py +80 -0
- package/dashboard/api/input_safety.py +60 -0
- package/dashboard/api/performance.py +73 -53
- package/dashboard/api/subagents.py +95 -99
- package/dashboard/api/timeline.py +24 -3
- package/dashboard/api/version.py +2 -0
- package/dashboard/api/websocket.py +9 -7
- package/dashboard/core/__init__.py +1 -0
- package/dashboard/core/config_fortify.py +112 -0
- package/dashboard/core/error_handler.py +339 -0
- package/dashboard/core/fallback_manager.py +70 -0
- package/dashboard/core/safe_api_error.py +76 -0
- package/dashboard/core/schemas/__init__.py +16 -0
- package/dashboard/core/schemas/base.py +43 -0
- package/dashboard/core/schemas/session_schema.py +40 -0
- package/dashboard/core/schemas/subagent_schema.py +23 -0
- package/dashboard/data/agent_config_manager.py +6 -4
- package/dashboard/data/chain_reader.py +16 -12
- package/dashboard/data/error_analyzer.py +15 -11
- package/dashboard/data/session_reader.py +268 -46
- package/dashboard/data/subagent_reader.py +74 -49
- package/dashboard/data/timeline_reader.py +35 -49
- package/dashboard/main.py +24 -2
- package/dashboard/mechanism_reader.py +4 -5
- package/dashboard/mechanisms.py +2 -2
- package/dashboard/pytest.ini +3 -0
- package/dashboard/requirements.txt +5 -0
- package/dashboard/status/cache_fp_probe.py +40 -0
- package/dashboard/status/status_cache.py +199 -72
- package/dashboard/status/status_calculator.py +50 -30
- package/dashboard/tests/conftest.py +84 -0
- package/dashboard/tests/test_api_contracts.py +372 -0
- package/dashboard/tests/test_bench_fortify.py +176 -0
- package/dashboard/tests/test_fortify.py +741 -0
- package/dashboard/utils/__init__.py +1 -0
- package/dashboard/utils/data_repair.py +210 -0
- package/dashboard/watchers/file_watcher.py +367 -77
- package/openclaw.plugin.json +1 -1
- package/package.json +1 -1
- package/dashboard/agents.py +0 -74
- package/dashboard/collaboration.py +0 -407
- package/dashboard/errors.py +0 -63
- package/dashboard/performance.py +0 -474
- package/dashboard/session_reader.py +0 -240
- package/dashboard/status_calculator.py +0 -121
- package/dashboard/subagent_reader.py +0 -232
|
@@ -1,43 +1,35 @@
|
|
|
1
1
|
"""
|
|
2
2
|
文件变更监听 - 关键文件变更时触发 WebSocket 推送
|
|
3
|
-
使用 watchdog
|
|
4
|
-
集成缓存失效机制,确保状态一致性
|
|
3
|
+
使用 watchdog 监听;失败时重试并降级为轮询;集成缓存失效
|
|
5
4
|
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
6
9
|
import threading
|
|
7
10
|
import time
|
|
11
|
+
from datetime import datetime, timezone
|
|
8
12
|
from pathlib import Path
|
|
9
|
-
from typing import Callable, Optional
|
|
13
|
+
from typing import Any, Callable, Dict, Optional
|
|
14
|
+
|
|
15
|
+
_LOG = logging.getLogger("openclaw.fortify.watcher")
|
|
16
|
+
|
|
17
|
+
from core.config_fortify import get_fortify_config
|
|
18
|
+
from core.error_handler import record_error
|
|
19
|
+
|
|
20
|
+
DEBOUNCE_SECONDS = 0.3
|
|
10
21
|
|
|
11
22
|
|
|
12
23
|
def _extract_agent_id_from_path(filepath: str) -> Optional[str]:
|
|
13
|
-
"""从文件路径中提取 Agent ID(跨平台兼容)
|
|
14
|
-
|
|
15
|
-
Args:
|
|
16
|
-
filepath: 文件路径(Unix 或 Windows 风格)
|
|
17
|
-
|
|
18
|
-
Returns:
|
|
19
|
-
Agent ID,无法解析时返回 None
|
|
20
|
-
|
|
21
|
-
Examples:
|
|
22
|
-
/path/to/.openclaw/agents/main/sessions/xxx.jsonl -> main
|
|
23
|
-
C:\\path\\to\\.openclaw\\agents\\main\\sessions\\xxx.jsonl -> main
|
|
24
|
-
"""
|
|
25
24
|
try:
|
|
26
25
|
path = Path(filepath)
|
|
27
26
|
parts = path.parts
|
|
28
|
-
|
|
29
|
-
# 查找 'agents' 目录的位置
|
|
30
27
|
try:
|
|
31
|
-
agents_idx = parts.index('agents')
|
|
28
|
+
agents_idx = parts.index("agents")
|
|
32
29
|
except ValueError:
|
|
33
30
|
return None
|
|
34
|
-
|
|
35
|
-
# 检查结构: .../agents/{agent_id}/sessions/...
|
|
36
|
-
# agents_idx + 1 = agent_id
|
|
37
|
-
# agents_idx + 2 = 'sessions'
|
|
38
|
-
if agents_idx + 2 < len(parts) and parts[agents_idx + 2] == 'sessions':
|
|
31
|
+
if agents_idx + 2 < len(parts) and parts[agents_idx + 2] == "sessions":
|
|
39
32
|
return parts[agents_idx + 1]
|
|
40
|
-
|
|
41
33
|
return None
|
|
42
34
|
except Exception:
|
|
43
35
|
return None
|
|
@@ -45,12 +37,11 @@ def _extract_agent_id_from_path(filepath: str) -> Optional[str]:
|
|
|
45
37
|
|
|
46
38
|
def _get_openclaw_dir() -> Path:
|
|
47
39
|
from data.config_reader import get_openclaw_root
|
|
40
|
+
|
|
48
41
|
return get_openclaw_root()
|
|
49
|
-
DEBOUNCE_SECONDS = 0.3 # 同一文件短时间多次变更只触发一次
|
|
50
42
|
|
|
51
43
|
|
|
52
44
|
def _get_watch_dirs() -> list[tuple[Path, bool]]:
|
|
53
|
-
"""获取需要监听的目录列表 (path, recursive)"""
|
|
54
45
|
dirs: list[tuple[Path, bool]] = []
|
|
55
46
|
openclaw_dir = _get_openclaw_dir()
|
|
56
47
|
subagents = openclaw_dir / "subagents"
|
|
@@ -58,14 +49,15 @@ def _get_watch_dirs() -> list[tuple[Path, bool]]:
|
|
|
58
49
|
dirs.append((subagents, False))
|
|
59
50
|
try:
|
|
60
51
|
from data.task_history import get_dashboard_data_dir
|
|
52
|
+
|
|
61
53
|
dashboard_data = get_dashboard_data_dir()
|
|
62
54
|
if dashboard_data.exists():
|
|
63
55
|
dirs.append((dashboard_data, False))
|
|
64
56
|
except Exception:
|
|
65
57
|
pass
|
|
66
|
-
# workspace/*/memory: 从配置读取,或回退到 workspace-main
|
|
67
58
|
try:
|
|
68
59
|
from data.config_reader import get_workspace_paths
|
|
60
|
+
|
|
69
61
|
for ws in get_workspace_paths():
|
|
70
62
|
memory = ws / "memory"
|
|
71
63
|
if memory.exists():
|
|
@@ -101,8 +93,6 @@ class DebouncedHandler:
|
|
|
101
93
|
if self._timer:
|
|
102
94
|
self._timer.cancel()
|
|
103
95
|
self._timer = None
|
|
104
|
-
|
|
105
|
-
# 保存文件路径
|
|
106
96
|
if filepath:
|
|
107
97
|
self._pending_path = filepath
|
|
108
98
|
|
|
@@ -115,7 +105,7 @@ class DebouncedHandler:
|
|
|
115
105
|
try:
|
|
116
106
|
self.callback(path)
|
|
117
107
|
except Exception as e:
|
|
118
|
-
|
|
108
|
+
record_error("unknown", str(e), "file_watcher_debounce")
|
|
119
109
|
|
|
120
110
|
if now - self._last_trigger < self.debounce_sec:
|
|
121
111
|
self._timer = threading.Timer(self.debounce_sec - (now - self._last_trigger), do_callback)
|
|
@@ -126,61 +116,160 @@ class DebouncedHandler:
|
|
|
126
116
|
|
|
127
117
|
|
|
128
118
|
_observer = None
|
|
129
|
-
_handler = None
|
|
119
|
+
_handler: Optional[DebouncedHandler] = None
|
|
120
|
+
_event_loop = None
|
|
121
|
+
_watcher_mode = "stopped"
|
|
122
|
+
_poll_timer: Optional[threading.Timer] = None
|
|
123
|
+
_monitor_stop = threading.Event()
|
|
124
|
+
_monitor_thread: Optional[threading.Thread] = None
|
|
125
|
+
_started_at = 0.0
|
|
126
|
+
_switch_count = 0
|
|
127
|
+
_resume_success_count = 0
|
|
128
|
+
_resume_failure_count = 0
|
|
129
|
+
_events_processed = 0
|
|
130
|
+
_last_error: Optional[str] = None
|
|
131
|
+
_last_heartbeat = 0.0
|
|
132
|
+
_watchdog_failure_since: Optional[float] = None
|
|
133
|
+
_poll_ticks = 0
|
|
134
|
+
_health_lock = threading.Lock()
|
|
135
|
+
_last_full_sync_iso: Optional[str] = None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _watcher_state_path() -> Optional[Path]:
|
|
139
|
+
try:
|
|
140
|
+
from data.task_history import get_dashboard_data_dir
|
|
141
|
+
|
|
142
|
+
d = get_dashboard_data_dir()
|
|
143
|
+
d.mkdir(parents=True, exist_ok=True)
|
|
144
|
+
return d / "watcher_state.json"
|
|
145
|
+
except Exception:
|
|
146
|
+
return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _persist_watcher_state() -> None:
|
|
150
|
+
"""持久化轻量快照(跨进程重启可读;不恢复内存计数,仅供健康检查与排障)。"""
|
|
151
|
+
path = _watcher_state_path()
|
|
152
|
+
if path is None:
|
|
153
|
+
return
|
|
154
|
+
watch_dirs: list[str] = []
|
|
155
|
+
try:
|
|
156
|
+
for p, _ in _get_watch_dirs()[:48]:
|
|
157
|
+
try:
|
|
158
|
+
watch_dirs.append(str(p.resolve()))
|
|
159
|
+
except OSError:
|
|
160
|
+
watch_dirs.append(str(p))
|
|
161
|
+
except Exception:
|
|
162
|
+
pass
|
|
163
|
+
with _health_lock:
|
|
164
|
+
payload = {
|
|
165
|
+
"mode": _watcher_mode,
|
|
166
|
+
"switch_count": _switch_count,
|
|
167
|
+
"resume_success_count": _resume_success_count,
|
|
168
|
+
"resume_failure_count": _resume_failure_count,
|
|
169
|
+
"events_processed": _events_processed,
|
|
170
|
+
"poll_ticks_counter": _poll_ticks,
|
|
171
|
+
"last_error": _last_error,
|
|
172
|
+
"last_full_sync": _last_full_sync_iso,
|
|
173
|
+
"watch_dirs": watch_dirs,
|
|
174
|
+
"started_at": datetime.fromtimestamp(_started_at, tz=timezone.utc)
|
|
175
|
+
.isoformat()
|
|
176
|
+
.replace("+00:00", "Z")
|
|
177
|
+
if _started_at
|
|
178
|
+
else None,
|
|
179
|
+
"updated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
|
180
|
+
}
|
|
181
|
+
try:
|
|
182
|
+
path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
183
|
+
except OSError:
|
|
184
|
+
pass
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _read_persisted_watcher_state() -> Optional[Dict[str, Any]]:
|
|
188
|
+
path = _watcher_state_path()
|
|
189
|
+
if path is None or not path.exists():
|
|
190
|
+
return None
|
|
191
|
+
try:
|
|
192
|
+
data = json.loads(path.read_text(encoding="utf-8"))
|
|
193
|
+
return data if isinstance(data, dict) else None
|
|
194
|
+
except (json.JSONDecodeError, OSError):
|
|
195
|
+
return None
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _is_watcher_record_error_scope(scope: str) -> bool:
|
|
199
|
+
if not scope:
|
|
200
|
+
return False
|
|
201
|
+
if scope.startswith("file_watcher"):
|
|
202
|
+
return True
|
|
203
|
+
return scope in ("watchdog_resume", "polling_tick")
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _watcher_framework_error_count() -> int:
|
|
207
|
+
from core.error_handler import get_framework_error_stats
|
|
208
|
+
|
|
209
|
+
total = 0
|
|
210
|
+
by_scope = get_framework_error_stats().get("by_agent", {})
|
|
211
|
+
for scope, info in by_scope.items():
|
|
212
|
+
if not isinstance(info, dict):
|
|
213
|
+
continue
|
|
214
|
+
if _is_watcher_record_error_scope(str(scope)):
|
|
215
|
+
total += int(info.get("count", 0))
|
|
216
|
+
return total
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def _full_resync_cache_and_push() -> None:
|
|
220
|
+
"""轮询恢复 watchdog 或显式需要时:全量缓存失效 + 推送(REQ_001-SPEC-05)。"""
|
|
221
|
+
global _last_full_sync_iso
|
|
222
|
+
try:
|
|
223
|
+
_on_file_changed(None)
|
|
224
|
+
_last_full_sync_iso = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
|
225
|
+
except Exception as e:
|
|
226
|
+
record_error("unknown", str(e), "file_watcher_full_resync", exc=e)
|
|
227
|
+
_persist_watcher_state()
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _set_mode(mode: str) -> None:
|
|
231
|
+
global _watcher_mode
|
|
232
|
+
with _health_lock:
|
|
233
|
+
_watcher_mode = mode
|
|
234
|
+
_persist_watcher_state()
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _touch_activity() -> None:
|
|
238
|
+
global _events_processed, _last_heartbeat
|
|
239
|
+
_events_processed += 1
|
|
240
|
+
_last_heartbeat = time.time()
|
|
130
241
|
|
|
131
242
|
|
|
132
243
|
def _on_file_changed(filepath: Optional[str] = None) -> None:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
Args:
|
|
136
|
-
filepath: 变更的文件路径(可选)
|
|
137
|
-
"""
|
|
244
|
+
global _last_error
|
|
138
245
|
try:
|
|
246
|
+
_touch_activity()
|
|
139
247
|
from api.websocket import broadcast_full_state
|
|
140
248
|
from status.status_cache import get_cache
|
|
141
249
|
import asyncio
|
|
142
|
-
|
|
143
|
-
# 失效缓存
|
|
250
|
+
|
|
144
251
|
cache = get_cache()
|
|
145
252
|
if filepath:
|
|
146
|
-
# 解析受影响的 Agent ID(跨平台兼容)
|
|
147
|
-
# 例如:/path/to/.openclaw/agents/main/sessions/xxx.jsonl -> main
|
|
148
|
-
# 或 Windows: C:\path\to\.openclaw\agents\main\sessions\xxx.jsonl -> main
|
|
149
253
|
agent_id = _extract_agent_id_from_path(filepath)
|
|
150
254
|
if agent_id:
|
|
151
255
|
cache.invalidate(agent_id)
|
|
152
|
-
print(f"[FileWatcher] 失效缓存: {agent_id}")
|
|
153
256
|
else:
|
|
154
|
-
# 无法解析,清空所有缓存
|
|
155
257
|
cache.invalidate()
|
|
156
|
-
print(f"[FileWatcher] 失效所有缓存(无法解析Agent)")
|
|
157
258
|
else:
|
|
158
|
-
# 无文件路径,清空所有缓存
|
|
159
259
|
cache.invalidate()
|
|
160
|
-
|
|
161
|
-
# 触发推送
|
|
260
|
+
|
|
162
261
|
loop = _event_loop
|
|
163
262
|
if loop and broadcast_full_state:
|
|
164
263
|
future = asyncio.run_coroutine_threadsafe(broadcast_full_state(), loop)
|
|
165
264
|
future.result(timeout=10)
|
|
166
265
|
except Exception as e:
|
|
167
|
-
|
|
266
|
+
_last_error = str(e)
|
|
267
|
+
record_error("unknown", str(e), "file_watcher_push")
|
|
168
268
|
|
|
169
269
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
def start_file_watcher(loop) -> None:
|
|
174
|
-
"""启动文件监听(在 FastAPI 启动时调用)"""
|
|
175
|
-
global _observer, _handler, _event_loop
|
|
176
|
-
_event_loop = loop
|
|
177
|
-
|
|
178
|
-
try:
|
|
179
|
-
from watchdog.observers import Observer
|
|
180
|
-
from watchdog.events import FileSystemEventHandler, FileModifiedEvent, FileCreatedEvent
|
|
181
|
-
except ImportError:
|
|
182
|
-
print("[FileWatcher] watchdog 未安装,跳过文件监听。请执行: pip install watchdog")
|
|
183
|
-
return
|
|
270
|
+
def _build_observer() -> Any:
|
|
271
|
+
from watchdog.observers import Observer
|
|
272
|
+
from watchdog.events import FileSystemEventHandler
|
|
184
273
|
|
|
185
274
|
RELEVANT_SUFFIXES = (".json", ".jsonl", ".log")
|
|
186
275
|
|
|
@@ -191,35 +280,236 @@ def start_file_watcher(loop) -> None:
|
|
|
191
280
|
def on_modified(self, event):
|
|
192
281
|
if event.is_directory:
|
|
193
282
|
return
|
|
194
|
-
if self._should_trigger(event.src_path):
|
|
283
|
+
if self._should_trigger(event.src_path) and _handler:
|
|
195
284
|
_handler.trigger(event.src_path)
|
|
196
285
|
|
|
197
286
|
def on_created(self, event):
|
|
198
287
|
if event.is_directory:
|
|
199
288
|
return
|
|
200
|
-
if self._should_trigger(event.src_path):
|
|
289
|
+
if self._should_trigger(event.src_path) and _handler:
|
|
201
290
|
_handler.trigger(event.src_path)
|
|
202
291
|
|
|
203
292
|
watch_dirs = _get_watch_dirs()
|
|
204
293
|
if not watch_dirs:
|
|
205
|
-
|
|
206
|
-
return
|
|
294
|
+
raise RuntimeError("no watch dirs")
|
|
207
295
|
|
|
296
|
+
global _handler, _observer
|
|
208
297
|
_handler = DebouncedHandler(_on_file_changed)
|
|
209
|
-
|
|
210
|
-
|
|
298
|
+
obs = Observer()
|
|
211
299
|
for watch_dir, recursive in watch_dirs:
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
_observer.start()
|
|
215
|
-
print(f"[FileWatcher] 已启动,监听 {len(watch_dirs)} 个目录")
|
|
300
|
+
obs.schedule(Handler(), str(watch_dir), recursive=recursive)
|
|
301
|
+
return obs
|
|
216
302
|
|
|
217
303
|
|
|
218
|
-
def stop_file_watcher() -> None:
|
|
219
|
-
"""停止文件监听"""
|
|
304
|
+
def _stop_watchdog_observer() -> None:
|
|
220
305
|
global _observer
|
|
221
306
|
if _observer:
|
|
222
|
-
|
|
223
|
-
|
|
307
|
+
try:
|
|
308
|
+
_observer.stop()
|
|
309
|
+
_observer.join(timeout=2)
|
|
310
|
+
except Exception:
|
|
311
|
+
pass
|
|
224
312
|
_observer = None
|
|
225
|
-
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _start_monitor_thread(loop) -> None:
|
|
316
|
+
global _monitor_thread
|
|
317
|
+
if _monitor_thread and _monitor_thread.is_alive():
|
|
318
|
+
return
|
|
319
|
+
|
|
320
|
+
def monitor() -> None:
|
|
321
|
+
cfg = get_fortify_config()
|
|
322
|
+
global _watchdog_failure_since
|
|
323
|
+
while not _monitor_stop.is_set():
|
|
324
|
+
time.sleep(5)
|
|
325
|
+
if _monitor_stop.is_set():
|
|
326
|
+
break
|
|
327
|
+
if _watcher_mode != "watchdog":
|
|
328
|
+
continue
|
|
329
|
+
obs = _observer
|
|
330
|
+
if obs is None:
|
|
331
|
+
continue
|
|
332
|
+
if obs.is_alive():
|
|
333
|
+
_watchdog_failure_since = None
|
|
334
|
+
continue
|
|
335
|
+
now = time.time()
|
|
336
|
+
if _watchdog_failure_since is None:
|
|
337
|
+
_watchdog_failure_since = now
|
|
338
|
+
elif now - _watchdog_failure_since >= cfg.watcher_failure_window_sec:
|
|
339
|
+
record_error("io-error", "observer not alive, fallback to polling", "file_watcher")
|
|
340
|
+
_switch_to_polling(loop)
|
|
341
|
+
|
|
342
|
+
_monitor_stop.clear()
|
|
343
|
+
t = threading.Thread(target=monitor, daemon=True)
|
|
344
|
+
t.start()
|
|
345
|
+
_monitor_thread = t
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def _switch_to_polling(loop) -> None:
|
|
349
|
+
global _switch_count
|
|
350
|
+
with _health_lock:
|
|
351
|
+
_stop_watchdog_observer()
|
|
352
|
+
_switch_count += 1
|
|
353
|
+
_persist_watcher_state()
|
|
354
|
+
_start_polling_mode(loop)
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _cancel_poll_timer() -> None:
|
|
358
|
+
global _poll_timer
|
|
359
|
+
if _poll_timer:
|
|
360
|
+
try:
|
|
361
|
+
_poll_timer.cancel()
|
|
362
|
+
except Exception:
|
|
363
|
+
pass
|
|
364
|
+
_poll_timer = None
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _start_polling_mode(loop) -> None:
|
|
368
|
+
global _watcher_mode, _poll_timer, _poll_ticks
|
|
369
|
+
_set_mode("polling")
|
|
370
|
+
cfg = get_fortify_config()
|
|
371
|
+
_cancel_poll_timer()
|
|
372
|
+
|
|
373
|
+
def tick() -> None:
|
|
374
|
+
global _poll_timer, _poll_ticks
|
|
375
|
+
if _monitor_stop.is_set():
|
|
376
|
+
return
|
|
377
|
+
if _watcher_mode != "polling":
|
|
378
|
+
return
|
|
379
|
+
try:
|
|
380
|
+
_on_file_changed(None)
|
|
381
|
+
except Exception as e:
|
|
382
|
+
record_error("unknown", str(e), "polling_tick")
|
|
383
|
+
_poll_ticks += 1
|
|
384
|
+
if _poll_ticks >= 12:
|
|
385
|
+
_poll_ticks = 0
|
|
386
|
+
_try_resume_watchdog(loop)
|
|
387
|
+
_poll_timer = threading.Timer(cfg.watcher_poll_interval_sec, tick)
|
|
388
|
+
_poll_timer.daemon = True
|
|
389
|
+
_poll_timer.start()
|
|
390
|
+
|
|
391
|
+
_poll_ticks = 0
|
|
392
|
+
tick()
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _try_resume_watchdog(loop) -> None:
|
|
396
|
+
global _watcher_mode, _resume_success_count, _resume_failure_count
|
|
397
|
+
if _watcher_mode != "polling" or _monitor_stop.is_set():
|
|
398
|
+
return
|
|
399
|
+
cfg = get_fortify_config()
|
|
400
|
+
try:
|
|
401
|
+
obs = _build_observer()
|
|
402
|
+
obs.start()
|
|
403
|
+
global _observer
|
|
404
|
+
_observer = obs
|
|
405
|
+
_cancel_poll_timer()
|
|
406
|
+
with _health_lock:
|
|
407
|
+
_resume_success_count += 1
|
|
408
|
+
_set_mode("watchdog")
|
|
409
|
+
_start_monitor_thread(loop)
|
|
410
|
+
_LOG.info("file watcher resumed from polling to watchdog mode")
|
|
411
|
+
_full_resync_cache_and_push()
|
|
412
|
+
except Exception as e:
|
|
413
|
+
with _health_lock:
|
|
414
|
+
_resume_failure_count += 1
|
|
415
|
+
_persist_watcher_state()
|
|
416
|
+
record_error("io-error", str(e), "watchdog_resume")
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def start_file_watcher(loop) -> None:
|
|
420
|
+
global _event_loop, _started_at, _observer
|
|
421
|
+
_monitor_stop.clear()
|
|
422
|
+
_event_loop = loop
|
|
423
|
+
_started_at = time.time()
|
|
424
|
+
cfg = get_fortify_config()
|
|
425
|
+
|
|
426
|
+
try:
|
|
427
|
+
__import__("watchdog.observers", fromlist=["Observer"])
|
|
428
|
+
except ImportError:
|
|
429
|
+
record_error("io-error", "watchdog not installed", "file_watcher")
|
|
430
|
+
_set_mode("import_failed")
|
|
431
|
+
_start_polling_mode(loop)
|
|
432
|
+
_LOG.warning("watchdog package not installed; using polling mode")
|
|
433
|
+
return
|
|
434
|
+
|
|
435
|
+
delay = cfg.retry_base_delay
|
|
436
|
+
last_exc: Optional[Exception] = None
|
|
437
|
+
for attempt in range(cfg.watcher_max_retries):
|
|
438
|
+
try:
|
|
439
|
+
obs = _build_observer()
|
|
440
|
+
obs.start()
|
|
441
|
+
_observer = obs
|
|
442
|
+
_set_mode("watchdog")
|
|
443
|
+
_start_monitor_thread(loop)
|
|
444
|
+
_LOG.info("watchdog started (attempt %s)", attempt + 1)
|
|
445
|
+
_persist_watcher_state()
|
|
446
|
+
return
|
|
447
|
+
except Exception as e:
|
|
448
|
+
last_exc = e
|
|
449
|
+
_stop_watchdog_observer()
|
|
450
|
+
record_error("io-error", str(e), f"file_watcher_start_{attempt}")
|
|
451
|
+
time.sleep(delay * (2**attempt))
|
|
452
|
+
|
|
453
|
+
if last_exc:
|
|
454
|
+
_last_error = str(last_exc)
|
|
455
|
+
_switch_to_polling(loop)
|
|
456
|
+
_LOG.warning("watchdog start failed after retries; switched to polling mode")
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def stop_file_watcher() -> None:
|
|
460
|
+
global _monitor_thread, _watcher_mode
|
|
461
|
+
_monitor_stop.set()
|
|
462
|
+
_cancel_poll_timer()
|
|
463
|
+
_stop_watchdog_observer()
|
|
464
|
+
_set_mode("stopped")
|
|
465
|
+
_persist_watcher_state()
|
|
466
|
+
_LOG.info("file watcher stopped")
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def get_watcher_health() -> Dict[str, Any]:
|
|
470
|
+
cfg = get_fortify_config()
|
|
471
|
+
obs_alive = bool(_observer and _observer.is_alive())
|
|
472
|
+
mode = _watcher_mode
|
|
473
|
+
if mode == "import_failed":
|
|
474
|
+
display_mode = "polling"
|
|
475
|
+
else:
|
|
476
|
+
display_mode = mode
|
|
477
|
+
|
|
478
|
+
status = "healthy"
|
|
479
|
+
if mode in ("polling", "import_failed"):
|
|
480
|
+
status = "degraded"
|
|
481
|
+
elif mode == "stopped":
|
|
482
|
+
status = "down"
|
|
483
|
+
elif mode == "watchdog" and _observer and not obs_alive:
|
|
484
|
+
status = "degraded"
|
|
485
|
+
|
|
486
|
+
hb = None
|
|
487
|
+
if _last_heartbeat:
|
|
488
|
+
hb = datetime.fromtimestamp(_last_heartbeat, tz=timezone.utc).isoformat().replace("+00:00", "Z")
|
|
489
|
+
|
|
490
|
+
with _health_lock:
|
|
491
|
+
rc_ok = _resume_success_count
|
|
492
|
+
rc_fail = _resume_failure_count
|
|
493
|
+
sw = _switch_count
|
|
494
|
+
last_sync = _last_full_sync_iso
|
|
495
|
+
|
|
496
|
+
fw_err = _watcher_framework_error_count()
|
|
497
|
+
snapshot = _read_persisted_watcher_state()
|
|
498
|
+
|
|
499
|
+
return {
|
|
500
|
+
"status": status,
|
|
501
|
+
"mode": display_mode,
|
|
502
|
+
"last_heartbeat": hb,
|
|
503
|
+
# error_count:与 /api/errors/stats 同源(record_error 中 file_watcher* / watchdog_resume / polling_tick)
|
|
504
|
+
"error_count": fw_err,
|
|
505
|
+
"switch_count": sw,
|
|
506
|
+
"resume_watchdog_success_count": rc_ok,
|
|
507
|
+
"resume_watchdog_failure_count": rc_fail,
|
|
508
|
+
"last_full_sync": last_sync,
|
|
509
|
+
"uptime_seconds": int(time.time() - _started_at) if _started_at else 0,
|
|
510
|
+
"events_processed": _events_processed,
|
|
511
|
+
"last_error": _last_error,
|
|
512
|
+
"observer_alive": obs_alive,
|
|
513
|
+
"poll_interval_sec": cfg.watcher_poll_interval_sec,
|
|
514
|
+
"persisted_snapshot": snapshot,
|
|
515
|
+
}
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
package/dashboard/agents.py
DELETED
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Agent API 路由
|
|
3
|
-
"""
|
|
4
|
-
from fastapi import APIRouter
|
|
5
|
-
from pydantic import BaseModel
|
|
6
|
-
from typing import List, Optional
|
|
7
|
-
import sys
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
sys.path.append(str(Path(__file__).parent.parent))
|
|
10
|
-
|
|
11
|
-
from status.status_calculator import (
|
|
12
|
-
get_agents_with_status,
|
|
13
|
-
format_last_active
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
router = APIRouter()
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
class AgentStatus(BaseModel):
|
|
20
|
-
id: str
|
|
21
|
-
name: str
|
|
22
|
-
role: str
|
|
23
|
-
status: str # idle/working/down
|
|
24
|
-
currentTask: Optional[str] = None
|
|
25
|
-
lastActiveAt: Optional[int] = None
|
|
26
|
-
lastActiveFormatted: Optional[str] = None
|
|
27
|
-
error: Optional[dict] = None
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
@router.get("/agents", response_model=List[AgentStatus])
|
|
31
|
-
async def get_agents():
|
|
32
|
-
"""获取所有 Agent 列表及状态"""
|
|
33
|
-
agents = get_agents_with_status()
|
|
34
|
-
|
|
35
|
-
# 格式化最后活跃时间
|
|
36
|
-
for agent in agents:
|
|
37
|
-
if agent.get('lastActiveAt'):
|
|
38
|
-
agent['lastActiveFormatted'] = format_last_active(agent['lastActiveAt'])
|
|
39
|
-
|
|
40
|
-
return agents
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
@router.get("/agents/{agent_id}", response_model=AgentStatus)
|
|
44
|
-
async def get_agent(agent_id: str):
|
|
45
|
-
"""获取单个 Agent 详情"""
|
|
46
|
-
agents = get_agents_with_status()
|
|
47
|
-
|
|
48
|
-
from data.config_reader import agent_ids_equal
|
|
49
|
-
|
|
50
|
-
for agent in agents:
|
|
51
|
-
if agent_ids_equal(agent['id'], agent_id):
|
|
52
|
-
if agent.get('lastActiveAt'):
|
|
53
|
-
agent['lastActiveFormatted'] = format_last_active(agent['lastActiveAt'])
|
|
54
|
-
return agent
|
|
55
|
-
|
|
56
|
-
from fastapi import HTTPException
|
|
57
|
-
raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@router.get("/agents/{agent_id}/output")
|
|
61
|
-
async def get_agent_output(agent_id: str, limit: int = 50):
|
|
62
|
-
"""
|
|
63
|
-
获取 Agent 最近会话详情:每轮 user/assistant/toolResult 及 usage
|
|
64
|
-
用于调试视图展示
|
|
65
|
-
"""
|
|
66
|
-
from data.session_reader import get_session_turns
|
|
67
|
-
from data.config_reader import get_agent_config
|
|
68
|
-
|
|
69
|
-
if not get_agent_config(agent_id):
|
|
70
|
-
from fastapi import HTTPException
|
|
71
|
-
raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
|
|
72
|
-
|
|
73
|
-
turns = get_session_turns(agent_id, limit=limit)
|
|
74
|
-
return {"agentId": agent_id, "turns": turns}
|