openclaw-agent-dashboard 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. package/dashboard/api/agent_config_api.py +28 -7
  2. package/dashboard/api/agents.py +48 -10
  3. package/dashboard/api/agents_config.py +5 -1
  4. package/dashboard/api/chains.py +25 -5
  5. package/dashboard/api/collaboration.py +10 -9
  6. package/dashboard/api/debug_paths.py +5 -1
  7. package/dashboard/api/error_analysis.py +29 -11
  8. package/dashboard/api/errors.py +37 -11
  9. package/dashboard/api/fortify_routes.py +108 -0
  10. package/dashboard/api/input_safety.py +60 -0
  11. package/dashboard/api/performance.py +73 -53
  12. package/dashboard/api/subagents.py +95 -99
  13. package/dashboard/api/timeline.py +24 -3
  14. package/dashboard/api/version.py +2 -0
  15. package/dashboard/api/websocket.py +9 -7
  16. package/dashboard/core/__init__.py +1 -0
  17. package/dashboard/core/config_fortify.py +125 -0
  18. package/dashboard/core/error_handler.py +488 -0
  19. package/dashboard/core/fallback_manager.py +81 -0
  20. package/dashboard/core/logging_config.py +217 -0
  21. package/dashboard/core/safe_api_error.py +76 -0
  22. package/dashboard/core/schemas/__init__.py +16 -0
  23. package/dashboard/core/schemas/base.py +43 -0
  24. package/dashboard/core/schemas/session_schema.py +40 -0
  25. package/dashboard/core/schemas/subagent_schema.py +23 -0
  26. package/dashboard/data/agent_config_manager.py +6 -4
  27. package/dashboard/data/chain_reader.py +16 -12
  28. package/dashboard/data/error_analyzer.py +15 -11
  29. package/dashboard/data/session_reader.py +268 -46
  30. package/dashboard/data/subagent_reader.py +74 -49
  31. package/dashboard/data/timeline_reader.py +35 -49
  32. package/dashboard/main.py +24 -2
  33. package/dashboard/mechanism_reader.py +4 -5
  34. package/dashboard/mechanisms.py +2 -2
  35. package/dashboard/pytest.ini +3 -0
  36. package/dashboard/requirements.txt +5 -0
  37. package/dashboard/status/cache_fp_probe.py +40 -0
  38. package/dashboard/status/status_cache.py +199 -72
  39. package/dashboard/status/status_calculator.py +50 -30
  40. package/dashboard/tests/conftest.py +87 -0
  41. package/dashboard/tests/test_api_contracts.py +372 -0
  42. package/dashboard/tests/test_bench_fortify.py +176 -0
  43. package/dashboard/tests/test_fortify.py +952 -0
  44. package/dashboard/utils/__init__.py +1 -0
  45. package/dashboard/utils/data_repair.py +210 -0
  46. package/dashboard/watchers/file_watcher.py +380 -77
  47. package/frontend-dist/assets/{index-cYIOn3Wq.css → index-BIZ2xHfw.css} +1 -1
  48. package/frontend-dist/assets/{index-DyRXGevD.js → index-Cnr0b02R.js} +1 -1
  49. package/frontend-dist/index.html +2 -2
  50. package/openclaw.plugin.json +1 -1
  51. package/package.json +1 -1
  52. package/dashboard/agents.py +0 -74
  53. package/dashboard/collaboration.py +0 -407
  54. package/dashboard/errors.py +0 -63
  55. package/dashboard/performance.py +0 -474
  56. package/dashboard/session_reader.py +0 -240
  57. package/dashboard/status_calculator.py +0 -121
  58. package/dashboard/subagent_reader.py +0 -232
@@ -2,13 +2,21 @@
2
2
  Agent 配置 API - 提供配置读取和修改接口
3
3
  """
4
4
  from fastapi import APIRouter, HTTPException
5
- from pydantic import BaseModel
5
+ from pydantic import BaseModel, Field
6
+ from typing import List, Optional
7
+ try:
8
+ from pydantic import field_validator
9
+ except ImportError:
10
+ from pydantic import validator as field_validator
6
11
  from typing import List, Optional
7
12
  import sys
8
13
  from pathlib import Path
9
14
 
10
15
  sys.path.append(str(Path(__file__).parent.parent))
11
16
 
17
+ from api.input_safety import require_safe_agent_id
18
+ from core.error_handler import record_error
19
+ from core.safe_api_error import safe_api_error_detail, safe_client_string
12
20
  from data.agent_config_manager import (
13
21
  get_agent_full_info,
14
22
  get_all_agents_info,
@@ -21,8 +29,15 @@ router = APIRouter()
21
29
 
22
30
 
23
31
  class UpdateModelRequest(BaseModel):
24
- primary: Optional[str] = None
25
- fallbacks: Optional[List[str]] = None
32
+ primary: Optional[str] = Field(None, max_length=256)
33
+ fallbacks: Optional[List[str]] = Field(None, max_length=32)
34
+
35
+ @field_validator("fallbacks")
36
+ @classmethod
37
+ def _fallback_items_len(cls, v: Optional[List[str]]) -> Optional[List[str]]:
38
+ if v and any(len(str(x)) > 256 for x in v):
39
+ raise ValueError("fallback model id too long")
40
+ return v
26
41
 
27
42
 
28
43
  @router.get("/agent-config")
@@ -35,12 +50,14 @@ async def list_agent_configs():
35
50
  'total': len(agents),
36
51
  }
37
52
  except Exception as e:
38
- return {'agents': [], 'total': 0, 'error': str(e)}
53
+ record_error("unknown", str(e), "api:agent_config:list", exc=e)
54
+ return {'agents': [], 'total': 0, 'error': safe_client_string(str(e))}
39
55
 
40
56
 
41
57
  @router.get("/agent-config/{agent_id}")
42
58
  async def get_agent_config(agent_id: str):
43
59
  """获取单个 Agent 的详细配置"""
60
+ require_safe_agent_id(agent_id)
44
61
  try:
45
62
  info = get_agent_full_info(agent_id)
46
63
  if not info.get('found'):
@@ -49,12 +66,14 @@ async def get_agent_config(agent_id: str):
49
66
  except HTTPException:
50
67
  raise
51
68
  except Exception as e:
52
- raise HTTPException(status_code=500, detail=str(e))
69
+ record_error("unknown", str(e), "api:agent_config:get_one", exc=e)
70
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e))
53
71
 
54
72
 
55
73
  @router.put("/agent-config/{agent_id}/model")
56
74
  async def update_agent_model_config(agent_id: str, request: UpdateModelRequest):
57
75
  """更新 Agent 的模型配置"""
76
+ require_safe_agent_id(agent_id)
58
77
  try:
59
78
  result = update_agent_model(
60
79
  agent_id,
@@ -74,7 +93,8 @@ async def update_agent_model_config(agent_id: str, request: UpdateModelRequest):
74
93
  except HTTPException:
75
94
  raise
76
95
  except Exception as e:
77
- raise HTTPException(status_code=500, detail=str(e))
96
+ record_error("unknown", str(e), "api:agent_config:put_model", exc=e)
97
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e))
78
98
 
79
99
 
80
100
  @router.get("/available-models")
@@ -87,4 +107,5 @@ async def list_available_models():
87
107
  'total': len(models),
88
108
  }
89
109
  except Exception as e:
90
- return {'models': [], 'total': 0, 'error': str(e)}
110
+ record_error("unknown", str(e), "api:agent_config:models", exc=e)
111
+ return {'models': [], 'total': 0, 'error': safe_client_string(str(e))}
@@ -1,13 +1,18 @@
1
1
  """
2
2
  Agent API 路由
3
3
  """
4
- from fastapi import APIRouter
5
- from pydantic import BaseModel
6
- from typing import List, Optional
4
+ import asyncio
7
5
  import sys
8
6
  from pathlib import Path
7
+ from typing import List, Optional
8
+
9
9
  sys.path.append(str(Path(__file__).parent.parent))
10
10
 
11
+ from fastapi import APIRouter, HTTPException, Query
12
+ from pydantic import BaseModel
13
+
14
+ from api.input_safety import require_safe_agent_id
15
+
11
16
  from status.status_calculator import (
12
17
  get_agents_with_status,
13
18
  format_last_active
@@ -30,7 +35,17 @@ class AgentStatus(BaseModel):
30
35
  @router.get("/agents", response_model=List[AgentStatus])
31
36
  async def get_agents():
32
37
  """获取所有 Agent 列表及状态"""
33
- agents = get_agents_with_status()
38
+ from core.error_handler import ErrorHandler
39
+
40
+ def _load():
41
+ h = ErrorHandler(max_retry=2, base_delay=0.5)
42
+ return h.run_with_retry(
43
+ lambda: get_agents_with_status(),
44
+ operation="get_agents_with_status",
45
+ error_type="io-error",
46
+ )
47
+
48
+ agents = await asyncio.to_thread(_load)
34
49
 
35
50
  # 格式化最后活跃时间
36
51
  for agent in agents:
@@ -43,7 +58,18 @@ async def get_agents():
43
58
  @router.get("/agents/{agent_id}", response_model=AgentStatus)
44
59
  async def get_agent(agent_id: str):
45
60
  """获取单个 Agent 详情"""
46
- agents = get_agents_with_status()
61
+ require_safe_agent_id(agent_id)
62
+ from core.error_handler import ErrorHandler
63
+
64
+ def _load():
65
+ h = ErrorHandler(max_retry=2, base_delay=0.5)
66
+ return h.run_with_retry(
67
+ lambda: get_agents_with_status(),
68
+ operation="get_agents_with_status",
69
+ error_type="io-error",
70
+ )
71
+
72
+ agents = await asyncio.to_thread(_load)
47
73
 
48
74
  from data.config_reader import agent_ids_equal
49
75
 
@@ -53,22 +79,34 @@ async def get_agent(agent_id: str):
53
79
  agent['lastActiveFormatted'] = format_last_active(agent['lastActiveAt'])
54
80
  return agent
55
81
 
56
- from fastapi import HTTPException
57
82
  raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
58
83
 
59
84
 
60
85
  @router.get("/agents/{agent_id}/output")
61
- async def get_agent_output(agent_id: str, limit: int = 50):
86
+ async def get_agent_output(
87
+ agent_id: str,
88
+ limit: int = Query(50, ge=1, le=500, description="返回最近轮次数上限"),
89
+ ):
62
90
  """
63
91
  获取 Agent 最近会话详情:每轮 user/assistant/toolResult 及 usage
64
92
  用于调试视图展示
65
93
  """
94
+ require_safe_agent_id(agent_id)
66
95
  from data.session_reader import get_session_turns
67
96
  from data.config_reader import get_agent_config
68
97
 
69
98
  if not get_agent_config(agent_id):
70
- from fastapi import HTTPException
71
99
  raise HTTPException(status_code=404, detail=f"Agent {agent_id} not found")
72
-
73
- turns = get_session_turns(agent_id, limit=limit)
100
+
101
+ from core.error_handler import ErrorHandler
102
+
103
+ def _load_turns():
104
+ h = ErrorHandler(max_retry=2, base_delay=0.5)
105
+ return h.run_with_retry(
106
+ lambda: get_session_turns(agent_id, limit=limit),
107
+ operation="get_session_turns",
108
+ error_type="io-error",
109
+ )
110
+
111
+ turns = await asyncio.to_thread(_load_turns)
74
112
  return {"agentId": agent_id, "turns": turns}
@@ -5,6 +5,9 @@ Agent 配置 API - 直接从 openclaw.json 读取
5
5
  from fastapi import APIRouter
6
6
  from typing import List, Dict, Any
7
7
 
8
+ from core.error_handler import record_error
9
+ from core.safe_api_error import safe_client_string
10
+
8
11
  router = APIRouter()
9
12
 
10
13
 
@@ -62,6 +65,7 @@ async def get_agents_config():
62
65
  'lastUpdate': int(__import__('time').time() * 1000),
63
66
  }
64
67
  except Exception as e:
68
+ record_error("unknown", str(e), "api:agents_config:get", exc=e)
65
69
  return {
66
70
  'nodes': [],
67
71
  'edges': [],
@@ -71,5 +75,5 @@ async def get_agents_config():
71
75
  'models': [],
72
76
  'recentCalls': [],
73
77
  'lastUpdate': 0,
74
- '_error': str(e),
78
+ '_error': safe_client_string(str(e)),
75
79
  }
@@ -8,6 +8,9 @@ import sys
8
8
  from pathlib import Path
9
9
  sys.path.append(str(Path(__file__).parent.parent))
10
10
 
11
+ from api.input_safety import require_safe_run_or_chain_id
12
+ from core.error_handler import record_error
13
+ from core.safe_api_error import safe_api_error_detail
11
14
  from data.chain_reader import (
12
15
  build_task_chains,
13
16
  get_task_chain,
@@ -79,8 +82,12 @@ async def list_chains(
79
82
  - 节点间的派发关系
80
83
  - 各节点的状态和进度
81
84
  """
82
- chains = build_task_chains(limit=limit)
83
- active = get_active_chain()
85
+ try:
86
+ chains = build_task_chains(limit=limit)
87
+ active = get_active_chain()
88
+ except Exception as e:
89
+ record_error("unknown", str(e), "api:chains:list", exc=e)
90
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
84
91
 
85
92
  return {
86
93
  "chains": chains,
@@ -95,7 +102,11 @@ async def get_summary():
95
102
 
96
103
  快速查看所有链路的状态分布
97
104
  """
98
- return get_chains_summary()
105
+ try:
106
+ return get_chains_summary()
107
+ except Exception as e:
108
+ record_error("unknown", str(e), "api:chains:summary", exc=e)
109
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
99
110
 
100
111
 
101
112
  @router.get("/chains/active")
@@ -105,7 +116,11 @@ async def get_active():
105
116
 
106
117
  返回正在执行的任务链
107
118
  """
108
- chain = get_active_chain()
119
+ try:
120
+ chain = get_active_chain()
121
+ except Exception as e:
122
+ record_error("unknown", str(e), "api:chains:active", exc=e)
123
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
109
124
  if not chain:
110
125
  return {"activeChain": None, "message": "当前没有正在执行的任务链"}
111
126
 
@@ -119,7 +134,12 @@ async def get_chain(chain_id: str):
119
134
 
120
135
  返回完整的链路信息,包括所有节点和边
121
136
  """
122
- chain = get_task_chain(chain_id)
137
+ require_safe_run_or_chain_id(chain_id, name="chain_id")
138
+ try:
139
+ chain = get_task_chain(chain_id)
140
+ except Exception as e:
141
+ record_error("unknown", str(e), "api:chains:detail", exc=e)
142
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
123
143
  if not chain:
124
144
  raise HTTPException(status_code=404, detail=f"Chain {chain_id} not found")
125
145
 
@@ -17,6 +17,8 @@ TZ_DISPLAY = ZoneInfo('Asia/Shanghai')
17
17
 
18
18
  sys.path.append(str(Path(__file__).parent.parent))
19
19
 
20
+ from core.error_handler import record_error
21
+
20
22
  router = APIRouter()
21
23
  logger = logging.getLogger(__name__)
22
24
 
@@ -261,7 +263,7 @@ def _get_model_mapping() -> Dict[str, str]:
261
263
  if base != short:
262
264
  _model_mapping_cache[base] = model_id
263
265
  except Exception as e:
264
- logger.warning(f"Failed to build model mapping: {e}")
266
+ record_error("unknown", str(e), "collaboration:model_mapping", exc=e)
265
267
  _model_mapping_cache = {}
266
268
  return _model_mapping_cache
267
269
 
@@ -372,7 +374,7 @@ def _enrich_main_agent_active_tasks_if_needed(
372
374
 
373
375
  def _get_agent_error_info(agent_id: str) -> Optional[Dict[str, Any]]:
374
376
  """获取 agent 的错误/异常信息"""
375
- from session_reader import get_last_error, has_recent_errors
377
+ from data.session_reader import get_last_error, has_recent_errors
376
378
 
377
379
  if has_recent_errors(agent_id, minutes=10):
378
380
  error = get_last_error(agent_id)
@@ -389,7 +391,7 @@ def _get_agent_error_info(agent_id: str) -> Optional[Dict[str, Any]]:
389
391
  def _check_agent_stuck(agent_id: str) -> Optional[Dict[str, Any]]:
390
392
  """检查 agent 是否卡顿(长时间无响应但有活跃任务)"""
391
393
  import time
392
- from session_reader import get_session_updated_at
394
+ from data.session_reader import get_session_updated_at
393
395
  from data.subagent_reader import is_agent_working, get_active_runs
394
396
 
395
397
  if not is_agent_working(agent_id):
@@ -763,14 +765,13 @@ async def get_collaboration():
763
765
  active_path.extend([main_agent_id, agent_id_canon, task_id])
764
766
 
765
767
  except Exception as e:
766
- print(f"Error building collaboration data: {e}")
767
- import traceback
768
- traceback.print_exc()
768
+ record_error("unknown", str(e), "collaboration:build_flow", exc=e)
769
769
 
770
770
  if not nodes:
771
771
  try:
772
772
  main_agent_id = get_main_agent_id()
773
- except Exception:
773
+ except Exception as e:
774
+ record_error("unknown", str(e), "collaboration:fallback_main_id", exc=e)
774
775
  main_agent_id = 'main'
775
776
  nodes = [
776
777
  CollaborationNode(id=main_agent_id, type="agent", name="主 Agent", status="idle"),
@@ -905,7 +906,7 @@ async def get_collaboration_dynamic():
905
906
  alert=dyn_status['alert']
906
907
  )
907
908
  except Exception as e:
908
- logger.warning(f"Failed to get display status for {aid}: {e}")
909
+ record_error("unknown", str(e), f"collaboration:display_status:{aid}", exc=e)
909
910
 
910
911
  # PM:覆盖为与连线同源的状态;并修正 dynamic 文案,避免 calculate_agent_status 的 solo 与卡片矛盾
911
912
  main_collab = _main_agent_status_for_collaboration(main_agent_id)
@@ -933,7 +934,7 @@ async def get_collaboration_dynamic():
933
934
  if requester_id and requester_id != agent_id:
934
935
  active_path.extend([requester_id])
935
936
  except Exception as e:
936
- logger.error(f"Error building collaboration dynamic: {e}")
937
+ record_error("unknown", str(e), "collaboration:dynamic", exc=e)
937
938
 
938
939
  recent_calls_raw = _get_recent_model_calls(30)
939
940
  model_calls = [
@@ -4,6 +4,9 @@
4
4
  """
5
5
  from fastapi import APIRouter
6
6
 
7
+ from core.error_handler import record_error
8
+ from core.safe_api_error import safe_client_string
9
+
7
10
  router = APIRouter()
8
11
 
9
12
 
@@ -17,9 +20,10 @@ async def get_debug_paths():
17
20
  from data.config_reader import get_openclaw_root
18
21
  root = get_openclaw_root()
19
22
  except Exception as e:
23
+ record_error("unknown", str(e), "api:debug_paths", exc=e)
20
24
  return {
21
25
  "success": False,
22
- "error": str(e),
26
+ "error": safe_client_string(str(e)),
23
27
  "openclawRoot": None,
24
28
  "openclawJsonExists": False,
25
29
  "agentsDirExists": False,
@@ -1,13 +1,20 @@
1
1
  """
2
2
  错误分析 API - 提供错误根因分析接口
3
3
  """
4
- from fastapi import APIRouter, HTTPException
4
+ from fastapi import APIRouter, HTTPException, Query
5
+ from pydantic import BaseModel, Field
5
6
  from typing import Optional
6
7
  import sys
7
8
  from pathlib import Path
8
9
 
9
10
  sys.path.append(str(Path(__file__).parent.parent))
10
11
 
12
+ from api.input_safety import (
13
+ require_safe_agent_id,
14
+ require_safe_session_file_segment,
15
+ )
16
+ from core.error_handler import record_error
17
+ from core.safe_api_error import safe_api_error_detail, safe_client_string
11
18
  from data.error_analyzer import (
12
19
  analyze_agent_errors,
13
20
  analyze_all_agents_errors,
@@ -19,6 +26,10 @@ from data.error_analyzer import (
19
26
  router = APIRouter()
20
27
 
21
28
 
29
+ class ClassifyErrorRequest(BaseModel):
30
+ message: str = Field(..., min_length=1, max_length=16_000)
31
+
32
+
22
33
  def format_error_for_display(error: dict) -> dict:
23
34
  """格式化错误信息用于前端展示"""
24
35
  severity_colors = {
@@ -77,23 +88,26 @@ async def get_global_error_analysis():
77
88
 
78
89
  return result
79
90
  except Exception as e:
80
- import traceback
81
- traceback.print_exc()
91
+ record_error("unknown", str(e), "api:error_analysis:global", exc=e)
82
92
  return {
83
93
  'agents': [],
84
94
  'globalSummary': {},
85
- 'error': str(e),
95
+ 'error': safe_client_string(str(e)),
86
96
  }
87
97
 
88
98
 
89
99
  @router.get("/error-analysis/{agent_id}")
90
- async def get_agent_error_analysis(agent_id: str, session_limit: int = 5):
100
+ async def get_agent_error_analysis(
101
+ agent_id: str,
102
+ session_limit: int = Query(5, ge=1, le=50, description="分析的 session 数量上限"),
103
+ ):
91
104
  """
92
105
  获取单个 Agent 的错误分析
93
106
 
94
107
  - agent_id: Agent ID
95
108
  - session_limit: 分析最近的 N 个 session
96
109
  """
110
+ require_safe_agent_id(agent_id)
97
111
  try:
98
112
  result = analyze_agent_errors(agent_id, session_limit)
99
113
  result['errors'] = [
@@ -102,9 +116,8 @@ async def get_agent_error_analysis(agent_id: str, session_limit: int = 5):
102
116
  ]
103
117
  return result
104
118
  except Exception as e:
105
- import traceback
106
- traceback.print_exc()
107
- raise HTTPException(status_code=500, detail=str(e))
119
+ record_error("unknown", str(e), "api:error_analysis:agent", exc=e)
120
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e))
108
121
 
109
122
 
110
123
  @router.get("/error-analysis/{agent_id}/{session_file}/{turn_index}")
@@ -114,6 +127,8 @@ async def get_error_detail_api(agent_id: str, session_file: str, turn_index: int
114
127
 
115
128
  包括错误发生前的工具调用链
116
129
  """
130
+ require_safe_agent_id(agent_id)
131
+ session_file = require_safe_session_file_segment(session_file)
117
132
  try:
118
133
  error = get_error_detail(agent_id, session_file, turn_index)
119
134
  if not error:
@@ -122,17 +137,19 @@ async def get_error_detail_api(agent_id: str, session_file: str, turn_index: int
122
137
  except HTTPException:
123
138
  raise
124
139
  except Exception as e:
125
- raise HTTPException(status_code=500, detail=str(e))
140
+ record_error("unknown", str(e), "api:error_analysis:detail", exc=e)
141
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e))
126
142
 
127
143
 
128
144
  @router.post("/error-analysis/classify")
129
- async def classify_error_message(message: str):
145
+ async def classify_error_message(req: ClassifyErrorRequest):
130
146
  """
131
147
  对给定的错误消息进行分类
132
148
 
133
149
  返回错误类型、严重程度和修复建议
134
150
  """
135
151
  try:
152
+ message = req.message
136
153
  error_type, severity = classify_error(message)
137
154
  suggestions = get_error_suggestions(error_type, message)
138
155
 
@@ -143,4 +160,5 @@ async def classify_error_message(message: str):
143
160
  'suggestions': suggestions,
144
161
  }
145
162
  except Exception as e:
146
- raise HTTPException(status_code=500, detail=str(e))
163
+ record_error("unknown", str(e), "api:error_analysis:classify", exc=e)
164
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e))
@@ -2,7 +2,7 @@
2
2
  错误中心 API - 聚合 Session 错误、Model Failures、API 状态
3
3
  支持统计、筛选、趋势分析
4
4
  """
5
- from fastapi import APIRouter, Query
5
+ from fastapi import APIRouter, HTTPException, Query
6
6
  from typing import List, Dict, Any, Optional
7
7
  from datetime import datetime, timedelta
8
8
  import sys
@@ -12,6 +12,9 @@ from collections import defaultdict
12
12
 
13
13
  sys.path.append(str(Path(__file__).parent.parent))
14
14
 
15
+ from core.error_handler import get_framework_error_stats_for_client, record_error
16
+ from core.safe_api_error import safe_api_error_detail
17
+
15
18
  router = APIRouter()
16
19
 
17
20
 
@@ -232,8 +235,12 @@ async def get_errors(
232
235
  获取错误中心数据
233
236
  支持按 Agent、类型、模型筛选
234
237
  """
235
- session_errors = get_session_errors(limit, agent, type)
236
- model_failures = get_model_failures(limit, model, type)
238
+ try:
239
+ session_errors = get_session_errors(limit, agent, type)
240
+ model_failures = get_model_failures(limit, model, type)
241
+ except Exception as e:
242
+ record_error("unknown", str(e), "api:errors:list", exc=e)
243
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
237
244
 
238
245
  return {
239
246
  "sessionErrors": session_errors,
@@ -247,10 +254,15 @@ async def get_errors_stats():
247
254
  获取错误统计数据
248
255
  包括:总数、按类型分布、按 Agent 分布、时间趋势
249
256
  """
250
- session_errors = get_session_errors(200)
251
- model_failures = get_model_failures(200)
252
-
253
- return get_error_stats(session_errors, model_failures)
257
+ try:
258
+ session_errors = get_session_errors(200)
259
+ model_failures = get_model_failures(200)
260
+ out = get_error_stats(session_errors, model_failures)
261
+ out["framework"] = get_framework_error_stats_for_client()
262
+ return out
263
+ except Exception as e:
264
+ record_error("unknown", str(e), "api:errors:stats", exc=e)
265
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
254
266
 
255
267
 
256
268
  @router.get("/errors/api-status")
@@ -268,10 +280,14 @@ async def get_errors_summary():
268
280
  获取错误中心完整数据(一次请求获取所有)
269
281
  包括:错误列表、统计、API 状态
270
282
  """
271
- session_errors = get_session_errors(100)
272
- model_failures = get_model_failures(100)
273
- api_status = get_api_status()
274
- stats = get_error_stats(session_errors, model_failures)
283
+ try:
284
+ session_errors = get_session_errors(100)
285
+ model_failures = get_model_failures(100)
286
+ api_status = get_api_status()
287
+ stats = get_error_stats(session_errors, model_failures)
288
+ except Exception as e:
289
+ record_error("unknown", str(e), "api:errors:summary", exc=e)
290
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
275
291
 
276
292
  return {
277
293
  "sessionErrors": session_errors,
@@ -279,3 +295,13 @@ async def get_errors_summary():
279
295
  "apiStatus": api_status,
280
296
  "stats": stats,
281
297
  }
298
+
299
+
300
+ @router.get("/errors/reliability")
301
+ async def get_reliability_stats():
302
+ """
303
+ NFR-R 可靠性指标接口
304
+ 包括:监听成功率(NFR-R-002)、错误恢复时间(NFR-R-003)、优雅降级率(NFR-R-005)
305
+ """
306
+ from core.error_handler import get_reliability_metrics
307
+ return get_reliability_metrics()
@@ -0,0 +1,108 @@
1
+ """TECHDEBT_FORTIFY: health, cache stats, data validation, logging endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import sys
5
+ from pathlib import Path
6
+ from typing import Any, Optional
7
+
8
+ from fastapi import APIRouter, HTTPException, Query
9
+
10
+ sys.path.append(str(Path(__file__).parent.parent))
11
+
12
+ from api.input_safety import require_safe_agent_id, require_safe_session_file_segment
13
+ from core.error_handler import record_error
14
+ from core.safe_api_error import safe_api_error_detail
15
+
16
+ router = APIRouter()
17
+
18
+
19
+ @router.get("/health/watcher")
20
+ async def watcher_health() -> Any:
21
+ from watchers.file_watcher import get_watcher_health
22
+
23
+ return get_watcher_health()
24
+
25
+
26
+ @router.get("/cache/stats")
27
+ async def cache_stats() -> Any:
28
+ from status.status_cache import get_cache
29
+
30
+ c = get_cache()
31
+ s = c.get_stats()
32
+ from datetime import datetime, timezone
33
+
34
+ s["last_update"] = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
35
+ return {
36
+ "cache_size": s["size"],
37
+ "max_size": s["max_size"],
38
+ "memory_usage_mb": s.get("memory_usage_mb"),
39
+ "max_memory_mb": s["max_memory_mb"],
40
+ "hit_rate": s["hit_rate"],
41
+ "ttl_seconds": s["ttl_seconds"],
42
+ "preload_enabled": s["preload_enabled"],
43
+ "cache_double_check": s.get("cache_double_check"),
44
+ "fp_probe_interval_sec": s.get("fp_probe_interval_sec"),
45
+ "last_update": s["last_update"],
46
+ "stats": s["stats"],
47
+ "process_rss_mb": s.get("process_rss_mb"),
48
+ }
49
+
50
+
51
+ @router.get("/data/validate")
52
+ async def validate_session_data(
53
+ agent_id: str = Query(..., description="Agent ID"),
54
+ session_file: Optional[str] = Query(
55
+ None,
56
+ description="可选:相对于 agents/{agent_id}/sessions 的 .jsonl 路径(如 foo.jsonl)",
57
+ ),
58
+ auto_repair: bool = Query(True),
59
+ include_details: bool = Query(False),
60
+ max_lines: int = Query(1000, ge=1, le=50_000),
61
+ ) -> Any:
62
+ from data.session_reader import get_session_validation_report
63
+
64
+ if not agent_id.strip():
65
+ raise HTTPException(status_code=400, detail="agent_id required")
66
+ aid = require_safe_agent_id(agent_id.strip())
67
+ rel = session_file.strip() if session_file and session_file.strip() else None
68
+ if rel:
69
+ rel = require_safe_session_file_segment(rel)
70
+ try:
71
+ return get_session_validation_report(
72
+ aid,
73
+ relative_session_file=rel,
74
+ auto_repair=auto_repair,
75
+ include_details=include_details,
76
+ max_lines=max_lines,
77
+ )
78
+ except Exception as e:
79
+ record_error("unknown", str(e), "api:fortify:validate", exc=e)
80
+ raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
81
+
82
+
83
+ @router.get("/logging/config")
84
+ async def logging_config() -> Any:
85
+ """
86
+ NFR-S-003: Get logging configuration and status.
87
+
88
+ Returns current logging configuration for diagnostics and monitoring.
89
+ """
90
+ try:
91
+ from core.logging_config import get_logging_config_summary
92
+ return {
93
+ "status": "ok",
94
+ "config": get_logging_config_summary(),
95
+ }
96
+ except ImportError:
97
+ # Fallback if logging_config is not available
98
+ return {
99
+ "status": "ok",
100
+ "config": {
101
+ "log_retention_days": 30,
102
+ "log_max_size_mb": 100,
103
+ "log_backup_count": 5,
104
+ "log_file_path": None,
105
+ "log_compression": True,
106
+ },
107
+ "note": "Enhanced logging not configured",
108
+ }