openclaw-agent-dashboard 1.0.42 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -86,12 +86,13 @@ async def get_timeline(
86
86
  record_error("unknown", str(e), "api:timeline:get", exc=e)
87
87
  raise HTTPException(status_code=500, detail=safe_api_error_detail(e)) from e
88
88
  elapsed_ms = (time.perf_counter() - t0) * 1000
89
- if elapsed_ms >= 200.0:
89
+ steps_count = len(result.get("steps", []))
90
+ if elapsed_ms >= 100.0:
90
91
  LOG.info(
91
92
  "timeline agent=%s limit=%d steps=%d ms=%.1f",
92
93
  agent_id,
93
94
  limit,
94
- len(result.get("steps", [])),
95
+ steps_count,
95
96
  elapsed_ms,
96
97
  )
97
98
 
@@ -152,22 +152,32 @@ def get_models_configured_by_agents() -> List[str]:
152
152
  """
153
153
  从配置中收集「各 Agent 实际配置使用」的模型 ID(仅 primary + fallbacks)。
154
154
  用于协作流程右侧模型面板:只显示有 Agent 配置的模型,不含白名单中未使用的。
155
+
156
+ 策略:仅包含作为 primary 使用、或被某 agent 配置过的模型。
157
+ 不包含 defaults.model.fallbacks 中没有任何 agent 当 primary 使用的模型。
155
158
  """
156
159
  agents = get_agents_list()
157
- model_ids = set()
158
- defaults = get_default_config()
159
- default_model = defaults.get('model', {})
160
- if default_model.get('primary'):
161
- model_ids.add(default_model['primary'])
162
- for fb in default_model.get('fallbacks') or []:
163
- model_ids.add(fb)
160
+ # Step 1: 收集所有 primary 模型(用于判断 fallback 是否被实际使用)
161
+ primaries: List[str] = []
164
162
  for agent in agents:
165
163
  cfg = get_agent_models(agent.get('id', ''))
166
164
  if cfg.get('primary'):
167
- model_ids.add(cfg['primary'])
165
+ primaries.append(cfg['primary'])
166
+ primary_set = set(primaries)
167
+
168
+ # Step 2: 收集所有 primary
169
+ model_ids: List[str] = list(dict.fromkeys(primaries)) # 保持顺序去重
170
+
171
+ # Step 3: 只添加被某 agent 实际配置过的 fallback(不被 primary_set 包含的不添加)
172
+ seen = set(primary_set)
173
+ for agent in agents:
174
+ cfg = get_agent_models(agent.get('id', ''))
168
175
  for fb in cfg.get('fallbacks', []):
169
- model_ids.add(fb)
170
- return sorted(model_ids)
176
+ if fb and fb not in seen:
177
+ model_ids.append(fb)
178
+ seen.add(fb)
179
+
180
+ return model_ids
171
181
 
172
182
 
173
183
  def get_all_models_from_agents() -> List[str]:
@@ -187,6 +197,23 @@ def get_all_models_from_agents() -> List[str]:
187
197
  return sorted(model_ids)
188
198
 
189
199
 
200
def get_default_models_from_defaults() -> List[str]:
    """
    Return the model IDs listed under the default config's ``model`` entry.

    The result is the ``primary`` model (if set) followed by each entry of
    ``fallbacks``, in configuration order and without duplicates.

    NOTE(review): the original docstring ties this to the collaboration
    view's model panel; no caller is visible here, so confirm the intended
    consumer before relying on that.

    Returns:
        Ordered, de-duplicated list of model IDs; empty when the default
        config has no usable ``model`` mapping.
    """
    defaults = get_default_config() or {}
    default_model = defaults.get('model')
    # ``model`` may be present but null (or not a mapping); treat that as
    # "no defaults" instead of crashing on ``.get``.
    if not isinstance(default_model, dict):
        return []

    result: List[str] = []
    primary = default_model.get('primary')
    if primary:
        result.append(primary)

    fallbacks = default_model.get('fallbacks')
    # Only iterate real sequences: a bare string would be walked per character.
    if not isinstance(fallbacks, (list, tuple)):
        return result

    for fb in fallbacks:
        # Skip empty/None entries, matching how agent-level fallbacks are
        # filtered in get_models_configured_by_agents.
        if fb and fb not in result:
            result.append(fb)
    return result
215
+
216
+
190
217
  def get_model_display_name(model_id: str) -> str:
191
218
  """获取模型显示名。展示策略:使用 id 不用别名(与 OpenClaw 白名单逻辑一致)"""
192
219
  if not model_id:
@@ -269,9 +269,12 @@ def has_recent_errors(agent_id: str, minutes: int = 5) -> bool:
269
269
 
270
270
 
271
271
  def get_last_error(agent_id: str) -> Optional[Dict[str, Any]]:
272
- """获取最近的错误信息"""
272
+ """
273
+ 获取最近的错误信息,优先从 session stopReason=error 获取,
274
+ 若无则从 runs.json 中最近结束的 error run 兜底。
275
+ """
273
276
  messages = get_recent_messages(agent_id, limit=100)
274
-
277
+
275
278
  for msg in reversed(messages):
276
279
  if msg.get('stopReason') == 'error':
277
280
  return {
@@ -279,7 +282,39 @@ def get_last_error(agent_id: str) -> Optional[Dict[str, Any]]:
279
282
  'message': msg.get('errorMessage', ''),
280
283
  'timestamp': msg.get('timestamp', 0)
281
284
  }
282
-
285
+
286
+ # 兜底:检查 runs.json 中最近结束的 error run
287
+ run_error = _get_last_run_error(agent_id)
288
+ if run_error:
289
+ return run_error
290
+
291
+ return None
292
+
293
+
294
def _get_last_run_error(agent_id: str, minutes: int = 5) -> Optional[Dict[str, Any]]:
    """
    Return error details for the most recently ended failed run of an agent.

    Used as a fallback when no session message carries
    ``stopReason == 'error'``. A run qualifies when its ``endedAt``
    (epoch milliseconds) falls inside the last ``minutes`` minutes and its
    ``outcome`` is a dict with ``status == 'error'``.

    Args:
        agent_id: Agent whose runs are inspected.
        minutes: Size of the look-back window in minutes (default 5).

    Returns:
        A dict with ``type``, ``message``, ``timestamp`` and ``source='run'``,
        or None when no qualifying run exists.
    """
    import time
    from data.subagent_reader import get_agent_runs

    runs = get_agent_runs(agent_id, limit=20)
    cutoff = int(time.time() * 1000) - minutes * 60 * 1000

    latest_outcome = None
    latest_ended = None
    for run in runs:
        ended = run.get('endedAt')
        # Ignore unfinished runs and malformed timestamps (comparing a str
        # with an int would raise TypeError).
        if not isinstance(ended, (int, float)) or not ended or ended < cutoff:
            continue
        outcome = run.get('outcome')
        if not isinstance(outcome, dict) or outcome.get('status') != 'error':
            continue
        # Do not rely on the list order of get_agent_runs: keep the run that
        # actually ended last.
        if latest_ended is None or ended > latest_ended:
            latest_outcome, latest_ended = outcome, ended

    if latest_outcome is None:
        return None

    error_msg = latest_outcome.get('error', '') or ''
    if not isinstance(error_msg, str):
        # presumably detect_error_type expects text; verify against its definition
        error_msg = str(error_msg)
    return {
        'type': detect_error_type(error_msg),
        'message': error_msg,
        'timestamp': latest_ended,
        'source': 'run'  # marks the origin of the record, useful when debugging
    }
284
319
 
285
320
 
@@ -15,6 +15,8 @@ LOG = logging.getLogger(__name__)
15
15
  LARGE_JSONL_BYTES = 512 * 1024
16
16
  TAIL_JSONL_BYTES = 2 * 1024 * 1024
17
17
  TAIL_JSONL_MAX_LINES = 4000
18
+ # 主 Agent 头部安全行数(超大文件且步骤不足 limit 时补充读)
19
+ _HEAD_JSONL_LINES = 2000
18
20
 
19
21
 
20
22
  class StepType(str, Enum):
@@ -115,9 +117,12 @@ def _read_session_header_timestamp(path: Path) -> Optional[int]:
115
117
  return None
116
118
 
117
119
 
118
- def _read_jsonl_tail_line_slice(path: Path) -> Optional[List[str]]:
120
+ def _read_jsonl_tail_line_slice(path: Path, target_lines: int = 0) -> Optional[List[str]]:
119
121
  """
120
122
  大文件时返回尾部若干行(字节与行数双上限),否则返回 None 表示应整文件读取。
123
+
124
+ 当 target_lines > 0 时:优先满足 target_lines(但最少读 TAIL_JSONL_MAX_LINES/2 行保证有足够数据),
125
+ 上限为 TAIL_JSONL_MAX_LINES。当 target_lines = 0 时退化为原行为。
121
126
  """
122
127
  try:
123
128
  size = path.stat().st_size
@@ -125,6 +130,12 @@ def _read_jsonl_tail_line_slice(path: Path) -> Optional[List[str]]:
125
130
  return None
126
131
  if size <= LARGE_JSONL_BYTES:
127
132
  return None
133
+
134
+ # 目标行数转换为行数上限:多读一些(2x)以便有足够步骤,但不超过 TAIL_JSONL_MAX_LINES
135
+ max_lines = TAIL_JSONL_MAX_LINES
136
+ if target_lines > 0:
137
+ max_lines = min(max_lines, max(target_lines * 2, TAIL_JSONL_MAX_LINES // 2))
138
+
128
139
  with open(path, 'rb') as f:
129
140
  f.seek(max(0, size - TAIL_JSONL_BYTES))
130
141
  raw = f.read()
@@ -134,14 +145,40 @@ def _read_jsonl_tail_line_slice(path: Path) -> Optional[List[str]]:
134
145
  return []
135
146
  if size > TAIL_JSONL_BYTES:
136
147
  lines = lines[1:]
137
- if len(lines) > TAIL_JSONL_MAX_LINES:
138
- lines = lines[-TAIL_JSONL_MAX_LINES:]
148
+ if len(lines) > max_lines:
149
+ lines = lines[-max_lines:]
139
150
  return lines
140
151
 
141
152
 
142
- def _read_text_lines(path: Path) -> List[str]:
153
+ def _read_text_lines(path: Path, max_lines: int = 0) -> List[str]:
154
+ """读取文件全部行,或当 max_lines > 0 时只读尾部 max_lines 行。"""
155
+ if max_lines <= 0:
156
+ with open(path, 'r', encoding='utf-8') as f:
157
+ return f.readlines()
158
+ # 只读尾部 max_lines 行(从头读,跳过后半部分)
143
159
  with open(path, 'r', encoding='utf-8') as f:
144
- return f.readlines()
160
+ f.seek(0, 2)
161
+ file_size = f.tell()
162
+ if file_size <= 64 * 1024:
163
+ lines = f.read().splitlines()
164
+ else:
165
+ # 从文件末尾读取约 1MB 再提取最后 max_lines 行
166
+ chunk_size = min(1024 * 1024, file_size)
167
+ f.seek(max(0, file_size - chunk_size))
168
+ tail = f.read()
169
+ all_lines = tail.splitlines()
170
+ # 如果不够,继续向前读
171
+ lines = all_lines
172
+ lines_read = len(all_lines)
173
+ while lines_read < max_lines and file_size > chunk_size:
174
+ f.seek(max(0, file_size - chunk_size * 2))
175
+ more = f.read(chunk_size)
176
+ more_lines = more.splitlines()
177
+ lines = more_lines + lines
178
+ lines_read = len(lines)
179
+ if len(lines) > max_lines:
180
+ lines = lines[-max_lines:]
181
+ return lines
145
182
 
146
183
 
147
184
  # 子 Agent 回传消息的特征
@@ -629,22 +666,9 @@ def resolve_agent_session_jsonl(
629
666
  if isinstance(index_map.get(k), dict) and str(k).startswith(prefix)
630
667
  ]
631
668
 
632
- # 1) 与当前子任务最一致:runs.json 中该 agent 最近一次 run 的 childSessionKey
633
- runs = get_subagent_runs().get(state_id, [])
634
- if runs:
635
- runs.sort(key=lambda x: x.get('startedAt', 0), reverse=True)
636
- preferred_key = runs[0].get('childSessionKey')
637
- if preferred_key and preferred_key in index_map:
638
- ent = index_map[preferred_key]
639
- if isinstance(ent, dict):
640
- p = resolve_session_jsonl_path(sessions_path, ent)
641
- if p and p.is_file():
642
- sid = ent.get('sessionId') or preferred_key
643
- return p, sid, preferred_key
644
-
645
- # 2) 按 sessions.json 的 updatedAt/lastMessageAt 选最近会话(在 glob mtime 之前)
646
- # OpenClaw 在任务结束后可能从 runs.json 移除 run,此处仍可定位「最近活跃」子会话 jsonl。
647
- # 多文件时比仅凭 *.jsonl 的 mtime 更稳,且与 4/24 当晚最晚更新 session 一致。
669
+ # 直接按 sessions.json updatedAt 选最新会话。
670
+ # runs.json 中的 run 即使已结束也仍保留在列表中,用它优先会错误选中旧 session;
671
+ # 而 updatedAt 由 OpenClaw 维护,能准确反映会话的实际最后活跃时间。
648
672
  if agent_keys:
649
673
  agent_keys.sort(
650
674
  key=lambda k: (index_map[k].get('updatedAt') or index_map[k].get('lastMessageAt') or 0),
@@ -1358,44 +1382,52 @@ def _parse_session_file(
1358
1382
  )
1359
1383
  elif file_size <= _SUBAGENT_READ_SAFETY_BYTES:
1360
1384
  steps, started_at, session_status = _parse_session_lines(
1361
- _read_text_lines(path), requester_info, started_at_hint=header_ts
1385
+ _read_text_lines(path, limit * 3), requester_info, started_at_hint=header_ts
1362
1386
  )
1363
1387
  if subagent_anchor_ms is None:
1364
1388
  steps = _slice_subagent_steps_from_first_user(steps)
1365
1389
  elif subagent_anchor_ms is not None:
1366
1390
  # 超大 + 有 run:以尾部为窗口(近期)再交给 get_timeline 的 _apply 锚定
1367
- tail_lines = _read_jsonl_tail_line_slice(path)
1391
+ tail_lines = _read_jsonl_tail_line_slice(path, target_lines=limit)
1368
1392
  if tail_lines is not None:
1369
1393
  steps, started_at, session_status = _parse_session_lines(
1370
1394
  tail_lines, requester_info, started_at_hint=header_ts
1371
1395
  )
1372
1396
  else:
1373
1397
  steps, started_at, session_status = _parse_session_lines(
1374
- _read_text_line_window(path, 0, _MAX_LINES_AFTER_TASK_START),
1398
+ _read_text_line_window(path, 0, limit * 3),
1375
1399
  requester_info, started_at_hint=header_ts
1376
1400
  )
1377
1401
  else:
1378
1402
  # 超大 + 无 run:先定位首条 user 行,自 PM/主控下发起读有限行
1379
1403
  uidx = _line_index_of_first_user_message(path)
1380
1404
  start = uidx if uidx is not None else 0
1381
- part = _read_text_line_window(path, start, _MAX_LINES_AFTER_TASK_START)
1405
+ part = _read_text_line_window(path, start, limit * 3)
1382
1406
  steps, started_at, session_status = _parse_session_lines(
1383
1407
  part, requester_info, started_at_hint=header_ts
1384
1408
  )
1385
1409
  steps = _slice_subagent_steps_from_first_user(steps)
1386
1410
  else:
1387
- tail_lines = _read_jsonl_tail_line_slice(path)
1411
+ tail_lines = _read_jsonl_tail_line_slice(path, target_lines=limit)
1388
1412
  if tail_lines is not None:
1389
1413
  steps, started_at, session_status = _parse_session_lines(
1390
1414
  tail_lines, requester_info, started_at_hint=header_ts
1391
1415
  )
1392
1416
  if len(steps) < limit:
1393
- steps, started_at, session_status = _parse_session_lines(
1394
- _read_text_lines(path), requester_info, started_at_hint=header_ts
1395
- )
1417
+ # 尾部步骤不够,从头部补充(最多读 limit 步对应的行数缓冲)
1418
+ head_lines = _read_text_lines(path, _HEAD_JSONL_LINES)
1419
+ if head_lines:
1420
+ more_steps, _, _ = _parse_session_lines(
1421
+ head_lines, requester_info, started_at_hint=header_ts
1422
+ )
1423
+ # 合并并重新截取最新的 limit 步
1424
+ combined = more_steps + steps
1425
+ if len(combined) > limit:
1426
+ combined = combined[-limit:]
1427
+ steps, started_at, session_status = combined, started_at, session_status
1396
1428
  else:
1397
1429
  steps, started_at, session_status = _parse_session_lines(
1398
- _read_text_lines(path), requester_info, started_at_hint=header_ts
1430
+ _read_text_lines(path, limit * 3), requester_info, started_at_hint=header_ts
1399
1431
  )
1400
1432
 
1401
1433
  if len(steps) > step_budget:
@@ -30,6 +30,28 @@ MAIN_AGENT_SOLO_STREAM_GRACE_SEC = 20
30
30
 
31
31
  AgentStatus = Literal['idle', 'working', 'down']
32
32
 
33
+ # 最近多久内的 error run 应视为 down 状态(分钟)
34
+ _RECENT_ERROR_RUN_WINDOW_MINUTES = 5
35
+
36
+
37
def _has_recent_error_run(agent_id: str, minutes: int = _RECENT_ERROR_RUN_WINDOW_MINUTES) -> bool:
    """
    Tell whether the agent has a run that ended recently with an error outcome.

    Looks at up to 20 runs from ``get_agent_runs`` and reports True as soon as
    one has an ``endedAt`` (epoch milliseconds) within the last ``minutes``
    minutes and an ``outcome`` dict whose ``status`` is ``'error'``.

    Args:
        agent_id: Agent whose runs are checked.
        minutes: Look-back window in minutes.

    Returns:
        True if such a run exists, otherwise False.
    """
    import time

    candidates = get_agent_runs(agent_id, limit=20)
    oldest_allowed = int(time.time() * 1000) - minutes * 60 * 1000

    def _ended_recently_with_error(entry) -> bool:
        finished_at = entry.get('endedAt')
        # Still running, or finished before the window opened.
        if not finished_at or finished_at < oldest_allowed:
            return False
        result = entry.get('outcome')
        return isinstance(result, dict) and result.get('status') == 'error'

    return any(_ended_recently_with_error(entry) for entry in candidates)
54
+
33
55
 
34
56
  def _main_agent_solo_processing(agent_id: str) -> bool:
35
57
  """
@@ -58,16 +80,16 @@ def _main_agent_solo_processing(agent_id: str) -> bool:
58
80
  def calculate_agent_status(agent_id: str, use_cache: bool = True) -> AgentStatus:
59
81
  """
60
82
  计算 Agent 状态(基于 runs.json + sessions.json)
61
-
83
+
62
84
  优先级:
63
- 1. 异常 (down) - 最近5分钟有 stopReason=error
85
+ 1. 异常 (down) - 最近5分钟有 stopReason=error,或有最近结束的 error run
64
86
  2. 工作中 (working) - 有活跃 subagent run;或主 Agent 且无 run 时 thinking / 未完成工具 / 短窗内会话写入
65
87
  3. 空闲 (idle) - 其余情况(子 Agent 无 run 即空闲,与协作图 activePath 一致)
66
-
88
+
67
89
  Args:
68
90
  agent_id: Agent ID
69
91
  use_cache: 是否使用缓存(默认 True)
70
-
92
+
71
93
  Returns:
72
94
  Agent 状态
73
95
  """
@@ -82,6 +104,8 @@ def calculate_agent_status(agent_id: str, use_cache: bool = True) -> AgentStatus
82
104
  # 重新计算
83
105
  if has_recent_errors(agent_id, minutes=5):
84
106
  status = 'down'
107
+ elif _has_recent_error_run(agent_id, minutes=5):
108
+ status = 'down'
85
109
  elif is_agent_working(agent_id):
86
110
  status = 'working'
87
111
  elif _main_agent_solo_processing(agent_id):
@@ -124,7 +148,8 @@ def get_agents_with_status() -> list:
124
148
  try:
125
149
  status = calculate_agent_status(agent_id)
126
150
  current_task = get_current_task(agent_id)
127
- if status == 'idle':
151
+ # idle 且无已结束 run 任务时才清空 currentTask
152
+ if status == 'idle' and not current_task:
128
153
  current_task = ''
129
154
  last_active = get_last_active_time(agent_id)
130
155
  last_error = get_last_error(agent_id) if status == 'down' else None
@@ -155,16 +180,32 @@ def get_agents_with_status() -> list:
def _truncate_task_text(task: str, max_len: int = 60) -> str:
    """Cap *task* at ``max_len`` characters, ending in '...' when shortened."""
    if len(task) > max_len:
        return task[:max_len - 3] + '...'
    return task


def get_current_task(agent_id: str) -> str:
    """
    Return a short description of the agent's current (or last) task.

    Priority:
      1. The first run whose ``endedAt`` is None, i.e. a run still in
         progress. Its task text is returned, possibly empty.
      2. Otherwise the first ended run with a non-empty task, prefixed with
         '[失败] ' when its outcome status is 'error' and '[已结束] '
         otherwise, so an interrupted task stays visible.

    "First" means first in the order returned by ``get_agent_runs``.
    NOTE(review): presumably that is newest-first; confirm against the reader.

    Args:
        agent_id: Agent whose runs are inspected (up to 40 runs).

    Returns:
        The task text limited to 60 characters (plus prefix for ended runs),
        or '' when there is nothing to show.
    """
    runs = get_agent_runs(agent_id, limit=40)

    # Priority 1: a run that has not ended yet.
    for run in runs:
        if run.get('endedAt') is None:
            return _truncate_task_text(run.get('task', '') or '')

    # Priority 2: every remaining run has ended; show the first one that
    # carries a task description.
    for run in runs:
        task = run.get('task', '') or ''
        if not task:
            continue
        outcome = run.get('outcome', {})
        status = outcome.get('status') if isinstance(outcome, dict) else None
        prefix = '[失败] ' if status == 'error' else '[已结束] '
        # Same 60-character cap as the active branch, so a 58-60 character
        # task is no longer cut to a string that is not any shorter.
        return prefix + _truncate_task_text(task)

    return ''
170
211