openclaw-diag-cli 0.1.3 → 0.2.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -85,7 +85,6 @@ def emit_config(out: output.Output, data: list, obj, prefix: str = "") -> None:
85
85
  def main() -> int:
86
86
  parser = cli.build_common_parser(
87
87
  description="模块 3:采集 OpenClaw 配置(含敏感字段脱敏)",
88
- prog="03_configuration",
89
88
  )
90
89
  args = parser.parse_args()
91
90
 
@@ -95,6 +94,10 @@ def main() -> int:
95
94
  config_path = args.config
96
95
  if not os.path.isfile(config_path):
97
96
  out.item(f"配置文件未找到: {config_path}")
97
+ out.line(" 下一步:")
98
+ out.line(" 1) 确认 OpenClaw 已经初始化(运行过 `openclaw` 即会生成配置)")
99
+ out.line(" 2) 用 OPENCLAW_CONFIG=/path/to/openclaw.json 或 --config 指向正确路径")
100
+ out.line(" 3) 在容器/远端诊断时,用 OPENCLAW_HOME=/path 整体覆盖")
98
101
  out.evidence(config_path, "<文件缺失>")
99
102
  out.set_data("config_path", config_path)
100
103
  out.set_data("found", False)
@@ -11,7 +11,6 @@ import sys
11
11
  from collections import defaultdict
12
12
  from datetime import datetime
13
13
  from pathlib import Path
14
- from typing import List, Optional
15
14
 
16
15
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
17
16
 
@@ -146,11 +145,20 @@ def section_restart_events(out: output.Output) -> None:
146
145
 
147
146
  def section_model_api(out: output.Output, args) -> None:
148
147
  if not os.path.isfile(args.config):
148
+ out.item("模型 API: 配置文件未找到")
149
+ out.set_data("model_api_status", {
150
+ "found": False, "reason": "config_not_found", "checked": args.config,
151
+ })
149
152
  return
150
153
  try:
151
154
  with open(args.config) as f:
152
155
  cfg = json.load(f)
153
- except Exception:
156
+ except (OSError, json.JSONDecodeError) as e:
157
+ out.item(f"模型 API: 配置读取失败 ({type(e).__name__})")
158
+ out.set_data("model_api_status", {
159
+ "found": False, "reason": "config_unreadable",
160
+ "checked": args.config, "error": str(e)[:200],
161
+ })
154
162
  return
155
163
  models = cfg.get("models", {}) or {}
156
164
  all_cfgs = {}
@@ -290,8 +298,12 @@ def section_ws_lifecycle(out: output.Output, app_log: str) -> None:
290
298
  else:
291
299
  continue
292
300
  events.append((ts_dt, ts_str, account, kind, msg))
293
- except OSError:
294
- out.item("Channel WS: 读取应用日志失败")
301
+ except OSError as e:
302
+ out.item(f"Channel WS: 读取应用日志失败 ({type(e).__name__})")
303
+ out.set_data("ws_summary_status", {
304
+ "found": False, "reason": "log_unreadable",
305
+ "checked": app_log, "error": str(e)[:200],
306
+ })
295
307
  return
296
308
 
297
309
  if not events and not expired:
@@ -564,7 +576,12 @@ def section_gateway_errors(out: output.Output, app_log: str) -> None:
564
576
  continue
565
577
  kind, code, reason = r
566
578
  events.append((ts, kind, code, reason or "(no reason)"))
567
- except OSError:
579
+ except OSError as e:
580
+ out.item(f"Gateway 错误码: 读取应用日志失败 ({type(e).__name__})")
581
+ out.set_data("gateway_errors_status", {
582
+ "found": False, "reason": "log_unreadable",
583
+ "checked": app_log, "error": str(e)[:200],
584
+ })
568
585
  return
569
586
 
570
587
  if not events:
@@ -614,7 +631,6 @@ def section_gateway_errors(out: output.Output, app_log: str) -> None:
614
631
  def main() -> int:
615
632
  parser = cli.build_common_parser(
616
633
  description="模块 4:Gateway 状态采集",
617
- prog="04_gateway",
618
634
  )
619
635
  args = parser.parse_args()
620
636
 
@@ -622,6 +638,7 @@ def main() -> int:
622
638
  out.section("模块 4:Gateway 状态")
623
639
 
624
640
  port = 18789
641
+ port_source = "default"
625
642
  if os.path.isfile(args.config):
626
643
  try:
627
644
  with open(args.config) as f:
@@ -629,8 +646,13 @@ def main() -> int:
629
646
  cp = cfg.get("gateway", {}).get("port")
630
647
  if cp:
631
648
  port = int(cp)
632
- except Exception:
633
- pass
649
+ port_source = "config"
650
+ except (OSError, json.JSONDecodeError, ValueError) as e:
651
+ out.set_data("port_source_status", {
652
+ "found": False, "reason": "config_unreadable",
653
+ "checked": args.config, "error": str(e)[:200],
654
+ })
655
+ out.set_data("port_source", port_source)
634
656
 
635
657
  section_process_port(out, args, port)
636
658
  section_restart_events(out)
@@ -79,21 +79,25 @@ def render_log_line(line: str, max_len: int = 300) -> str:
79
79
  return line
80
80
 
81
81
 
82
- def collect_error_lines(log_files: List[str]) -> List[str]:
82
+ def collect_error_lines(log_files: List[str]):
83
+ """Returns (matched_lines, unreadable_files). One unreadable file does not
84
+ abort the whole scan, but we tell the caller which paths failed."""
83
85
  out: List[str] = []
86
+ unreadable: List[dict] = []
84
87
  for lf in log_files:
85
88
  try:
86
89
  with open(lf, errors="replace") as f:
87
90
  for ln in f:
88
91
  if _ERR_RE.search(ln):
89
92
  out.append(ln.rstrip("\n"))
90
- except OSError:
91
- continue
92
- return out
93
+ except OSError as e:
94
+ unreadable.append({"path": lf, "error": f"{type(e).__name__}: {e}"})
95
+ return out, unreadable
93
96
 
94
97
 
95
- def collect_api_errors(log_files: List[str]) -> List[str]:
98
+ def collect_api_errors(log_files: List[str]):
96
99
  out: List[str] = []
100
+ unreadable: List[dict] = []
97
101
  for lf in log_files:
98
102
  try:
99
103
  with open(lf, errors="replace") as f:
@@ -107,9 +111,9 @@ def collect_api_errors(log_files: List[str]) -> List[str]:
107
111
  if _API_EXCLUDE_TXT_RE.search(ln):
108
112
  continue
109
113
  out.append(ln.rstrip("\n"))
110
- except OSError:
111
- continue
112
- return out
114
+ except OSError as e:
115
+ unreadable.append({"path": lf, "error": f"{type(e).__name__}: {e}"})
116
+ return out, unreadable
113
117
 
114
118
 
115
119
  def journalctl_errors() -> str:
@@ -152,17 +156,21 @@ def tool_errors_from_session(session_path: str):
152
156
  msg = obj.get("message", {}) or {}
153
157
  if msg.get("isError"):
154
158
  counts[msg.get("toolName", "unknown")] += 1
155
- except Exception:
156
- pass
159
+ except (json.JSONDecodeError, ValueError):
160
+ # Expected: session.jsonl can have malformed lines from
161
+ # interrupted writes; skip and keep counting.
162
+ continue
157
163
  except OSError:
158
- pass
164
+ # Session file disappeared between glob() and open(). Caller already
165
+ # falls back to "no recent session"; reporting per-file unreadable
166
+ # would mostly add noise here.
167
+ return counts
159
168
  return counts
160
169
 
161
170
 
162
171
  def main() -> int:
163
172
  parser = cli.build_common_parser(
164
173
  description="模块 5:采集近期错误日志",
165
- prog="05_recent_errors",
166
174
  )
167
175
  args = parser.parse_args()
168
176
 
@@ -187,8 +195,10 @@ def main() -> int:
187
195
  out.line("")
188
196
 
189
197
  if logs:
190
- err_lines = collect_error_lines(logs)
198
+ err_lines, err_unreadable = collect_error_lines(logs)
191
199
  out.set_data("app_error_count", len(err_lines))
200
+ if err_unreadable:
201
+ out.set_data("app_log_unreadable", err_unreadable)
192
202
  if err_lines:
193
203
  out.item(f"应用日志 ERROR 级别: {len(err_lines)} 条 — Gateway 运行时报错,包括工具失败、模型异常等")
194
204
  rendered = []
@@ -202,7 +212,7 @@ def main() -> int:
202
212
  else:
203
213
  out.item("应用日志 ERROR 级别: 0 条 — Gateway 运行时报错")
204
214
 
205
- api_lines = collect_api_errors(logs)
215
+ api_lines, _api_unreadable = collect_api_errors(logs)
206
216
  out.set_data("api_error_count", len(api_lines))
207
217
  if api_lines:
208
218
  out.item(f"模型 API HTTP 错误: {len(api_lines)} 条 ")
@@ -13,11 +13,12 @@ import sys
13
13
  import time
14
14
  from collections import Counter, deque
15
15
  from pathlib import Path
16
- from typing import Optional
17
16
 
18
17
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
19
18
 
20
- from ocdiag import cli, output, paths
19
+ from ocdiag import cli, output
20
+ from ocdiag.timeutil import fmt_age, fmt_ts
21
+ from ocdiag.tokens import fmt_tokens, percentile
21
22
 
22
23
  try:
23
24
  from croniter import croniter # type: ignore
@@ -26,15 +27,6 @@ except ImportError:
26
27
  HAS_CRONITER = False
27
28
 
28
29
 
29
- def fmt_ts(ms):
30
- if not ms:
31
- return "?"
32
- try:
33
- return datetime.datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
34
- except Exception:
35
- return str(ms)
36
-
37
-
38
30
  def fmt_duration(ms):
39
31
  if ms is None:
40
32
  return "?"
@@ -46,24 +38,6 @@ def fmt_duration(ms):
46
38
  return f"{s/3600:.1f}h"
47
39
 
48
40
 
49
- def fmt_age(ms_delta):
50
- s = abs(ms_delta) / 1000
51
- if s < 60:
52
- return f"{s:.0f}秒"
53
- if s < 3600:
54
- return f"{s/60:.0f}分钟"
55
- if s < 86400:
56
- return f"{s/3600:.1f}小时"
57
- return f"{s/86400:.1f}天"
58
-
59
-
60
- def percentile(sorted_list, p):
61
- if not sorted_list:
62
- return None
63
- k = max(0, min(len(sorted_list) - 1, int(len(sorted_list) * p)))
64
- return sorted_list[k]
65
-
66
-
67
41
  def format_schedule(sched):
68
42
  k = sched.get("kind", "?")
69
43
  if k == "cron":
@@ -121,16 +95,6 @@ def load_runs(runs_dir, jid):
121
95
  return out
122
96
 
123
97
 
124
- def fmt_k(n):
125
- if n is None:
126
- return "?"
127
- if n >= 1_000_000:
128
- return f"{n/1_000_000:.1f}M"
129
- if n >= 1000:
130
- return f"{n/1000:.1f}K"
131
- return str(n)
132
-
133
-
134
98
  def extract_usage(r):
135
99
  u = r.get("usage")
136
100
  if not u and isinstance(r.get("result"), dict):
@@ -463,7 +427,7 @@ def section_jobs(out: output.Output, jobs_file: str, state_file: str, runs_dir:
463
427
  cost_sum += cost
464
428
  has_cost = True
465
429
  if has_usage:
466
- line = f" tokens(最近{len(recent)}次): in={fmt_k(input_sum)} out={fmt_k(output_sum)}"
430
+ line = f" tokens(最近{len(recent)}次): in={fmt_tokens(input_sum)} out={fmt_tokens(output_sum)}"
467
431
  if has_cost:
468
432
  line += f" | cost=${cost_sum:.4f}"
469
433
  out.item(line)
@@ -671,7 +635,6 @@ def section_system_crontab(out: output.Output) -> None:
671
635
  def main() -> int:
672
636
  parser = cli.build_common_parser(
673
637
  description="模块 6:定时任务采集",
674
- prog="06_cron_jobs",
675
638
  )
676
639
  args = parser.parse_args()
677
640
 
@@ -7,7 +7,6 @@ import glob
7
7
  import json
8
8
  import os
9
9
  import sys
10
- import tempfile
11
10
  from collections import defaultdict
12
11
  from datetime import datetime, timezone, timedelta
13
12
  from pathlib import Path
@@ -15,45 +14,13 @@ from pathlib import Path
15
14
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
16
15
 
17
16
  from ocdiag import cli, output
17
+ from ocdiag.timeutil import parse_msg_ts, parse_obj_ts
18
+ from ocdiag.tokens import fmt_tokens, pct
18
19
 
19
20
 
20
21
  NORMAL_STOPS = {"stop", "end_turn", "toolUse", "tool_calls", ""}
21
22
 
22
23
 
23
- def parse_obj_ts(ts_str):
24
- if not ts_str:
25
- return None
26
- try:
27
- return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
28
- except Exception:
29
- return None
30
-
31
-
32
- def parse_msg_ts(ms):
33
- if ms is None:
34
- return None
35
- try:
36
- return datetime.fromtimestamp(int(ms) / 1000, tz=timezone.utc)
37
- except Exception:
38
- return None
39
-
40
-
41
- def pct(sorted_vals, p):
42
- if not sorted_vals:
43
- return 0.0
44
- n = len(sorted_vals)
45
- idx = min(n - 1, int(n * p))
46
- return sorted_vals[idx]
47
-
48
-
49
- def fmt_tokens(n):
50
- if n >= 1_000_000:
51
- return f"{n/1_000_000:.1f}M"
52
- if n >= 1_000:
53
- return f"{n/1_000:.1f}K"
54
- return str(n)
55
-
56
-
57
24
  def fmt_args(tool_name, tc_args, max_len=100):
58
25
  if isinstance(tc_args, str):
59
26
  try:
@@ -432,11 +399,18 @@ def render(out: output.Output, data, file_count):
432
399
  else:
433
400
  for i, (sec, val, hint) in enumerate(bottleneck_items):
434
401
  out.item(f"#{i+1}: {sec}(P95={val:.1f}s, {hint})")
402
+ out.set_data("bottleneck", {
403
+ "model_p95": round(model_p95, 3),
404
+ "tool_p95": round(tool_p95, 3),
405
+ "model_top": model_top,
406
+ "tool_top": tool_top,
407
+ })
435
408
 
436
409
  out.subsection("模型性能")
437
410
  out.item(f"数据来源: 最近 {file_count} 个 session 文件")
438
411
  out.line("")
439
412
  model_stats = data["model_stats"]
413
+ models_payload = {}
440
414
  if not model_stats:
441
415
  out.item("最近 Session 中未发现模型使用数据")
442
416
  else:
@@ -481,9 +455,29 @@ def render(out: output.Output, data, file_count):
481
455
  stops_str = " ".join(f"{k}:{v}" for k, v in sorted(stops.items(), key=lambda x: -x[1]))
482
456
  out.item(f" stopReasons: {stops_str}" if stops_str else " stopReasons: (none)")
483
457
  out.line("")
458
+ models_payload[model_key] = {
459
+ "calls": calls,
460
+ "p50_s": round(p50, 3),
461
+ "p95_s": round(p95, 3),
462
+ "max_s": round(mx, 3),
463
+ "throughput_tok_s": (
464
+ None if s["output"] == 0 or total_dur <= 0
465
+ else round(s["output"] / total_dur, 1)
466
+ ),
467
+ "input_tokens": s["input"],
468
+ "output_tokens": s["output"],
469
+ "cache_read_tokens": s["cache_read"],
470
+ "cache_write_tokens": s["cache_write"],
471
+ "cost_usd": round(s["cost"], 6),
472
+ "success_rate_pct": round(success, 1),
473
+ "stop_reasons": dict(stops),
474
+ }
475
+ out.set_data("models", models_payload)
476
+ out.set_data("session_files_analyzed", file_count)
484
477
 
485
478
  out.subsection("工具性能(Top 10 by 调用量)")
486
479
  timed_tools = {n: s for n, s in data["tool_stats"].items() if s["durations"]}
480
+ tools_payload = {}
487
481
  if not timed_tools:
488
482
  out.item("(无工具调用数据)")
489
483
  else:
@@ -491,8 +485,11 @@ def render(out: output.Output, data, file_count):
491
485
  for name, s in ranked:
492
486
  durs = sorted(s["durations"])
493
487
  calls = s["calls"]
488
+ p50 = pct(durs, 0.50)
489
+ p95 = pct(durs, 0.95)
490
+ mx = durs[-1]
494
491
  err_rate = (s["errors"] / calls * 100) if calls else 0.0
495
- dur_str = f"P50={pct(durs,0.50):.3f}s P95={pct(durs,0.95):.3f}s Max={durs[-1]:.3f}s"
492
+ dur_str = f"P50={p50:.3f}s P95={p95:.3f}s Max={mx:.3f}s"
496
493
  out.item(f"{name}: {calls} 次 | {dur_str} | 错误 {err_rate:.0f}%")
497
494
  timed = [r for r in s["records"] if r["dur"] is not None]
498
495
  timed.sort(key=lambda r: r["dur"], reverse=True)
@@ -511,6 +508,15 @@ def render(out: output.Output, data, file_count):
511
508
  tail = f", {r['err_brief']}" if r["err_brief"] else ""
512
509
  out.item(f" 失败: {args} (error, {dur_txt}{tail})")
513
510
  err_shown += 1
511
+ tools_payload[name] = {
512
+ "calls": calls,
513
+ "errors": s["errors"],
514
+ "error_rate_pct": round(err_rate, 1),
515
+ "p50_s": round(p50, 3),
516
+ "p95_s": round(p95, 3),
517
+ "max_s": round(mx, 3),
518
+ }
519
+ out.set_data("tools", tools_payload)
514
520
 
515
521
  out.subsection("慢调用 Top 20")
516
522
  slow = sorted(data["slow_calls_top"], key=lambda x: x[0], reverse=True)
@@ -529,12 +535,17 @@ def render(out: output.Output, data, file_count):
529
535
  else:
530
536
  for i, entry in enumerate(top20, 1):
531
537
  out.item(f"[{i}] {entry[2]}")
538
+ out.set_data("slow_calls_top20", [
539
+ {"duration_s": round(e[0], 3), "kind": e[1], "summary": e[2]}
540
+ for e in top20
541
+ ])
532
542
 
533
543
  out.subsection("异常 stopReason — 模型非正常结束(如 error、中断)")
534
544
  abnormal_stops = data["abnormal_stops"]
535
545
  out.item(f"共 {len(abnormal_stops)} 条" + ("(无异常)" if not abnormal_stops else ""))
536
546
  for s in abnormal_stops[:20]:
537
547
  out.item(s)
548
+ out.set_data("abnormal_stops", abnormal_stops)
538
549
 
539
550
  out.subsection("模型 API 错误分布")
540
551
  api_err_total = sum(data["api_error_stats"].values())
@@ -548,9 +559,16 @@ def render(out: output.Output, data, file_count):
548
559
  out.item("分布:")
549
560
  for cat, n in sorted(data["api_error_stats"].items(), key=lambda kv: -kv[1]):
550
561
  out.item(f" {cat}: {n}")
562
+ out.set_data("api_errors", {
563
+ "total_calls": api_total,
564
+ "error_count": api_err_total,
565
+ "error_rate_pct": round(api_err_total / api_total * 100, 2) if api_total else 0.0,
566
+ "by_category": dict(data["api_error_stats"]),
567
+ })
551
568
 
552
569
  out.subsection("端到端消息延迟(user 发送 → assistant 最终响应)")
553
570
  e2e = data["e2e_latencies"]
571
+ e2e_payload = {"count": 0}
554
572
  if not e2e:
555
573
  out.item("(数据不足,未发现 user→assistant 配对)")
556
574
  else:
@@ -573,10 +591,19 @@ def render(out: output.Output, data, file_count):
573
591
  n = bucket_counts[lbl]
574
592
  pct_v = (n / total * 100) if total else 0.0
575
593
  out.item(f" {lbl}: {n} ({pct_v:.1f}%)")
594
+ e2e_payload = {
595
+ "count": total,
596
+ "p50_s": round(p50, 3),
597
+ "p95_s": round(p95, 3),
598
+ "max_s": round(mx, 3),
599
+ "buckets": dict(bucket_counts),
600
+ }
601
+ out.set_data("e2e_latency", e2e_payload)
576
602
 
577
603
  out.subsection("延迟 vs 上下文大小")
578
604
  ctx_buckets_def = data["ctx_buckets_def"]
579
605
  ctx_durs = data["ctx_bucket_durs"]
606
+ ctx_payload = {}
580
607
  if not any(ctx_durs.get(l) for l, _ in ctx_buckets_def):
581
608
  out.item("(数据不足)")
582
609
  else:
@@ -585,13 +612,21 @@ def render(out: output.Output, data, file_count):
585
612
  durs = sorted(ctx_durs.get(b_label, []))
586
613
  if not durs:
587
614
  out.line(f" {b_label:<14} {0:>8} {'-':>10} {'-':>10}")
615
+ ctx_payload[b_label] = {"count": 0, "p50_s": None, "p95_s": None}
588
616
  continue
589
617
  p50 = pct(durs, 0.50)
590
618
  p95 = pct(durs, 0.95)
591
619
  out.line(f" {b_label:<14} {len(durs):>8} {p50:>9.1f}s {p95:>9.1f}s")
620
+ ctx_payload[b_label] = {
621
+ "count": len(durs),
622
+ "p50_s": round(p50, 3),
623
+ "p95_s": round(p95, 3),
624
+ }
625
+ out.set_data("ctx_buckets", ctx_payload)
592
626
 
593
627
  out.subsection("每日趋势(最近 7 天)")
594
628
  daily_stats = data["daily_stats"]
629
+ daily_payload = []
595
630
  if not daily_stats:
596
631
  out.item("(数据不足)")
597
632
  else:
@@ -602,12 +637,22 @@ def render(out: output.Output, data, file_count):
602
637
  d = daily_stats.get(d_key)
603
638
  if not d or d["calls"] == 0:
604
639
  out.line(f" {d_key:<10} {0:>8} {'-':>10} {'-':>14}")
640
+ daily_payload.append({"date": d_key, "calls": 0,
641
+ "p50_s": None, "output_tokens": 0})
605
642
  continue
606
643
  durs = sorted(d["durs"])
607
644
  p50 = pct(durs, 0.50) if durs else 0.0
608
645
  out.line(f" {d_key:<10} {d['calls']:>8} {p50:>9.1f}s {fmt_tokens(d['output']):>14}")
646
+ daily_payload.append({
647
+ "date": d_key,
648
+ "calls": d["calls"],
649
+ "p50_s": round(p50, 3),
650
+ "output_tokens": d["output"],
651
+ })
652
+ out.set_data("daily_trend", daily_payload)
609
653
 
610
654
  out.subsection("Cache 命中率")
655
+ cache_payload = {"total_calls": data["cache_total_calls"]}
611
656
  if data["cache_total_calls"] == 0:
612
657
  out.item("(无数据)")
613
658
  else:
@@ -622,17 +667,34 @@ def render(out: output.Output, data, file_count):
622
667
  f"cache_write: {fmt_tokens(data['cache_sum_cache_write'])}"
623
668
  )
624
669
  denom = data["cache_sum_input"] + data["cache_sum_cache_read"]
670
+ ratio_pct = None
625
671
  if denom > 0:
626
672
  ratio = data["cache_sum_cache_read"] / denom * 100
673
+ ratio_pct = round(ratio, 3)
627
674
  out.item(
628
675
  f"上下文 cache 占比: cacheRead/(input+cacheRead) = "
629
676
  f"{ratio:.3f}% ({fmt_tokens(data['cache_sum_cache_read'])}/{fmt_tokens(denom)})"
630
677
  )
678
+ cache_payload = {
679
+ "total_calls": data["cache_total_calls"],
680
+ "calls_with_cache_read": data["cache_calls_with_cache"],
681
+ "hit_rate_pct": round(hit_pct, 2),
682
+ "input_tokens": data["cache_sum_input"],
683
+ "cache_read_tokens": data["cache_sum_cache_read"],
684
+ "cache_write_tokens": data["cache_sum_cache_write"],
685
+ "ctx_cache_ratio_pct": ratio_pct,
686
+ }
687
+ out.set_data("cache_hit_rate", cache_payload)
631
688
 
632
689
  out.subsection("工具错误明细")
633
690
  tool_stats = data["tool_stats"]
634
691
  err_total = sum(s["errors"] for s in tool_stats.values())
635
692
  call_total = sum(s["calls"] for s in tool_stats.values())
693
+ tool_errors_payload = {
694
+ "total_errors": err_total,
695
+ "total_calls": call_total,
696
+ "by_tool": {},
697
+ }
636
698
  if err_total == 0:
637
699
  out.item(f"共 0 次错误 (总调用 {call_total} 次中)")
638
700
  else:
@@ -642,13 +704,22 @@ def render(out: output.Output, data, file_count):
642
704
  if s["errors"] == 0:
643
705
  continue
644
706
  out.line(f" {name} ({s['errors']}次):")
707
+ samples = []
645
708
  for r in s["error_records"][:3]:
646
709
  ts_label = r["ts"].strftime("%Y-%m-%d %H:%M:%S") if r["ts"] else "?"
647
710
  brief = r["err_brief"] or "(无错误内容)"
648
711
  out.line(f" {ts_label} | {brief[:100]}")
712
+ samples.append({"ts": ts_label, "brief": brief[:200]})
713
+ tool_errors_payload["by_tool"][name] = {
714
+ "errors": s["errors"],
715
+ "calls": s["calls"],
716
+ "samples": samples,
717
+ }
718
+ out.set_data("tool_errors", tool_errors_payload)
649
719
 
650
720
  out.subsection("Session 消耗 Top 5")
651
721
  session_stats = data["session_stats"]
722
+ session_top_payload = []
652
723
  if not session_stats:
653
724
  out.item("(无数据)")
654
725
  else:
@@ -657,12 +728,18 @@ def render(out: output.Output, data, file_count):
657
728
  for sid, ss in ranked:
658
729
  out.line(f" {sid:<40} {ss['calls']:>8} "
659
730
  f"{fmt_tokens(ss['tokens']):>10} {ss['duration']:>11.0f}s")
731
+ session_top_payload.append({
732
+ "session": sid,
733
+ "calls": ss["calls"],
734
+ "tokens": ss["tokens"],
735
+ "duration_s": round(ss["duration"], 1),
736
+ })
737
+ out.set_data("session_top5", session_top_payload)
660
738
 
661
739
 
662
740
  def main() -> int:
663
741
  parser = cli.build_common_parser(
664
742
  description="模块 7:模型与性能数据",
665
- prog="07_performance",
666
743
  )
667
744
  args = parser.parse_args()
668
745
  out = output.init("performance", json_mode=args.json, no_color=args.no_color)
@@ -675,11 +752,6 @@ def main() -> int:
675
752
 
676
753
  data = analyze_sessions(session_files)
677
754
  render(out, data, len(session_files))
678
-
679
- if args.json:
680
- out.set_data("model_count", len(data["model_stats"]))
681
- out.set_data("session_files_analyzed", len(session_files))
682
- out.set_data("e2e_latency_count", len(data["e2e_latencies"]))
683
755
  return out.done()
684
756
 
685
757
 
@@ -10,69 +10,18 @@ import re
10
10
  import sys
11
11
  import time
12
12
  from collections import defaultdict
13
- from datetime import datetime, timezone
14
13
  from pathlib import Path
15
14
 
16
15
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
17
16
 
18
17
  from ocdiag import cli, output
18
+ from ocdiag.timeutil import fmt_duration, parse_msg_ts, parse_obj_ts
19
+ from ocdiag.tokens import fmt_tokens, human_size, pct
19
20
 
20
21
 
21
22
  NORMAL_STOPS = {"stop", "end_turn", "toolUse", "tool_calls", ""}
22
23
 
23
24
 
24
- def parse_obj_ts(ts_str):
25
- if not ts_str:
26
- return None
27
- try:
28
- return datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
29
- except Exception:
30
- return None
31
-
32
-
33
- def parse_msg_ts(ms):
34
- if ms is None:
35
- return None
36
- try:
37
- return datetime.fromtimestamp(int(ms) / 1000, tz=timezone.utc)
38
- except Exception:
39
- return None
40
-
41
-
42
- def human_size(b):
43
- if b < 1024:
44
- return f"{b}B"
45
- if b < 1048576:
46
- return f"{b/1024:.1f}KB"
47
- if b < 1073741824:
48
- return f"{b/1048576:.1f}MB"
49
- return f"{b/1073741824:.1f}GB"
50
-
51
-
52
- def fmt_tokens(n):
53
- if n >= 1_000_000:
54
- return f"{n/1_000_000:.1f}M"
55
- if n >= 1_000:
56
- return f"{n/1_000:.1f}K"
57
- return str(n)
58
-
59
-
60
- def fmt_duration(sec):
61
- if sec < 60:
62
- return f"{sec:.0f}s"
63
- if sec < 3600:
64
- return f"{sec/60:.1f}m"
65
- return f"{sec/3600:.1f}h"
66
-
67
-
68
- def pct(sorted_vals, p):
69
- if not sorted_vals:
70
- return 0.0
71
- n = len(sorted_vals)
72
- idx = min(n - 1, int(n * p))
73
- return sorted_vals[idx]
74
-
75
-
76
25
  def build_id_to_key_map(agent_dir):
77
26
  sess_json = os.path.join(agent_dir, "sessions", "sessions.json")
78
27
  id_to_key = {}
@@ -502,7 +451,6 @@ def stuck_dimension(out: output.Output, log_dir: str) -> None:
502
451
  def main() -> int:
503
452
  parser = cli.build_common_parser(
504
453
  description="模块 8:Session 数据采集 + Stuck 探测",
505
- prog="08_sessions",
506
454
  )
507
455
  args = parser.parse_args()
508
456
  out = output.init("sessions", json_mode=args.json, no_color=args.no_color)