openclaw-diag-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,687 @@
1
+ #!/usr/bin/env python3
2
+ """模块 7:模型与性能数据(慢调用 Top 20、E2E 延迟、Cache 命中率)。"""
3
+
4
+ from __future__ import annotations
5
+
6
+ import glob
7
+ import json
8
+ import os
9
+ import sys
10
+ import tempfile
11
+ from collections import defaultdict
12
+ from datetime import datetime, timezone, timedelta
13
+ from pathlib import Path
14
+
15
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
16
+
17
+ from ocdiag import cli, output
18
+
19
+
20
# Stop reasons that count as a normal completion. analyze_sessions reports any
# other non-empty stopReason as abnormal, and render uses this set to compute
# the per-model success rate.
NORMAL_STOPS = {
    "stop",
    "end_turn",
    "toolUse",
    "tool_calls",
    "",
}
21
+
22
+
23
def parse_obj_ts(ts_str):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.

    Every ``Z`` in the text is rewritten to ``+00:00`` before parsing, because
    ``datetime.fromisoformat`` only understands the ``Z`` designator natively
    from Python 3.11 onwards.

    Args:
        ts_str: ISO-8601 text such as ``"2024-05-01T12:00:00.123Z"``. May be
            ``None``, empty, or of an unexpected type.

    Returns:
        An aware ``datetime``, or ``None`` when the input is empty, not a
        string, or not parseable. A value that carries no UTC offset is
        assumed to be UTC so that it can safely be subtracted from the aware
        UTC datetimes produced by ``parse_msg_ts`` (mixing naive and aware
        datetimes raises ``TypeError``).
    """
    if not ts_str or not isinstance(ts_str, str):
        return None
    try:
        parsed = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
    except ValueError:
        return None
    if parsed.tzinfo is None:
        # No offset in the text: treat it as UTC rather than returning a
        # naive value that would break later datetime arithmetic.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
30
+
31
+
32
def parse_msg_ts(ms):
    """Convert an epoch timestamp in milliseconds to an aware UTC datetime.

    Args:
        ms: milliseconds since the Unix epoch; anything ``int()`` accepts.
            ``None`` means "no timestamp".

    Returns:
        A ``datetime`` with ``tzinfo=timezone.utc``, or ``None`` when ``ms``
        is ``None`` or cannot be converted.
    """
    if ms is None:
        return None
    try:
        seconds = int(ms) / 1000
        stamp = datetime.fromtimestamp(seconds, tz=timezone.utc)
    except Exception:
        # Best effort: unparseable or out-of-range values count as missing.
        return None
    return stamp
39
+
40
+
41
def pct(sorted_vals, p):
    """Pick the ``p``-quantile element from an already sorted sequence.

    The element at index ``int(len * p)`` is returned, clamped to the last
    element; no interpolation is performed.

    Args:
        sorted_vals: values in ascending order.
        p: quantile as a fraction, e.g. ``0.95``.

    Returns:
        The selected element, or ``0.0`` for an empty sequence.
    """
    if not sorted_vals:
        return 0.0
    count = len(sorted_vals)
    position = int(count * p)
    if position > count - 1:
        position = count - 1
    return sorted_vals[position]
47
+
48
+
49
def fmt_tokens(n):
    """Format a token count compactly with a ``K`` or ``M`` suffix.

    Values of at least one million are shown as ``x.xM``, values of at least
    one thousand as ``x.xK``, and anything smaller via ``str(n)``.
    """
    for threshold, suffix in ((1_000_000, "M"), (1_000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return str(n)
55
+
56
+
57
def fmt_args(tool_name, tc_args, max_len=100):
    """Build a short one-line summary of a tool call's arguments.

    Args:
        tool_name: tool name; matched case-insensitively. ``None`` is allowed.
        tc_args: argument dict, or a string that is first tried as JSON.
        max_len: maximum length of the summary before ``...`` is appended.

    Returns:
        For well-known tools the single most telling argument (command, url,
        query, path, prompt, ...); for any other tool the first three
        ``key=value`` pairs. A string that is not valid JSON is returned cut
        to ``max_len`` characters. Empty or non-dict arguments give ``""``.
    """
    if isinstance(tc_args, str):
        try:
            tc_args = json.loads(tc_args)
        except Exception:
            return (tc_args or "")[:max_len]
    if not (isinstance(tc_args, dict) and tc_args):
        return ""

    def clip(value, limit):
        text = "" if value is None else str(value)
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    tool = (tool_name or "").lower()

    # Tools summarized by a single argument: tool -> (argument key, length cap).
    single_field = {
        "exec": ("command", max_len),
        "web_fetch": ("url", max_len),
        "web_search": ("query", max_len),
        "read": ("path", max_len),
        "write": ("path", max_len),
        "edit": ("path", max_len),
        "image": ("prompt", 60),
        "image_generate": ("prompt", 60),
    }
    if tool in single_field:
        field, limit = single_field[tool]
        return clip(tc_args.get(field, ""), limit)

    if tool == "sessions_spawn":
        agent = tc_args.get("agentId", "")
        task = clip(tc_args.get("task", ""), 60)
        return clip(f"agentId={agent}, task={task}", max_len)

    if tool == "cron":
        action = tc_args.get("action", "")
        job = tc_args.get("jobId", "")
        summary = f"action={action}"
        if job:
            summary += f", jobId={job}"
        return clip(summary, max_len)

    # Unknown tool: show the first three arguments, each value cut to 50 chars.
    pieces = []
    for key, value in list(tc_args.items())[:3]:
        rendered = str(value)
        if len(rendered) > 50:
            rendered = rendered[:50] + "..."
        pieces.append(f"{key}={rendered}")
    return clip(", ".join(pieces), max_len)
97
+
98
+
99
def categorize_api_error(msg, stop):
    """Classify an abnormal assistant message into a coarse error category.

    The message's ``error`` field and any text found in ``content`` are
    joined, cut to 1000 characters and scanned for status codes and keywords.
    The first matching rule wins.

    Args:
        msg: the assistant message dict.
        stop: the message's stop reason (may be empty).

    Returns:
        One of ``rate_limit(429)``, ``service_unavailable(503)``,
        ``auth_error(401/403)``, ``server_error(500)``, ``timeout``,
        ``connection_error``, ``aborted`` or ``other(stop=<stop>)``.
    """
    fragments = [str(msg.get("error", ""))]
    body = msg.get("content", "")
    if isinstance(body, str):
        fragments.append(body)
    elif isinstance(body, list):
        for part in body:
            if isinstance(part, str):
                fragments.append(part)
            elif isinstance(part, dict):
                piece = part.get("text") or part.get("content") or ""
                if piece:
                    fragments.append(str(piece))

    text = " ".join(fragments)[:1000]
    low = text.lower()

    def mentions(*needles):
        return any(needle in low for needle in needles)

    if "429" in text or mentions("rate limit", "throttl"):
        return "rate_limit(429)"
    if "503" in text or mentions("service unavailable"):
        return "service_unavailable(503)"
    if "401" in text or "403" in text or mentions("unauthorized", "forbidden"):
        return "auth_error(401/403)"
    if "500" in text or mentions("internal server"):
        return "server_error(500)"
    if mentions("timeout", "timed out"):
        return "timeout"
    if "connection" in low and mentions("refused", "reset", "aborted"):
        return "connection_error"
    if stop == "aborted" or "aborted" in low:
        return "aborted"
    return f"other(stop={stop or 'n/a'})"
129
+
130
+
131
def collect_session_files(sessions_base, limit=20):
    """Find the most recently modified session transcript files.

    Looks two directory levels below ``sessions_base`` for ``*.jsonl`` and
    ``*.jsonl.reset.*`` files, ignoring paths that end in
    ``.trajectory.jsonl`` and files whose mtime cannot be read.

    Args:
        sessions_base: root directory to search.
        limit: maximum number of paths to return.

    Returns:
        Up to ``limit`` paths, newest modification time first.
    """
    candidates = []
    for file_glob in ("*.jsonl", "*.jsonl.reset.*"):
        for path in glob.glob(os.path.join(sessions_base, "*", "*", file_glob)):
            if path.endswith(".trajectory.jsonl"):
                continue
            try:
                candidates.append((os.path.getmtime(path), path))
            except OSError:
                # File vanished or is unreadable between glob and stat.
                continue
    candidates.sort(reverse=True)
    return [path for _, path in candidates[:limit]]
146
+
147
+
148
def tail_lines(path, n=500):
    """Return up to the last ``n`` lines of a text file.

    The file is streamed through a bounded deque, so only ``n`` lines are
    held in memory however large the file is. It is decoded as UTF-8 with
    undecodable bytes replaced, so the result does not depend on the
    platform's locale encoding.

    Args:
        path: file to read.
        n: maximum number of trailing lines to return.

    Returns:
        A list of lines with their line endings kept. Empty when ``n`` is not
        positive or the file cannot be opened or read.
    """
    # Local import: the module's import block only pulls in defaultdict.
    from collections import deque

    if n <= 0:
        # A [-0:] slice would select the whole file, so handle this explicitly.
        return []
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return list(deque(f, maxlen=n))
    except OSError:
        return []
154
+
155
+
156
def analyze_sessions(session_files):
    """Aggregate model and tool performance statistics from session files.

    For each path the last 500 lines (via ``tail_lines``) are parsed as JSON
    records. Records whose ``message.role`` is ``assistant``, ``toolResult``
    or ``user`` feed the aggregates; malformed lines and records that are not
    JSON objects are skipped instead of aborting the whole run.

    Model call duration is the record's ISO ``timestamp`` minus the message's
    epoch-millisecond ``timestamp``; only durations within 0..600 s are kept.
    End-to-end latency is measured from a user record to the last following
    assistant record that produced output; only 1..3600 s samples are kept.

    Args:
        session_files: iterable of session file paths.

    Returns:
        dict with the keys ``model_stats``, ``tool_stats``,
        ``all_model_durations``, ``all_tool_durations``, ``abnormal_stops``,
        ``slow_calls_top``, ``ctx_buckets_def``, ``ctx_bucket_durs``,
        ``daily_stats``, ``cache_total_calls``, ``cache_calls_with_cache``,
        ``cache_sum_input``, ``cache_sum_cache_read``,
        ``cache_sum_cache_write``, ``session_stats``, ``e2e_latencies``,
        ``api_error_stats`` and ``api_total_assistant_calls``.
    """
    model_stats = defaultdict(lambda: {
        "calls": 0, "input": 0, "output": 0,
        "cache_read": 0, "cache_write": 0, "cost": 0.0,
        "durations": [], "stop_reasons": defaultdict(int),
    })
    tool_stats = defaultdict(lambda: {
        "calls": 0, "errors": 0, "durations": [],
        "records": [], "error_records": [],
    })
    all_model_durations = []
    all_tool_durations = []
    abnormal_stops = []
    slow_calls_top = []
    # (label, exclusive upper bound) for context size = input + cacheRead tokens.
    ctx_buckets_def = [
        ("<50K", 50_000),
        ("50K-100K", 100_000),
        ("100K-200K", 200_000),
        (">200K", float("inf")),
    ]
    ctx_bucket_durs = defaultdict(list)
    daily_stats = defaultdict(lambda: {"calls": 0, "durs": [], "output": 0})
    cache_total_calls = 0
    cache_calls_with_cache = 0
    cache_sum_input = 0
    cache_sum_cache_read = 0
    cache_sum_cache_write = 0
    session_stats = defaultdict(lambda: {"calls": 0, "tokens": 0, "duration": 0.0})

    e2e_latencies = []

    api_error_stats = defaultdict(int)
    api_total_assistant_calls = 0

    for session_path in session_files:
        sess_id = os.path.basename(session_path).split(".jsonl")[0]
        max_msg_ms = 0
        pending_tool_calls = {}
        current_turn_user_ts = None
        current_turn_last_assistant_ts = None

        for raw_line in tail_lines(session_path, 500):
            raw_line = raw_line.strip()
            if not raw_line:
                continue
            try:
                obj = json.loads(raw_line)
            except (ValueError, RecursionError):
                continue
            if not isinstance(obj, dict):
                # Valid JSON but not a record (e.g. a bare list or number).
                continue

            msg = obj.get("message")
            if not isinstance(msg, dict):
                msg = {}
            role = msg.get("role", "")
            obj_ts = parse_obj_ts(obj.get("timestamp"))
            if obj_ts is not None and obj_ts.tzinfo is None:
                # Keep all datetime arithmetic aware; a naive value cannot be
                # subtracted from the UTC datetime of parse_msg_ts.
                obj_ts = obj_ts.replace(tzinfo=timezone.utc)
            msg_ts_raw = msg.get("timestamp")
            msg_ts = parse_msg_ts(msg_ts_raw)

            # Skip records more than one second older than the newest message
            # timestamp already seen in this file.
            msg_ms = _epoch_ms(msg_ts_raw)
            if msg_ms is not None:
                if msg_ms < max_msg_ms - 1000:
                    continue
                if msg_ms > max_msg_ms:
                    max_msg_ms = msg_ms

            if role == "assistant":
                provider = msg.get("provider") or ""
                model = msg.get("model") or "?"
                if provider == "openclaw" and model in ("delivery-mirror", "gateway-injected"):
                    continue
                model_key = f"{provider}/{model}" if provider else model
                usage = msg.get("usage")
                if not isinstance(usage, dict):
                    usage = {}
                stop = msg.get("stopReason", "") or ""
                inp = _usage_number(usage.get("input"))
                out_v = _usage_number(usage.get("output"))
                cr = _usage_number(usage.get("cacheRead"))
                cw = _usage_number(usage.get("cacheWrite"))
                cost_obj = usage.get("cost")
                cost = _usage_number(cost_obj.get("total")) if isinstance(cost_obj, dict) else 0

                s = model_stats[model_key]
                s["calls"] += 1
                s["input"] += inp
                s["output"] += out_v
                s["cache_read"] += cr
                s["cache_write"] += cw
                s["cost"] += cost
                if stop:
                    s["stop_reasons"][stop] += 1

                dur = None
                if obj_ts is not None and msg_ts is not None:
                    dur = (obj_ts - msg_ts).total_seconds()
                dur_ok = dur is not None and 0 <= dur <= 600

                if dur_ok:
                    s["durations"].append(dur)
                    all_model_durations.append((dur, model_key))
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S")
                    # render uses this key to drop duplicate model entries.
                    dedup_key = (label, model_key, inp)
                    slow_calls_top.append((
                        dur, "model",
                        f"{label} | {model_key} | {dur:.1f}s | "
                        f"in={fmt_tokens(inp + cr)} out={fmt_tokens(out_v)} | stop={stop or 'n/a'}",
                        dedup_key,
                    ))
                    ctx_size = inp + cr
                    for (b_label, b_upper) in ctx_buckets_def:
                        if ctx_size < b_upper:
                            ctx_bucket_durs[b_label].append(dur)
                            break

                if obj_ts is not None:
                    day_key = obj_ts.strftime("%m-%d")
                    d = daily_stats[day_key]
                    d["calls"] += 1
                    if dur_ok:
                        d["durs"].append(dur)
                    d["output"] += out_v

                if sess_id:
                    ss = session_stats[sess_id]
                    ss["calls"] += 1
                    ss["tokens"] += inp + cr + out_v
                    if dur_ok:
                        ss["duration"] += dur

                cache_total_calls += 1
                if cr > 0:
                    cache_calls_with_cache += 1
                cache_sum_input += inp
                cache_sum_cache_read += cr
                cache_sum_cache_write += cw

                api_total_assistant_calls += 1
                if stop and stop not in NORMAL_STOPS:
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S") if obj_ts else "?"
                    abnormal_stops.append(
                        f"{label} | {model_key} | stop={stop} | "
                        f"in={fmt_tokens(inp + cr)} out={fmt_tokens(out_v)}"
                    )
                    api_error_stats[categorize_api_error(msg, stop)] += 1

                # Only assistant records that produced output can close a turn.
                if obj_ts is not None and current_turn_user_ts is not None and out_v > 0:
                    current_turn_last_assistant_ts = obj_ts

                # Remember tool calls so the matching toolResult can show args.
                content = msg.get("content")
                if isinstance(content, list):
                    for part in content:
                        if not isinstance(part, dict):
                            continue
                        if part.get("type") != "toolCall":
                            continue
                        tc_id = part.get("id", "")
                        if not tc_id:
                            continue
                        tc_name = part.get("name", "") or ""
                        tc_args = part.get("arguments", part.get("input", ""))
                        pending_tool_calls[tc_id] = (tc_name, tc_args)

            elif role == "toolResult":
                tool_name = msg.get("toolName") or "?"
                tool_id = msg.get("toolCallId") or ""
                details = msg.get("details") or {}
                dur_ms = details.get("durationMs") if isinstance(details, dict) else None
                is_error = bool(msg.get("isError", False))

                tstat = tool_stats[tool_name]
                tstat["calls"] += 1
                if is_error:
                    tstat["errors"] += 1

                pending = pending_tool_calls.pop(tool_id, None) if tool_id else None
                if pending:
                    args_name, raw_args = pending
                    args_str = fmt_args(args_name or tool_name, raw_args)
                else:
                    args_str = ""

                err_brief = ""
                if is_error:
                    content = msg.get("content")
                    if isinstance(content, list):
                        # First non-empty text fragment becomes the summary.
                        for c in content:
                            if isinstance(c, dict):
                                t = c.get("text") or c.get("content") or ""
                                if t:
                                    err_brief = str(t)
                                    break
                            elif isinstance(c, str):
                                err_brief = c
                                break
                    elif isinstance(content, str):
                        err_brief = content
                    err_brief = err_brief.replace("\n", " ")[:80]

                dur_s = None
                if isinstance(dur_ms, (int, float)) and dur_ms >= 0:
                    dur_s = dur_ms / 1000.0

                rec = {
                    "dur": dur_s, "args": args_str,
                    "is_error": is_error, "err_brief": err_brief,
                    "ts": obj_ts,
                }
                tstat["records"].append(rec)
                if is_error:
                    tstat["error_records"].append(rec)

                if dur_s is not None:
                    tstat["durations"].append(dur_s)
                    all_tool_durations.append((dur_s, tool_name))
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S") if obj_ts else "?"
                    brief = f" | {args_str}" if args_str else ""
                    err_suffix = f" | {err_brief}" if (is_error and err_brief) else ""
                    err_tag = " | error=True" if is_error else ""
                    slow_calls_top.append((
                        dur_s, "tool",
                        f"{label} | {tool_name} | {dur_s:.1f}s{err_tag}{brief}{err_suffix}",
                        None,
                    ))

            elif role == "user":
                # A new user record closes the previous turn.
                _append_turn_latency(
                    e2e_latencies, current_turn_user_ts, current_turn_last_assistant_ts
                )
                current_turn_user_ts = obj_ts
                current_turn_last_assistant_ts = None

        # Close the turn still open at the end of the file.
        _append_turn_latency(
            e2e_latencies, current_turn_user_ts, current_turn_last_assistant_ts
        )

    return dict(
        model_stats=model_stats,
        tool_stats=tool_stats,
        all_model_durations=all_model_durations,
        all_tool_durations=all_tool_durations,
        abnormal_stops=abnormal_stops,
        slow_calls_top=slow_calls_top,
        ctx_buckets_def=ctx_buckets_def,
        ctx_bucket_durs=ctx_bucket_durs,
        daily_stats=daily_stats,
        cache_total_calls=cache_total_calls,
        cache_calls_with_cache=cache_calls_with_cache,
        cache_sum_input=cache_sum_input,
        cache_sum_cache_read=cache_sum_cache_read,
        cache_sum_cache_write=cache_sum_cache_write,
        session_stats=session_stats,
        e2e_latencies=e2e_latencies,
        api_error_stats=api_error_stats,
        api_total_assistant_calls=api_total_assistant_calls,
    )


def _epoch_ms(value):
    """Return a raw message timestamp as numeric milliseconds, or None."""
    if isinstance(value, (int, float)):
        return value
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def _usage_number(value):
    """Return ``value`` when it is a number, else 0 (missing, None, garbage)."""
    return value if isinstance(value, (int, float)) else 0


def _append_turn_latency(latencies, user_ts, assistant_ts):
    """Append the user-to-assistant latency of a turn if it is 1..3600 s."""
    if user_ts is None or assistant_ts is None:
        return
    elapsed = (assistant_ts - user_ts).total_seconds()
    if 1 <= elapsed <= 3600:
        latencies.append(elapsed)
405
+
406
+
407
def render(out: output.Output, data, file_count):
    """Write every report section of the performance module to ``out``.

    Sections are emitted in a fixed order, one private helper per section.

    Args:
        out: output sink; only its ``subsection``, ``item`` and ``line``
            methods are used.
        data: the aggregate dict returned by ``analyze_sessions``.
        file_count: number of session files analyzed (shown in the model
            section).
    """
    _render_bottleneck(out, data)
    _render_models(out, data, file_count)
    _render_tools(out, data)
    _render_slow_calls(out, data)
    _render_abnormal_stops(out, data)
    _render_api_errors(out, data)
    _render_e2e(out, data)
    _render_context_latency(out, data)
    _render_daily_trend(out, data)
    _render_cache(out, data)
    _render_tool_errors(out, data)
    _render_session_top(out, data)


def _render_bottleneck(out, data):
    """Section: overall P95 of model vs tool durations, slowest first."""
    out.subsection("性能耗时分布")
    model_pairs = data["all_model_durations"]
    tool_pairs = data["all_tool_durations"]

    def p95_of(pairs):
        return pct(sorted(seconds for seconds, _ in pairs), 0.95) if pairs else 0.0

    def busiest(pairs):
        # Name with the most timed calls; "?" when there is none.
        tally = defaultdict(int)
        for _, who in pairs:
            tally[who] += 1
        if not tally:
            return "?"
        return max(tally.items(), key=lambda kv: kv[1])[0] or "?"

    rows = sorted(
        [
            ("模型响应", p95_of(model_pairs), f"主要来自 {busiest(model_pairs)}"),
            ("工具执行", p95_of(tool_pairs), f"主要来自 {busiest(tool_pairs)}"),
        ],
        key=lambda row: row[1],
        reverse=True,
    )
    if not model_pairs and not tool_pairs:
        out.item("(数据不足)")
        return
    for rank, (title, p95, hint) in enumerate(rows, 1):
        out.item(f"#{rank}: {title}(P95={p95:.1f}s, {hint})")


def _render_models(out, data, file_count):
    """Section: per-model latency, throughput, tokens, cost and stop reasons."""
    out.subsection("模型性能")
    out.item(f"数据来源: 最近 {file_count} 个 session 文件")
    out.line("")
    per_model = data["model_stats"]
    if not per_model:
        out.item("最近 Session 中未发现模型使用数据")
        return

    by_calls = sorted(per_model, key=lambda name: per_model[name]["calls"], reverse=True)
    for name in by_calls:
        stat = per_model[name]
        latencies = sorted(stat["durations"])
        calls = stat["calls"]
        slowest = latencies[-1] if latencies else 0.0
        busy_seconds = sum(latencies)

        if stat["output"] == 0 or not busy_seconds > 0:
            throughput = "N/A"
        else:
            throughput = f"{(stat['output'] / busy_seconds):.1f} tok/s"

        reasons = stat["stop_reasons"]
        ok_calls = sum(count for reason, count in reasons.items() if reason in NORMAL_STOPS)
        success = (ok_calls / calls * 100) if calls else 0.0
        # Identical min and max over several samples: flag the timing data.
        flat = calls > 3 and len(latencies) > 3 and latencies[0] == latencies[-1]
        warn = " [⚠ 时间精度不足]" if flat else ""

        out.item(name)
        out.item(
            f" 调用: {calls} | P50: {pct(latencies, 0.50):.1f}s | "
            f"P95: {pct(latencies, 0.95):.1f}s | "
            f"Max: {slowest:.1f}s | 吞吐: {throughput} | 成功率: {success:.0f}%{warn}"
        )

        if (stat["input"] + stat["output"] + stat["cache_read"]) == 0:
            parts = [" tokens: 未报告"]
            if stat["cache_write"]:
                parts.append(f"cache_write={fmt_tokens(stat['cache_write'])}")
            parts.append("cost: N/A" if stat["cost"] == 0 else f"cost: ${stat['cost']:.4f}")
        else:
            parts = [
                f" tokens: in={fmt_tokens(stat['input'] + stat['cache_read'])} "
                f"out={fmt_tokens(stat['output'])}"
            ]
            if stat["cache_read"]:
                parts.append(f"cache_read={fmt_tokens(stat['cache_read'])}")
            if stat["cache_write"]:
                parts.append(f"cache_write={fmt_tokens(stat['cache_write'])}")
            parts.append(f"cost: ${stat['cost']:.4f}")
        out.item(" | ".join(parts))

        ordered = sorted(reasons.items(), key=lambda kv: -kv[1])
        summary = " ".join(f"{reason}:{count}" for reason, count in ordered)
        out.item(f" stopReasons: {summary}" if summary else " stopReasons: (none)")
        out.line("")


def _render_tools(out, data):
    """Section: the ten most-called timed tools with their slowest calls."""
    out.subsection("工具性能(Top 10 by 调用量)")
    with_timing = {
        tool: stat for tool, stat in data["tool_stats"].items() if stat["durations"]
    }
    if not with_timing:
        out.item("(无工具调用数据)")
        return

    top = sorted(with_timing.items(), key=lambda kv: kv[1]["calls"], reverse=True)[:10]
    for tool, stat in top:
        latencies = sorted(stat["durations"])
        calls = stat["calls"]
        err_rate = (stat["errors"] / calls * 100) if calls else 0.0
        timing = (
            f"P50={pct(latencies, 0.50):.3f}s "
            f"P95={pct(latencies, 0.95):.3f}s "
            f"Max={latencies[-1]:.3f}s"
        )
        out.item(f"{tool}: {calls} 次 | {timing} | 错误 {err_rate:.0f}%")

        slowest = sorted(
            (rec for rec in stat["records"] if rec["dur"] is not None),
            key=lambda rec: rec["dur"],
            reverse=True,
        )[:10]
        for rank, rec in enumerate(slowest, 1):
            shown = rec["args"] or "(无参数)"
            out.item(f" #{rank}: {shown} ({rec['dur']:.3f}s)")

        # Up to two failed calls that were not already listed above.
        already = {id(rec) for rec in slowest}
        extra = [rec for rec in stat["error_records"] if id(rec) not in already][:2]
        for rec in extra:
            shown = rec["args"] or "(无参数)"
            took = f"{rec['dur']:.1f}s" if rec["dur"] is not None else "n/a"
            detail = f", {rec['err_brief']}" if rec["err_brief"] else ""
            out.item(f" 失败: {shown} (error, {took}{detail})")


def _render_slow_calls(out, data):
    """Section: the twenty slowest model/tool calls, de-duplicated by key."""
    out.subsection("慢调用 Top 20")
    ordered = sorted(data["slow_calls_top"], key=lambda entry: entry[0], reverse=True)
    seen = set()
    unique = []
    for entry in ordered:
        marker = entry[3]
        if marker is not None:
            if marker in seen:
                continue
            seen.add(marker)
        unique.append(entry)
    if not unique:
        out.item("(无数据)")
        return
    for rank, entry in enumerate(unique[:20], 1):
        out.item(f"[{rank}] {entry[2]}")


def _render_abnormal_stops(out, data):
    """Section: count and first twenty abnormal stop-reason records."""
    out.subsection("异常 stopReason — 模型非正常结束(如 error、中断)")
    stops = data["abnormal_stops"]
    suffix = "" if stops else "(无异常)"
    out.item(f"共 {len(stops)} 条" + suffix)
    for text in stops[:20]:
        out.item(text)


def _render_api_errors(out, data):
    """Section: API error rate and per-category distribution."""
    out.subsection("模型 API 错误分布")
    by_category = data["api_error_stats"]
    failures = sum(by_category.values())
    calls = data["api_total_assistant_calls"]
    if calls == 0:
        out.item("(无调用数据)")
        return
    rate = failures / calls * 100
    out.item(f"总异常: {failures} (总调用 {calls} 中, 异常率 {rate:.1f}%)")
    if failures > 0:
        out.item("分布:")
        for category, count in sorted(by_category.items(), key=lambda kv: -kv[1]):
            out.item(f" {category}: {count}")


def _render_e2e(out, data):
    """Section: end-to-end latency percentiles and histogram."""
    out.subsection("端到端消息延迟(user 发送 → assistant 最终响应)")
    samples = data["e2e_latencies"]
    if not samples:
        out.item("(数据不足,未发现 user→assistant 配对)")
        return
    ordered = sorted(samples)
    out.item(
        f"样本: {len(ordered)} | P50={pct(ordered, 0.50):.1f}s "
        f"P95={pct(ordered, 0.95):.1f}s Max={ordered[-1]:.1f}s"
    )
    # (label, exclusive upper bound in seconds)
    edges = (
        ("<10s", 10.0),
        ("10-30s", 30.0),
        ("30-60s", 60.0),
        ("60-120s", 120.0),
        (">120s", float("inf")),
    )
    histogram = {label: 0 for label, _ in edges}
    for value in ordered:
        label = next((name for name, upper in edges if value < upper), None)
        if label is not None:
            histogram[label] += 1
    total = len(ordered)
    out.item("分布:")
    for label, _ in edges:
        count = histogram[label]
        share = (count / total * 100) if total else 0.0
        out.item(f" {label}: {count} ({share:.1f}%)")


def _render_context_latency(out, data):
    """Section: model latency percentiles per context-size bucket."""
    out.subsection("延迟 vs 上下文大小")
    buckets = data["ctx_buckets_def"]
    by_bucket = data["ctx_bucket_durs"]
    if not any(by_bucket.get(label) for label, _ in buckets):
        out.item("(数据不足)")
        return
    out.line(f" {'上下文段':<14} {'调用数':>8} {'P50延迟':>10} {'P95延迟':>10}")
    for label, _ in buckets:
        latencies = sorted(by_bucket.get(label, []))
        if latencies:
            out.line(
                f" {label:<14} {len(latencies):>8} "
                f"{pct(latencies, 0.50):>9.1f}s {pct(latencies, 0.95):>9.1f}s"
            )
        else:
            out.line(f" {label:<14} {0:>8} {'-':>10} {'-':>10}")


def _render_daily_trend(out, data):
    """Section: calls, median latency and output tokens for the last 7 days."""
    out.subsection("每日趋势(最近 7 天)")
    per_day = data["daily_stats"]
    if not per_day:
        out.item("(数据不足)")
        return
    today = datetime.now(timezone.utc).date()
    out.line(f" {'日期':<10} {'调用数':>8} {'P50延迟':>10} {'总输出tokens':>14}")
    for back in range(7):
        day = (today - timedelta(days=back)).strftime("%m-%d")
        stat = per_day.get(day)
        if not stat or stat["calls"] == 0:
            out.line(f" {day:<10} {0:>8} {'-':>10} {'-':>14}")
            continue
        latencies = sorted(stat["durs"])
        median = pct(latencies, 0.50) if latencies else 0.0
        out.line(
            f" {day:<10} {stat['calls']:>8} {median:>9.1f}s "
            f"{fmt_tokens(stat['output']):>14}"
        )


def _render_cache(out, data):
    """Section: share of calls and of context tokens served from cache."""
    out.subsection("Cache 命中率")
    calls = data["cache_total_calls"]
    if calls == 0:
        out.item("(无数据)")
        return
    hits = data["cache_calls_with_cache"]
    hit_share = hits / calls * 100
    out.item(
        f"总调用: {calls} | "
        f"触发 cache_read 的调用: {hits} ({hit_share:.1f}%)"
    )
    read = data["cache_sum_cache_read"]
    fresh = data["cache_sum_input"]
    written = data["cache_sum_cache_write"]
    out.item(
        f"cache_read: {fmt_tokens(read)} | "
        f"input(非 cache): {fmt_tokens(fresh)} | "
        f"cache_write: {fmt_tokens(written)}"
    )
    context_total = fresh + read
    if context_total > 0:
        ratio = read / context_total * 100
        out.item(
            f"上下文 cache 占比: cacheRead/(input+cacheRead) = "
            f"{ratio:.3f}% ({fmt_tokens(read)}/{fmt_tokens(context_total)})"
        )


def _render_tool_errors(out, data):
    """Section: tool error totals and up to three samples per failing tool."""
    out.subsection("工具错误明细")
    per_tool = data["tool_stats"]
    errors = sum(stat["errors"] for stat in per_tool.values())
    calls = sum(stat["calls"] for stat in per_tool.values())
    if errors == 0:
        out.item(f"共 0 次错误 (总调用 {calls} 次中)")
        return
    out.item(f"共 {errors} 次错误 (总调用 {calls} 次中):")
    worst_first = sorted(per_tool, key=lambda tool: per_tool[tool]["errors"], reverse=True)
    for tool in worst_first:
        stat = per_tool[tool]
        if stat["errors"] == 0:
            continue
        out.line(f" {tool} ({stat['errors']}次):")
        for rec in stat["error_records"][:3]:
            when = rec["ts"].strftime("%Y-%m-%d %H:%M:%S") if rec["ts"] else "?"
            what = rec["err_brief"] or "(无错误内容)"
            out.line(f" {when} | {what[:100]}")


def _render_session_top(out, data):
    """Section: the five sessions with the highest token consumption."""
    out.subsection("Session 消耗 Top 5")
    per_session = data["session_stats"]
    if not per_session:
        out.item("(无数据)")
        return
    heaviest = sorted(
        per_session.items(), key=lambda kv: kv[1]["tokens"], reverse=True
    )[:5]
    out.line(f" {'session':<40} {'调用数':>8} {'总tokens':>10} {'模型耗时累计':>12}")
    for sid, stat in heaviest:
        out.line(
            f" {sid:<40} {stat['calls']:>8} "
            f"{fmt_tokens(stat['tokens']):>10} {stat['duration']:>11.0f}s"
        )
660
+
661
+
662
def main() -> int:
    """Run module 7 from the command line.

    Parses the common CLI options, analyzes up to 20 of the most recent
    session files under ``args.sessions_base`` and renders the report.

    Returns:
        Whatever ``out.done()`` returns; the caller passes it on as the exit
        status.
    """
    args = cli.build_common_parser(
        description="模块 7:模型与性能数据",
        prog="07_performance",
    ).parse_args()
    out = output.init("performance", json_mode=args.json, no_color=args.no_color)
    out.section("模块 7:模型与性能数据")

    session_files = collect_session_files(args.sessions_base, limit=20)
    if session_files:
        data = analyze_sessions(session_files)
        render(out, data, len(session_files))
        if args.json:
            # Extra summary fields for the JSON report.
            out.set_data("model_count", len(data["model_stats"]))
            out.set_data("session_files_analyzed", len(session_files))
            out.set_data("e2e_latency_count", len(data["e2e_latencies"]))
    else:
        out.item("未找到 Session 文件")
    return out.done()
684
+
685
+
686
# Script entry point: exit the interpreter with main()'s return value.
if __name__ == "__main__":
    raise SystemExit(main())