openclaw-diag-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +260 -0
- package/bin/ocdiag +14 -0
- package/bin/openclaw-diag.js +275 -0
- package/diag/01_sys_health.py +443 -0
- package/diag/02_environment.py +292 -0
- package/diag/03_configuration.py +131 -0
- package/diag/04_gateway.py +651 -0
- package/diag/05_recent_errors.py +246 -0
- package/diag/06_cron_jobs.py +694 -0
- package/diag/07_performance.py +687 -0
- package/diag/08_sessions.py +518 -0
- package/diag/09_plugin_diag.py +535 -0
- package/diag/10_shell_history.py +121 -0
- package/diag/__init__.py +0 -0
- package/lib/bundle.py +204 -0
- package/ocdiag/__init__.py +3 -0
- package/ocdiag/cli.py +39 -0
- package/ocdiag/dispatcher.py +137 -0
- package/ocdiag/jsonlog.py +65 -0
- package/ocdiag/output.py +131 -0
- package/ocdiag/paths.py +48 -0
- package/ocdiag/recent_logs.py +53 -0
- package/ocdiag/sensitive.py +41 -0
- package/ocdiag/timeutil.py +77 -0
- package/ocdiag/tokens.py +46 -0
- package/package.json +42 -0
- package/tools/__init__.py +0 -0
- package/tools/oc_session_extract.py +254 -0
- package/tools/oc_session_trace.py +715 -0
|
@@ -0,0 +1,687 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Module 7: model and performance data (Top 20 slow calls, E2E latency, cache hit rate)."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import glob
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import sys
|
|
10
|
+
import tempfile
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from datetime import datetime, timezone, timedelta
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
# Put the directory two levels above this file at the front of sys.path so the
# `ocdiag` import below resolves when this file is executed directly.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
16
|
+
|
|
17
|
+
from ocdiag import cli, output
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# stopReason values treated as a normal end of an assistant call. Any other
# non-empty stopReason is recorded as abnormal by analyze_sessions(), and
# render() uses this set to compute the per-model success rate.
NORMAL_STOPS = {"stop", "end_turn", "toolUse", "tool_calls", ""}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def parse_obj_ts(ts_str):
    """Parse a record-level ISO-8601 timestamp string into an aware datetime.

    A ``Z`` suffix is rewritten to ``+00:00`` before parsing. Timestamps that
    carry no UTC offset are assumed to be UTC, so the result is always
    timezone-aware and can safely be subtracted from the aware datetimes
    returned by ``parse_msg_ts`` (mixing naive and aware values raises
    ``TypeError``).

    Args:
        ts_str: ISO-8601 timestamp text; falsy or non-string values are
            tolerated.

    Returns:
        A timezone-aware ``datetime``, or ``None`` when the value is missing
        or cannot be parsed.
    """
    if not ts_str:
        return None
    try:
        parsed = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # Non-string input or malformed text: parsing is best-effort.
        return None
    if parsed.tzinfo is None:
        # No offset in the source text; treat it as UTC rather than returning
        # a naive value that cannot be compared with aware datetimes.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def parse_msg_ts(ms):
    """Convert an epoch timestamp in milliseconds to an aware UTC datetime.

    Args:
        ms: Milliseconds since the Unix epoch (anything ``int()`` accepts),
            or ``None``.

    Returns:
        A UTC ``datetime``, or ``None`` when *ms* is ``None`` or cannot be
        converted.
    """
    if ms is None:
        return None
    try:
        seconds = int(ms) / 1000
        return datetime.fromtimestamp(seconds, tz=timezone.utc)
    except Exception:
        # Best-effort conversion: any bad value simply yields no timestamp.
        return None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def pct(sorted_vals, p):
    """Return the value at fraction *p* of an already-sorted sequence.

    The index is ``int(len * p)`` clamped to the last element.

    Args:
        sorted_vals: Values in ascending order.
        p: Fraction such as ``0.50`` or ``0.95``.

    Returns:
        The selected element, or ``0.0`` for an empty sequence.
    """
    if not sorted_vals:
        return 0.0
    count = len(sorted_vals)
    position = int(count * p)
    if position > count - 1:
        position = count - 1
    return sorted_vals[position]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def fmt_tokens(n):
    """Format a token count compactly, e.g. ``1.5M``, ``2.0K`` or ``999``.

    Args:
        n: Numeric token count.

    Returns:
        One decimal place with an ``M`` suffix from one million upward, a
        ``K`` suffix from one thousand upward, otherwise ``str(n)``.
    """
    for threshold, suffix in ((1_000_000, "M"), (1_000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return str(n)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def fmt_args(tool_name, tc_args, max_len=100):
    """Build a short one-line summary of a tool call's arguments.

    Args:
        tool_name: Tool name; matched case-insensitively.
        tc_args: Argument mapping, or a JSON string encoding one.
        max_len: Maximum summary length before ``...`` is appended.

    Returns:
        The most telling field for known tools (command, url, query, path,
        ...), the first three ``key=value`` pairs for other tools, the raw
        string cut to *max_len* when it is not valid JSON, or ``""`` when
        there is no non-empty argument mapping.
    """
    if isinstance(tc_args, str):
        try:
            tc_args = json.loads(tc_args)
        except Exception:
            return (tc_args or "")[:max_len]
    if not (isinstance(tc_args, dict) and tc_args):
        return ""

    def clip(value, limit):
        # None becomes "", everything else is stringified; "..." marks a cut.
        text = str(value) if value is not None else ""
        if len(text) > limit:
            return text[:limit] + "..."
        return text

    kind = (tool_name or "").lower()

    # Tools summarised by a single argument field.
    single_field = {
        "exec": "command",
        "web_fetch": "url",
        "web_search": "query",
        "read": "path",
        "write": "path",
        "edit": "path",
    }
    if kind in single_field:
        return clip(tc_args.get(single_field[kind], ""), max_len)

    if kind == "sessions_spawn":
        agent = tc_args.get("agentId", "")
        task_brief = clip(tc_args.get("task", ""), 60)
        return clip(f"agentId={agent}, task={task_brief}", max_len)

    if kind == "cron":
        action = tc_args.get("action", "")
        job = tc_args.get("jobId", "")
        if job:
            summary = f"action={action}, jobId={job}"
        else:
            summary = f"action={action}"
        return clip(summary, max_len)

    if kind in ("image", "image_generate"):
        return clip(tc_args.get("prompt", ""), 60)

    # Unknown tool: show the first three arguments, each value cut to 50 chars.
    pieces = []
    for key, value in list(tc_args.items())[:3]:
        rendered = str(value)
        if len(rendered) > 50:
            rendered = rendered[:50] + "..."
        pieces.append(f"{key}={rendered}")
    return clip(", ".join(pieces), max_len)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def categorize_api_error(msg, stop):
    """Classify an abnormally-ended assistant message into an error category.

    The ``error`` field and any text found in ``content`` are concatenated,
    cut to 1000 characters and scanned for status codes and keywords. The
    first matching rule wins.

    Args:
        msg: Message dict; ``error`` and ``content`` are read.
        stop: The message's stop reason.

    Returns:
        A category label such as ``"rate_limit(429)"``, ``"timeout"`` or
        ``"other(stop=...)"``.
    """
    fragments = [str(msg.get("error", ""))]
    body = msg.get("content", "")
    if isinstance(body, list):
        for piece in body:
            if isinstance(piece, dict):
                inner = piece.get("text") or piece.get("content") or ""
                if inner:
                    fragments.append(str(inner))
            elif isinstance(piece, str):
                fragments.append(piece)
    elif isinstance(body, str):
        fragments.append(body)

    text = " ".join(fragments)[:1000]
    low = text.lower()

    def mentions(*needles):
        # True when any keyword occurs in the lower-cased text.
        return any(needle in low for needle in needles)

    if "429" in text or mentions("rate limit", "throttl"):
        return "rate_limit(429)"
    if "503" in text or mentions("service unavailable"):
        return "service_unavailable(503)"
    if "401" in text or "403" in text or mentions("unauthorized", "forbidden"):
        return "auth_error(401/403)"
    if "500" in text or mentions("internal server"):
        return "server_error(500)"
    if mentions("timeout", "timed out"):
        return "timeout"
    if mentions("connection") and mentions("refused", "reset", "aborted"):
        return "connection_error"
    if stop == "aborted" or mentions("aborted"):
        return "aborted"
    return f"other(stop={stop or 'n/a'})"
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def collect_session_files(sessions_base, limit=20):
    """Return the most recently modified session log paths.

    Looks two directory levels below *sessions_base* for ``*.jsonl`` and
    ``*.jsonl.reset.*`` files, skipping paths ending in ``.trajectory.jsonl``
    and files whose mtime cannot be read.

    Args:
        sessions_base: Root directory to search.
        limit: Maximum number of paths to return.

    Returns:
        Paths sorted newest first, at most *limit* of them.
    """
    dated = []
    for file_glob in ("*.jsonl", "*.jsonl.reset.*"):
        search = os.path.join(sessions_base, "*", "*", file_glob)
        for candidate in glob.glob(search):
            if candidate.endswith(".trajectory.jsonl"):
                continue
            try:
                dated.append((os.path.getmtime(candidate), candidate))
            except OSError:
                # File vanished or is unreadable; ignore it.
                continue
    dated.sort(reverse=True)
    return [path for _, path in dated[:limit]]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def tail_lines(path, n=500):
    """Return up to the last *n* lines of a text file.

    The file is streamed through a bounded deque, so at most *n* lines are
    held in memory regardless of file size. It is decoded as UTF-8 (the
    session logs are JSON) with undecodable bytes replaced, independent of
    the platform's locale encoding.

    Args:
        path: File to read.
        n: Number of trailing lines wanted; ``n <= 0`` yields no lines.

    Returns:
        A list of lines including their line endings, or ``[]`` when the
        file cannot be opened or read.
    """
    # Local import: the file-level import only brings in defaultdict.
    from collections import deque

    if n <= 0:
        return []
    try:
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            return list(deque(f, maxlen=n))
    except OSError:
        return []
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _seconds_between(later, earlier):
    """Return ``later - earlier`` in seconds, or None when it cannot be computed."""
    if later is None or earlier is None:
        return None
    try:
        return (later - earlier).total_seconds()
    except TypeError:
        # Naive and aware datetimes cannot be subtracted; skip the sample.
        return None


def _record_turn_latency(latencies, user_ts, assistant_ts):
    """Append one turn's user-to-assistant latency when it lies within 1s..3600s."""
    lat = _seconds_between(assistant_ts, user_ts)
    if lat is not None and 1 <= lat <= 3600:
        latencies.append(lat)


def analyze_sessions(session_files):
    """Aggregate model, tool, cache and latency statistics from session logs.

    For each file the last 500 lines are read and every line that parses as a
    JSON object with a dict ``message`` is processed by ``message.role``:

    * ``assistant`` - token usage, cost, stop reasons and call duration
      (record timestamp minus message timestamp, kept when within 0..600s)
      are accumulated per model, per day, per session and per context-size
      bucket; tool calls in the content are remembered by id.
    * ``toolResult`` - call/error counts and ``details.durationMs`` are
      accumulated per tool, joined with the remembered call arguments.
    * ``user`` - closes the previous turn and starts a new one for the
      end-to-end latency measurement.

    Lines that are not valid JSON, not JSON objects, or whose ``message`` is
    not a dict are skipped. Messages whose numeric ``message.timestamp`` is
    more than 1000 ms older than the newest one already seen in the same
    file are skipped as well.

    Args:
        session_files: Iterable of session log paths.

    Returns:
        A dict with the keys ``model_stats``, ``tool_stats``,
        ``all_model_durations``, ``all_tool_durations``, ``abnormal_stops``,
        ``slow_calls_top``, ``ctx_buckets_def``, ``ctx_bucket_durs``,
        ``daily_stats``, ``cache_total_calls``, ``cache_calls_with_cache``,
        ``cache_sum_input``, ``cache_sum_cache_read``,
        ``cache_sum_cache_write``, ``session_stats``, ``e2e_latencies``,
        ``api_error_stats`` and ``api_total_assistant_calls``.
    """
    model_stats = defaultdict(lambda: {
        "calls": 0, "input": 0, "output": 0,
        "cache_read": 0, "cache_write": 0, "cost": 0.0,
        "durations": [], "stop_reasons": defaultdict(int),
    })
    tool_stats = defaultdict(lambda: {
        "calls": 0, "errors": 0, "durations": [],
        "records": [], "error_records": [],
    })
    all_model_durations = []
    all_tool_durations = []
    abnormal_stops = []
    slow_calls_top = []
    # (label, exclusive upper bound) for input + cacheRead token counts.
    ctx_buckets_def = [
        ("<50K", 50_000),
        ("50K-100K", 100_000),
        ("100K-200K", 200_000),
        (">200K", float("inf")),
    ]
    ctx_bucket_durs = defaultdict(list)
    daily_stats = defaultdict(lambda: {"calls": 0, "durs": [], "output": 0})
    cache_total_calls = 0
    cache_calls_with_cache = 0
    cache_sum_input = 0
    cache_sum_cache_read = 0
    cache_sum_cache_write = 0
    session_stats = defaultdict(lambda: {"calls": 0, "tokens": 0, "duration": 0.0})

    e2e_latencies = []

    api_error_stats = defaultdict(int)
    api_total_assistant_calls = 0

    for session_path in session_files:
        sess_id = os.path.basename(session_path).split(".jsonl")[0]
        max_msg_ms = 0
        pending_tool_calls = {}
        current_turn_user_ts = None
        current_turn_last_assistant_ts = None

        for raw_line in tail_lines(session_path, 500):
            raw_line = raw_line.strip()
            if not raw_line:
                continue
            try:
                obj = json.loads(raw_line)
            except (ValueError, RecursionError):
                # Truncated or corrupt line; the tail may start mid-record.
                continue
            if not isinstance(obj, dict):
                # Valid JSON but not a record object (e.g. a bare number).
                continue

            msg = obj.get("message") or {}
            if not isinstance(msg, dict):
                continue
            role = msg.get("role", "")
            obj_ts = parse_obj_ts(obj.get("timestamp"))
            msg_ts_raw = msg.get("timestamp")
            msg_ts = parse_msg_ts(msg_ts_raw)

            # Ordering filter; only meaningful for numeric timestamps, and a
            # non-numeric value must not crash the comparison.
            if isinstance(msg_ts_raw, (int, float)):
                if msg_ts_raw < max_msg_ms - 1000:
                    continue
                if msg_ts_raw > max_msg_ms:
                    max_msg_ms = msg_ts_raw

            if role == "assistant":
                provider = msg.get("provider") or ""
                model = msg.get("model") or "?"
                if provider == "openclaw" and model in ("delivery-mirror", "gateway-injected"):
                    continue
                model_key = f"{provider}/{model}" if provider else model
                usage = msg.get("usage") or {}
                if not isinstance(usage, dict):
                    usage = {}
                stop = msg.get("stopReason", "") or ""
                inp = usage.get("input", 0) or 0
                out_v = usage.get("output", 0) or 0
                cr = usage.get("cacheRead", 0) or 0
                cw = usage.get("cacheWrite", 0) or 0
                cost_obj = usage.get("cost", {}) or {}
                cost = (cost_obj.get("total", 0) or 0) if isinstance(cost_obj, dict) else 0

                s = model_stats[model_key]
                s["calls"] += 1
                s["input"] += inp
                s["output"] += out_v
                s["cache_read"] += cr
                s["cache_write"] += cw
                s["cost"] += cost
                if stop:
                    s["stop_reasons"][stop] += 1

                dur = _seconds_between(obj_ts, msg_ts)
                dur_ok = dur is not None and 0 <= dur <= 600
                if dur_ok:
                    s["durations"].append(dur)
                    all_model_durations.append((dur, model_key))
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S")
                    dedup_key = (label, model_key, inp)
                    slow_calls_top.append((
                        dur, "model",
                        f"{label} | {model_key} | {dur:.1f}s | "
                        f"in={fmt_tokens(inp + cr)} out={fmt_tokens(out_v)} | stop={stop or 'n/a'}",
                        dedup_key,
                    ))
                    ctx_size = inp + cr
                    for (b_label, b_upper) in ctx_buckets_def:
                        if ctx_size < b_upper:
                            ctx_bucket_durs[b_label].append(dur)
                            break

                if obj_ts:
                    day_key = obj_ts.strftime("%m-%d")
                    d = daily_stats[day_key]
                    d["calls"] += 1
                    if dur_ok:
                        d["durs"].append(dur)
                    d["output"] += out_v

                if sess_id:
                    ss = session_stats[sess_id]
                    ss["calls"] += 1
                    ss["tokens"] += inp + cr + out_v
                    if dur_ok:
                        ss["duration"] += dur

                cache_total_calls += 1
                if cr > 0:
                    cache_calls_with_cache += 1
                cache_sum_input += inp
                cache_sum_cache_read += cr
                cache_sum_cache_write += cw

                api_total_assistant_calls += 1
                if stop and stop not in NORMAL_STOPS:
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S") if obj_ts else "?"
                    abnormal_stops.append(
                        f"{label} | {model_key} | stop={stop} | "
                        f"in={fmt_tokens(inp + cr)} out={fmt_tokens(out_v)}"
                    )
                    api_error_stats[categorize_api_error(msg, stop)] += 1

                # Only assistant messages that produced output tokens move the
                # end of the current turn.
                if obj_ts is not None and current_turn_user_ts is not None and out_v > 0:
                    current_turn_last_assistant_ts = obj_ts

                # Remember tool calls so the matching toolResult can show args.
                for part in msg.get("content", []) or []:
                    if not isinstance(part, dict):
                        continue
                    if part.get("type") != "toolCall":
                        continue
                    tc_id = part.get("id", "")
                    if not tc_id:
                        continue
                    tc_name = part.get("name", "") or ""
                    tc_args = part.get("arguments", part.get("input", ""))
                    pending_tool_calls[tc_id] = (tc_name, tc_args)

            elif role == "toolResult":
                tool_name = msg.get("toolName") or "?"
                tool_id = msg.get("toolCallId") or ""
                details = msg.get("details") or {}
                dur_ms = details.get("durationMs") if isinstance(details, dict) else None
                is_error = bool(msg.get("isError", False))

                ts = tool_stats[tool_name]
                ts["calls"] += 1
                if is_error:
                    ts["errors"] += 1

                pending = pending_tool_calls.pop(tool_id, None) if tool_id else None
                if pending:
                    args_name, raw_args = pending
                    args_str = fmt_args(args_name or tool_name, raw_args)
                else:
                    args_str = ""

                err_brief = ""
                if is_error:
                    # First text fragment of the result, flattened to one line.
                    content = msg.get("content")
                    if isinstance(content, list):
                        for c in content:
                            if isinstance(c, dict):
                                t = c.get("text") or c.get("content") or ""
                                if t:
                                    err_brief = str(t)
                                    break
                            elif isinstance(c, str):
                                err_brief = c
                                break
                    elif isinstance(content, str):
                        err_brief = content
                    err_brief = err_brief.replace("\n", " ")[:80]

                dur_s = None
                if isinstance(dur_ms, (int, float)) and dur_ms >= 0:
                    dur_s = dur_ms / 1000.0

                rec = {
                    "dur": dur_s, "args": args_str,
                    "is_error": is_error, "err_brief": err_brief,
                    "ts": obj_ts,
                }
                ts["records"].append(rec)
                if is_error:
                    ts["error_records"].append(rec)

                if dur_s is not None:
                    ts["durations"].append(dur_s)
                    all_tool_durations.append((dur_s, tool_name))
                    label = obj_ts.strftime("%Y-%m-%d %H:%M:%S") if obj_ts else "?"
                    brief = f" | {args_str}" if args_str else ""
                    err_suffix = f" | {err_brief}" if (is_error and err_brief) else ""
                    err_tag = " | error=True" if is_error else ""
                    slow_calls_top.append((
                        dur_s, "tool",
                        f"{label} | {tool_name} | {dur_s:.1f}s{err_tag}{brief}{err_suffix}",
                        None,
                    ))

            elif role == "user":
                # A new user message closes the previous turn.
                _record_turn_latency(
                    e2e_latencies, current_turn_user_ts, current_turn_last_assistant_ts
                )
                current_turn_user_ts = obj_ts
                current_turn_last_assistant_ts = None

        # Close the turn still open at the end of the file.
        _record_turn_latency(
            e2e_latencies, current_turn_user_ts, current_turn_last_assistant_ts
        )

    return dict(
        model_stats=model_stats,
        tool_stats=tool_stats,
        all_model_durations=all_model_durations,
        all_tool_durations=all_tool_durations,
        abnormal_stops=abnormal_stops,
        slow_calls_top=slow_calls_top,
        ctx_buckets_def=ctx_buckets_def,
        ctx_bucket_durs=ctx_bucket_durs,
        daily_stats=daily_stats,
        cache_total_calls=cache_total_calls,
        cache_calls_with_cache=cache_calls_with_cache,
        cache_sum_input=cache_sum_input,
        cache_sum_cache_read=cache_sum_cache_read,
        cache_sum_cache_write=cache_sum_cache_write,
        session_stats=session_stats,
        e2e_latencies=e2e_latencies,
        api_error_stats=api_error_stats,
        api_total_assistant_calls=api_total_assistant_calls,
    )
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def render(out: output.Output, data, file_count):
    """Write the performance report from the aggregated statistics.

    Sections are emitted in a fixed order through ``out.subsection``,
    ``out.item`` and ``out.line``: duration distribution, per-model
    performance, per-tool performance, slowest calls, abnormal stop reasons,
    API error distribution, end-to-end latency, latency by context size,
    daily trend, cache hit rate, tool error details and top sessions.

    Args:
        out: Output sink; only ``subsection``, ``item`` and ``line`` are used.
        data: The dict returned by ``analyze_sessions``.
        file_count: Number of session files analysed (display only).
    """
    # Bottleneck
    out.subsection("性能耗时分布")
    all_md = data["all_model_durations"]
    all_td = data["all_tool_durations"]
    model_p95 = pct(sorted(d for d, _ in all_md), 0.95) if all_md else 0.0
    tool_p95 = pct(sorted(d for d, _ in all_td), 0.95) if all_td else 0.0

    def top_contributor(pairs):
        # Returns the name with the most samples; the durations themselves
        # are not weighed.
        counts = defaultdict(int)
        for d, n in pairs:
            counts[n] += 1
        if not counts:
            return None
        return max(counts.items(), key=lambda x: x[1])[0]

    model_top = top_contributor(all_md) or "?"
    tool_top = top_contributor(all_td) or "?"
    bottleneck_items = [
        ("模型响应", model_p95, f"主要来自 {model_top}"),
        ("工具执行", tool_p95, f"主要来自 {tool_top}"),
    ]
    # Larger P95 first.
    bottleneck_items.sort(key=lambda x: x[1], reverse=True)
    if not (all_md or all_td):
        out.item("(数据不足)")
    else:
        for i, (sec, val, hint) in enumerate(bottleneck_items):
            out.item(f"#{i+1}: {sec}(P95={val:.1f}s, {hint})")

    # Per-model statistics, most-called model first.
    out.subsection("模型性能")
    out.item(f"数据来源: 最近 {file_count} 个 session 文件")
    out.line("")
    model_stats = data["model_stats"]
    if not model_stats:
        out.item("最近 Session 中未发现模型使用数据")
    else:
        for model_key in sorted(model_stats.keys(), key=lambda k: model_stats[k]["calls"], reverse=True):
            s = model_stats[model_key]
            durs = sorted(s["durations"])
            calls = s["calls"]
            p50 = pct(durs, 0.50)
            p95 = pct(durs, 0.95)
            mx = durs[-1] if durs else 0.0
            total_dur = sum(durs)
            tokens_unreported = (s["input"] + s["output"] + s["cache_read"]) == 0
            # Output tokens per second over the summed call durations.
            throughput_str = (
                "N/A" if s["output"] == 0
                else f"{(s['output'] / total_dur):.1f} tok/s" if total_dur > 0 else "N/A"
            )
            stops = s["stop_reasons"]
            # NOTE(review): analyze_sessions only records non-empty stop
            # reasons, so calls with an empty stopReason are not counted as
            # normal here and lower the success rate - confirm that is intended.
            normal = sum(v for k, v in stops.items() if k in NORMAL_STOPS)
            success = (normal / calls * 100) if calls else 0.0
            latency_warn = ""
            # More than three samples that are all identical trigger the warning.
            if calls > 3 and len(durs) > 3 and durs[0] == durs[-1]:
                latency_warn = " [⚠ 时间精度不足]"
            out.item(model_key)
            out.item(f" 调用: {calls} | P50: {p50:.1f}s | P95: {p95:.1f}s | "
                     f"Max: {mx:.1f}s | 吞吐: {throughput_str} | 成功率: {success:.0f}%{latency_warn}")
            if tokens_unreported:
                tok_line = " tokens: 未报告"
                if s["cache_write"]:
                    tok_line += f" | cache_write={fmt_tokens(s['cache_write'])}"
                tok_line += " | cost: N/A" if s["cost"] == 0 else f" | cost: ${s['cost']:.4f}"
            else:
                tok_line = (
                    f" tokens: in={fmt_tokens(s['input'] + s['cache_read'])} "
                    f"out={fmt_tokens(s['output'])}"
                )
                if s["cache_read"]:
                    tok_line += f" | cache_read={fmt_tokens(s['cache_read'])}"
                if s["cache_write"]:
                    tok_line += f" | cache_write={fmt_tokens(s['cache_write'])}"
                tok_line += f" | cost: ${s['cost']:.4f}"
            out.item(tok_line)
            stops_str = " ".join(f"{k}:{v}" for k, v in sorted(stops.items(), key=lambda x: -x[1]))
            out.item(f" stopReasons: {stops_str}" if stops_str else " stopReasons: (none)")
            out.line("")

    # Per-tool statistics; only tools with at least one recorded duration.
    out.subsection("工具性能(Top 10 by 调用量)")
    timed_tools = {n: s for n, s in data["tool_stats"].items() if s["durations"]}
    if not timed_tools:
        out.item("(无工具调用数据)")
    else:
        ranked = sorted(timed_tools.items(), key=lambda kv: kv[1]["calls"], reverse=True)[:10]
        for name, s in ranked:
            durs = sorted(s["durations"])
            calls = s["calls"]
            err_rate = (s["errors"] / calls * 100) if calls else 0.0
            dur_str = f"P50={pct(durs,0.50):.3f}s P95={pct(durs,0.95):.3f}s Max={durs[-1]:.3f}s"
            out.item(f"{name}: {calls} 次 | {dur_str} | 错误 {err_rate:.0f}%")
            # The ten slowest timed invocations of this tool.
            timed = [r for r in s["records"] if r["dur"] is not None]
            timed.sort(key=lambda r: r["dur"], reverse=True)
            for i, r in enumerate(timed[:10]):
                args = r["args"] or "(无参数)"
                out.item(f" #{i+1}: {args} ({r['dur']:.3f}s)")
            # Then up to two failed invocations not already listed above.
            shown_ids = {id(r) for r in timed[:10]}
            err_shown = 0
            for r in s["error_records"]:
                if id(r) in shown_ids:
                    continue
                if err_shown >= 2:
                    break
                args = r["args"] or "(无参数)"
                dur_txt = f"{r['dur']:.1f}s" if r["dur"] is not None else "n/a"
                tail = f", {r['err_brief']}" if r["err_brief"] else ""
                out.item(f" 失败: {args} (error, {dur_txt}{tail})")
                err_shown += 1

    # Slowest model and tool calls combined, longest first.
    out.subsection("慢调用 Top 20")
    slow = sorted(data["slow_calls_top"], key=lambda x: x[0], reverse=True)
    seen_keys = set()
    dedup = []
    for entry in slow:
        # entry[3] is the dedup key (None for tool calls, which are never deduplicated).
        key = entry[3]
        if key is not None:
            if key in seen_keys:
                continue
            seen_keys.add(key)
        dedup.append(entry)
    top20 = dedup[:20]
    if not top20:
        out.item("(无数据)")
    else:
        for i, entry in enumerate(top20, 1):
            out.item(f"[{i}] {entry[2]}")

    # Assistant calls whose stop reason is not in NORMAL_STOPS (first 20 shown).
    out.subsection("异常 stopReason — 模型非正常结束(如 error、中断)")
    abnormal_stops = data["abnormal_stops"]
    out.item(f"共 {len(abnormal_stops)} 条" + ("(无异常)" if not abnormal_stops else ""))
    for s in abnormal_stops[:20]:
        out.item(s)

    # Abnormal calls grouped by category, most frequent first.
    out.subsection("模型 API 错误分布")
    api_err_total = sum(data["api_error_stats"].values())
    api_total = data["api_total_assistant_calls"]
    if api_total == 0:
        out.item("(无调用数据)")
    else:
        err_rate = api_err_total / api_total * 100
        out.item(f"总异常: {api_err_total} (总调用 {api_total} 中, 异常率 {err_rate:.1f}%)")
        if api_err_total > 0:
            out.item("分布:")
            for cat, n in sorted(data["api_error_stats"].items(), key=lambda kv: -kv[1]):
                out.item(f" {cat}: {n}")

    # End-to-end latency percentiles plus a histogram over fixed buckets.
    out.subsection("端到端消息延迟(user 发送 → assistant 最终响应)")
    e2e = data["e2e_latencies"]
    if not e2e:
        out.item("(数据不足,未发现 user→assistant 配对)")
    else:
        lat_sorted = sorted(e2e)
        p50 = pct(lat_sorted, 0.50)
        p95 = pct(lat_sorted, 0.95)
        mx = lat_sorted[-1]
        out.item(f"样本: {len(lat_sorted)} | P50={p50:.1f}s P95={p95:.1f}s Max={mx:.1f}s")
        # (label, exclusive upper bound in seconds)
        e2e_buckets = [("<10s", 10.0), ("10-30s", 30.0), ("30-60s", 60.0),
                       ("60-120s", 120.0), (">120s", float("inf"))]
        bucket_counts = {lbl: 0 for lbl, _ in e2e_buckets}
        for v in lat_sorted:
            for (lbl, upper) in e2e_buckets:
                if v < upper:
                    bucket_counts[lbl] += 1
                    break
        total = len(lat_sorted)
        out.item("分布:")
        for (lbl, _) in e2e_buckets:
            n = bucket_counts[lbl]
            pct_v = (n / total * 100) if total else 0.0
            out.item(f" {lbl}: {n} ({pct_v:.1f}%)")

    # Model call latency grouped by context size (input + cacheRead tokens).
    out.subsection("延迟 vs 上下文大小")
    ctx_buckets_def = data["ctx_buckets_def"]
    ctx_durs = data["ctx_bucket_durs"]
    if not any(ctx_durs.get(l) for l, _ in ctx_buckets_def):
        out.item("(数据不足)")
    else:
        out.line(f" {'上下文段':<14} {'调用数':>8} {'P50延迟':>10} {'P95延迟':>10}")
        for (b_label, _) in ctx_buckets_def:
            durs = sorted(ctx_durs.get(b_label, []))
            if not durs:
                out.line(f" {b_label:<14} {0:>8} {'-':>10} {'-':>10}")
                continue
            p50 = pct(durs, 0.50)
            p95 = pct(durs, 0.95)
            out.line(f" {b_label:<14} {len(durs):>8} {p50:>9.1f}s {p95:>9.1f}s")

    # One row per day for today (UTC) and the six days before it, newest first.
    out.subsection("每日趋势(最近 7 天)")
    daily_stats = data["daily_stats"]
    if not daily_stats:
        out.item("(数据不足)")
    else:
        today = datetime.now(timezone.utc).date()
        day_list = [(today - timedelta(days=i)).strftime("%m-%d") for i in range(7)]
        out.line(f" {'日期':<10} {'调用数':>8} {'P50延迟':>10} {'总输出tokens':>14}")
        for d_key in day_list:
            d = daily_stats.get(d_key)
            if not d or d["calls"] == 0:
                out.line(f" {d_key:<10} {0:>8} {'-':>10} {'-':>14}")
                continue
            durs = sorted(d["durs"])
            p50 = pct(durs, 0.50) if durs else 0.0
            out.line(f" {d_key:<10} {d['calls']:>8} {p50:>9.1f}s {fmt_tokens(d['output']):>14}")

    # Share of calls with cacheRead > 0 and share of cached context tokens.
    out.subsection("Cache 命中率")
    if data["cache_total_calls"] == 0:
        out.item("(无数据)")
    else:
        hit_pct = (data["cache_calls_with_cache"] / data["cache_total_calls"] * 100)
        out.item(
            f"总调用: {data['cache_total_calls']} | "
            f"触发 cache_read 的调用: {data['cache_calls_with_cache']} ({hit_pct:.1f}%)"
        )
        out.item(
            f"cache_read: {fmt_tokens(data['cache_sum_cache_read'])} | "
            f"input(非 cache): {fmt_tokens(data['cache_sum_input'])} | "
            f"cache_write: {fmt_tokens(data['cache_sum_cache_write'])}"
        )
        denom = data["cache_sum_input"] + data["cache_sum_cache_read"]
        if denom > 0:
            ratio = data["cache_sum_cache_read"] / denom * 100
            out.item(
                f"上下文 cache 占比: cacheRead/(input+cacheRead) = "
                f"{ratio:.3f}% ({fmt_tokens(data['cache_sum_cache_read'])}/{fmt_tokens(denom)})"
            )

    # Tool errors: per tool (most errors first), up to three error records each.
    out.subsection("工具错误明细")
    tool_stats = data["tool_stats"]
    err_total = sum(s["errors"] for s in tool_stats.values())
    call_total = sum(s["calls"] for s in tool_stats.values())
    if err_total == 0:
        out.item(f"共 0 次错误 (总调用 {call_total} 次中)")
    else:
        out.item(f"共 {err_total} 次错误 (总调用 {call_total} 次中):")
        for name in sorted(tool_stats.keys(), key=lambda k: tool_stats[k]["errors"], reverse=True):
            s = tool_stats[name]
            if s["errors"] == 0:
                continue
            out.line(f" {name} ({s['errors']}次):")
            for r in s["error_records"][:3]:
                ts_label = r["ts"].strftime("%Y-%m-%d %H:%M:%S") if r["ts"] else "?"
                brief = r["err_brief"] or "(无错误内容)"
                out.line(f" {ts_label} | {brief[:100]}")

    # The five sessions with the highest total token count.
    out.subsection("Session 消耗 Top 5")
    session_stats = data["session_stats"]
    if not session_stats:
        out.item("(无数据)")
    else:
        ranked = sorted(session_stats.items(), key=lambda kv: kv[1]["tokens"], reverse=True)[:5]
        out.line(f" {'session':<40} {'调用数':>8} {'总tokens':>10} {'模型耗时累计':>12}")
        for sid, ss in ranked:
            out.line(f" {sid:<40} {ss['calls']:>8} "
                     f"{fmt_tokens(ss['tokens']):>10} {ss['duration']:>11.0f}s")
|
|
660
|
+
|
|
661
|
+
|
|
662
|
+
def main() -> int:
    """Entry point: analyse the 20 newest session files and emit the report.

    Returns:
        Whatever ``out.done()`` returns.
    """
    arg_parser = cli.build_common_parser(
        prog="07_performance",
        description="模块 7:模型与性能数据",
    )
    opts = arg_parser.parse_args()
    out = output.init("performance", json_mode=opts.json, no_color=opts.no_color)
    out.section("模块 7:模型与性能数据")

    paths = collect_session_files(opts.sessions_base, limit=20)
    if paths:
        stats = analyze_sessions(paths)
        render(out, stats, len(paths))

        if opts.json:
            # Extra summary fields, added in JSON mode only.
            summary = (
                ("model_count", len(stats["model_stats"])),
                ("session_files_analyzed", len(paths)),
                ("e2e_latency_count", len(stats["e2e_latencies"])),
            )
            for field, value in summary:
                out.set_data(field, value)
    else:
        out.item("未找到 Session 文件")
    return out.done()
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
# When run as a script, use main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())
|