openclaw-diag-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,715 @@
1
+ #!/usr/bin/env python3
2
+ """Trace the processing timeline of a user message in an OpenClaw session.
3
+
4
+ Channel-agnostic. Uses only universal data sources:
5
+ 1. session.jsonl (required) — message-level timeline
6
+ 2. trajectory.jsonl (optional) — run-level metadata
7
+ 3. gateway log (optional) — embedded run start/prompt start/prompt end
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import glob
14
+ import json
15
+ import os
16
+ import re
17
+ import sys
18
+ from datetime import datetime, timezone
19
+ from pathlib import Path
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+
22
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
23
+
24
+ from ocdiag import paths
25
+
26
+
27
# Default session and gateway-log locations, both read from ``ocdiag.paths``.
DEFAULT_BASE_DIR, DEFAULT_LOG_DIR = paths.SESSIONS_BASE, paths.LOG_DIR
29
+
30
+
31
def iso_to_epoch_ms(iso: str) -> int:
    """Convert an ISO-8601 timestamp string to epoch milliseconds.

    A trailing ``Z`` (UTC designator) is rewritten to ``+00:00`` so the value
    also parses on Python versions whose ``datetime.fromisoformat`` rejects
    ``Z``. Timestamps without an offset are interpreted by
    ``datetime.timestamp()`` as local time.

    Returns:
        Milliseconds since the Unix epoch, or 0 when *iso* is not a string or
        cannot be parsed (callers treat 0 as "no timestamp").
    """
    # Records come from JSON, so the field may be null or a number.
    if not isinstance(iso, str):
        return 0
    text = iso.strip()
    # Only the final character can be the UTC designator.
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = datetime.fromisoformat(text)
    except ValueError:
        return 0
    return int(parsed.timestamp() * 1000)
38
+
39
+
40
def epoch_ms_to_iso(ms: int) -> str:
    """Render epoch milliseconds as a UTC ISO-8601 string with a ``Z`` suffix.

    The result always carries exactly three fractional digits, e.g.
    ``2024-01-01T00:00:00.123Z``.
    """
    stamp = datetime.fromtimestamp(ms / 1000, tz=timezone.utc)
    millis = ms % 1000
    return f"{stamp:%Y-%m-%dT%H:%M:%S}.{millis:03d}Z"
43
+
44
+
45
def fmt_duration(ms: float) -> str:
    """Format a duration given in milliseconds as a short human-readable string.

    Output forms are ``"<n>ms"`` below one second, ``"<s.s>s"`` below one
    minute and ``"<m>m<s.s>s"`` otherwise.

    The value is rounded to the displayed precision *before* the unit is
    chosen, so a rounded value carries into the next unit instead of
    producing strings such as ``"60.0s"`` or ``"1m60.0s"``.
    """
    if round(ms) < 1000:
        return f"{ms:.0f}ms"
    # Work in tenths of a second, the finest precision shown from here on.
    tenths = round(ms / 100)
    if tenths < 600:
        return f"{tenths / 10:.1f}s"
    minutes, rest = divmod(tenths, 600)
    return f"{minutes}m{rest / 10:.1f}s"
53
+
54
+
55
def human_size(n: int) -> str:
    """Format a byte count using binary (1024-based) units from B up to TB.

    Plain byte counts are printed as given; larger units are shown with one
    decimal place.
    """
    value = n
    if value < 1024:
        return f"{value} B"
    for unit in ("KB", "MB", "GB"):
        value /= 1024
        if value < 1024:
            return f"{value:.1f} {unit}"
    # Anything that did not fit in GB is reported in TB.
    value /= 1024
    return f"{value:.1f} TB"
61
+
62
+
63
def extract_text(content: Any) -> str:
    """Flatten a message ``content`` value into a single string.

    Strings are returned unchanged. For lists, ``text`` items contribute
    their text and ``toolCall`` items a ``[toolCall:<name>]`` marker, joined
    with single spaces; other list items are ignored. Any other value is
    passed through ``str()``.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return str(content)

    fragments = []
    for item in content:
        if not isinstance(item, dict):
            continue
        kind = item.get("type")
        if kind == "text":
            fragments.append(item.get("text", ""))
        elif kind == "toolCall":
            fragments.append(f"[toolCall:{item.get('name','')}]")
    return " ".join(fragments)
76
+
77
+
78
def find_session_file(
    session_id: str,
    base_dir: str = DEFAULT_BASE_DIR,
    agent: Optional[str] = None,
) -> Optional[str]:
    """Locate the session log for ``session_id`` under ``<base_dir>/<agent>/sessions``.

    When ``agent`` is not given, every directory entry directly under
    ``base_dir`` is searched in sorted order. Trajectory files and ``.json``
    files are ignored. The live ``<session_id>.jsonl`` file is preferred,
    then ``.jsonl.deleted.*``, ``.jsonl.reset.*`` and ``.jsonl.bak-*`` copies.

    Returns:
        Path of the best match, or ``None`` when nothing matches.
    """
    if agent:
        roots = [os.path.join(base_dir, agent)]
    else:
        roots = sorted(glob.glob(os.path.join(base_dir, "*")))

    # Archived copies are recognised by a marker inside the file name;
    # the markers are tested in this order and the first hit wins.
    archive_markers = (
        (".jsonl.deleted.", "deleted"),
        (".jsonl.reset.", "reset"),
        (".jsonl.bak-", "backup"),
    )
    rank = {"active": 0, "deleted": 1, "reset": 2, "backup": 3}
    active_name = f"{session_id}.jsonl"

    found: List[Tuple[str, str]] = []
    for root in roots:
        sessions_dir = os.path.join(root, "sessions")
        if not os.path.isdir(sessions_dir):
            continue
        for name in os.listdir(sessions_dir):
            if not name.startswith(session_id):
                continue
            if ".trajectory" in name or name.endswith(".json"):
                continue
            path = os.path.join(sessions_dir, name)
            if not os.path.isfile(path):
                continue
            if name == active_name:
                found.append((path, "active"))
                continue
            for marker, label in archive_markers:
                if marker in name:
                    found.append((path, label))
                    break

    if not found:
        return None
    # min() keeps the earliest entry among equally ranked candidates.
    return min(found, key=lambda item: rank[item[1]])[0]
113
+
114
+
115
def find_trajectory_file(session_file: str) -> Optional[str]:
    """Return the ``<id>.trajectory.jsonl`` file next to ``session_file``.

    The id is everything in the session file name before the first
    ``.jsonl``. Returns ``None`` when no such file exists.
    """
    folder, filename = os.path.split(session_file)
    stem = filename.partition(".jsonl")[0]
    candidate = os.path.join(folder, stem + ".trajectory.jsonl")
    if os.path.isfile(candidate):
        return candidate
    return None
120
+
121
+
122
def find_gateway_logs(log_dir: str) -> List[str]:
    """Return the ``openclaw-*.log`` files in ``log_dir``, sorted by path."""
    pattern = os.path.join(log_dir, "openclaw-*.log")
    matches = glob.glob(pattern)
    matches.sort()
    return matches
124
+
125
+
126
def load_records(filepath: str) -> List[Dict]:
    """Read a JSON-lines file and return its records in file order.

    Blank lines, lines that are not valid JSON and JSON values that are not
    objects are skipped: every consumer in this module calls ``.get`` on the
    records, so only dicts are usable.

    The file is decoded as UTF-8 regardless of the platform default, with
    undecodable bytes replaced so a single damaged line does not abort the
    whole load.

    Raises:
        OSError: if the file cannot be opened.
    """
    records: List[Dict] = []
    with open(filepath, "r", encoding="utf-8", errors="replace") as f:
        for raw in f:
            line = raw.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                continue
            if isinstance(rec, dict):
                records.append(rec)
    return records
138
+
139
+
140
def find_user_messages(records: List[Dict]) -> List[Tuple[int, Dict]]:
    """Return ``(index, record)`` pairs for every user-role message record.

    A record qualifies when its ``type`` is ``"message"`` and its ``message``
    payload is a dict whose ``role`` is ``"user"``. Records whose ``message``
    is missing, null or not a dict are skipped instead of raising.
    """
    matches: List[Tuple[int, Dict]] = []
    for idx, rec in enumerate(records):
        if rec.get("type") != "message":
            continue
        msg = rec.get("message")
        if isinstance(msg, dict) and msg.get("role") == "user":
            matches.append((idx, rec))
    return matches
148
+
149
+
150
def find_first_message(records: List[Dict]) -> List[Tuple[int, Dict]]:
    """Return ``(index, record)`` pairs for all message records, any role.

    Used as a fallback when a session has no user-role messages. Only
    records whose ``message`` payload is a dict are included.
    """
    return [
        (pos, rec)
        for pos, rec in enumerate(records)
        if rec.get("type") == "message" and isinstance(rec.get("message"), dict)
    ]
157
+
158
+
159
def select_user_message(records, msg_index=None, msg_id=None, msg_match=None):
    """Pick the message record to trace and return it as ``(record_index, record)``.

    Candidates are the user-role messages; if there are none, all message
    records are used instead and a note is printed to stderr. Selectors are
    honoured in this order: ``msg_id`` (exact ``id`` match), ``msg_match``
    (first candidate whose text contains the string), ``msg_index``
    (0-based position among the candidates). With no selector the last
    candidate is returned.

    Any failed lookup prints an error to stderr and exits with status 1.
    """

    def _die(message):
        print(message, file=sys.stderr)
        sys.exit(1)

    candidates = find_user_messages(records)
    if not candidates:
        # No user messages — fall back to scanning all message records so trace
        # still works for assistant-only streams (e.g. cron delivery sessions).
        candidates = find_first_message(records)
        if not candidates:
            _die("Error: no message records found in session")
        print(
            f"Note: no user-role messages; tracing from first message record "
            f"({len(candidates)} message(s) total)",
            file=sys.stderr,
        )

    if msg_id is not None:
        hit = next((pair for pair in candidates if pair[1].get("id") == msg_id), None)
        if hit is None:
            _die(f"Error: no message with id '{msg_id}'")
        return hit

    if msg_match is not None:
        for pair in candidates:
            body = pair[1].get("message", {}).get("content", "")
            if msg_match in extract_text(body):
                return pair
        _die(f"Error: no message matching '{msg_match}'")

    if msg_index is None:
        return candidates[-1]
    if not 0 <= msg_index < len(candidates):
        _die(f"Error: msg-index {msg_index} out of range (0..{len(candidates)-1})")
    return candidates[msg_index]
193
+
194
+
195
def extract_trace_records(records, start_idx):
    """Return the records from ``start_idx`` up to the next user message.

    The record at ``start_idx`` is always included. Collection stops just
    before the next ``message`` record whose role is ``"user"``. Message
    records whose payload is missing, null or not a dict are kept in the
    trace rather than raising.
    """
    trace = []
    for pos in range(start_idx, len(records)):
        rec = records[pos]
        if pos > start_idx and rec.get("type") == "message":
            msg = rec.get("message")
            if isinstance(msg, dict) and msg.get("role") == "user":
                break
        trace.append(rec)
    return trace
205
+
206
+
207
+ def _tool_batch_duration(results, prev_epoch):
208
+ if not results or prev_epoch is None:
209
+ return 0
210
+ max_ts = max(r.get("message", {}).get("timestamp", 0) for r in results)
211
+ return max(0, max_ts - prev_epoch)
212
+
213
+
214
def _flush_tool_batch(events, tool_execs, results, base_ms, prev_epoch):
    """Record one batch of tool results in ``events`` and ``tool_execs``.

    The batch starts at ``prev_epoch`` (or ``base_ms`` when that is falsy)
    and ends at the latest result timestamp. One ``tool_batch`` event
    summarising tool names, error count and duration is appended to
    ``events``; one entry per result is appended to ``tool_execs``, each
    timed from the batch start. Does nothing when ``results`` is empty.
    """
    if not results:
        return

    messages = [rec.get("message", {}) for rec in results]
    started_at = prev_epoch or base_ms
    finished_at = max(m.get("timestamp", 0) for m in messages)
    elapsed = max(0, finished_at - started_at)

    # Tally calls per tool (insertion order is kept for display) and failures.
    counts: Dict[str, int] = {}
    failed = 0
    for m in messages:
        tool = m.get("toolName", "?")
        counts[tool] = counts.get(tool, 0) + 1
        if m.get("isError"):
            failed += 1

    labels = []
    for tool, cnt in counts.items():
        labels.append(f"{tool} ×{cnt}" if cnt > 1 else f"{tool}")
    summary = " + ".join(labels)
    outcome = f"{failed} error(s)" if failed else "ok"

    events.append({
        "offset_ms": max(0, (started_at - base_ms)),
        "type": "tool_batch",
        "detail": f"{summary} → {outcome} ({fmt_duration(elapsed)})",
        "count": len(results),
        "duration_ms": elapsed,
    })

    for m in messages:
        stamp = m.get("timestamp", 0)
        if stamp and started_at:
            took = max(0, stamp - started_at)
        else:
            took = 0
        tool_execs.append({
            "name": m.get("toolName", "?"),
            "duration_ms": took,
            "is_error": m.get("isError", False),
        })
248
+
249
+
250
def analyze_phases(trace):
    """Turn the records of one traced message into a timeline and totals.

    ``trace`` must be non-empty: ``trace[0]`` is the message being traced and
    the remaining records are what followed it. All offsets are relative to
    that first message's timestamp (``message.timestamp``, falling back to
    the record-level ISO ``timestamp``).

    For every assistant message a ``model_start``/``model_end`` event pair
    and a ``model_calls`` entry are produced; its duration is the record
    timestamp minus the message timestamp. Consecutive ``toolResult``
    messages are collected and flushed as one batch when the next assistant
    message arrives or the trace ends. ``openclaw:prompt-error`` custom
    records become ``error`` events.

    Null or non-dict ``message``/``usage``/``data`` payloads and null
    timestamps or token counts are treated as absent instead of raising.

    Returns:
        Dict with ``events``, ``model_calls``, ``tool_execs``, ``summary``
        and ``base_epoch_ms``. ``summary["total_ms"]`` is the latest point
        reached by any event (a tool batch counts up to its end), not simply
        the offset of whichever event was appended last.
    """
    events: List[Dict] = []
    model_calls: List[Dict] = []
    tool_execs: List[Dict] = []

    user_rec = trace[0]
    user_msg = user_rec.get("message")
    if not isinstance(user_msg, dict):
        user_msg = {}
    base_ms = user_msg.get("timestamp") or 0
    if not base_ms:
        base_ms = iso_to_epoch_ms(user_rec.get("timestamp") or "")

    events.append({"offset_ms": 0, "type": "user", "detail": "Message received"})

    model_num = 0
    tool_num = 0
    prev_assistant_record_epoch: Optional[int] = None
    pending_tool_results: List[Dict] = []
    total_model_ms = 0
    total_tool_ms = 0
    total_input_tokens = 0
    total_output_tokens = 0
    total_cache_read = 0
    total_cache_write = 0

    def flush_pending():
        """Emit the collected tool results as one batch and reset the buffer."""
        nonlocal total_tool_ms, tool_num, pending_tool_results
        if not pending_tool_results:
            return
        _flush_tool_batch(events, tool_execs, pending_tool_results,
                          base_ms, prev_assistant_record_epoch)
        total_tool_ms += _tool_batch_duration(pending_tool_results,
                                              prev_assistant_record_epoch)
        tool_num += len(pending_tool_results)
        pending_tool_results = []

    for r in trace[1:]:
        rtype = r.get("type")
        if rtype == "message":
            msg = r.get("message")
            if not isinstance(msg, dict):
                continue
            role = msg.get("role")
            if role == "assistant":
                # Tool results seen so far belong to the previous model call.
                flush_pending()
                model_num += 1
                msg_ts = msg.get("timestamp") or 0
                record_epoch = iso_to_epoch_ms(r.get("timestamp") or "")
                # The duration is only meaningful when both ends are known.
                duration_ms = record_epoch - msg_ts if (record_epoch and msg_ts) else 0
                usage = msg.get("usage")
                if not isinstance(usage, dict):
                    usage = {}
                out_tok = usage.get("output") or 0
                in_tok = usage.get("input") or 0
                cache_r = usage.get("cacheRead") or 0
                cache_w = usage.get("cacheWrite") or 0
                stop = msg.get("stopReason", "")
                provider = msg.get("provider", "")
                model = msg.get("model", "")
                rate = out_tok / (duration_ms / 1000) if duration_ms > 0 else 0
                start_offset = msg_ts - base_ms if msg_ts else 0
                end_offset = record_epoch - base_ms if record_epoch else 0
                events.append({
                    "offset_ms": start_offset, "type": "model_start", "num": model_num,
                    "detail": f"Call started → {provider}/{model}" if provider else "Call started",
                })
                events.append({
                    "offset_ms": end_offset, "type": "model_end", "num": model_num,
                    "detail": f"Completed (stopReason={stop})" + (" ← FINAL" if stop == "stop" else ""),
                    "duration_ms": duration_ms, "tokens_in": in_tok, "tokens_out": out_tok,
                    "cache_read": cache_r, "cache_write": cache_w, "rate": round(rate, 1),
                })
                content = msg.get("content", [])
                tool_names = []
                if isinstance(content, list):
                    tool_names = [
                        c.get("name", "?")
                        for c in content
                        if isinstance(c, dict) and c.get("type") == "toolCall"
                    ]
                model_calls.append({
                    "num": model_num, "duration_ms": duration_ms,
                    "tokens_out": out_tok, "tokens_in": in_tok,
                    "cache_read": cache_r, "cache_write": cache_w,
                    "stop_reason": stop, "tool_names": tool_names,
                    "provider": provider, "model": model, "rate": round(rate, 1),
                })
                total_model_ms += duration_ms
                total_input_tokens += in_tok
                total_output_tokens += out_tok
                total_cache_read += cache_r
                total_cache_write += cache_w
                prev_assistant_record_epoch = record_epoch
            elif role == "toolResult":
                pending_tool_results.append(r)
        elif rtype == "custom" and r.get("customType") == "openclaw:prompt-error":
            data = r.get("data")
            if not isinstance(data, dict):
                data = {}
            err_ts = data.get("timestamp") or 0
            offset = err_ts - base_ms if err_ts else 0
            events.append({
                "offset_ms": offset, "type": "error",
                "detail": f"prompt-error: {data.get('error', '?')}",
                "provider": data.get("provider", ""), "model": data.get("model", ""),
            })

    # Tool results with no following assistant message still form a batch.
    flush_pending()

    # Events are not guaranteed to be appended in time order (an event with
    # an unknown timestamp gets offset 0, a tool batch is stamped with its
    # start), so take the furthest point reached by any of them.
    total_ms = max(
        ev["offset_ms"] + (ev.get("duration_ms", 0) if ev["type"] == "tool_batch" else 0)
        for ev in events
    )
    return {
        "events": events, "model_calls": model_calls, "tool_execs": tool_execs,
        "summary": {
            "total_ms": total_ms, "model_count": model_num,
            "model_total_ms": total_model_ms, "tool_count": tool_num,
            "tool_total_ms": total_tool_ms, "total_input_tokens": total_input_tokens,
            "total_output_tokens": total_output_tokens,
            "total_cache_read": total_cache_read, "total_cache_write": total_cache_write,
        },
        "base_epoch_ms": base_ms,
    }
362
+
363
+
364
def load_trajectory_info(traj_path, base_epoch_ms):
    """Extract run metadata for the traced message from a trajectory file.

    Events in the JSON-lines file are grouped by ``runId``. The run whose
    first ``session.started`` event is closest in time to ``base_epoch_ms``
    is selected, provided it lies within 60 seconds.

    The file is decoded as UTF-8 with undecodable bytes replaced; lines that
    are not JSON objects, and null ``data``/``model`` payloads, are tolerated.

    Returns:
        Dict with ``runId`` and, when present in the events, ``trigger``,
        ``toolCount``, ``model_config``, ``status``, ``aborted``,
        ``timedOut``, ``context_compilation_ms`` and ``prompt_submission_ms``;
        or ``None`` when the file is unreadable, has no runs, or no run is
        close enough.
    """
    runs: Dict[str, List[Dict]] = {}
    try:
        with open(traj_path, "r", encoding="utf-8", errors="replace") as f:
            for raw in f:
                line = raw.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except json.JSONDecodeError:
                    continue
                if not isinstance(rec, dict):
                    continue
                rid = rec.get("runId", "")
                # Only string ids are usable as grouping keys.
                if rid and isinstance(rid, str):
                    runs.setdefault(rid, []).append(rec)
    except OSError:
        return None
    if not runs:
        return None

    best_run = None
    best_delta = float("inf")
    for rid, evts in runs.items():
        for e in evts:
            if e.get("type") == "session.started":
                ts = iso_to_epoch_ms(e.get("ts") or "")
                delta = abs(ts - base_epoch_ms)
                if delta < best_delta:
                    best_delta = delta
                    best_run = rid
                # Only the first session.started event of a run is considered.
                break
    if best_run is None or best_delta > 60_000:
        return None

    info: Dict[str, Any] = {"runId": best_run}
    # Timestamp per event type; a repeated type keeps its last occurrence.
    ts_map: Dict[str, int] = {}
    for e in runs[best_run]:
        etype = e.get("type", "")
        ts_map[etype] = iso_to_epoch_ms(e.get("ts") or "")
        data = e.get("data")
        if not isinstance(data, dict):
            data = {}
        if etype == "session.started":
            info["trigger"] = data.get("trigger")
            info["toolCount"] = data.get("toolCount")
        elif etype == "trace.metadata":
            model_info = data.get("model")
            if not isinstance(model_info, dict):
                model_info = {}
            info["model_config"] = {
                k: model_info.get(k)
                for k in ("provider", "name", "api", "thinkLevel", "reasoningLevel")
                if model_info.get(k) is not None
            }
        elif etype == "session.ended":
            info["status"] = data.get("status")
            info["aborted"] = data.get("aborted")
            info["timedOut"] = data.get("timedOut")
    if "session.started" in ts_map and "context.compiled" in ts_map:
        info["context_compilation_ms"] = ts_map["context.compiled"] - ts_map["session.started"]
    if "context.compiled" in ts_map and "prompt.submitted" in ts_map:
        info["prompt_submission_ms"] = ts_map["prompt.submitted"] - ts_map["context.compiled"]
    return info
424
+
425
+
426
+ def _parse_log_ts(ts_str):
427
+ try:
428
+ dt = datetime.fromisoformat(ts_str)
429
+ return int(dt.timestamp() * 1000)
430
+ except (ValueError, TypeError):
431
+ return None
432
+
433
+
434
def load_gateway_timing(log_files, session_id, base_epoch_ms):
    """Collect embedded-run timing for a session from gateway log files.

    Only log files whose name contains the UTC date of ``base_epoch_ms`` are
    read. Within them, JSON lines mentioning the session and
    ``agent/embedded`` are inspected; the message text is taken from field
    ``"1"`` and the timestamp from field ``"time"``. Run-start and
    prompt-start entries must lie within 120 seconds of ``base_epoch_ms``;
    a prompt-end entry is accepted once a run start is known and it is later
    than that start.

    Files are decoded as UTF-8 with undecodable bytes replaced; unreadable
    files and lines that are not JSON objects with a string message are
    skipped.

    Returns:
        Dict with any of ``run_to_prompt_ms``, ``prompt_duration_ms`` and
        ``reported_duration_ms``, or ``None`` when no run start was found or
        nothing could be derived.
    """
    if not log_files:
        return None
    run_start = None
    prompt_start = None
    prompt_end = None
    duration = None
    base_date = epoch_ms_to_iso(base_epoch_ms)[:10]
    # Every relevant message carries this tag, so test for it once per line.
    session_tag = f"sessionId={session_id}"
    window_ms = 120_000
    for lf in log_files:
        if base_date not in os.path.basename(lf):
            continue
        try:
            with open(lf, "r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    # Cheap substring filter before paying for JSON parsing.
                    if session_id not in line or "agent/embedded" not in line:
                        continue
                    try:
                        rec = json.loads(line.strip())
                    except json.JSONDecodeError:
                        continue
                    if not isinstance(rec, dict):
                        continue
                    msg = rec.get("1", "")
                    if not isinstance(msg, str) or session_tag not in msg:
                        continue
                    ts_str = rec.get("time", "")
                    if "embedded run start:" in msg:
                        ts = _parse_log_ts(ts_str)
                        if ts and abs(ts - base_epoch_ms) < window_ms:
                            run_start = ts
                    elif "embedded run prompt start:" in msg:
                        ts = _parse_log_ts(ts_str)
                        if ts and abs(ts - base_epoch_ms) < window_ms:
                            prompt_start = ts
                    elif "embedded run prompt end:" in msg:
                        ts = _parse_log_ts(ts_str)
                        if run_start and ts and ts > run_start:
                            prompt_end = ts
                            m = re.search(r"durationMs=(\d+)", msg)
                            duration = int(m.group(1)) if m else None
        except OSError:
            continue
    if run_start is None:
        return None
    result: Dict[str, Any] = {}
    if run_start and prompt_start:
        result["run_to_prompt_ms"] = prompt_start - run_start
    if prompt_start and prompt_end:
        result["prompt_duration_ms"] = prompt_end - prompt_start
    if duration:
        result["reported_duration_ms"] = duration
    return result if result else None
482
+
483
+
484
# Horizontal rules for the text report, each 66 characters wide:
# SEP is the heavy (double-line) rule, LINE the light one.
SEP, LINE = "═" * 66, "─" * 66
486
+
487
+
488
+ def _pct(part, total):
489
+ if total == 0:
490
+ return "0%"
491
+ return f"{part / total * 100:.1f}%"
492
+
493
+
494
def format_text(session_id, user_msg_index, user_msg_id, analysis,
                traj_info=None, gw_info=None):
    """Render the analysis as a plain-text report and return it as one string.

    The report contains a header, the event timeline, a summary with model
    and tool totals, per-call and per-tool breakdowns when there is data for
    them, and optional sections for trajectory metadata (``traj_info``) and
    gateway timing (``gw_info``). Lines are joined with newlines; there is no
    trailing newline.
    """
    out: List[str] = []
    emit = out.append

    # --- header -----------------------------------------------------------
    for header_line in (
        SEP,
        f"Message Trace: session {session_id}",
        f"User Message #{user_msg_index} (id: {user_msg_id})",
        SEP,
        "",
        "Timeline:",
        LINE,
    ):
        emit(header_line)

    # --- timeline ---------------------------------------------------------
    # Event types that render as a single line with a fixed tag.
    simple_tags = {"user": "[user]", "tool_batch": "[tool]", "error": "[ERROR]"}
    for ev in analysis["events"]:
        off = ev["offset_ms"]
        kind = ev["type"]
        detail = ev.get("detail", "")
        if kind in simple_tags:
            emit(f" T+{off:<10} {simple_tags[kind]} {detail}")
            continue
        if kind not in ("model_start", "model_end"):
            # Unknown event types are not shown.
            continue
        emit(f" T+{off:<10} [model #{ev['num']}] {detail}")
        if kind == "model_start":
            continue
        # A finished model call gets three detail lines.
        token_bits = [f" ├─ tokens: in={ev.get('tokens_in',0)} out={ev.get('tokens_out',0)}"]
        if ev.get("cache_read"):
            token_bits.append(f" cache_read={ev['cache_read']}")
        if ev.get("cache_write"):
            token_bits.append(f" cache_write={ev['cache_write']}")
        emit("".join(token_bits))
        emit(f" ├─ duration: {fmt_duration(ev.get('duration_ms', 0))}")
        emit(f" └─ rate: {ev.get('rate', 0)} tok/s")
    emit(LINE)
    emit("")

    # --- summary ----------------------------------------------------------
    s = analysis["summary"]
    total = s["total_ms"]
    emit("Summary:")
    emit(f" Total time: {fmt_duration(total)}")

    model_line = f" Model calls: {s['model_count']}"
    if s["model_count"]:
        model_line += f", total {fmt_duration(s['model_total_ms'])} ({_pct(s['model_total_ms'], total)})"
    emit(model_line)

    tool_line = f" Tool executions: {s['tool_count']}"
    if s["tool_count"]:
        tool_line += f", total {fmt_duration(s['tool_total_ms'])} ({_pct(s['tool_total_ms'], total)})"
    emit(tool_line)

    token_line = f" Tokens: in={s['total_input_tokens']} out={s['total_output_tokens']}"
    if s["total_cache_read"]:
        token_line += f" cache_read={s['total_cache_read']}"
    if s["total_cache_write"]:
        token_line += f" cache_write={s['total_cache_write']}"
    emit(token_line)

    if s["model_total_ms"] > 0:
        avg_rate = s["total_output_tokens"] / (s["model_total_ms"] / 1000)
    else:
        avg_rate = 0
    emit(f" Avg output rate: {avg_rate:.1f} tok/s")
    emit("")

    # --- per-call breakdown -----------------------------------------------
    if analysis["model_calls"]:
        emit(" Model breakdown:")
        for mc in analysis["model_calls"]:
            reason = mc["stop_reason"]
            names = mc["tool_names"]
            if reason == "toolUse" and names:
                if len(names) <= 3:
                    shown = ",".join(names)
                else:
                    shown = f"{names[0]}+{len(names)-1}more"
                suffix = f" (toolUse → {shown})"
            elif reason == "stop":
                suffix = " (stop) ← final"
            elif reason:
                suffix = f" ({reason})"
            else:
                suffix = ""
            emit(
                f" #{mc['num']:<3} {fmt_duration(mc['duration_ms']):>8} "
                f"out={mc['tokens_out']:<6}{suffix}"
            )
        emit("")

    # --- per-tool breakdown -----------------------------------------------
    if analysis["tool_execs"]:
        per_tool: Dict[str, Dict] = {}
        for te in analysis["tool_execs"]:
            bucket = per_tool.setdefault(
                te["name"], {"count": 0, "total_ms": 0, "errors": 0}
            )
            bucket["count"] += 1
            bucket["total_ms"] += te["duration_ms"]
            if te["is_error"]:
                bucket["errors"] += 1
        emit(" Tool breakdown:")
        # Slowest tools (by accumulated time) first.
        ranked = sorted(per_tool.items(), key=lambda kv: -kv[1]["total_ms"])
        for tool, stats in ranked:
            mean = stats["total_ms"] / stats["count"] if stats["count"] else 0
            err_note = f" ({stats['errors']} errors)" if stats["errors"] else ""
            emit(
                f" {tool + ':':<24} {stats['count']} call(s), "
                f"{fmt_duration(stats['total_ms'])} total, "
                f"avg {fmt_duration(mean)}{err_note}"
            )
        emit("")

    # --- optional enrichment ----------------------------------------------
    if traj_info:
        emit(" Run metadata (from trajectory):")
        emit(f" runId: {traj_info.get('runId', '?')}")
        if traj_info.get("trigger"):
            emit(f" trigger: {traj_info['trigger']}")
        if traj_info.get("context_compilation_ms") is not None:
            emit(f" context compilation: {fmt_duration(traj_info['context_compilation_ms'])}")
        if traj_info.get("model_config"):
            pairs = [
                f"{key}={val}"
                for key, val in traj_info["model_config"].items()
                if val is not None
            ]
            emit(f" model config: {', '.join(pairs)}")
        if traj_info.get("status"):
            emit(f" status: {traj_info['status']}")
        emit("")

    if gw_info:
        emit(" Gateway timing (from log):")
        if "run_to_prompt_ms" in gw_info:
            emit(f" run_start → prompt_start: {fmt_duration(gw_info['run_to_prompt_ms'])} (context compilation)")
        if "prompt_duration_ms" in gw_info:
            emit(f" prompt_start → prompt_end: {fmt_duration(gw_info['prompt_duration_ms'])} (total embedded run)")
        emit("")

    return "\n".join(out)
617
+
618
+
619
def format_json(session_id, session_file, user_msg_index, user_msg_id, analysis,
                traj_info=None, gw_info=None):
    """Serialise the trace as a pretty-printed JSON document.

    The ``trajectory`` and ``gateway`` keys are only present when the
    corresponding argument is truthy. Non-ASCII characters are written
    as-is rather than escaped.
    """
    payload = {}
    payload["session_id"] = session_id
    payload["session_file"] = session_file
    payload["user_message_index"] = user_msg_index
    payload["user_message_id"] = user_msg_id
    payload["base_epoch_ms"] = analysis["base_epoch_ms"]
    payload["timeline"] = analysis["events"]
    # These three keep their analysis names in the output.
    for key in ("model_calls", "tool_execs", "summary"):
        payload[key] = analysis[key]
    for key, extra in (("trajectory", traj_info), ("gateway", gw_info)):
        if extra:
            payload[key] = extra
    return json.dumps(payload, indent=2, ensure_ascii=False)
633
+
634
+
635
def main():
    """Command-line entry point: locate a session, trace one message, print the report.

    Steps: parse arguments, find and load the session file, select the
    message to trace, analyse the records that follow it, optionally enrich
    the result from the trajectory file and gateway logs, then emit text or
    JSON to stdout or to the ``--output`` file.

    Exits with status 1 when the session file is missing or empty (message
    selection failures exit from ``select_user_message``).
    """
    parser = argparse.ArgumentParser(
        description="Trace the processing timeline of a user message in an OpenClaw session.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("session_id", help="Session UUID to trace")
    parser.add_argument("--msg-index", type=int, default=None, help="Nth user message (0-based)")
    parser.add_argument("--msg-id", default=None, help="Message by id field")
    parser.add_argument("--msg-match", default=None, help="First user message containing TEXT")
    parser.add_argument("-o", "--output", default=None, help="Write output to file")
    parser.add_argument("--base-dir", default=DEFAULT_BASE_DIR, help="Agents base directory")
    parser.add_argument("--agent", default=None, help="Limit to specific agent")
    parser.add_argument("--log-dir", default=DEFAULT_LOG_DIR, help="Gateway log directory")
    parser.add_argument("--no-trajectory", action="store_true", help="Skip trajectory enrichment")
    parser.add_argument("--no-log", action="store_true", help="Skip gateway log enrichment")
    parser.add_argument("--json", action="store_true", help="Output as structured JSON")
    args = parser.parse_args()

    session_file = find_session_file(args.session_id, args.base_dir, args.agent)
    if not session_file:
        print(f"Error: no session file found for '{args.session_id}' under {args.base_dir}",
              file=sys.stderr)
        sys.exit(1)

    records = load_records(session_file)
    if not records:
        print(f"Error: session file is empty: {session_file}", file=sys.stderr)
        sys.exit(1)

    # Same candidate list select_user_message works from; used to turn the
    # chosen record index back into a position among the candidates.
    user_msgs = find_user_messages(records) or find_first_message(records)
    rec_idx, user_rec = select_user_message(records, args.msg_index, args.msg_id, args.msg_match)
    user_msg_ordinal = next(
        (i for i, (ri, _) in enumerate(user_msgs) if ri == rec_idx), 0
    )
    user_msg_id = user_rec.get("id", "?")

    trace = extract_trace_records(records, rec_idx)
    if len(trace) < 2:
        print("Warning: trace contains only the user message (no response found)",
              file=sys.stderr)

    analysis = analyze_phases(trace)

    traj_info = None
    if not args.no_trajectory:
        traj_path = find_trajectory_file(session_file)
        if traj_path:
            traj_info = load_trajectory_info(traj_path, analysis["base_epoch_ms"])

    gw_info = None
    if not args.no_log:
        log_files = find_gateway_logs(args.log_dir)
        if log_files:
            gw_info = load_gateway_timing(log_files, args.session_id, analysis["base_epoch_ms"])

    if args.json:
        out_str = format_json(args.session_id, session_file, user_msg_ordinal,
                              user_msg_id, analysis, traj_info, gw_info)
    else:
        out_str = format_text(args.session_id, user_msg_ordinal, user_msg_id,
                              analysis, traj_info, gw_info)

    if args.output:
        # The report contains non-ASCII characters (box-drawing rules,
        # arrows), so do not depend on the platform's default encoding.
        with open(args.output, "w", encoding="utf-8") as f:
            f.write(out_str + "\n")
        print(f"Trace written to {args.output}", file=sys.stderr)
    else:
        try:
            print(out_str)
            # Flush here so a closed pipe is detected inside this handler.
            sys.stdout.flush()
        except BrokenPipeError:
            # Python flushes standard streams again on exit; point stdout at
            # devnull so that flush does not raise a second BrokenPipeError.
            devnull = os.open(os.devnull, os.O_WRONLY)
            os.dup2(devnull, sys.stdout.fileno())
707
+
708
+
709
if __name__ == "__main__":
    # Ctrl-C exits with status 130; a closed output pipe counts as a normal
    # exit (status 0).
    exit_code = None
    try:
        main()
    except KeyboardInterrupt:
        exit_code = 130
    except BrokenPipeError:
        exit_code = 0
    if exit_code is not None:
        sys.exit(exit_code)