openclaw-diag-cli 0.1.2 → 0.2.1

This diff shows the changes between publicly available versions of the package, as released to one of the supported registries and as they appear in their respective public registries. The information contained in this diff is provided for informational purposes only.
@@ -7,7 +7,6 @@ import json
7
7
  import os
8
8
  import re
9
9
  import shlex
10
- import shutil
11
10
  import subprocess
12
11
  import sys
13
12
  from pathlib import Path
@@ -16,7 +15,7 @@ from typing import Optional
16
15
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
17
16
 
18
17
  from ocdiag import cli, output, paths
19
- from ocdiag.sensitive import safe_val
18
+ from ocdiag.sensitive import safe_val, sanitize_text
20
19
 
21
20
 
22
21
  def run(cmd, timeout=5):
@@ -110,7 +109,6 @@ def parse_proc_environ(pid: str) -> Optional[list]:
110
109
  def main() -> int:
111
110
  parser = cli.build_common_parser(
112
111
  description="模块 2:采集 OpenClaw 基础环境",
113
- prog="02_environment",
114
112
  )
115
113
  args = parser.parse_args()
116
114
  out = output.init("environment", json_mode=args.json, no_color=args.no_color)
@@ -118,11 +116,17 @@ def main() -> int:
118
116
 
119
117
  oc_version = detect_oc_version()
120
118
  if oc_version:
121
- out.item(f"ArkClaw 版本: {oc_version}")
119
+ out.item(f"OpenClaw 版本: {oc_version}")
122
120
  else:
123
- out.item("ArkClaw 版本: 无法确定")
121
+ out.item("OpenClaw 版本: 无法确定")
124
122
  out.evidence("openclaw --version", "命令未找到或无输出")
125
123
  out.set_data("oc_version", oc_version)
124
+ if not oc_version:
125
+ out.set_data("oc_version_status", {
126
+ "found": False,
127
+ "reason": "command_not_found",
128
+ "checked": "openclaw --version + pnpm/global node_modules",
129
+ })
126
130
 
127
131
  service_file = paths.SERVICE_FILE
128
132
  svc_version = None
@@ -153,6 +157,10 @@ def main() -> int:
153
157
  out.item("Node.js: 未找到")
154
158
  out.evidence("node --version", "命令未找到")
155
159
  out.set_data("node_version", node_ver)
160
+ if not node_ver:
161
+ out.set_data("node_version_status", {
162
+ "found": False, "reason": "command_not_found", "checked": "node --version",
163
+ })
156
164
 
157
165
  rc, stdout, _ = run(["free", "-m"])
158
166
  mem_avail = ""
@@ -166,6 +174,10 @@ def main() -> int:
166
174
  if mem_avail:
167
175
  out.item(f"可用内存: {mem_avail} MB")
168
176
  out.set_data("memory_available_mb", mem_avail)
177
+ if not mem_avail:
178
+ out.set_data("memory_status", {
179
+ "found": False, "reason": "free_unavailable", "checked": "free -m",
180
+ })
169
181
 
170
182
  rc, stdout, _ = run(["df", "-m", paths.OPENCLAW_HOME])
171
183
  disk_avail = ""
@@ -178,6 +190,11 @@ def main() -> int:
178
190
  if disk_avail:
179
191
  out.item(f"磁盘可用 ({paths.OPENCLAW_HOME}): {disk_avail} MB")
180
192
  out.set_data("disk_available_mb", disk_avail)
193
+ if not disk_avail:
194
+ out.set_data("disk_status", {
195
+ "found": False, "reason": "df_unavailable",
196
+ "checked": f"df -m {paths.OPENCLAW_HOME}",
197
+ })
181
198
 
182
199
  gw_status = gateway_systemctl_status()
183
200
  if gw_status:
@@ -245,8 +262,16 @@ def main() -> int:
245
262
  out.set_data("gateway_env", [{"key": k, "value": v} for k, v in env_pairs])
246
263
  elif pid:
247
264
  out.item(f"无法读取 /proc/{pid}/environ(权限不足?)")
265
+ out.set_data("gateway_env_status", {
266
+ "found": False, "reason": "proc_unreadable",
267
+ "checked": f"/proc/{pid}/environ",
268
+ })
248
269
  else:
249
270
  out.item("Gateway 进程未运行,跳过")
271
+ out.set_data("gateway_env_status", {
272
+ "found": False, "reason": "process_not_running",
273
+ "checked": "pgrep -f openclaw.*gateway",
274
+ })
250
275
 
251
276
  if os.path.isfile(paths.SERVICE_ENV_FILE):
252
277
  out.line("")
@@ -281,9 +306,10 @@ def main() -> int:
281
306
  try:
282
307
  with open(service_file) as f:
283
308
  for line in f:
284
- out.item(line.rstrip("\n"))
285
- except OSError:
286
- pass
309
+ raw = line.rstrip("\n")
310
+ out.item(raw if args.unmask else sanitize_text(raw))
311
+ except OSError as e:
312
+ out.item(f"读取失败: {e}")
287
313
 
288
314
  return out.done()
289
315
 
@@ -85,7 +85,6 @@ def emit_config(out: output.Output, data: list, obj, prefix: str = "") -> None:
85
85
  def main() -> int:
86
86
  parser = cli.build_common_parser(
87
87
  description="模块 3:采集 OpenClaw 配置(含敏感字段脱敏)",
88
- prog="03_configuration",
89
88
  )
90
89
  args = parser.parse_args()
91
90
 
@@ -95,6 +94,10 @@ def main() -> int:
95
94
  config_path = args.config
96
95
  if not os.path.isfile(config_path):
97
96
  out.item(f"配置文件未找到: {config_path}")
97
+ out.line(" 下一步:")
98
+ out.line(" 1) 确认 OpenClaw 已经初始化(运行过 `openclaw` 即会生成配置)")
99
+ out.line(" 2) 用 OPENCLAW_CONFIG=/path/to/openclaw.json 或 --config 指向正确路径")
100
+ out.line(" 3) 在容器/远端诊断时,用 OPENCLAW_HOME=/path 整体覆盖")
98
101
  out.evidence(config_path, "<文件缺失>")
99
102
  out.set_data("config_path", config_path)
100
103
  out.set_data("found", False)
@@ -11,7 +11,6 @@ import sys
11
11
  from collections import defaultdict
12
12
  from datetime import datetime
13
13
  from pathlib import Path
14
- from typing import List, Optional
15
14
 
16
15
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
17
16
 
@@ -146,11 +145,20 @@ def section_restart_events(out: output.Output) -> None:
146
145
 
147
146
  def section_model_api(out: output.Output, args) -> None:
148
147
  if not os.path.isfile(args.config):
148
+ out.item("模型 API: 配置文件未找到")
149
+ out.set_data("model_api_status", {
150
+ "found": False, "reason": "config_not_found", "checked": args.config,
151
+ })
149
152
  return
150
153
  try:
151
154
  with open(args.config) as f:
152
155
  cfg = json.load(f)
153
- except Exception:
156
+ except (OSError, json.JSONDecodeError) as e:
157
+ out.item(f"模型 API: 配置读取失败 ({type(e).__name__})")
158
+ out.set_data("model_api_status", {
159
+ "found": False, "reason": "config_unreadable",
160
+ "checked": args.config, "error": str(e)[:200],
161
+ })
154
162
  return
155
163
  models = cfg.get("models", {}) or {}
156
164
  all_cfgs = {}
@@ -290,8 +298,12 @@ def section_ws_lifecycle(out: output.Output, app_log: str) -> None:
290
298
  else:
291
299
  continue
292
300
  events.append((ts_dt, ts_str, account, kind, msg))
293
- except OSError:
294
- out.item("Channel WS: 读取应用日志失败")
301
+ except OSError as e:
302
+ out.item(f"Channel WS: 读取应用日志失败 ({type(e).__name__})")
303
+ out.set_data("ws_summary_status", {
304
+ "found": False, "reason": "log_unreadable",
305
+ "checked": app_log, "error": str(e)[:200],
306
+ })
295
307
  return
296
308
 
297
309
  if not events and not expired:
@@ -564,7 +576,12 @@ def section_gateway_errors(out: output.Output, app_log: str) -> None:
564
576
  continue
565
577
  kind, code, reason = r
566
578
  events.append((ts, kind, code, reason or "(no reason)"))
567
- except OSError:
579
+ except OSError as e:
580
+ out.item(f"Gateway 错误码: 读取应用日志失败 ({type(e).__name__})")
581
+ out.set_data("gateway_errors_status", {
582
+ "found": False, "reason": "log_unreadable",
583
+ "checked": app_log, "error": str(e)[:200],
584
+ })
568
585
  return
569
586
 
570
587
  if not events:
@@ -614,7 +631,6 @@ def section_gateway_errors(out: output.Output, app_log: str) -> None:
614
631
  def main() -> int:
615
632
  parser = cli.build_common_parser(
616
633
  description="模块 4:Gateway 状态采集",
617
- prog="04_gateway",
618
634
  )
619
635
  args = parser.parse_args()
620
636
 
@@ -622,6 +638,7 @@ def main() -> int:
622
638
  out.section("模块 4:Gateway 状态")
623
639
 
624
640
  port = 18789
641
+ port_source = "default"
625
642
  if os.path.isfile(args.config):
626
643
  try:
627
644
  with open(args.config) as f:
@@ -629,8 +646,13 @@ def main() -> int:
629
646
  cp = cfg.get("gateway", {}).get("port")
630
647
  if cp:
631
648
  port = int(cp)
632
- except Exception:
633
- pass
649
+ port_source = "config"
650
+ except (OSError, json.JSONDecodeError, ValueError) as e:
651
+ out.set_data("port_source_status", {
652
+ "found": False, "reason": "config_unreadable",
653
+ "checked": args.config, "error": str(e)[:200],
654
+ })
655
+ out.set_data("port_source", port_source)
634
656
 
635
657
  section_process_port(out, args, port)
636
658
  section_restart_events(out)
@@ -79,21 +79,25 @@ def render_log_line(line: str, max_len: int = 300) -> str:
79
79
  return line
80
80
 
81
81
 
82
- def collect_error_lines(log_files: List[str]) -> List[str]:
82
+ def collect_error_lines(log_files: List[str]):
83
+ """Returns (matched_lines, unreadable_files). One unreadable file does not
84
+ abort the whole scan, but we tell the caller which paths failed."""
83
85
  out: List[str] = []
86
+ unreadable: List[dict] = []
84
87
  for lf in log_files:
85
88
  try:
86
89
  with open(lf, errors="replace") as f:
87
90
  for ln in f:
88
91
  if _ERR_RE.search(ln):
89
92
  out.append(ln.rstrip("\n"))
90
- except OSError:
91
- continue
92
- return out
93
+ except OSError as e:
94
+ unreadable.append({"path": lf, "error": f"{type(e).__name__}: {e}"})
95
+ return out, unreadable
93
96
 
94
97
 
95
- def collect_api_errors(log_files: List[str]) -> List[str]:
98
+ def collect_api_errors(log_files: List[str]):
96
99
  out: List[str] = []
100
+ unreadable: List[dict] = []
97
101
  for lf in log_files:
98
102
  try:
99
103
  with open(lf, errors="replace") as f:
@@ -107,9 +111,9 @@ def collect_api_errors(log_files: List[str]) -> List[str]:
107
111
  if _API_EXCLUDE_TXT_RE.search(ln):
108
112
  continue
109
113
  out.append(ln.rstrip("\n"))
110
- except OSError:
111
- continue
112
- return out
114
+ except OSError as e:
115
+ unreadable.append({"path": lf, "error": f"{type(e).__name__}: {e}"})
116
+ return out, unreadable
113
117
 
114
118
 
115
119
  def journalctl_errors() -> str:
@@ -152,17 +156,21 @@ def tool_errors_from_session(session_path: str):
152
156
  msg = obj.get("message", {}) or {}
153
157
  if msg.get("isError"):
154
158
  counts[msg.get("toolName", "unknown")] += 1
155
- except Exception:
156
- pass
159
+ except (json.JSONDecodeError, ValueError):
160
+ # Expected: session.jsonl can have malformed lines from
161
+ # interrupted writes; skip and keep counting.
162
+ continue
157
163
  except OSError:
158
- pass
164
+ # Session file disappeared between glob() and open(). Caller already
165
+ # falls back to "no recent session"; reporting per-file unreadable
166
+ # would mostly add noise here.
167
+ return counts
159
168
  return counts
160
169
 
161
170
 
162
171
  def main() -> int:
163
172
  parser = cli.build_common_parser(
164
173
  description="模块 5:采集近期错误日志",
165
- prog="05_recent_errors",
166
174
  )
167
175
  args = parser.parse_args()
168
176
 
@@ -187,8 +195,10 @@ def main() -> int:
187
195
  out.line("")
188
196
 
189
197
  if logs:
190
- err_lines = collect_error_lines(logs)
198
+ err_lines, err_unreadable = collect_error_lines(logs)
191
199
  out.set_data("app_error_count", len(err_lines))
200
+ if err_unreadable:
201
+ out.set_data("app_log_unreadable", err_unreadable)
192
202
  if err_lines:
193
203
  out.item(f"应用日志 ERROR 级别: {len(err_lines)} 条 — Gateway 运行时报错,包括工具失败、模型异常等")
194
204
  rendered = []
@@ -202,7 +212,7 @@ def main() -> int:
202
212
  else:
203
213
  out.item("应用日志 ERROR 级别: 0 条 — Gateway 运行时报错")
204
214
 
205
- api_lines = collect_api_errors(logs)
215
+ api_lines, _api_unreadable = collect_api_errors(logs)
206
216
  out.set_data("api_error_count", len(api_lines))
207
217
  if api_lines:
208
218
  out.item(f"模型 API HTTP 错误: {len(api_lines)} 条 ")
@@ -13,11 +13,12 @@ import sys
13
13
  import time
14
14
  from collections import Counter, deque
15
15
  from pathlib import Path
16
- from typing import Optional
17
16
 
18
17
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
19
18
 
20
- from ocdiag import cli, output, paths
19
+ from ocdiag import cli, output
20
+ from ocdiag.timeutil import fmt_age, fmt_ts
21
+ from ocdiag.tokens import fmt_tokens, percentile
21
22
 
22
23
  try:
23
24
  from croniter import croniter # type: ignore
@@ -26,15 +27,6 @@ except ImportError:
26
27
  HAS_CRONITER = False
27
28
 
28
29
 
29
- def fmt_ts(ms):
30
- if not ms:
31
- return "?"
32
- try:
33
- return datetime.datetime.fromtimestamp(ms / 1000).strftime("%Y-%m-%d %H:%M:%S")
34
- except Exception:
35
- return str(ms)
36
-
37
-
38
30
  def fmt_duration(ms):
39
31
  if ms is None:
40
32
  return "?"
@@ -46,24 +38,6 @@ def fmt_duration(ms):
46
38
  return f"{s/3600:.1f}h"
47
39
 
48
40
 
49
- def fmt_age(ms_delta):
50
- s = abs(ms_delta) / 1000
51
- if s < 60:
52
- return f"{s:.0f}秒"
53
- if s < 3600:
54
- return f"{s/60:.0f}分钟"
55
- if s < 86400:
56
- return f"{s/3600:.1f}小时"
57
- return f"{s/86400:.1f}天"
58
-
59
-
60
- def percentile(sorted_list, p):
61
- if not sorted_list:
62
- return None
63
- k = max(0, min(len(sorted_list) - 1, int(len(sorted_list) * p)))
64
- return sorted_list[k]
65
-
66
-
67
41
  def format_schedule(sched):
68
42
  k = sched.get("kind", "?")
69
43
  if k == "cron":
@@ -121,16 +95,6 @@ def load_runs(runs_dir, jid):
121
95
  return out
122
96
 
123
97
 
124
- def fmt_k(n):
125
- if n is None:
126
- return "?"
127
- if n >= 1_000_000:
128
- return f"{n/1_000_000:.1f}M"
129
- if n >= 1000:
130
- return f"{n/1000:.1f}K"
131
- return str(n)
132
-
133
-
134
98
  def extract_usage(r):
135
99
  u = r.get("usage")
136
100
  if not u and isinstance(r.get("result"), dict):
@@ -463,7 +427,7 @@ def section_jobs(out: output.Output, jobs_file: str, state_file: str, runs_dir:
463
427
  cost_sum += cost
464
428
  has_cost = True
465
429
  if has_usage:
466
- line = f" tokens(最近{len(recent)}次): in={fmt_k(input_sum)} out={fmt_k(output_sum)}"
430
+ line = f" tokens(最近{len(recent)}次): in={fmt_tokens(input_sum)} out={fmt_tokens(output_sum)}"
467
431
  if has_cost:
468
432
  line += f" | cost=${cost_sum:.4f}"
469
433
  out.item(line)
@@ -671,7 +635,6 @@ def section_system_crontab(out: output.Output) -> None:
671
635
  def main() -> int:
672
636
  parser = cli.build_common_parser(
673
637
  description="模块 6:定时任务采集",
674
- prog="06_cron_jobs",
675
638
  )
676
639
  args = parser.parse_args()
677
640