openclaw-diag-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +260 -0
- package/bin/ocdiag +14 -0
- package/bin/openclaw-diag.js +275 -0
- package/diag/01_sys_health.py +443 -0
- package/diag/02_environment.py +292 -0
- package/diag/03_configuration.py +131 -0
- package/diag/04_gateway.py +651 -0
- package/diag/05_recent_errors.py +246 -0
- package/diag/06_cron_jobs.py +694 -0
- package/diag/07_performance.py +687 -0
- package/diag/08_sessions.py +518 -0
- package/diag/09_plugin_diag.py +535 -0
- package/diag/10_shell_history.py +121 -0
- package/diag/__init__.py +0 -0
- package/lib/bundle.py +204 -0
- package/ocdiag/__init__.py +3 -0
- package/ocdiag/cli.py +39 -0
- package/ocdiag/dispatcher.py +137 -0
- package/ocdiag/jsonlog.py +65 -0
- package/ocdiag/output.py +131 -0
- package/ocdiag/paths.py +48 -0
- package/ocdiag/recent_logs.py +53 -0
- package/ocdiag/sensitive.py +41 -0
- package/ocdiag/timeutil.py +77 -0
- package/ocdiag/tokens.py +46 -0
- package/package.json +42 -0
- package/tools/__init__.py +0 -0
- package/tools/oc_session_extract.py +254 -0
- package/tools/oc_session_trace.py +715 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""模块 5:近期错误日志(应用日志 + journalctl + 工具调用错误)。"""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import datetime
|
|
7
|
+
import glob
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
from collections import Counter
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import List
|
|
16
|
+
|
|
17
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
18
|
+
|
|
19
|
+
from ocdiag import cli, output, recent_logs
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# A line counts as an application error when it carries a "logLevelName"
# field whose value is ERROR or FATAL.
_ERR_RE = re.compile(r'"logLevelName"\s*:\s*"(ERROR|FATAL)"')

# Wider level filter used for API errors: either a "logLevelName" or a
# "level" key, at error or warning severity (upper- or lower-case only).
_LEVEL_KEY = re.compile(r'"(logLevelName|level)"\s*:\s*"(ERROR|WARN|error|warn)"')

# Signs of a failed HTTP call: an "HTTP 4xx/5xx" mention, a 4xx/5xx "status"
# field, or rate-limit / quota wording (any single character may separate the
# two words).
_HTTP_ERR_RE = re.compile(
    r"HTTP [45][0-9][0-9]|\"status\":\s*(?:4[0-9][0-9]|5[0-9][0-9])|"
    r"rate.limit|quota.exceeded",
    flags=re.IGNORECASE,
)

# Lines excluded from the API error list because of their subsystem tag or
# because they contain the "allowlist contains" text.
_API_EXCLUDE_SUB_RE = re.compile(
    r'"subsystem":\s*"(tools|agent/embedded)"|allowlist contains',
    flags=re.IGNORECASE,
)

# Lines excluded from the API error list because of their message wording.
_API_EXCLUDE_TXT_RE = re.compile(
    r"embedded run agent|agent end|agent start",
    flags=re.IGNORECASE,
)

# Plain-text line shape: "[<ISO-like timestamp>...] <rest of line>".
_TS_RE = re.compile(r"\[(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}[^\]]*)\]\s*(.*)")

# An inline '[{... "subsystem" ...}]' blob, together with surrounding spaces.
_SUBSYSTEM_STRIP_RE = re.compile(r'\s*\[\{[^}]*"subsystem"[^}]*\}\]\s*')
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def extract_msg(obj):
    """Build a one-line message from the text fields of a parsed log record.

    The keys ``"0"``, ``"1"``, ``"2"``, ``"msg"`` and ``"message"`` are read
    in that order and only non-empty string values are used. A value that is
    itself a JSON object is flattened into ``key=value`` pairs with the
    ``subsystem`` key dropped; an object holding nothing but ``subsystem``
    contributes nothing.

    Args:
        obj: Parsed log record. Anything that is not a dict yields ``None``.

    Returns:
        The collected fragments joined by single spaces, or ``None`` when no
        fragment was found.
    """
    if not isinstance(obj, dict):
        return None

    parts = []
    for key in ("0", "1", "2", "msg", "message"):
        value = obj.get(key, "")
        if not value or not isinstance(value, str):
            continue
        if value.startswith("{"):
            try:
                inner = json.loads(value)
            except (ValueError, RecursionError):
                # Starts like JSON but does not parse: keep the raw text.
                inner = None
            if isinstance(inner, dict):
                meaningful = {ik: iv for ik, iv in inner.items() if ik != "subsystem"}
                if meaningful:
                    parts.append(" ".join(f"{ik}={iv}" for ik, iv in meaningful.items()))
                # A parsed object never falls through to the raw append below.
                continue
        parts.append(value)
    return " ".join(parts) if parts else None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def render_log_line(line: str, max_len: int = 300) -> str:
    """Render one raw log line as a short, human-readable string.

    A line that parses as a JSON object is rendered as
    ``[<time, first 19 chars>] <level>: <message>``. The message comes from
    ``extract_msg``; when that finds nothing, the record without its
    ``_meta`` and ``time`` keys is shown instead. Any other line is treated
    as plain text: inline ``[{..."subsystem"...}]`` blobs are removed and a
    leading ``[timestamp]`` is shortened to 19 characters.

    Args:
        line: Raw log line; surrounding whitespace is ignored.
        max_len: Maximum length of the message (JSON records) or of the whole
            line (plain text) before ``...`` is appended.

    Returns:
        The rendered line, or ``""`` for a blank input line.
    """
    line = line.strip()
    if not line:
        return ""

    # Only the parse itself is guarded, so a malformed field inside an
    # otherwise valid record no longer sends the whole line down the
    # plain-text path.
    try:
        obj = json.loads(line)
    except (ValueError, RecursionError):
        obj = None

    if isinstance(obj, dict):
        raw_ts = obj.get("time")
        # "time" may be missing or not a string; never slice a non-string.
        ts = "" if raw_ts is None else str(raw_ts)[:19]

        msg = extract_msg(obj)
        if not msg:
            msg = str({k: v for k, v in obj.items() if k not in ("_meta", "time")})
        if len(msg) > max_len:
            msg = msg[:max_len] + "..."

        meta = obj.get("_meta")
        level = meta.get("logLevelName") if isinstance(meta, dict) else None
        if level is None:
            # Some records keep the level at the top level (the level filter
            # in this file accepts both key names); use it before falling
            # back to the historical "ERROR" default.
            for key in ("logLevelName", "level"):
                candidate = obj.get(key)
                if isinstance(candidate, str) and candidate:
                    level = candidate
                    break
            else:
                level = "ERROR"
        return f"[{ts}] {level}: {msg}"

    # Plain-text line (or JSON that is not an object).
    line = _SUBSYSTEM_STRIP_RE.sub(" ", line).strip()
    m = _TS_RE.match(line)
    if m:
        line = f"[{m.group(1)[:19]}] {m.group(2)}"
    if len(line) > max_len:
        line = line[:max_len] + "..."
    return line
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def collect_error_lines(log_files: List[str]) -> List[str]:
    """Return every line of the given files that matches ``_ERR_RE``.

    Lines are returned in file order with the trailing newline removed.
    Undecodable bytes are replaced rather than raising, and a file that
    cannot be opened or read is skipped (lines gathered from it before the
    failure are kept).
    """
    matched: List[str] = []
    for path in log_files:
        try:
            with open(path, errors="replace") as handle:
                matched.extend(
                    raw.rstrip("\n") for raw in handle if _ERR_RE.search(raw)
                )
        except OSError:
            continue
    return matched
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def collect_api_errors(log_files: List[str]) -> List[str]:
    """Return the lines of the given files that look like HTTP/API failures.

    A line is kept when it matches ``_LEVEL_KEY`` and ``_HTTP_ERR_RE`` and
    matches neither ``_API_EXCLUDE_SUB_RE`` nor ``_API_EXCLUDE_TXT_RE``.
    Lines are returned in file order with the trailing newline removed; a
    file that cannot be opened or read is skipped (lines gathered from it
    before the failure are kept).
    """
    hits: List[str] = []
    for path in log_files:
        try:
            with open(path, errors="replace") as handle:
                hits.extend(
                    raw.rstrip("\n")
                    for raw in handle
                    if _LEVEL_KEY.search(raw)
                    and _HTTP_ERR_RE.search(raw)
                    and not _API_EXCLUDE_SUB_RE.search(raw)
                    and not _API_EXCLUDE_TXT_RE.search(raw)
                )
        except OSError:
            continue
    return hits
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def journalctl_errors() -> str:
    """Return today's error-priority journal output for ``openclaw-gateway``.

    Runs ``journalctl`` against the user journal with a 10 second timeout and
    returns its standard output as text. The exit status is not checked. An
    empty string is returned when the command times out or cannot be started.
    """
    command = [
        "journalctl", "--user",
        "-u", "openclaw-gateway",
        "--since", "today",
        "--priority", "err",
        "--no-pager",
    ]
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            timeout=10,
            check=False,
        )
    except (subprocess.TimeoutExpired, OSError):
        # OSError also covers a missing journalctl binary (FileNotFoundError).
        return ""
    else:
        return completed.stdout
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def find_recent_session(sessions_base: str):
    """Return the most recently modified ``*.jsonl`` file under *sessions_base*.

    Only files at least one directory level below *sessions_base* are
    considered (any depth beyond that). Files whose modification time cannot
    be read are ignored.

    Returns:
        The path of the newest file, or ``None`` when *sessions_base* is not
        a directory or no candidate was found.
    """
    if not os.path.isdir(sessions_base):
        return None

    pattern = os.path.join(sessions_base, "*", "**", "*.jsonl")
    newest_path, newest_mtime = None, -1.0
    for candidate in glob.glob(pattern, recursive=True):
        try:
            stamp = os.path.getmtime(candidate)
        except OSError:
            continue
        if stamp <= newest_mtime:
            # Not strictly newer: the earlier candidate wins ties.
            continue
        newest_path, newest_mtime = candidate, stamp
    return newest_path
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def tool_errors_from_session(session_path: str):
    """Count failed tool calls per tool name in the tail of a session file.

    Only the last 500 lines of the JSONL file are inspected. A line counts
    when it parses to an object whose ``message`` is an object with a truthy
    ``isError``; it is tallied under ``message["toolName"]``, or under
    ``"unknown"`` when that name is missing, empty or not a string. Lines
    that do not fit this shape are skipped.

    Args:
        session_path: Path of the session JSONL file.

    Returns:
        A ``Counter`` mapping tool name to error count; empty when the file
        cannot be read.
    """
    # Local import: the module's top-level imports only bring in Counter.
    from collections import deque

    counts = Counter()
    try:
        with open(session_path, errors="replace") as f:
            # A bounded deque keeps just the last 500 lines while streaming,
            # instead of loading the whole file into memory.
            tail = deque(f, maxlen=500)
    except OSError:
        return counts

    for line in tail:
        try:
            obj = json.loads(line)
        except (ValueError, RecursionError):
            continue
        if not isinstance(obj, dict):
            continue
        msg = obj.get("message")
        if not isinstance(msg, dict) or not msg.get("isError"):
            continue
        name = msg.get("toolName")
        # Keep every key a string so the tally formats and serialises
        # predictably.
        counts[name if isinstance(name, str) and name else "unknown"] += 1
    return counts
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _render_evidence(raw_lines: List[str], max_len: int, limit: int = 100) -> str:
    """Render at most *limit* lines and append the total when some were cut."""
    rendered = [
        text
        for text in (render_log_line(ln, max_len) for ln in raw_lines[:limit])
        if text
    ]
    if len(raw_lines) > limit:
        rendered.append(f"... 共 {len(raw_lines)} 条")
    return "\n".join(rendered)


def main() -> int:
    """Run the recent-errors diagnostic module and report its findings.

    Steps, in output order:
      1. list the recent log files found under ``args.log_dir``;
      2. report ERROR/FATAL application log lines and HTTP/API error lines
         (at most 100 rendered lines each, with a total footer when cut);
      3. report today's error-priority journal output (first 50 lines shown,
         all lines counted);
      4. report per-tool error counts from the newest session file under
         ``args.sessions_base``.

    Returns:
        The value returned by ``out.done()``, which the ``__main__`` guard
        passes to ``sys.exit``.
    """
    parser = cli.build_common_parser(
        description="模块 5:采集近期错误日志",
        prog="05_recent_errors",
    )
    args = parser.parse_args()

    out = output.init("recent_errors", json_mode=args.json, no_color=args.no_color)
    out.section("模块 5:近期日志")

    logs = recent_logs.discover_recent_logs(args.log_dir)
    out.set_data("scanned_logs", [os.path.basename(p) for p in logs])

    # --- 1. scanned log files -------------------------------------------
    if logs:
        out.item(f"今日有更新的日志文件 ({len(logs)} 个):")
        for lf in logs:
            try:
                ts = os.path.getmtime(lf)
                ts_str = datetime.datetime.fromtimestamp(ts).strftime("%H:%M:%S")
            except OSError:
                ts_str = "?"
            out.item(f" {os.path.basename(lf)} (mtime: {ts_str})")
    else:
        out.item("今日无更新的日志文件")

    out.line("")

    # --- 2. application log errors and API errors -----------------------
    if logs:
        err_lines = collect_error_lines(logs)
        out.set_data("app_error_count", len(err_lines))
        if err_lines:
            out.item(f"应用日志 ERROR 级别: {len(err_lines)} 条 — Gateway 运行时报错,包括工具失败、模型异常等")
            out.evidence("近期日志", _render_evidence(err_lines, 300))
        else:
            out.item("应用日志 ERROR 级别: 0 条 — Gateway 运行时报错")

        api_lines = collect_api_errors(logs)
        out.set_data("api_error_count", len(api_lines))
        if api_lines:
            out.item(f"模型 API HTTP 错误: {len(api_lines)} 条 ")
            # Same truncation footer as the application errors above, so a
            # cut-off list is never shown as if it were complete.
            out.evidence("近期日志", _render_evidence(api_lines, 500))
    else:
        out.item("应用日志未找到(今日无更新的日志文件)")

    # --- 3. journalctl ---------------------------------------------------
    journal_out = journalctl_errors()
    if journal_out and "No entries" not in journal_out and "no entries" not in journal_out:
        journal_lines = journal_out.splitlines()
        out.item("Journalctl ERROR 级别:")
        # Show only the first 50 lines, but count all of them.
        out.evidence("journalctl --priority err", "\n".join(journal_lines[:50]))
        out.set_data("journalctl_errors", len(journal_lines))
    else:
        out.item("Journalctl ERROR: 0 条 — 系统级进程错误")
        out.set_data("journalctl_errors", 0)

    # --- 4. tool-call errors in the newest session -----------------------
    recent_session = find_recent_session(args.sessions_base)
    if recent_session:
        counts = tool_errors_from_session(recent_session)
        total = sum(counts.values())
        out.item(f"最近 Session 的工具调用错误: {total} — 工具返回 error 的次数,过多说明某个工具持续异常")
        out.set_data("session_tool_error_count", total)
        if total > 0:
            detail = "; ".join(f"{n}:{c}" for n, c in counts.most_common(10))
            out.evidence(os.path.basename(recent_session), detail)
            out.set_data("session_tool_errors", dict(counts))
    else:
        out.item("未找到 Session 文件,跳过工具调用检查")

    return out.done()
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|