openclaw-diag-cli 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +260 -0
- package/bin/ocdiag +14 -0
- package/bin/openclaw-diag.js +275 -0
- package/diag/01_sys_health.py +443 -0
- package/diag/02_environment.py +292 -0
- package/diag/03_configuration.py +131 -0
- package/diag/04_gateway.py +651 -0
- package/diag/05_recent_errors.py +246 -0
- package/diag/06_cron_jobs.py +694 -0
- package/diag/07_performance.py +687 -0
- package/diag/08_sessions.py +518 -0
- package/diag/09_plugin_diag.py +535 -0
- package/diag/10_shell_history.py +121 -0
- package/diag/__init__.py +0 -0
- package/lib/bundle.py +204 -0
- package/ocdiag/__init__.py +3 -0
- package/ocdiag/cli.py +39 -0
- package/ocdiag/dispatcher.py +137 -0
- package/ocdiag/jsonlog.py +65 -0
- package/ocdiag/output.py +131 -0
- package/ocdiag/paths.py +48 -0
- package/ocdiag/recent_logs.py +53 -0
- package/ocdiag/sensitive.py +41 -0
- package/ocdiag/timeutil.py +77 -0
- package/ocdiag/tokens.py +46 -0
- package/package.json +42 -0
- package/tools/__init__.py +0 -0
- package/tools/oc_session_extract.py +254 -0
- package/tools/oc_session_trace.py +715 -0
|
@@ -0,0 +1,694 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Module 6: scheduled jobs (merges three sources: jobs.json + jobs-state.json + runs/)."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import datetime
|
|
7
|
+
import glob
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
import time
|
|
14
|
+
from collections import Counter, deque
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Optional
|
|
17
|
+
|
|
18
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
19
|
+
|
|
20
|
+
from ocdiag import cli, output, paths
|
|
21
|
+
|
|
22
|
+
try:
|
|
23
|
+
from croniter import croniter # type: ignore
|
|
24
|
+
HAS_CRONITER = True
|
|
25
|
+
except ImportError:
|
|
26
|
+
HAS_CRONITER = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def fmt_ts(ms):
    """Format a millisecond epoch timestamp as local ``YYYY-MM-DD HH:MM:SS``.

    Args:
        ms: Milliseconds since the Unix epoch. Falsy values (``None``, ``0``,
            ``""``) are treated as "unknown".

    Returns:
        The formatted local time, ``"?"`` for a falsy input, or ``str(ms)``
        when the value cannot be converted to a timestamp.
    """
    if not ms:
        return "?"
    try:
        moment = datetime.datetime.fromtimestamp(ms / 1000)
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric or out-of-range value: show it verbatim rather than fail
        # the whole report over one bad timestamp.
        return str(ms)
    return moment.strftime("%Y-%m-%d %H:%M:%S")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def fmt_duration(ms):
    """Render a duration given in milliseconds as seconds, minutes or hours.

    Returns "?" when ``ms`` is None; otherwise one decimal place with the
    unit suffix ``s`` (< 1 minute), ``min`` (< 1 hour) or ``h``.
    """
    if ms is None:
        return "?"
    seconds = ms / 1000
    # (exclusive upper bound in seconds, seconds per unit, suffix)
    for upper, per_unit, suffix in ((60, 1, "s"), (3600, 60, "min")):
        if seconds < upper:
            return f"{seconds / per_unit:.1f}{suffix}"
    return f"{seconds / 3600:.1f}h"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def fmt_age(ms_delta):
    """Render the magnitude of a millisecond delta as a short Chinese label.

    The sign is discarded. Seconds and minutes are shown as whole numbers,
    hours and days with one decimal place.
    """
    seconds = abs(ms_delta) / 1000
    # (exclusive upper bound in seconds, seconds per unit, output template)
    scales = (
        (60, 1, "{:.0f}秒"),
        (3600, 60, "{:.0f}分钟"),
        (86400, 3600, "{:.1f}小时"),
    )
    for upper, per_unit, template in scales:
        if seconds < upper:
            return template.format(seconds / per_unit)
    return "{:.1f}天".format(seconds / 86400)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def percentile(sorted_list, p):
    """Pick the element at fraction ``p`` of an already sorted sequence.

    The index is ``int(len * p)`` clamped into the valid range; no
    interpolation is performed. Returns None for an empty sequence.
    """
    if not sorted_list:
        return None
    last = len(sorted_list) - 1
    idx = int(len(sorted_list) * p)
    if idx > last:
        idx = last
    if idx < 0:
        idx = 0
    return sorted_list[idx]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def format_schedule(sched):
    """Return a short human-readable description of a job schedule.

    Args:
        sched: The job's ``schedule`` value. Expected to be a dict with a
            ``kind`` of ``"cron"``, ``"every"`` or ``"at"``; anything else
            (including ``None``) is rendered with ``str()``.

    Returns:
        A one-line description; unrecognised schedules are truncated to 100
        characters.
    """
    if isinstance(sched, dict):
        kind = sched.get("kind", "?")
        if kind == "cron":
            return f"cron {sched.get('expr','?')} (tz={sched.get('tz','UTC')})"
        if kind == "every":
            # A missing or null everyMs is shown as 0s; a non-numeric value
            # falls through to the generic rendering instead of raising.
            every_ms = sched.get("everyMs") or 0
            if isinstance(every_ms, (int, float)):
                return f"every {every_ms / 1000:.0f}s"
        if kind == "at":
            return f"at {sched.get('at','?')}"
    return str(sched)[:100]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def expected_interval_ms(sched, runs):
    """Estimate how often a job is expected to run, in milliseconds.

    Resolution order:
      1. ``every`` schedules: the configured ``everyMs`` value as-is.
      2. ``cron`` schedules (only when croniter is importable): the gap
         between the next two fire times computed from the current local time.
      3. Otherwise, with at least three timestamped runs: the median gap
         between consecutive run timestamps (``runAtMs`` or ``ts``).

    Args:
        sched: The job's schedule dict; a non-dict value is treated as empty.
        runs: Run records (dicts) or a falsy value.

    Returns:
        The interval in milliseconds, or None when it cannot be determined.
    """
    if not isinstance(sched, dict):
        sched = {}
    kind = sched.get("kind")
    if kind == "every":
        return sched.get("everyMs")
    if kind == "cron" and HAS_CRONITER:
        try:
            it = croniter(sched["expr"], datetime.datetime.now())
            first = it.get_next(datetime.datetime)
            second = it.get_next(datetime.datetime)
            return int((second - first).total_seconds() * 1000)
        except Exception:
            # Best effort: a missing or invalid expression falls back to the
            # run-history estimate below.
            pass
    if runs and len(runs) >= 3:
        stamps = []
        for run in runs:
            if not isinstance(run, dict):
                continue
            ts = run.get("runAtMs") or run.get("ts")
            # Only numeric timestamps can be sorted and subtracted safely.
            if ts and isinstance(ts, (int, float)) and not isinstance(ts, bool):
                stamps.append(ts)
        stamps.sort()
        if len(stamps) >= 3:
            gaps = sorted(later - earlier for earlier, later in zip(stamps, stamps[1:]))
            return gaps[len(gaps) // 2]
    return None
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def load_runs(runs_dir, jid, max_lines=200):
    """Load the most recent run records of a job from ``<runs_dir>/<jid>.jsonl``.

    Args:
        runs_dir: Directory holding the per-job JSONL run logs.
        jid: Job id; the log file is named ``<jid>.jsonl``.
        max_lines: Only the last ``max_lines`` non-blank lines are parsed.

    Returns:
        A list of dicts in file order. Returns an empty list when either
        argument is falsy, the file does not exist, or it cannot be read.
        Lines that are not valid JSON objects are skipped.
    """
    if not jid or not runs_dir:
        return []
    path = os.path.join(runs_dir, f"{jid}.jsonl")
    if not os.path.isfile(path):
        return []
    tail = deque(maxlen=max_lines)
    try:
        # Explicit UTF-8 so the result does not depend on the locale;
        # undecodable bytes are replaced instead of aborting the read.
        with open(path, encoding="utf-8", errors="replace") as fh:
            for raw in fh:
                raw = raw.strip()
                if raw:
                    tail.append(raw)
    except OSError:
        return []
    records = []
    for raw in tail:
        try:
            record = json.loads(raw)
        except (ValueError, RecursionError):
            continue
        # Callers use dict access on every record, so drop scalars and arrays.
        if isinstance(record, dict):
            records.append(record)
    return records
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def fmt_k(n):
    """Abbreviate a count with a K (thousands) or M (millions) suffix.

    Returns "?" for None and the plain ``str(n)`` for values below 1000.
    """
    if n is None:
        return "?"
    for threshold, suffix in ((1_000_000, "M"), (1000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return str(n)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def extract_usage(r):
    """Extract token usage and cost from a run record.

    Usage is read from ``r["usage"]`` or, when that is empty, from
    ``r["result"]["usage"]``. Token counts are looked up under ``input`` /
    ``input_tokens`` and ``output`` / ``output_tokens``; cost is either a
    number or a dict carrying ``total``.

    Args:
        r: A run record dict.

    Returns:
        A tuple ``(input_tokens, output_tokens, cost)``. Each element is a
        number, or None when it is absent or not numeric. A count of 0 is
        returned as 0, not None.
    """
    usage = r.get("usage")
    if not usage and isinstance(r.get("result"), dict):
        usage = r["result"].get("usage")
    if not isinstance(usage, dict):
        return None, None, None

    def numeric(value):
        # Callers sum these values, so anything non-numeric is reported as absent.
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
        return None

    def first_numeric(*keys):
        for key in keys:
            value = numeric(usage.get(key))
            if value is not None:
                return value
        return None

    cost = usage.get("cost")
    if isinstance(cost, dict):
        cost = cost.get("total")
    return (
        first_numeric("input", "input_tokens"),
        first_numeric("output", "output_tokens"),
        numeric(cost),
    )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def extract_error_text(r):
    """Return a one-line error summary (at most 100 characters) for a run record.

    Looks at ``error``, then ``errorMessage``, then ``result.error``. A dict
    error contributes its ``message`` or, failing that, its JSON dump.
    Whitespace runs are collapsed to single spaces. Returns "" when the
    record carries no error.
    """
    detail = r.get("error") or r.get("errorMessage")
    nested = r.get("result")
    if not detail and isinstance(nested, dict):
        detail = nested.get("error")
    if isinstance(detail, dict):
        detail = detail.get("message") or json.dumps(detail, ensure_ascii=False)
    if detail:
        return re.sub(r"\s+", " ", str(detail))[:100]
    return ""
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def extract_delivery_reason(r):
    """Return a one-line delivery failure reason (at most 100 characters).

    Prefers ``deliveryError``; otherwise falls back to
    ``delivery.resolved.error``. A dict reason contributes its ``message``
    or, failing that, its JSON dump. Whitespace runs are collapsed to single
    spaces. Returns "" when no reason is recorded.
    """
    cause = r.get("deliveryError")
    if not cause:
        delivery = r.get("delivery")
        resolved = delivery.get("resolved") if isinstance(delivery, dict) else None
        if isinstance(resolved, dict):
            cause = resolved.get("error")
    if isinstance(cause, dict):
        cause = cause.get("message") or json.dumps(cause, ensure_ascii=False)
    if cause:
        return re.sub(r"\s+", " ", str(cause))[:100]
    return ""
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def analyze(job, runs, now_ms):
    """Derive health metrics and warning flags for one scheduled job.

    Statistics are computed over the last 20 finished runs (a run counts as
    finished when it has a ``status`` or its ``action`` is ``"finished"``).

    Args:
        job: Job definition dict; ``state``, ``enabled``, ``schedule``,
            ``lastError`` and ``createdAtMs`` are read from it.
        runs: Run records (dicts) for this job, oldest first.
        now_ms: Current time in milliseconds since the epoch.

    Returns:
        A dict with ``status`` (``"disabled"``, ``"silent"``, ``"warn"`` or
        ``"ok"``), ``flags`` (list of ``(kind, message)`` tuples), ``recent``
        and the computed metrics (success rate, duration percentiles,
        delivery counters, expected interval).
    """
    state = job.get("state", {}) or {}
    enabled = job.get("enabled", True)

    finished = [r for r in runs if r.get("status") or r.get("action") == "finished"]
    recent = finished[-20:]

    ok = sum(1 for r in recent if r.get("status") == "ok")
    fail = len(recent) - ok
    success_rate = (ok / len(recent) * 100) if recent else None

    durs = sorted([r["durationMs"] for r in recent if r.get("durationMs") is not None])
    p50 = percentile(durs, 0.5)
    p95 = percentile(durs, 0.95)
    dur_max = durs[-1] if durs else None
    last_dur = recent[-1].get("durationMs") if recent else None

    # Runs marked "not-requested" are excluded from the delivery failure rate.
    deliv = Counter(r.get("deliveryStatus") for r in recent if r.get("deliveryStatus"))
    deliv_total = sum(deliv.values())
    deliv_ok = deliv.get("delivered", 0)
    deliv_unrequested = deliv.get("not-requested", 0)
    deliv_effective = deliv_total - deliv_unrequested
    deliv_fail_rate = None
    if deliv_effective > 0:
        deliv_fail_rate = (deliv_effective - deliv_ok) / deliv_effective * 100

    # `schedule` may be present but null; treat that like a missing schedule.
    exp_interval = expected_interval_ms(job.get("schedule") or {}, runs)

    flags = []

    def result(status):
        # Single place that assembles the return value for every exit path.
        return dict(status=status, flags=flags, recent=recent, success_rate=success_rate,
                    ok=ok, fail=fail, p50=p50, p95=p95, dur_max=dur_max, last_dur=last_dur,
                    deliv_ok=deliv_ok, deliv_total=deliv_total, deliv_effective=deliv_effective,
                    deliv_fail_rate=deliv_fail_rate, exp_interval=exp_interval)

    if not enabled:
        return result("disabled")

    cerr = state.get("consecutiveErrors", 0) or 0
    last_err = job.get("lastError") or state.get("lastError") or ""
    if cerr >= 3:
        le = re.sub(r"\s+", " ", str(last_err))[:100] if last_err else ""
        flags.append(("error", f"连续失败 {cerr} 次" + (f"(最近错误: {le})" if le else "")))
    elif cerr >= 1:
        flags.append(("note", f"最近失败 {cerr} 次"))

    if success_rate is not None and len(recent) >= 5 and success_rate < 80:
        flags.append(("error", f"成功率 {success_rate:.0f}%(最近 {len(recent)} 次)"))

    # Scheduler considered stuck when nextRun is overdue by more than two intervals.
    next_run = state.get("nextRunAtMs")
    if next_run and exp_interval:
        drift = now_ms - next_run
        if drift > 2 * exp_interval:
            flags.append(("error", f"调度卡住:nextRun 已过期 {fmt_age(drift)}(预期间隔 {fmt_age(exp_interval)})"))

    if last_dur and p95 and len(durs) >= 5 and last_dur > p95 * 2:
        flags.append(("note", f"最近耗时 {fmt_duration(last_dur)} 超历史 P95 ({fmt_duration(p95)}) 两倍"))

    if deliv_effective >= 5 and deliv_fail_rate is not None and deliv_fail_rate > 20:
        flags.append(("error", f"投递失败率 {deliv_fail_rate:.0f}%({deliv_effective - deliv_ok}/{deliv_effective})"))

    # Silence detection only applies to jobs created more than an hour ago;
    # jobs without a creation time are never flagged as silent.
    created = job.get("createdAtMs", 0) or 0
    age_since_create = now_ms - created if created else 0
    last_run = state.get("lastRunAtMs")
    is_silent = False
    if age_since_create > 3600 * 1000:
        if not last_run and not runs:
            flags.append(("silent", "任务已创建但从未执行"))
            is_silent = True
        elif exp_interval and last_run:
            idle = now_ms - last_run
            if idle > 2 * exp_interval:
                flags.append(("silent", f"已 {fmt_age(idle)} 未执行(预期间隔 {fmt_age(exp_interval)})"))
                is_silent = True

    # "silent" takes precedence over "warn" when both apply.
    if is_silent:
        return result("silent")
    if any(f[0] == "error" for f in flags):
        return result("warn")
    return result("ok")
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _job_label(job):
    """Return the job's display name, falling back to its id and then "?"."""
    return job.get("name") or job.get("id", "?")


def _job_schedule(job):
    """Return the job's schedule dict, or {} when it is missing or not a dict."""
    sched = job.get("schedule")
    return sched if isinstance(sched, dict) else {}


def _read_jobs(out, jobs_file):
    """Load job dicts from jobs.json; report the reason and return None if unusable."""
    if not os.path.isfile(jobs_file):
        out.item(" jobs.json 不存在 — 未创建过定时任务")
        return None

    try:
        with open(jobs_file, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        out.item(f" jobs.json 解析失败: {e}")
        return None

    # Accepted layouts: {"jobs": [...]}, {"jobs": {id: job}}, or a bare list.
    if isinstance(data, dict):
        jobs = data.get("jobs", [])
        if isinstance(jobs, dict):
            jobs = list(jobs.values())
        elif not isinstance(jobs, list):
            jobs = []
    elif isinstance(data, list):
        jobs = data
    else:
        jobs = []

    # Every later step uses dict access, so drop malformed entries up front.
    jobs = [j for j in jobs if isinstance(j, dict)]
    if not jobs:
        out.item(" jobs.json 存在但无任务")
        return None
    return jobs


def _read_external_state(state_file):
    """Return {job id: state dict} from jobs-state.json, or {} if unavailable."""
    ext_state = {}
    if not (state_file and os.path.isfile(state_file)):
        return ext_state
    try:
        with open(state_file, encoding="utf-8") as f:
            sd = json.load(f)
    except (OSError, ValueError):
        # The state file is optional; an unreadable one is ignored.
        return ext_state
    ext_jobs = sd.get("jobs", {}) if isinstance(sd, dict) else {}
    if isinstance(ext_jobs, dict):
        for jid, entry in ext_jobs.items():
            if isinstance(entry, dict):
                ext_state[jid] = entry.get("state", {}) or {}
    return ext_state


def _report_overview(out, ok_list, warn_list, silent_list, disabled_list):
    """Print the per-status summary, with the first matching flag per problem job."""
    out.item(" ── 状态概览 ──")
    if ok_list:
        out.item(f" 正常: {len(ok_list)} 个任务")
    if warn_list:
        out.item(f" 异常: {len(warn_list)} 个任务")
        for j, _, a in warn_list:
            msg = next((f[1] for f in a["flags"] if f[0] == "error"), "")
            out.item(f" · {_job_label(j)}: {msg}")
    if silent_list:
        out.item(f" 静默: {len(silent_list)} 个任务超期未执行")
        for j, _, a in silent_list:
            msg = next((f[1] for f in a["flags"] if f[0] == "silent"), "")
            out.item(f" · {_job_label(j)}: {msg}")
    if disabled_list:
        out.item(f" 禁用: {len(disabled_list)} 个任务(不纳入调度)")
    if not (ok_list or warn_list or silent_list or disabled_list):
        out.item(" (无任务)")


def _report_payload(out, job):
    """Print the job's payload, session target and delivery settings on one line."""
    payload = job.get("payload") or {}
    session_target = job.get("sessionTarget")
    delivery = job.get("delivery") or {}
    payload_lines = []
    if isinstance(payload, dict) and payload:
        for pk, pv in payload.items():
            if pv is None or pv == "":
                continue
            sv = str(pv)
            if len(sv) > 80:
                sv = sv[:77] + "..."
            payload_lines.append(f"{pk}={sv}")
    if session_target:
        payload_lines.append(f"sessionTarget={session_target}")
    if isinstance(delivery, dict) and delivery:
        del_parts = [f"{dk}={dv}" for dk, dv in delivery.items()
                     if dv is not None and dv != ""]
        if del_parts:
            payload_lines.append(f"delivery={{ {', '.join(del_parts)} }}")
    if payload_lines:
        out.item(" payload: " + " | ".join(payload_lines))


def _report_usage(out, recent):
    """Print summed token usage (and cost, when present) over the recent runs."""
    input_sum = 0
    output_sum = 0
    cost_sum = 0.0
    has_usage = False
    has_cost = False
    for r in recent:
        inp, outp, cost = extract_usage(r)
        # Skip non-numeric values so one malformed record cannot break the sum.
        if isinstance(inp, (int, float)):
            input_sum += inp
            has_usage = True
        if isinstance(outp, (int, float)):
            output_sum += outp
            has_usage = True
        if isinstance(cost, (int, float)):
            cost_sum += cost
            has_cost = True
    if has_usage:
        line = f" tokens(最近{len(recent)}次): in={fmt_k(input_sum)} out={fmt_k(output_sum)}"
        if has_cost:
            line += f" | cost=${cost_sum:.4f}"
        out.item(line)


def _report_failures(out, job, state, recent):
    """Print failure diagnostics: consecutive errors, error samples, delivery samples."""
    cerr = state.get("consecutiveErrors", 0) or 0
    if cerr > 0:
        out.item(f" 连续失败: {cerr} 次")

    # Up to three distinct error messages, newest first.
    fail_runs = [r for r in recent if r.get("status") and r.get("status") != "ok"]
    if fail_runs:
        seen_errs = set()
        samples = []
        for r in reversed(fail_runs):
            err = extract_error_text(r) or "(无错误详情)"
            if err in seen_errs:
                continue
            seen_errs.add(err)
            samples.append((r.get("ts") or r.get("runAtMs"), err))
            if len(samples) >= 3:
                break
        if samples:
            out.item(f" 最近失败({len(samples)}):")
            for ts, err in samples:
                out.item(f" {fmt_ts(ts)} | {err}")

    delivery_cfg = job.get("delivery")
    if isinstance(delivery_cfg, dict) and delivery_cfg:
        deliv_meta_parts = []
        if delivery_cfg.get("mode"):
            deliv_meta_parts.append(f"模式={delivery_cfg['mode']}")
        if delivery_cfg.get("channel"):
            deliv_meta_parts.append(f"channel={delivery_cfg['channel']}")
        # Up to three distinct (status, reason) pairs, newest first.
        deliv_fails = []
        seen_reasons = set()
        for r in reversed(recent):
            ds = r.get("deliveryStatus")
            if ds in (None, "", "not-requested", "delivered"):
                continue
            reason = extract_delivery_reason(r) or "(未知原因)"
            key = (ds, reason)
            if key in seen_reasons:
                continue
            seen_reasons.add(key)
            deliv_fails.append((r.get("ts") or r.get("runAtMs"), ds, reason))
            if len(deliv_fails) >= 3:
                break
        if deliv_meta_parts and deliv_fails:
            out.item(" 投递: " + " ".join(deliv_meta_parts))
            out.item(" 投递失败样本:")
            for ts, ds, reason in deliv_fails:
                out.item(f" {fmt_ts(ts)} | status={ds} | reason={reason}")


def _report_last_7_days(out, runs):
    """Print per-day run counts and success rates for the last seven local days."""
    finished_all = [r for r in runs if r.get("status") or r.get("action") == "finished"]
    if not finished_all:
        return
    today = datetime.datetime.now().date()
    buckets = {}  # date -> [total runs, ok runs]
    for r in finished_all:
        t = r.get("runAtMs") or r.get("ts")
        if not t:
            continue
        try:
            d = datetime.datetime.fromtimestamp(t / 1000).date()
        except (TypeError, ValueError, OverflowError, OSError):
            continue
        delta = (today - d).days
        if 0 <= delta < 7:
            b = buckets.setdefault(d, [0, 0])
            b[0] += 1
            if r.get("status") == "ok":
                b[1] += 1
    if buckets:
        parts = []
        for d in sorted(buckets, reverse=True):
            total_d, ok_d = buckets[d]
            rate = (ok_d / total_d * 100) if total_d else 0
            parts.append(f"{d.strftime('%m-%d')}: {total_d}次 {rate:.0f}%")
        out.item(" 最近7天: " + " | ".join(parts))


def _report_job_detail(out, idx, job, runs, a, now_ms):
    """Print the detail section of one job."""
    status = a["status"]
    icon_label = {
        "ok": "正常", "warn": "异常", "silent": "静默", "disabled": "禁用",
    }.get(status, "?")
    out.item(f" [{idx}] {_job_label(job)} ({icon_label})")

    schedule_text = format_schedule(_job_schedule(job))
    if status == "disabled":
        # Disabled jobs only get a one-line summary.
        out.item(f" 调度: {schedule_text} | ID: {job.get('id', '?')}")
        out.line("")
        return

    out.item(f" 调度: {schedule_text}")
    state = job.get("state", {}) or {}
    last_run = state.get("lastRunAtMs")
    if last_run:
        ls = state.get("lastStatus") or state.get("lastRunStatus") or "?"
        ld = state.get("lastDurationMs")
        line = f" 上次执行: {fmt_ts(last_run)} | {ls}"
        if ld is not None:
            line += f" | {fmt_duration(ld)}"
        out.item(line)
    else:
        out.item(" 上次执行: 从未执行")

    nr = state.get("nextRunAtMs")
    if nr:
        delta = nr - now_ms
        if delta >= 0:
            out.item(f" 下次执行: {fmt_ts(nr)} (在 {fmt_age(delta)}后)")
        else:
            out.item(f" 下次执行: {fmt_ts(nr)} (已过期 {fmt_age(delta)})")

    if a["success_rate"] is not None:
        n = a["ok"] + a["fail"]
        out.item(f" 成功率: {a['success_rate']:.0f}% (最近 {n} 次: ok={a['ok']} fail={a['fail']})")

    if a["p50"] is not None:
        parts = [f"P50={fmt_duration(a['p50'])}"]
        if a["p95"] is not None and a["p95"] != a["p50"]:
            parts.append(f"P95={fmt_duration(a['p95'])}")
        if a["dur_max"] is not None and a["dur_max"] != a["p50"]:
            parts.append(f"Max={fmt_duration(a['dur_max'])}")
        out.item(" 耗时: " + " ".join(parts))

    _report_payload(out, job)
    _report_usage(out, a["recent"])
    if status != "ok":
        _report_failures(out, job, state, a["recent"])
    _report_last_7_days(out, runs)

    out.line("")


def section_jobs(out: output.Output, jobs_file: str, state_file: str, runs_dir: str) -> None:
    """Report on OpenClaw scheduled jobs from jobs.json, jobs-state.json and runs/.

    Loads the job definitions, fills in missing per-job state from the
    external state file, analyses each job against its run log, then emits a
    status overview followed by a detail section per job. Structured results
    are stored through ``out.set_data`` under ``total_jobs``,
    ``enabled_count``, ``disabled_count``, ``status_overview`` and ``jobs``.

    Args:
        out: Report writer.
        jobs_file: Path to jobs.json.
        state_file: Path to jobs-state.json (optional; may be missing).
        runs_dir: Directory with one ``<job id>.jsonl`` run log per job.
    """
    out.item("【OpenClaw 定时任务】— jobs.json + jobs-state.json + runs/")
    jobs = _read_jobs(out, jobs_file)
    if jobs is None:
        return

    # State embedded in jobs.json wins; the external file only fills gaps.
    ext_state = _read_external_state(state_file)
    for j in jobs:
        jid = j.get("id")
        if jid and not j.get("state") and jid in ext_state:
            j["state"] = ext_state[jid]

    now_ms = int(time.time() * 1000)
    analyses = []
    for j in jobs:
        runs = [r for r in load_runs(runs_dir, j.get("id")) if isinstance(r, dict)]
        analyses.append((j, runs, analyze(j, runs, now_ms)))

    total = len(jobs)
    enabled_count = sum(1 for j in jobs if j.get("enabled", True))
    disabled_count = total - enabled_count
    out.item(f" 共 {total} 个任务({enabled_count} 启用, {disabled_count} 禁用)")
    out.line("")

    out.set_data("total_jobs", total)
    out.set_data("enabled_count", enabled_count)
    out.set_data("disabled_count", disabled_count)

    ok_list = [a for a in analyses if a[2]["status"] == "ok"]
    warn_list = [a for a in analyses if a[2]["status"] == "warn"]
    silent_list = [a for a in analyses if a[2]["status"] == "silent"]
    disabled_list = [a for a in analyses if a[2]["status"] == "disabled"]

    out.set_data("status_overview", {
        "ok": [_job_label(j) for j, _, _ in ok_list],
        "warn": [_job_label(j) for j, _, _ in warn_list],
        "silent": [_job_label(j) for j, _, _ in silent_list],
        "disabled": [_job_label(j) for j, _, _ in disabled_list],
    })

    jobs_payload = []
    for j, runs, a in analyses:
        state = j.get("state", {}) or {}
        jobs_payload.append({
            "id": j.get("id"),
            "name": j.get("name") or j.get("id"),
            "status": a["status"],
            "schedule": j.get("schedule", {}),
            "success_rate": a["success_rate"],
            "p50_ms": a["p50"],
            "p95_ms": a["p95"],
            "last_run_ts": state.get("lastRunAtMs"),
            "next_run_ts": state.get("nextRunAtMs"),
            "consecutive_errors": state.get("consecutiveErrors", 0) or 0,
            "flags": [{"kind": k, "msg": m} for k, m in a["flags"]],
        })
    out.set_data("jobs", jobs_payload)

    _report_overview(out, ok_list, warn_list, silent_list, disabled_list)

    out.line("")
    out.item(" ── 任务详情 ──")
    out.line("")

    for idx, (j, runs, a) in enumerate(analyses, 1):
        _report_job_detail(out, idx, j, runs, a, now_ms)
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
def _heartbeat_md_preview(hb_file):
    """Return up to five meaningful lines of a HEARTBEAT.md file.

    Blank lines and lines starting with ``#``, a code fence or ``<!--`` are
    ignored. An unreadable file yields an empty list.
    """
    try:
        with open(hb_file, encoding="utf-8", errors="replace") as f:
            content = f.read()
    except OSError:
        return []
    return [ln for ln in content.splitlines()
            if ln.strip() and not ln.startswith(("#", "```", "<!--"))][:5]


def _scan_heartbeat_logs(log_dir):
    """Collect heartbeat records from ``openclaw-*.log`` files in ``log_dir``.

    Returns:
        ``(interesting, started)``: formatted lines for records whose message
        does not contain "started", and ``(ts, level, msg, interval)`` tuples
        for the ones that do.
    """
    interesting = []
    started = []
    for lf in sorted(glob.glob(os.path.join(log_dir, "openclaw-*.log"))):
        try:
            with open(lf, encoding="utf-8", errors="replace") as f:
                for raw in f:
                    # Cheap substring filter before paying for JSON parsing.
                    if "gateway/heartbeat" not in raw:
                        continue
                    try:
                        d = json.loads(raw)
                    except (ValueError, RecursionError):
                        continue
                    if not isinstance(d, dict):
                        continue
                    ts = str(d.get("time", "?"))[:19]
                    interval = ""
                    first = d.get("1")
                    if isinstance(first, dict):
                        # Field "1" is a data object; the message is in field "2".
                        ms = first.get("intervalMs", 0)
                        if isinstance(ms, (int, float)):
                            interval = f"interval={ms/1000:.0f}s"
                        msg = str(d.get("2", ""))
                    else:
                        msg = str(d.get("1", ""))
                        if isinstance(d.get("2"), str):
                            msg += " | " + d["2"]
                    meta = d.get("_meta")
                    level = meta.get("logLevelName", "") if isinstance(meta, dict) else ""
                    if "started" in msg:
                        started.append((ts, level, msg, interval))
                    else:
                        interesting.append(f"{ts} | {level} | {msg} {interval}".strip())
        except OSError:
            continue
    return interesting, started


def section_heartbeat(out: output.Output, args) -> None:
    """Report the heartbeat configuration, per-agent HEARTBEAT.md files and log events.

    Reads ``agents.defaults.heartbeat.every`` from the config file, checks
    each entry under ``args.sessions_base`` for a HEARTBEAT.md in the agent's
    configured workspace, and scans the application logs for heartbeat
    records. Structured results are stored under the ``heartbeat`` data key.

    Args:
        out: Report writer.
        args: Parsed CLI arguments; ``config``, ``sessions_base`` and
            ``log_dir`` are read.
    """
    out.line("")
    out.item("【OpenClaw Heartbeat】— Agent 定期唤醒机制,用于执行 HEARTBEAT.md 中的周期性任务")

    # The config is parsed once and reused for the agent workspace lookup below.
    cfg = {}
    hb_every = "未配置"
    if os.path.isfile(args.config):
        try:
            with open(args.config, encoding="utf-8") as f:
                cfg = json.load(f)
            hb = cfg.get("agents", {}).get("defaults", {}).get("heartbeat", {})
            hb_every = hb.get("every", "未配置")
        except (OSError, ValueError, AttributeError):
            hb_every = "读取失败"
        if not isinstance(cfg, dict):
            cfg = {}
    out.item(f" 配置: agents.defaults.heartbeat.every = {hb_every}")
    hb_data = {"config_every": hb_every, "agents": {}}

    sessions_base = args.sessions_base
    if os.path.isdir(sessions_base):
        agents_cfg = cfg.get("agents")
        agent_list = agents_cfg.get("list") if isinstance(agents_cfg, dict) else None
        agent_workspaces = {}
        for a in agent_list if isinstance(agent_list, list) else []:
            if isinstance(a, dict) and a.get("id"):
                agent_workspaces[a["id"]] = a.get("workspace", "")
        for agent_dir in sorted(glob.glob(os.path.join(sessions_base, "*"))):
            agent_id = os.path.basename(agent_dir)
            ws_dir = agent_workspaces.get(agent_id, "")
            if not isinstance(ws_dir, str):
                ws_dir = ""
            hb_file = os.path.join(ws_dir, "HEARTBEAT.md") if ws_dir else ""
            if ws_dir and os.path.isfile(hb_file):
                lines = _heartbeat_md_preview(hb_file)
                if not lines:
                    out.item(f" {agent_id}: HEARTBEAT.md 存在但为空(不会触发 heartbeat)")
                    hb_data["agents"][agent_id] = {"heartbeat_md": "empty"}
                else:
                    out.item(f" {agent_id}: HEARTBEAT.md 有内容(会触发 heartbeat)")
                    out.evidence(hb_file, "\n".join(lines))
                    hb_data["agents"][agent_id] = {"heartbeat_md": "active"}
            else:
                out.item(f" {agent_id}: HEARTBEAT.md 不存在")
                hb_data["agents"][agent_id] = {"heartbeat_md": "missing"}

    interesting, started = _scan_heartbeat_logs(args.log_dir)
    if interesting:
        out.item(f" heartbeat 有效事件 {len(interesting)} 条(另有 {len(started)} 条启动记录)")
        out.evidence("应用日志 (heartbeat)", "\n".join(interesting[:50]))
        hb_data["events"] = len(interesting)
        hb_data["started_count"] = len(started)
    elif started:
        intervals = sorted({s[3] for s in started if s[3]})
        out.item(f" heartbeat 调度器: {len(started)} 次启动记录,间隔 {'、'.join(intervals)}")
        hb_data["events"] = 0
        hb_data["started_count"] = len(started)
        hb_data["intervals"] = list(intervals)
    else:
        out.item(" heartbeat 日志: 0 条 — 未发现 heartbeat 相关记录")
        hb_data["events"] = 0
        hb_data["started_count"] = 0

    out.set_data("heartbeat", hb_data)
|
|
647
|
+
|
|
648
|
+
|
|
649
|
+
def section_system_crontab(out: output.Output) -> None:
    """Report the current user's system crontab entries (``crontab -l``).

    Entries are the non-blank lines of the command's standard output whose
    first non-space character is not ``#``. They are stored under the
    ``system_crontab`` data key; an empty list is stored when there is no
    crontab or it cannot be read.

    Args:
        out: Report writer.
    """
    out.line("")
    out.item("【系统 crontab】")
    try:
        r = subprocess.run(["crontab", "-l"], capture_output=True, text=True,
                           timeout=5, check=False)
    except (subprocess.TimeoutExpired, OSError):
        # Missing binary, timeout or spawn failure: treated as "no crontab".
        r = None

    listing = ""
    if r is not None:
        if r.returncode == 0:
            listing = r.stdout or ""
        else:
            stderr = r.stderr or ""
            # A failure other than "no crontab for <user>" is reported as an
            # error; stderr text must never be listed as crontab entries.
            if stderr.strip() and "no crontab" not in stderr.lower():
                reason = " ".join(stderr.split())[:100]
                out.item(f" 读取失败: {reason}")
                out.set_data("system_crontab", [])
                return

    if not listing:
        out.item(" 无(未配置系统定时任务)")
        out.set_data("system_crontab", [])
        return

    # Comment lines may be indented, so test the first non-space character.
    entries = [ln for ln in listing.splitlines()
               if ln.strip() and not ln.lstrip().startswith("#")]
    if entries:
        out.item(f" 共 {len(entries)} 条")
        out.evidence("crontab -l", "\n".join(entries))
    else:
        out.item(" 无有效条目(仅注释)")
    out.set_data("system_crontab", entries)
|
|
669
|
+
|
|
670
|
+
|
|
671
|
+
def main() -> int:
    """Run module 6: parse CLI arguments, emit all three sections, return the exit code."""
    args = cli.build_common_parser(
        description="模块 6:定时任务采集",
        prog="06_cron_jobs",
    ).parse_args()

    out = output.init("cron_jobs", json_mode=args.json, no_color=args.no_color)
    out.section("模块 6:定时任务")

    # All cron artefacts live under <openclaw_home>/cron.
    cron_dir = os.path.join(args.openclaw_home, "cron")
    section_jobs(
        out,
        os.path.join(cron_dir, "jobs.json"),
        os.path.join(cron_dir, "jobs-state.json"),
        os.path.join(cron_dir, "runs"),
    )
    section_heartbeat(out, args)
    section_system_crontab(out)

    return out.done()
|
|
691
|
+
|
|
692
|
+
|
|
693
|
+
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|