openclaw-diag-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,694 @@
1
+ #!/usr/bin/env python3
2
+ """模块 6:定时任务(jobs.json + jobs-state.json + runs/ 三源合并)。"""
3
+
4
+ from __future__ import annotations
5
+
6
+ import datetime
7
+ import glob
8
+ import json
9
+ import os
10
+ import re
11
+ import subprocess
12
+ import sys
13
+ import time
14
+ from collections import Counter, deque
15
+ from pathlib import Path
16
+ from typing import Optional
17
+
18
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
19
+
20
+ from ocdiag import cli, output, paths
21
+
22
+ try:
23
+ from croniter import croniter # type: ignore
24
+ HAS_CRONITER = True
25
+ except ImportError:
26
+ HAS_CRONITER = False
27
+
28
+
29
def fmt_ts(ms):
    """Render an epoch-milliseconds timestamp as local ``YYYY-MM-DD HH:MM:SS``.

    Falsy input (``None``, ``0``, ``""``) yields ``"?"``. A value that cannot
    be converted is returned unchanged as ``str(ms)``.
    """
    if ms:
        try:
            moment = datetime.datetime.fromtimestamp(ms / 1000)
            return moment.strftime("%Y-%m-%d %H:%M:%S")
        except Exception:
            # Best effort: show the raw value rather than fail the report.
            return str(ms)
    return "?"
36
+
37
+
38
def fmt_duration(ms):
    """Format a duration in milliseconds as seconds, minutes or hours.

    Returns ``"?"`` for ``None``; otherwise one decimal place with the unit
    ``s`` (< 60 s), ``min`` (< 1 h) or ``h``.
    """
    if ms is None:
        return "?"
    seconds = ms / 1000
    if seconds < 60:
        value, unit = seconds, "s"
    elif seconds < 3600:
        value, unit = seconds / 60, "min"
    else:
        value, unit = seconds / 3600, "h"
    return f"{value:.1f}{unit}"
47
+
48
+
49
def fmt_age(ms_delta):
    """Format the magnitude of a millisecond delta as a human-readable age.

    The sign is ignored. Seconds and minutes are shown as whole numbers,
    hours and days with one decimal place.
    """
    seconds = abs(ms_delta) / 1000
    # (exclusive upper bound in seconds, seconds per unit, format spec, suffix)
    scales = (
        (60, 1, ".0f", "秒"),
        (3600, 60, ".0f", "分钟"),
        (86400, 3600, ".1f", "小时"),
    )
    for ceiling, unit_seconds, spec, suffix in scales:
        if seconds < ceiling:
            return format(seconds / unit_seconds, spec) + suffix
    return format(seconds / 86400, ".1f") + "天"
58
+
59
+
60
def percentile(sorted_list, p):
    """Pick the element at fractional position ``p`` of an ascending list.

    The index is ``int(len * p)`` clamped into the valid range, so no
    interpolation is performed. Returns ``None`` for an empty list.
    """
    if not sorted_list:
        return None
    size = len(sorted_list)
    index = int(size * p)
    if index > size - 1:
        index = size - 1
    if index < 0:
        index = 0
    return sorted_list[index]
65
+
66
+
67
def format_schedule(sched):
    """Return a short, human-readable description of a job schedule.

    Args:
        sched: The ``schedule`` value of a job. Expected to be a dict with a
            ``kind`` of ``cron``, ``every`` or ``at``; anything else is
            tolerated because the value comes from a user-editable JSON file.

    Returns:
        A one-line description. Unknown kinds fall back to the first 100
        characters of ``str(sched)``; ``None`` yields ``"?"``.
    """
    if sched is None:
        return "?"
    if not isinstance(sched, dict):
        # Malformed entry (e.g. a bare string): show it rather than crash.
        return str(sched)[:100]

    kind = sched.get("kind", "?")
    if kind == "cron":
        return f"cron {sched.get('expr','?')} (tz={sched.get('tz','UTC')})"
    if kind == "every":
        every_ms = sched.get("everyMs", 0)
        if isinstance(every_ms, (int, float)) and not isinstance(every_ms, bool):
            return f"every {every_ms/1000:.0f}s"
        # Null or non-numeric interval: cannot be converted to seconds.
        return "every ?"
    if kind == "at":
        return f"at {sched.get('at','?')}"
    return str(sched)[:100]
76
+
77
+
78
+ def expected_interval_ms(sched, runs):
79
+ k = sched.get("kind")
80
+ if k == "every":
81
+ return sched.get("everyMs")
82
+ if k == "cron" and HAS_CRONITER:
83
+ try:
84
+ base = datetime.datetime.now()
85
+ it = croniter(sched["expr"], base)
86
+ t1 = it.get_next(datetime.datetime)
87
+ t2 = it.get_next(datetime.datetime)
88
+ return int((t2 - t1).total_seconds() * 1000)
89
+ except Exception:
90
+ pass
91
+ if runs and len(runs) >= 3:
92
+ ts_list = sorted([r.get("runAtMs") or r.get("ts") for r in runs
93
+ if (r.get("runAtMs") or r.get("ts"))])
94
+ if len(ts_list) >= 3:
95
+ gaps = sorted(ts_list[i + 1] - ts_list[i] for i in range(len(ts_list) - 1))
96
+ return gaps[len(gaps) // 2]
97
+ return None
98
+
99
+
100
def load_runs(runs_dir, jid, max_lines=200):
    """Load the most recent run records of a job from ``<runs_dir>/<jid>.jsonl``.

    Args:
        runs_dir: Directory holding one JSONL file per job.
        jid: Job id; used as the file stem.
        max_lines: How many trailing non-empty lines to keep (default 200).

    Returns:
        A list of dicts in file order. Lines that are not valid JSON, or that
        decode to something other than a JSON object, are skipped. An
        unreadable or missing file yields an empty list.
    """
    if not jid or not runs_dir:
        return []
    path = os.path.join(runs_dir, f"{jid}.jsonl")
    if not os.path.isfile(path):
        return []

    tail = deque(maxlen=max_lines)
    try:
        # Explicit UTF-8 with replacement: a locale-dependent decode could
        # raise UnicodeDecodeError, which is not an OSError.
        with open(path, encoding="utf-8", errors="replace") as f:
            for raw in f:
                raw = raw.strip()
                if raw:
                    tail.append(raw)
    except OSError:
        return []

    records = []
    for raw in tail:
        try:
            rec = json.loads(raw)
        except (ValueError, RecursionError):
            continue
        # Callers use ``.get`` on every record, so only JSON objects qualify.
        if isinstance(rec, dict):
            records.append(rec)
    return records
122
+
123
+
124
def fmt_k(n):
    """Abbreviate a count: ``1.5M`` for millions, ``1.5K`` for thousands.

    ``None`` becomes ``"?"``; values below 1000 are returned via ``str``.
    """
    if n is None:
        return "?"
    for threshold, suffix in ((1_000_000, "M"), (1000, "K")):
        if n >= threshold:
            return f"{n / threshold:.1f}{suffix}"
    return str(n)
132
+
133
+
134
def extract_usage(r):
    """Extract token usage and cost from a run record.

    Usage is read from ``r["usage"]`` or, failing that, ``r["result"]["usage"]``.
    Token counts accept both ``input``/``output`` and
    ``input_tokens``/``output_tokens`` spellings; cost may be a number or a
    dict with a ``total`` field.

    Returns:
        ``(input_tokens, output_tokens, cost)``. Each element is a number or
        ``None``; non-numeric values are reported as ``None`` so callers can
        sum the results safely.
    """
    def _number(value):
        # bool is an int subclass but never a meaningful count or cost.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
        return value

    def _pick(mapping, *keys):
        # Prefer the first non-zero number; a recorded zero is still better
        # than "unknown" when nothing else is available.
        zero = None
        for key in keys:
            value = _number(mapping.get(key))
            if value is None:
                continue
            if value:
                return value
            zero = value
        return zero

    if not isinstance(r, dict):
        return None, None, None
    usage = r.get("usage")
    if not usage and isinstance(r.get("result"), dict):
        usage = r["result"].get("usage")
    if not isinstance(usage, dict):
        return None, None, None

    inp = _pick(usage, "input", "input_tokens")
    out = _pick(usage, "output", "output_tokens")
    raw_cost = usage.get("cost")
    if isinstance(raw_cost, dict):
        cost = _number(raw_cost.get("total"))
    else:
        cost = _number(raw_cost)
    return inp, out, cost
149
+
150
+
151
def extract_error_text(r):
    """Return a single-line error summary (max 100 chars) for a run record.

    Looks at ``error``, then ``errorMessage``, then ``result.error``. A dict
    error contributes its ``message`` or, lacking one, its JSON dump.
    Whitespace runs are collapsed to one space. Returns ``""`` when no error
    is present.
    """
    err = r.get("error") or r.get("errorMessage")
    nested = r.get("result")
    if not err and isinstance(nested, dict):
        err = nested.get("error")
    if isinstance(err, dict):
        err = err.get("message") or json.dumps(err, ensure_ascii=False)
    if err:
        return re.sub(r"\s+", " ", str(err))[:100]
    return ""
160
+
161
+
162
def extract_delivery_reason(r):
    """Return a single-line delivery failure reason (max 100 chars).

    Uses ``deliveryError`` when set, otherwise ``delivery.resolved.error``.
    A dict reason contributes its ``message`` or, lacking one, its JSON dump.
    Whitespace runs are collapsed. Returns ``""`` when nothing is recorded.
    """
    reason = r.get("deliveryError")
    if not reason:
        delivery = r.get("delivery")
        resolved = delivery.get("resolved") if isinstance(delivery, dict) else None
        if isinstance(resolved, dict):
            reason = resolved.get("error")
    if isinstance(reason, dict):
        reason = reason.get("message") or json.dumps(reason, ensure_ascii=False)
    if reason:
        return re.sub(r"\s+", " ", str(reason))[:100]
    return ""
175
+
176
+
177
def analyze(job, runs, now_ms):
    """Compute health statistics and diagnostic flags for one job.

    Statistics are taken over the 20 most recent finished runs (records with
    a ``status`` or ``action == "finished"``).

    Args:
        job: Job definition dict (``enabled``, ``schedule``, ``state``, ...).
        runs: Run records for the job, oldest first; non-dict items are ignored.
        now_ms: Current time in epoch milliseconds.

    Returns:
        A dict with keys ``status`` (``disabled``/``silent``/``warn``/``ok``),
        ``flags`` (list of ``(kind, message)``), ``recent``, ``success_rate``,
        ``ok``, ``fail``, ``p50``, ``p95``, ``dur_max``, ``last_dur``,
        ``deliv_ok``, ``deliv_total``, ``deliv_effective``,
        ``deliv_fail_rate`` and ``exp_interval``.
    """
    state = job.get("state")
    if not isinstance(state, dict):
        state = {}
    enabled = job.get("enabled", True)
    # Run logs are external JSONL; anything that is not an object cannot be
    # inspected with .get and is dropped.
    runs = [r for r in (runs or []) if isinstance(r, dict)]

    finished = [r for r in runs if r.get("status") or r.get("action") == "finished"]
    recent = finished[-20:]

    ok = sum(1 for r in recent if r.get("status") == "ok")
    fail = len(recent) - ok
    success_rate = (ok / len(recent) * 100) if recent else None

    durs = sorted(r["durationMs"] for r in recent if r.get("durationMs") is not None)
    p50 = percentile(durs, 0.5)
    p95 = percentile(durs, 0.95)
    dur_max = durs[-1] if durs else None
    last_dur = recent[-1].get("durationMs") if recent else None

    # Baseline for the slow-run check excludes the latest run itself. With at
    # most 20 samples the P95 index always lands on the maximum, so a baseline
    # that contains the latest run can never be exceeded by it.
    history = sorted(r["durationMs"] for r in recent[:-1]
                     if r.get("durationMs") is not None)
    history_p95 = percentile(history, 0.95)

    deliv = Counter(r.get("deliveryStatus") for r in recent if r.get("deliveryStatus"))
    deliv_total = sum(deliv.values())
    deliv_ok = deliv.get("delivered", 0)
    deliv_unrequested = deliv.get("not-requested", 0)
    # Runs that never asked for delivery do not count for or against it.
    deliv_effective = deliv_total - deliv_unrequested
    deliv_fail_rate = None
    if deliv_effective > 0:
        deliv_fail_rate = (deliv_effective - deliv_ok) / deliv_effective * 100

    exp_interval = expected_interval_ms(job.get("schedule") or {}, runs)

    flags = []

    def _result(status):
        return dict(status=status, flags=flags, recent=recent, success_rate=success_rate,
                    ok=ok, fail=fail, p50=p50, p95=p95, dur_max=dur_max, last_dur=last_dur,
                    deliv_ok=deliv_ok, deliv_total=deliv_total, deliv_effective=deliv_effective,
                    deliv_fail_rate=deliv_fail_rate, exp_interval=exp_interval)

    if not enabled:
        return _result("disabled")

    cerr = state.get("consecutiveErrors", 0) or 0
    last_err = job.get("lastError") or state.get("lastError") or ""
    if cerr >= 3:
        le = re.sub(r"\s+", " ", str(last_err))[:100] if last_err else ""
        flags.append(("error", f"连续失败 {cerr} 次" + (f"(最近错误: {le})" if le else "")))
    elif cerr >= 1:
        flags.append(("note", f"最近失败 {cerr} 次"))

    if success_rate is not None and len(recent) >= 5 and success_rate < 80:
        flags.append(("error", f"成功率 {success_rate:.0f}%(最近 {len(recent)} 次)"))

    next_run = state.get("nextRunAtMs")
    if next_run and exp_interval:
        drift = now_ms - next_run
        # More than two intervals overdue: the scheduler is not advancing.
        if drift > 2 * exp_interval:
            flags.append(("error", f"调度卡住:nextRun 已过期 {fmt_age(drift)}(预期间隔 {fmt_age(exp_interval)})"))

    if last_dur and history_p95 and len(history) >= 5 and last_dur > history_p95 * 2:
        flags.append(("note", f"最近耗时 {fmt_duration(last_dur)} 超历史 P95 ({fmt_duration(history_p95)}) 两倍"))

    if deliv_effective >= 5 and deliv_fail_rate is not None and deliv_fail_rate > 20:
        flags.append(("error", f"投递失败率 {deliv_fail_rate:.0f}%({deliv_effective - deliv_ok}/{deliv_effective})"))

    created = job.get("createdAtMs", 0) or 0
    age_since_create = now_ms - created if created else 0
    last_run = state.get("lastRunAtMs")
    is_silent = False
    # Jobs younger than one hour (or without a creation time) are never
    # reported as silent.
    # NOTE(review): the idle check is assumed to sit inside this age guard.
    if age_since_create > 3600 * 1000:
        if not last_run and not runs:
            flags.append(("silent", "任务已创建但从未执行"))
            is_silent = True
        elif exp_interval and last_run:
            idle = now_ms - last_run
            if idle > 2 * exp_interval:
                flags.append(("silent", f"已 {fmt_age(idle)} 未执行(预期间隔 {fmt_age(exp_interval)})"))
                is_silent = True

    if is_silent:
        status = "silent"
    elif any(kind == "error" for kind, _ in flags):
        status = "warn"
    else:
        status = "ok"
    return _result(status)
260
+
261
+
262
def _job_display_name(job):
    """Return the job's name, falling back to its id and then ``"?"``."""
    return job.get("name") or job.get("id", "?")


def _jobs_from_document(data):
    """Extract the list of job dicts from a parsed jobs.json document."""
    if isinstance(data, dict):
        jobs = data.get("jobs", [])
        if isinstance(jobs, dict):
            jobs = list(jobs.values())
    else:
        jobs = data
    if not isinstance(jobs, list):
        return []
    # Every later step calls .get on a job, so drop malformed entries here.
    return [j for j in jobs if isinstance(j, dict)]


def _load_external_state(out, state_file):
    """Read jobs-state.json and return ``{job_id: state_dict}`` (best effort)."""
    if not state_file or not os.path.isfile(state_file):
        return {}
    try:
        with open(state_file, encoding="utf-8") as f:
            doc = json.load(f)
    except (OSError, ValueError) as exc:
        # Report instead of silently ignoring: missing state changes the
        # verdict for every job below.
        out.item(f" jobs-state.json 解析失败: {exc}")
        return {}
    entries = doc.get("jobs") if isinstance(doc, dict) else None
    if not isinstance(entries, dict):
        return {}
    return {jid: (entry.get("state", {}) or {})
            for jid, entry in entries.items() if isinstance(entry, dict)}


def _emit_flagged_jobs(out, group, kind):
    """Print one line per job with its first flag message of ``kind``."""
    for j, _, a in group:
        msg = next((m for k, m in a["flags"] if k == kind), "")
        out.item(f" · {_job_display_name(j)}: {msg}")


def _emit_status_overview(out, ok_list, warn_list, silent_list, disabled_list):
    """Print the per-status job counts and the reasons for problem jobs."""
    out.item(" ── 状态概览 ──")
    if ok_list:
        out.item(f" 正常: {len(ok_list)} 个任务")
    if warn_list:
        out.item(f" 异常: {len(warn_list)} 个任务")
        _emit_flagged_jobs(out, warn_list, "error")
    if silent_list:
        out.item(f" 静默: {len(silent_list)} 个任务超期未执行")
        _emit_flagged_jobs(out, silent_list, "silent")
    if disabled_list:
        out.item(f" 禁用: {len(disabled_list)} 个任务(不纳入调度)")
    if not (ok_list or warn_list or silent_list or disabled_list):
        out.item(" (无任务)")


def _payload_summary(j):
    """Build the ``key=value`` fragments describing payload, target and delivery."""
    fragments = []
    payload = j.get("payload") or {}
    if isinstance(payload, dict) and payload:
        for pk, pv in payload.items():
            if pv is None or pv == "":
                continue
            sv = str(pv)
            if len(sv) > 80:
                sv = sv[:77] + "..."
            fragments.append(f"{pk}={sv}")
    session_target = j.get("sessionTarget")
    if session_target:
        fragments.append(f"sessionTarget={session_target}")
    delivery = j.get("delivery") or {}
    if isinstance(delivery, dict) and delivery:
        del_parts = [f"{dk}={dv}" for dk, dv in delivery.items()
                     if dv is not None and dv != ""]
        if del_parts:
            fragments.append(f"delivery={{ {', '.join(del_parts)} }}")
    return fragments


def _emit_usage(out, recent):
    """Print summed token usage (and cost, if recorded) over ``recent`` runs."""
    def _num(value):
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    input_sum = 0
    output_sum = 0
    cost_sum = 0.0
    has_usage = False
    has_cost = False
    for r in recent:
        inp, outp, cost = extract_usage(r)
        # Only real numbers are summed; a stray string must not abort the report.
        if _num(inp):
            input_sum += inp
            has_usage = True
        if _num(outp):
            output_sum += outp
            has_usage = True
        if _num(cost):
            cost_sum += cost
            has_cost = True
    if has_usage:
        line = f" tokens(最近{len(recent)}次): in={fmt_k(input_sum)} out={fmt_k(output_sum)}"
        if has_cost:
            line += f" | cost=${cost_sum:.4f}"
        out.item(line)


def _emit_failure_samples(out, recent):
    """Print up to three distinct error messages from the newest failed runs."""
    fail_runs = [r for r in recent if r.get("status") and r.get("status") != "ok"]
    seen = set()
    samples = []
    for r in reversed(fail_runs):
        err = extract_error_text(r) or "(无错误详情)"
        if err in seen:
            continue
        seen.add(err)
        samples.append((r.get("ts") or r.get("runAtMs"), err))
        if len(samples) >= 3:
            break
    if samples:
        out.item(f" 最近失败({len(samples)}):")
        for ts, err in samples:
            out.item(f" {fmt_ts(ts)} | {err}")


def _emit_delivery_failures(out, j, recent):
    """Print delivery settings plus up to three distinct delivery failures."""
    delivery_cfg = j.get("delivery")
    if not (isinstance(delivery_cfg, dict) and delivery_cfg):
        return
    meta = []
    if delivery_cfg.get("mode"):
        meta.append(f"模式={delivery_cfg['mode']}")
    if delivery_cfg.get("channel"):
        meta.append(f"channel={delivery_cfg['channel']}")
    failures = []
    seen = set()
    for r in reversed(recent):
        ds = r.get("deliveryStatus")
        if ds in (None, "", "not-requested", "delivered"):
            continue
        reason = extract_delivery_reason(r) or "(未知原因)"
        key = (ds, reason)
        if key in seen:
            continue
        seen.add(key)
        failures.append((r.get("ts") or r.get("runAtMs"), ds, reason))
        if len(failures) >= 3:
            break
    if meta and failures:
        out.item(" 投递: " + " ".join(meta))
        out.item(" 投递失败样本:")
        for ts, ds, reason in failures:
            out.item(f" {fmt_ts(ts)} | status={ds} | reason={reason}")


def _emit_weekly_trend(out, runs):
    """Print per-day run count and success rate for the last seven local days."""
    finished_all = [r for r in runs if isinstance(r, dict)
                    and (r.get("status") or r.get("action") == "finished")]
    if not finished_all:
        return
    today = datetime.datetime.now().date()
    buckets = {}
    for r in finished_all:
        t = r.get("runAtMs") or r.get("ts")
        if not t:
            continue
        try:
            day = datetime.datetime.fromtimestamp(t / 1000).date()
        except (TypeError, ValueError, OverflowError, OSError):
            continue
        if 0 <= (today - day).days < 7:
            bucket = buckets.setdefault(day, [0, 0])  # [total, ok]
            bucket[0] += 1
            if r.get("status") == "ok":
                bucket[1] += 1
    if buckets:
        parts = []
        for day in sorted(buckets, reverse=True):
            total_d, ok_d = buckets[day]
            rate = (ok_d / total_d * 100) if total_d else 0
            parts.append(f"{day.strftime('%m-%d')}: {total_d}次 {rate:.0f}%")
        out.item(" 最近7天: " + " | ".join(parts))


def _emit_job_detail(out, idx, j, runs, a, now_ms):
    """Print the detail section of one job."""
    status = a["status"]
    icon_label = {
        "ok": "正常", "warn": "异常", "silent": "静默", "disabled": "禁用",
    }.get(status, "?")
    out.item(f" [{idx}] {_job_display_name(j)} ({icon_label})")

    # ``or {}`` guards against an explicit ``"schedule": null``.
    schedule_text = format_schedule(j.get("schedule") or {})
    if status == "disabled":
        out.item(f" 调度: {schedule_text} | ID: {j.get('id', '?')}")
        out.line("")
        return

    out.item(f" 调度: {schedule_text}")
    state = j.get("state")
    if not isinstance(state, dict):
        state = {}

    last_run = state.get("lastRunAtMs")
    if last_run:
        ls = state.get("lastStatus") or state.get("lastRunStatus") or "?"
        ld = state.get("lastDurationMs")
        line = f" 上次执行: {fmt_ts(last_run)} | {ls}"
        if ld is not None:
            line += f" | {fmt_duration(ld)}"
        out.item(line)
    else:
        out.item(" 上次执行: 从未执行")

    nr = state.get("nextRunAtMs")
    if nr:
        delta = nr - now_ms
        if delta >= 0:
            out.item(f" 下次执行: {fmt_ts(nr)} (在 {fmt_age(delta)}后)")
        else:
            out.item(f" 下次执行: {fmt_ts(nr)} (已过期 {fmt_age(delta)})")

    if a["success_rate"] is not None:
        n = a["ok"] + a["fail"]
        out.item(f" 成功率: {a['success_rate']:.0f}% (最近 {n} 次: ok={a['ok']} fail={a['fail']})")

    if a["p50"] is not None:
        parts = [f"P50={fmt_duration(a['p50'])}"]
        if a["p95"] is not None and a["p95"] != a["p50"]:
            parts.append(f"P95={fmt_duration(a['p95'])}")
        if a["dur_max"] is not None and a["dur_max"] != a["p50"]:
            parts.append(f"Max={fmt_duration(a['dur_max'])}")
        out.item(" 耗时: " + " ".join(parts))

    payload_lines = _payload_summary(j)
    if payload_lines:
        out.item(" payload: " + " | ".join(payload_lines))

    recent = a["recent"]
    _emit_usage(out, recent)

    # NOTE(review): failure and delivery samples are assumed to be shown only
    # for jobs that are not "ok"; confirm against the intended layout.
    if status != "ok":
        cerr = state.get("consecutiveErrors", 0) or 0
        if cerr > 0:
            out.item(f" 连续失败: {cerr} 次")
        _emit_failure_samples(out, recent)
        _emit_delivery_failures(out, j, recent)

    _emit_weekly_trend(out, runs)
    out.line("")


def section_jobs(out: output.Output, jobs_file: str, state_file: str, runs_dir: str) -> None:
    """Report on OpenClaw scheduled jobs.

    Merges three sources: job definitions (``jobs_file``), externally stored
    job state (``state_file``, used only for jobs without inline state) and
    per-job run logs under ``runs_dir``. Prints a status overview plus one
    detail section per job, and records the same data through ``out.set_data``.

    Args:
        out: Report sink providing ``item``, ``line`` and ``set_data``.
        jobs_file: Path to jobs.json.
        state_file: Path to jobs-state.json (may be empty or missing).
        runs_dir: Directory containing ``<job id>.jsonl`` run logs.
    """
    out.item("【OpenClaw 定时任务】— jobs.json + jobs-state.json + runs/")
    if not os.path.isfile(jobs_file):
        out.item(" jobs.json 不存在 — 未创建过定时任务")
        return

    try:
        with open(jobs_file, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as e:
        out.item(f" jobs.json 解析失败: {e}")
        return

    jobs = _jobs_from_document(data)
    if not jobs:
        out.item(" jobs.json 存在但无任务")
        return

    ext_state = _load_external_state(out, state_file)
    for j in jobs:
        jid = j.get("id")
        # Inline state wins; external state only fills the gap.
        if isinstance(jid, str) and jid and not j.get("state") and jid in ext_state:
            j["state"] = ext_state[jid]

    now_ms = int(time.time() * 1000)
    analyses = []
    for j in jobs:
        runs = load_runs(runs_dir, j.get("id"))
        analyses.append((j, runs, analyze(j, runs, now_ms)))

    total = len(jobs)
    enabled_count = sum(1 for j in jobs if j.get("enabled", True))
    disabled_count = total - enabled_count
    out.item(f" 共 {total} 个任务({enabled_count} 启用, {disabled_count} 禁用)")
    out.line("")

    out.set_data("total_jobs", total)
    out.set_data("enabled_count", enabled_count)
    out.set_data("disabled_count", disabled_count)

    by_status = {name: [entry for entry in analyses if entry[2]["status"] == name]
                 for name in ("ok", "warn", "silent", "disabled")}
    out.set_data("status_overview", {
        name: [_job_display_name(j) for j, _, _ in group]
        for name, group in by_status.items()
    })

    jobs_payload = []
    for j, _, a in analyses:
        state = j.get("state")
        if not isinstance(state, dict):
            state = {}
        jobs_payload.append({
            "id": j.get("id"),
            "name": j.get("name") or j.get("id"),
            "status": a["status"],
            "schedule": j.get("schedule", {}),
            "success_rate": a["success_rate"],
            "p50_ms": a["p50"],
            "p95_ms": a["p95"],
            "last_run_ts": state.get("lastRunAtMs"),
            "next_run_ts": state.get("nextRunAtMs"),
            "consecutive_errors": state.get("consecutiveErrors", 0) or 0,
            "flags": [{"kind": k, "msg": m} for k, m in a["flags"]],
        })
    out.set_data("jobs", jobs_payload)

    _emit_status_overview(out, by_status["ok"], by_status["warn"],
                          by_status["silent"], by_status["disabled"])

    out.line("")
    out.item(" ── 任务详情 ──")
    out.line("")

    for idx, (j, runs, a) in enumerate(analyses, 1):
        _emit_job_detail(out, idx, j, runs, a, now_ms)
548
+
549
+
550
def _heartbeat_preview_lines(path):
    """Return up to five content lines of a HEARTBEAT.md file.

    Blank lines and lines starting with ``#``, a code fence or an HTML
    comment opener are not counted as content. Unreadable files yield ``[]``.
    """
    try:
        with open(path, encoding="utf-8", errors="replace") as f:
            content = f.read()
    except OSError:
        return []
    return [ln for ln in content.splitlines()
            if ln.strip() and not ln.startswith(("#", "```", "<!--"))][:5]


def _parse_heartbeat_record(raw):
    """Parse one JSON log line into ``(ts, level, msg, interval)`` or ``None``.

    ``None`` is returned for lines that are not valid JSON objects.
    """
    try:
        d = json.loads(raw)
    except (ValueError, RecursionError):
        return None
    if not isinstance(d, dict):
        return None
    ts = str(d.get("time", "?"))[:19]
    interval = ""
    first = d.get("1")
    if isinstance(first, dict):
        ms = first.get("intervalMs", 0)
        if isinstance(ms, (int, float)) and not isinstance(ms, bool):
            interval = f"interval={ms/1000:.0f}s"
        msg = str(d.get("2", ""))
    else:
        msg = str(d.get("1", ""))
        if isinstance(d.get("2"), str):
            msg += " | " + d["2"]
    meta = d.get("_meta")
    level = meta.get("logLevelName", "") if isinstance(meta, dict) else ""
    return ts, level, msg, interval


def section_heartbeat(out: output.Output, args) -> None:
    """Report on the agent heartbeat mechanism.

    Covers three things: the configured ``agents.defaults.heartbeat.every``
    value, whether each agent found under ``args.sessions_base`` has a
    non-empty HEARTBEAT.md in its configured workspace, and heartbeat records
    found in ``openclaw-*.log`` files under ``args.log_dir``. Results are also
    stored via ``out.set_data("heartbeat", ...)``.

    Args:
        out: Report sink providing ``item``, ``line``, ``evidence``, ``set_data``.
        args: Parsed CLI arguments; reads ``config``, ``sessions_base``, ``log_dir``.
    """
    out.line("")
    out.item("【OpenClaw Heartbeat】— Agent 定期唤醒机制,用于执行 HEARTBEAT.md 中的周期性任务")

    # The config is parsed once and reused for the agent list below.
    cfg = {}
    hb_every = "未配置"
    if os.path.isfile(args.config):
        try:
            with open(args.config, encoding="utf-8") as f:
                loaded = json.load(f)
            hb = loaded.get("agents", {}).get("defaults", {}).get("heartbeat", {})
            hb_every = hb.get("every", "未配置")
        except (OSError, ValueError, AttributeError):
            # Unreadable, invalid JSON, or an unexpected structure.
            hb_every = "读取失败"
        else:
            cfg = loaded
    out.item(f" 配置: agents.defaults.heartbeat.every = {hb_every}")
    hb_data = {"config_every": hb_every, "agents": {}}

    sessions_base = args.sessions_base
    if os.path.isdir(sessions_base):
        agents_cfg = cfg.get("agents") if isinstance(cfg, dict) else None
        agent_list = agents_cfg.get("list") if isinstance(agents_cfg, dict) else None
        agent_workspaces = {}
        for a in agent_list if isinstance(agent_list, list) else []:
            if isinstance(a, dict) and a.get("id"):
                ws = a.get("workspace", "")
                # Only a string can be joined into a path.
                agent_workspaces[a["id"]] = ws if isinstance(ws, str) else ""
        for agent_dir in sorted(glob.glob(os.path.join(sessions_base, "*"))):
            agent_id = os.path.basename(agent_dir)
            ws_dir = agent_workspaces.get(agent_id, "")
            hb_file = os.path.join(ws_dir, "HEARTBEAT.md") if ws_dir else ""
            if ws_dir and os.path.isfile(hb_file):
                lines = _heartbeat_preview_lines(hb_file)
                if not lines:
                    out.item(f" {agent_id}: HEARTBEAT.md 存在但为空(不会触发 heartbeat)")
                    hb_data["agents"][agent_id] = {"heartbeat_md": "empty"}
                else:
                    out.item(f" {agent_id}: HEARTBEAT.md 有内容(会触发 heartbeat)")
                    out.evidence(hb_file, "\n".join(lines))
                    hb_data["agents"][agent_id] = {"heartbeat_md": "active"}
            else:
                out.item(f" {agent_id}: HEARTBEAT.md 不存在")
                hb_data["agents"][agent_id] = {"heartbeat_md": "missing"}

    log_pattern = os.path.join(args.log_dir, "openclaw-*.log")
    interesting = []
    started = []
    for lf in sorted(glob.glob(log_pattern)):
        try:
            with open(lf, encoding="utf-8", errors="replace") as f:
                for raw in f:
                    # Cheap substring filter before paying for JSON parsing.
                    if "gateway/heartbeat" not in raw:
                        continue
                    record = _parse_heartbeat_record(raw)
                    if record is None:
                        continue
                    ts, level, msg, interval = record
                    if "started" in msg:
                        started.append(record)
                    else:
                        interesting.append(f"{ts} | {level} | {msg} {interval}".strip())
        except OSError:
            continue

    if interesting:
        out.item(f" heartbeat 有效事件 {len(interesting)} 条(另有 {len(started)} 条启动记录)")
        out.evidence("应用日志 (heartbeat)", "\n".join(interesting[:50]))
        hb_data["events"] = len(interesting)
        hb_data["started_count"] = len(started)
    elif started:
        intervals = sorted({s[3] for s in started if s[3]})
        out.item(f" heartbeat 调度器: {len(started)} 次启动记录,间隔 {'、'.join(intervals)}")
        hb_data["events"] = 0
        hb_data["started_count"] = len(started)
        hb_data["intervals"] = list(intervals)
    else:
        out.item(" heartbeat 日志: 0 条 — 未发现 heartbeat 相关记录")
        hb_data["events"] = 0
        hb_data["started_count"] = 0

    out.set_data("heartbeat", hb_data)
647
+
648
+
649
def section_system_crontab(out: output.Output) -> None:
    """Report the current user's system crontab entries.

    Runs ``crontab -l`` (5 s timeout). Entries are the non-blank,
    non-comment lines of its standard output. A missing ``crontab`` binary,
    a timeout, or the "no crontab for user" message is reported as "none";
    any other failure is reported as an error instead of being parsed as
    crontab content. The entries are stored under ``system_crontab``.
    """
    out.line("")
    out.item("【系统 crontab】")
    try:
        r = subprocess.run(["crontab", "-l"], capture_output=True, text=True,
                           timeout=5, check=False)
    except (subprocess.TimeoutExpired, OSError):
        # FileNotFoundError is an OSError: no crontab binary on this host.
        r = None

    if r is not None and r.returncode != 0:
        stderr = (r.stderr or "").strip()
        if stderr and "no crontab" not in stderr.lower():
            # stderr is a diagnostic, not crontab content.
            reason = stderr.splitlines()[0][:100]
            out.item(f" crontab -l 执行失败: {reason}")
            out.set_data("system_crontab", [])
            return

    text = r.stdout if (r is not None and r.returncode == 0) else ""
    if not text or not text.strip():
        out.item(" 无(未配置系统定时任务)")
        out.set_data("system_crontab", [])
        return

    # crontab(5): leading whitespace is ignored, so a comment is any line
    # whose first non-blank character is '#'.
    entries = [ln for ln in text.splitlines()
               if ln.strip() and not ln.lstrip().startswith("#")]
    if entries:
        out.item(f" 共 {len(entries)} 条")
        out.evidence("crontab -l", "\n".join(entries))
    else:
        out.item(" 无有效条目(仅注释)")
    out.set_data("system_crontab", entries)
669
+
670
+
671
def main() -> int:
    """Entry point: parse CLI arguments and run the three report sections.

    Returns whatever ``out.done()`` returns, which is used as the exit status.
    """
    args = cli.build_common_parser(
        description="模块 6:定时任务采集",
        prog="06_cron_jobs",
    ).parse_args()

    out = output.init("cron_jobs", json_mode=args.json, no_color=args.no_color)
    out.section("模块 6:定时任务")

    # All cron artefacts live under <openclaw_home>/cron.
    cron_dir = os.path.join(args.openclaw_home, "cron")
    section_jobs(
        out,
        os.path.join(cron_dir, "jobs.json"),
        os.path.join(cron_dir, "jobs-state.json"),
        os.path.join(cron_dir, "runs"),
    )
    section_heartbeat(out, args)
    section_system_crontab(out)

    return out.done()


if __name__ == "__main__":
    sys.exit(main())