openclaw-diag-cli 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -71
- package/bin/openclaw-diag.js +65 -176
- package/diag/01_sys_health.py +0 -2
- package/diag/02_environment.py +32 -6
- package/diag/03_configuration.py +4 -1
- package/diag/04_gateway.py +30 -8
- package/diag/05_recent_errors.py +24 -14
- package/diag/06_cron_jobs.py +4 -41
- package/diag/07_performance.py +114 -42
- package/diag/08_sessions.py +2 -54
- package/diag/09_plugin_diag.py +52 -25
- package/diag/10_shell_history.py +28 -10
- package/lib/bundle.py +6 -13
- package/ocdiag/__init__.py +1 -1
- package/ocdiag/cli.py +16 -1
- package/ocdiag/dispatcher.py +140 -53
- package/ocdiag/doctor.py +162 -0
- package/ocdiag/jsonlog.py +0 -5
- package/ocdiag/paths.py +0 -1
- package/ocdiag/recent_logs.py +0 -3
- package/ocdiag/sensitive.py +95 -1
- package/ocdiag/timeutil.py +0 -11
- package/ocdiag/tokens.py +0 -4
- package/package.json +2 -2
- package/tools/oc_session_extract.py +75 -7
- package/tools/oc_session_trace.py +31 -9
package/diag/09_plugin_diag.py
CHANGED
|
@@ -17,6 +17,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
17
17
|
|
|
18
18
|
from ocdiag import cli, output
|
|
19
19
|
from ocdiag.jsonlog import parse_name
|
|
20
|
+
from ocdiag.sensitive import sanitize_text
|
|
20
21
|
from ocdiag.timeutil import fmt_hms
|
|
21
22
|
|
|
22
23
|
|
|
@@ -110,13 +111,18 @@ def scan_logs(today_logs):
|
|
|
110
111
|
for logf in today_logs:
|
|
111
112
|
try:
|
|
112
113
|
fh = open(logf, "r", errors="replace")
|
|
113
|
-
except
|
|
114
|
+
except OSError:
|
|
115
|
+
# Best-effort: if today's log is unreadable, skip it; the parent
|
|
116
|
+
# caller still surfaces "no log data" via the empty plugin_diag
|
|
117
|
+
# output. (We don't fail the whole module for one missing file.)
|
|
114
118
|
continue
|
|
115
119
|
with fh:
|
|
116
120
|
for line in fh:
|
|
117
121
|
try:
|
|
118
122
|
o = json.loads(line)
|
|
119
|
-
except
|
|
123
|
+
except (json.JSONDecodeError, ValueError):
|
|
124
|
+
# Expected: log files are JSONL; non-JSON lines are emitted
|
|
125
|
+
# by Node before logger init. Drop those lines silently.
|
|
120
126
|
continue
|
|
121
127
|
plugin, sub = parse_name(o)
|
|
122
128
|
lvl = o.get("_meta", {}).get("logLevelName", "")
|
|
@@ -190,18 +196,23 @@ def scan_logs(today_logs):
|
|
|
190
196
|
|
|
191
197
|
|
|
192
198
|
def load_configured(config_path):
    """Load the per-plugin ``enabled`` flags from the JSON config file.

    Args:
        config_path: Path to the JSON config file. May be empty or ``None``.

    Returns:
        A ``(configured, status)`` tuple.

        ``configured`` maps every plugin id found under ``plugins.entries``
        to its ``enabled`` flag coerced to ``bool`` (missing flag -> False).
        Entries whose value is not a JSON object are ignored.

        ``status`` is ``{"found": True}`` when the file was read and parsed.
        Otherwise ``found`` is False and the dict carries ``reason``
        (``"config_not_found"`` or ``"config_unreadable"``), ``checked``
        (the path that was tried) and, for unreadable files, ``error``
        (the exception text truncated to 200 characters).

    The function never raises for a malformed config: a JSON document with
    an unexpected shape (non-object top level, null/non-object ``plugins``
    or ``entries``) simply yields an empty mapping.
    """
    if not config_path or not os.path.isfile(config_path):
        return {}, {"found": False, "reason": "config_not_found",
                    "checked": config_path or ""}

    try:
        # JSON is UTF-8 by specification; do not depend on the locale.
        with open(config_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {}, {"found": False, "reason": "config_unreadable",
                    "checked": config_path, "error": str(e)[:200]}

    # Walk the document defensively: any level may be null or the wrong type
    # in a hand-edited config, and a diagnostic tool must not crash on that.
    plugins = cfg.get("plugins") if isinstance(cfg, dict) else None
    entries = plugins.get("entries") if isinstance(plugins, dict) else None
    if not isinstance(entries, dict):
        entries = {}

    configured = {
        plugin_id: bool(entry.get("enabled", False))
        for plugin_id, entry in entries.items()
        if isinstance(entry, dict)
    }
    return configured, {"found": True}
|
|
205
216
|
|
|
206
217
|
|
|
207
218
|
def load_extensions(oc_home):
|
|
@@ -274,7 +285,10 @@ def section_state(out, scan, configured, extensions):
|
|
|
274
285
|
})
|
|
275
286
|
|
|
276
287
|
|
|
277
|
-
def section_errors(out, scan, configured):
|
|
288
|
+
def section_errors(out, scan, configured, unmask=False):
|
|
289
|
+
def _scrub(s: str) -> str:
|
|
290
|
+
return s if unmask else sanitize_text(s)
|
|
291
|
+
|
|
278
292
|
out.subsection("9.2 插件错误/警告")
|
|
279
293
|
plugin_level_counts = scan["plugin_level_counts"]
|
|
280
294
|
plugin_error_samples = scan["plugin_error_samples"]
|
|
@@ -313,9 +327,11 @@ def section_errors(out, scan, configured):
|
|
|
313
327
|
if samples:
|
|
314
328
|
for ts, lvl, text in dedup_messages(samples, max_unique=999):
|
|
315
329
|
tag = {"ERROR": "E", "FATAL": "F", "WARN": "W"}.get(lvl, "?")
|
|
316
|
-
snippet = text.replace("\n", " ")
|
|
330
|
+
snippet = _scrub(text.replace("\n", " "))
|
|
317
331
|
out.item(f" [{tag}] {fmt_hms(ts)}: {snippet}")
|
|
318
|
-
sample_payload.append({
|
|
332
|
+
sample_payload.append({
|
|
333
|
+
"ts": ts, "level": lvl, "msg": _scrub(text[:300]),
|
|
334
|
+
})
|
|
319
335
|
if err > 0 or warn > 0 or sample_payload:
|
|
320
336
|
errors_payload[p] = {
|
|
321
337
|
"error_count": err,
|
|
@@ -331,9 +347,9 @@ def section_errors(out, scan, configured):
|
|
|
331
347
|
out.item(f"[plugin-manager]: {len(pm_errors)} ERROR, {len(pm_warns)} WARN, "
|
|
332
348
|
f"{len(plugin_diag_messages)} total")
|
|
333
349
|
for ts, _lvl, text in dedup_messages(pm_errors, max_unique=999):
|
|
334
|
-
out.item(f" [E] {fmt_hms(ts)}: {text.replace(chr(10),' ')}")
|
|
350
|
+
out.item(f" [E] {fmt_hms(ts)}: {_scrub(text.replace(chr(10),' '))}")
|
|
335
351
|
for ts, _lvl, text in dedup_messages(pm_warns, max_unique=999):
|
|
336
|
-
out.item(f" [W] {fmt_hms(ts)}: {text.replace(chr(10),' ')}")
|
|
352
|
+
out.item(f" [W] {fmt_hms(ts)}: {_scrub(text.replace(chr(10),' '))}")
|
|
337
353
|
elif plugin_diag_messages:
|
|
338
354
|
out.item(f"[plugin-manager]: 0 ERROR, 0 WARN, {len(plugin_diag_messages)} total")
|
|
339
355
|
|
|
@@ -444,12 +460,15 @@ def walk_urls(val, out_set):
|
|
|
444
460
|
walk_urls(v, out_set)
|
|
445
461
|
|
|
446
462
|
|
|
447
|
-
def section_deps(out, config_path):
|
|
463
|
+
def section_deps(out, config_path, unmask=False):
|
|
448
464
|
out.subsection("9.5 插件外部依赖")
|
|
449
465
|
plugin_deps = {}
|
|
450
466
|
if not (config_path and os.path.isfile(config_path)):
|
|
451
467
|
out.item("未发现已启用插件的外部依赖配置")
|
|
452
468
|
out.set_data("plugin_deps", {})
|
|
469
|
+
out.set_data("plugin_deps_status",
|
|
470
|
+
{"found": False, "reason": "config_not_found",
|
|
471
|
+
"checked": config_path or ""})
|
|
453
472
|
return
|
|
454
473
|
try:
|
|
455
474
|
with open(config_path) as f:
|
|
@@ -464,8 +483,13 @@ def section_deps(out, config_path):
|
|
|
464
483
|
walk_urls(pconf, hosts)
|
|
465
484
|
hosts = {h for h in hosts if not h.startswith(("127.", "localhost", "0.0.0.0"))}
|
|
466
485
|
plugin_deps[pid] = hosts
|
|
467
|
-
except
|
|
468
|
-
|
|
486
|
+
except (OSError, json.JSONDecodeError) as e:
|
|
487
|
+
out.item(f"配置读取/解析失败: {type(e).__name__}")
|
|
488
|
+
out.set_data("plugin_deps", {})
|
|
489
|
+
out.set_data("plugin_deps_status",
|
|
490
|
+
{"found": False, "reason": "config_unreadable",
|
|
491
|
+
"checked": config_path, "error": str(e)[:200]})
|
|
492
|
+
return
|
|
469
493
|
|
|
470
494
|
if not plugin_deps:
|
|
471
495
|
out.item("未发现已启用插件的外部依赖配置")
|
|
@@ -509,7 +533,6 @@ def section_deps(out, config_path):
|
|
|
509
533
|
def main() -> int:
|
|
510
534
|
parser = cli.build_common_parser(
|
|
511
535
|
description="模块 9:插件诊断",
|
|
512
|
-
prog="09_plugin_diag",
|
|
513
536
|
)
|
|
514
537
|
args = parser.parse_args()
|
|
515
538
|
out = output.init("plugin_diag", json_mode=args.json, no_color=args.no_color)
|
|
@@ -519,14 +542,18 @@ def main() -> int:
|
|
|
519
542
|
today_logs = sorted(glob.glob(os.path.join(args.log_dir, f"openclaw-{today}.log")))
|
|
520
543
|
|
|
521
544
|
scan = scan_logs(today_logs)
|
|
522
|
-
configured = load_configured(args.config)
|
|
545
|
+
configured, configured_status = load_configured(args.config)
|
|
523
546
|
extensions = load_extensions(args.openclaw_home)
|
|
547
|
+
if not configured_status.get("found", True):
|
|
548
|
+
out.item(f"配置加载失败: {configured_status.get('reason')} "
|
|
549
|
+
f"({configured_status.get('checked')})")
|
|
550
|
+
out.set_data("configured_status", configured_status)
|
|
524
551
|
|
|
525
552
|
section_state(out, scan, configured, extensions)
|
|
526
|
-
section_errors(out, scan, configured)
|
|
553
|
+
section_errors(out, scan, configured, unmask=args.unmask)
|
|
527
554
|
section_hooks(out, scan)
|
|
528
555
|
section_channels(out, scan)
|
|
529
|
-
section_deps(out, args.config)
|
|
556
|
+
section_deps(out, args.config, unmask=args.unmask)
|
|
530
557
|
|
|
531
558
|
return out.done()
|
|
532
559
|
|
package/diag/10_shell_history.py
CHANGED
|
@@ -12,6 +12,7 @@ from typing import List, Tuple
|
|
|
12
12
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
13
13
|
|
|
14
14
|
from ocdiag import cli, output
|
|
15
|
+
from ocdiag.sensitive import sanitize_text
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
DANGEROUS_RE = re.compile(
|
|
@@ -34,26 +35,34 @@ def list_history_files() -> List[str]:
|
|
|
34
35
|
return [c for c in candidates if os.path.isfile(c)]
|
|
35
36
|
|
|
36
37
|
|
|
37
|
-
def read_lines(path: str) -> List[Tuple[int, str]]:
|
|
38
|
+
def read_lines(path: str) -> Tuple[List[Tuple[int, str]], str]:
    """Read a shell history file into numbered lines.

    Args:
        path: Path of the history file to read.

    Returns:
        ``(lines, error)``. ``lines`` is a list of ``(line_number, text)``
        pairs, numbered from 1, with the trailing newline removed. ``error``
        is ``""`` on success, otherwise ``"<ExceptionName>: <message>"`` for
        the ``OSError`` that stopped the read. Whatever was read before the
        failure is still returned, so the caller can tell "no commands"
        apart from "could not read".
    """
    out: List[Tuple[int, str]] = []
    try:
        # Decode as UTF-8 explicitly. With the locale default, a non-UTF-8
        # locale would turn every non-ASCII byte into U+FFFD because of
        # errors="replace", silently corrupting the collected evidence.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            for lineno, line in enumerate(f, 1):
                out.append((lineno, line.rstrip("\n")))
        return out, ""
    except OSError as e:
        return out, f"{type(e).__name__}: {e}"
|
|
46
52
|
|
|
47
53
|
|
|
48
54
|
def main() -> int:
|
|
49
55
|
parser = cli.build_common_parser(
|
|
50
56
|
description="模块 10:采集 shell 历史",
|
|
51
|
-
prog="10_shell_history",
|
|
52
57
|
)
|
|
53
58
|
args = parser.parse_args()
|
|
54
59
|
|
|
55
60
|
out = output.init("shell_history", json_mode=args.json, no_color=args.no_color)
|
|
56
61
|
out.section("模块 10:命令执行历史")
|
|
62
|
+
|
|
63
|
+
def maybe_sanitize(s: str) -> str:
|
|
64
|
+
return s if args.unmask else sanitize_text(s)
|
|
65
|
+
|
|
57
66
|
out.line(" 系统 shell 历史记录,用于判断是否有人或脚本执行过高危命令"
|
|
58
67
|
"(rm -rf、kill、systemctl stop 等)。")
|
|
59
68
|
out.line("")
|
|
@@ -66,7 +75,16 @@ def main() -> int:
|
|
|
66
75
|
|
|
67
76
|
files_data = []
|
|
68
77
|
for hfile in history_files:
|
|
69
|
-
lines = read_lines(hfile)
|
|
78
|
+
lines, read_err = read_lines(hfile)
|
|
79
|
+
if read_err:
|
|
80
|
+
out.item(f"{os.path.basename(hfile)} — 读取失败 ({read_err})")
|
|
81
|
+
files_data.append({
|
|
82
|
+
"path": hfile,
|
|
83
|
+
"found": False,
|
|
84
|
+
"reason": "unreadable",
|
|
85
|
+
"error": read_err,
|
|
86
|
+
})
|
|
87
|
+
continue
|
|
70
88
|
total = len(lines)
|
|
71
89
|
out.item(f"{os.path.basename(hfile)} — 共 {total} 条记录")
|
|
72
90
|
|
|
@@ -79,7 +97,7 @@ def main() -> int:
|
|
|
79
97
|
|
|
80
98
|
if dangerous:
|
|
81
99
|
out.item(f" 高危命令: {len(dangerous)} 条 ")
|
|
82
|
-
ev = "\n".join(f"{n}: {ln}" for n, ln in dangerous)
|
|
100
|
+
ev = "\n".join(f"{n}: {maybe_sanitize(ln)}" for n, ln in dangerous)
|
|
83
101
|
out.evidence(f"{hfile} (高危)", ev)
|
|
84
102
|
else:
|
|
85
103
|
out.item(" 高危命令: 0 条")
|
|
@@ -92,7 +110,7 @@ def main() -> int:
|
|
|
92
110
|
f" OpenClaw 相关命令: 全文 {oc_total} 条,最近 30 条采样 {len(oc_cmds)} 条 — "
|
|
93
111
|
"用户手动执行的 openclaw 命令"
|
|
94
112
|
)
|
|
95
|
-
ev = "\n".join(f"{n}: {ln}" for n, ln in oc_cmds)
|
|
113
|
+
ev = "\n".join(f"{n}: {maybe_sanitize(ln)}" for n, ln in oc_cmds)
|
|
96
114
|
out.evidence(f"{hfile} (openclaw)", ev)
|
|
97
115
|
else:
|
|
98
116
|
out.item(" OpenClaw 相关命令: 0 条")
|
|
@@ -100,14 +118,14 @@ def main() -> int:
|
|
|
100
118
|
recent = lines[-20:]
|
|
101
119
|
if recent:
|
|
102
120
|
out.item(" 最近 20 条命令:")
|
|
103
|
-
ev = "\n".join(ln for _, ln in recent)
|
|
121
|
+
ev = "\n".join(maybe_sanitize(ln) for _, ln in recent)
|
|
104
122
|
out.evidence(f"{hfile} (最近)", ev)
|
|
105
123
|
|
|
106
124
|
files_data.append({
|
|
107
125
|
"path": hfile,
|
|
108
126
|
"total_lines": total,
|
|
109
127
|
"dangerous_count": len(dangerous),
|
|
110
|
-
"dangerous": [{"line": n, "cmd": ln} for n, ln in dangerous],
|
|
128
|
+
"dangerous": [{"line": n, "cmd": maybe_sanitize(ln)} for n, ln in dangerous],
|
|
111
129
|
"openclaw_count_total": oc_total,
|
|
112
130
|
"openclaw_count_sample_30": len(oc_cmds),
|
|
113
131
|
"recent_count": len(recent),
|
package/lib/bundle.py
CHANGED
|
@@ -21,19 +21,12 @@ from typing import Dict, List, Optional
|
|
|
21
21
|
|
|
22
22
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
23
23
|
|
|
24
|
-
#
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
("recent_errors", "diag/05_recent_errors.py"),
|
|
31
|
-
("cron_jobs", "diag/06_cron_jobs.py"),
|
|
32
|
-
("performance", "diag/07_performance.py"),
|
|
33
|
-
("sessions", "diag/08_sessions.py"),
|
|
34
|
-
("plugin_diag", "diag/09_plugin_diag.py"),
|
|
35
|
-
("shell_history", "diag/10_shell_history.py"),
|
|
36
|
-
]
|
|
24
|
+
# Single source of truth: ocdiag.dispatcher. We import it dynamically so the
|
|
25
|
+
# bundle script stays runnable from a fresh checkout (no install needed).
|
|
26
|
+
sys.path.insert(0, str(REPO_ROOT))
|
|
27
|
+
from ocdiag.dispatcher import STATE_COLLECTORS # noqa: E402
|
|
28
|
+
|
|
29
|
+
MODULES: List[tuple] = [(mid, rel) for mid, _label, rel in STATE_COLLECTORS]
|
|
37
30
|
MODULE_BY_ID = {mid: rel for mid, rel in MODULES}
|
|
38
31
|
|
|
39
32
|
# Order matters: each submodule is exec'd into its own module object, and its
|
package/ocdiag/__init__.py
CHANGED
package/ocdiag/cli.py
CHANGED
|
@@ -1,14 +1,23 @@
|
|
|
1
|
-
"""Common argparse setup for diag scripts.
|
|
1
|
+
"""Common argparse setup for diag scripts.
|
|
2
|
+
|
|
3
|
+
When invoked via the dispatcher (`openclaw-diag <id>`), the dispatcher exports
|
|
4
|
+
OPENCLAW_DIAG_PROG="openclaw-diag <id>" before running the script so argparse
|
|
5
|
+
uses that as `prog`. When you run the script directly (e.g.
|
|
6
|
+
`python3 diag/01_sys_health.py`), argparse falls back to the script basename.
|
|
7
|
+
"""
|
|
2
8
|
|
|
3
9
|
from __future__ import annotations
|
|
4
10
|
|
|
5
11
|
import argparse
|
|
12
|
+
import os
|
|
6
13
|
from typing import Optional
|
|
7
14
|
|
|
8
15
|
from . import paths
|
|
9
16
|
|
|
10
17
|
|
|
11
18
|
def build_common_parser(description: str, prog: Optional[str] = None) -> argparse.ArgumentParser:
|
|
19
|
+
if prog is None:
|
|
20
|
+
prog = os.environ.get("OPENCLAW_DIAG_PROG") or None
|
|
12
21
|
p = argparse.ArgumentParser(
|
|
13
22
|
prog=prog,
|
|
14
23
|
description=description,
|
|
@@ -36,4 +45,10 @@ def build_common_parser(description: str, prog: Optional[str] = None) -> argpars
|
|
|
36
45
|
)
|
|
37
46
|
p.add_argument("--json", action="store_true", help="Emit JSON output")
|
|
38
47
|
p.add_argument("--no-color", action="store_true", help="Disable colored output")
|
|
48
|
+
p.add_argument(
|
|
49
|
+
"--unmask",
|
|
50
|
+
action="store_true",
|
|
51
|
+
help="Disable default sanitization of secrets in free-form text "
|
|
52
|
+
"(shell history / plugin errors / systemd / sessions). Off by default.",
|
|
53
|
+
)
|
|
39
54
|
return p
|
package/ocdiag/dispatcher.py
CHANGED
|
@@ -5,39 +5,42 @@ Layout:
|
|
|
5
5
|
ocdiag <object-inspector> ARG runs that inspector (e.g. `ocdiag trace UUID`)
|
|
6
6
|
ocdiag all [--skip a,b] runs every state collector
|
|
7
7
|
ocdiag list prints the catalogue grouped by parameter mode
|
|
8
|
-
ocdiag
|
|
8
|
+
ocdiag bundle <id> emits a self-contained single-file .py
|
|
9
|
+
ocdiag doctor environment health check
|
|
9
10
|
"""
|
|
10
11
|
|
|
11
12
|
from __future__ import annotations
|
|
12
13
|
|
|
14
|
+
import json
|
|
13
15
|
import os
|
|
14
16
|
import runpy
|
|
15
17
|
import sys
|
|
16
18
|
import time
|
|
19
|
+
import traceback
|
|
17
20
|
from pathlib import Path
|
|
18
|
-
from typing import List
|
|
21
|
+
from typing import List, Optional, Tuple
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
22
25
|
|
|
23
26
|
# State collectors: zero required args, parameter-free observation of system state.
|
|
24
27
|
STATE_COLLECTORS = [
|
|
25
|
-
("sys_health", "
|
|
26
|
-
("environment", "OpenClaw
|
|
27
|
-
("configuration", "
|
|
28
|
-
("gateway", "Gateway
|
|
29
|
-
("recent_errors", "
|
|
30
|
-
("cron_jobs", "
|
|
31
|
-
("performance", "
|
|
32
|
-
("sessions", "Session
|
|
33
|
-
("plugin_diag", "
|
|
34
|
-
("shell_history", "Shell
|
|
28
|
+
("sys_health", "系统健康(DNS / 网络 / CPU / 内存 / 磁盘 / 进程 / 时间)", "diag/01_sys_health.py"),
|
|
29
|
+
("environment", "OpenClaw 版本、Gateway 进程环境变量", "diag/02_environment.py"),
|
|
30
|
+
("configuration", "openclaw.json 展平(敏感字段脱敏)", "diag/03_configuration.py"),
|
|
31
|
+
("gateway", "Gateway 进程、端口、24h 重启、WS 生命周期、错误码", "diag/04_gateway.py"),
|
|
32
|
+
("recent_errors", "应用日志 / journalctl / session 工具调用错误聚合", "diag/05_recent_errors.py"),
|
|
33
|
+
("cron_jobs", "定时任务状态、连续失败、调度漂移、静默检测", "diag/06_cron_jobs.py"),
|
|
34
|
+
("performance", "模型/工具耗时 P50/P95、慢调用、E2E 延迟、Cache 命中率", "diag/07_performance.py"),
|
|
35
|
+
("sessions", "Session 总览、活跃度、Stuck 探测", "diag/08_sessions.py"),
|
|
36
|
+
("plugin_diag", "插件状态一致性、ERROR/WARN、Hook、Channel、外部 DNS", "diag/09_plugin_diag.py"),
|
|
37
|
+
("shell_history", "Shell 历史中的高危命令与最近操作", "diag/10_shell_history.py"),
|
|
35
38
|
]
|
|
36
39
|
|
|
37
40
|
# Object inspectors: take a session uuid (or other identifier) and inspect it.
|
|
38
41
|
OBJECT_INSPECTORS = [
|
|
39
|
-
("trace", "
|
|
40
|
-
("extract", "导出 session 为可读格式",
|
|
42
|
+
("trace", "追踪一条用户消息从进入到响应的完整时间轴", "tools/oc_session_trace.py"),
|
|
43
|
+
("extract", "导出 session.jsonl 为可读格式", "tools/oc_session_extract.py"),
|
|
41
44
|
]
|
|
42
45
|
|
|
43
46
|
STATE_BY_ID = {mid: (label, script) for mid, label, script in STATE_COLLECTORS}
|
|
@@ -46,31 +49,67 @@ MODULE_BY_ID = {**STATE_BY_ID, **OBJECT_BY_ID}
|
|
|
46
49
|
MODULE_IDS = set(MODULE_BY_ID.keys())
|
|
47
50
|
|
|
48
51
|
|
|
52
|
+
def cmd_list_json() -> int:
    """Print the module catalogue as a single line of JSON on stdout.

    The document has two keys, ``state_collectors`` and
    ``object_inspectors``; each holds a list of ``{"id", "label", "script"}``
    objects built from the corresponding module-level catalogue, in
    catalogue order. Non-ASCII labels are emitted verbatim rather than
    ``\\u``-escaped.

    NOTE(review): per the original note this is the machine-readable
    catalogue intended for the Node shell -- confirm against the caller.

    Returns:
        Always 0.
    """
    def _rows(catalogue):
        # Each catalogue entry is an (id, label, script) triple.
        return [
            {"id": mid, "label": label, "script": rel}
            for mid, label, rel in catalogue
        ]

    document = {
        "state_collectors": _rows(STATE_COLLECTORS),
        "object_inspectors": _rows(OBJECT_INSPECTORS),
    }
    print(json.dumps(document, ensure_ascii=False))
    return 0
|
|
67
|
+
|
|
68
|
+
|
|
49
69
|
def cmd_list() -> int:
|
|
50
|
-
print("
|
|
70
|
+
print("openclaw-diag — 可用诊断")
|
|
51
71
|
print()
|
|
52
|
-
print("
|
|
72
|
+
print(" 扫描类(无需参数):")
|
|
53
73
|
for mid, label, _ in STATE_COLLECTORS:
|
|
54
74
|
print(f" {mid:<16s} {label}")
|
|
55
75
|
print()
|
|
56
|
-
print("
|
|
76
|
+
print(" 对象类(需要 session uuid):")
|
|
57
77
|
for mid, label, _ in OBJECT_INSPECTORS:
|
|
58
78
|
print(f" {mid:<16s} {label}")
|
|
59
79
|
print()
|
|
60
|
-
print("
|
|
61
|
-
print(" all
|
|
62
|
-
print(" doctor 检查 Node/Python/OpenClaw 环境")
|
|
63
|
-
print(" bundle <id>
|
|
80
|
+
print(" 其它命令:")
|
|
81
|
+
print(" all 一次跑完所有扫描类")
|
|
82
|
+
print(" doctor 检查 Node / Python / openclaw-diag / OpenClaw 环境")
|
|
83
|
+
print(" bundle <id> 生成 self-contained 单文件 .py(离线机器用)")
|
|
64
84
|
return 0
|
|
65
85
|
|
|
66
86
|
|
|
67
|
-
def run_script(
|
|
87
|
+
def run_script(
|
|
88
|
+
script_rel: str,
|
|
89
|
+
extra_args: List[str],
|
|
90
|
+
module_id: Optional[str] = None,
|
|
91
|
+
) -> int:
|
|
92
|
+
"""Execute a diag script in-process. Returns the rc.
|
|
93
|
+
|
|
94
|
+
On crash, in addition to the human-readable stderr trace we emit a single
|
|
95
|
+
NDJSON error record to stdout when the script was invoked with --json.
|
|
96
|
+
This guarantees `all --json` produces N records for N modules — including
|
|
97
|
+
crashes — so downstream parsers don't silently lose modules. (Axiom #4)
|
|
98
|
+
"""
|
|
68
99
|
script_path = REPO_ROOT / script_rel
|
|
69
100
|
if not script_path.is_file():
|
|
70
101
|
print(f"Error: script not found: {script_path}", file=sys.stderr)
|
|
71
102
|
return 2
|
|
103
|
+
json_mode = "--json" in extra_args
|
|
104
|
+
mid = module_id or script_path.stem
|
|
72
105
|
saved_argv = sys.argv[:]
|
|
106
|
+
saved_prog = os.environ.get("OPENCLAW_DIAG_PROG")
|
|
73
107
|
try:
|
|
108
|
+
# runpy.run_path resets sys.argv[0] to the script path, so we
|
|
109
|
+
# advertise the user-facing name through an env var instead. cli.py
|
|
110
|
+
# picks it up as the argparse prog so --help reads as
|
|
111
|
+
# "openclaw-diag sys_health" rather than "01_sys_health.py".
|
|
112
|
+
os.environ["OPENCLAW_DIAG_PROG"] = f"openclaw-diag {mid}"
|
|
74
113
|
sys.argv = [str(script_path), *extra_args]
|
|
75
114
|
runpy.run_path(str(script_path), run_name="__main__")
|
|
76
115
|
return 0
|
|
@@ -79,14 +118,32 @@ def run_script(script_rel: str, extra_args: List[str]) -> int:
|
|
|
79
118
|
return int(e.code) if e.code is not None else 0
|
|
80
119
|
except (TypeError, ValueError):
|
|
81
120
|
return 1
|
|
82
|
-
except
|
|
121
|
+
except BaseException as e: # noqa: BLE001 — emit then re-classify
|
|
83
122
|
print(f" ERROR: {script_path.name} crashed: {type(e).__name__}: {e}",
|
|
84
123
|
file=sys.stderr)
|
|
85
|
-
import traceback
|
|
86
124
|
traceback.print_exc(file=sys.stderr)
|
|
125
|
+
if json_mode:
|
|
126
|
+
err_record = {
|
|
127
|
+
"module": mid,
|
|
128
|
+
"status": "error",
|
|
129
|
+
"error": f"{type(e).__name__}: {e}",
|
|
130
|
+
"traceback": traceback.format_exc(),
|
|
131
|
+
}
|
|
132
|
+
try:
|
|
133
|
+
sys.stdout.write(json.dumps(err_record, ensure_ascii=False) + "\n")
|
|
134
|
+
sys.stdout.flush()
|
|
135
|
+
except Exception:
|
|
136
|
+
# If stdout itself is broken (closed pipe), there's nothing
|
|
137
|
+
# productive to do — the stderr trace above already records
|
|
138
|
+
# the crash.
|
|
139
|
+
pass
|
|
87
140
|
return 2
|
|
88
141
|
finally:
|
|
89
142
|
sys.argv = saved_argv
|
|
143
|
+
if saved_prog is None:
|
|
144
|
+
os.environ.pop("OPENCLAW_DIAG_PROG", None)
|
|
145
|
+
else:
|
|
146
|
+
os.environ["OPENCLAW_DIAG_PROG"] = saved_prog
|
|
90
147
|
|
|
91
148
|
|
|
92
149
|
def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
|
|
@@ -101,7 +158,7 @@ def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
|
|
|
101
158
|
n += 1
|
|
102
159
|
print(f"\n[{n}/{total}] {label} ({mid})...", flush=True, file=progress_stream)
|
|
103
160
|
t0 = time.time()
|
|
104
|
-
rc = run_script(script, extra_args)
|
|
161
|
+
rc = run_script(script, extra_args, module_id=mid)
|
|
105
162
|
elapsed = time.time() - t0
|
|
106
163
|
print(f"[{n}/{total}] {label} ({mid}) ... done ({elapsed:.1f}s)",
|
|
107
164
|
flush=True, file=progress_stream)
|
|
@@ -110,7 +167,27 @@ def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
|
|
|
110
167
|
return rc_overall
|
|
111
168
|
|
|
112
169
|
|
|
113
|
-
def
|
|
170
|
+
def cmd_bundle(rest: List[str]) -> int:
|
|
171
|
+
"""Generate a self-contained single-file diag script.
|
|
172
|
+
|
|
173
|
+
Lives here (rather than in lib/bundle.py only) so the Python entry has
|
|
174
|
+
parity with Node — `python3 bin/ocdiag bundle gateway` works the same as
|
|
175
|
+
`node bin/openclaw-diag.js bundle gateway`. (Axiom #3)
|
|
176
|
+
"""
|
|
177
|
+
if not rest or rest[0] in ("-h", "--help"):
|
|
178
|
+
print("Usage: openclaw-diag bundle <id>", file=sys.stderr)
|
|
179
|
+
print(" Emits the bundle to stdout. Use shell redirection to save.", file=sys.stderr)
|
|
180
|
+
print(file=sys.stderr)
|
|
181
|
+
print("Available ids:", file=sys.stderr)
|
|
182
|
+
for mid, _label, _ in STATE_COLLECTORS:
|
|
183
|
+
print(f" {mid}", file=sys.stderr)
|
|
184
|
+
return 0 if rest else 2
|
|
185
|
+
sys.path.insert(0, str(REPO_ROOT / "lib"))
|
|
186
|
+
import bundle # type: ignore
|
|
187
|
+
return bundle.main(rest)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _split_skip(rest: List[str]) -> Tuple[List[str], List[str]]:
|
|
114
191
|
"""Pull out --skip a,b out of an argv tail; return (skip_ids, passthrough)."""
|
|
115
192
|
skip_ids: List[str] = []
|
|
116
193
|
passthrough: List[str] = []
|
|
@@ -126,22 +203,30 @@ def _split_skip(rest: List[str]) -> (List[str], List[str]):
|
|
|
126
203
|
return skip_ids, passthrough
|
|
127
204
|
|
|
128
205
|
|
|
206
|
+
def _suggest_command(unknown: str) -> Optional[str]:
    """Return the closest known command name for a mistyped one, if any.

    Candidates are every id in ``MODULE_BY_ID`` followed by the built-in
    commands ``all``, ``list``, ``doctor`` and ``bundle``. Matching uses
    ``difflib.get_close_matches`` with a similarity cutoff of 0.6.

    Args:
        unknown: The command string that was not recognised.

    Returns:
        The single best match, or ``None`` when nothing is similar enough.
    """
    from difflib import get_close_matches

    known = [*MODULE_BY_ID, "all", "list", "doctor", "bundle"]
    best = get_close_matches(unknown, known, n=1, cutoff=0.6)
    if not best:
        return None
    return best[0]
|
|
212
|
+
|
|
213
|
+
|
|
129
214
|
def print_help() -> None:
|
|
130
|
-
print("
|
|
215
|
+
print("openclaw-diag — OpenClaw 诊断工具箱")
|
|
131
216
|
print()
|
|
132
|
-
print("
|
|
133
|
-
print("
|
|
134
|
-
print("
|
|
135
|
-
print("
|
|
136
|
-
print("
|
|
217
|
+
print("用法:")
|
|
218
|
+
print(" openclaw-diag <id> [args...] 跑单个诊断")
|
|
219
|
+
print(" openclaw-diag all [--skip a,b] 跑全部 state collectors")
|
|
220
|
+
print(" openclaw-diag list 列出所有诊断")
|
|
221
|
+
print(" openclaw-diag doctor 检查环境")
|
|
222
|
+
print(" openclaw-diag bundle <id> 生成单文件 .py")
|
|
137
223
|
print()
|
|
138
|
-
print("
|
|
224
|
+
print("扫描类(无需参数):")
|
|
139
225
|
print(" " + " ".join(mid for mid, _, _ in STATE_COLLECTORS))
|
|
140
|
-
print("
|
|
226
|
+
print("对象类(需要 session uuid):")
|
|
141
227
|
print(" " + " ".join(mid for mid, _, _ in OBJECT_INSPECTORS))
|
|
142
228
|
print()
|
|
143
|
-
print("--
|
|
144
|
-
print("其它参数(--config / --log-dir / --json / --no-color)原样传递给脚本。")
|
|
229
|
+
print("常用 flag:--json(结构化输出) --no-color(关掉颜色) --unmask(不脱敏)")
|
|
145
230
|
|
|
146
231
|
|
|
147
232
|
def main(argv=None) -> int:
|
|
@@ -154,33 +239,35 @@ def main(argv=None) -> int:
|
|
|
154
239
|
head, rest = argv[0], argv[1:]
|
|
155
240
|
|
|
156
241
|
if head == "list":
|
|
242
|
+
if "--json" in rest:
|
|
243
|
+
return cmd_list_json()
|
|
157
244
|
return cmd_list()
|
|
158
245
|
|
|
246
|
+
if head == "doctor":
|
|
247
|
+
from ocdiag import doctor
|
|
248
|
+
json_mode = "--json" in rest
|
|
249
|
+
node_version = None
|
|
250
|
+
for i, a in enumerate(rest):
|
|
251
|
+
if a == "--node-version" and i + 1 < len(rest):
|
|
252
|
+
node_version = rest[i + 1]
|
|
253
|
+
break
|
|
254
|
+
return doctor.run(json_mode=json_mode, node_version=node_version)
|
|
255
|
+
|
|
159
256
|
if head == "all":
|
|
160
257
|
skip_ids, passthrough = _split_skip(rest)
|
|
161
258
|
return cmd_all(passthrough, skip_ids)
|
|
162
259
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if not rest:
|
|
166
|
-
print("Error: run requires a target (module id or 'all').", file=sys.stderr)
|
|
167
|
-
return 2
|
|
168
|
-
target, sub = rest[0], rest[1:]
|
|
169
|
-
if target == "all":
|
|
170
|
-
skip_ids, passthrough = _split_skip(sub)
|
|
171
|
-
return cmd_all(passthrough, skip_ids)
|
|
172
|
-
if target in MODULE_BY_ID:
|
|
173
|
-
_, script = MODULE_BY_ID[target]
|
|
174
|
-
return run_script(script, sub)
|
|
175
|
-
print(f"Error: unknown diagnostic '{target}'. Use `ocdiag list`.", file=sys.stderr)
|
|
176
|
-
return 2
|
|
260
|
+
if head == "bundle":
|
|
261
|
+
return cmd_bundle(rest)
|
|
177
262
|
|
|
178
263
|
if head in MODULE_BY_ID:
|
|
179
264
|
_, script = MODULE_BY_ID[head]
|
|
180
|
-
return run_script(script, rest)
|
|
265
|
+
return run_script(script, rest, module_id=head)
|
|
181
266
|
|
|
182
|
-
|
|
183
|
-
|
|
267
|
+
suggestion = _suggest_command(head)
|
|
268
|
+
hint = f"(你是不是想说 `{suggestion}`?)" if suggestion else ""
|
|
269
|
+
print(f"Error: 未知命令 '{head}'{hint}", file=sys.stderr)
|
|
270
|
+
print(f"运行 `openclaw-diag list` 查看全部诊断。", file=sys.stderr)
|
|
184
271
|
return 2
|
|
185
272
|
|
|
186
273
|
|