npm - openclaw-diag-cli - Versions diffs - 0.1.3 → 0.2.1 - Mend

openclaw-diag-cli 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +84 -71
package/bin/openclaw-diag.js +65 -176
package/diag/01_sys_health.py +0 -2
package/diag/02_environment.py +32 -6
package/diag/03_configuration.py +4 -1
package/diag/04_gateway.py +30 -8
package/diag/05_recent_errors.py +24 -14
package/diag/06_cron_jobs.py +4 -41
package/diag/07_performance.py +114 -42
package/diag/08_sessions.py +2 -54
package/diag/09_plugin_diag.py +52 -25
package/diag/10_shell_history.py +28 -10
package/lib/bundle.py +6 -13
package/ocdiag/__init__.py +1 -1
package/ocdiag/cli.py +16 -1
package/ocdiag/dispatcher.py +140 -53
package/ocdiag/doctor.py +162 -0
package/ocdiag/jsonlog.py +0 -5
package/ocdiag/paths.py +0 -1
package/ocdiag/recent_logs.py +0 -3
package/ocdiag/sensitive.py +95 -1
package/ocdiag/timeutil.py +0 -11
package/ocdiag/tokens.py +0 -4
package/package.json +2 -2
package/tools/oc_session_extract.py +75 -7
package/tools/oc_session_trace.py +31 -9

package/diag/09_plugin_diag.py CHANGED Viewed

@@ -17,6 +17,7 @@ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from ocdiag import cli, output
 from ocdiag.jsonlog import parse_name
+from ocdiag.sensitive import sanitize_text
 from ocdiag.timeutil import fmt_hms
@@ -110,13 +111,18 @@ def scan_logs(today_logs):
     for logf in today_logs:
         try:
             fh = open(logf, "r", errors="replace")
-        except Exception:
+        except OSError:
+            # Best-effort: if today's log is unreadable, skip it; the parent
+            # caller still surfaces "no log data" via the empty plugin_diag
+            # output. (We don't fail the whole module for one missing file.)
             continue
         with fh:
             for line in fh:
                 try:
                     o = json.loads(line)
-                except Exception:
+                except (json.JSONDecodeError, ValueError):
+                    # Expected: log files are JSONL; non-JSON lines are emitted
+                    # by Node before logger init. Drop those lines silently.
                     continue
                 plugin, sub = parse_name(o)
                 lvl = o.get("_meta", {}).get("logLevelName", "")
@@ -190,18 +196,23 @@ def scan_logs(today_logs):
 def load_configured(config_path):
+    """Return {plugin_id: enabled_bool}. Status reported as second return."""
     configured = {}
-    if config_path and os.path.isfile(config_path):
-        try:
-            with open(config_path) as f:
-                cfg = json.load(f)
-            entries = cfg.get("plugins", {}).get("entries", {}) or {}
-            for k, v in entries.items():
-                if isinstance(v, dict):
-                    configured[k] = bool(v.get("enabled", False))
-        except Exception:
-            pass
-    return configured
+    status = {"found": True}
+    if not config_path or not os.path.isfile(config_path):
+        return configured, {"found": False, "reason": "config_not_found",
+                            "checked": config_path or ""}
+    try:
+        with open(config_path) as f:
+            cfg = json.load(f)
+        entries = cfg.get("plugins", {}).get("entries", {}) or {}
+        for k, v in entries.items():
+            if isinstance(v, dict):
+                configured[k] = bool(v.get("enabled", False))
+    except (OSError, json.JSONDecodeError) as e:
+        return configured, {"found": False, "reason": "config_unreadable",
+                            "checked": config_path, "error": str(e)[:200]}
+    return configured, status
 def load_extensions(oc_home):
@@ -274,7 +285,10 @@ def section_state(out, scan, configured, extensions):
     })
-def section_errors(out, scan, configured):
+def section_errors(out, scan, configured, unmask=False):
+    def _scrub(s: str) -> str:
+        return s if unmask else sanitize_text(s)
     out.subsection("9.2 插件错误/警告")
     plugin_level_counts = scan["plugin_level_counts"]
     plugin_error_samples = scan["plugin_error_samples"]
@@ -313,9 +327,11 @@ def section_errors(out, scan, configured):
         if samples:
             for ts, lvl, text in dedup_messages(samples, max_unique=999):
                 tag = {"ERROR": "E", "FATAL": "F", "WARN": "W"}.get(lvl, "?")
-                snippet = text.replace("\n", " ")
+                snippet = _scrub(text.replace("\n", " "))
                 out.item(f"  [{tag}] {fmt_hms(ts)}: {snippet}")
-                sample_payload.append({"ts": ts, "level": lvl, "msg": text[:300]})
+                sample_payload.append({
+                    "ts": ts, "level": lvl, "msg": _scrub(text[:300]),
+                })
         if err > 0 or warn > 0 or sample_payload:
             errors_payload[p] = {
                 "error_count": err,
@@ -331,9 +347,9 @@ def section_errors(out, scan, configured):
         out.item(f"[plugin-manager]: {len(pm_errors)} ERROR, {len(pm_warns)} WARN, "
                  f"{len(plugin_diag_messages)} total")
         for ts, _lvl, text in dedup_messages(pm_errors, max_unique=999):
-            out.item(f"  [E] {fmt_hms(ts)}: {text.replace(chr(10),' ')}")
+            out.item(f"  [E] {fmt_hms(ts)}: {_scrub(text.replace(chr(10),' '))}")
         for ts, _lvl, text in dedup_messages(pm_warns, max_unique=999):
-            out.item(f"  [W] {fmt_hms(ts)}: {text.replace(chr(10),' ')}")
+            out.item(f"  [W] {fmt_hms(ts)}: {_scrub(text.replace(chr(10),' '))}")
     elif plugin_diag_messages:
         out.item(f"[plugin-manager]: 0 ERROR, 0 WARN, {len(plugin_diag_messages)} total")
@@ -444,12 +460,15 @@ def walk_urls(val, out_set):
             walk_urls(v, out_set)
-def section_deps(out, config_path):
+def section_deps(out, config_path, unmask=False):
     out.subsection("9.5 插件外部依赖")
     plugin_deps = {}
     if not (config_path and os.path.isfile(config_path)):
         out.item("未发现已启用插件的外部依赖配置")
         out.set_data("plugin_deps", {})
+        out.set_data("plugin_deps_status",
+                     {"found": False, "reason": "config_not_found",
+                      "checked": config_path or ""})
         return
     try:
         with open(config_path) as f:
@@ -464,8 +483,13 @@ def section_deps(out, config_path):
             walk_urls(pconf, hosts)
             hosts = {h for h in hosts if not h.startswith(("127.", "localhost", "0.0.0.0"))}
             plugin_deps[pid] = hosts
-    except Exception:
-        pass
+    except (OSError, json.JSONDecodeError) as e:
+        out.item(f"配置读取/解析失败: {type(e).__name__}")
+        out.set_data("plugin_deps", {})
+        out.set_data("plugin_deps_status",
+                     {"found": False, "reason": "config_unreadable",
+                      "checked": config_path, "error": str(e)[:200]})
+        return
     if not plugin_deps:
         out.item("未发现已启用插件的外部依赖配置")
@@ -509,7 +533,6 @@ def section_deps(out, config_path):
 def main() -> int:
     parser = cli.build_common_parser(
         description="模块 9：插件诊断",
-        prog="09_plugin_diag",
     )
     args = parser.parse_args()
     out = output.init("plugin_diag", json_mode=args.json, no_color=args.no_color)
@@ -519,14 +542,18 @@ def main() -> int:
     today_logs = sorted(glob.glob(os.path.join(args.log_dir, f"openclaw-{today}.log")))
     scan = scan_logs(today_logs)
-    configured = load_configured(args.config)
+    configured, configured_status = load_configured(args.config)
     extensions = load_extensions(args.openclaw_home)
+    if not configured_status.get("found", True):
+        out.item(f"配置加载失败: {configured_status.get('reason')} "
+                 f"({configured_status.get('checked')})")
+        out.set_data("configured_status", configured_status)
     section_state(out, scan, configured, extensions)
-    section_errors(out, scan, configured)
+    section_errors(out, scan, configured, unmask=args.unmask)
     section_hooks(out, scan)
     section_channels(out, scan)
-    section_deps(out, args.config)
+    section_deps(out, args.config, unmask=args.unmask)
     return out.done()

package/diag/10_shell_history.py CHANGED Viewed

@@ -12,6 +12,7 @@ from typing import List, Tuple
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from ocdiag import cli, output
+from ocdiag.sensitive import sanitize_text
 DANGEROUS_RE = re.compile(
@@ -34,26 +35,34 @@ def list_history_files() -> List[str]:
     return [c for c in candidates if os.path.isfile(c)]
-def read_lines(path: str) -> List[Tuple[int, str]]:
+def read_lines(path: str) -> Tuple[List[Tuple[int, str]], str]:
+    """Read history file. Returns (lines, error_str). error_str=='' on success.
+    Permission denied / missing files become an explicit error instead of an
+    empty list, so the caller can distinguish "no commands" from "couldn't read".
+    """
     out: List[Tuple[int, str]] = []
     try:
         with open(path, "r", errors="replace") as f:
             for i, line in enumerate(f, 1):
                 out.append((i, line.rstrip("\n")))
-    except OSError:
-        pass
-    return out
+        return out, ""
+    except OSError as e:
+        return out, f"{type(e).__name__}: {e}"
 def main() -> int:
     parser = cli.build_common_parser(
         description="模块 10：采集 shell 历史",
-        prog="10_shell_history",
     )
     args = parser.parse_args()
     out = output.init("shell_history", json_mode=args.json, no_color=args.no_color)
     out.section("模块 10：命令执行历史")
+    def maybe_sanitize(s: str) -> str:
+        return s if args.unmask else sanitize_text(s)
     out.line("  系统 shell 历史记录，用于判断是否有人或脚本执行过高危命令"
              "（rm -rf、kill、systemctl stop 等）。")
     out.line("")
@@ -66,7 +75,16 @@ def main() -> int:
     files_data = []
     for hfile in history_files:
-        lines = read_lines(hfile)
+        lines, read_err = read_lines(hfile)
+        if read_err:
+            out.item(f"{os.path.basename(hfile)} — 读取失败 ({read_err})")
+            files_data.append({
+                "path": hfile,
+                "found": False,
+                "reason": "unreadable",
+                "error": read_err,
+            })
+            continue
         total = len(lines)
         out.item(f"{os.path.basename(hfile)} — 共 {total} 条记录")
@@ -79,7 +97,7 @@ def main() -> int:
         if dangerous:
             out.item(f"  高危命令: {len(dangerous)} 条 ")
-            ev = "\n".join(f"{n}: {ln}" for n, ln in dangerous)
+            ev = "\n".join(f"{n}: {maybe_sanitize(ln)}" for n, ln in dangerous)
             out.evidence(f"{hfile} (高危)", ev)
         else:
             out.item("  高危命令: 0 条")
@@ -92,7 +110,7 @@ def main() -> int:
                 f"  OpenClaw 相关命令: 全文 {oc_total} 条，最近 30 条采样 {len(oc_cmds)} 条 — "
                 "用户手动执行的 openclaw 命令"
             )
-            ev = "\n".join(f"{n}: {ln}" for n, ln in oc_cmds)
+            ev = "\n".join(f"{n}: {maybe_sanitize(ln)}" for n, ln in oc_cmds)
             out.evidence(f"{hfile} (openclaw)", ev)
         else:
             out.item("  OpenClaw 相关命令: 0 条")
@@ -100,14 +118,14 @@ def main() -> int:
         recent = lines[-20:]
         if recent:
             out.item("  最近 20 条命令:")
-            ev = "\n".join(ln for _, ln in recent)
+            ev = "\n".join(maybe_sanitize(ln) for _, ln in recent)
             out.evidence(f"{hfile} (最近)", ev)
         files_data.append({
             "path": hfile,
             "total_lines": total,
             "dangerous_count": len(dangerous),
-            "dangerous": [{"line": n, "cmd": ln} for n, ln in dangerous],
+            "dangerous": [{"line": n, "cmd": maybe_sanitize(ln)} for n, ln in dangerous],
             "openclaw_count_total": oc_total,
             "openclaw_count_sample_30": len(oc_cmds),
             "recent_count": len(recent),

package/lib/bundle.py CHANGED Viewed

@@ -21,19 +21,12 @@ from typing import Dict, List, Optional
 REPO_ROOT = Path(__file__).resolve().parent.parent
-# ID → script filename, kept in sync with ocdiag/dispatcher.py:MODULES.
-MODULES: List[tuple] = [
-    ("sys_health",     "diag/01_sys_health.py"),
-    ("environment",    "diag/02_environment.py"),
-    ("configuration",  "diag/03_configuration.py"),
-    ("gateway",        "diag/04_gateway.py"),
-    ("recent_errors",  "diag/05_recent_errors.py"),
-    ("cron_jobs",      "diag/06_cron_jobs.py"),
-    ("performance",    "diag/07_performance.py"),
-    ("sessions",       "diag/08_sessions.py"),
-    ("plugin_diag",    "diag/09_plugin_diag.py"),
-    ("shell_history",  "diag/10_shell_history.py"),
-]
+# Single source of truth: ocdiag.dispatcher. We import it dynamically so the
+# bundle script stays runnable from a fresh checkout (no install needed).
+sys.path.insert(0, str(REPO_ROOT))
+from ocdiag.dispatcher import STATE_COLLECTORS  # noqa: E402
+MODULES: List[tuple] = [(mid, rel) for mid, _label, rel in STATE_COLLECTORS]
 MODULE_BY_ID = {mid: rel for mid, rel in MODULES}
 # Order matters: each submodule is exec'd into its own module object, and its

package/ocdiag/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """ocdiag — shared library for openclaw-diag-cli scripts."""
-__version__ = "0.1.3"
+__version__ = "0.2.1"

package/ocdiag/cli.py CHANGED Viewed

@@ -1,14 +1,23 @@
-"""Common argparse setup for diag scripts."""
+"""Common argparse setup for diag scripts.
+When invoked via the dispatcher (`openclaw-diag <id>`), the dispatcher exports
+OPENCLAW_DIAG_PROG="openclaw-diag <id>" before running the script so argparse
+uses that as `prog`. When you run the script directly (e.g.
+`python3 diag/01_sys_health.py`), argparse falls back to the script basename.
+"""
 from __future__ import annotations
 import argparse
+import os
 from typing import Optional
 from . import paths
 def build_common_parser(description: str, prog: Optional[str] = None) -> argparse.ArgumentParser:
+    if prog is None:
+        prog = os.environ.get("OPENCLAW_DIAG_PROG") or None
     p = argparse.ArgumentParser(
         prog=prog,
         description=description,
@@ -36,4 +45,10 @@ def build_common_parser(description: str, prog: Optional[str] = None) -> argpars
     )
     p.add_argument("--json", action="store_true", help="Emit JSON output")
     p.add_argument("--no-color", action="store_true", help="Disable colored output")
+    p.add_argument(
+        "--unmask",
+        action="store_true",
+        help="Disable default sanitization of secrets in free-form text "
+             "(shell history / plugin errors / systemd / sessions). Off by default.",
+    )
     return p

package/ocdiag/dispatcher.py CHANGED Viewed

@@ -5,39 +5,42 @@ Layout:
   ocdiag <object-inspector> ARG runs that inspector  (e.g. `ocdiag trace UUID`)
   ocdiag all [--skip a,b]       runs every state collector
   ocdiag list                   prints the catalogue grouped by parameter mode
-  ocdiag run <id> [args...]     legacy alias retained for 0.1.x users
+  ocdiag bundle <id>            emits a self-contained single-file .py
+  ocdiag doctor                 environment health check
 """
 from __future__ import annotations
+import json
 import os
 import runpy
 import sys
 import time
+import traceback
 from pathlib import Path
-from typing import List
+from typing import List, Optional, Tuple
 REPO_ROOT = Path(__file__).resolve().parent.parent
 # State collectors: zero required args, parameter-free observation of system state.
 STATE_COLLECTORS = [
-    ("sys_health",     "系统健康检查",          "diag/01_sys_health.py"),
-    ("environment",    "OpenClaw 基础环境",     "diag/02_environment.py"),
-    ("configuration",  "配置展平（脱敏）",      "diag/03_configuration.py"),
-    ("gateway",        "Gateway 状态",          "diag/04_gateway.py"),
-    ("recent_errors",  "近期错误聚合",          "diag/05_recent_errors.py"),
-    ("cron_jobs",      "定时任务状态",          "diag/06_cron_jobs.py"),
-    ("performance",    "模型/工具性能",         "diag/07_performance.py"),
-    ("sessions",       "Session 数据",          "diag/08_sessions.py"),
-    ("plugin_diag",    "插件诊断",              "diag/09_plugin_diag.py"),
-    ("shell_history",  "Shell 历史",            "diag/10_shell_history.py"),
+    ("sys_health",     "系统健康（DNS / 网络 / CPU / 内存 / 磁盘 / 进程 / 时间）",  "diag/01_sys_health.py"),
+    ("environment",    "OpenClaw 版本、Gateway 进程环境变量",                       "diag/02_environment.py"),
+    ("configuration",  "openclaw.json 展平（敏感字段脱敏）",                        "diag/03_configuration.py"),
+    ("gateway",        "Gateway 进程、端口、24h 重启、WS 生命周期、错误码",         "diag/04_gateway.py"),
+    ("recent_errors",  "应用日志 / journalctl / session 工具调用错误聚合",           "diag/05_recent_errors.py"),
+    ("cron_jobs",      "定时任务状态、连续失败、调度漂移、静默检测",                "diag/06_cron_jobs.py"),
+    ("performance",    "模型/工具耗时 P50/P95、慢调用、E2E 延迟、Cache 命中率",     "diag/07_performance.py"),
+    ("sessions",       "Session 总览、活跃度、Stuck 探测",                          "diag/08_sessions.py"),
+    ("plugin_diag",    "插件状态一致性、ERROR/WARN、Hook、Channel、外部 DNS",       "diag/09_plugin_diag.py"),
+    ("shell_history",  "Shell 历史中的高危命令与最近操作",                          "diag/10_shell_history.py"),
 ]
 # Object inspectors: take a session uuid (or other identifier) and inspect it.
 OBJECT_INSPECTORS = [
-    ("trace",   "追踪用户消息时间轴",  "tools/oc_session_trace.py"),
-    ("extract", "导出 session 为可读格式", "tools/oc_session_extract.py"),
+    ("trace",   "追踪一条用户消息从进入到响应的完整时间轴", "tools/oc_session_trace.py"),
+    ("extract", "导出 session.jsonl 为可读格式",            "tools/oc_session_extract.py"),
 ]
 STATE_BY_ID = {mid: (label, script) for mid, label, script in STATE_COLLECTORS}
@@ -46,31 +49,67 @@ MODULE_BY_ID = {**STATE_BY_ID, **OBJECT_BY_ID}
 MODULE_IDS = set(MODULE_BY_ID.keys())
+def cmd_list_json() -> int:
+    """Machine-readable module catalogue. Single source of truth consumed
+    by the Node shell and the bundle script (axiom #3)."""
+    payload = {
+        "state_collectors": [
+            {"id": mid, "label": label, "script": rel}
+            for mid, label, rel in STATE_COLLECTORS
+        ],
+        "object_inspectors": [
+            {"id": mid, "label": label, "script": rel}
+            for mid, label, rel in OBJECT_INSPECTORS
+        ],
+    }
+    print(json.dumps(payload, ensure_ascii=False))
+    return 0
 def cmd_list() -> int:
-    print("Available diagnostics:")
+    print("openclaw-diag — 可用诊断")
     print()
-    print("  State collectors (no args needed):")
+    print("  扫描类（无需参数）：")
     for mid, label, _ in STATE_COLLECTORS:
         print(f"    {mid:<16s} {label}")
     print()
-    print("  Object inspectors (require session uuid):")
+    print("  对象类（需要 session uuid）：")
     for mid, label, _ in OBJECT_INSPECTORS:
         print(f"    {mid:<16s} {label}")
     print()
-    print("  Meta:")
-    print("    all              跑全部 state collectors")
-    print("    doctor           检查 Node/Python/OpenClaw 环境")
-    print("    bundle <id>      打包成 self-contained 单文件")
+    print("  其它命令：")
+    print("    all              一次跑完所有扫描类")
+    print("    doctor           检查 Node / Python / openclaw-diag / OpenClaw 环境")
+    print("    bundle <id>      生成 self-contained 单文件 .py（离线机器用）")
     return 0
-def run_script(script_rel: str, extra_args: List[str]) -> int:
+def run_script(
+    script_rel: str,
+    extra_args: List[str],
+    module_id: Optional[str] = None,
+) -> int:
+    """Execute a diag script in-process. Returns the rc.
+    On crash, in addition to the human-readable stderr trace we emit a single
+    NDJSON error record to stdout when the script was invoked with --json.
+    This guarantees `all --json` produces N records for N modules — including
+    crashes — so downstream parsers don't silently lose modules. (Axiom #4)
+    """
     script_path = REPO_ROOT / script_rel
     if not script_path.is_file():
         print(f"Error: script not found: {script_path}", file=sys.stderr)
         return 2
+    json_mode = "--json" in extra_args
+    mid = module_id or script_path.stem
     saved_argv = sys.argv[:]
+    saved_prog = os.environ.get("OPENCLAW_DIAG_PROG")
     try:
+        # runpy.run_path resets sys.argv[0] to the script path, so we
+        # advertise the user-facing name through an env var instead. cli.py
+        # picks it up as the argparse prog so --help reads as
+        # "openclaw-diag sys_health" rather than "01_sys_health.py".
+        os.environ["OPENCLAW_DIAG_PROG"] = f"openclaw-diag {mid}"
         sys.argv = [str(script_path), *extra_args]
         runpy.run_path(str(script_path), run_name="__main__")
         return 0
@@ -79,14 +118,32 @@ def run_script(script_rel: str, extra_args: List[str]) -> int:
             return int(e.code) if e.code is not None else 0
         except (TypeError, ValueError):
             return 1
-    except Exception as e:
+    except BaseException as e:  # noqa: BLE001 — emit then re-classify
         print(f"  ERROR: {script_path.name} crashed: {type(e).__name__}: {e}",
               file=sys.stderr)
-        import traceback
         traceback.print_exc(file=sys.stderr)
+        if json_mode:
+            err_record = {
+                "module": mid,
+                "status": "error",
+                "error": f"{type(e).__name__}: {e}",
+                "traceback": traceback.format_exc(),
+            }
+            try:
+                sys.stdout.write(json.dumps(err_record, ensure_ascii=False) + "\n")
+                sys.stdout.flush()
+            except Exception:
+                # If stdout itself is broken (closed pipe), there's nothing
+                # productive to do — the stderr trace above already records
+                # the crash.
+                pass
         return 2
     finally:
         sys.argv = saved_argv
+        if saved_prog is None:
+            os.environ.pop("OPENCLAW_DIAG_PROG", None)
+        else:
+            os.environ["OPENCLAW_DIAG_PROG"] = saved_prog
 def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
@@ -101,7 +158,7 @@ def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
         n += 1
         print(f"\n[{n}/{total}] {label} ({mid})...", flush=True, file=progress_stream)
         t0 = time.time()
-        rc = run_script(script, extra_args)
+        rc = run_script(script, extra_args, module_id=mid)
         elapsed = time.time() - t0
         print(f"[{n}/{total}] {label} ({mid}) ... done ({elapsed:.1f}s)",
               flush=True, file=progress_stream)
@@ -110,7 +167,27 @@ def cmd_all(extra_args: List[str], skip_ids: List[str]) -> int:
     return rc_overall
-def _split_skip(rest: List[str]) -> (List[str], List[str]):
+def cmd_bundle(rest: List[str]) -> int:
+    """Generate a self-contained single-file diag script.
+    Lives here (rather than in lib/bundle.py only) so the Python entry has
+    parity with Node — `python3 bin/ocdiag bundle gateway` works the same as
+    `node bin/openclaw-diag.js bundle gateway`. (Axiom #3)
+    """
+    if not rest or rest[0] in ("-h", "--help"):
+        print("Usage: openclaw-diag bundle <id>", file=sys.stderr)
+        print("       Emits the bundle to stdout. Use shell redirection to save.", file=sys.stderr)
+        print(file=sys.stderr)
+        print("Available ids:", file=sys.stderr)
+        for mid, _label, _ in STATE_COLLECTORS:
+            print(f"  {mid}", file=sys.stderr)
+        return 0 if rest else 2
+    sys.path.insert(0, str(REPO_ROOT / "lib"))
+    import bundle  # type: ignore
+    return bundle.main(rest)
+def _split_skip(rest: List[str]) -> Tuple[List[str], List[str]]:
     """Pull out --skip a,b out of an argv tail; return (skip_ids, passthrough)."""
     skip_ids: List[str] = []
     passthrough: List[str] = []
@@ -126,22 +203,30 @@ def _split_skip(rest: List[str]) -> (List[str], List[str]):
     return skip_ids, passthrough
+def _suggest_command(unknown: str) -> Optional[str]:
+    """Best-effort typo suggestion for a misspelled command."""
+    import difflib
+    candidates = list(MODULE_BY_ID.keys()) + ["all", "list", "doctor", "bundle"]
+    matches = difflib.get_close_matches(unknown, candidates, n=1, cutoff=0.6)
+    return matches[0] if matches else None
 def print_help() -> None:
-    print("ocdiag — OpenClaw 诊断工具箱")
+    print("openclaw-diag — OpenClaw 诊断工具箱")
     print()
-    print("Usage:")
-    print("  ocdiag <id> [args...]            跑单个诊断（state collector 或 object inspector）")
-    print("  ocdiag all [--skip a,b]          跑全部 state collectors")
-    print("  ocdiag list                      列出所有诊断")
-    print("  ocdiag run <id> [args...]        旧用法别名（0.1.x 兼容）")
+    print("用法：")
+    print("  openclaw-diag <id> [args...]      跑单个诊断")
+    print("  openclaw-diag all [--skip a,b]    跑全部 state collectors")
+    print("  openclaw-diag list                列出所有诊断")
+    print("  openclaw-diag doctor              检查环境")
+    print("  openclaw-diag bundle <id>         生成单文件 .py")
     print()
-    print("State collectors:")
+    print("扫描类（无需参数）：")
     print("  " + "  ".join(mid for mid, _, _ in STATE_COLLECTORS))
-    print("Object inspectors:")
+    print("对象类（需要 session uuid）：")
     print("  " + "  ".join(mid for mid, _, _ in OBJECT_INSPECTORS))
     print()
-    print("--skip 后接逗号分隔 id 列表（仅对 all 有意义）。")
-    print("其它参数（--config / --log-dir / --json / --no-color）原样传递给脚本。")
+    print("常用 flag：--json（结构化输出）  --no-color（关掉颜色）  --unmask（不脱敏）")
 def main(argv=None) -> int:
@@ -154,33 +239,35 @@ def main(argv=None) -> int:
     head, rest = argv[0], argv[1:]
     if head == "list":
+        if "--json" in rest:
+            return cmd_list_json()
         return cmd_list()
+    if head == "doctor":
+        from ocdiag import doctor
+        json_mode = "--json" in rest
+        node_version = None
+        for i, a in enumerate(rest):
+            if a == "--node-version" and i + 1 < len(rest):
+                node_version = rest[i + 1]
+                break
+        return doctor.run(json_mode=json_mode, node_version=node_version)
     if head == "all":
         skip_ids, passthrough = _split_skip(rest)
         return cmd_all(passthrough, skip_ids)
-    # Backward-compat alias: `ocdiag run <id> [args...]` still works.
-    if head == "run":
-        if not rest:
-            print("Error: run requires a target (module id or 'all').", file=sys.stderr)
-            return 2
-        target, sub = rest[0], rest[1:]
-        if target == "all":
-            skip_ids, passthrough = _split_skip(sub)
-            return cmd_all(passthrough, skip_ids)
-        if target in MODULE_BY_ID:
-            _, script = MODULE_BY_ID[target]
-            return run_script(script, sub)
-        print(f"Error: unknown diagnostic '{target}'. Use `ocdiag list`.", file=sys.stderr)
-        return 2
+    if head == "bundle":
+        return cmd_bundle(rest)
     if head in MODULE_BY_ID:
         _, script = MODULE_BY_ID[head]
-        return run_script(script, rest)
+        return run_script(script, rest, module_id=head)
-    print(f"Error: unknown command '{head}'. Use `ocdiag list` to see available diagnostics.",
-          file=sys.stderr)
+    suggestion = _suggest_command(head)
+    hint = f"（你是不是想说 `{suggestion}`？）" if suggestion else ""
+    print(f"Error: 未知命令 '{head}'{hint}", file=sys.stderr)
+    print(f"运行 `openclaw-diag list` 查看全部诊断。", file=sys.stderr)
     return 2