npm - openclaw-diag-cli - Versions diffs - 0.1.3 → 0.2.1 - Mend

openclaw-diag-cli 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +84 -71
package/bin/openclaw-diag.js +65 -176
package/diag/01_sys_health.py +0 -2
package/diag/02_environment.py +32 -6
package/diag/03_configuration.py +4 -1
package/diag/04_gateway.py +30 -8
package/diag/05_recent_errors.py +24 -14
package/diag/06_cron_jobs.py +4 -41
package/diag/07_performance.py +114 -42
package/diag/08_sessions.py +2 -54
package/diag/09_plugin_diag.py +52 -25
package/diag/10_shell_history.py +28 -10
package/lib/bundle.py +6 -13
package/ocdiag/__init__.py +1 -1
package/ocdiag/cli.py +16 -1
package/ocdiag/dispatcher.py +140 -53
package/ocdiag/doctor.py +162 -0
package/ocdiag/jsonlog.py +0 -5
package/ocdiag/paths.py +0 -1
package/ocdiag/recent_logs.py +0 -3
package/ocdiag/sensitive.py +95 -1
package/ocdiag/timeutil.py +0 -11
package/ocdiag/tokens.py +0 -4
package/package.json +2 -2
package/tools/oc_session_extract.py +75 -7
package/tools/oc_session_trace.py +31 -9

package/ocdiag/doctor.py ADDED Viewed

@@ -0,0 +1,162 @@
+"""``ocdiag doctor`` — environment health-check.
+Sole authoritative implementation. Both Node (`bin/openclaw-diag.js doctor`)
+and Python (`bin/ocdiag doctor` / `python3 -m ocdiag.doctor`) entry points
+call this function. The Node entry is now a thin spawn wrapper.
+Checks:
+  - Python version (>= 3.8)
+  - ocdiag package importable + version
+  - All registered diag scripts respond to ``--help``
+  - openclaw.json exists at expected path
+Node version isn't visible from Python so we accept it as a passthrough
+argument; if absent, doctor reports node check as ``skipped``.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import Optional
+REPO_ROOT = Path(__file__).resolve().parent.parent
+def _node_status(node_version: Optional[str]) -> dict:
+    """Evaluate the Node version string handed over by the Node entry point.
+    Accepts ``v22.22.2``, ``V22.22.2`` or ``22.22.2``, with optional surrounding
+    whitespace (e.g. a trailing newline from a shell passthrough).
+    Returns a dict that always has ``version`` and ``ok``:
+      - no version given: ``ok`` is True and ``skipped`` is True;
+      - major component not an integer: ``ok`` is False, ``reason`` is
+        ``unparseable``;
+      - otherwise ``ok`` is True when the major version is at least 18.
+    """
+    if not node_version:
+        return {"version": None, "ok": True, "skipped": True,
+                "reason": "Node check is performed by the Node entry; "
+                          "ocdiag is fine without Node when invoked from Python"}
+    cleaned = node_version.strip()
+    # Drop exactly one leading "v"/"V"; str.lstrip("v") would eat every repeated
+    # "v" and would leave an upper-case "V" in place.
+    normalized = cleaned[1:] if cleaned[:1] in ("v", "V") else cleaned
+    try:
+        major = int(normalized.split(".", 1)[0])
+    except ValueError:
+        return {"version": normalized, "ok": False, "reason": "unparseable"}
+    return {"version": normalized, "ok": major >= 18,
+            "required": ">=18"}
+def _python_status() -> dict:
+    """Describe the running interpreter: dotted version, whether it is >=3.8,
+    the stated requirement, and the path of the executable."""
+    info = sys.version_info
+    dotted = ".".join(str(part) for part in (info.major, info.minor, info.micro))
+    status = {"version": dotted}
+    status["ok"] = info >= (3, 8)
+    status["required"] = ">=3.8"
+    status["executable"] = sys.executable
+    return status
+def _ocdiag_status() -> dict:
+    """Report whether the ``ocdiag`` package imports and which version it declares.
+    On success the dict holds ``ok: True`` and ``version`` (``"?"`` when the
+    package has no ``__version__``); on ImportError it holds ``ok: False`` and
+    the error message capped at 200 characters.
+    """
+    try:
+        import ocdiag as pkg  # type: ignore
+        report = {"ok": True, "version": getattr(pkg, "__version__", "?")}
+    except ImportError as exc:
+        report = {"ok": False, "error": str(exc)[:200]}
+    return report
+def _diag_scripts_status() -> dict:
+    """Probe every registered diagnostic script with ``--help``.
+    Walks the ``(id, label, relative path)`` entries of ``STATE_COLLECTORS`` and
+    ``OBJECT_INSPECTORS`` from ``ocdiag.dispatcher`` and resolves each path
+    against ``REPO_ROOT``. A script is recorded as failed when its file is
+    missing, the process cannot be started, it does not finish within 10
+    seconds, or it exits with a non-zero code.
+    Returns ``{"ok": bool, "total": int, "failed": [dict, ...]}``.
+    Raises ImportError if ``ocdiag.dispatcher`` cannot be imported.
+    """
+    from ocdiag.dispatcher import STATE_COLLECTORS, OBJECT_INSPECTORS
+    all_scripts = [
+        (mid, REPO_ROOT / rel)
+        for mid, _label, rel in (*STATE_COLLECTORS, *OBJECT_INSPECTORS)
+    ]
+    failed = []
+    for mid, path in all_scripts:
+        if not path.is_file():
+            failed.append({"script": mid, "reason": "missing", "path": str(path)})
+            continue
+        try:
+            r = subprocess.run(
+                [sys.executable, str(path), "--help"],
+                capture_output=True, text=True, timeout=10, check=False,
+            )
+        except subprocess.TimeoutExpired:
+            # A hung script is a per-script failure, not a reason to abort the
+            # whole health-check with a traceback.
+            failed.append({"script": mid, "reason": "timeout"})
+            continue
+        except OSError as e:
+            # The interpreter could not be spawned for this script.
+            failed.append({"script": mid, "reason": "spawn-failed",
+                           "error": str(e)[:200]})
+            continue
+        if r.returncode != 0:
+            failed.append({
+                "script": mid,
+                "rc": r.returncode,
+                "stderr": (r.stderr or "")[:200],
+            })
+    return {"ok": not failed, "total": len(all_scripts), "failed": failed}
+def _openclaw_config_status() -> dict:
+    """Locate openclaw.json and report whether it exists as a regular file.
+    A non-empty ``OPENCLAW_CONFIG`` wins; otherwise the file is looked up inside
+    ``OPENCLAW_HOME`` (default ``~/.openclaw``).
+    """
+    env = os.environ
+    explicit = env.get("OPENCLAW_CONFIG")
+    if explicit:
+        cfg_path = explicit
+    else:
+        default_home = os.path.join(os.path.expanduser("~"), ".openclaw")
+        cfg_path = os.path.join(env.get("OPENCLAW_HOME", default_home), "openclaw.json")
+    return {"path": cfg_path, "exists": os.path.isfile(cfg_path)}
+def run(json_mode: bool = False, node_version: Optional[str] = None) -> int:
+    """Execute the doctor check and print the report.
+    Args:
+        json_mode: When true, print the collected result dict as indented JSON
+            instead of the human-readable lines.
+        node_version: Node version string forwarded by the Node entry, or None.
+    Returns:
+        0 when the node, python, ocdiag and diag_scripts checks are all OK,
+        1 otherwise. A missing openclaw.json is reported but does not change
+        the return code.
+    """
+    node_result = _node_status(node_version)
+    python_result = _python_status()
+    ocdiag_result = _ocdiag_status()
+    try:
+        scripts_result = _diag_scripts_status()
+    except ImportError as e:
+        # _diag_scripts_status imports ocdiag.dispatcher; if that import fails,
+        # report a failed check instead of aborting the run with a traceback.
+        scripts_result = {"ok": False, "total": 0, "failed": [],
+                          "error": str(e)[:200]}
+    result = {
+        "node": node_result,
+        "python": python_result,
+        "ocdiag": ocdiag_result,
+        "diag_scripts": scripts_result,
+        "openclaw_config": _openclaw_config_status(),
+    }
+    ok = (
+        result["node"].get("ok", True)
+        and result["python"]["ok"]
+        and result["ocdiag"]["ok"]
+        and result["diag_scripts"]["ok"]
+    )
+    if json_mode:
+        print(json.dumps(result, ensure_ascii=False, indent=2))
+    else:
+        node = result["node"]
+        if node.get("skipped"):
+            print("ℹ Node check skipped (run via npx to verify Node version)")
+        elif node["ok"]:
+            print(f"✓ Node v{node['version']}")
+        else:
+            print(f"✗ Node v{node.get('version','?')} (need {node.get('required','?')})")
+        py = result["python"]
+        mark = "✓" if py["ok"] else "✗"
+        print(f"{mark} Python {py['version']} ({py['executable']})")
+        oc = result["ocdiag"]
+        if oc["ok"]:
+            print(f"✓ ocdiag package importable (version {oc['version']})")
+        else:
+            print(f"✗ ocdiag package not importable: {oc.get('error','?')}")
+        ds = result["diag_scripts"]
+        if ds["ok"]:
+            print(f"✓ All {ds['total']} diagnostics respond to --help")
+        elif ds.get("error"):
+            print(f"✗ Diagnostics could not be checked: {ds['error']}")
+        else:
+            print(f"✗ {len(ds['failed'])}/{ds['total']} diagnostics failed --help:")
+            for f in ds["failed"]:
+                # Entries without a return code (e.g. a missing file) carry a
+                # "reason" instead; show that rather than a meaningless "rc=?".
+                detail = f"rc={f['rc']}" if "rc" in f else f.get("reason", "?")
+                print(f"    {f.get('script','?')} ({detail})")
+        cfg = result["openclaw_config"]
+        if cfg["exists"]:
+            print(f"✓ OpenClaw config present ({cfg['path']})")
+        else:
+            print(f"ℹ OpenClaw config not found ({cfg['path']}) — diagnostics will run but report missing")
+    return 0 if ok else 1
+def main(argv=None) -> int:
+    """Parse the doctor command-line flags and run the check, returning its rc."""
+    parser = argparse.ArgumentParser(
+        prog="ocdiag-doctor",
+        description="Health-check the ocdiag install + environment",
+    )
+    parser.add_argument("--json", action="store_true", help="Emit JSON output")
+    node_help = (
+        "Node version string (e.g. '20.12.1') passed in by the Node "
+        "shell. Omit when running from Python directly."
+    )
+    parser.add_argument("--node-version", default=None, help=node_help)
+    opts = parser.parse_args(argv)
+    return run(json_mode=opts.json, node_version=opts.node_version)
+if __name__ == "__main__":
+    sys.exit(main())

package/ocdiag/jsonlog.py CHANGED Viewed

@@ -58,8 +58,3 @@ def parse_name(obj: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
     return p.get("plugin"), p.get("subsystem")
-def log_level(obj: Dict[str, Any]) -> str:
-    meta = obj.get("_meta") or {}
-    if isinstance(meta, dict):
-        return meta.get("logLevelName", "") or ""
-    return ""

package/ocdiag/paths.py CHANGED Viewed

@@ -3,7 +3,6 @@
 from __future__ import annotations
 import os
-from pathlib import Path
 def _env_path(name: str, default: str) -> str:

package/ocdiag/recent_logs.py CHANGED Viewed

@@ -48,6 +48,3 @@ def latest_app_log(log_dir: str) -> Optional[str]:
     return matched[0][1]
-def all_logs(log_dir: str) -> List[str]:
-    pattern = os.path.join(log_dir, "openclaw-*.log")
-    return sorted(glob.glob(pattern))

package/ocdiag/sensitive.py CHANGED Viewed

@@ -1,4 +1,23 @@
-"""Mask sensitive config values (keys, secrets, tokens)."""
+"""Mask sensitive config values + sanitize free-form text.
+Two layers:
+1. ``mask`` / ``safe_val`` / ``is_sensitive_key`` — used when we already know
+   we're looking at a config key/value pair (configuration flatten, env vars).
+   Masking is keyed off the *key name*.
+2. ``sanitize_text`` — used when scanning free-form text (shell history lines,
+   plugin error messages, systemd unit files, session message bodies). We don't
+   know the structure, so we run a pattern-based scrubber. Best-effort: the
+   patterns below cover the common token shapes (Anthropic/OpenAI sk-, GitHub
+   ghp_/gho_/ghs_/github_pat_, npm npm_, AWS AKIA, ``Bearer xxx``, URL
+   credentials, ``KEY=value`` with secret-ish key). It will miss bespoke or
+   obfuscated formats — callers who need stronger guarantees should mask the
+   whole field.
+The ``--unmask`` flag, declared in ``ocdiag.cli``, propagates to call sites
+that opt-in to honouring it (currently the session extract tool).
+"""
 from __future__ import annotations
@@ -39,3 +58,78 @@ def safe_val(key: str, val, max_len: int = 300) -> str:
         return mask(val) if val else '""'
     s = str(val)
     return s[:max_len] + "..." if len(s) > max_len else s
+# ── sanitize_text ──
+# Token shapes worth scrubbing by themselves (no key=value context).
+# Each pattern matches the *whole* secret; the replacement keeps a short,
+# recognisable label so the reader can still tell what kind of secret it was.
+_TOKEN_PATTERNS = [
+    # Anthropic / OpenAI style (`sk-...` / `sk-ant-...`)
+    (re.compile(r"\b(sk-(?:ant-)?[A-Za-z0-9_\-]{16,})"), "sk-<***>"),
+    # GitHub PAT family
+    (re.compile(r"\b(gh[posu]_[A-Za-z0-9]{20,})"), "<gh-token>"),
+    (re.compile(r"\b(github_pat_[A-Za-z0-9_]{20,})"), "<github_pat>"),
+    # npm
+    (re.compile(r"\b(npm_[A-Za-z0-9]{30,})"), "<npm_token>"),
+    # AWS access key id
+    (re.compile(r"\b(AKIA[0-9A-Z]{16})"), "<AKIA-***>"),
+    # Authorization headers
+    (re.compile(r"(Bearer\s+)([A-Za-z0-9_\-\.=]{8,})", re.IGNORECASE), r"\1<***>"),
+    # URLs with embedded credentials: scheme://user:pass@host
+    (re.compile(r"([a-zA-Z][a-zA-Z0-9+\-.]*://)([^/\s:@]+):([^/\s@]+)@"), r"\1<user>:<***>@"),
+]
+# KEY=VALUE / KEY: VALUE in free text where the key looks secret-ish.
+# The key is any run of identifier characters that *contains* one of the
+# keywords. The prefix before the keyword is optional so that bare keys such
+# as ``PASSWORD=...`` or ``token: ...`` are caught as well as ``API_KEY=...``.
+# Over-masking is preferred to leaking. Three forms:
+#   KEY=value         (env var, dotenv)
+#   KEY="value"       (shell quoted)
+#   KEY: value        (yaml-ish / HTTP header)
+_SECRET_KEY_RE = (
+    r"\b([A-Za-z0-9_\-\.]*"
+    r"(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH|PRIVATE|SIGNING)[A-Za-z0-9_\-\.]*)"
+)
+_KV_BARE = re.compile(
+    _SECRET_KEY_RE + r"\s*=\s*([^\s\"';#]+)",
+    re.IGNORECASE,
+)
+_KV_QUOTED = re.compile(
+    _SECRET_KEY_RE + r"\s*=\s*([\"'])([^\"']+)\2",
+    re.IGNORECASE,
+)
+# Group 2 optionally captures an HTTP auth scheme word. Without it the scheme
+# itself would be taken as the "value" (``X-Auth: Bearer tok`` would become
+# ``X-Auth: <***> tok``), leaving the real credential in clear text.
+_KV_COLON = re.compile(
+    _SECRET_KEY_RE
+    + r"\s*:\s*((?:Bearer|Basic|Digest|Token)\s+)?([^\s\"';#,}\]]+)",
+    re.IGNORECASE,
+)
+def sanitize_text(text: str, context: str = "generic") -> str:
+    """Scrub well-known secret shapes from free-form text.
+    Best-effort, not a guarantee. Returns the input unchanged if it is not a
+    non-empty str. ``context`` is accepted for call-site compatibility and is
+    currently unused.
+    """
+    if not isinstance(text, str) or not text:
+        return text
+    # Order: longer/more-specific (KV with quotes) first, then bare KV, then
+    # colon KV, then bare token shapes. KV passes also catch things like
+    # `API_KEY=abc` where the value would not match a token pattern.
+    def _kv_quoted_sub(m):
+        return f"{m.group(1)}={m.group(2)}<***>{m.group(2)}"
+    def _kv_bare_sub(m):
+        return f"{m.group(1)}=<***>"
+    def _kv_colon_sub(m):
+        # Keep the auth scheme word (if any) and mask only the credential.
+        return f"{m.group(1)}: {m.group(2) or ''}<***>"
+    text = _KV_QUOTED.sub(_kv_quoted_sub, text)
+    text = _KV_BARE.sub(_kv_bare_sub, text)
+    text = _KV_COLON.sub(_kv_colon_sub, text)
+    for pat, repl in _TOKEN_PATTERNS:
+        text = pat.sub(repl, text)
+    return text

package/ocdiag/timeutil.py CHANGED Viewed

@@ -37,17 +37,6 @@ def fmt_duration(sec) -> str:
     return f"{s/3600:.1f}h"
-def fmt_duration_ms(ms) -> str:
-    if ms is None:
-        return "?"
-    s = float(ms) / 1000.0
-    if s < 60:
-        return f"{s:.1f}s"
-    if s < 3600:
-        return f"{s/60:.1f}min"
-    return f"{s/3600:.1f}h"
 def fmt_age(ms_delta) -> str:
     s = abs(float(ms_delta)) / 1000
     if s < 60:

package/ocdiag/tokens.py CHANGED Viewed

@@ -16,10 +16,6 @@ def fmt_tokens(n) -> str:
     return str(n)
-def fmt_k(n) -> str:
-    return fmt_tokens(n)
 def percentile(sorted_list: List[float], p: float) -> Optional[float]:
     if not sorted_list:
         return None

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "openclaw-diag-cli",
-  "version": "0.1.3",
-  "description": "OpenClaw read-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
+  "version": "0.2.1",
+  "description": "OpenClaw observer-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
   "keywords": [
     "openclaw",
     "diagnostic",

package/tools/oc_session_extract.py CHANGED Viewed

@@ -9,11 +9,12 @@ import json
 import os
 import sys
 from pathlib import Path
-from typing import Iterator, List, Optional, TextIO, Tuple
+from typing import List, Optional, TextIO, Tuple
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
 from ocdiag import paths
+from ocdiag.sensitive import sanitize_text
 DEFAULT_BASE_DIR = paths.SESSIONS_BASE
@@ -40,6 +41,29 @@ def classify_state(filename: str) -> str:
     return "unknown"
+def _recent_session_ids(base_dir, limit=5):
+    """Return up to ``limit`` session ids, most recently modified first.
+    Scans ``<base_dir>/*/sessions`` for ``*.jsonl`` files, skipping names that
+    contain ``.trajectory`` or ``.jsonl.reset.``; the id is the file name
+    without its ``.jsonl`` suffix. Files whose mtime cannot be read are ignored.
+    """
+    suffix = ".jsonl"
+    candidates = []
+    for agent_dir in glob.glob(os.path.join(base_dir, "*")):
+        sessions_dir = os.path.join(agent_dir, "sessions")
+        if not os.path.isdir(sessions_dir):
+            continue
+        for name in os.listdir(sessions_dir):
+            wanted = (
+                name.endswith(suffix)
+                and ".trajectory" not in name
+                and ".jsonl.reset." not in name
+            )
+            if not wanted:
+                continue
+            try:
+                stamp = os.path.getmtime(os.path.join(sessions_dir, name))
+            except OSError:
+                continue
+            candidates.append((stamp, name[:-len(suffix)]))
+    newest_first = sorted(candidates, reverse=True)
+    return [sid for _stamp, sid in newest_first[:limit]]
 def find_session_files(session_id, base_dir=DEFAULT_BASE_DIR, agent=None):
     if agent:
         agent_dirs = [os.path.join(base_dir, agent)]
@@ -85,23 +109,57 @@ def write_header(out, path, state):
     out.write(SEPARATOR + "\n\n")
-def extract_file(path, state, out, pretty=True, type_filter=None):
+def _sanitize_record(obj):
+    """Scrub the free-form text fields of one session record, in place.
+    Only ``message.content`` (a string, or a list of parts whose ``text`` /
+    ``content`` are strings) and the ``text`` / ``summary`` fields of
+    ``message`` are passed through ``sanitize_text``. Tool args and other
+    metadata are left untouched because their structure matters for diagnosis.
+    Non-dict input, or a record without a dict ``message``, is returned as is.
+    The same object that was passed in is returned.
+    """
+    message = obj.get("message") if isinstance(obj, dict) else None
+    if not isinstance(message, dict):
+        return obj
+    body = message.get("content")
+    if isinstance(body, str):
+        message["content"] = sanitize_text(body)
+    elif isinstance(body, list):
+        for part in body:
+            if not isinstance(part, dict):
+                continue
+            for field in ("text", "content"):
+                value = part.get(field)
+                if isinstance(value, str):
+                    part[field] = sanitize_text(value)
+    # Also scrub text-ish fields the gateway may have set directly on the message.
+    for field in ("text", "summary"):
+        value = message.get(field)
+        if isinstance(value, str):
+            message[field] = sanitize_text(value)
+    return obj
+def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True):
     write_header(out, path, state)
     written = 0
     for line_no, obj, raw, err in stream_records(path):
         if err is not None:
             out.write(f"--- Record {line_no} [PARSE ERROR: {err}] ---\n")
-            out.write(raw + "\n\n")
+            out.write((sanitize_text(raw) if sanitize else raw) + "\n\n")
             written += 1
             continue
         rtype = obj.get("type", "?") if isinstance(obj, dict) else "?"
         if type_filter is not None and rtype not in type_filter:
             continue
         out.write(f"--- Record {line_no} [type: {rtype}] ---\n")
+        if sanitize:
+            obj = _sanitize_record(obj)
         if pretty:
             out.write(json.dumps(obj, indent=2, ensure_ascii=False))
         else:
-            out.write(raw)
+            # Non-pretty mode: emit the (possibly sanitized) JSON or fall back
+            # to the original raw line if we didn't touch it.
+            out.write(json.dumps(obj, ensure_ascii=False) if sanitize else raw)
         out.write("\n\n")
         written += 1
     return written
@@ -178,6 +236,7 @@ def select_files(files, extract_all, _out):
 def main() -> int:
     p = argparse.ArgumentParser(
+        prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
         description="Extract OpenClaw session JSONL files into human-readable format.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
@@ -192,15 +251,24 @@ def main() -> int:
     p.add_argument("--types", help="Filter by record type (comma-separated, e.g. 'message,toolCall')")
     p.add_argument("--summary", action="store_true",
                    help="Show record-count summary instead of full extraction")
+    p.add_argument("--unmask", action="store_true",
+                   help="Disable default sanitization of secret-shaped substrings "
+                        "in message content (off = scrubbed)")
     args = p.parse_args()
     files = find_session_files(args.session_id, args.base_dir, args.agent)
     if not files:
         sys.stderr.write(
-            f"Error: no files found for session ID '{args.session_id}' under {args.base_dir}"
-            + (f" (agent={args.agent})" if args.agent else "")
+            f"Error: 找不到 session '{args.session_id}'（在 {args.base_dir} 下）"
+            + (f" agent={args.agent}" if args.agent else "")
             + "\n"
         )
+        suggestions = _recent_session_ids(args.base_dir, limit=5)
+        if suggestions:
+            sys.stderr.write("  最近的 5 个 session：\n")
+            for sid in suggestions:
+                sys.stderr.write(f"    {sid}\n")
+            sys.stderr.write("  提示：完整 UUID 或前缀（至少 8 位）都可。\n")
         return 1
     if args.list:
@@ -231,7 +299,7 @@ def main() -> int:
                 summarize_file(path, state, out_fp)
             else:
                 extract_file(path, state, out_fp, pretty=not args.no_pretty,
-                             type_filter=type_filter)
+                             type_filter=type_filter, sanitize=not args.unmask)
     except BrokenPipeError:
         try:
             sys.stdout.flush()

package/tools/oc_session_trace.py CHANGED Viewed

@@ -52,14 +52,6 @@ def fmt_duration(ms: float) -> str:
     return f"{m}m{s:.1f}s"
-def human_size(n: int) -> str:
-    for unit in ("B", "KB", "MB", "GB"):
-        if n < 1024:
-            return f"{n:.1f} {unit}" if unit != "B" else f"{n} {unit}"
-        n /= 1024
-    return f"{n:.1f} TB"
 def extract_text(content: Any) -> str:
     if isinstance(content, str):
         return content
@@ -112,6 +104,29 @@ def find_session_file(
     return candidates[0][0] if candidates else None
+def _recent_session_ids(base_dir: str, limit: int = 5) -> List[str]:
+    """Return up to ``limit`` session ids ordered newest-first by file mtime.
+    Looks at ``*.jsonl`` files under ``<base_dir>/*/sessions``; names containing
+    ``.trajectory`` or ``.jsonl.reset.`` are skipped, as are files whose mtime
+    cannot be read. The id is the file name with the ``.jsonl`` suffix removed.
+    """
+    ext = ".jsonl"
+    dated: List[Tuple[float, str]] = []
+    for agent_path in glob.glob(os.path.join(base_dir, "*")):
+        session_root = os.path.join(agent_path, "sessions")
+        if not os.path.isdir(session_root):
+            continue
+        for filename in os.listdir(session_root):
+            if not filename.endswith(ext):
+                continue
+            if any(marker in filename for marker in (".trajectory", ".jsonl.reset.")):
+                continue
+            try:
+                modified = os.path.getmtime(os.path.join(session_root, filename))
+            except OSError:
+                continue
+            dated.append((modified, filename[:-len(ext)]))
+    ranked = sorted(dated, reverse=True)[:limit]
+    return [session_id for _modified, session_id in ranked]
 def find_trajectory_file(session_file: str) -> Optional[str]:
     d = os.path.dirname(session_file)
     base = os.path.basename(session_file).split(".jsonl")[0]
@@ -634,6 +649,7 @@ def format_json(session_id, session_file, user_msg_index, user_msg_id, analysis,
 def main():
     parser = argparse.ArgumentParser(
+        prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
         description="Trace the processing timeline of a user message in an OpenClaw session.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
@@ -652,8 +668,14 @@ def main():
     session_file = find_session_file(args.session_id, args.base_dir, args.agent)
     if not session_file:
-        print(f"Error: no session file found for '{args.session_id}' under {args.base_dir}",
+        print(f"Error: 找不到 session '{args.session_id}'（在 {args.base_dir} 下）",
               file=sys.stderr)
+        suggestions = _recent_session_ids(args.base_dir, limit=5)
+        if suggestions:
+            print(f"  最近的 5 个 session：", file=sys.stderr)
+            for sid in suggestions:
+                print(f"    {sid}", file=sys.stderr)
+            print(f"  提示：UUID 完整 36 位，前缀也可（至少 8 位）。", file=sys.stderr)
         sys.exit(1)
     records = load_records(session_file)