npm - openclaw-diag-cli - Versions diffs - 0.1.3 → 0.2.2 - Mend

openclaw-diag-cli 0.1.3 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/README.md +83 -71
package/bin/ocdiag +0 -1
package/bin/openclaw-diag.js +65 -176
package/diag/01_sys_health.py +0 -2
package/diag/02_environment.py +32 -6
package/diag/03_configuration.py +4 -1
package/diag/04_gateway.py +30 -8
package/diag/05_recent_errors.py +24 -14
package/diag/06_cron_jobs.py +4 -41
package/diag/07_performance.py +114 -42
package/diag/08_sessions.py +2 -54
package/diag/09_plugin_diag.py +52 -25
package/diag/10_shell_history.py +28 -10
package/lib/__pycache__/bundle.cpython-310.pyc +0 -0
package/lib/bundle.py +6 -13
package/ocdiag/__init__.py +1 -1
package/ocdiag/__pycache__/__init__.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/cli.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/dispatcher.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/doctor.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/jsonlog.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/output.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/paths.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/recent_logs.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/sensitive.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/sessions.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/timeutil.cpython-310.pyc +0 -0
package/ocdiag/__pycache__/tokens.cpython-310.pyc +0 -0
package/ocdiag/cli.py +16 -1
package/ocdiag/dispatcher.py +140 -53
package/ocdiag/doctor.py +162 -0
package/ocdiag/jsonlog.py +0 -5
package/ocdiag/paths.py +0 -17
package/ocdiag/recent_logs.py +0 -3
package/ocdiag/sensitive.py +95 -1
package/ocdiag/sessions.py +161 -0
package/ocdiag/timeutil.py +0 -11
package/ocdiag/tokens.py +0 -4
package/package.json +2 -2
package/tools/oc_session_extract.py +190 -67
package/tools/oc_session_trace.py +48 -46

package/ocdiag/sensitive.py CHANGED Viewed

@@ -1,4 +1,23 @@
-"""Mask sensitive config values (keys, secrets, tokens)."""
+"""Mask sensitive config values + sanitize free-form text.
+Two layers:
+1. ``mask`` / ``safe_val`` / ``is_sensitive_key`` — used when we already know
+   we're looking at a config key/value pair (configuration flatten, env vars).
+   Masking is keyed off the *key name*.
+2. ``sanitize_text`` — used when scanning free-form text (shell history lines,
+   plugin error messages, systemd unit files, session message bodies). We don't
+   know the structure, so we run a pattern-based scrubber. Best-effort: the
+   patterns below cover the common token shapes (Anthropic/OpenAI sk-, GitHub
+   ghp_/gho_/ghs_/github_pat_, npm npm_, AWS AKIA, ``Bearer xxx``, URL
+   credentials, ``KEY=value`` with secret-ish key). It will miss bespoke or
+   obfuscated formats — callers who need stronger guarantees should mask the
+   whole field.
+The ``--unmask`` flag, declared in ``ocdiag.cli``, propagates to call sites
+that opt-in to honouring it (currently the session extract tool).
+"""
 from __future__ import annotations
@@ -39,3 +58,78 @@ def safe_val(key: str, val, max_len: int = 300) -> str:
         return mask(val) if val else '""'
     s = str(val)
     return s[:max_len] + "..." if len(s) > max_len else s
+# ── sanitize_text ──
+# Token shapes worth scrubbing by themselves (no key=value context).
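+# Each pattern matches the secret itself (or captures the surrounding context
+# in groups); we replace it with a placeholder that still names the kind of
+# secret (e.g. `sk-<***>`, `<gh-token>`, `Bearer <***>`), so the reader can
+# tell what was scrubbed.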
+_TOKEN_PATTERNS = [
+    # Anthropic / OpenAI style (`sk-...` / `sk-ant-...`)
+    (re.compile(r"\b(sk-(?:ant-)?[A-Za-z0-9_\-]{16,})"), "sk-<***>"),
+    # GitHub PAT family
+    (re.compile(r"\b(gh[posu]_[A-Za-z0-9]{20,})"), "<gh-token>"),
+    (re.compile(r"\b(github_pat_[A-Za-z0-9_]{20,})"), "<github_pat>"),
+    # npm
+    (re.compile(r"\b(npm_[A-Za-z0-9]{30,})"), "<npm_token>"),
+    # AWS access key id
+    (re.compile(r"\b(AKIA[0-9A-Z]{16})"), "<AKIA-***>"),
+    # Authorization headers
+    (re.compile(r"(Bearer\s+)([A-Za-z0-9_\-\.=]{8,})", re.IGNORECASE), r"\1<***>"),
+    # URLs with embedded credentials: scheme://user:pass@host
+    (re.compile(r"([a-zA-Z][a-zA-Z0-9+\-.]*://)([^/\s:@]+):([^/\s@]+)@"), r"\1<user>:<***>@"),
+]
+# KEY=VALUE / KEY: VALUE in free text where the key looks secret-ish.
+# Use SENSITIVE_PATTERN over the key name; match value up to whitespace, quote,
+# or end-of-line.  Three forms:
+#   KEY=value         (env var, dotenv)
+#   KEY="value"       (shell quoted)
+#   KEY: value        (yaml-ish)
+_KV_BARE = re.compile(
+    r"\b([A-Za-z_][A-Za-z0-9_\-\.]*"
+    r"(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH|PRIVATE|SIGNING)[A-Za-z0-9_\-\.]*)"
+    r"\s*=\s*([^\s\"';#]+)",
+    re.IGNORECASE,
+)
+_KV_QUOTED = re.compile(
+    r"\b([A-Za-z_][A-Za-z0-9_\-\.]*"
+    r"(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH|PRIVATE|SIGNING)[A-Za-z0-9_\-\.]*)"
+    r"\s*=\s*([\"'])([^\"']+)\2",
+    re.IGNORECASE,
+)
+_KV_COLON = re.compile(
+    r"\b([A-Za-z_][A-Za-z0-9_\-\.]*"
+    r"(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH|PRIVATE|SIGNING)[A-Za-z0-9_\-\.]*)"
+    r"\s*:\s*([^\s\"';#,}\]]+)",
+    re.IGNORECASE,
+)
+def sanitize_text(text: str, context: str = "generic") -> str:
+    """Scrub well-known secret shapes from free-form text.
+    Best-effort, not a guarantee. Returns the text unchanged if it's not a str.
+    """
+    if not isinstance(text, str) or not text:
+        return text
+    # Order: longer/more-specific (KV with quotes) first, then bare KV, then
+    # bare token shapes. KV passes also catch things like `API_KEY=abc` where
+    # the value would not match a token pattern.
+    def _kv_quoted_sub(m):
+        return f"{m.group(1)}={m.group(2)}<***>{m.group(2)}"
+    def _kv_bare_sub(m):
+        return f"{m.group(1)}=<***>"
+    def _kv_colon_sub(m):
+        return f"{m.group(1)}: <***>"
+    text = _KV_QUOTED.sub(_kv_quoted_sub, text)
+    text = _KV_BARE.sub(_kv_bare_sub, text)
+    text = _KV_COLON.sub(_kv_colon_sub, text)
+    for pat, repl in _TOKEN_PATTERNS:
+        text = pat.sub(repl, text)
+    return text

package/ocdiag/sessions.py ADDED Viewed

@@ -0,0 +1,161 @@
+"""Shared session-file lookup utilities for trace/extract.
+A "session" is identified by a UUID. On disk it can have multiple files:
+  <uuid>.jsonl              — active
+  <uuid>.jsonl.lock         — write lock (transient, filtered by default)
+  <uuid>.jsonl.deleted.<ts> — soft-deleted
+  <uuid>.jsonl.reset.<ts>   — pre-reset snapshot
+  <uuid>.jsonl.bak-<pid>    — backup snapshot
+Sibling artifacts (NOT session content):
+  <uuid>.trajectory.jsonl, <uuid>.acp-stream.jsonl, <uuid>.json
+Callers may pass a full UUID or a prefix of at least MIN_PREFIX_LEN chars.
+"""
+from __future__ import annotations
+import glob
+import os
+import re
+from typing import Dict, List, Optional, Tuple
+from . import paths
+MIN_PREFIX_LEN = 8
+_TRANSIENT_SUFFIXES = (".lock", ".tmp", ".swp")
+_UUID_CHAR = re.compile(r"^[0-9a-fA-F-]+$")
+def classify_state(filename: str) -> str:
+    """Tag a session-file basename with its lifecycle state."""
+    if ".jsonl.deleted." in filename:
+        return "deleted"
+    if ".jsonl.reset." in filename:
+        return "reset"
+    if ".jsonl.bak-" in filename:
+        return "backup"
+    if filename.endswith(".jsonl.lock"):
+        return "lock"
+    if filename.endswith(".jsonl"):
+        return "active"
+    return "unknown"
+def _session_uuid_of(filename: str) -> Optional[str]:
+    """Return the session UUID the file belongs to, or None for siblings."""
+    if ".trajectory" in filename or ".acp-stream" in filename:
+        return None
+    if filename.endswith(".json") and not filename.endswith(".jsonl"):
+        return None
+    idx = filename.find(".jsonl")
+    if idx <= 0:
+        return None
+    return filename[:idx]
+def _is_transient(filename: str) -> bool:
+    if ".jsonl.bak-" in filename:
+        return False
+    return any(filename.endswith(s) for s in _TRANSIENT_SUFFIXES) or filename.endswith(".bak")
+def is_valid_query(session_id: str) -> Tuple[bool, str]:
+    """Reject queries shorter than MIN_PREFIX_LEN or with non-UUID chars."""
+    if not session_id:
+        return False, "session id 不能为空"
+    if len(session_id) < MIN_PREFIX_LEN:
+        return False, (
+            f"session id 太短（'{session_id}' 只有 {len(session_id)} 字符），"
+            f"至少需要 {MIN_PREFIX_LEN} 位 UUID 前缀"
+        )
+    if not _UUID_CHAR.match(session_id):
+        return False, f"session id 含非法字符（仅允许十六进制和连字符）: '{session_id}'"
+    return True, ""
+def resolve(
+    session_id: str,
+    base_dir: str = paths.SESSIONS_BASE,
+    agent: Optional[str] = None,
+    include_transient: bool = False,
+) -> Tuple[List[Tuple[str, str]], List[str]]:
+    """Resolve a UUID or prefix to its on-disk session files.
+    Returns ``(files, candidates)``:
+      - ``files``: ``[(abs_path, state), ...]`` for the resolved session,
+        sorted by lifecycle priority (active first). Empty when ambiguous or
+        when there are 0 matches.
+      - ``candidates``: when multiple distinct session UUIDs share the
+        prefix, this lists their full UUIDs sorted; otherwise empty.
+    """
+    if agent:
+        agent_dirs = [os.path.join(base_dir, agent)]
+    else:
+        agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
+    by_uuid: Dict[str, List[Tuple[str, str]]] = {}
+    for ad in agent_dirs:
+        sd = os.path.join(ad, "sessions")
+        if not os.path.isdir(sd):
+            continue
+        try:
+            entries = os.listdir(sd)
+        except OSError:
+            continue
+        for entry in entries:
+            if not entry.startswith(session_id):
+                continue
+            uuid = _session_uuid_of(entry)
+            if uuid is None:
+                continue
+            if not include_transient and _is_transient(entry):
+                continue
+            full = os.path.join(sd, entry)
+            if not os.path.isfile(full):
+                continue
+            state = classify_state(entry)
+            by_uuid.setdefault(uuid, []).append((full, state))
+    if not by_uuid:
+        return [], []
+    if len(by_uuid) > 1:
+        return [], sorted(by_uuid.keys())
+    files = next(iter(by_uuid.values()))
+    prio = {"active": 0, "lock": 1, "deleted": 2, "reset": 3, "backup": 4, "unknown": 9}
+    files.sort(key=lambda x: (prio.get(x[1], 9), x[0]))
+    return files, []
+def recent_session_ids(
+    base_dir: str = paths.SESSIONS_BASE,
+    limit: int = 5,
+) -> List[str]:
+    """Return the most-recently-modified active session UUIDs."""
+    found: List[Tuple[float, str]] = []
+    for ad in glob.glob(os.path.join(base_dir, "*")):
+        sd = os.path.join(ad, "sessions")
+        if not os.path.isdir(sd):
+            continue
+        try:
+            entries = os.listdir(sd)
+        except OSError:
+            continue
+        for entry in entries:
+            if not entry.endswith(".jsonl"):
+                continue
+            uuid = _session_uuid_of(entry)
+            if uuid is None or entry != f"{uuid}.jsonl":
+                continue
+            path = os.path.join(sd, entry)
+            try:
+                mtime = os.path.getmtime(path)
+            except OSError:
+                continue
+            found.append((mtime, uuid))
+    found.sort(reverse=True)
+    return [sid for _, sid in found[:limit]]

package/ocdiag/timeutil.py CHANGED Viewed

@@ -37,17 +37,6 @@ def fmt_duration(sec) -> str:
     return f"{s/3600:.1f}h"
-def fmt_duration_ms(ms) -> str:
-    if ms is None:
-        return "?"
-    s = float(ms) / 1000.0
-    if s < 60:
-        return f"{s:.1f}s"
-    if s < 3600:
-        return f"{s/60:.1f}min"
-    return f"{s/3600:.1f}h"
 def fmt_age(ms_delta) -> str:
     s = abs(float(ms_delta)) / 1000
     if s < 60:

package/ocdiag/tokens.py CHANGED Viewed

@@ -16,10 +16,6 @@ def fmt_tokens(n) -> str:
     return str(n)
-def fmt_k(n) -> str:
-    return fmt_tokens(n)
 def percentile(sorted_list: List[float], p: float) -> Optional[float]:
     if not sorted_list:
         return None

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "openclaw-diag-cli",
-  "version": "0.1.3",
-  "description": "OpenClaw read-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
+  "version": "0.2.2",
+  "description": "OpenClaw observer-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
   "keywords": [
     "openclaw",
     "diagnostic",

package/tools/oc_session_extract.py CHANGED Viewed

@@ -4,16 +4,17 @@
 from __future__ import annotations
 import argparse
-import glob
 import json
 import os
 import sys
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Iterator, List, Optional, TextIO, Tuple
+from typing import Any, Dict, List, Optional, TextIO, Tuple
 sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
-from ocdiag import paths
+from ocdiag import paths, sessions
+from ocdiag.sensitive import sanitize_text
 DEFAULT_BASE_DIR = paths.SESSIONS_BASE
@@ -28,38 +29,6 @@ def human_size(n: int) -> str:
     return f"{n:.1f} PB"
-def classify_state(filename: str) -> str:
-    if filename.endswith(".jsonl"):
-        return "active"
-    if ".jsonl.deleted." in filename:
-        return "deleted"
-    if ".jsonl.reset." in filename:
-        return "reset"
-    if ".jsonl.bak-" in filename:
-        return "backup"
-    return "unknown"
-def find_session_files(session_id, base_dir=DEFAULT_BASE_DIR, agent=None):
-    if agent:
-        agent_dirs = [os.path.join(base_dir, agent)]
-    else:
-        agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
-    found = []
-    for agent_dir in agent_dirs:
-        sessions_dir = os.path.join(agent_dir, "sessions")
-        if not os.path.isdir(sessions_dir):
-            continue
-        pattern = os.path.join(sessions_dir, f"{session_id}.jsonl*")
-        for path in sorted(glob.glob(pattern)):
-            name = os.path.basename(path)
-            if ".trajectory" in name:
-                continue
-            state = classify_state(name)
-            found.append((path, state))
-    return found
 def stream_records(path):
     with open(path, "r", encoding="utf-8", errors="replace") as f:
         for i, line in enumerate(f, start=1):
@@ -85,23 +54,54 @@ def write_header(out, path, state):
     out.write(SEPARATOR + "\n\n")
-def extract_file(path, state, out, pretty=True, type_filter=None):
+def _sanitize_record(obj):
+    """Walk a session record and scrub free-form text content fields.
+    Sessions store user/assistant messages under ``message.content``. We don't
+    rewrite tool args or metadata: those keep structure that matters for
+    diagnosis. We only scrub free-form prose where secrets typically live
+    (user-pasted tokens, error tracebacks).
+    """
+    if not isinstance(obj, dict):
+        return obj
+    msg = obj.get("message")
+    if isinstance(msg, dict):
+        content = msg.get("content")
+        if isinstance(content, str):
+            msg["content"] = sanitize_text(content)
+        elif isinstance(content, list):
+            for part in content:
+                if isinstance(part, dict):
+                    for k in ("text", "content"):
+                        v = part.get(k)
+                        if isinstance(v, str):
+                            part[k] = sanitize_text(v)
+        for k in ("text", "summary"):
+            v = msg.get(k)
+            if isinstance(v, str):
+                msg[k] = sanitize_text(v)
+    return obj
+def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True):
     write_header(out, path, state)
     written = 0
     for line_no, obj, raw, err in stream_records(path):
         if err is not None:
             out.write(f"--- Record {line_no} [PARSE ERROR: {err}] ---\n")
-            out.write(raw + "\n\n")
+            out.write((sanitize_text(raw) if sanitize else raw) + "\n\n")
             written += 1
             continue
         rtype = obj.get("type", "?") if isinstance(obj, dict) else "?"
         if type_filter is not None and rtype not in type_filter:
             continue
         out.write(f"--- Record {line_no} [type: {rtype}] ---\n")
+        if sanitize:
+            obj = _sanitize_record(obj)
         if pretty:
             out.write(json.dumps(obj, indent=2, ensure_ascii=False))
         else:
-            out.write(raw)
+            out.write(json.dumps(obj, ensure_ascii=False) if sanitize else raw)
         out.write("\n\n")
         written += 1
     return written
@@ -109,7 +109,23 @@ def extract_file(path, state, out, pretty=True, type_filter=None):
 def summarize_file(path, state, out):
     write_header(out, path, state)
-    counts: dict = {}
+    info = _collect_summary(path, sanitize=False)
+    out.write(f"Total records: {info['total_records']}\n")
+    if info["parse_errors"]:
+        out.write(f"Parse errors: {info['parse_errors']}\n")
+    out.write("By type:\n")
+    by_type = info["by_type"]
+    for k in sorted(by_type, key=lambda k: -by_type[k]):
+        out.write(f"  {k}: {by_type[k]}\n")
+    tr = info["time_range"]
+    if tr["start"] or tr["end"]:
+        out.write(f"Time range: {tr['start'] or '?'}  →  {tr['end'] or '?'}\n")
+    out.write("\n")
+def _collect_summary(path: str, sanitize: bool = True) -> Dict[str, Any]:
+    """Walk one file and produce a summary block (used by text + JSON mode)."""
+    by_type: Dict[str, int] = {}
     total = 0
     earliest: Optional[str] = None
     latest: Optional[str] = None
@@ -120,25 +136,40 @@ def summarize_file(path, state, out):
             parse_errors += 1
             continue
         if not isinstance(obj, dict):
-            counts["<non-object>"] = counts.get("<non-object>", 0) + 1
+            by_type["<non-object>"] = by_type.get("<non-object>", 0) + 1
             continue
         rtype = obj.get("type", "<no-type>")
-        counts[rtype] = counts.get(rtype, 0) + 1
+        by_type[rtype] = by_type.get(rtype, 0) + 1
         ts = obj.get("timestamp")
         if isinstance(ts, str):
             if earliest is None or ts < earliest:
                 earliest = ts
             if latest is None or ts > latest:
                 latest = ts
-    out.write(f"Total records: {total}\n")
-    if parse_errors:
-        out.write(f"Parse errors: {parse_errors}\n")
-    out.write("By type:\n")
-    for k in sorted(counts, key=lambda k: -counts[k]):
-        out.write(f"  {k}: {counts[k]}\n")
-    if earliest or latest:
-        out.write(f"Time range: {earliest or '?'}  →  {latest or '?'}\n")
-    out.write("\n")
+    return {
+        "total_records": total,
+        "parse_errors": parse_errors,
+        "by_type": by_type,
+        "time_range": {"start": earliest, "end": latest},
+    }
+def _collect_records(path: str, type_filter, sanitize: bool) -> List[Dict]:
+    out: List[Dict] = []
+    for line_no, obj, raw, err in stream_records(path):
+        if err is not None:
+            out.append({"line": line_no, "parse_error": err, "raw": raw})
+            continue
+        if not isinstance(obj, dict):
+            out.append({"line": line_no, "value": obj})
+            continue
+        rtype = obj.get("type", "?")
+        if type_filter is not None and rtype not in type_filter:
+            continue
+        if sanitize:
+            obj = _sanitize_record(obj)
+        out.append(obj)
+    return out
 def list_files(files, out):
@@ -176,32 +207,118 @@ def select_files(files, extract_all, _out):
     return []
+def _resolve_or_die(session_id: str, base_dir: str, agent: Optional[str],
+                    include_transient: bool) -> List[Tuple[str, str]]:
+    ok, msg = sessions.is_valid_query(session_id)
+    if not ok:
+        sys.stderr.write(f"Error: {msg}\n")
+        sys.exit(2)
+    files, candidates = sessions.resolve(
+        session_id, base_dir=base_dir, agent=agent,
+        include_transient=include_transient,
+    )
+    if candidates:
+        sys.stderr.write(
+            f"Error: 前缀 '{session_id}' 匹配多个 session（请补长前缀）：\n"
+        )
+        for sid in candidates:
+            sys.stderr.write(f"    {sid}\n")
+        sys.exit(1)
+    if not files:
+        sys.stderr.write(
+            f"Error: 找不到 session '{session_id}'（在 {base_dir} 下）"
+            + (f" agent={agent}" if agent else "")
+            + "\n"
+        )
+        suggestions = sessions.recent_session_ids(base_dir, limit=5)
+        if suggestions:
+            sys.stderr.write("  最近的 5 个 session：\n")
+            for sid in suggestions:
+                sys.stderr.write(f"    {sid}\n")
+            sys.stderr.write("  提示：完整 UUID 或前缀（至少 8 位）都可。\n")
+        sys.exit(1)
+    return files
+def _emit_json(session_id: str, selected: List[Tuple[str, str]],
+               out_fp: TextIO, summary_only: bool, type_filter,
+               sanitize: bool) -> None:
+    files_payload: List[Dict[str, Any]] = []
+    aggregate_total = 0
+    aggregate_by_type: Dict[str, int] = {}
+    aggregate_start: Optional[str] = None
+    aggregate_end: Optional[str] = None
+    for path, state in selected:
+        try:
+            size = os.path.getsize(path)
+        except OSError:
+            size = 0
+        entry: Dict[str, Any] = {
+            "path": path,
+            "state": state,
+            "size_bytes": size,
+        }
+        if summary_only:
+            s = _collect_summary(path, sanitize=sanitize)
+            entry["summary"] = s
+            aggregate_total += s["total_records"]
+            for k, v in s["by_type"].items():
+                aggregate_by_type[k] = aggregate_by_type.get(k, 0) + v
+            tr = s["time_range"]
+            if tr["start"] and (aggregate_start is None or tr["start"] < aggregate_start):
+                aggregate_start = tr["start"]
+            if tr["end"] and (aggregate_end is None or tr["end"] > aggregate_end):
+                aggregate_end = tr["end"]
+        else:
+            entry["records"] = _collect_records(path, type_filter, sanitize=sanitize)
+        files_payload.append(entry)
+    payload: Dict[str, Any] = {
+        "session_id": session_id,
+        "files": files_payload,
+        "generated_at": datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
+        "sanitized": sanitize,
+    }
+    if summary_only:
+        payload["summary"] = {
+            "total_records": aggregate_total,
+            "by_type": aggregate_by_type,
+            "time_range": {"start": aggregate_start, "end": aggregate_end},
+        }
+    out_fp.write(json.dumps(payload, ensure_ascii=False, indent=2))
+    out_fp.write("\n")
 def main() -> int:
     p = argparse.ArgumentParser(
+        prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
         description="Extract OpenClaw session JSONL files into human-readable format.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
     )
-    p.add_argument("session_id", help="Session UUID to extract")
+    p.add_argument("session_id", help="Session UUID (full or 8+ char prefix)")
     p.add_argument("-o", "--output", help="Write output to FILE instead of stdout")
     p.add_argument("-a", "--all", action="store_true",
-                   help="Extract all versions found (active + deleted + reset + backup)")
-    p.add_argument("--list", action="store_true", help="List found files; do not extract")
+                   help="Extract all versions (active + reset + deleted + backup + lock)")
+    p.add_argument("--list", action="store_true",
+                   help="List all matching files (incl. .lock); do not extract")
     p.add_argument("--agent", help="Limit search to specific agent directory")
     p.add_argument("--base-dir", default=DEFAULT_BASE_DIR, help="Override base directory")
     p.add_argument("--no-pretty", action="store_true", help="Output raw JSON lines")
     p.add_argument("--types", help="Filter by record type (comma-separated, e.g. 'message,toolCall')")
     p.add_argument("--summary", action="store_true",
                    help="Show record-count summary instead of full extraction")
+    p.add_argument("--json", action="store_true",
+                   help="Emit structured JSON (compatible with state collectors' --json)")
+    p.add_argument("--unmask", action="store_true",
+                   help="Disable default sanitization of secret-shaped substrings "
+                        "in message content (off = scrubbed)")
     args = p.parse_args()
-    files = find_session_files(args.session_id, args.base_dir, args.agent)
-    if not files:
-        sys.stderr.write(
-            f"Error: no files found for session ID '{args.session_id}' under {args.base_dir}"
-            + (f" (agent={args.agent})" if args.agent else "")
-            + "\n"
-        )
-        return 1
+    # --list and --all see lock files; default mode hides them so non-interactive
+    # callers (cron, jq pipes) don't trip on a transient .jsonl.lock sibling.
+    include_transient = bool(args.all or args.list)
+    files = _resolve_or_die(args.session_id, args.base_dir, args.agent,
+                            include_transient=include_transient)
     if args.list:
         list_files(files, sys.stdout)
@@ -226,12 +343,18 @@ def main() -> int:
         out_fp = sys.stdout
     try:
-        for path, state in selected:
-            if args.summary:
-                summarize_file(path, state, out_fp)
-            else:
-                extract_file(path, state, out_fp, pretty=not args.no_pretty,
-                             type_filter=type_filter)
+        if args.json:
+            _emit_json(args.session_id, selected, out_fp,
+                       summary_only=args.summary,
+                       type_filter=type_filter,
+                       sanitize=not args.unmask)
+        else:
+            for path, state in selected:
+                if args.summary:
+                    summarize_file(path, state, out_fp)
+                else:
+                    extract_file(path, state, out_fp, pretty=not args.no_pretty,
+                                 type_filter=type_filter, sanitize=not args.unmask)
     except BrokenPipeError:
         try:
             sys.stdout.flush()