openclaw-diag-cli 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -71
- package/bin/openclaw-diag.js +65 -176
- package/diag/01_sys_health.py +0 -2
- package/diag/02_environment.py +32 -6
- package/diag/03_configuration.py +4 -1
- package/diag/04_gateway.py +30 -8
- package/diag/05_recent_errors.py +24 -14
- package/diag/06_cron_jobs.py +4 -41
- package/diag/07_performance.py +114 -42
- package/diag/08_sessions.py +2 -54
- package/diag/09_plugin_diag.py +52 -25
- package/diag/10_shell_history.py +28 -10
- package/lib/bundle.py +6 -13
- package/ocdiag/__init__.py +1 -1
- package/ocdiag/cli.py +16 -1
- package/ocdiag/dispatcher.py +140 -53
- package/ocdiag/doctor.py +162 -0
- package/ocdiag/jsonlog.py +0 -5
- package/ocdiag/paths.py +0 -1
- package/ocdiag/recent_logs.py +0 -3
- package/ocdiag/sensitive.py +95 -1
- package/ocdiag/timeutil.py +0 -11
- package/ocdiag/tokens.py +0 -4
- package/package.json +2 -2
- package/tools/oc_session_extract.py +75 -7
- package/tools/oc_session_trace.py +31 -9
package/ocdiag/doctor.py
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"""``ocdiag doctor`` — environment health-check.
|
|
2
|
+
|
|
3
|
+
Sole authoritative implementation. Both Node (`bin/openclaw-diag.js doctor`)
|
|
4
|
+
and Python (`bin/ocdiag doctor` / `python3 -m ocdiag.doctor`) entry points
|
|
5
|
+
call this function. The Node entry is now a thin spawn wrapper.
|
|
6
|
+
|
|
7
|
+
Checks:
|
|
8
|
+
- Python version (>= 3.8)
|
|
9
|
+
- ocdiag package importable + version
|
|
10
|
+
- All registered diag scripts respond to ``--help``
|
|
11
|
+
- openclaw.json exists at expected path
|
|
12
|
+
|
|
13
|
+
Node version isn't visible from Python so we accept it as a passthrough
|
|
14
|
+
argument; if absent, doctor reports node check as ``skipped``.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import argparse
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import subprocess
|
|
23
|
+
import sys
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
from typing import Optional
|
|
26
|
+
|
|
27
|
+
REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _node_status(node_version: Optional[str]) -> dict:
    """Classify the Node.js version string handed over by the Node entry point.

    Args:
        node_version: Version text such as ``"v22.22.2"`` or ``"22.22.2"``.
            ``None`` or an empty string means no version was supplied.

    Returns:
        A dict that always carries ``version`` and ``ok``. When no version
        was supplied the check is reported as skipped (``ok`` and ``skipped``
        are both True, with a ``reason``). Otherwise ``required`` is included,
        and ``reason`` is ``"unparseable"`` when the major component is not
        an integer.
    """
    if not node_version:
        return {"version": None, "ok": True, "skipped": True,
                "reason": "Node check is performed by the Node entry; "
                          "ocdiag is fine without Node when invoked from Python"}
    # Normalize: accept "v22.22.2" or "22.22.2". Surrounding whitespace (e.g.
    # a trailing newline from a captured `node --version`) and an uppercase
    # "V" are tolerated as well.
    normalized = node_version.strip()
    if normalized[:1] in ("v", "V"):
        normalized = normalized[1:]
    try:
        major = int(normalized.split(".", 1)[0])
    except ValueError:
        # Keep "required" in the result so the text report can still tell the
        # user which version is needed.
        return {"version": normalized, "ok": False, "reason": "unparseable",
                "required": ">=18"}
    return {"version": normalized, "ok": major >= 18,
            "required": ">=18"}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _python_status() -> dict:
    """Describe the running interpreter and whether it meets the 3.8 minimum.

    Returns:
        A dict with ``version`` (``major.minor.micro``), ``ok``, ``required``
        and ``executable`` (the path from ``sys.executable``).
    """
    info = sys.version_info
    version_text = ".".join(str(part) for part in (info.major, info.minor, info.micro))
    status = {"version": version_text, "ok": info >= (3, 8)}
    status["required"] = ">=3.8"
    status["executable"] = sys.executable
    return status
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _ocdiag_status() -> dict:
    """Check that the ``ocdiag`` package can be imported.

    Returns:
        ``{"ok": True, "version": ...}`` on success, where the version falls
        back to ``"?"`` if the package has no ``__version__``; otherwise
        ``{"ok": False, "error": ...}`` with the import error text truncated
        to 200 characters.
    """
    try:
        import ocdiag as pkg  # type: ignore
    except ImportError as exc:
        return {"ok": False, "error": str(exc)[:200]}
    return {"ok": True, "version": getattr(pkg, "__version__", "?")}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _diag_scripts_status() -> dict:
    """Run every registered diagnostic script with ``--help`` and report failures.

    The scripts come from ``STATE_COLLECTORS`` and ``OBJECT_INSPECTORS`` in
    ``ocdiag.dispatcher``. Each entry is unpacked as ``(id, label, path)``,
    with the path resolved against ``REPO_ROOT``.

    Returns:
        ``{"ok": bool, "total": int, "failed": [...]}``. Each failure entry
        has a ``script`` id plus either ``rc``/``stderr`` (non-zero exit) or
        a ``reason`` (missing file, timeout, spawn failure, import failure).
        A health check should report these conditions rather than crash on
        them, so none of them raise.
    """
    try:
        from ocdiag.dispatcher import STATE_COLLECTORS, OBJECT_INSPECTORS
    except ImportError as e:
        # Without the registry there is nothing to probe; surface this as a
        # failed check instead of a traceback.
        return {
            "ok": False,
            "total": 0,
            "failed": [{
                "script": "ocdiag.dispatcher",
                "reason": "import failed",
                "error": str(e)[:200],
            }],
        }

    timeout_s = 10
    all_scripts = [
        (mid, REPO_ROOT / rel)
        for mid, _label, rel in (*STATE_COLLECTORS, *OBJECT_INSPECTORS)
    ]
    failed = []
    for mid, path in all_scripts:
        if not path.is_file():
            failed.append({"script": mid, "reason": "missing", "path": str(path)})
            continue
        try:
            r = subprocess.run(
                [sys.executable, str(path), "--help"],
                capture_output=True, text=True, timeout=timeout_s, check=False,
            )
        except subprocess.TimeoutExpired:
            # A hanging script is a failed check, not a reason to abort the doctor.
            failed.append({"script": mid, "reason": "timeout", "timeout_s": timeout_s})
            continue
        except OSError as e:
            failed.append({"script": mid, "reason": "spawn failed", "error": str(e)[:200]})
            continue
        if r.returncode != 0:
            failed.append({
                "script": mid,
                "rc": r.returncode,
                "stderr": (r.stderr or "")[:200],
            })
    return {"ok": not failed, "total": len(all_scripts), "failed": failed}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _openclaw_config_status() -> dict:
    """Locate ``openclaw.json`` and report whether it exists.

    A non-empty ``OPENCLAW_CONFIG`` environment variable is used as the path
    verbatim. Otherwise the file is looked up inside ``OPENCLAW_HOME``,
    which defaults to ``~/.openclaw`` when that variable is unset.

    Returns:
        ``{"path": <resolved path>, "exists": <bool>}``.
    """
    explicit = os.environ.get("OPENCLAW_CONFIG")
    if explicit:
        cfg_path = explicit
    else:
        default_home = os.path.join(os.path.expanduser("~"), ".openclaw")
        home_dir = os.environ.get("OPENCLAW_HOME", default_home)
        cfg_path = os.path.join(home_dir, "openclaw.json")
    return {"path": cfg_path, "exists": os.path.isfile(cfg_path)}
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def run(json_mode: bool = False, node_version: Optional[str] = None) -> int:
    """Execute the doctor check and print the report.

    Args:
        json_mode: When True, print the collected results as indented JSON;
            otherwise print one human-readable line per check.
        node_version: Node version string forwarded by the Node entry point,
            or ``None`` to mark the Node check as skipped.

    Returns:
        0 when the node, python, ocdiag and diag_scripts checks are all OK,
        1 otherwise. A missing ``openclaw.json`` is reported but does not
        affect the return code.
    """
    result = {
        "node": _node_status(node_version),
        "python": _python_status(),
        "ocdiag": _ocdiag_status(),
        "diag_scripts": _diag_scripts_status(),
        "openclaw_config": _openclaw_config_status(),
    }

    ok = (
        result["node"].get("ok", True)
        and result["python"]["ok"]
        and result["ocdiag"]["ok"]
        and result["diag_scripts"]["ok"]
    )

    if json_mode:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        node = result["node"]
        if node.get("skipped"):
            print("ℹ Node check skipped (run via npx to verify Node version)")
        elif node["ok"]:
            print(f"✓ Node v{node['version']}")
        else:
            print(f"✗ Node v{node.get('version','?')} (need {node.get('required','?')})")

        py = result["python"]
        mark = "✓" if py["ok"] else "✗"
        print(f"{mark} Python {py['version']} ({py['executable']})")

        oc = result["ocdiag"]
        if oc["ok"]:
            print(f"✓ ocdiag package importable (version {oc['version']})")
        else:
            print(f"✗ ocdiag package not importable: {oc.get('error','?')}")

        ds = result["diag_scripts"]
        if ds["ok"]:
            print(f"✓ All {ds['total']} diagnostics respond to --help")
        else:
            print(f"✗ {len(ds['failed'])}/{ds['total']} diagnostics failed --help:")
            for item in ds["failed"]:
                # Entries without a return code (e.g. a missing script) carry
                # a "reason" instead; show it rather than an opaque "rc=?".
                if "rc" in item:
                    detail = f"rc={item['rc']}"
                else:
                    detail = item.get("reason", "?")
                print(f" {item.get('script','?')} ({detail})")

        cfg = result["openclaw_config"]
        if cfg["exists"]:
            print(f"✓ OpenClaw config present ({cfg['path']})")
        else:
            print(f"ℹ OpenClaw config not found ({cfg['path']}) — diagnostics will run but report missing")

    return 0 if ok else 1
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def main(argv=None) -> int:
    """Parse the doctor command line and run the health check.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        The return code produced by ``run``.
    """
    parser = argparse.ArgumentParser(
        prog="ocdiag-doctor",
        description="Health-check the ocdiag install + environment",
    )
    parser.add_argument("--json", action="store_true", help="Emit JSON output")
    node_help = ("Node version string (e.g. '20.12.1') passed in by the Node "
                 "shell. Omit when running from Python directly.")
    parser.add_argument("--node-version", default=None, help=node_help)
    opts = parser.parse_args(argv)
    return run(json_mode=opts.json, node_version=opts.node_version)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
if __name__ == "__main__":
|
|
162
|
+
sys.exit(main())
|
package/ocdiag/jsonlog.py
CHANGED
|
@@ -58,8 +58,3 @@ def parse_name(obj: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
|
|
58
58
|
return p.get("plugin"), p.get("subsystem")
|
|
59
59
|
|
|
60
60
|
|
|
61
|
-
def log_level(obj: Dict[str, Any]) -> str:
|
|
62
|
-
meta = obj.get("_meta") or {}
|
|
63
|
-
if isinstance(meta, dict):
|
|
64
|
-
return meta.get("logLevelName", "") or ""
|
|
65
|
-
return ""
|
package/ocdiag/paths.py
CHANGED
package/ocdiag/recent_logs.py
CHANGED
package/ocdiag/sensitive.py
CHANGED
|
@@ -1,4 +1,23 @@
|
|
|
1
|
-
"""Mask sensitive config values
|
|
1
|
+
"""Mask sensitive config values + sanitize free-form text.
|
|
2
|
+
|
|
3
|
+
Two layers:
|
|
4
|
+
|
|
5
|
+
1. ``mask`` / ``safe_val`` / ``is_sensitive_key`` — used when we already know
|
|
6
|
+
we're looking at a config key/value pair (configuration flatten, env vars).
|
|
7
|
+
Masking is keyed off the *key name*.
|
|
8
|
+
|
|
9
|
+
2. ``sanitize_text`` — used when scanning free-form text (shell history lines,
|
|
10
|
+
plugin error messages, systemd unit files, session message bodies). We don't
|
|
11
|
+
know the structure, so we run a pattern-based scrubber. Best-effort: the
|
|
12
|
+
patterns below cover the common token shapes (Anthropic/OpenAI sk-, GitHub
|
|
13
|
+
ghp_/gho_/ghs_/github_pat_, npm npm_, AWS AKIA, ``Bearer xxx``, URL
|
|
14
|
+
credentials, ``KEY=value`` with secret-ish key). It will miss bespoke or
|
|
15
|
+
obfuscated formats — callers who need stronger guarantees should mask the
|
|
16
|
+
whole field.
|
|
17
|
+
|
|
18
|
+
The ``--unmask`` flag, declared in ``ocdiag.cli``, propagates to call sites
|
|
19
|
+
that opt-in to honouring it (currently the session extract tool).
|
|
20
|
+
"""
|
|
2
21
|
|
|
3
22
|
from __future__ import annotations
|
|
4
23
|
|
|
@@ -39,3 +58,78 @@ def safe_val(key: str, val, max_len: int = 300) -> str:
|
|
|
39
58
|
return mask(val) if val else '""'
|
|
40
59
|
s = str(val)
|
|
41
60
|
return s[:max_len] + "..." if len(s) > max_len else s
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# ── sanitize_text ──
|
|
64
|
+
|
|
65
|
+
# Token shapes worth scrubbing by themselves (no key=value context).
# Each pattern matches the *whole* secret and swaps it for a short placeholder
# that still hints at what kind of secret it was.
_TOKEN_PATTERNS = [
    # Anthropic / OpenAI style (`sk-...` / `sk-ant-...`)
    (re.compile(r"\b(sk-(?:ant-)?[A-Za-z0-9_\-]{16,})"), "sk-<***>"),
    # GitHub PAT family
    (re.compile(r"\b(gh[posu]_[A-Za-z0-9]{20,})"), "<gh-token>"),
    (re.compile(r"\b(github_pat_[A-Za-z0-9_]{20,})"), "<github_pat>"),
    # npm
    (re.compile(r"\b(npm_[A-Za-z0-9]{30,})"), "<npm_token>"),
    # AWS access key id
    (re.compile(r"\b(AKIA[0-9A-Z]{16})"), "<AKIA-***>"),
    # Authorization headers
    (re.compile(r"(Bearer\s+)([A-Za-z0-9_\-\.=]{8,})", re.IGNORECASE), r"\1<***>"),
    # URLs with embedded credentials: scheme://user:pass@host
    (re.compile(r"([a-zA-Z][a-zA-Z0-9+\-.]*://)([^/\s:@]+):([^/\s@]+)@"), r"\1<user>:<***>@"),
]

# Key names that look secret-ish: an identifier containing one of the words
# below. The identifier part *before* the word is optional, so keys that begin
# with the word (`PASSWORD=`, `token:`, `Authorization:`) match as well as
# keys that merely contain it (`API_KEY=`, `db.password:`).
_SECRET_KEY_SRC = (
    r"\b((?:[A-Za-z_][A-Za-z0-9_\-\.]*)?"
    r"(?:KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|AUTH|PRIVATE|SIGNING)[A-Za-z0-9_\-\.]*)"
)

# Optional HTTP auth scheme in front of the credential. It is consumed as part
# of the value so `Authorization: Basic <cred>` is masked as a whole instead of
# masking only the scheme word and leaving the credential behind.
_AUTH_SCHEME_SRC = r"(?:(?:Bearer|Basic|Digest|Token)\s+)?"

# KEY=VALUE / KEY: VALUE in free text where the key looks secret-ish.
# The value runs up to whitespace, quote, or end-of-line. Three forms:
#   KEY=value     (env var, dotenv)
#   KEY="value"   (shell quoted)
#   KEY: value    (yaml-ish)
_KV_BARE = re.compile(
    _SECRET_KEY_SRC + r"\s*=\s*(" + _AUTH_SCHEME_SRC + r"[^\s\"';#]+)",
    re.IGNORECASE,
)
_KV_QUOTED = re.compile(
    _SECRET_KEY_SRC + r"\s*=\s*([\"'])([^\"']+)\2",
    re.IGNORECASE,
)
_KV_COLON = re.compile(
    _SECRET_KEY_SRC + r"\s*:\s*(" + _AUTH_SCHEME_SRC + r"[^\s\"';#,}\]]+)",
    re.IGNORECASE,
)


def sanitize_text(text: str, context: str = "generic") -> str:
    """Scrub well-known secret shapes from free-form text.

    Best-effort, not a guarantee.

    Args:
        text: Text to scrub. Non-string or empty input is returned unchanged.
        context: Accepted for callers that label the text source; this
            function does not currently use it.

    Returns:
        The text with secret-looking ``KEY=value`` / ``KEY: value`` pairs and
        bare token shapes replaced by placeholders.
    """
    if not isinstance(text, str) or not text:
        return text

    # Order: longer/more-specific (KV with quotes) first, then bare KV, then
    # bare token shapes. KV passes also catch things like `API_KEY=abc` where
    # the value would not match a token pattern.
    def _kv_quoted_sub(m):
        # group(2) is the quote character; keep it on both sides of the mask.
        return f"{m.group(1)}={m.group(2)}<***>{m.group(2)}"

    def _kv_bare_sub(m):
        return f"{m.group(1)}=<***>"

    def _kv_colon_sub(m):
        return f"{m.group(1)}: <***>"

    text = _KV_QUOTED.sub(_kv_quoted_sub, text)
    text = _KV_BARE.sub(_kv_bare_sub, text)
    text = _KV_COLON.sub(_kv_colon_sub, text)
    for pat, repl in _TOKEN_PATTERNS:
        text = pat.sub(repl, text)
    return text
|
package/ocdiag/timeutil.py
CHANGED
|
@@ -37,17 +37,6 @@ def fmt_duration(sec) -> str:
|
|
|
37
37
|
return f"{s/3600:.1f}h"
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def fmt_duration_ms(ms) -> str:
|
|
41
|
-
if ms is None:
|
|
42
|
-
return "?"
|
|
43
|
-
s = float(ms) / 1000.0
|
|
44
|
-
if s < 60:
|
|
45
|
-
return f"{s:.1f}s"
|
|
46
|
-
if s < 3600:
|
|
47
|
-
return f"{s/60:.1f}min"
|
|
48
|
-
return f"{s/3600:.1f}h"
|
|
49
|
-
|
|
50
|
-
|
|
51
40
|
def fmt_age(ms_delta) -> str:
|
|
52
41
|
s = abs(float(ms_delta)) / 1000
|
|
53
42
|
if s < 60:
|
package/ocdiag/tokens.py
CHANGED
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "openclaw-diag-cli",
|
|
3
|
-
"version": "0.1
|
|
4
|
-
"description": "OpenClaw
|
|
3
|
+
"version": "0.2.1",
|
|
4
|
+
"description": "OpenClaw observer-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"openclaw",
|
|
7
7
|
"diagnostic",
|
|
@@ -9,11 +9,12 @@ import json
|
|
|
9
9
|
import os
|
|
10
10
|
import sys
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import List, Optional, TextIO, Tuple
|
|
13
13
|
|
|
14
14
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
15
15
|
|
|
16
16
|
from ocdiag import paths
|
|
17
|
+
from ocdiag.sensitive import sanitize_text
|
|
17
18
|
|
|
18
19
|
|
|
19
20
|
DEFAULT_BASE_DIR = paths.SESSIONS_BASE
|
|
@@ -40,6 +41,29 @@ def classify_state(filename: str) -> str:
|
|
|
40
41
|
return "unknown"
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
def _recent_session_ids(base_dir, limit=5):
    """Return the most-recently-modified active session ids.

    Scans ``<base_dir>/*/sessions`` for files ending in ``.jsonl``, skipping
    names that contain ``.trajectory`` or ``.jsonl.reset.``.

    Args:
        base_dir: Directory whose immediate children each may hold a
            ``sessions`` directory.
        limit: Maximum number of ids to return.

    Returns:
        File names with the ``.jsonl`` suffix removed, newest first. An empty
        list is returned when nothing readable is found.
    """
    suffix = ".jsonl"
    found: List[Tuple[float, str]] = []
    # glob.escape keeps metacharacters in base_dir (e.g. "[") literal, so an
    # unusual path does not silently produce zero suggestions.
    for agent_dir in glob.glob(os.path.join(glob.escape(base_dir), "*")):
        sessions_dir = os.path.join(agent_dir, "sessions")
        try:
            entries = os.listdir(sessions_dir)
        except OSError:
            # Not a directory, missing, or unreadable: this runs on an error
            # path to offer hints, so skip rather than raise.
            continue
        for entry in entries:
            if not entry.endswith(suffix):
                continue
            if ".trajectory" in entry or ".jsonl.reset." in entry:
                continue
            try:
                mtime = os.path.getmtime(os.path.join(sessions_dir, entry))
            except OSError:
                continue
            found.append((mtime, entry[:-len(suffix)]))
    found.sort(reverse=True)
    return [sid for _, sid in found[:limit]]
|
|
65
|
+
|
|
66
|
+
|
|
43
67
|
def find_session_files(session_id, base_dir=DEFAULT_BASE_DIR, agent=None):
|
|
44
68
|
if agent:
|
|
45
69
|
agent_dirs = [os.path.join(base_dir, agent)]
|
|
@@ -85,23 +109,57 @@ def write_header(out, path, state):
|
|
|
85
109
|
out.write(SEPARATOR + "\n\n")
|
|
86
110
|
|
|
87
111
|
|
|
88
|
-
def
|
|
112
|
+
def _sanitize_record(obj):
    """Scrub free-form text inside a session record's ``message``.

    Only prose-like fields are passed through ``sanitize_text``:
    ``message.content`` when it is a string, the ``text`` / ``content``
    strings of each dict part when it is a list, and the message's own
    ``text`` / ``summary`` strings. Everything else (tool args, metadata) is
    left as-is so its structure stays available for diagnosis.

    The record is modified in place and also returned. Non-dict input, or a
    record whose ``message`` is not a dict, is returned untouched.
    """
    # NOTE(review): the text/summary pass is assumed to sit inside the
    # "message is a dict" branch, since it reads from the message — confirm.
    message = obj.get("message") if isinstance(obj, dict) else None
    if not isinstance(message, dict):
        return obj

    body = message.get("content")
    if isinstance(body, str):
        message["content"] = sanitize_text(body)
    elif isinstance(body, list):
        for part in (p for p in body if isinstance(p, dict)):
            for field in ("text", "content"):
                value = part.get(field)
                if isinstance(value, str):
                    part[field] = sanitize_text(value)

    # Text-ish fields stored directly on the message.
    for field in ("text", "summary"):
        value = message.get(field)
        if isinstance(value, str):
            message[field] = sanitize_text(value)
    return obj
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True):
    """Write one session file to ``out`` as labelled records.

    Args:
        path: Session file to read via ``stream_records``.
        state: Passed through to ``write_header``.
        out: Writable text stream.
        pretty: Emit indented JSON when True, one-line output otherwise.
        type_filter: When not ``None``, only records whose ``type`` is in
            this collection are written. Parse-error lines are always written.
        sanitize: When True, scrub secret-shaped text. Parse-error lines go
            through ``sanitize_text`` and parsed records through
            ``_sanitize_record``. When False, one-line mode writes the
            original raw line.

    Returns:
        The number of records written, parse-error entries included.
    """
    write_header(out, path, state)
    count = 0
    for line_no, obj, raw, err in stream_records(path):
        if err is not None:
            out.write(f"--- Record {line_no} [PARSE ERROR: {err}] ---\n")
            shown = sanitize_text(raw) if sanitize else raw
            out.write(shown + "\n\n")
            count += 1
            continue

        rtype = obj.get("type", "?") if isinstance(obj, dict) else "?"
        if type_filter is not None and rtype not in type_filter:
            continue

        out.write(f"--- Record {line_no} [type: {rtype}] ---\n")
        if sanitize:
            obj = _sanitize_record(obj)

        if pretty:
            rendered = json.dumps(obj, indent=2, ensure_ascii=False)
        elif sanitize:
            # The record may have been rewritten, so re-serialize it.
            rendered = json.dumps(obj, ensure_ascii=False)
        else:
            # Untouched record: keep the original raw line.
            rendered = raw
        out.write(rendered)
        out.write("\n\n")
        count += 1
    return count
|
|
@@ -178,6 +236,7 @@ def select_files(files, extract_all, _out):
|
|
|
178
236
|
|
|
179
237
|
def main() -> int:
|
|
180
238
|
p = argparse.ArgumentParser(
|
|
239
|
+
prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
|
|
181
240
|
description="Extract OpenClaw session JSONL files into human-readable format.",
|
|
182
241
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
183
242
|
)
|
|
@@ -192,15 +251,24 @@ def main() -> int:
|
|
|
192
251
|
p.add_argument("--types", help="Filter by record type (comma-separated, e.g. 'message,toolCall')")
|
|
193
252
|
p.add_argument("--summary", action="store_true",
|
|
194
253
|
help="Show record-count summary instead of full extraction")
|
|
254
|
+
p.add_argument("--unmask", action="store_true",
|
|
255
|
+
help="Disable default sanitization of secret-shaped substrings "
|
|
256
|
+
"in message content (off = scrubbed)")
|
|
195
257
|
args = p.parse_args()
|
|
196
258
|
|
|
197
259
|
files = find_session_files(args.session_id, args.base_dir, args.agent)
|
|
198
260
|
if not files:
|
|
199
261
|
sys.stderr.write(
|
|
200
|
-
f"Error:
|
|
201
|
-
+ (f"
|
|
262
|
+
f"Error: 找不到 session '{args.session_id}'(在 {args.base_dir} 下)"
|
|
263
|
+
+ (f" agent={args.agent}" if args.agent else "")
|
|
202
264
|
+ "\n"
|
|
203
265
|
)
|
|
266
|
+
suggestions = _recent_session_ids(args.base_dir, limit=5)
|
|
267
|
+
if suggestions:
|
|
268
|
+
sys.stderr.write(" 最近的 5 个 session:\n")
|
|
269
|
+
for sid in suggestions:
|
|
270
|
+
sys.stderr.write(f" {sid}\n")
|
|
271
|
+
sys.stderr.write(" 提示:完整 UUID 或前缀(至少 8 位)都可。\n")
|
|
204
272
|
return 1
|
|
205
273
|
|
|
206
274
|
if args.list:
|
|
@@ -231,7 +299,7 @@ def main() -> int:
|
|
|
231
299
|
summarize_file(path, state, out_fp)
|
|
232
300
|
else:
|
|
233
301
|
extract_file(path, state, out_fp, pretty=not args.no_pretty,
|
|
234
|
-
type_filter=type_filter)
|
|
302
|
+
type_filter=type_filter, sanitize=not args.unmask)
|
|
235
303
|
except BrokenPipeError:
|
|
236
304
|
try:
|
|
237
305
|
sys.stdout.flush()
|
|
@@ -52,14 +52,6 @@ def fmt_duration(ms: float) -> str:
|
|
|
52
52
|
return f"{m}m{s:.1f}s"
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def human_size(n: int) -> str:
|
|
56
|
-
for unit in ("B", "KB", "MB", "GB"):
|
|
57
|
-
if n < 1024:
|
|
58
|
-
return f"{n:.1f} {unit}" if unit != "B" else f"{n} {unit}"
|
|
59
|
-
n /= 1024
|
|
60
|
-
return f"{n:.1f} TB"
|
|
61
|
-
|
|
62
|
-
|
|
63
55
|
def extract_text(content: Any) -> str:
|
|
64
56
|
if isinstance(content, str):
|
|
65
57
|
return content
|
|
@@ -112,6 +104,29 @@ def find_session_file(
|
|
|
112
104
|
return candidates[0][0] if candidates else None
|
|
113
105
|
|
|
114
106
|
|
|
107
|
+
def _recent_session_ids(base_dir: str, limit: int = 5) -> List[str]:
    """Return the most-recently-modified active session ids.

    Scans ``<base_dir>/*/sessions`` for files ending in ``.jsonl``, skipping
    names that contain ``.trajectory`` or ``.jsonl.reset.``. Files with any
    other suffix are ignored by the ``.jsonl`` suffix check.

    Args:
        base_dir: Directory whose immediate children each may hold a
            ``sessions`` directory.
        limit: Maximum number of ids to return.

    Returns:
        File names with the ``.jsonl`` suffix removed, newest first. An empty
        list is returned when nothing readable is found.
    """
    suffix = ".jsonl"
    found: List[Tuple[float, str]] = []
    # glob.escape keeps metacharacters in base_dir (e.g. "[") literal, so an
    # unusual path does not silently produce zero suggestions.
    for agent_dir in glob.glob(os.path.join(glob.escape(base_dir), "*")):
        sessions_dir = os.path.join(agent_dir, "sessions")
        try:
            entries = os.listdir(sessions_dir)
        except OSError:
            # Not a directory, missing, or unreadable: this runs on an error
            # path to offer hints, so skip rather than raise.
            continue
        for entry in entries:
            if not entry.endswith(suffix):
                continue
            if ".trajectory" in entry or ".jsonl.reset." in entry:
                continue
            try:
                mtime = os.path.getmtime(os.path.join(sessions_dir, entry))
            except OSError:
                continue
            found.append((mtime, entry[:-len(suffix)]))
    found.sort(reverse=True)
    return [sid for _, sid in found[:limit]]
|
|
128
|
+
|
|
129
|
+
|
|
115
130
|
def find_trajectory_file(session_file: str) -> Optional[str]:
|
|
116
131
|
d = os.path.dirname(session_file)
|
|
117
132
|
base = os.path.basename(session_file).split(".jsonl")[0]
|
|
@@ -634,6 +649,7 @@ def format_json(session_id, session_file, user_msg_index, user_msg_id, analysis,
|
|
|
634
649
|
|
|
635
650
|
def main():
|
|
636
651
|
parser = argparse.ArgumentParser(
|
|
652
|
+
prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
|
|
637
653
|
description="Trace the processing timeline of a user message in an OpenClaw session.",
|
|
638
654
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
639
655
|
)
|
|
@@ -652,8 +668,14 @@ def main():
|
|
|
652
668
|
|
|
653
669
|
session_file = find_session_file(args.session_id, args.base_dir, args.agent)
|
|
654
670
|
if not session_file:
|
|
655
|
-
print(f"Error:
|
|
671
|
+
print(f"Error: 找不到 session '{args.session_id}'(在 {args.base_dir} 下)",
|
|
656
672
|
file=sys.stderr)
|
|
673
|
+
suggestions = _recent_session_ids(args.base_dir, limit=5)
|
|
674
|
+
if suggestions:
|
|
675
|
+
print(f" 最近的 5 个 session:", file=sys.stderr)
|
|
676
|
+
for sid in suggestions:
|
|
677
|
+
print(f" {sid}", file=sys.stderr)
|
|
678
|
+
print(f" 提示:UUID 完整 36 位,前缀也可(至少 8 位)。", file=sys.stderr)
|
|
657
679
|
sys.exit(1)
|
|
658
680
|
|
|
659
681
|
records = load_records(session_file)
|