openclaw-diag-cli 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -12
- package/bin/ocdiag +0 -1
- package/lib/__pycache__/bundle.cpython-310.pyc +0 -0
- package/ocdiag/__init__.py +1 -1
- package/ocdiag/__pycache__/__init__.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/cli.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/dispatcher.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/doctor.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/jsonlog.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/output.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/paths.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/recent_logs.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/sensitive.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/sessions.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/timeutil.cpython-310.pyc +0 -0
- package/ocdiag/__pycache__/tokens.cpython-310.pyc +0 -0
- package/ocdiag/paths.py +0 -16
- package/ocdiag/sessions.py +161 -0
- package/package.json +1 -1
- package/tools/oc_session_extract.py +151 -96
- package/tools/oc_session_trace.py +41 -61
package/README.md
CHANGED
|
@@ -6,7 +6,7 @@ OpenClaw 出问题时,**先跑这条命令再开 ticket**:
|
|
|
6
6
|
npx openclaw-diag-cli all
|
|
7
7
|
```
|
|
8
8
|
|
|
9
|
-
零安装、零依赖、observer-only —
|
|
9
|
+
零安装、零依赖、observer-only — 不改 OpenClaw 的配置 / session / cron / 服务状态;只读探测,可写诊断输出(落到工具自己的目录)。
|
|
10
10
|
|
|
11
11
|
## 这是什么
|
|
12
12
|
|
|
@@ -60,17 +60,16 @@ openclaw-diag trace <session-uuid>
|
|
|
60
60
|
```
|
|
61
61
|
── 模块 4:Gateway 状态 ──
|
|
62
62
|
|
|
63
|
-
•
|
|
64
|
-
|
|
65
|
-
•
|
|
66
|
-
|
|
67
|
-
•
|
|
68
|
-
|
|
69
|
-
•
|
|
70
|
-
最近 1h 内 134 次连接,平均存活 47s,无异常关闭
|
|
63
|
+
• Systemd: Active: active (running) since Sun 2026-05-17 20:45:02 CST; 11h ago
|
|
64
|
+
• Main PID: 142687 (node)
|
|
65
|
+
• 端口 18789 监听: 是 | HTTP 健康检查: 200
|
|
66
|
+
• 24h 启停事件: 0 次启动 — 近 24h 无重启/停止记录
|
|
67
|
+
• 模型 API [https://bedrock-runtime.us-east-1.amazonaws.com]: HTTP 200
|
|
68
|
+
• Channel WS: 最近 1h 5 次连接,平均存活 32s
|
|
69
|
+
• Gateway 错误码: 0 条
|
|
71
70
|
```
|
|
72
71
|
|
|
73
|
-
加 `--json`
|
|
72
|
+
加 `--json` 后输出结构化(覆盖文本里出现的核心字段,便于 jq / 监控管道)。
|
|
74
73
|
|
|
75
74
|
## 诊断列表
|
|
76
75
|
|
|
@@ -93,12 +92,12 @@ openclaw-diag list # 看完整列表
|
|
|
93
92
|
| `plugin_diag` | 插件状态一致性、ERROR/WARN、Hook 异常、Channel、外部依赖 DNS |
|
|
94
93
|
| `shell_history` | 高危命令、openclaw 命令、最近操作 |
|
|
95
94
|
|
|
96
|
-
**对象类(需要 session uuid
|
|
95
|
+
**对象类(需要 session uuid 或 ≥ 8 位前缀;都支持 `--json`)**
|
|
97
96
|
|
|
98
97
|
| 诊断 | 看什么 |
|
|
99
98
|
|---|---|
|
|
100
99
|
| `trace <uuid>` | 一条用户消息从进入到响应的完整时间轴 |
|
|
101
|
-
| `extract <uuid>` | session.jsonl 导出为可读格式(active / reset / deleted / backup
|
|
100
|
+
| `extract <uuid>` | session.jsonl 导出为可读格式(active / reset / deleted / backup 全状态;`--summary` 仅汇总) |
|
|
102
101
|
|
|
103
102
|
**其它命令**
|
|
104
103
|
|
package/bin/ocdiag
CHANGED
|
Binary file
|
package/ocdiag/__init__.py
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/ocdiag/paths.py
CHANGED
|
@@ -29,19 +29,3 @@ SERVICE_ENV_FILE = _env_path(
|
|
|
29
29
|
"OPENCLAW_SERVICE_ENV_FILE",
|
|
30
30
|
os.path.join(HOME, ".config", "systemd", "user", "openclaw-gateway.service.d", "env.conf"),
|
|
31
31
|
)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
def home() -> str:
|
|
35
|
-
return HOME
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def config_path() -> str:
|
|
39
|
-
return CONFIG
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
def log_dir() -> str:
|
|
43
|
-
return LOG_DIR
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def sessions_base() -> str:
|
|
47
|
-
return SESSIONS_BASE
|
|
package/ocdiag/sessions.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Shared session-file lookup utilities for trace/extract.
|
|
2
|
+
|
|
3
|
+
A "session" is identified by a UUID. On disk it can have multiple files:
|
|
4
|
+
<uuid>.jsonl — active
|
|
5
|
+
<uuid>.jsonl.lock — write lock (transient, filtered by default)
|
|
6
|
+
<uuid>.jsonl.deleted.<ts> — soft-deleted
|
|
7
|
+
<uuid>.jsonl.reset.<ts> — pre-reset snapshot
|
|
8
|
+
<uuid>.jsonl.bak-<pid> — backup snapshot
|
|
9
|
+
|
|
10
|
+
Sibling artifacts (NOT session content):
|
|
11
|
+
<uuid>.trajectory.jsonl, <uuid>.acp-stream.jsonl, <uuid>.json
|
|
12
|
+
|
|
13
|
+
Callers may pass a full UUID or a prefix of at least MIN_PREFIX_LEN chars.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import glob
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
from typing import Dict, List, Optional, Tuple
|
|
22
|
+
|
|
23
|
+
from . import paths
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
MIN_PREFIX_LEN = 8
|
|
27
|
+
|
|
28
|
+
_TRANSIENT_SUFFIXES = (".lock", ".tmp", ".swp")
|
|
29
|
+
|
|
30
|
+
_UUID_CHAR = re.compile(r"^[0-9a-fA-F-]+$")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def classify_state(filename: str) -> str:
    """Tag a session file with its lifecycle state.

    Args:
        filename: Basename of a file found in a ``sessions`` directory. A
            full path is accepted too; only its last component is inspected,
            so directory names can never influence the result.

    Returns:
        One of ``"deleted"``, ``"reset"``, ``"backup"``, ``"lock"``,
        ``"active"`` or ``"unknown"``.
    """
    name = os.path.basename(filename)

    # Sibling artifacts (<uuid>.trajectory.jsonl, <uuid>.acp-stream.jsonl)
    # also end in ".jsonl" but are not session content, so they must not be
    # reported as an active session file.
    if ".trajectory" in name or ".acp-stream" in name:
        return "unknown"

    # Archived variants carry extra text after ".jsonl", so they are matched
    # by marker before the plain suffix checks below.
    archived_markers = (
        (".jsonl.deleted.", "deleted"),
        (".jsonl.reset.", "reset"),
        (".jsonl.bak-", "backup"),
    )
    for marker, state in archived_markers:
        if marker in name:
            return state

    if name.endswith(".jsonl.lock"):
        return "lock"
    if name.endswith(".jsonl"):
        return "active"
    return "unknown"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _session_uuid_of(filename: str) -> Optional[str]:
    """Return the session id a file belongs to, or None for non-session files.

    The id is everything before the first ``.jsonl`` in the name.

    Args:
        filename: Basename of a file in a ``sessions`` directory.

    Returns:
        The leading part of the name, or ``None`` when the file is a sibling
        artifact (``.trajectory`` / ``.acp-stream`` / ``*.json``) or does not
        contain ``.jsonl`` after a non-empty stem.
    """
    if ".trajectory" in filename or ".acp-stream" in filename:
        return None
    # A name ending in ".json" can never also end in ".jsonl", so a single
    # suffix test is enough to reject the "<uuid>.json" metadata sibling.
    if filename.endswith(".json"):
        return None
    stem, marker, _rest = filename.partition(".jsonl")
    if not marker or not stem:
        # No ".jsonl" at all, or the name starts with it (empty id).
        return None
    return stem
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _is_transient(filename: str) -> bool:
    """Return True when *filename* ends in a transient suffix.

    Transient means one of ``_TRANSIENT_SUFFIXES`` or a bare ``.bak``.
    ``<uuid>.jsonl.bak-<pid>`` backup snapshots are never transient, whatever
    suffix follows them.
    """
    if ".jsonl.bak-" in filename:
        return False
    # str.endswith accepts a tuple, so one call covers every suffix.
    return filename.endswith((*_TRANSIENT_SUFFIXES, ".bak"))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def is_valid_query(session_id: str) -> Tuple[bool, str]:
    """Check that a session-id query is long enough and UUID-shaped.

    Args:
        session_id: Full session UUID or a prefix of one.

    Returns:
        ``(True, "")`` when the query is usable, otherwise ``(False, reason)``
        where ``reason`` is a user-facing message.
    """
    if not session_id:
        return False, "session id 不能为空"
    if len(session_id) < MIN_PREFIX_LEN:
        return False, (
            f"session id 太短('{session_id}' 只有 {len(session_id)} 字符),"
            f"至少需要 {MIN_PREFIX_LEN} 位 UUID 前缀"
        )
    # fullmatch, not match: "$" also matches just before a trailing newline,
    # so match() would accept a query such as "deadbeef\n".
    if not _UUID_CHAR.fullmatch(session_id):
        return False, f"session id 含非法字符(仅允许十六进制和连字符): '{session_id}'"
    return True, ""
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def resolve(
    session_id: str,
    base_dir: str = paths.SESSIONS_BASE,
    agent: Optional[str] = None,
    include_transient: bool = False,
) -> Tuple[List[Tuple[str, str]], List[str]]:
    """Resolve a UUID or prefix to its on-disk session files.

    Args:
        session_id: Full session id or a prefix of one. It is not validated
            here; callers are expected to run ``is_valid_query`` first.
        base_dir: Directory whose direct children are agent directories,
            each holding a ``sessions`` sub-directory.
        agent: When given, only ``<base_dir>/<agent>/sessions`` is searched.
        include_transient: Keep files that ``_is_transient`` would filter.

    Returns:
        ``(files, candidates)``:

        - ``files``: ``[(abs_path, state), ...]`` for the resolved session,
          sorted by lifecycle priority (active first), then by path. Empty
          when the query is ambiguous or nothing matched.
        - ``candidates``: when several distinct session ids share the prefix
          and none of them equals ``session_id`` exactly, their full ids in
          sorted order; otherwise empty.
    """
    if agent:
        agent_dirs = [os.path.join(base_dir, agent)]
    else:
        # Escape base_dir so characters like "[" in the path are taken
        # literally; only the trailing "*" is a wildcard.
        agent_dirs = sorted(glob.glob(os.path.join(glob.escape(base_dir), "*")))

    by_uuid: Dict[str, List[Tuple[str, str]]] = {}
    for agent_dir in agent_dirs:
        sessions_dir = os.path.join(agent_dir, "sessions")
        if not os.path.isdir(sessions_dir):
            continue
        try:
            entries = os.listdir(sessions_dir)
        except OSError:
            # Unreadable directory: treat it as holding no sessions.
            continue
        for entry in entries:
            if not entry.startswith(session_id):
                continue
            sid = _session_uuid_of(entry)
            if sid is None:
                continue
            if not include_transient and _is_transient(entry):
                continue
            full = os.path.join(sessions_dir, entry)
            if not os.path.isfile(full):
                continue
            by_uuid.setdefault(sid, []).append((full, classify_state(entry)))

    if not by_uuid:
        return [], []
    if len(by_uuid) == 1:
        matched = next(iter(by_uuid.values()))
    elif session_id in by_uuid:
        # The query names one session exactly; other ids that merely start
        # with it must not make that session unreachable.
        matched = by_uuid[session_id]
    else:
        return [], sorted(by_uuid)

    prio = {"active": 0, "lock": 1, "deleted": 2, "reset": 3, "backup": 4, "unknown": 9}
    return sorted(matched, key=lambda item: (prio.get(item[1], 9), item[0])), []
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def recent_session_ids(
    base_dir: str = paths.SESSIONS_BASE,
    limit: int = 5,
) -> List[str]:
    """Return the most-recently-modified active session ids.

    Only plain ``<id>.jsonl`` files count; archived variants, lock files and
    sibling artifacts are ignored.

    Args:
        base_dir: Directory whose direct children are agent directories,
            each holding a ``sessions`` sub-directory.
        limit: Maximum number of ids to return; values <= 0 yield ``[]``.

    Returns:
        Session ids ordered newest first (by file mtime).
    """
    if limit <= 0:
        # A negative slice bound would drop entries from the wrong end.
        return []

    found: List[Tuple[float, str]] = []
    # Escape base_dir so glob metacharacters in the path stay literal.
    for agent_dir in glob.glob(os.path.join(glob.escape(base_dir), "*")):
        sessions_dir = os.path.join(agent_dir, "sessions")
        if not os.path.isdir(sessions_dir):
            continue
        try:
            entries = os.listdir(sessions_dir)
        except OSError:
            continue
        for entry in entries:
            if not entry.endswith(".jsonl"):
                continue
            sid = _session_uuid_of(entry)
            if sid is None or entry != f"{sid}.jsonl":
                continue
            try:
                mtime = os.path.getmtime(os.path.join(sessions_dir, entry))
            except OSError:
                # File vanished or is unreadable between listdir and stat.
                continue
            found.append((mtime, sid))

    found.sort(reverse=True)
    return [sid for _, sid in found[:limit]]
|
package/package.json
CHANGED
package/tools/oc_session_extract.py
CHANGED
|
@@ -4,16 +4,16 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import argparse
|
|
7
|
-
import glob
|
|
8
7
|
import json
|
|
9
8
|
import os
|
|
10
9
|
import sys
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
11
|
from pathlib import Path
|
|
12
|
-
from typing import List, Optional, TextIO, Tuple
|
|
12
|
+
from typing import Any, Dict, List, Optional, TextIO, Tuple
|
|
13
13
|
|
|
14
14
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
15
15
|
|
|
16
|
-
from ocdiag import paths
|
|
16
|
+
from ocdiag import paths, sessions
|
|
17
17
|
from ocdiag.sensitive import sanitize_text
|
|
18
18
|
|
|
19
19
|
|
|
@@ -29,61 +29,6 @@ def human_size(n: int) -> str:
|
|
|
29
29
|
return f"{n:.1f} PB"
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def classify_state(filename: str) -> str:
|
|
33
|
-
if filename.endswith(".jsonl"):
|
|
34
|
-
return "active"
|
|
35
|
-
if ".jsonl.deleted." in filename:
|
|
36
|
-
return "deleted"
|
|
37
|
-
if ".jsonl.reset." in filename:
|
|
38
|
-
return "reset"
|
|
39
|
-
if ".jsonl.bak-" in filename:
|
|
40
|
-
return "backup"
|
|
41
|
-
return "unknown"
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _recent_session_ids(base_dir, limit=5):
|
|
45
|
-
"""Return the most-recently-modified active session UUIDs."""
|
|
46
|
-
found: List[Tuple[float, str]] = []
|
|
47
|
-
for ad in glob.glob(os.path.join(base_dir, "*")):
|
|
48
|
-
sd = os.path.join(ad, "sessions")
|
|
49
|
-
if not os.path.isdir(sd):
|
|
50
|
-
continue
|
|
51
|
-
for entry in os.listdir(sd):
|
|
52
|
-
if not entry.endswith(".jsonl"):
|
|
53
|
-
continue
|
|
54
|
-
if ".trajectory" in entry or ".jsonl.reset." in entry:
|
|
55
|
-
continue
|
|
56
|
-
path = os.path.join(sd, entry)
|
|
57
|
-
try:
|
|
58
|
-
mtime = os.path.getmtime(path)
|
|
59
|
-
except OSError:
|
|
60
|
-
continue
|
|
61
|
-
sid = entry[:-len(".jsonl")]
|
|
62
|
-
found.append((mtime, sid))
|
|
63
|
-
found.sort(reverse=True)
|
|
64
|
-
return [sid for _, sid in found[:limit]]
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def find_session_files(session_id, base_dir=DEFAULT_BASE_DIR, agent=None):
|
|
68
|
-
if agent:
|
|
69
|
-
agent_dirs = [os.path.join(base_dir, agent)]
|
|
70
|
-
else:
|
|
71
|
-
agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
|
|
72
|
-
found = []
|
|
73
|
-
for agent_dir in agent_dirs:
|
|
74
|
-
sessions_dir = os.path.join(agent_dir, "sessions")
|
|
75
|
-
if not os.path.isdir(sessions_dir):
|
|
76
|
-
continue
|
|
77
|
-
pattern = os.path.join(sessions_dir, f"{session_id}.jsonl*")
|
|
78
|
-
for path in sorted(glob.glob(pattern)):
|
|
79
|
-
name = os.path.basename(path)
|
|
80
|
-
if ".trajectory" in name:
|
|
81
|
-
continue
|
|
82
|
-
state = classify_state(name)
|
|
83
|
-
found.append((path, state))
|
|
84
|
-
return found
|
|
85
|
-
|
|
86
|
-
|
|
87
32
|
def stream_records(path):
|
|
88
33
|
with open(path, "r", encoding="utf-8", errors="replace") as f:
|
|
89
34
|
for i, line in enumerate(f, start=1):
|
|
@@ -131,7 +76,6 @@ def _sanitize_record(obj):
|
|
|
131
76
|
v = part.get(k)
|
|
132
77
|
if isinstance(v, str):
|
|
133
78
|
part[k] = sanitize_text(v)
|
|
134
|
-
# Also scrub any top-level text-ish fields the gateway may have set.
|
|
135
79
|
for k in ("text", "summary"):
|
|
136
80
|
v = msg.get(k)
|
|
137
81
|
if isinstance(v, str):
|
|
@@ -157,8 +101,6 @@ def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True)
|
|
|
157
101
|
if pretty:
|
|
158
102
|
out.write(json.dumps(obj, indent=2, ensure_ascii=False))
|
|
159
103
|
else:
|
|
160
|
-
# Non-pretty mode: emit the (possibly sanitized) JSON or fall back
|
|
161
|
-
# to the original raw line if we didn't touch it.
|
|
162
104
|
out.write(json.dumps(obj, ensure_ascii=False) if sanitize else raw)
|
|
163
105
|
out.write("\n\n")
|
|
164
106
|
written += 1
|
|
@@ -167,7 +109,23 @@ def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True)
|
|
|
167
109
|
|
|
168
110
|
def summarize_file(path, state, out):
|
|
169
111
|
write_header(out, path, state)
|
|
170
|
-
|
|
112
|
+
info = _collect_summary(path, sanitize=False)
|
|
113
|
+
out.write(f"Total records: {info['total_records']}\n")
|
|
114
|
+
if info["parse_errors"]:
|
|
115
|
+
out.write(f"Parse errors: {info['parse_errors']}\n")
|
|
116
|
+
out.write("By type:\n")
|
|
117
|
+
by_type = info["by_type"]
|
|
118
|
+
for k in sorted(by_type, key=lambda k: -by_type[k]):
|
|
119
|
+
out.write(f" {k}: {by_type[k]}\n")
|
|
120
|
+
tr = info["time_range"]
|
|
121
|
+
if tr["start"] or tr["end"]:
|
|
122
|
+
out.write(f"Time range: {tr['start'] or '?'} → {tr['end'] or '?'}\n")
|
|
123
|
+
out.write("\n")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _collect_summary(path: str, sanitize: bool = True) -> Dict[str, Any]:
|
|
127
|
+
"""Walk one file and produce a summary block (used by text + JSON mode)."""
|
|
128
|
+
by_type: Dict[str, int] = {}
|
|
171
129
|
total = 0
|
|
172
130
|
earliest: Optional[str] = None
|
|
173
131
|
latest: Optional[str] = None
|
|
@@ -178,25 +136,40 @@ def summarize_file(path, state, out):
|
|
|
178
136
|
parse_errors += 1
|
|
179
137
|
continue
|
|
180
138
|
if not isinstance(obj, dict):
|
|
181
|
-
|
|
139
|
+
by_type["<non-object>"] = by_type.get("<non-object>", 0) + 1
|
|
182
140
|
continue
|
|
183
141
|
rtype = obj.get("type", "<no-type>")
|
|
184
|
-
|
|
142
|
+
by_type[rtype] = by_type.get(rtype, 0) + 1
|
|
185
143
|
ts = obj.get("timestamp")
|
|
186
144
|
if isinstance(ts, str):
|
|
187
145
|
if earliest is None or ts < earliest:
|
|
188
146
|
earliest = ts
|
|
189
147
|
if latest is None or ts > latest:
|
|
190
148
|
latest = ts
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
149
|
+
return {
|
|
150
|
+
"total_records": total,
|
|
151
|
+
"parse_errors": parse_errors,
|
|
152
|
+
"by_type": by_type,
|
|
153
|
+
"time_range": {"start": earliest, "end": latest},
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _collect_records(path: str, type_filter, sanitize: bool) -> List[Dict]:
    """Load one session file into a list of JSON-serializable records.

    Args:
        path: Session file to read via ``stream_records``.
        type_filter: Container of record types to keep, or ``None`` to keep
            every type. Applied to object records only.
        sanitize: When true, scrub secret-shaped substrings before emitting.

    Returns:
        One entry per emitted line: the (optionally sanitized) record object,
        ``{"line", "parse_error", "raw"}`` for lines that failed to parse, or
        ``{"line", "value"}`` for lines that parsed to a non-object.
    """
    records: List[Dict] = []
    for line_no, obj, raw, err in stream_records(path):
        if err is not None:
            # An unparsable line can still carry secrets, so it gets the
            # same scrubbing as parsed content.
            if sanitize and isinstance(raw, str):
                raw = sanitize_text(raw)
            records.append({"line": line_no, "parse_error": err, "raw": raw})
            continue
        if not isinstance(obj, dict):
            # Bare string values would bypass _sanitize_record otherwise.
            if sanitize and isinstance(obj, str):
                obj = sanitize_text(obj)
            records.append({"line": line_no, "value": obj})
            continue
        rtype = obj.get("type", "?")
        if type_filter is not None and rtype not in type_filter:
            continue
        records.append(_sanitize_record(obj) if sanitize else obj)
    return records
|
|
200
173
|
|
|
201
174
|
|
|
202
175
|
def list_files(files, out):
|
|
@@ -234,42 +207,118 @@ def select_files(files, extract_all, _out):
|
|
|
234
207
|
return []
|
|
235
208
|
|
|
236
209
|
|
|
210
|
+
def _resolve_or_die(session_id: str, base_dir: str, agent: Optional[str],
                    include_transient: bool) -> List[Tuple[str, str]]:
    """Resolve ``session_id`` to its session files or exit with a diagnostic.

    Exits with status 2 when the query is malformed and with status 1 when
    it is ambiguous or matches nothing; all diagnostics go to stderr.

    Returns:
        The ``(path, state)`` list produced by ``sessions.resolve``.
    """
    complain = sys.stderr.write

    valid, reason = sessions.is_valid_query(session_id)
    if not valid:
        complain(f"Error: {reason}\n")
        sys.exit(2)

    matched, ambiguous = sessions.resolve(
        session_id,
        base_dir=base_dir,
        agent=agent,
        include_transient=include_transient,
    )

    if ambiguous:
        complain(
            f"Error: 前缀 '{session_id}' 匹配多个 session(请补长前缀):\n"
        )
        for candidate in ambiguous:
            complain(f" {candidate}\n")
        sys.exit(1)

    if matched:
        return matched

    # Nothing matched: report it and list recent sessions as a hint.
    complain(
        f"Error: 找不到 session '{session_id}'(在 {base_dir} 下)"
        + (f" agent={agent}" if agent else "")
        + "\n"
    )
    recent = sessions.recent_session_ids(base_dir, limit=5)
    if recent:
        complain(" 最近的 5 个 session:\n")
        for candidate in recent:
            complain(f" {candidate}\n")
    complain(" 提示:完整 UUID 或前缀(至少 8 位)都可。\n")
    sys.exit(1)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _emit_json(session_id: str, selected: List[Tuple[str, str]],
               out_fp: TextIO, summary_only: bool, type_filter,
               sanitize: bool) -> None:
    """Write the selected session files to ``out_fp`` as one JSON document.

    Args:
        session_id: Id (or prefix) the user asked for. When files were
            selected, the id embedded in the first file name is reported
            instead, so the output carries the canonical id.
        selected: ``(path, state)`` pairs to include.
        out_fp: Text stream receiving the JSON document.
        summary_only: Emit per-file and aggregate summaries instead of the
            records themselves.
        type_filter: Record types to keep in record mode, or ``None``.
        sanitize: Forwarded to the collectors and echoed as ``"sanitized"``.
    """
    if selected:
        # All selected files belong to one session; take its full id from
        # the first file name rather than echoing a user-supplied prefix.
        session_id = os.path.basename(selected[0][0]).split(".jsonl", 1)[0]

    files_payload: List[Dict[str, Any]] = []
    total_records = 0
    total_parse_errors = 0
    merged_by_type: Dict[str, int] = {}
    first_ts: Optional[str] = None
    last_ts: Optional[str] = None

    for path, state in selected:
        try:
            size = os.path.getsize(path)
        except OSError:
            # Size is informational only; keep going if stat fails.
            size = 0
        entry: Dict[str, Any] = {"path": path, "state": state, "size_bytes": size}

        if summary_only:
            info = _collect_summary(path, sanitize=sanitize)
            entry["summary"] = info
            total_records += info["total_records"]
            # Per-file summaries report parse errors, so the aggregate must
            # too or they disappear from the top-level view.
            total_parse_errors += info["parse_errors"]
            for rtype, count in info["by_type"].items():
                merged_by_type[rtype] = merged_by_type.get(rtype, 0) + count
            start, end = info["time_range"]["start"], info["time_range"]["end"]
            if start and (first_ts is None or start < first_ts):
                first_ts = start
            if end and (last_ts is None or end > last_ts):
                last_ts = end
        else:
            entry["records"] = _collect_records(path, type_filter, sanitize=sanitize)

        files_payload.append(entry)

    payload: Dict[str, Any] = {
        "session_id": session_id,
        "files": files_payload,
        "generated_at": datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "sanitized": sanitize,
    }
    if summary_only:
        payload["summary"] = {
            "total_records": total_records,
            "parse_errors": total_parse_errors,
            "by_type": merged_by_type,
            "time_range": {"start": first_ts, "end": last_ts},
        }
    out_fp.write(json.dumps(payload, ensure_ascii=False, indent=2))
    out_fp.write("\n")
|
|
290
|
+
|
|
291
|
+
|
|
237
292
|
def main() -> int:
|
|
238
293
|
p = argparse.ArgumentParser(
|
|
239
294
|
prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
|
|
240
295
|
description="Extract OpenClaw session JSONL files into human-readable format.",
|
|
241
296
|
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
242
297
|
)
|
|
243
|
-
p.add_argument("session_id", help="Session UUID
|
|
298
|
+
p.add_argument("session_id", help="Session UUID (full or 8+ char prefix)")
|
|
244
299
|
p.add_argument("-o", "--output", help="Write output to FILE instead of stdout")
|
|
245
300
|
p.add_argument("-a", "--all", action="store_true",
|
|
246
|
-
help="Extract all versions
|
|
247
|
-
p.add_argument("--list", action="store_true",
|
|
301
|
+
help="Extract all versions (active + reset + deleted + backup + lock)")
|
|
302
|
+
p.add_argument("--list", action="store_true",
|
|
303
|
+
help="List all matching files (incl. .lock); do not extract")
|
|
248
304
|
p.add_argument("--agent", help="Limit search to specific agent directory")
|
|
249
305
|
p.add_argument("--base-dir", default=DEFAULT_BASE_DIR, help="Override base directory")
|
|
250
306
|
p.add_argument("--no-pretty", action="store_true", help="Output raw JSON lines")
|
|
251
307
|
p.add_argument("--types", help="Filter by record type (comma-separated, e.g. 'message,toolCall')")
|
|
252
308
|
p.add_argument("--summary", action="store_true",
|
|
253
309
|
help="Show record-count summary instead of full extraction")
|
|
310
|
+
p.add_argument("--json", action="store_true",
|
|
311
|
+
help="Emit structured JSON (compatible with state collectors' --json)")
|
|
254
312
|
p.add_argument("--unmask", action="store_true",
|
|
255
313
|
help="Disable default sanitization of secret-shaped substrings "
|
|
256
314
|
"in message content (off = scrubbed)")
|
|
257
315
|
args = p.parse_args()
|
|
258
316
|
|
|
259
|
-
files
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
+ "\n"
|
|
265
|
-
)
|
|
266
|
-
suggestions = _recent_session_ids(args.base_dir, limit=5)
|
|
267
|
-
if suggestions:
|
|
268
|
-
sys.stderr.write(" 最近的 5 个 session:\n")
|
|
269
|
-
for sid in suggestions:
|
|
270
|
-
sys.stderr.write(f" {sid}\n")
|
|
271
|
-
sys.stderr.write(" 提示:完整 UUID 或前缀(至少 8 位)都可。\n")
|
|
272
|
-
return 1
|
|
317
|
+
# --list and --all see lock files; default mode hides them so non-interactive
|
|
318
|
+
# callers (cron, jq pipes) don't trip on a transient .jsonl.lock sibling.
|
|
319
|
+
include_transient = bool(args.all or args.list)
|
|
320
|
+
files = _resolve_or_die(args.session_id, args.base_dir, args.agent,
|
|
321
|
+
include_transient=include_transient)
|
|
273
322
|
|
|
274
323
|
if args.list:
|
|
275
324
|
list_files(files, sys.stdout)
|
|
@@ -294,12 +343,18 @@ def main() -> int:
|
|
|
294
343
|
out_fp = sys.stdout
|
|
295
344
|
|
|
296
345
|
try:
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
346
|
+
if args.json:
|
|
347
|
+
_emit_json(args.session_id, selected, out_fp,
|
|
348
|
+
summary_only=args.summary,
|
|
349
|
+
type_filter=type_filter,
|
|
350
|
+
sanitize=not args.unmask)
|
|
351
|
+
else:
|
|
352
|
+
for path, state in selected:
|
|
353
|
+
if args.summary:
|
|
354
|
+
summarize_file(path, state, out_fp)
|
|
355
|
+
else:
|
|
356
|
+
extract_file(path, state, out_fp, pretty=not args.no_pretty,
|
|
357
|
+
type_filter=type_filter, sanitize=not args.unmask)
|
|
303
358
|
except BrokenPipeError:
|
|
304
359
|
try:
|
|
305
360
|
sys.stdout.flush()
|
|
package/tools/oc_session_trace.py
CHANGED
|
@@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
21
21
|
|
|
22
22
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
23
23
|
|
|
24
|
-
from ocdiag import paths
|
|
24
|
+
from ocdiag import paths, sessions
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
DEFAULT_BASE_DIR = paths.SESSIONS_BASE
|
|
@@ -67,64 +67,25 @@ def extract_text(content: Any) -> str:
|
|
|
67
67
|
return str(content)
|
|
68
68
|
|
|
69
69
|
|
|
70
|
-
def
|
|
70
|
+
def resolve_session_file(
|
|
71
71
|
session_id: str,
|
|
72
72
|
base_dir: str = DEFAULT_BASE_DIR,
|
|
73
73
|
agent: Optional[str] = None,
|
|
74
|
-
) -> Optional[str]:
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
continue
|
|
90
|
-
full = os.path.join(sd, entry)
|
|
91
|
-
if not os.path.isfile(full):
|
|
92
|
-
continue
|
|
93
|
-
if entry == f"{session_id}.jsonl":
|
|
94
|
-
candidates.append((full, "active"))
|
|
95
|
-
elif ".jsonl.deleted." in entry:
|
|
96
|
-
candidates.append((full, "deleted"))
|
|
97
|
-
elif ".jsonl.reset." in entry:
|
|
98
|
-
candidates.append((full, "reset"))
|
|
99
|
-
elif ".jsonl.bak-" in entry:
|
|
100
|
-
candidates.append((full, "backup"))
|
|
101
|
-
|
|
102
|
-
prio = {"active": 0, "deleted": 1, "reset": 2, "backup": 3}
|
|
103
|
-
candidates.sort(key=lambda x: prio.get(x[1], 9))
|
|
104
|
-
return candidates[0][0] if candidates else None
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
def _recent_session_ids(base_dir: str, limit: int = 5) -> List[str]:
|
|
108
|
-
"""Return the most-recently-modified active session UUIDs (no .reset/.bak/.deleted)."""
|
|
109
|
-
found: List[Tuple[float, str]] = []
|
|
110
|
-
for ad in glob.glob(os.path.join(base_dir, "*")):
|
|
111
|
-
sd = os.path.join(ad, "sessions")
|
|
112
|
-
if not os.path.isdir(sd):
|
|
113
|
-
continue
|
|
114
|
-
for entry in os.listdir(sd):
|
|
115
|
-
if not entry.endswith(".jsonl"):
|
|
116
|
-
continue
|
|
117
|
-
if ".trajectory" in entry or ".jsonl.reset." in entry:
|
|
118
|
-
continue
|
|
119
|
-
path = os.path.join(sd, entry)
|
|
120
|
-
try:
|
|
121
|
-
mtime = os.path.getmtime(path)
|
|
122
|
-
except OSError:
|
|
123
|
-
continue
|
|
124
|
-
sid = entry[:-len(".jsonl")]
|
|
125
|
-
found.append((mtime, sid))
|
|
126
|
-
found.sort(reverse=True)
|
|
127
|
-
return [sid for _, sid in found[:limit]]
|
|
74
|
+
) -> Tuple[Optional[str], List[str]]:
|
|
75
|
+
"""Resolve UUID-or-prefix to a single session file path.
|
|
76
|
+
|
|
77
|
+
Returns ``(path, candidates)``. ``path`` is None on miss or ambiguity;
|
|
78
|
+
``candidates`` is non-empty only when the prefix matched multiple
|
|
79
|
+
distinct session UUIDs.
|
|
80
|
+
"""
|
|
81
|
+
files, candidates = sessions.resolve(
|
|
82
|
+
session_id, base_dir=base_dir, agent=agent, include_transient=False,
|
|
83
|
+
)
|
|
84
|
+
if candidates:
|
|
85
|
+
return None, candidates
|
|
86
|
+
if not files:
|
|
87
|
+
return None, []
|
|
88
|
+
return files[0][0], []
|
|
128
89
|
|
|
129
90
|
|
|
130
91
|
def find_trajectory_file(session_file: str) -> Optional[str]:
|
|
@@ -666,11 +627,25 @@ def main():
|
|
|
666
627
|
parser.add_argument("--json", action="store_true", help="Output as structured JSON")
|
|
667
628
|
args = parser.parse_args()
|
|
668
629
|
|
|
669
|
-
|
|
630
|
+
ok, msg = sessions.is_valid_query(args.session_id)
|
|
631
|
+
if not ok:
|
|
632
|
+
print(f"Error: {msg}", file=sys.stderr)
|
|
633
|
+
sys.exit(2)
|
|
634
|
+
session_file, candidates = resolve_session_file(
|
|
635
|
+
args.session_id, args.base_dir, args.agent,
|
|
636
|
+
)
|
|
637
|
+
if candidates:
|
|
638
|
+
print(
|
|
639
|
+
f"Error: 前缀 '{args.session_id}' 匹配多个 session(请补长前缀):",
|
|
640
|
+
file=sys.stderr,
|
|
641
|
+
)
|
|
642
|
+
for sid in candidates:
|
|
643
|
+
print(f" {sid}", file=sys.stderr)
|
|
644
|
+
sys.exit(1)
|
|
670
645
|
if not session_file:
|
|
671
646
|
print(f"Error: 找不到 session '{args.session_id}'(在 {args.base_dir} 下)",
|
|
672
647
|
file=sys.stderr)
|
|
673
|
-
suggestions =
|
|
648
|
+
suggestions = sessions.recent_session_ids(args.base_dir, limit=5)
|
|
674
649
|
if suggestions:
|
|
675
650
|
print(f" 最近的 5 个 session:", file=sys.stderr)
|
|
676
651
|
for sid in suggestions:
|
|
@@ -678,6 +653,11 @@ def main():
|
|
|
678
653
|
print(f" 提示:UUID 完整 36 位,前缀也可(至少 8 位)。", file=sys.stderr)
|
|
679
654
|
sys.exit(1)
|
|
680
655
|
|
|
656
|
+
# If the user passed a prefix, recover the full UUID from the resolved
|
|
657
|
+
# filename so log lookups and JSON output use the canonical id.
|
|
658
|
+
resolved_basename = os.path.basename(session_file)
|
|
659
|
+
full_session_id = resolved_basename.split(".jsonl", 1)[0]
|
|
660
|
+
|
|
681
661
|
records = load_records(session_file)
|
|
682
662
|
if not records:
|
|
683
663
|
print(f"Error: session file is empty: {session_file}", file=sys.stderr)
|
|
@@ -708,13 +688,13 @@ def main():
|
|
|
708
688
|
if not args.no_log:
|
|
709
689
|
log_files = find_gateway_logs(args.log_dir)
|
|
710
690
|
if log_files:
|
|
711
|
-
gw_info = load_gateway_timing(log_files,
|
|
691
|
+
gw_info = load_gateway_timing(log_files, full_session_id, analysis["base_epoch_ms"])
|
|
712
692
|
|
|
713
693
|
if args.json:
|
|
714
|
-
out_str = format_json(
|
|
694
|
+
out_str = format_json(full_session_id, session_file, user_msg_ordinal,
|
|
715
695
|
user_msg_id, analysis, traj_info, gw_info)
|
|
716
696
|
else:
|
|
717
|
-
out_str = format_text(
|
|
697
|
+
out_str = format_text(full_session_id, user_msg_ordinal, user_msg_id,
|
|
718
698
|
analysis, traj_info, gw_info)
|
|
719
699
|
|
|
720
700
|
if args.output:
|