openclaw-diag-cli 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,7 +6,7 @@ OpenClaw 出问题时,**先跑这条命令再开 ticket**:
6
6
  npx openclaw-diag-cli all
7
7
  ```
8
8
 
9
- 零安装、零依赖、observer-only — 只读探测,绝不改你的状态。
9
+ 零安装、零依赖、observer-only — 不改 OpenClaw 的配置 / session / cron / 服务状态;只读探测,可写诊断输出(落到工具自己的目录)。
10
10
 
11
11
  ## 这是什么
12
12
 
@@ -60,17 +60,16 @@ openclaw-diag trace <session-uuid>
60
60
  ```
61
61
  ── 模块 4:Gateway 状态 ──
62
62
 
63
- 进程 / 端口
64
- PID 12847 (uptime 3d 2h),监听 :8080,HTTP /healthz → 200
65
- 24h 重启
66
- 无重启事件
67
- Model API
68
- amazon-bedrock 可达(DNS+HTTP+认证均通)
69
- WS 生命周期
70
- 最近 1h 内 134 次连接,平均存活 47s,无异常关闭
63
+ Systemd: Active: active (running) since Sun 2026-05-17 20:45:02 CST; 11h ago
64
+ • Main PID: 142687 (node)
65
+ 端口 18789 监听: 是 | HTTP 健康检查: 200
66
+ • 24h 启停事件: 0 次启动 — 近 24h 无重启/停止记录
67
+ 模型 API [https://bedrock-runtime.us-east-1.amazonaws.com]: HTTP 200
68
+ Channel WS: 最近 1h 5 次连接,平均存活 32s
69
+ Gateway 错误码: 0 条
71
70
  ```
72
71
 
73
- 加 `--json` 后输出严格结构化(同字段、同值),方便管道处理。
72
+ 加 `--json` 后输出结构化(覆盖文本里出现的核心字段,便于 jq / 监控管道)。
74
73
 
75
74
  ## 诊断列表
76
75
 
@@ -93,12 +92,12 @@ openclaw-diag list # 看完整列表
93
92
  | `plugin_diag` | 插件状态一致性、ERROR/WARN、Hook 异常、Channel、外部依赖 DNS |
94
93
  | `shell_history` | 高危命令、openclaw 命令、最近操作 |
95
94
 
96
- **对象类(需要 session uuid)**
95
+ **对象类(需要 session uuid 或 ≥ 8 位前缀;都支持 `--json`)**
97
96
 
98
97
  | 诊断 | 看什么 |
99
98
  |---|---|
100
99
  | `trace <uuid>` | 一条用户消息从进入到响应的完整时间轴 |
101
- | `extract <uuid>` | session.jsonl 导出为可读格式(active / reset / deleted / backup 全状态) |
100
+ | `extract <uuid>` | session.jsonl 导出为可读格式(active / reset / deleted / backup 全状态;`--summary` 仅汇总) |
102
101
 
103
102
  **其它命令**
104
103
 
package/bin/ocdiag CHANGED
@@ -1,7 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  """ocdiag entry-point shim that runs the dispatcher from the repo root."""
3
3
 
4
- import os
5
4
  import sys
6
5
  from pathlib import Path
7
6
 
@@ -1,3 +1,3 @@
1
1
  """ocdiag — shared library for openclaw-diag-cli scripts."""
2
2
 
3
- __version__ = "0.2.1"
3
+ __version__ = "0.2.2"
package/ocdiag/paths.py CHANGED
@@ -29,19 +29,3 @@ SERVICE_ENV_FILE = _env_path(
29
29
  "OPENCLAW_SERVICE_ENV_FILE",
30
30
  os.path.join(HOME, ".config", "systemd", "user", "openclaw-gateway.service.d", "env.conf"),
31
31
  )
32
-
33
-
34
- def home() -> str:
35
- return HOME
36
-
37
-
38
- def config_path() -> str:
39
- return CONFIG
40
-
41
-
42
- def log_dir() -> str:
43
- return LOG_DIR
44
-
45
-
46
- def sessions_base() -> str:
47
- return SESSIONS_BASE
@@ -0,0 +1,161 @@
1
+ """Shared session-file lookup utilities for trace/extract.
2
+
3
+ A "session" is identified by a UUID. On disk it can have multiple files:
4
+ <uuid>.jsonl — active
5
+ <uuid>.jsonl.lock — write lock (transient, filtered by default)
6
+ <uuid>.jsonl.deleted.<ts> — soft-deleted
7
+ <uuid>.jsonl.reset.<ts> — pre-reset snapshot
8
+ <uuid>.jsonl.bak-<pid> — backup snapshot
9
+
10
+ Sibling artifacts (NOT session content):
11
+ <uuid>.trajectory.jsonl, <uuid>.acp-stream.jsonl, <uuid>.json
12
+
13
+ Callers may pass a full UUID or a prefix of at least MIN_PREFIX_LEN chars.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import glob
19
+ import os
20
+ import re
21
+ from typing import Dict, List, Optional, Tuple
22
+
23
+ from . import paths
24
+
25
+
26
+ MIN_PREFIX_LEN = 8
27
+
28
+ _TRANSIENT_SUFFIXES = (".lock", ".tmp", ".swp")
29
+
30
+ _UUID_CHAR = re.compile(r"^[0-9a-fA-F-]+$")
31
+
32
+
33
+ def classify_state(filename: str) -> str:
34
+ """Tag a session-file basename with its lifecycle state."""
35
+ if ".jsonl.deleted." in filename:
36
+ return "deleted"
37
+ if ".jsonl.reset." in filename:
38
+ return "reset"
39
+ if ".jsonl.bak-" in filename:
40
+ return "backup"
41
+ if filename.endswith(".jsonl.lock"):
42
+ return "lock"
43
+ if filename.endswith(".jsonl"):
44
+ return "active"
45
+ return "unknown"
46
+
47
+
48
+ def _session_uuid_of(filename: str) -> Optional[str]:
49
+ """Return the session UUID the file belongs to, or None for siblings."""
50
+ if ".trajectory" in filename or ".acp-stream" in filename:
51
+ return None
52
+ if filename.endswith(".json") and not filename.endswith(".jsonl"):
53
+ return None
54
+ idx = filename.find(".jsonl")
55
+ if idx <= 0:
56
+ return None
57
+ return filename[:idx]
58
+
59
+
60
+ def _is_transient(filename: str) -> bool:
61
+ if ".jsonl.bak-" in filename:
62
+ return False
63
+ return any(filename.endswith(s) for s in _TRANSIENT_SUFFIXES) or filename.endswith(".bak")
64
+
65
+
66
+ def is_valid_query(session_id: str) -> Tuple[bool, str]:
67
+ """Reject queries shorter than MIN_PREFIX_LEN or with non-UUID chars."""
68
+ if not session_id:
69
+ return False, "session id 不能为空"
70
+ if len(session_id) < MIN_PREFIX_LEN:
71
+ return False, (
72
+ f"session id 太短('{session_id}' 只有 {len(session_id)} 字符),"
73
+ f"至少需要 {MIN_PREFIX_LEN} 位 UUID 前缀"
74
+ )
75
+ if not _UUID_CHAR.match(session_id):
76
+ return False, f"session id 含非法字符(仅允许十六进制和连字符): '{session_id}'"
77
+ return True, ""
78
+
79
+
80
+ def resolve(
81
+ session_id: str,
82
+ base_dir: str = paths.SESSIONS_BASE,
83
+ agent: Optional[str] = None,
84
+ include_transient: bool = False,
85
+ ) -> Tuple[List[Tuple[str, str]], List[str]]:
86
+ """Resolve a UUID or prefix to its on-disk session files.
87
+
88
+ Returns ``(files, candidates)``:
89
+ - ``files``: ``[(abs_path, state), ...]`` for the resolved session,
90
+ sorted by lifecycle priority (active first). Empty when ambiguous or
91
+ when there are 0 matches.
92
+ - ``candidates``: when multiple distinct session UUIDs share the
93
+ prefix, this lists their full UUIDs sorted; otherwise empty.
94
+ """
95
+ if agent:
96
+ agent_dirs = [os.path.join(base_dir, agent)]
97
+ else:
98
+ agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
99
+
100
+ by_uuid: Dict[str, List[Tuple[str, str]]] = {}
101
+ for ad in agent_dirs:
102
+ sd = os.path.join(ad, "sessions")
103
+ if not os.path.isdir(sd):
104
+ continue
105
+ try:
106
+ entries = os.listdir(sd)
107
+ except OSError:
108
+ continue
109
+ for entry in entries:
110
+ if not entry.startswith(session_id):
111
+ continue
112
+ uuid = _session_uuid_of(entry)
113
+ if uuid is None:
114
+ continue
115
+ if not include_transient and _is_transient(entry):
116
+ continue
117
+ full = os.path.join(sd, entry)
118
+ if not os.path.isfile(full):
119
+ continue
120
+ state = classify_state(entry)
121
+ by_uuid.setdefault(uuid, []).append((full, state))
122
+
123
+ if not by_uuid:
124
+ return [], []
125
+ if len(by_uuid) > 1:
126
+ return [], sorted(by_uuid.keys())
127
+
128
+ files = next(iter(by_uuid.values()))
129
+ prio = {"active": 0, "lock": 1, "deleted": 2, "reset": 3, "backup": 4, "unknown": 9}
130
+ files.sort(key=lambda x: (prio.get(x[1], 9), x[0]))
131
+ return files, []
132
+
133
+
134
+ def recent_session_ids(
135
+ base_dir: str = paths.SESSIONS_BASE,
136
+ limit: int = 5,
137
+ ) -> List[str]:
138
+ """Return the most-recently-modified active session UUIDs."""
139
+ found: List[Tuple[float, str]] = []
140
+ for ad in glob.glob(os.path.join(base_dir, "*")):
141
+ sd = os.path.join(ad, "sessions")
142
+ if not os.path.isdir(sd):
143
+ continue
144
+ try:
145
+ entries = os.listdir(sd)
146
+ except OSError:
147
+ continue
148
+ for entry in entries:
149
+ if not entry.endswith(".jsonl"):
150
+ continue
151
+ uuid = _session_uuid_of(entry)
152
+ if uuid is None or entry != f"{uuid}.jsonl":
153
+ continue
154
+ path = os.path.join(sd, entry)
155
+ try:
156
+ mtime = os.path.getmtime(path)
157
+ except OSError:
158
+ continue
159
+ found.append((mtime, uuid))
160
+ found.sort(reverse=True)
161
+ return [sid for _, sid in found[:limit]]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "openclaw-diag-cli",
3
- "version": "0.2.1",
3
+ "version": "0.2.2",
4
4
  "description": "OpenClaw observer-only diagnostic CLI. Zero-dependency Python scripts wrapped in Node for npx-friendly install.",
5
5
  "keywords": [
6
6
  "openclaw",
@@ -4,16 +4,16 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import argparse
7
- import glob
8
7
  import json
9
8
  import os
10
9
  import sys
10
+ from datetime import datetime, timezone
11
11
  from pathlib import Path
12
- from typing import List, Optional, TextIO, Tuple
12
+ from typing import Any, Dict, List, Optional, TextIO, Tuple
13
13
 
14
14
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
15
15
 
16
- from ocdiag import paths
16
+ from ocdiag import paths, sessions
17
17
  from ocdiag.sensitive import sanitize_text
18
18
 
19
19
 
@@ -29,61 +29,6 @@ def human_size(n: int) -> str:
29
29
  return f"{n:.1f} PB"
30
30
 
31
31
 
32
- def classify_state(filename: str) -> str:
33
- if filename.endswith(".jsonl"):
34
- return "active"
35
- if ".jsonl.deleted." in filename:
36
- return "deleted"
37
- if ".jsonl.reset." in filename:
38
- return "reset"
39
- if ".jsonl.bak-" in filename:
40
- return "backup"
41
- return "unknown"
42
-
43
-
44
- def _recent_session_ids(base_dir, limit=5):
45
- """Return the most-recently-modified active session UUIDs."""
46
- found: List[Tuple[float, str]] = []
47
- for ad in glob.glob(os.path.join(base_dir, "*")):
48
- sd = os.path.join(ad, "sessions")
49
- if not os.path.isdir(sd):
50
- continue
51
- for entry in os.listdir(sd):
52
- if not entry.endswith(".jsonl"):
53
- continue
54
- if ".trajectory" in entry or ".jsonl.reset." in entry:
55
- continue
56
- path = os.path.join(sd, entry)
57
- try:
58
- mtime = os.path.getmtime(path)
59
- except OSError:
60
- continue
61
- sid = entry[:-len(".jsonl")]
62
- found.append((mtime, sid))
63
- found.sort(reverse=True)
64
- return [sid for _, sid in found[:limit]]
65
-
66
-
67
- def find_session_files(session_id, base_dir=DEFAULT_BASE_DIR, agent=None):
68
- if agent:
69
- agent_dirs = [os.path.join(base_dir, agent)]
70
- else:
71
- agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
72
- found = []
73
- for agent_dir in agent_dirs:
74
- sessions_dir = os.path.join(agent_dir, "sessions")
75
- if not os.path.isdir(sessions_dir):
76
- continue
77
- pattern = os.path.join(sessions_dir, f"{session_id}.jsonl*")
78
- for path in sorted(glob.glob(pattern)):
79
- name = os.path.basename(path)
80
- if ".trajectory" in name:
81
- continue
82
- state = classify_state(name)
83
- found.append((path, state))
84
- return found
85
-
86
-
87
32
  def stream_records(path):
88
33
  with open(path, "r", encoding="utf-8", errors="replace") as f:
89
34
  for i, line in enumerate(f, start=1):
@@ -131,7 +76,6 @@ def _sanitize_record(obj):
131
76
  v = part.get(k)
132
77
  if isinstance(v, str):
133
78
  part[k] = sanitize_text(v)
134
- # Also scrub any top-level text-ish fields the gateway may have set.
135
79
  for k in ("text", "summary"):
136
80
  v = msg.get(k)
137
81
  if isinstance(v, str):
@@ -157,8 +101,6 @@ def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True)
157
101
  if pretty:
158
102
  out.write(json.dumps(obj, indent=2, ensure_ascii=False))
159
103
  else:
160
- # Non-pretty mode: emit the (possibly sanitized) JSON or fall back
161
- # to the original raw line if we didn't touch it.
162
104
  out.write(json.dumps(obj, ensure_ascii=False) if sanitize else raw)
163
105
  out.write("\n\n")
164
106
  written += 1
@@ -167,7 +109,23 @@ def extract_file(path, state, out, pretty=True, type_filter=None, sanitize=True)
167
109
 
168
110
  def summarize_file(path, state, out):
169
111
  write_header(out, path, state)
170
- counts: dict = {}
112
+ info = _collect_summary(path, sanitize=False)
113
+ out.write(f"Total records: {info['total_records']}\n")
114
+ if info["parse_errors"]:
115
+ out.write(f"Parse errors: {info['parse_errors']}\n")
116
+ out.write("By type:\n")
117
+ by_type = info["by_type"]
118
+ for k in sorted(by_type, key=lambda k: -by_type[k]):
119
+ out.write(f" {k}: {by_type[k]}\n")
120
+ tr = info["time_range"]
121
+ if tr["start"] or tr["end"]:
122
+ out.write(f"Time range: {tr['start'] or '?'} → {tr['end'] or '?'}\n")
123
+ out.write("\n")
124
+
125
+
126
+ def _collect_summary(path: str, sanitize: bool = True) -> Dict[str, Any]:
127
+ """Walk one file and produce a summary block (used by text + JSON mode)."""
128
+ by_type: Dict[str, int] = {}
171
129
  total = 0
172
130
  earliest: Optional[str] = None
173
131
  latest: Optional[str] = None
@@ -178,25 +136,40 @@ def summarize_file(path, state, out):
178
136
  parse_errors += 1
179
137
  continue
180
138
  if not isinstance(obj, dict):
181
- counts["<non-object>"] = counts.get("<non-object>", 0) + 1
139
+ by_type["<non-object>"] = by_type.get("<non-object>", 0) + 1
182
140
  continue
183
141
  rtype = obj.get("type", "<no-type>")
184
- counts[rtype] = counts.get(rtype, 0) + 1
142
+ by_type[rtype] = by_type.get(rtype, 0) + 1
185
143
  ts = obj.get("timestamp")
186
144
  if isinstance(ts, str):
187
145
  if earliest is None or ts < earliest:
188
146
  earliest = ts
189
147
  if latest is None or ts > latest:
190
148
  latest = ts
191
- out.write(f"Total records: {total}\n")
192
- if parse_errors:
193
- out.write(f"Parse errors: {parse_errors}\n")
194
- out.write("By type:\n")
195
- for k in sorted(counts, key=lambda k: -counts[k]):
196
- out.write(f" {k}: {counts[k]}\n")
197
- if earliest or latest:
198
- out.write(f"Time range: {earliest or '?'} → {latest or '?'}\n")
199
- out.write("\n")
149
+ return {
150
+ "total_records": total,
151
+ "parse_errors": parse_errors,
152
+ "by_type": by_type,
153
+ "time_range": {"start": earliest, "end": latest},
154
+ }
155
+
156
+
157
+ def _collect_records(path: str, type_filter, sanitize: bool) -> List[Dict]:
158
+ out: List[Dict] = []
159
+ for line_no, obj, raw, err in stream_records(path):
160
+ if err is not None:
161
+ out.append({"line": line_no, "parse_error": err, "raw": raw})
162
+ continue
163
+ if not isinstance(obj, dict):
164
+ out.append({"line": line_no, "value": obj})
165
+ continue
166
+ rtype = obj.get("type", "?")
167
+ if type_filter is not None and rtype not in type_filter:
168
+ continue
169
+ if sanitize:
170
+ obj = _sanitize_record(obj)
171
+ out.append(obj)
172
+ return out
200
173
 
201
174
 
202
175
  def list_files(files, out):
@@ -234,42 +207,118 @@ def select_files(files, extract_all, _out):
234
207
  return []
235
208
 
236
209
 
210
+ def _resolve_or_die(session_id: str, base_dir: str, agent: Optional[str],
211
+ include_transient: bool) -> List[Tuple[str, str]]:
212
+ ok, msg = sessions.is_valid_query(session_id)
213
+ if not ok:
214
+ sys.stderr.write(f"Error: {msg}\n")
215
+ sys.exit(2)
216
+ files, candidates = sessions.resolve(
217
+ session_id, base_dir=base_dir, agent=agent,
218
+ include_transient=include_transient,
219
+ )
220
+ if candidates:
221
+ sys.stderr.write(
222
+ f"Error: 前缀 '{session_id}' 匹配多个 session(请补长前缀):\n"
223
+ )
224
+ for sid in candidates:
225
+ sys.stderr.write(f" {sid}\n")
226
+ sys.exit(1)
227
+ if not files:
228
+ sys.stderr.write(
229
+ f"Error: 找不到 session '{session_id}'(在 {base_dir} 下)"
230
+ + (f" agent={agent}" if agent else "")
231
+ + "\n"
232
+ )
233
+ suggestions = sessions.recent_session_ids(base_dir, limit=5)
234
+ if suggestions:
235
+ sys.stderr.write(" 最近的 5 个 session:\n")
236
+ for sid in suggestions:
237
+ sys.stderr.write(f" {sid}\n")
238
+ sys.stderr.write(" 提示:完整 UUID 或前缀(至少 8 位)都可。\n")
239
+ sys.exit(1)
240
+ return files
241
+
242
+
243
+ def _emit_json(session_id: str, selected: List[Tuple[str, str]],
244
+ out_fp: TextIO, summary_only: bool, type_filter,
245
+ sanitize: bool) -> None:
246
+ files_payload: List[Dict[str, Any]] = []
247
+ aggregate_total = 0
248
+ aggregate_by_type: Dict[str, int] = {}
249
+ aggregate_start: Optional[str] = None
250
+ aggregate_end: Optional[str] = None
251
+ for path, state in selected:
252
+ try:
253
+ size = os.path.getsize(path)
254
+ except OSError:
255
+ size = 0
256
+ entry: Dict[str, Any] = {
257
+ "path": path,
258
+ "state": state,
259
+ "size_bytes": size,
260
+ }
261
+ if summary_only:
262
+ s = _collect_summary(path, sanitize=sanitize)
263
+ entry["summary"] = s
264
+ aggregate_total += s["total_records"]
265
+ for k, v in s["by_type"].items():
266
+ aggregate_by_type[k] = aggregate_by_type.get(k, 0) + v
267
+ tr = s["time_range"]
268
+ if tr["start"] and (aggregate_start is None or tr["start"] < aggregate_start):
269
+ aggregate_start = tr["start"]
270
+ if tr["end"] and (aggregate_end is None or tr["end"] > aggregate_end):
271
+ aggregate_end = tr["end"]
272
+ else:
273
+ entry["records"] = _collect_records(path, type_filter, sanitize=sanitize)
274
+ files_payload.append(entry)
275
+
276
+ payload: Dict[str, Any] = {
277
+ "session_id": session_id,
278
+ "files": files_payload,
279
+ "generated_at": datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
280
+ "sanitized": sanitize,
281
+ }
282
+ if summary_only:
283
+ payload["summary"] = {
284
+ "total_records": aggregate_total,
285
+ "by_type": aggregate_by_type,
286
+ "time_range": {"start": aggregate_start, "end": aggregate_end},
287
+ }
288
+ out_fp.write(json.dumps(payload, ensure_ascii=False, indent=2))
289
+ out_fp.write("\n")
290
+
291
+
237
292
  def main() -> int:
238
293
  p = argparse.ArgumentParser(
239
294
  prog=os.environ.get("OPENCLAW_DIAG_PROG") or None,
240
295
  description="Extract OpenClaw session JSONL files into human-readable format.",
241
296
  formatter_class=argparse.ArgumentDefaultsHelpFormatter,
242
297
  )
243
- p.add_argument("session_id", help="Session UUID to extract")
298
+ p.add_argument("session_id", help="Session UUID (full or 8+ char prefix)")
244
299
  p.add_argument("-o", "--output", help="Write output to FILE instead of stdout")
245
300
  p.add_argument("-a", "--all", action="store_true",
246
- help="Extract all versions found (active + deleted + reset + backup)")
247
- p.add_argument("--list", action="store_true", help="List found files; do not extract")
301
+ help="Extract all versions (active + reset + deleted + backup + lock)")
302
+ p.add_argument("--list", action="store_true",
303
+ help="List all matching files (incl. .lock); do not extract")
248
304
  p.add_argument("--agent", help="Limit search to specific agent directory")
249
305
  p.add_argument("--base-dir", default=DEFAULT_BASE_DIR, help="Override base directory")
250
306
  p.add_argument("--no-pretty", action="store_true", help="Output raw JSON lines")
251
307
  p.add_argument("--types", help="Filter by record type (comma-separated, e.g. 'message,toolCall')")
252
308
  p.add_argument("--summary", action="store_true",
253
309
  help="Show record-count summary instead of full extraction")
310
+ p.add_argument("--json", action="store_true",
311
+ help="Emit structured JSON (compatible with state collectors' --json)")
254
312
  p.add_argument("--unmask", action="store_true",
255
313
  help="Disable default sanitization of secret-shaped substrings "
256
314
  "in message content (off = scrubbed)")
257
315
  args = p.parse_args()
258
316
 
259
- files = find_session_files(args.session_id, args.base_dir, args.agent)
260
- if not files:
261
- sys.stderr.write(
262
- f"Error: 找不到 session '{args.session_id}'(在 {args.base_dir} 下)"
263
- + (f" agent={args.agent}" if args.agent else "")
264
- + "\n"
265
- )
266
- suggestions = _recent_session_ids(args.base_dir, limit=5)
267
- if suggestions:
268
- sys.stderr.write(" 最近的 5 个 session:\n")
269
- for sid in suggestions:
270
- sys.stderr.write(f" {sid}\n")
271
- sys.stderr.write(" 提示:完整 UUID 或前缀(至少 8 位)都可。\n")
272
- return 1
317
+ # --list and --all see lock files; default mode hides them so non-interactive
318
+ # callers (cron, jq pipes) don't trip on a transient .jsonl.lock sibling.
319
+ include_transient = bool(args.all or args.list)
320
+ files = _resolve_or_die(args.session_id, args.base_dir, args.agent,
321
+ include_transient=include_transient)
273
322
 
274
323
  if args.list:
275
324
  list_files(files, sys.stdout)
@@ -294,12 +343,18 @@ def main() -> int:
294
343
  out_fp = sys.stdout
295
344
 
296
345
  try:
297
- for path, state in selected:
298
- if args.summary:
299
- summarize_file(path, state, out_fp)
300
- else:
301
- extract_file(path, state, out_fp, pretty=not args.no_pretty,
302
- type_filter=type_filter, sanitize=not args.unmask)
346
+ if args.json:
347
+ _emit_json(args.session_id, selected, out_fp,
348
+ summary_only=args.summary,
349
+ type_filter=type_filter,
350
+ sanitize=not args.unmask)
351
+ else:
352
+ for path, state in selected:
353
+ if args.summary:
354
+ summarize_file(path, state, out_fp)
355
+ else:
356
+ extract_file(path, state, out_fp, pretty=not args.no_pretty,
357
+ type_filter=type_filter, sanitize=not args.unmask)
303
358
  except BrokenPipeError:
304
359
  try:
305
360
  sys.stdout.flush()
@@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional, Tuple
21
21
 
22
22
  sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
23
23
 
24
- from ocdiag import paths
24
+ from ocdiag import paths, sessions
25
25
 
26
26
 
27
27
  DEFAULT_BASE_DIR = paths.SESSIONS_BASE
@@ -67,64 +67,25 @@ def extract_text(content: Any) -> str:
67
67
  return str(content)
68
68
 
69
69
 
70
- def find_session_file(
70
+ def resolve_session_file(
71
71
  session_id: str,
72
72
  base_dir: str = DEFAULT_BASE_DIR,
73
73
  agent: Optional[str] = None,
74
- ) -> Optional[str]:
75
- if agent:
76
- agent_dirs = [os.path.join(base_dir, agent)]
77
- else:
78
- agent_dirs = sorted(glob.glob(os.path.join(base_dir, "*")))
79
-
80
- candidates: List[Tuple[str, str]] = []
81
- for ad in agent_dirs:
82
- sd = os.path.join(ad, "sessions")
83
- if not os.path.isdir(sd):
84
- continue
85
- for entry in os.listdir(sd):
86
- if not entry.startswith(session_id):
87
- continue
88
- if ".trajectory" in entry or entry.endswith(".json"):
89
- continue
90
- full = os.path.join(sd, entry)
91
- if not os.path.isfile(full):
92
- continue
93
- if entry == f"{session_id}.jsonl":
94
- candidates.append((full, "active"))
95
- elif ".jsonl.deleted." in entry:
96
- candidates.append((full, "deleted"))
97
- elif ".jsonl.reset." in entry:
98
- candidates.append((full, "reset"))
99
- elif ".jsonl.bak-" in entry:
100
- candidates.append((full, "backup"))
101
-
102
- prio = {"active": 0, "deleted": 1, "reset": 2, "backup": 3}
103
- candidates.sort(key=lambda x: prio.get(x[1], 9))
104
- return candidates[0][0] if candidates else None
105
-
106
-
107
- def _recent_session_ids(base_dir: str, limit: int = 5) -> List[str]:
108
- """Return the most-recently-modified active session UUIDs (no .reset/.bak/.deleted)."""
109
- found: List[Tuple[float, str]] = []
110
- for ad in glob.glob(os.path.join(base_dir, "*")):
111
- sd = os.path.join(ad, "sessions")
112
- if not os.path.isdir(sd):
113
- continue
114
- for entry in os.listdir(sd):
115
- if not entry.endswith(".jsonl"):
116
- continue
117
- if ".trajectory" in entry or ".jsonl.reset." in entry:
118
- continue
119
- path = os.path.join(sd, entry)
120
- try:
121
- mtime = os.path.getmtime(path)
122
- except OSError:
123
- continue
124
- sid = entry[:-len(".jsonl")]
125
- found.append((mtime, sid))
126
- found.sort(reverse=True)
127
- return [sid for _, sid in found[:limit]]
74
+ ) -> Tuple[Optional[str], List[str]]:
75
+ """Resolve UUID-or-prefix to a single session file path.
76
+
77
+ Returns ``(path, candidates)``. ``path`` is None on miss or ambiguity;
78
+ ``candidates`` is non-empty only when the prefix matched multiple
79
+ distinct session UUIDs.
80
+ """
81
+ files, candidates = sessions.resolve(
82
+ session_id, base_dir=base_dir, agent=agent, include_transient=False,
83
+ )
84
+ if candidates:
85
+ return None, candidates
86
+ if not files:
87
+ return None, []
88
+ return files[0][0], []
128
89
 
129
90
 
130
91
  def find_trajectory_file(session_file: str) -> Optional[str]:
@@ -666,11 +627,25 @@ def main():
666
627
  parser.add_argument("--json", action="store_true", help="Output as structured JSON")
667
628
  args = parser.parse_args()
668
629
 
669
- session_file = find_session_file(args.session_id, args.base_dir, args.agent)
630
+ ok, msg = sessions.is_valid_query(args.session_id)
631
+ if not ok:
632
+ print(f"Error: {msg}", file=sys.stderr)
633
+ sys.exit(2)
634
+ session_file, candidates = resolve_session_file(
635
+ args.session_id, args.base_dir, args.agent,
636
+ )
637
+ if candidates:
638
+ print(
639
+ f"Error: 前缀 '{args.session_id}' 匹配多个 session(请补长前缀):",
640
+ file=sys.stderr,
641
+ )
642
+ for sid in candidates:
643
+ print(f" {sid}", file=sys.stderr)
644
+ sys.exit(1)
670
645
  if not session_file:
671
646
  print(f"Error: 找不到 session '{args.session_id}'(在 {args.base_dir} 下)",
672
647
  file=sys.stderr)
673
- suggestions = _recent_session_ids(args.base_dir, limit=5)
648
+ suggestions = sessions.recent_session_ids(args.base_dir, limit=5)
674
649
  if suggestions:
675
650
  print(f" 最近的 5 个 session:", file=sys.stderr)
676
651
  for sid in suggestions:
@@ -678,6 +653,11 @@ def main():
678
653
  print(f" 提示:UUID 完整 36 位,前缀也可(至少 8 位)。", file=sys.stderr)
679
654
  sys.exit(1)
680
655
 
656
+ # If the user passed a prefix, recover the full UUID from the resolved
657
+ # filename so log lookups and JSON output use the canonical id.
658
+ resolved_basename = os.path.basename(session_file)
659
+ full_session_id = resolved_basename.split(".jsonl", 1)[0]
660
+
681
661
  records = load_records(session_file)
682
662
  if not records:
683
663
  print(f"Error: session file is empty: {session_file}", file=sys.stderr)
@@ -708,13 +688,13 @@ def main():
708
688
  if not args.no_log:
709
689
  log_files = find_gateway_logs(args.log_dir)
710
690
  if log_files:
711
- gw_info = load_gateway_timing(log_files, args.session_id, analysis["base_epoch_ms"])
691
+ gw_info = load_gateway_timing(log_files, full_session_id, analysis["base_epoch_ms"])
712
692
 
713
693
  if args.json:
714
- out_str = format_json(args.session_id, session_file, user_msg_ordinal,
694
+ out_str = format_json(full_session_id, session_file, user_msg_ordinal,
715
695
  user_msg_id, analysis, traj_info, gw_info)
716
696
  else:
717
- out_str = format_text(args.session_id, user_msg_ordinal, user_msg_id,
697
+ out_str = format_text(full_session_id, user_msg_ordinal, user_msg_id,
718
698
  analysis, traj_info, gw_info)
719
699
 
720
700
  if args.output: