claude-history 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_history/__init__.py +3 -0
- claude_history/cli.py +745 -0
- claude_history/parser.py +750 -0
- claude_history-0.5.0.dist-info/METADATA +113 -0
- claude_history-0.5.0.dist-info/RECORD +9 -0
- claude_history-0.5.0.dist-info/WHEEL +5 -0
- claude_history-0.5.0.dist-info/entry_points.txt +2 -0
- claude_history-0.5.0.dist-info/licenses/LICENSE +21 -0
- claude_history-0.5.0.dist-info/top_level.txt +1 -0
claude_history/parser.py
ADDED
|
@@ -0,0 +1,750 @@
|
|
|
1
|
+
"""Parse Claude Code JSONL conversation files."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
import json as _rg_json
|
|
7
|
+
import shutil
|
|
8
|
+
import subprocess
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
import orjson as _json
|
|
13
|
+
|
|
14
|
+
def _loads(line: str) -> Any:
|
|
15
|
+
return _json.loads(line)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
except Exception: # pragma: no cover - fallback for environments without orjson
|
|
19
|
+
import json as _json
|
|
20
|
+
|
|
21
|
+
def _loads(line: str) -> Any:
|
|
22
|
+
return _json.loads(line)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class MessageBlock:
|
|
27
|
+
"""A block of content within a message."""
|
|
28
|
+
|
|
29
|
+
type: str
|
|
30
|
+
text: str | None = None
|
|
31
|
+
tool_name: str | None = None
|
|
32
|
+
tool_input: dict[str, Any] | None = None
|
|
33
|
+
tool_id: str | None = None
|
|
34
|
+
|
|
35
|
+
def summary(self, max_length: int = 100) -> str:
|
|
36
|
+
"""Get a short summary of this block."""
|
|
37
|
+
if self.type == "text":
|
|
38
|
+
text = self.text or ""
|
|
39
|
+
if len(text) > max_length:
|
|
40
|
+
return text[:max_length] + "..."
|
|
41
|
+
return text
|
|
42
|
+
elif self.type == "tool_use":
|
|
43
|
+
return f"[Tool: {self.tool_name}]"
|
|
44
|
+
elif self.type == "tool_result":
|
|
45
|
+
text = self.text or ""
|
|
46
|
+
if len(text) > max_length:
|
|
47
|
+
return f"[Tool Result: {text[:max_length]}...]"
|
|
48
|
+
return f"[Tool Result: {text}]"
|
|
49
|
+
elif self.type == "thinking":
|
|
50
|
+
text = self.text or ""
|
|
51
|
+
if len(text) > max_length:
|
|
52
|
+
return f"[Thinking: {text[:max_length]}...]"
|
|
53
|
+
return f"[Thinking: {text}]"
|
|
54
|
+
else:
|
|
55
|
+
return f"[{self.type}]"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class Message:
    """A single message in a conversation."""

    uuid: str
    role: str  # "user", "assistant", or "tool"
    blocks: list[MessageBlock] = field(default_factory=list)
    timestamp: datetime | None = None
    model: str | None = None
    is_sidechain: bool = False
    is_meta: bool = False
    is_tool_result: bool = False  # True if this is a tool result message

    @property
    def text(self) -> str:
        """Get all text content from this message."""
        return "\n".join(
            block.text for block in self.blocks if block.type == "text" and block.text
        )

    @property
    def has_tool_use(self) -> bool:
        """Check if this message contains tool usage."""
        return any(block.type == "tool_use" for block in self.blocks)

    @property
    def tool_names(self) -> list[str]:
        """Get list of tools used in this message."""
        names: list[str] = []
        for block in self.blocks:
            if block.type == "tool_use" and block.tool_name:
                names.append(block.tool_name)
        return names
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass
class Conversation:
    """A parsed Claude Code conversation."""

    session_id: str
    messages: list[Message] = field(default_factory=list)
    cwd: str | None = None
    git_branch: str | None = None
    version: str | None = None
    summaries: list[str] = field(default_factory=list)
    file_path: Path | None = None

    @property
    def title(self) -> str:
        """Generate a title from the first user message."""
        for msg in self.messages:
            if msg.role != "user" or msg.is_meta or not msg.text:
                continue
            stripped = msg.text.strip()
            # Skip command messages
            if stripped.startswith("<"):
                continue
            # Truncate long titles to the first line, max 80 chars.
            first_line, _, _ = stripped.partition("\n")
            if len(first_line) > 80:
                return first_line[:77] + "..."
            return first_line
        return f"Session {self.session_id[:8]}"

    @property
    def start_time(self) -> datetime | None:
        """Get the timestamp of the first message."""
        return next((m.timestamp for m in self.messages if m.timestamp), None)

    @property
    def end_time(self) -> datetime | None:
        """Get the timestamp of the last message."""
        return next(
            (m.timestamp for m in reversed(self.messages) if m.timestamp), None
        )

    @property
    def user_message_count(self) -> int:
        """Count non-meta user messages."""
        return len(
            [m for m in self.messages if m.role == "user" and not m.is_meta]
        )

    @property
    def assistant_message_count(self) -> int:
        """Count assistant messages."""
        return len([m for m in self.messages if m.role == "assistant"])

    @property
    def tool_use_count(self) -> int:
        """Count total tool uses across all messages."""
        return sum(
            1
            for msg in self.messages
            for block in msg.blocks
            if block.type == "tool_use"
        )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def parse_content_blocks(content: Any) -> list[MessageBlock]:
    """Parse message content into a list of MessageBlock objects.

    *content* may be a bare string (one text block), a list of block
    dicts/strings, or anything else (yields no blocks).
    """
    if isinstance(content, str):
        return [MessageBlock(type="text", text=content)]
    if not isinstance(content, list):
        return []

    blocks: list[MessageBlock] = []
    for item in content:
        if isinstance(item, str):
            blocks.append(MessageBlock(type="text", text=item))
            continue
        if not isinstance(item, dict):
            # Ignore anything that is neither a dict nor a string.
            continue

        block_type = item.get("type", "unknown")
        if block_type == "text":
            blocks.append(MessageBlock(type="text", text=item.get("text", "")))
        elif block_type == "tool_use":
            blocks.append(
                MessageBlock(
                    type="tool_use",
                    tool_name=item.get("name"),
                    tool_input=item.get("input"),
                    tool_id=item.get("id"),
                )
            )
        elif block_type == "tool_result":
            payload = item.get("content", "")
            if isinstance(payload, list):
                # Flatten nested text blocks inside the tool result.
                payload = "\n".join(
                    part.get("text", "")
                    for part in payload
                    if isinstance(part, dict) and part.get("type") == "text"
                )
            blocks.append(MessageBlock(type="tool_result", text=str(payload)))
        elif block_type == "thinking":
            blocks.append(
                MessageBlock(type="thinking", text=item.get("thinking", ""))
            )
        else:
            # Unknown block types keep their raw repr as text.
            blocks.append(MessageBlock(type=block_type, text=str(item)))

    return blocks
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def extract_text_and_flags(content: Any) -> tuple[str, bool, bool]:
    """Extract text and block flags from message content.

    Returns (text, has_blocks, has_tool_result).
    """
    if isinstance(content, str):
        return content, True, False
    if not isinstance(content, list):
        return "", False, False

    pieces: list[str] = []
    saw_block = False
    saw_tool_result = False

    for item in content:
        if isinstance(item, str):
            saw_block = True
            pieces.append(item)
        elif isinstance(item, dict):
            saw_block = True
            kind = item.get("type", "unknown")
            if kind == "text":
                pieces.append(item.get("text", ""))
            elif kind == "tool_result":
                saw_tool_result = True
            # Other block types don't contribute to text.

    return "\n".join(pieces), saw_block, saw_tool_result
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def parse_timestamp(ts: str | None) -> datetime | None:
|
|
233
|
+
"""Parse an ISO timestamp string."""
|
|
234
|
+
if not ts:
|
|
235
|
+
return None
|
|
236
|
+
try:
|
|
237
|
+
# Handle various ISO formats
|
|
238
|
+
if ts.endswith("Z"):
|
|
239
|
+
ts = ts[:-1] + "+00:00"
|
|
240
|
+
return datetime.fromisoformat(ts)
|
|
241
|
+
except (ValueError, TypeError):
|
|
242
|
+
return None
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def parse_conversation(file_path: Path) -> Conversation:
    """Parse a Claude Code JSONL conversation file.

    Reads the file line by line, skipping blank and malformed lines,
    collecting session metadata and summaries, and deduplicating
    streaming message updates by their message id.

    Args:
        file_path: Path to a ``*.jsonl`` session transcript.

    Returns:
        A Conversation whose messages are sorted by timestamp
        (messages without a timestamp sort first).
    """
    messages_by_uuid: dict[str, Message] = {}
    session_id = file_path.stem
    cwd = None
    git_branch = None
    version = None
    summaries = []

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                entry = _loads(line)
            except ValueError:
                # Tolerate truncated/corrupt lines rather than failing the file.
                continue

            entry_type = entry.get("type")

            # Skip non-message entries
            if entry_type == "file-history-snapshot":
                continue

            # Extract summaries
            if entry_type == "summary":
                if summary_text := entry.get("summary"):
                    summaries.append(summary_text)
                continue

            # Extract metadata (the last non-empty value seen wins)
            if entry.get("sessionId"):
                session_id = entry["sessionId"]
            if entry.get("cwd"):
                cwd = entry["cwd"]
            if entry.get("gitBranch"):
                git_branch = entry["gitBranch"]
            if entry.get("version"):
                version = entry["version"]

            # Parse message entries
            if entry_type in ("user", "assistant") and "message" in entry:
                msg_data = entry["message"]
                uuid = entry.get("uuid", "")
                msg_id = msg_data.get("id", uuid)

                # Use message ID for deduplication (streaming updates share same ID)
                dedup_key = msg_id or uuid

                content = msg_data.get("content", [])
                blocks = parse_content_blocks(content)

                # Skip empty messages
                if not blocks:
                    continue

                timestamp = parse_timestamp(entry.get("timestamp"))
                model = msg_data.get("model")
                is_sidechain = entry.get("isSidechain", False)
                is_meta = entry.get("isMeta", False)

                # A tool result is a user message containing tool_result blocks.
                is_tool_result = entry_type == "user" and any(
                    b.type == "tool_result" for b in blocks
                )
                role = "tool" if is_tool_result else entry_type

                if dedup_key in messages_by_uuid:
                    # Streaming update: replace blocks with the latest set
                    # and keep the newest timestamp.
                    existing = messages_by_uuid[dedup_key]
                    existing.blocks = blocks
                    if timestamp:
                        existing.timestamp = timestamp
                else:
                    messages_by_uuid[dedup_key] = Message(
                        uuid=uuid,
                        role=role,
                        blocks=blocks,
                        timestamp=timestamp,
                        model=model,
                        is_sidechain=is_sidechain,
                        is_meta=is_meta,
                        is_tool_result=is_tool_result,
                    )

    # Sort messages by timestamp.  Use a (has_timestamp, timestamp) tuple
    # instead of substituting naive datetime.min: comparing datetime.min
    # against the timezone-aware timestamps produced by parse_timestamp
    # raises TypeError.  Messages without a timestamp still sort first,
    # matching the old datetime.min intent.
    messages = list(messages_by_uuid.values())
    messages.sort(key=lambda m: (m.timestamp is not None, m.timestamp or datetime.min))

    return Conversation(
        session_id=session_id,
        messages=messages,
        cwd=cwd,
        git_branch=git_branch,
        version=version,
        summaries=summaries,
        file_path=file_path,
    )
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
def find_conversations(directory: Path) -> list[Path]:
    """Find all JSONL conversation files in a directory, newest first."""
    paths = list(directory.glob("*.jsonl"))
    # Most recently modified files come first.
    paths.sort(key=lambda p: p.stat().st_mtime, reverse=True)
    return paths
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
@dataclass
class ConversationSummary:
    """Lightweight summary of a conversation file.

    Produced without building full Message objects, so conversation
    listings stay fast even for large transcripts.
    """

    # Session UUID (from a sessionId entry, else the file stem).
    session_id: str
    # Display title derived from the earliest meaningful user message.
    title: str
    # Earliest message timestamp seen in the file, if any.
    start_time: datetime | None
    # Number of non-meta user messages.
    user_message_count: int
    # Number of assistant messages.
    assistant_message_count: int
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
@dataclass
class SearchMatch:
    """Search match within a conversation."""

    # Session UUID of the conversation containing the match.
    session_id: str
    # Display title of that conversation.
    title: str
    # Timestamp of the matching message, if it had one.
    timestamp: datetime | None
    # Matching text with ~50 chars of context on each side, "..."-elided.
    snippet: str
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def _title_from_user_messages(
|
|
377
|
+
session_id: str, messages: dict[str, dict[str, Any]]
|
|
378
|
+
) -> str:
|
|
379
|
+
best_text = None
|
|
380
|
+
best_ts = None
|
|
381
|
+
|
|
382
|
+
for data in messages.values():
|
|
383
|
+
if data.get("role", "user") != "user":
|
|
384
|
+
continue
|
|
385
|
+
if data.get("is_meta"):
|
|
386
|
+
continue
|
|
387
|
+
text = (data.get("text") or "").strip()
|
|
388
|
+
if not text or text.startswith("<"):
|
|
389
|
+
continue
|
|
390
|
+
ts = data.get("timestamp") or datetime.min
|
|
391
|
+
if best_ts is None or ts < best_ts:
|
|
392
|
+
best_ts = ts
|
|
393
|
+
best_text = text
|
|
394
|
+
|
|
395
|
+
if not best_text:
|
|
396
|
+
return f"Session {session_id[:8]}"
|
|
397
|
+
|
|
398
|
+
first_line = best_text.split("\n")[0]
|
|
399
|
+
if len(first_line) > 80:
|
|
400
|
+
return first_line[:77] + "..."
|
|
401
|
+
return first_line
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def summarize_conversation(file_path: Path) -> ConversationSummary:
    """Parse just enough to summarize a conversation for listings.

    Unlike parse_conversation(), this keeps only per-message role, text,
    timestamp, and meta flag in plain dicts — no MessageBlock objects —
    so it is cheap enough to run over every file when listing sessions.

    Args:
        file_path: Path to a ``*.jsonl`` session transcript.

    Returns:
        A ConversationSummary with title, start time, and message counts.
    """
    messages: dict[str, dict[str, Any]] = {}
    session_id = file_path.stem

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Malformed lines are skipped rather than failing the file.
            try:
                entry = _loads(line)
            except ValueError:
                continue

            entry_type = entry.get("type")
            # Snapshots and summaries carry no countable messages here.
            if entry_type == "file-history-snapshot":
                continue
            if entry_type == "summary":
                continue

            # Last non-empty sessionId wins.
            if entry.get("sessionId"):
                session_id = entry["sessionId"]

            if entry_type in ("user", "assistant") and "message" in entry:
                msg_data = entry["message"]
                uuid = entry.get("uuid", "")
                msg_id = msg_data.get("id", uuid)
                # Streaming updates share a message id; later lines replace
                # earlier ones under the same key.
                dedup_key = msg_id or uuid

                content = msg_data.get("content", [])
                text, has_blocks, has_tool_result = extract_text_and_flags(content)
                if not has_blocks:
                    continue

                # User messages that only carry tool results count as "tool".
                role = (
                    "tool" if entry_type == "user" and has_tool_result else entry_type
                )
                timestamp = parse_timestamp(entry.get("timestamp"))
                is_meta = entry.get("isMeta", False)

                messages[dedup_key] = {
                    "role": role,
                    "timestamp": timestamp,
                    "text": text,
                    "is_meta": is_meta,
                }

    user_count = sum(
        1 for m in messages.values() if m["role"] == "user" and not m["is_meta"]
    )
    assistant_count = sum(1 for m in messages.values() if m["role"] == "assistant")
    # Earliest real timestamp; None when no message had one.
    start_time = min(
        (m["timestamp"] for m in messages.values() if m["timestamp"]),
        default=None,
    )
    title = _title_from_user_messages(session_id, messages)

    return ConversationSummary(
        session_id=session_id,
        title=title,
        start_time=start_time,
        user_message_count=user_count,
        assistant_message_count=assistant_count,
    )
|
|
470
|
+
|
|
471
|
+
|
|
472
|
+
def search_conversation_file(
    file_path: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Search a single conversation file for query matches.

    Performs a case-insensitive substring search over user and assistant
    message text, collecting up to *limit* snippets with ~50 characters
    of context on each side.  User messages are also accumulated so the
    conversation title can be derived without a second pass.

    Args:
        file_path: Path to a ``*.jsonl`` session transcript.
        query: Plain substring to search for (case-insensitive).
        limit: Maximum number of matches to return; <= 0 returns [].
    """
    if limit <= 0:
        return []

    query_lower = query.lower()
    session_id = file_path.stem
    # User messages collected for title derivation.
    user_messages: dict[str, dict[str, Any]] = {}
    # (timestamp, snippet) pairs, in file order.
    matches: list[tuple[datetime | None, str]] = []

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Skip malformed lines rather than failing the file.
            try:
                entry = _loads(line)
            except ValueError:
                continue

            entry_type = entry.get("type")
            if entry_type == "file-history-snapshot":
                continue
            if entry_type == "summary":
                continue

            if entry.get("sessionId"):
                session_id = entry["sessionId"]

            if entry_type in ("user", "assistant") and "message" in entry:
                msg_data = entry["message"]
                uuid = entry.get("uuid", "")
                msg_id = msg_data.get("id", uuid)
                # Streaming updates share a message id; later lines win.
                dedup_key = msg_id or uuid

                content = msg_data.get("content", [])
                text, has_blocks, has_tool_result = extract_text_and_flags(content)
                if not has_blocks:
                    continue

                # User messages that only carry tool results count as "tool"
                # and are excluded from the search below.
                role = (
                    "tool" if entry_type == "user" and has_tool_result else entry_type
                )
                timestamp = parse_timestamp(entry.get("timestamp"))
                is_meta = entry.get("isMeta", False)

                if role == "user":
                    user_messages[dedup_key] = {
                        "text": text,
                        "timestamp": timestamp,
                        "is_meta": is_meta,
                    }

                if role in ("user", "assistant") and text:
                    text_lower = text.lower()
                    idx = text_lower.find(query_lower)
                    if idx >= 0:
                        # Snippet: the match plus ~50 chars of context,
                        # elided with "..." where text was cut off.
                        start = max(0, idx - 50)
                        end = min(len(text), idx + len(query) + 50)
                        snippet = text[start:end]
                        if start > 0:
                            snippet = "..." + snippet
                        if end < len(text):
                            snippet = snippet + "..."
                        matches.append((timestamp, snippet))
                        if len(matches) >= limit:
                            break

    title = _title_from_user_messages(session_id, user_messages)

    return [
        SearchMatch(session_id=session_id, title=title, timestamp=ts, snippet=snippet)
        for ts, snippet in matches
    ]
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _rg_available() -> bool:
|
|
552
|
+
return shutil.which("rg") is not None
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _fast_title_from_file(file_path: Path) -> str:
    """Find the first non-meta user message to use as a title."""
    fallback = f"Session {file_path.stem[:8]}"

    with open(file_path, "r", encoding="utf-8") as f:
        for raw in f:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = _loads(raw)
            except ValueError:
                continue

            # Only plain user messages can supply a title.
            if entry.get("type") != "user" or "message" not in entry:
                continue

            content = entry["message"].get("content", [])
            text, has_blocks, _ = extract_text_and_flags(content)
            if not has_blocks or entry.get("isMeta", False):
                continue

            text = (text or "").strip()
            # Skip empty text and command messages.
            if not text or text.startswith("<"):
                continue

            first_line = text.split("\n")[0]
            if len(first_line) > 80:
                first_line = first_line[:77] + "..."
            return first_line

    return fallback
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def search_conversations(
    projects_dir: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Search across all conversations, using ripgrep if available."""
    if limit <= 0:
        return []

    if _rg_available():
        try:
            return _search_with_ripgrep(projects_dir, query, limit)
        except Exception:
            # Fall back to Python search on any error.
            pass

    found: list[SearchMatch] = []
    for project_path in projects_dir.iterdir():
        if not project_path.is_dir():
            continue
        for conv_path in find_conversations(project_path):
            try:
                found.extend(
                    search_conversation_file(conv_path, query, limit - len(found))
                )
            except Exception:
                # Skip unreadable/corrupt files; search is best-effort.
                continue
            if len(found) >= limit:
                return found
    return found
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
def _search_with_ripgrep(
    projects_dir: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Search using ripgrep to avoid scanning every line in Python.

    Runs ``rg --json`` (fixed-string, case-insensitive, *.jsonl only) and
    re-parses only the JSONL lines ripgrep flagged as matches, applying
    the same filtering/snippet rules as the pure-Python search.

    Args:
        projects_dir: Root directory containing per-project subdirectories.
        query: Plain substring to search for (case-insensitive).
        limit: Maximum number of matches to return.
    """
    # -F: literal (not regex) match; -i: case-insensitive; -g: only *.jsonl.
    cmd = [
        "rg",
        "--json",
        "-F",
        "-i",
        "-g",
        "*.jsonl",
        query,
        str(projects_dir),
    ]
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        encoding="utf-8",
        errors="replace",
    )

    results: list[SearchMatch] = []
    # Titles are derived per file; cache so each file is scanned once.
    title_cache: dict[str, str] = {}
    query_lower = query.lower()

    try:
        assert proc.stdout is not None
        # Stream rg's JSON events; stop as soon as `limit` matches are found.
        for raw in proc.stdout:
            raw = raw.strip()
            if not raw:
                continue

            try:
                event = _rg_json.loads(raw)
            except ValueError:
                continue

            # Only "match" events carry hits (others are begin/end/summary).
            if event.get("type") != "match":
                continue

            data = event.get("data", {})
            path_text = data.get("path", {}).get("text")
            line_text = data.get("lines", {}).get("text")
            if not path_text or not line_text:
                continue

            # The matched line is itself one JSONL transcript entry.
            try:
                entry = _loads(line_text.strip())
            except ValueError:
                continue

            entry_type = entry.get("type")
            if entry_type not in ("user", "assistant"):
                continue

            if "message" not in entry:
                continue

            msg_data = entry["message"]
            content = msg_data.get("content", [])
            text, has_blocks, has_tool_result = extract_text_and_flags(content)
            if not has_blocks or not text:
                continue

            # Ignore tool-result pseudo messages.
            if entry_type == "user" and has_tool_result:
                continue

            if entry.get("isMeta", False):
                continue

            # Re-check in Python: rg matched the raw JSON line, which can
            # hit non-text fields; only count hits in the message text.
            text_lower = text.lower()
            idx = text_lower.find(query_lower)
            if idx < 0:
                continue

            # Snippet: match plus ~50 chars of context, "..."-elided.
            start = max(0, idx - 50)
            end = min(len(text), idx + len(query) + 50)
            snippet = text[start:end]
            if start > 0:
                snippet = "..." + snippet
            if end < len(text):
                snippet = snippet + "..."

            title = title_cache.get(path_text)
            if title is None:
                title = _fast_title_from_file(Path(path_text))
                title_cache[path_text] = title

            session_id = entry.get("sessionId") or Path(path_text).stem
            timestamp = parse_timestamp(entry.get("timestamp"))
            results.append(
                SearchMatch(
                    session_id=session_id,
                    title=title,
                    timestamp=timestamp,
                    snippet=snippet,
                )
            )

            if len(results) >= limit:
                # Enough matches: ask rg to stop producing output.
                proc.terminate()
                break
    finally:
        # Always release pipes and reap the child, killing it if it
        # ignores termination.
        if proc.stdout:
            proc.stdout.close()
        if proc.stderr:
            proc.stderr.close()
        try:
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait(timeout=2)

    return results
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def get_file_size_human(path: Path) -> str:
    """Get human-readable file size (e.g. ``"1.5KB"``)."""
    size = float(path.stat().st_size)
    for suffix in ("B", "KB", "MB", "GB"):
        if size < 1024:
            return f"{size:.1f}{suffix}"
        size /= 1024
    # Anything beyond gigabytes is reported in terabytes.
    return f"{size:.1f}TB"
|