claude-history 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,750 @@
1
+ """Parse Claude Code JSONL conversation files."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ import json as _rg_json
7
+ import shutil
8
+ import subprocess
9
+ from typing import Any
10
+
11
# Pick the JSON backend once at import time: orjson when it can be imported,
# otherwise the standard-library json module under the same alias.
try:
    import orjson as _json
except Exception:  # pragma: no cover - fallback for environments without orjson
    import json as _json


def _loads(line: str) -> Any:
    """Decode a single JSON document with whichever backend was selected.

    Callers in this module wrap this call in ``except ValueError`` to skip
    lines that are not valid JSON.
    """
    return _json.loads(line)
23
+
24
+
25
+ @dataclass
26
+ class MessageBlock:
27
+ """A block of content within a message."""
28
+
29
+ type: str
30
+ text: str | None = None
31
+ tool_name: str | None = None
32
+ tool_input: dict[str, Any] | None = None
33
+ tool_id: str | None = None
34
+
35
+ def summary(self, max_length: int = 100) -> str:
36
+ """Get a short summary of this block."""
37
+ if self.type == "text":
38
+ text = self.text or ""
39
+ if len(text) > max_length:
40
+ return text[:max_length] + "..."
41
+ return text
42
+ elif self.type == "tool_use":
43
+ return f"[Tool: {self.tool_name}]"
44
+ elif self.type == "tool_result":
45
+ text = self.text or ""
46
+ if len(text) > max_length:
47
+ return f"[Tool Result: {text[:max_length]}...]"
48
+ return f"[Tool Result: {text}]"
49
+ elif self.type == "thinking":
50
+ text = self.text or ""
51
+ if len(text) > max_length:
52
+ return f"[Thinking: {text[:max_length]}...]"
53
+ return f"[Thinking: {text}]"
54
+ else:
55
+ return f"[{self.type}]"
56
+
57
+
58
@dataclass
class Message:
    """One turn of a conversation together with its content blocks."""

    uuid: str
    role: str  # "user", "assistant", or "tool"
    blocks: list[MessageBlock] = field(default_factory=list)
    timestamp: datetime | None = None
    model: str | None = None
    is_sidechain: bool = False
    is_meta: bool = False
    is_tool_result: bool = False  # True if this is a tool result message

    @property
    def text(self) -> str:
        """Newline-joined text of every non-empty "text" block."""
        return "\n".join(
            blk.text for blk in self.blocks if blk.type == "text" and blk.text
        )

    @property
    def has_tool_use(self) -> bool:
        """Whether at least one block is a "tool_use" block."""
        for blk in self.blocks:
            if blk.type == "tool_use":
                return True
        return False

    @property
    def tool_names(self) -> list[str]:
        """Names of the tools invoked by this message, in block order."""
        names = []
        for blk in self.blocks:
            # Tool calls without a name are left out.
            if blk.type == "tool_use" and blk.tool_name:
                names.append(blk.tool_name)
        return names
91
+
92
+
93
@dataclass
class Conversation:
    """A fully parsed conversation: its messages plus session metadata."""

    session_id: str
    messages: list[Message] = field(default_factory=list)
    cwd: str | None = None
    git_branch: str | None = None
    version: str | None = None
    summaries: list[str] = field(default_factory=list)
    file_path: Path | None = None

    @property
    def title(self) -> str:
        """First line of the first usable user message, capped at 80 characters.

        Meta messages, messages without text, and messages whose stripped text
        starts with "<" (command messages) are skipped. When nothing
        qualifies, the title is "Session " plus the first 8 characters of the
        session id.
        """
        for msg in self.messages:
            if msg.role != "user" or msg.is_meta or not msg.text:
                continue
            stripped = msg.text.strip()
            if stripped.startswith("<"):
                continue
            headline = stripped.split("\n")[0]
            return headline if len(headline) <= 80 else headline[:77] + "..."
        return f"Session {self.session_id[:8]}"

    @property
    def start_time(self) -> datetime | None:
        """Timestamp of the first message that has one, or None."""
        return next((m.timestamp for m in self.messages if m.timestamp), None)

    @property
    def end_time(self) -> datetime | None:
        """Timestamp of the last message that has one, or None."""
        return next(
            (m.timestamp for m in reversed(self.messages) if m.timestamp), None
        )

    @property
    def user_message_count(self) -> int:
        """Number of messages with role "user" that are not meta messages."""
        return len([m for m in self.messages if m.role == "user" and not m.is_meta])

    @property
    def assistant_message_count(self) -> int:
        """Number of messages with role "assistant"."""
        return len([m for m in self.messages if m.role == "assistant"])

    @property
    def tool_use_count(self) -> int:
        """Total number of "tool_use" blocks across all messages."""
        return sum(
            1 for msg in self.messages for blk in msg.blocks if blk.type == "tool_use"
        )
154
+
155
+
156
def parse_content_blocks(content: Any) -> list[MessageBlock]:
    """Convert a message's raw ``content`` value into MessageBlock objects.

    A plain string becomes a single text block. A list is walked item by item:
    strings become text blocks, dicts are mapped according to their "type"
    key, and anything else is ignored. Any other ``content`` value yields an
    empty list.
    """
    if isinstance(content, str):
        return [MessageBlock(type="text", text=content)]
    if not isinstance(content, list):
        return []

    parsed = []
    for entry in content:
        if isinstance(entry, str):
            parsed.append(MessageBlock(type="text", text=entry))
            continue
        if not isinstance(entry, dict):
            continue

        kind = entry.get("type", "unknown")
        if kind == "text":
            parsed.append(MessageBlock(type="text", text=entry.get("text", "")))
        elif kind == "tool_use":
            parsed.append(
                MessageBlock(
                    type="tool_use",
                    tool_name=entry.get("name"),
                    tool_input=entry.get("input"),
                    tool_id=entry.get("id"),
                )
            )
        elif kind == "tool_result":
            payload = entry.get("content", "")
            if isinstance(payload, list):
                # Keep only the text parts of a structured tool result.
                payload = "\n".join(
                    part.get("text", "")
                    for part in payload
                    if isinstance(part, dict) and part.get("type") == "text"
                )
            parsed.append(MessageBlock(type="tool_result", text=str(payload)))
        elif kind == "thinking":
            parsed.append(
                MessageBlock(type="thinking", text=entry.get("thinking", ""))
            )
        else:
            # Unrecognised block types keep their raw repr as text.
            parsed.append(MessageBlock(type=kind, text=str(entry)))

    return parsed
199
+
200
+
201
def extract_text_and_flags(content: Any) -> tuple[str, bool, bool]:
    """Extract text and block flags from message content.

    Returns ``(text, has_blocks, has_tool_result)``:

    * ``text`` is the newline-joined text of string items and "text" blocks.
    * ``has_blocks`` is True for a string, or for a list containing at least
      one string or dict item.
    * ``has_tool_result`` is True when any dict item has type "tool_result".

    Content that is neither a string nor a list yields ``("", False, False)``.
    """
    if isinstance(content, str):
        return content, True, False
    if not isinstance(content, list):
        return "", False, False

    texts: list[str] = []
    has_blocks = False
    has_tool_result = False

    for item in content:
        if isinstance(item, str):
            has_blocks = True
            texts.append(item)
        elif isinstance(item, dict):
            has_blocks = True
            block_type = item.get("type", "unknown")
            if block_type == "text":
                # A null or non-string "text" value would make str.join raise
                # TypeError, which callers do not catch; treat it as empty.
                value = item.get("text", "")
                texts.append(value if isinstance(value, str) else "")
            elif block_type == "tool_result":
                has_tool_result = True
            # Other block types don't contribute to text.

    return "\n".join(texts), has_blocks, has_tool_result
230
+
231
+
232
+ def parse_timestamp(ts: str | None) -> datetime | None:
233
+ """Parse an ISO timestamp string."""
234
+ if not ts:
235
+ return None
236
+ try:
237
+ # Handle various ISO formats
238
+ if ts.endswith("Z"):
239
+ ts = ts[:-1] + "+00:00"
240
+ return datetime.fromisoformat(ts)
241
+ except (ValueError, TypeError):
242
+ return None
243
+
244
+
245
def _timestamp_sort_key(ts: datetime | None) -> tuple[int, datetime | None]:
    """Build a sort key that orders missing, naive and aware timestamps safely.

    Missing timestamps sort first. Aware values are shifted to naive UTC so an
    aware datetime is never compared directly with a naive one, which raises
    TypeError.
    """
    if ts is None:
        return (0, None)
    offset = ts.utcoffset()
    if offset is not None:
        ts = (ts - offset).replace(tzinfo=None)
    return (1, ts)


def parse_conversation(file_path: Path) -> Conversation:
    """Parse a Claude Code JSONL conversation file.

    Each non-blank line is decoded as JSON; lines that fail to decode are
    skipped. "file-history-snapshot" entries are ignored and "summary" entries
    only contribute their summary text. For every other entry the session id,
    cwd, git branch and version are taken from the entry when present, so the
    last non-empty value wins. The session id defaults to the file stem.

    "user" and "assistant" entries carrying a "message" become Message
    objects. Entries sharing a message id (falling back to the entry uuid)
    are merged: the latest blocks and timestamp replace the earlier ones.
    User entries containing a tool_result block get the role "tool".

    Returns:
        A Conversation whose messages are sorted by timestamp, with messages
        lacking a timestamp placed first.
    """
    messages_by_uuid: dict[str, Message] = {}
    session_id = file_path.stem
    cwd = None
    git_branch = None
    version = None
    summaries: list[str] = []

    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            try:
                entry = _loads(line)
            except ValueError:
                continue

            entry_type = entry.get("type")

            # Skip non-message entries
            if entry_type == "file-history-snapshot":
                continue

            # Extract summaries
            if entry_type == "summary":
                if summary_text := entry.get("summary"):
                    summaries.append(summary_text)
                continue

            # Extract metadata
            if entry.get("sessionId"):
                session_id = entry["sessionId"]
            if entry.get("cwd"):
                cwd = entry["cwd"]
            if entry.get("gitBranch"):
                git_branch = entry["gitBranch"]
            if entry.get("version"):
                version = entry["version"]

            # Parse message entries
            if entry_type in ("user", "assistant") and "message" in entry:
                msg_data = entry["message"]
                uuid = entry.get("uuid", "")
                msg_id = msg_data.get("id", uuid)

                # Use message ID for deduplication (streaming updates share same ID)
                dedup_key = msg_id or uuid

                content = msg_data.get("content", [])
                blocks = parse_content_blocks(content)

                # Skip empty messages
                if not blocks:
                    continue

                timestamp = parse_timestamp(entry.get("timestamp"))
                model = msg_data.get("model")
                is_sidechain = entry.get("isSidechain", False)
                is_meta = entry.get("isMeta", False)

                # Check if this is a tool result (user message containing tool_result blocks)
                is_tool_result = entry_type == "user" and any(
                    b.type == "tool_result" for b in blocks
                )
                role = "tool" if is_tool_result else entry_type

                # Update or create message
                if dedup_key in messages_by_uuid:
                    # Merge blocks (streaming updates)
                    existing = messages_by_uuid[dedup_key]
                    existing.blocks = blocks  # Replace with latest
                    if timestamp:
                        existing.timestamp = timestamp
                else:
                    messages_by_uuid[dedup_key] = Message(
                        uuid=uuid,
                        role=role,
                        blocks=blocks,
                        timestamp=timestamp,
                        model=model,
                        is_sidechain=is_sidechain,
                        is_meta=is_meta,
                        is_tool_result=is_tool_result,
                    )

    # Sort messages by timestamp. Substituting naive datetime.min for a missing
    # timestamp would raise TypeError next to the aware values parsed from
    # "...Z" strings, so a dedicated key is used instead.
    messages = list(messages_by_uuid.values())
    messages.sort(key=lambda m: _timestamp_sort_key(m.timestamp))

    return Conversation(
        session_id=session_id,
        messages=messages,
        cwd=cwd,
        git_branch=git_branch,
        version=version,
        summaries=summaries,
        file_path=file_path,
    )
346
+
347
+
348
def find_conversations(directory: Path) -> list[Path]:
    """Return the ``*.jsonl`` files directly inside ``directory``, newest first.

    Files are ordered by modification time, most recently modified first.
    """
    candidates = list(directory.glob("*.jsonl"))
    candidates.sort(key=lambda path: path.stat().st_mtime, reverse=True)
    return candidates
353
+
354
+
355
+ @dataclass
356
+ class ConversationSummary:
357
+ """Lightweight summary of a conversation file."""
358
+
359
+ session_id: str
360
+ title: str
361
+ start_time: datetime | None
362
+ user_message_count: int
363
+ assistant_message_count: int
364
+
365
+
366
+ @dataclass
367
+ class SearchMatch:
368
+ """Search match within a conversation."""
369
+
370
+ session_id: str
371
+ title: str
372
+ timestamp: datetime | None
373
+ snippet: str
374
+
375
+
376
def _title_from_user_messages(
    session_id: str, messages: dict[str, dict[str, Any]]
) -> str:
    """Derive a display title from the earliest usable user message.

    Args:
        session_id: Used for the fallback title.
        messages: Mapping of dedup key to a record with optional "role"
            (defaults to "user"), "is_meta", "text" and "timestamp" entries.

    Returns:
        The first line of the chosen message, capped at 80 characters
        (77 plus "..."), or "Session <first 8 chars of session_id>" when no
        message qualifies. Non-user, meta, empty and "<"-prefixed (command)
        messages are ignored. A message without a timestamp counts as earlier
        than any dated message; ties keep the first one seen.
    """

    def _rank(ts: Any) -> tuple[int, Any]:
        # Substituting naive datetime.min for a missing timestamp would raise
        # TypeError when compared with an aware datetime, so missing values
        # get their own lowest rank and aware values are shifted to naive UTC.
        if not ts:
            return (0, None)
        offset = ts.utcoffset()
        if offset is not None:
            ts = (ts - offset).replace(tzinfo=None)
        return (1, ts)

    best_text = None
    best_rank = None

    for data in messages.values():
        if data.get("role", "user") != "user":
            continue
        if data.get("is_meta"):
            continue
        text = (data.get("text") or "").strip()
        if not text or text.startswith("<"):
            continue
        rank = _rank(data.get("timestamp"))
        if best_rank is None or rank < best_rank:
            best_rank = rank
            best_text = text

    if not best_text:
        return f"Session {session_id[:8]}"

    first_line = best_text.split("\n")[0]
    if len(first_line) > 80:
        return first_line[:77] + "..."
    return first_line
402
+
403
+
404
def summarize_conversation(file_path: Path) -> ConversationSummary:
    """Build a listing summary of a conversation without creating Message objects.

    Blank and undecodable lines are skipped, as are "file-history-snapshot"
    and "summary" entries. The session id starts as the file stem and is
    replaced by any non-empty "sessionId". For "user" and "assistant" entries
    with a "message", one record is kept per message id (falling back to the
    entry uuid); a later entry with the same key replaces the earlier record.
    """
    session_id = file_path.stem
    records: dict[str, dict[str, Any]] = {}

    with open(file_path, "r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue

            try:
                entry = _loads(raw)
            except ValueError:
                continue

            kind = entry.get("type")
            if kind in ("file-history-snapshot", "summary"):
                continue

            if entry.get("sessionId"):
                session_id = entry["sessionId"]

            if kind not in ("user", "assistant") or "message" not in entry:
                continue

            payload = entry["message"]
            entry_uuid = entry.get("uuid", "")
            key = payload.get("id", entry_uuid) or entry_uuid

            text, has_blocks, has_tool_result = extract_text_and_flags(
                payload.get("content", [])
            )
            if not has_blocks:
                continue

            # User entries that carry tool results are recorded as "tool".
            role = "tool" if kind == "user" and has_tool_result else kind
            stamp = parse_timestamp(entry.get("timestamp"))
            records[key] = {
                "role": role,
                "timestamp": stamp,
                "text": text,
                "is_meta": entry.get("isMeta", False),
            }

    user_total = 0
    assistant_total = 0
    stamps = []
    for record in records.values():
        if record["role"] == "user" and not record["is_meta"]:
            user_total += 1
        if record["role"] == "assistant":
            assistant_total += 1
        if record["timestamp"]:
            stamps.append(record["timestamp"])

    return ConversationSummary(
        session_id=session_id,
        title=_title_from_user_messages(session_id, records),
        start_time=min(stamps, default=None),
        user_message_count=user_total,
        assistant_message_count=assistant_total,
    )
470
+
471
+
472
def search_conversation_file(
    file_path: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Find case-insensitive occurrences of ``query`` in one conversation file.

    Only the extracted text of "user" and "assistant" messages is searched;
    user entries carrying tool results are treated as role "tool" and skipped.
    Each hit yields a snippet of up to 50 characters either side of the first
    occurrence in that message, with "..." marking truncated ends. Reading
    stops as soon as ``limit`` hits are collected, and a non-positive
    ``limit`` returns an empty list. All hits share a title derived from the
    user messages read up to that point.
    """
    if limit <= 0:
        return []

    needle = query.lower()
    session_id = file_path.stem
    user_records: dict[str, dict[str, Any]] = {}
    hits: list[tuple[datetime | None, str]] = []

    with open(file_path, "r", encoding="utf-8") as handle:
        for raw in handle:
            raw = raw.strip()
            if not raw:
                continue

            try:
                entry = _loads(raw)
            except ValueError:
                continue

            kind = entry.get("type")
            if kind in ("file-history-snapshot", "summary"):
                continue

            if entry.get("sessionId"):
                session_id = entry["sessionId"]

            if kind not in ("user", "assistant") or "message" not in entry:
                continue

            payload = entry["message"]
            entry_uuid = entry.get("uuid", "")
            key = payload.get("id", entry_uuid) or entry_uuid

            text, has_blocks, has_tool_result = extract_text_and_flags(
                payload.get("content", [])
            )
            if not has_blocks:
                continue

            role = "tool" if kind == "user" and has_tool_result else kind
            stamp = parse_timestamp(entry.get("timestamp"))
            meta_flag = entry.get("isMeta", False)

            if role == "user":
                # Remembered so a title can be derived once reading is done.
                user_records[key] = {
                    "text": text,
                    "timestamp": stamp,
                    "is_meta": meta_flag,
                }

            if role not in ("user", "assistant") or not text:
                continue

            pos = text.lower().find(needle)
            if pos < 0:
                continue

            lo = max(0, pos - 50)
            hi = min(len(text), pos + len(query) + 50)
            prefix = "..." if lo > 0 else ""
            suffix = "..." if hi < len(text) else ""
            hits.append((stamp, prefix + text[lo:hi] + suffix))
            if len(hits) >= limit:
                break

    title = _title_from_user_messages(session_id, user_records)

    results = []
    for stamp, excerpt in hits:
        results.append(
            SearchMatch(
                session_id=session_id, title=title, timestamp=stamp, snippet=excerpt
            )
        )
    return results
549
+
550
+
551
def _rg_available() -> bool:
    """Report whether an ``rg`` executable can be located via ``shutil.which``."""
    rg_path = shutil.which("rg")
    return rg_path is not None
553
+
554
+
555
def _fast_title_from_file(file_path: Path) -> str:
    """Find the first usable user message in a file to use as a title.

    Lines are read in file order and reading stops at the first "user" entry
    that has a message with content, carries no tool result, is not flagged
    "isMeta", and whose stripped text is non-empty and does not start with
    "<". Blank and undecodable lines are skipped.

    Returns:
        The first line of that text, capped at 80 characters (77 plus "..."),
        or "Session <first 8 chars of the file stem>" when nothing qualifies.
    """
    session_id = file_path.stem
    with open(file_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = _loads(line)
            except ValueError:
                continue

            entry_type = entry.get("type")
            if entry_type != "user" or "message" not in entry:
                continue

            msg_data = entry["message"]
            content = msg_data.get("content", [])
            text, has_blocks, has_tool_result = extract_text_and_flags(content)
            if not has_blocks:
                continue

            # Tool-result entries are stored under the "user" type but are not
            # something the user typed; the other title paths in this module
            # exclude them (they get the role "tool"), so do the same here.
            if has_tool_result:
                continue

            if entry.get("isMeta", False):
                continue

            text = (text or "").strip()
            if not text or text.startswith("<"):
                continue

            first_line = text.split("\n")[0]
            if len(first_line) > 80:
                return first_line[:77] + "..."
            return first_line

    return f"Session {session_id[:8]}"
591
+
592
+
593
def search_conversations(
    projects_dir: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Search every conversation under ``projects_dir`` for ``query``.

    When ``rg`` is available the ripgrep-backed search is tried first; if it
    raises, the pure-Python scan below runs instead. That scan visits each
    sub-directory of ``projects_dir`` and searches its conversation files
    until ``limit`` matches are collected. Files whose search raises are
    skipped. A non-positive ``limit`` returns an empty list.
    """
    if limit <= 0:
        return []

    if _rg_available():
        try:
            return _search_with_ripgrep(projects_dir, query, limit)
        except Exception:
            # Fall back to Python search on any error.
            pass

    collected: list[SearchMatch] = []
    project_dirs = (child for child in projects_dir.iterdir() if child.is_dir())
    for project_path in project_dirs:
        for conv_path in find_conversations(project_path):
            remaining = limit - len(collected)
            try:
                found = search_conversation_file(conv_path, query, remaining)
            except Exception:
                continue
            collected.extend(found)
            if len(collected) >= limit:
                return collected
    return collected
622
+
623
+
624
def _search_with_ripgrep(
    projects_dir: Path, query: str, limit: int
) -> list[SearchMatch]:
    """Search using ripgrep to avoid scanning every line in Python.

    ripgrep runs as a fixed-string (``-F``), case-insensitive (``-i``) search
    over ``*.jsonl`` files and emits JSON events. Every "match" event carries
    the raw JSONL line, which is decoded and checked again against the
    extracted message text, because ripgrep may have matched other parts of
    the line. Tool-result entries, meta entries and entries without text are
    ignored. Titles are computed once per file and cached.

    Args:
        projects_dir: Directory handed to ripgrep as the search root.
        query: Literal text to look for.
        limit: Maximum number of matches to return; ripgrep is terminated
            once it is reached.

    Returns:
        Up to ``limit`` SearchMatch objects in the order ripgrep reported them.
    """
    cmd = [
        "rg",
        "--json",
        "-F",
        "-i",
        "-g",
        "*.jsonl",
        # End of options: without this a query starting with "-" would be
        # parsed by ripgrep as a flag instead of the search pattern.
        "--",
        query,
        str(projects_dir),
    ]
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        # stderr is never read here; a pipe could fill up and block ripgrep
        # while this function waits on stdout, so discard it instead.
        stderr=subprocess.DEVNULL,
        text=True,
        encoding="utf-8",
        errors="replace",
    )

    results: list[SearchMatch] = []
    title_cache: dict[str, str] = {}
    query_lower = query.lower()

    try:
        assert proc.stdout is not None
        for raw in proc.stdout:
            raw = raw.strip()
            if not raw:
                continue

            try:
                event = _rg_json.loads(raw)
            except ValueError:
                continue

            if event.get("type") != "match":
                continue

            data = event.get("data", {})
            path_text = data.get("path", {}).get("text")
            line_text = data.get("lines", {}).get("text")
            if not path_text or not line_text:
                continue

            try:
                entry = _loads(line_text.strip())
            except ValueError:
                continue

            entry_type = entry.get("type")
            if entry_type not in ("user", "assistant"):
                continue

            if "message" not in entry:
                continue

            msg_data = entry["message"]
            content = msg_data.get("content", [])
            text, has_blocks, has_tool_result = extract_text_and_flags(content)
            if not has_blocks or not text:
                continue

            # Ignore tool-result pseudo messages.
            if entry_type == "user" and has_tool_result:
                continue

            if entry.get("isMeta", False):
                continue

            # Confirm the hit is in the message text, not elsewhere in the line.
            text_lower = text.lower()
            idx = text_lower.find(query_lower)
            if idx < 0:
                continue

            start = max(0, idx - 50)
            end = min(len(text), idx + len(query) + 50)
            snippet = text[start:end]
            if start > 0:
                snippet = "..." + snippet
            if end < len(text):
                snippet = snippet + "..."

            title = title_cache.get(path_text)
            if title is None:
                title = _fast_title_from_file(Path(path_text))
                title_cache[path_text] = title

            session_id = entry.get("sessionId") or Path(path_text).stem
            timestamp = parse_timestamp(entry.get("timestamp"))
            results.append(
                SearchMatch(
                    session_id=session_id,
                    title=title,
                    timestamp=timestamp,
                    snippet=snippet,
                )
            )

            if len(results) >= limit:
                proc.terminate()
                break
    finally:
        # Always release the pipe and reap the child, escalating to kill if
        # it does not exit promptly.
        if proc.stdout:
            proc.stdout.close()
        try:
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            proc.kill()
            proc.wait(timeout=2)

    return results
741
+
742
+
743
def get_file_size_human(path: Path) -> str:
    """Format the size of ``path`` with one decimal place and a unit suffix.

    The byte count is divided by 1024 until it drops below 1024, stepping
    through B, KB, MB and GB; anything still larger is reported in TB.
    """
    amount = path.stat().st_size
    for suffix in ("B", "KB", "MB", "GB"):
        if amount < 1024:
            break
        amount /= 1024
    else:
        suffix = "TB"
    return f"{amount:.1f}{suffix}"