plumb-dev 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plumb/__init__.py ADDED
@@ -0,0 +1,13 @@
1
+ __version__ = "0.1.0"
2
+
3
+
4
class PlumbError(Exception):
    """Common base class for every exception defined by Plumb."""
6
+
7
+
8
class PlumbInferenceError(PlumbError):
    """Signals that an LLM inference call still failed after retrying."""
10
+
11
+
12
class PlumbAuthError(PlumbError):
    """Signals that the API key is missing or invalid."""
@@ -0,0 +1,191 @@
1
+ """Read Claude Code native session files for conversation extraction.
2
+
3
+ Claude Code stores session data at ~/.claude/projects/<encoded-path>/<uuid>.jsonl.
4
+ This module discovers and parses those files to extract conversation turns.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import logging
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Optional
13
+
14
+ from plumb.conversation import ConversationTurn
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
def encode_project_path(repo_root: Path) -> str:
    """Return the directory name Claude Code uses for a repository path.

    The path is resolved to an absolute path, any trailing "/" is dropped,
    and every remaining "/" is turned into "-".

    Example: /Users/foo/myrepo -> -Users-foo-myrepo
    """
    absolute = repo_root.resolve()
    trimmed = str(absolute).rstrip("/")
    # Splitting on "/" and re-joining with "-" swaps every separator.
    return "-".join(trimmed.split("/"))
26
+
27
+
28
def find_session_dir(repo_root: Path) -> Optional[Path]:
    """Locate the Claude Code session directory for a repository.

    The candidate is ~/.claude/projects/<encoded>/, where <encoded> is the
    result of encode_project_path(repo_root). The path is returned when it
    is an existing directory; otherwise the result is None.
    """
    encoded_name = encode_project_path(repo_root)
    candidate = Path.home().joinpath(".claude", "projects", encoded_name)
    return candidate if candidate.is_dir() else None
35
+
36
+
37
def list_session_files(
    session_dir: Path, modified_after: Optional[datetime] = None
) -> list[Path]:
    """List the *.jsonl files directly inside a directory, oldest first.

    Args:
        session_dir: Directory to scan (not recursive).
        modified_after: Optional cutoff. When given, only files whose mtime
            is at or after this moment are returned. A naive datetime is
            interpreted as local time by ``datetime.timestamp()``.

    Returns:
        The matching paths sorted by mtime ascending. Entries that cannot
        be stat'ed (for example a dangling symlink, or a file removed while
        the listing is in progress) are skipped instead of raising.
    """
    cutoff = None if modified_after is None else modified_after.timestamp()

    # Stat every file exactly once so that filtering and sorting use the
    # same mtime, and so a file vanishing between the two steps cannot
    # abort the whole listing.
    dated: list[tuple[float, Path]] = []
    for candidate in session_dir.glob("*.jsonl"):
        try:
            mtime = candidate.stat().st_mtime
        except OSError:
            continue
        if cutoff is None or mtime >= cutoff:
            dated.append((mtime, candidate))

    # Sort on mtime only; ties keep the order produced by glob().
    dated.sort(key=lambda pair: pair[0])
    return [path for _, path in dated]
47
+
48
+
49
+ def _parse_session_entry(entry: dict) -> Optional[ConversationTurn]:
50
+ """Parse one JSONL line from a Claude Code session file.
51
+
52
+ Returns a ConversationTurn or None if the entry should be skipped.
53
+ """
54
+ entry_type = entry.get("type")
55
+ if entry_type not in ("user", "assistant"):
56
+ return None
57
+
58
+ if entry.get("isSidechain") or entry.get("isMeta"):
59
+ return None
60
+
61
+ timestamp = entry.get("timestamp")
62
+ message = entry.get("message", {})
63
+ content = message.get("content", "")
64
+
65
+ if entry_type == "user":
66
+ # Skip entries where content is a list (tool_results)
67
+ if not isinstance(content, str):
68
+ return None
69
+ return ConversationTurn(role="user", content=content, timestamp=timestamp)
70
+
71
+ # Assistant entries: content is a list with one block per JSONL line
72
+ if not isinstance(content, list) or not content:
73
+ return None
74
+
75
+ block = content[0]
76
+ block_type = block.get("type")
77
+
78
+ if block_type == "text":
79
+ text = block.get("text", "")
80
+ if text:
81
+ return ConversationTurn(role="assistant", content=text, timestamp=timestamp)
82
+
83
+ elif block_type == "tool_use":
84
+ name = block.get("name", "unknown")
85
+ return ConversationTurn(
86
+ role="assistant",
87
+ content=f"[tool: {name}]",
88
+ timestamp=timestamp,
89
+ )
90
+
91
+ # Skip thinking blocks and anything else
92
+ return None
93
+
94
+
95
def parse_session_file(
    path: Path, since: Optional[datetime] = None
) -> list[ConversationTurn]:
    """Read one session JSONL file and return its parsed conversation turns.

    Args:
        path: The session file to read.
        since: Optional cutoff. Turns whose timestamp parses to a moment at
            or before this value are dropped. Turns whose timestamp is
            missing, unparseable, or not comparable with ``since`` are
            kept.

    Returns:
        The turns in file order. An unreadable file yields an empty list
        (a warning is logged). Blank lines, lines that are not valid JSON,
        and lines that are not JSON objects are skipped.
    """
    turns: list[ConversationTurn] = []
    try:
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding; undecodable bytes are replaced.
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        logger.warning("Could not read session file: %s", path)
        return turns

    # Split on "\n" only. str.splitlines() also breaks on U+2028, U+2029
    # and U+0085, which may appear unescaped inside JSON strings and would
    # cut a record in two. A trailing "\r" is removed by strip() below.
    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue

        # Valid JSON that is not an object cannot be a session entry.
        if not isinstance(entry, dict):
            continue

        turn = _parse_session_entry(entry)
        if turn is None:
            continue

        if since is not None and turn.timestamp:
            try:
                turn_dt = datetime.fromisoformat(
                    turn.timestamp.replace("Z", "+00:00")
                )
                if turn_dt <= since:
                    continue
            except (ValueError, TypeError, AttributeError):
                # Unparseable, non-string, or naive/aware-mismatched
                # timestamps: keep the turn rather than lose it.
                pass

        turns.append(turn)
    return turns
131
+
132
+
133
def _commit_sha_to_datetime(repo_root: Path, sha: str) -> Optional[datetime]:
    """Look up the committed datetime of a commit via gitpython.

    This is best-effort: any exception raised while importing gitpython,
    opening the repository, or resolving the SHA is logged at debug level
    and the function returns None.
    """
    try:
        # Imported inside the try so a failing import is handled the same
        # way as any other lookup failure.
        from git import Repo

        return Repo(repo_root).commit(sha).committed_datetime
    except Exception:
        logger.debug("Could not resolve commit SHA %s to datetime", sha)
    return None
144
+
145
+
146
def read_claude_sessions(
    repo_root: Path,
    since_commit: Optional[str] = None,
    since_datetime: Optional[str] = None,
) -> list[ConversationTurn]:
    """Collect conversation turns from this repo's Claude Code sessions.

    Steps:
      1. Locate the session directory for the repo; without one the result
         is an empty list.
      2. Work out a cutoff datetime: a parseable since_datetime wins,
         otherwise the commit time of since_commit is used (if given).
      3. List the session files modified at or after the cutoff.
      4. Parse every file with the same cutoff, merge the turns, and sort
         them by their timestamp string (missing timestamps sort first).
    """
    session_dir = find_session_dir(repo_root)
    if session_dir is None:
        return []

    # since_datetime (from last_extracted_at) is the tighter bound
    cutoff_dt: Optional[datetime] = None
    if since_datetime:
        try:
            cutoff_dt = datetime.fromisoformat(since_datetime.replace("Z", "+00:00"))
        except (ValueError, TypeError):
            cutoff_dt = None

    # Fall back to commit datetime
    if cutoff_dt is None and since_commit:
        cutoff_dt = _commit_sha_to_datetime(repo_root, since_commit)

    merged = [
        turn
        for session_file in list_session_files(session_dir, modified_after=cutoff_dt)
        for turn in parse_session_file(session_file, since=cutoff_dt)
    ]
    return sorted(merged, key=lambda turn: turn.timestamp or "")