contexthub-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
@@ -0,0 +1,207 @@
1
+ """Context capture: Claude Code session parsing, file, stdin, env."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ MAX_CONTEXT_BYTES = 200 * 1024 # 200KB
11
+
12
+
13
class ContextError(Exception):
    """Raised when a requested context source cannot be located or read."""
15
+
16
+
17
def capture_context(
    session: bool = False,
    context_file: str | None = None,
    stdin: bool = False,
    git_root: Path | None = None,
) -> tuple[str | None, str | None, str | None]:
    """Pull context text from the first source that was requested.

    Sources are tried in a fixed order: Claude Code session, context file,
    stdin, and finally the ``CH_CONTEXT`` environment variable.

    Returns:
        A ``(raw_context, context_source, session_id)`` triple. All three
        entries are ``None`` when no source was requested and ``CH_CONTEXT``
        is unset or empty.
    """
    if session:
        return _capture_session(git_root)
    if context_file:
        return _capture_file(context_file)
    if stdin:
        return _capture_stdin()

    # Environment variable is the implicit fallback source.
    from_env = os.environ.get("CH_CONTEXT")
    if not from_env:
        return None, None, None
    return truncate_context(from_env), "env", None
37
+
38
+
39
def _capture_session(git_root: Path | None) -> tuple[str, str, str]:
    """Parse the latest Claude Code JSONL session for this project.

    The session lookup (project slug, ``*.jsonl`` glob, newest-by-mtime,
    ``agent-`` exclusion) is delegated to ``find_latest_session`` so the two
    entry points cannot drift apart.

    Returns:
        ``(transcript, "claude_session", session_id)`` where the transcript
        has been passed through ``truncate_context``.

    Raises:
        ContextError: if ``git_root`` is ``None`` or no session file exists
            for the project.
    """
    if git_root is None:
        raise ContextError("Cannot determine project path for session lookup.")

    found = find_latest_session(git_root)
    if found is None:
        raise ContextError("No Claude Code session found for this project.")

    latest, session_id = found
    transcript = parse_session_jsonl(latest)
    return truncate_context(transcript), "claude_session", session_id
65
+
66
+
67
def parse_session_jsonl(path: Path) -> str:
    """Parse a Claude Code JSONL session file into a structured transcript.

    Each line is decoded as JSON; only entries whose ``type`` is ``"user"``
    or ``"assistant"`` contribute. Blank lines, undecodable lines, lines that
    decode to something other than an object, and entries whose ``message``
    is not an object are skipped instead of raising.

    Entries sharing a ``message.id`` are collapsed: a later entry replaces
    the earlier one in place, keeping its original position.

    Returns:
        The formatted messages, each prefixed with ``[USER]`` or
        ``[ASSISTANT]`` and separated by ``---`` rules. Messages that format
        to empty text are omitted.
    """
    entries = []
    seen_message_ids: dict[str, int] = {}  # message.id -> index in entries

    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                entry = json.loads(line)
            except json.JSONDecodeError:
                continue

            # Valid JSON is not necessarily an object (e.g. `[]`, `42`, `null`).
            if not isinstance(entry, dict):
                continue

            entry_type = entry.get("type")
            if entry_type not in ("user", "assistant"):
                continue

            # A missing/null/non-object message carries no content to render.
            msg = entry.get("message")
            if not isinstance(msg, dict):
                continue

            msg_id = msg.get("id")
            content = msg.get("content", "")
            role = msg.get("role", entry_type)

            # Deduplicate streaming assistant messages (same message.id).
            # NOTE(review): this assumes a later line is a superset of the
            # earlier one; if the writer emits one line per content block
            # under a shared id, only the last block survives — confirm
            # against real session files.
            if msg_id and msg_id in seen_message_ids:
                entries[seen_message_ids[msg_id]] = (role, content)
                continue

            if msg_id:
                seen_message_ids[msg_id] = len(entries)
            entries.append((role, content))

    # Format transcript
    parts = []
    for role, content in entries:
        text = format_content(content)
        if text:
            label = "USER" if role == "user" else "ASSISTANT"
            parts.append(f"[{label}]\n{text}")

    return "\n\n---\n\n".join(parts)
111
+
112
+
113
def format_content(content) -> str:
    """Format message content (string or content blocks) into readable text.

    Args:
        content: Either a plain string (returned unchanged) or a list of
            content-block dicts. Any other type yields ``""``.

    Returns:
        The rendered blocks joined by blank lines. Handled block types:
        ``text``, ``thinking`` (wrapped in ``<thinking>`` tags), ``tool_use``
        (name plus indented JSON input) and ``tool_result``. Non-dict blocks,
        unknown block types, and text/thinking payloads that are missing,
        null, non-string or blank are skipped rather than raising.
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, list):
        return ""

    parts = []
    for block in content:
        if not isinstance(block, dict):
            continue

        block_type = block.get("type")

        if block_type == "text":
            text = block.get("text")
            # The key may be present with a JSON null; guard before .strip().
            if isinstance(text, str) and text.strip():
                parts.append(text.strip())

        elif block_type == "thinking":
            thinking = block.get("thinking")
            if isinstance(thinking, str) and thinking.strip():
                parts.append(f"<thinking>\n{thinking.strip()}\n</thinking>")

        elif block_type == "tool_use":
            name = block.get("name", "unknown")
            inp = block.get("input", {})
            # Empty input still emits the "[Tool: name]" header line.
            inp_str = json.dumps(inp, indent=2) if inp else ""
            parts.append(f"[Tool: {name}]\n{inp_str}")

        elif block_type == "tool_result":
            tool_id = block.get("tool_use_id", "")
            result_content = block.get("content", "")
            if isinstance(result_content, list):
                # Keep only text sub-blocks; a non-string text counts as "".
                texts = []
                for rc in result_content:
                    if isinstance(rc, dict) and rc.get("type") == "text":
                        rc_text = rc.get("text", "")
                        texts.append(rc_text if isinstance(rc_text, str) else "")
                result_content = "\n".join(texts)
            if result_content:
                parts.append(f"[Tool Result: {tool_id}]\n{result_content}")

    return "\n\n".join(parts)
159
+
160
+
161
def find_latest_session(git_root: Path) -> tuple[Path, str] | None:
    """Locate the most recently modified Claude Code session for a project.

    Sessions are looked up under ``~/.claude/projects/<slug>/`` where the
    slug is the resolved absolute ``git_root`` with every ``/`` replaced by
    ``-``. Files whose name starts with ``agent-`` are ignored.

    Returns:
        ``(path, session_id)`` for the newest ``*.jsonl`` file (the session
        id is the file stem), or ``None`` when the directory is missing or
        holds no eligible file.
    """
    project_slug = str(git_root.resolve()).replace("/", "-")
    project_dir = Path.home() / ".claude" / "projects" / project_slug
    if not project_dir.is_dir():
        return None

    newest = None
    newest_mtime = None
    for candidate in project_dir.glob("*.jsonl"):
        if candidate.name.startswith("agent-"):
            continue
        mtime = candidate.stat().st_mtime
        # Strict comparison keeps the first file seen on an mtime tie.
        if newest is None or mtime > newest_mtime:
            newest, newest_mtime = candidate, mtime

    if newest is None:
        return None
    return newest, newest.stem
181
+
182
+
183
def _capture_file(filepath: str) -> tuple[str, str, None]:
    """Load context from a UTF-8 text file.

    Returns:
        ``(text, "file", None)`` with the text passed through
        ``truncate_context``.

    Raises:
        ContextError: if ``filepath`` does not point at a regular file.
    """
    source = Path(filepath)
    if source.is_file():
        contents = source.read_text(encoding="utf-8")
        return truncate_context(contents), "file", None
    raise ContextError(f"Context file not found: {filepath}")
190
+
191
+
192
def _capture_stdin() -> tuple[str, str, None]:
    """Load context from piped standard input.

    Returns:
        ``(text, "stdin", None)`` with the text passed through
        ``truncate_context``.

    Raises:
        ContextError: if stdin is an interactive terminal, i.e. nothing was
            piped in.
    """
    stream = sys.stdin
    if stream.isatty():
        raise ContextError("No input on stdin. Pipe content or use --context FILE.")
    piped = stream.read()
    return truncate_context(piped), "stdin", None
198
+
199
+
200
def truncate_context(text: str, max_bytes: int | None = None) -> str:
    """Truncate text so its UTF-8 encoding fits within a byte budget.

    Args:
        text: The text to cap.
        max_bytes: Byte budget. ``None`` (the default) uses
            ``MAX_CONTEXT_BYTES``; negative values are treated as 0.

    Returns:
        ``text`` unchanged when it fits. Otherwise the longest prefix that
        fits, followed by a ``"\\n... (truncated at <limit>)"`` marker. The
        marker is appended after the cut, so the result may exceed the
        budget by the marker's length.
    """
    limit = MAX_CONTEXT_BYTES if max_bytes is None else max(max_bytes, 0)
    encoded = text.encode("utf-8")
    if len(encoded) <= limit:
        return text

    # The cut may land inside a multi-byte character; errors="ignore" drops
    # the dangling partial sequence instead of raising.
    truncated = encoded[:limit].decode("utf-8", errors="ignore")

    # Derive the label from the limit so it cannot drift from the constant.
    if limit and limit % 1024 == 0:
        label = f"{limit // 1024}KB"
    else:
        label = f"{limit} bytes"
    return f"{truncated}\n... (truncated at {label})"
@@ -0,0 +1,173 @@
1
+ """Git subprocess wrapper."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+
9
class GitError(Exception):
    """Raised when git is unavailable or a git command exits unsuccessfully."""
11
+
12
+
13
def _run(args: list[str], cwd: Path | None = None) -> str:
    """Run ``git <args>`` and return its stripped stdout.

    Args:
        args: Arguments passed to git (without the leading ``git``).
        cwd: Directory to run in; ``None`` uses the current directory.

    Returns:
        The command's stdout with surrounding whitespace removed.

    Raises:
        GitError: if git cannot be launched (binary missing, or ``cwd`` does
            not exist) or the command exits non-zero. The message is git's
            stderr, falling back to stdout and then to a generic description
            so it is never empty.
    """
    try:
        result = subprocess.run(
            ["git", *args],
            capture_output=True,
            text=True,
            cwd=cwd,
        )
    except FileNotFoundError as err:
        # subprocess raises FileNotFoundError both for a missing executable
        # and for a missing working directory; tell the two apart.
        if cwd is not None and not Path(cwd).is_dir():
            raise GitError(f"Working directory does not exist: {cwd}") from err
        raise GitError("git is not installed or not in PATH.") from err

    if result.returncode != 0:
        # Some commands report failure on stdout (e.g. a commit with nothing
        # to commit), leaving stderr empty.
        detail = result.stderr.strip() or result.stdout.strip()
        if not detail:
            detail = f"git {' '.join(args)} failed with exit code {result.returncode}"
        raise GitError(detail)
    return result.stdout.strip()
26
+
27
+
28
def is_inside_work_tree(cwd: Path | None = None) -> bool:
    """Report whether ``cwd`` lies inside a git working tree.

    Any ``GitError`` from ``git rev-parse --is-inside-work-tree`` is treated
    as "not inside a work tree".
    """
    try:
        answer = _run(["rev-parse", "--is-inside-work-tree"], cwd=cwd)
    except GitError:
        return False
    return answer == "true"
34
+
35
+
36
def get_toplevel(cwd: Path | None = None) -> Path:
    """Return the path printed by ``git rev-parse --show-toplevel``."""
    toplevel = _run(["rev-parse", "--show-toplevel"], cwd=cwd)
    return Path(toplevel)
38
+
39
+
40
def init_repo(cwd: Path | None = None) -> None:
    """Run ``git init`` in ``cwd``; raises ``GitError`` on failure."""
    init_args = ["init"]
    _run(init_args, cwd=cwd)
42
+
43
+
44
def add_remote(name: str, url: str, cwd: Path | None = None) -> None:
    """Register remote ``name`` pointing at ``url`` via ``git remote add``."""
    remote_args = ["remote", "add", name, url]
    _run(remote_args, cwd=cwd)
46
+
47
+
48
def get_remote_url(remote: str = "origin", cwd: Path | None = None) -> str:
    """Return the URL reported by ``git remote get-url <remote>``."""
    remote_url = _run(["remote", "get-url", remote], cwd=cwd)
    return remote_url
50
+
51
+
52
def add_all(cwd: Path | None = None) -> None:
    """Stage everything with ``git add -A``; raises ``GitError`` on failure."""
    stage_args = ["add", "-A"]
    _run(stage_args, cwd=cwd)
54
+
55
+
56
def commit(message: str, cwd: Path | None = None) -> None:
    """Create a commit with ``message`` via ``git commit -m``."""
    commit_args = ["commit", "-m", message]
    _run(commit_args, cwd=cwd)
58
+
59
+
60
def has_staged_changes(cwd: Path | None = None) -> bool:
    """Report whether the index differs from HEAD.

    Runs ``git diff --cached --quiet``, which signals its result through the
    exit status: 0 means no staged differences, 1 means there are some.

    Raises:
        GitError: if git cannot be launched, or git exits with any status
            other than 0 or 1 (a real failure such as "not a git
            repository"), which must not be mistaken for "has changes".
    """
    try:
        result = subprocess.run(
            ["git", "diff", "--cached", "--quiet"],
            capture_output=True,
            text=True,
            cwd=cwd,
        )
    except FileNotFoundError as err:
        raise GitError("git is not installed or not in PATH.") from err

    if result.returncode in (0, 1):
        return result.returncode == 1

    detail = result.stderr.strip()
    if not detail:
        detail = f"git diff --cached failed with exit code {result.returncode}"
    raise GitError(detail)
70
+
71
+
72
def has_changes(cwd: Path | None = None) -> bool:
    """Check if there are any staged, unstaged, or untracked changes.

    Returns ``True`` when ``git status --porcelain`` prints anything. The
    exit status is deliberately not inspected, so a failing git call that
    prints nothing to stdout is reported as "no changes".

    Raises:
        GitError: if git cannot be launched, matching the other helpers in
            this module instead of leaking a raw ``FileNotFoundError``.
    """
    try:
        result = subprocess.run(
            ["git", "status", "--porcelain"],
            capture_output=True,
            text=True,
            cwd=cwd,
        )
    except FileNotFoundError as err:
        raise GitError("git is not installed or not in PATH.") from err
    return bool(result.stdout.strip())
81
+
82
+
83
def get_head_hash(cwd: Path | None = None) -> str:
    """Return the output of ``git rev-parse HEAD``."""
    head_hash = _run(["rev-parse", "HEAD"], cwd=cwd)
    return head_hash
85
+
86
+
87
def get_current_branch(cwd: Path | None = None) -> str | None:
    """Return the current branch name, or ``None`` when unavailable.

    ``None`` covers both a failing ``git branch --show-current`` and an
    empty result.
    """
    try:
        branch_name = _run(["branch", "--show-current"], cwd=cwd)
    except GitError:
        return None
    if not branch_name:
        return None
    return branch_name
92
+
93
+
94
def get_changed_files(commit_hash: str, cwd: Path | None = None) -> list[str]:
    """List the paths touched by ``commit_hash`` using ``git diff-tree``.

    When the first call prints nothing, it is retried with ``--root``,
    because a root commit (no parent) reports no files without that flag.
    """
    common_args = ["--no-commit-id", "--name-only", "-r", commit_hash]
    listing = _run(["diff-tree", *common_args], cwd=cwd)
    if not listing:
        listing = _run(["diff-tree", "--root", *common_args], cwd=cwd)
    return [name for name in listing.splitlines() if name]
100
+
101
+
102
def push(cwd: Path | None = None) -> None:
    """Push the current branch, setting upstream on ``origin`` if needed.

    A plain ``git push`` is attempted first. If it fails, the push is retried
    as ``git push -u origin <current-branch>``, on the assumption that no
    upstream was configured.

    Raises:
        GitError: if the retry fails, or if the first push fails and there
            is no current branch name (e.g. detached HEAD). In that case the
            original push error is re-raised rather than running
            ``git push -u origin ""``.
    """
    try:
        _run(["push"], cwd=cwd)
    except GitError:
        branch = _run(["branch", "--show-current"], cwd=cwd)
        if not branch:
            # Nothing sensible to set as upstream; surface the real failure.
            raise
        # No upstream set — push with -u to set tracking
        _run(["push", "-u", "origin", branch], cwd=cwd)
109
+
110
+
111
def fetch(cwd: Path | None = None) -> None:
    """Run ``git fetch origin``; raises ``GitError`` on failure."""
    fetch_args = ["fetch", "origin"]
    _run(fetch_args, cwd=cwd)
113
+
114
+
115
def pull_rebase(cwd: Path | None = None) -> bool:
    """Rebase the current branch onto ``origin/<branch>`` via ``git pull --rebase``.

    Returns:
        ``True`` when the pull completes.

    Raises:
        GitError: with the message ``"rebase_conflict"`` when git's error
            text contains ``CONFLICT`` or ``could not apply``; any other
            ``GitError`` propagates unchanged.
    """
    try:
        branch = _run(["branch", "--show-current"], cwd=cwd)
        _run(["pull", "--rebase", "origin", branch], cwd=cwd)
    except GitError as e:
        message = str(e)
        if "CONFLICT" in message or "could not apply" in message:
            raise GitError("rebase_conflict")
        raise
    return True
124
+
125
+
126
def has_conflicts(cwd: Path | None = None) -> bool:
    """Check if there are unmerged files (conflict markers).

    Delegates to ``get_conflicted_files`` so both helpers share a single
    ``git diff --name-only --diff-filter=U`` invocation and stay in sync.
    """
    return bool(get_conflicted_files(cwd=cwd))
135
+
136
+
137
def get_conflicted_files(cwd: Path | None = None) -> list[str]:
    """Get list of files with conflicts.

    Returns the non-empty lines printed by
    ``git diff --name-only --diff-filter=U``. The exit status is not
    inspected, so a failing git call that prints nothing yields ``[]``.

    Raises:
        GitError: if git cannot be launched, matching the other helpers in
            this module instead of leaking a raw ``FileNotFoundError``.
    """
    try:
        result = subprocess.run(
            ["git", "diff", "--name-only", "--diff-filter=U"],
            capture_output=True,
            text=True,
            cwd=cwd,
        )
    except FileNotFoundError as err:
        raise GitError("git is not installed or not in PATH.") from err
    return [f for f in result.stdout.strip().splitlines() if f]
146
+
147
+
148
def get_upstream_commits(cwd: Path | None = None) -> list[str]:
    """List hashes reachable from ``origin/<branch>`` but not from HEAD.

    The branch lookup itself may raise ``GitError``; only a failure of the
    ``git log`` call is swallowed and reported as an empty list.
    """
    branch = _run(["branch", "--show-current"], cwd=cwd)
    revision_range = f"HEAD..origin/{branch}"
    try:
        log_output = _run(["log", revision_range, "--format=%H"], cwd=cwd)
    except GitError:
        return []
    return [commit_hash for commit_hash in log_output.splitlines() if commit_hash]
156
+
157
+
158
def get_remote_head(cwd: Path | None = None) -> str | None:
    """Resolve ``origin/<current-branch>`` to a commit hash.

    Returns ``None`` when either the branch lookup or the ``rev-parse`` call
    raises ``GitError``.
    """
    try:
        branch = _run(["branch", "--show-current"], cwd=cwd)
        remote_ref = f"origin/{branch}"
        return _run(["rev-parse", remote_ref], cwd=cwd)
    except GitError:
        return None
165
+
166
+
167
def show_file(commit: str, path: str, cwd: Path | None = None) -> str:
    """Return the output of ``git show <commit>:<path>``.

    The text comes back through ``_run``, so leading and trailing whitespace
    of the file content is stripped.
    """
    object_spec = f"{commit}:{path}"
    return _run(["show", object_spec], cwd=cwd)
170
+
171
+
172
def abort_rebase(cwd: Path | None = None) -> None:
    """Run ``git rebase --abort``; raises ``GitError`` on failure."""
    abort_args = ["rebase", "--abort"]
    _run(abort_args, cwd=cwd)
@@ -0,0 +1,35 @@
1
+ """GitHub remote URL parsing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+
7
+
8
def parse_github_remote(url: str) -> tuple[str, str] | None:
    """Extract (owner, repo) from a GitHub HTTPS or SSH remote URL.

    Supports:
    - https://github.com/owner/repo.git
    - https://github.com/owner/repo
    - https://user:token@github.com/owner/repo.git  (embedded credentials)
    - git@github.com:owner/repo.git
    - git@github.com:owner/repo
    - ssh://git@github.com/owner/repo.git

    Surrounding whitespace and a single trailing slash are tolerated, and a
    trailing ``.git`` is dropped from the repo name.

    Returns None if the URL doesn't match a GitHub pattern.
    """
    # One alternation per transport prefix; the owner/repo tail is shared.
    pattern = (
        r"(?:"
        r"https?://(?:[^/@\s]+@)?github\.com/"  # HTTPS, optional userinfo
        r"|ssh://[^/@\s]+@github\.com/"         # ssh://user@host/ form
        r"|[^/@:\s]+@github\.com:"              # scp-like user@host: form
        r")"
        r"([^/]+)/([^/]+?)(?:\.git)?/?$"
    )
    m = re.match(pattern, url.strip())
    if m is None:
        return None
    return m.group(1), m.group(2)
@@ -0,0 +1,81 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass, field, asdict
5
+
6
+
7
@dataclass
class ContextRecord:
    """Context captured for one git commit, plus optional provenance fields."""

    # Required fields
    id: str
    git_commit_hash: str
    commit_message: str
    files_changed: list[str]
    timestamp: str
    # Optional fields, all defaulting to None
    raw_context: str | None = None
    context_source: str | None = None
    session_id: str | None = None
    branch: str | None = None
    goal: str | None = None
    subgoal: str | None = None

    def to_dict(self) -> dict:
        """Return every field as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize the record as JSON indented by two spaces."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)
26
+
27
+
28
@dataclass
class ResolutionRecord:
    """State tracked for one conflict-resolution attempt on a repository."""

    # Required fields
    id: str
    owner: str
    repo: str
    local_sha: str
    remote_sha: str
    session_id: str | None
    # One of: "in_progress" | "pending_review" | "accepted" | "completed" | "failed"
    status: str
    started_at: str
    conflicted_files: list[str]
    # Optional fields
    repo_path: str | None = None
    resolved_commit_hash: str | None = None
    completed_at: str | None = None
    review_count: int = 0
    # default_factory gives each instance its own dict
    file_snapshots: dict[str, str] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return every field as a plain dict (via ``dataclasses.asdict``)."""
        return asdict(self)

    def to_json(self) -> str:
        """Serialize the record as JSON indented by two spaces."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)
50
+
51
+
52
@dataclass
class RepoConfig:
    """GitHub coordinates of a repository, with a versioned dict/JSON form."""

    owner: str
    repo: str
    remote_url: str
    remote_name: str = "origin"

    def to_dict(self) -> dict:
        """Return the config as ``{"version": ..., "github": {...}}``."""
        github_section = {
            "owner": self.owner,
            "repo": self.repo,
            "remote_url": self.remote_url,
            "remote_name": self.remote_name,
        }
        return {"version": "0.1.0", "github": github_section}

    def to_json(self) -> str:
        """Serialize ``to_dict()`` as JSON indented by two spaces."""
        payload = self.to_dict()
        return json.dumps(payload, indent=2)

    @classmethod
    def from_dict(cls, data: dict) -> RepoConfig:
        """Build a config from the dict shape produced by ``to_dict``.

        ``remote_name`` falls back to ``"origin"`` when absent; a missing
        ``github`` section or other key raises ``KeyError``.
        """
        gh = data["github"]
        owner = gh["owner"]
        repo = gh["repo"]
        remote_url = gh["remote_url"]
        remote_name = gh.get("remote_name", "origin")
        return cls(
            owner=owner,
            repo=repo,
            remote_url=remote_url,
            remote_name=remote_name,
        )