npm - contexthub-cli - Versions diffs - 0.1.0 - Mend

contexthub-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/bin/ch +25 -0
package/contexthub/__init__.py +3 -0
package/contexthub/cli.py +1331 -0
package/contexthub/core/__init__.py +0 -0
package/contexthub/core/context.py +207 -0
package/contexthub/core/git.py +173 -0
package/contexthub/core/github.py +35 -0
package/contexthub/core/models.py +81 -0
package/contexthub/core/r2.py +221 -0
package/contexthub/hooks.py +154 -0
package/install.sh +44 -0
package/package.json +22 -0
package/requirements.txt +2 -0

package/contexthub/core/__init__.py ADDED Viewed

File without changes

package/contexthub/core/context.py ADDED Viewed

@@ -0,0 +1,207 @@
+"""Context capture: Claude Code session parsing, file, stdin, env."""
+from __future__ import annotations
+import json
+import os
+import sys
+from pathlib import Path
+# Byte budget (UTF-8 encoded size) that truncate_context() enforces on captured text.
+MAX_CONTEXT_BYTES = 200 * 1024  # 200KB
+class ContextError(Exception):
+    """Raised when context cannot be captured from the requested source."""
+def capture_context(
+    session: bool = False,
+    context_file: str | None = None,
+    stdin: bool = False,
+    git_root: Path | None = None,
+) -> tuple[str | None, str | None, str | None]:
+    """Collect context from the highest-priority source that was requested.
+    Sources are tried in a fixed order: Claude Code session, context file,
+    stdin, then the ``CH_CONTEXT`` environment variable.
+    Args:
+        session: Use the latest Claude Code session for ``git_root``.
+        context_file: Path of a file to read the context from.
+        stdin: Read the context from standard input.
+        git_root: Project root, only used for the session lookup.
+    Returns:
+        ``(raw_context, context_source, session_id)``; all three are ``None``
+        when no source yields anything.
+    Raises:
+        ContextError: Propagated from the selected capture helper.
+    """
+    if session:
+        captured = _capture_session(git_root)
+    elif context_file:
+        captured = _capture_file(context_file)
+    elif stdin:
+        captured = _capture_stdin()
+    else:
+        from_env = os.environ.get("CH_CONTEXT")
+        # An unset or empty variable counts as "no context".
+        if from_env:
+            captured = (truncate_context(from_env), "env", None)
+        else:
+            captured = (None, None, None)
+    return captured
+def _capture_session(git_root: Path | None) -> tuple[str, str, str]:
+    """Capture the transcript of the latest Claude Code session for a project.
+    Args:
+        git_root: Root of the project's git work tree.
+    Returns:
+        ``(transcript, "claude_session", session_id)`` where the transcript has
+        been passed through ``truncate_context`` and the session id is the stem
+        of the session file.
+    Raises:
+        ContextError: If ``git_root`` is ``None`` or no session file is found.
+    """
+    if git_root is None:
+        raise ContextError("Cannot determine project path for session lookup.")
+    # Session discovery lives in find_latest_session(); reuse it so the slug
+    # and "agent-" filtering rules are defined in exactly one place.
+    found = find_latest_session(git_root)
+    if found is None:
+        raise ContextError("No Claude Code session found for this project.")
+    session_path, session_id = found
+    transcript = parse_session_jsonl(session_path)
+    return truncate_context(transcript), "claude_session", session_id
+def parse_session_jsonl(path: Path) -> str:
+    """Parse a Claude Code JSONL session file into a structured transcript.
+    Only ``user`` and ``assistant`` entries are kept. Blank lines, lines that are
+    not valid JSON, and entries with an unexpected shape (not a JSON object, or a
+    ``message`` that is not an object) are skipped instead of aborting the parse.
+    Args:
+        path: Location of the ``.jsonl`` session file.
+    Returns:
+        The messages rendered as ``[USER]`` / ``[ASSISTANT]`` sections joined by
+        ``---`` separators; an empty string when nothing usable was found.
+    """
+    entries: list[tuple[object, object]] = []
+    seen_message_ids: dict[object, int] = {}  # message.id -> index in entries
+    with open(path, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                entry = json.loads(line)
+            except json.JSONDecodeError:
+                continue
+            # A line can be valid JSON without being an object (e.g. a list).
+            if not isinstance(entry, dict):
+                continue
+            entry_type = entry.get("type")
+            if entry_type not in ("user", "assistant"):
+                continue
+            msg = entry.get("message")
+            if not isinstance(msg, dict):
+                continue
+            msg_id = msg.get("id")
+            if isinstance(msg_id, (list, dict)):
+                # Unhashable ids cannot be tracked; treat the entry as un-keyed.
+                msg_id = None
+            content = msg.get("content", "")
+            role = msg.get("role", entry_type)
+            # Deduplicate streaming assistant messages (same message.id):
+            # the later entry replaces the earlier one in place.
+            if msg_id and msg_id in seen_message_ids:
+                entries[seen_message_ids[msg_id]] = (role, content)
+                continue
+            if msg_id:
+                seen_message_ids[msg_id] = len(entries)
+            entries.append((role, content))
+    # Format transcript; entries that render to nothing are left out.
+    parts = []
+    for role, content in entries:
+        text = format_content(content)
+        if text:
+            label = "USER" if role == "user" else "ASSISTANT"
+            parts.append(f"[{label}]\n{text}")
+    return "\n\n---\n\n".join(parts)
+def format_content(content) -> str:
+    """Format message content (string or content blocks) into readable text.
+    Args:
+        content: Either a plain string, returned unchanged, or a list of content
+            block dicts. Any other value yields an empty string.
+    Returns:
+        The rendered blocks joined by blank lines. ``text`` and ``thinking``
+        blocks are stripped, ``tool_use`` blocks show the tool name and its JSON
+        input, ``tool_result`` blocks show the tool id and the result text.
+        Non-dict items, blocks of other types and empty texts are skipped.
+    """
+    if isinstance(content, str):
+        return content
+    if not isinstance(content, list):
+        return ""
+    parts = []
+    for block in content:
+        if not isinstance(block, dict):
+            continue
+        block_type = block.get("type")
+        if block_type == "text":
+            text = _as_text(block.get("text")).strip()
+            if text:
+                parts.append(text)
+        elif block_type == "thinking":
+            thinking = _as_text(block.get("thinking")).strip()
+            if thinking:
+                parts.append(f"<thinking>\n{thinking}\n</thinking>")
+        elif block_type == "tool_use":
+            name = block.get("name", "unknown")
+            inp = block.get("input", {})
+            # Compact representation of tool call; empty input renders as nothing.
+            inp_str = json.dumps(inp, indent=2) if inp else ""
+            parts.append(f"[Tool: {name}]\n{inp_str}")
+        elif block_type == "tool_result":
+            tool_id = block.get("tool_use_id", "")
+            result_content = block.get("content", "")
+            if isinstance(result_content, list):
+                # Keep only the text blocks of a structured result.
+                result_content = "\n".join(
+                    _as_text(rc.get("text"))
+                    for rc in result_content
+                    if isinstance(rc, dict) and rc.get("type") == "text"
+                )
+            if result_content:
+                parts.append(f"[Tool Result: {tool_id}]\n{result_content}")
+    return "\n\n".join(parts)
+def _as_text(value) -> str:
+    """Return ``value`` if it is a string, else ``""`` (covers JSON null)."""
+    return value if isinstance(value, str) else ""
+def find_latest_session(git_root: Path) -> tuple[Path, str] | None:
+    """Locate the most recently modified Claude Code session file for a project.
+    Sessions are looked up in ``~/.claude/projects/<slug>/`` where the slug is
+    the resolved ``git_root`` path with every ``/`` replaced by ``-``. Files
+    whose name starts with ``agent-`` are not considered.
+    Returns:
+        ``(path, session_id)`` with the session id being the file stem, or
+        ``None`` when the directory or a matching ``.jsonl`` file is missing.
+    """
+    project_slug = str(git_root.resolve()).replace("/", "-")
+    project_dir = Path.home().joinpath(".claude", "projects", project_slug)
+    if not project_dir.is_dir():
+        return None
+    candidates = [
+        entry for entry in project_dir.glob("*.jsonl")
+        if not entry.name.startswith("agent-")
+    ]
+    newest = max(candidates, key=lambda entry: entry.stat().st_mtime, default=None)
+    if newest is None:
+        return None
+    return newest, newest.stem
+def _capture_file(filepath: str) -> tuple[str, str, None]:
+    """Load context from ``filepath``, decoded as UTF-8.
+    Returns:
+        ``(text, "file", None)`` with the text passed through ``truncate_context``.
+    Raises:
+        ContextError: If ``filepath`` is not an existing regular file.
+    """
+    source = Path(filepath)
+    if source.is_file():
+        content = source.read_text(encoding="utf-8")
+        return truncate_context(content), "file", None
+    raise ContextError(f"Context file not found: {filepath}")
+def _capture_stdin() -> tuple[str, str, None]:
+    """Load context from standard input.
+    Returns:
+        ``(text, "stdin", None)`` with the text passed through ``truncate_context``.
+    Raises:
+        ContextError: If stdin is an interactive terminal, i.e. nothing was piped in.
+    """
+    interactive = sys.stdin.isatty()
+    if interactive:
+        raise ContextError("No input on stdin. Pipe content or use --context FILE.")
+    piped = sys.stdin.read()
+    return truncate_context(piped), "stdin", None
+def truncate_context(text: str, max_bytes: int | None = None) -> str:
+    """Truncate ``text`` so its UTF-8 encoding fits within a byte budget.
+    Args:
+        text: The text to limit.
+        max_bytes: Byte budget; ``None`` (the default) means ``MAX_CONTEXT_BYTES``.
+    Returns:
+        ``text`` unchanged when it fits. Otherwise the longest prefix that fits
+        the budget, followed by a truncation notice. The notice is appended
+        after the cut and is not counted against the budget.
+    """
+    limit = MAX_CONTEXT_BYTES if max_bytes is None else max_bytes
+    encoded = text.encode("utf-8")
+    if len(encoded) <= limit:
+        return text
+    # The cut may land inside a multi-byte character; errors="ignore" drops the
+    # incomplete trailing bytes instead of raising.
+    truncated = encoded[:limit].decode("utf-8", errors="ignore")
+    # Derive the label from the limit so the notice can never disagree with it.
+    if limit >= 1024 and limit % 1024 == 0:
+        label = f"{limit // 1024}KB"
+    else:
+        label = f"{limit} bytes"
+    return truncated + f"\n... (truncated at {label})"

package/contexthub/core/git.py ADDED Viewed

@@ -0,0 +1,173 @@
+"""Git subprocess wrapper."""
+from __future__ import annotations
+import subprocess
+from pathlib import Path
+class GitError(Exception):
+    """Raised when a git command cannot be started or reports a failure."""
+def _run(args: list[str], cwd: Path | None = None) -> str:
+    """Run ``git`` with ``args`` and return its stripped standard output.
+    Args:
+        args: Arguments placed after ``git`` on the command line.
+        cwd: Directory to run the command in; ``None`` uses the current one.
+    Returns:
+        Standard output with leading and trailing whitespace removed.
+    Raises:
+        GitError: If the process cannot be started (``FileNotFoundError``) or
+            git exits with a non-zero status.
+    """
+    try:
+        result = subprocess.run(
+            ["git", *args],
+            capture_output=True,
+            text=True,
+            cwd=cwd,
+        )
+    except FileNotFoundError as exc:
+        # Chain the cause so the underlying OS error stays visible in tracebacks.
+        raise GitError("git is not installed or not in PATH.") from exc
+    if result.returncode != 0:
+        # Fall back to stdout so the error is not blank when git wrote its
+        # diagnostics there instead of to stderr.
+        raise GitError(result.stderr.strip() or result.stdout.strip())
+    return result.stdout.strip()
+def is_inside_work_tree(cwd: Path | None = None) -> bool:
+    """Report whether ``cwd`` lies inside a git work tree.
+    True only when ``git rev-parse --is-inside-work-tree`` prints ``true``;
+    any ``GitError`` from running it is treated as "no".
+    """
+    try:
+        answer = _run(["rev-parse", "--is-inside-work-tree"], cwd=cwd)
+    except GitError:
+        return False
+    return answer == "true"
+def get_toplevel(cwd: Path | None = None) -> Path:
+    """Return the path printed by ``git rev-parse --show-toplevel``."""
+    toplevel = _run(["rev-parse", "--show-toplevel"], cwd=cwd)
+    return Path(toplevel)
+def init_repo(cwd: Path | None = None) -> None:
+    """Run ``git init`` in ``cwd``; raises ``GitError`` on failure."""
+    init_args = ["init"]
+    _run(init_args, cwd=cwd)
+def add_remote(name: str, url: str, cwd: Path | None = None) -> None:
+    """Register ``url`` as the remote ``name`` via ``git remote add``."""
+    remote_args = ["remote", "add", name, url]
+    _run(remote_args, cwd=cwd)
+def get_remote_url(remote: str = "origin", cwd: Path | None = None) -> str:
+    """Return the URL that ``git remote get-url`` reports for ``remote``."""
+    remote_url = _run(["remote", "get-url", remote], cwd=cwd)
+    return remote_url
+def add_all(cwd: Path | None = None) -> None:
+    """Stage everything with ``git add -A``; raises ``GitError`` on failure."""
+    stage_args = ["add", "-A"]
+    _run(stage_args, cwd=cwd)
+def commit(message: str, cwd: Path | None = None) -> None:
+    """Create a commit with ``message`` via ``git commit -m``."""
+    commit_args = ["commit", "-m", message]
+    _run(commit_args, cwd=cwd)
+def has_staged_changes(cwd: Path | None = None) -> bool:
+    """Report whether the index holds changes that are not yet committed.
+    Runs ``git diff --cached --quiet``, which answers through its exit status:
+    0 for no differences, 1 for differences.
+    Raises:
+        GitError: If git cannot be started, or exits with any other status.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "diff", "--cached", "--quiet"],
+            capture_output=True,
+            text=True,
+            cwd=cwd,
+        )
+    except FileNotFoundError as exc:
+        raise GitError("git is not installed or not in PATH.") from exc
+    if result.returncode in (0, 1):
+        return result.returncode == 1
+    # Any other status means git itself failed; do not report that as
+    # "there are staged changes".
+    raise GitError(result.stderr.strip())
+def has_changes(cwd: Path | None = None) -> bool:
+    """Check if there are any staged, unstaged, or untracked changes.
+    The answer is whether ``git status --porcelain`` prints anything. The exit
+    status is not inspected, so a failing command reads as "no changes".
+    Raises:
+        GitError: If the git executable cannot be started.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "status", "--porcelain"],
+            capture_output=True,
+            text=True,
+            cwd=cwd,
+        )
+    except FileNotFoundError as exc:
+        # Same error as the other git helpers instead of a raw OS exception.
+        raise GitError("git is not installed or not in PATH.") from exc
+    return bool(result.stdout.strip())
+def get_head_hash(cwd: Path | None = None) -> str:
+    """Return the output of ``git rev-parse HEAD``."""
+    head = _run(["rev-parse", "HEAD"], cwd=cwd)
+    return head
+def get_current_branch(cwd: Path | None = None) -> str | None:
+    """Return the checked-out branch name, or ``None`` if there is none.
+    ``None`` covers both an empty answer from ``git branch --show-current``
+    and a ``GitError`` raised while running it.
+    """
+    try:
+        name = _run(["branch", "--show-current"], cwd=cwd)
+    except GitError:
+        return None
+    return name if name else None
+def get_changed_files(commit_hash: str, cwd: Path | None = None) -> list[str]:
+    """List the paths touched by ``commit_hash``.
+    ``git diff-tree`` is run without ``--root`` first; when that prints nothing
+    the command is repeated with ``--root`` so that a commit without a parent
+    still reports its files.
+    """
+    common_args = ["--no-commit-id", "--name-only", "-r", commit_hash]
+    listing = _run(["diff-tree", *common_args], cwd=cwd)
+    if not listing:
+        listing = _run(["diff-tree", "--root", *common_args], cwd=cwd)
+    return list(filter(None, listing.splitlines()))
+def push(cwd: Path | None = None) -> None:
+    """Push the current branch, setting up tracking on ``origin`` if needed.
+    A plain ``git push`` is tried first. If it fails, the push is retried as
+    ``git push -u origin <branch>`` so that an upstream gets configured.
+    Raises:
+        GitError: The error of the first push when no branch name can be
+            determined (there is nothing to set an upstream for), otherwise
+            the error of the retry.
+    """
+    try:
+        _run(["push"], cwd=cwd)
+    except GitError:
+        try:
+            branch = _run(["branch", "--show-current"], cwd=cwd)
+        except GitError:
+            branch = ""
+        if not branch:
+            # Retrying with an empty refspec would only replace the real push
+            # error with an unrelated one; surface the original instead.
+            raise
+        _run(["push", "-u", "origin", branch], cwd=cwd)
+def fetch(cwd: Path | None = None) -> None:
+    """Run ``git fetch origin``; raises ``GitError`` on failure."""
+    fetch_args = ["fetch", "origin"]
+    _run(fetch_args, cwd=cwd)
+def pull_rebase(cwd: Path | None = None) -> bool:
+    """Rebase the current branch onto its counterpart on ``origin``.
+    Returns:
+        ``True`` when ``git pull --rebase`` finished without error.
+    Raises:
+        GitError: With the message ``"rebase_conflict"`` when the git error
+            text contains ``CONFLICT`` or ``could not apply``; any other git
+            failure is re-raised unchanged.
+    """
+    try:
+        current = _run(["branch", "--show-current"], cwd=cwd)
+        _run(["pull", "--rebase", "origin", current], cwd=cwd)
+    except GitError as err:
+        detail = str(err)
+        if any(marker in detail for marker in ("CONFLICT", "could not apply")):
+            raise GitError("rebase_conflict")
+        raise
+    return True
+def has_conflicts(cwd: Path | None = None) -> bool:
+    """Check if there are unmerged files (conflict markers).
+    True when ``git diff --name-only --diff-filter=U`` prints at least one
+    path. The exit status is not inspected.
+    Raises:
+        GitError: If the git executable cannot be started.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "--diff-filter=U"],
+            capture_output=True,
+            text=True,
+            cwd=cwd,
+        )
+    except FileNotFoundError as exc:
+        # Same error as the other git helpers instead of a raw OS exception.
+        raise GitError("git is not installed or not in PATH.") from exc
+    return bool(result.stdout.strip())
+def get_conflicted_files(cwd: Path | None = None) -> list[str]:
+    """Get list of files with conflicts.
+    Returns the non-empty lines printed by
+    ``git diff --name-only --diff-filter=U``. The exit status is not inspected.
+    Raises:
+        GitError: If the git executable cannot be started.
+    """
+    try:
+        result = subprocess.run(
+            ["git", "diff", "--name-only", "--diff-filter=U"],
+            capture_output=True,
+            text=True,
+            cwd=cwd,
+        )
+    except FileNotFoundError as exc:
+        # Same error as the other git helpers instead of a raw OS exception.
+        raise GitError("git is not installed or not in PATH.") from exc
+    return [f for f in result.stdout.strip().splitlines() if f]
+def get_upstream_commits(cwd: Path | None = None) -> list[str]:
+    """List commit hashes reachable from ``origin/<branch>`` but not from HEAD.
+    Returns an empty list when the ``git log`` call fails. A failure to read
+    the current branch name is not caught and propagates as ``GitError``.
+    """
+    current = _run(["branch", "--show-current"], cwd=cwd)
+    revision_range = f"HEAD..origin/{current}"
+    try:
+        log_output = _run(["log", revision_range, "--format=%H"], cwd=cwd)
+    except GitError:
+        return []
+    return [sha for sha in log_output.splitlines() if sha]
+def get_remote_head(cwd: Path | None = None) -> str | None:
+    """Return the commit hash that ``origin/<current branch>`` resolves to.
+    ``None`` is returned when either git call raises ``GitError``.
+    """
+    try:
+        current = _run(["branch", "--show-current"], cwd=cwd)
+        tip = _run(["rev-parse", f"origin/{current}"], cwd=cwd)
+    except GitError:
+        return None
+    return tip
+def show_file(commit: str, path: str, cwd: Path | None = None) -> str:
+    """Return the content of ``path`` as stored in ``commit``.
+    The text comes from ``git show <commit>:<path>`` via ``_run``, so leading
+    and trailing whitespace of the output is stripped.
+    """
+    object_spec = f"{commit}:{path}"
+    return _run(["show", object_spec], cwd=cwd)
+def abort_rebase(cwd: Path | None = None) -> None:
+    """Run ``git rebase --abort``; raises ``GitError`` on failure."""
+    abort_args = ["rebase", "--abort"]
+    _run(abort_args, cwd=cwd)

package/contexthub/core/github.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""GitHub remote URL parsing."""
+from __future__ import annotations
+import re
+def parse_github_remote(url: str) -> tuple[str, str] | None:
+    """Extract (owner, repo) from a GitHub HTTPS or SSH remote URL.
+    Supports, each with or without a ``.git`` suffix and a trailing slash:
+      - https://github.com/owner/repo
+      - https://user@github.com/owner/repo (userinfo / access-token form)
+      - git@github.com:owner/repo
+      - ssh://git@github.com/owner/repo
+      - ssh://git@github.com:22/owner/repo (explicit port)
+    Returns None if the URL doesn't match a GitHub pattern.
+    """
+    for pattern in _GITHUB_REMOTE_PATTERNS:
+        m = pattern.match(url)
+        if m:
+            return m.group(1), m.group(2)
+    return None
+# Shared tail: owner, lazily-matched repo, optional ".git", optional "/".
+_OWNER_REPO_TAIL = r"([^/]+)/([^/]+?)(?:\.git)?/?$"
+# Compiled once at import time; tried in order by parse_github_remote().
+_GITHUB_REMOTE_PATTERNS = (
+    # HTTPS, optionally with userinfo before the host
+    re.compile(r"https?://(?:[^/@]+@)?github\.com/" + _OWNER_REPO_TAIL),
+    # SSH shorthand: git@github.com:owner/repo.git
+    re.compile(r"git@github\.com:" + _OWNER_REPO_TAIL),
+    # SSH full: ssh://git@github.com[:port]/owner/repo.git
+    re.compile(r"ssh://git@github\.com(?::\d+)?/" + _OWNER_REPO_TAIL),
+)

package/contexthub/core/models.py ADDED Viewed

@@ -0,0 +1,81 @@
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field, asdict
+@dataclass
+class ContextRecord:
+    """Record tying a git commit to the context captured for it."""
+    # Required fields
+    id: str
+    git_commit_hash: str
+    commit_message: str
+    files_changed: list[str]
+    timestamp: str
+    # Optional fields, all defaulting to None
+    raw_context: str | None = None
+    context_source: str | None = None
+    session_id: str | None = None
+    branch: str | None = None
+    goal: str | None = None
+    subgoal: str | None = None
+    def to_dict(self) -> dict:
+        """Return all fields as a plain dictionary (via ``dataclasses.asdict``)."""
+        as_mapping = asdict(self)
+        return as_mapping
+    def to_json(self) -> str:
+        """Return ``to_dict()`` serialized as JSON with two-space indentation."""
+        as_mapping = self.to_dict()
+        return json.dumps(as_mapping, indent=2)
+@dataclass
+class ResolutionRecord:
+    """Record describing one conflict-resolution attempt for a repository."""
+    # Required fields
+    id: str
+    owner: str
+    repo: str
+    local_sha: str
+    remote_sha: str
+    session_id: str | None
+    status: str  # "in_progress" | "pending_review" | "accepted" | "completed" | "failed"
+    started_at: str
+    conflicted_files: list[str]
+    # Optional fields
+    repo_path: str | None = None
+    resolved_commit_hash: str | None = None
+    completed_at: str | None = None
+    review_count: int = 0
+    # default_factory gives every instance its own dict
+    file_snapshots: dict[str, str] = field(default_factory=dict)
+    def to_dict(self) -> dict:
+        """Return all fields as a plain dictionary (via ``dataclasses.asdict``)."""
+        as_mapping = asdict(self)
+        return as_mapping
+    def to_json(self) -> str:
+        """Return ``to_dict()`` serialized as JSON with two-space indentation."""
+        as_mapping = self.to_dict()
+        return json.dumps(as_mapping, indent=2)
+@dataclass
+class RepoConfig:
+    """GitHub coordinates of a repository, serialized under a ``github`` key."""
+    owner: str
+    repo: str
+    remote_url: str
+    remote_name: str = "origin"
+    def to_dict(self) -> dict:
+        """Return the config as ``{"version": ..., "github": {...}}``."""
+        github_section = {
+            "owner": self.owner,
+            "repo": self.repo,
+            "remote_url": self.remote_url,
+            "remote_name": self.remote_name,
+        }
+        return {"version": "0.1.0", "github": github_section}
+    def to_json(self) -> str:
+        """Return ``to_dict()`` serialized as JSON with two-space indentation."""
+        as_mapping = self.to_dict()
+        return json.dumps(as_mapping, indent=2)
+    @classmethod
+    def from_dict(cls, data: dict) -> RepoConfig:
+        """Build a config from the mapping layout produced by ``to_dict``.
+        ``remote_name`` falls back to ``"origin"`` when absent; the other keys
+        of the ``github`` section are required (``KeyError`` otherwise).
+        """
+        section = data["github"]
+        values = {key: section[key] for key in ("owner", "repo", "remote_url")}
+        values["remote_name"] = section.get("remote_name", "origin")
+        return cls(**values)