PyPI - gora-cli - Versions diffs - 0.1.2__py3-none-any.whl - Mend

gora-cli 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

gora/__init__.py +5 -0
gora/__main__.py +13 -0
gora/cli.py +459 -0
gora/go_tui/go.mod +42 -0
gora/go_tui/go.sum +106 -0
gora/go_tui/main.go +2634 -0
gora/go_tui/main_test.go +626 -0
gora/parsers.py +626 -0
gora/store.py +935 -0
gora/tui.py +115 -0
gora_cli-0.1.2.dist-info/METADATA +282 -0
gora_cli-0.1.2.dist-info/RECORD +14 -0
gora_cli-0.1.2.dist-info/WHEEL +4 -0
gora_cli-0.1.2.dist-info/entry_points.txt +2 -0

gora/parsers.py ADDED Viewed

@@ -0,0 +1,626 @@
+from __future__ import annotations
+from dataclasses import dataclass
+import hashlib
+import json
+from pathlib import Path
+import re
+from typing import Any, Iterable
+PROVIDERS = ("codex", "claude", "pi")
+CONTEXT_INJECTION_PREFIXES = (
+    "# AGENTS.md instructions",
+    "AGENTS.md instructions",
+)
+TITLE_NOISE_PREFIXES = (
+    *CONTEXT_INJECTION_PREFIXES,
+    "<turn_aborted>",
+    "<user_action>",
+    "<environment_context>",
+)
+IMAGE_TAG_PATTERN = re.compile(r"(?s)</?image\b[^>]*(>|$)")
+DIRECT_SECRET_PATTERNS: tuple[re.Pattern[str], ...] = (
+    re.compile(r"\bsk-proj-[A-Za-z0-9_-]{12,}\b"),
+    re.compile(r"\bsk-ant-[A-Za-z0-9_-]{12,}\b"),
+    re.compile(r"\bsk-[A-Za-z0-9_-]{20,}\b"),
+    re.compile(r"\bgithub_pat_[A-Za-z0-9_]{20,}\b"),
+    re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{20,}\b"),
+    re.compile(r"\bAKIA[0-9A-Z]{16}\b"),
+)
+KEY_VALUE_SECRET_PATTERN = re.compile(
+    r"(?i)\b([A-Z0-9_]*(?:TOKEN|SECRET|API_KEY|PASSWORD|PASS|PWD|AUTH)[A-Z0-9_]*\s*[:=]\s*)([^\s]+)"
+)
+AUTHORIZATION_BEARER_PATTERN = re.compile(r"(?i)\b(authorization\s*:\s*bearer\s+)([^\s]+)")
+DIRECT_SECRET_MARKERS = ("sk-", "github_pat_", "ghp_", "gho_", "ghu_", "ghs_", "ghr_", "AKIA")
+KEY_VALUE_SECRET_MARKERS = (
+    "TOKEN=",
+    "TOKEN =",
+    "TOKEN:",
+    "TOKEN :",
+    "SECRET=",
+    "SECRET =",
+    "SECRET:",
+    "SECRET :",
+    "API_KEY=",
+    "API_KEY =",
+    "API_KEY:",
+    "API_KEY :",
+    "PASSWORD=",
+    "PASSWORD =",
+    "PASSWORD:",
+    "PASSWORD :",
+    "PASS=",
+    "PASS =",
+    "PASS:",
+    "PASS :",
+    "PWD=",
+    "PWD =",
+    "PWD:",
+    "PWD :",
+    "AUTH=",
+    "AUTH =",
+    "AUTH:",
+    "AUTH :",
+    "AUTHORIZATION:",
+    "AUTHORIZATION :",
+)
+THINKING_BLOCK_TYPES = {"thinking", "reasoning"}
+TOOL_CALL_BLOCK_TYPES = {
+    "custom-tool-call",
+    "function-call",
+    "server-tool-call",
+    "tool-call",
+    "tool-use",
+    "toolcall",
+    "web-search-call",
+}
+TOOL_RESULT_BLOCK_TYPES = {
+    "custom-tool-call-output",
+    "function-call-output",
+    "tool-result",
+    "toolresult",
+}
+@dataclass(frozen=True)
+class ChatMessage:
+    """One immutable, normalised message extracted from a history file."""
+    # Zero-based position within the session (the list length at append time).
+    ordinal: int
+    # Normalised role, e.g. "user", "assistant", "tool" or "tool-call".
+    role: str
+    # Extracted message text; messages with empty text are never stored.
+    text: str
+    # Timestamp string copied from the source record, if it had one.
+    timestamp: str | None
+    # Type string of the source record or payload this message came from.
+    raw_type: str | None = None
+    # Model name reported for the message, when the source provides one.
+    model: str | None = None
+    # Provider of that model, when the source provides one.
+    model_provider: str | None = None
+@dataclass(frozen=True)
+class ChatSession:
+    """Immutable result of parsing one history file."""
+    # Which parser produced the session: "codex", "claude" or "pi".
+    provider: str
+    # Identifier read from the file, or derived from the file name/path.
+    session_id: str
+    # The JSONL file the session was parsed from.
+    source_path: Path
+    # Working directory recorded in the file, if any.
+    cwd: str | None
+    # Timestamp strings for the start and the most recent activity.
+    started_at: str | None
+    updated_at: str | None
+    # Short title derived from the messages, or None when none is usable.
+    title: str | None
+    # Messages in file order.
+    messages: tuple[ChatMessage, ...]
+    # st_mtime and st_size of source_path at parse time.
+    source_mtime: float
+    source_size: int
+    # The three fields below are only populated by the Codex parser.
+    parent_session_id: str | None = None
+    thread_source: str | None = None
+    source_label: str | None = None
+def discover_history_files(provider: str, home: Path | None = None) -> list[Path]:
+    home = home or Path.home()
+    roots = {
+        "codex": home / ".codex" / "sessions",
+        "claude": home / ".claude" / "projects",
+        "pi": home / ".pi" / "agent" / "sessions",
+    }
+    if provider not in roots:
+        raise ValueError(f"unsupported provider: {provider}")
+    root = roots[provider]
+    if not root.exists():
+        return []
+    return sorted(root.rglob("*.jsonl"), key=lambda path: path.stat().st_mtime, reverse=True)
+def parse_history_file(provider: str, path: Path, *, include_tool_results: bool = False) -> ChatSession:
+    if provider == "codex":
+        return parse_codex(path, include_tool_results=include_tool_results)
+    if provider == "claude":
+        return parse_claude(path, include_tool_results=include_tool_results)
+    if provider == "pi":
+        return parse_pi(path, include_tool_results=include_tool_results)
+    raise ValueError(f"unsupported provider: {provider}")
+def parse_codex(path: Path, *, include_tool_results: bool = False) -> ChatSession:
+    """Parse a Codex JSONL history file into a ChatSession.
+
+    Three record types are consumed: ``session_meta`` (identity, cwd, model
+    provider, start time), ``turn_context`` (cwd and current model) and
+    ``response_item`` (messages, tool calls and tool results). All other
+    records only contribute their timestamp to ``updated_at``.
+    """
+    # The file stem is the session id until a session_meta record supplies one.
+    session_id = path.stem
+    parent_session_id: str | None = None
+    thread_source: str | None = None
+    source_label: str | None = None
+    cwd: str | None = None
+    model_provider: str | None = None
+    current_model: str | None = None
+    started_at: str | None = None
+    updated_at: str | None = None
+    messages: list[ChatMessage] = []
+    for obj in _iter_jsonl(path):
+        timestamp = _string(obj.get("timestamp"))
+        updated_at = _max_time(updated_at, timestamp)
+        if obj.get("type") == "session_meta":
+            payload = _dict(obj.get("payload"))
+            # Each field keeps its previous value when the payload omits it.
+            session_id = _string(payload.get("id")) or session_id
+            parent_session_id = _string(payload.get("parent_thread_id")) or parent_session_id
+            thread_source = _string(payload.get("thread_source")) or thread_source
+            source_label = _source_label(payload.get("source")) or source_label
+            cwd = _string(payload.get("cwd")) or cwd
+            model_provider = _string(payload.get("model_provider")) or model_provider
+            # Prefer the payload's own timestamp over the record's.
+            started_at = _string(payload.get("timestamp")) or timestamp or started_at
+            updated_at = _max_time(updated_at, started_at)
+            continue
+        if obj.get("type") == "turn_context":
+            payload = _dict(obj.get("payload"))
+            cwd = _string(payload.get("cwd")) or cwd
+            # The model applies to later messages that carry none of their own.
+            current_model = _codex_turn_model(payload) or current_model
+            continue
+        if obj.get("type") != "response_item":
+            continue
+        payload = _dict(obj.get("payload"))
+        payload_type = _normalized_type(_string(payload.get("type")))
+        if payload_type == "message":
+            role = normalize_role(_string(payload.get("role")) or "unknown")
+            # A message made only of tool blocks is relabelled accordingly.
+            role = _role_from_blocks(role, payload.get("content"))
+            text = content_to_text(payload.get("content"), include_tool_results=include_tool_results)
+        elif payload_type in TOOL_CALL_BLOCK_TYPES:
+            # Top-level tool call: the payload itself is the call.
+            role = "tool-call"
+            text = _format_tool_call(payload)
+        elif payload_type in TOOL_RESULT_BLOCK_TYPES:
+            # Top-level tool output.
+            role = "tool"
+            text = _format_tool_result(payload)
+        else:
+            continue
+        # _append_message drops entries whose text is empty.
+        _append_message(
+            messages,
+            role=role,
+            text=text,
+            timestamp=timestamp,
+            raw_type=_string(payload.get("type")),
+            model=_string(payload.get("model")) or current_model,
+            model_provider=_string(payload.get("model_provider")) or model_provider,
+        )
+    return _session(
+        "codex",
+        session_id,
+        path,
+        cwd,
+        started_at,
+        updated_at,
+        messages,
+        parent_session_id=parent_session_id,
+        thread_source=thread_source,
+        source_label=source_label,
+    )
+def parse_claude(path: Path, *, include_tool_results: bool = False) -> ChatSession:
+    session_id = path.stem
+    explicit_session_id = False
+    cwd: str | None = None
+    started_at: str | None = None
+    updated_at: str | None = None
+    messages: list[ChatMessage] = []
+    for obj in _iter_jsonl(path):
+        typ = _string(obj.get("type"))
+        if typ not in {"user", "assistant"}:
+            continue
+        timestamp = _string(obj.get("timestamp"))
+        started_at = started_at or timestamp
+        updated_at = _max_time(updated_at, timestamp)
+        parsed_session_id = _string(obj.get("sessionId"))
+        if parsed_session_id:
+            session_id = parsed_session_id
+            explicit_session_id = True
+        cwd = _string(obj.get("cwd")) or cwd
+        message = _dict(obj.get("message"))
+        content = message.get("content")
+        role = normalize_role(_string(message.get("role")) or typ or "unknown")
+        role = _role_from_blocks(role, content)
+        text = content_to_text(content, include_tool_results=include_tool_results)
+        _append_message(
+            messages,
+            role=role,
+            text=text,
+            timestamp=timestamp,
+            raw_type=typ,
+            model=_string(message.get("model")),
+            model_provider=_string(message.get("provider")),
+        )
+    if not explicit_session_id:
+        session_id = _fallback_session_id(path)
+    return _session("claude", session_id, path, cwd, started_at, updated_at, messages)
+def parse_pi(path: Path, *, include_tool_results: bool = False) -> ChatSession:
+    session_id = _session_id_from_pi_filename(path)
+    cwd: str | None = None
+    current_model: str | None = None
+    current_model_provider: str | None = None
+    started_at: str | None = None
+    updated_at: str | None = None
+    messages: list[ChatMessage] = []
+    for obj in _iter_jsonl(path):
+        typ = _string(obj.get("type"))
+        timestamp = _string(obj.get("timestamp"))
+        updated_at = _max_time(updated_at, timestamp)
+        if typ == "session":
+            session_id = _string(obj.get("id")) or session_id
+            cwd = _string(obj.get("cwd")) or cwd
+            started_at = timestamp or started_at
+            continue
+        if typ == "model_change":
+            current_model = _string(obj.get("modelId")) or current_model
+            current_model_provider = _string(obj.get("provider")) or current_model_provider
+            continue
+        if typ != "message":
+            continue
+        message = _dict(obj.get("message"))
+        role = normalize_role(_string(message.get("role")) or "unknown")
+        role = _role_from_blocks(role, message.get("content"))
+        text = content_to_text(message.get("content"), include_tool_results=include_tool_results)
+        _append_message(
+            messages,
+            role=role,
+            text=text,
+            timestamp=timestamp,
+            raw_type=typ,
+            model=_string(message.get("model")) or current_model,
+            model_provider=_string(message.get("provider")) or current_model_provider,
+        )
+    return _session("pi", session_id, path, cwd, started_at, updated_at, messages)
+def content_to_text(value: Any, *, include_tool_results: bool) -> str:
+    parts: list[str] = []
+    _collect_text(value, parts, include_tool_results=include_tool_results)
+    return redact_secrets("\n".join(part.strip() for part in parts if part and part.strip()).strip())
+def redact_secrets(text: str) -> str:
+    redacted = text
+    if any(marker in redacted for marker in DIRECT_SECRET_MARKERS):
+        for pattern in DIRECT_SECRET_PATTERNS:
+            redacted = pattern.sub("<redacted>", redacted)
+    upper = redacted.upper()
+    if any(marker in upper for marker in KEY_VALUE_SECRET_MARKERS):
+        redacted = AUTHORIZATION_BEARER_PATTERN.sub(r"\1<redacted>", redacted)
+        redacted = KEY_VALUE_SECRET_PATTERN.sub(r"\1<redacted>", redacted)
+    return redacted
+def is_context_injection_text(text: str | None) -> bool:
+    if not text:
+        return False
+    stripped = text.lstrip()
+    return any(stripped.startswith(prefix) for prefix in CONTEXT_INJECTION_PREFIXES)
+def is_title_noise_text(text: str | None) -> bool:
+    if not text:
+        return False
+    stripped = text.lstrip()
+    return any(stripped.startswith(prefix) for prefix in TITLE_NOISE_PREFIXES)
+def is_image_reference_text(text: str | None) -> bool:
+    if not text:
+        return False
+    return bool(IMAGE_TAG_PATTERN.search(text))
+def normalize_role(role: str) -> str:
+    normalized = role.strip().lower().replace("_", "-")
+    if normalized in {"toolresult", "tool-result"}:
+        return "tool"
+    if normalized in TOOL_CALL_BLOCK_TYPES:
+        return "tool-call"
+    return normalized or "unknown"
+def _collect_text(value: Any, parts: list[str], *, include_tool_results: bool) -> None:
+    if value is None:
+        return
+    if isinstance(value, str):
+        parts.append(value)
+        return
+    if isinstance(value, list):
+        for item in value:
+            _collect_text(item, parts, include_tool_results=include_tool_results)
+        return
+    if not isinstance(value, dict):
+        return
+    block_type = _normalized_type(_string(value.get("type")))
+    if block_type in THINKING_BLOCK_TYPES:
+        return
+    if block_type in TOOL_CALL_BLOCK_TYPES:
+        parts.append(_format_tool_call(value))
+        return
+    if block_type in {"text", "input-text", "output-text"} and isinstance(value.get("text"), str):
+        parts.append(value["text"])
+        return
+    if block_type in TOOL_RESULT_BLOCK_TYPES:
+        _collect_text(value.get("content"), parts, include_tool_results=include_tool_results)
+        return
+    if block_type in {"image", "input-image"}:
+        mime_type = _string(value.get("mimeType")) or _string(value.get("mime_type"))
+        parts.append(f"[image: {mime_type or 'attachment'}]")
+        return
+    if isinstance(value.get("text"), str):
+        parts.append(value["text"])
+        return
+    if "content" in value:
+        _collect_text(value.get("content"), parts, include_tool_results=include_tool_results)
+def _role_from_blocks(role: str, content: Any) -> str:
+    if not isinstance(content, list) or not content:
+        return role
+    block_types = [
+        _normalized_type(_string(block.get("type")))
+        for block in content
+        if isinstance(block, dict)
+    ]
+    meaningful = [block_type for block_type in block_types if block_type not in THINKING_BLOCK_TYPES]
+    if meaningful and all(block_type in TOOL_RESULT_BLOCK_TYPES for block_type in meaningful):
+        return "tool"
+    if meaningful and all(block_type in TOOL_CALL_BLOCK_TYPES for block_type in meaningful):
+        return "tool-call"
+    return role
+def _append_message(
+    messages: list[ChatMessage],
+    *,
+    role: str,
+    text: str,
+    timestamp: str | None,
+    raw_type: str | None,
+    model: str | None,
+    model_provider: str | None,
+) -> None:
+    if not text:
+        return
+    messages.append(
+        ChatMessage(
+            ordinal=len(messages),
+            role=role,
+            text=text,
+            timestamp=timestamp,
+            raw_type=raw_type,
+            model=model,
+            model_provider=model_provider,
+        )
+    )
+def _format_tool_call(value: dict[str, Any]) -> str:
+    block_type = _normalized_type(_string(value.get("type")))
+    name = (
+        _string(value.get("name"))
+        or _string(value.get("tool_name"))
+        or _string(value.get("toolName"))
+        or _display_type(block_type)
+    )
+    call_id = _string(value.get("call_id")) or _string(value.get("id")) or _string(value.get("tool_use_id"))
+    status = _string(value.get("status"))
+    arguments = _first_present(value, "arguments", "input", "args", "query")
+    lines = [f"Tool call: {name}"]
+    if call_id:
+        lines.append(f"Call ID: {call_id}")
+    if status:
+        lines.append(f"Status: {status}")
+    if arguments is not None and arguments != "":
+        lines.append(f"Arguments:\n{_stringify_value(arguments)}")
+    return "\n".join(lines)
+def _format_tool_result(value: dict[str, Any]) -> str:
+    call_id = _string(value.get("call_id")) or _string(value.get("id")) or _string(value.get("tool_use_id"))
+    output = _first_present(value, "output", "content", "result", "text")
+    output_text = content_to_text(output, include_tool_results=True)
+    if not output_text and output is not None:
+        output_text = redact_secrets(_stringify_value(output))
+    heading = "Tool result"
+    if call_id:
+        heading += f": {call_id}"
+    if not output_text:
+        return heading
+    return f"{heading}\n{output_text}"
+def _first_present(value: dict[str, Any], *keys: str) -> Any:
+    for key in keys:
+        if key in value:
+            return value[key]
+    return None
+def _stringify_value(value: Any) -> str:
+    if isinstance(value, str):
+        return value
+    try:
+        return json.dumps(value, indent=2, sort_keys=True)
+    except (TypeError, ValueError):
+        return str(value)
+def _normalized_type(value: str | None) -> str:
+    if not value:
+        return ""
+    normalized = value.strip().replace("_", "-").lower()
+    if normalized == "toolcall":
+        return "tool-call"
+    if normalized == "toolresult":
+        return "tool-result"
+    return normalized
+def _display_type(value: str) -> str:
+    return value.replace("-", " ") if value else "tool"
+def _codex_turn_model(payload: dict[str, Any]) -> str | None:
+    model = _string(payload.get("model"))
+    if model:
+        return model
+    collaboration_mode = _dict(payload.get("collaboration_mode"))
+    settings = _dict(collaboration_mode.get("settings"))
+    return _string(settings.get("model"))
+def _iter_jsonl(path: Path) -> Iterable[dict[str, Any]]:
+    with path.open("r", encoding="utf-8") as handle:
+        for line in handle:
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                obj = json.loads(stripped)
+            except json.JSONDecodeError:
+                continue
+            if isinstance(obj, dict):
+                yield obj
+def _session(
+    provider: str,
+    session_id: str,
+    path: Path,
+    cwd: str | None,
+    started_at: str | None,
+    updated_at: str | None,
+    messages: list[ChatMessage],
+    *,
+    parent_session_id: str | None = None,
+    thread_source: str | None = None,
+    source_label: str | None = None,
+) -> ChatSession:
+    stat = path.stat()
+    if not updated_at and messages:
+        updated_at = messages[-1].timestamp
+    return ChatSession(
+        provider=provider,
+        session_id=session_id,
+        source_path=path,
+        cwd=cwd,
+        started_at=started_at,
+        updated_at=updated_at or started_at,
+        title=_title_from_messages(messages),
+        messages=tuple(messages),
+        source_mtime=stat.st_mtime,
+        source_size=stat.st_size,
+        parent_session_id=parent_session_id,
+        thread_source=thread_source,
+        source_label=source_label,
+    )
+def _source_label(value: Any) -> str | None:
+    if isinstance(value, str):
+        return value
+    if isinstance(value, dict):
+        subagent = _string(value.get("subagent"))
+        if subagent:
+            return f"subagent:{subagent}"
+    return None
+def _title_from_messages(messages: list[ChatMessage]) -> str | None:
+    for message in messages:
+        if message.role == "user" and not is_title_noise_text(message.text):
+            title = _title_text(message.text)
+            if title:
+                return _limit_title(title, limit=96)
+    for message in messages:
+        if message.text and not is_title_noise_text(message.text):
+            title = _title_text(message.text)
+            if title:
+                return _limit_title(title, limit=96)
+    return None
+def _title_text(text: str) -> str:
+    without_images = IMAGE_TAG_PATTERN.sub(" ", text)
+    line = " ".join(without_images.split())
+    if not line and IMAGE_TAG_PATTERN.search(text):
+        return "image attachment"
+    return line
+def _limit_title(line: str, *, limit: int) -> str:
+    if len(line) <= limit:
+        return line
+    return line[: limit - 1].rstrip() + "..."
+def _max_time(left: str | None, right: str | None) -> str | None:
+    if not left:
+        return right
+    if not right:
+        return left
+    return max(left, right)
+def _string(value: Any) -> str | None:
+    return value if isinstance(value, str) else None
+def _dict(value: Any) -> dict[str, Any]:
+    return value if isinstance(value, dict) else {}
+def _session_id_from_pi_filename(path: Path) -> str:
+    name = path.stem
+    if "_" in name:
+        return name.rsplit("_", 1)[1]
+    return name
+def _fallback_session_id(path: Path) -> str:
+    digest = hashlib.sha1(str(path).encode("utf-8")).hexdigest()[:10]
+    return f"{path.stem}-{digest}"