PyPI - codeatrium - Versions diffs - 0.1.0__py3-none-any.whl - Mend

codeatrium 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

codeatrium/__init__.py +3 -0
codeatrium/__main__.py +5 -0
codeatrium/cli/__init__.py +295 -0
codeatrium/cli/distill_cmd.py +76 -0
codeatrium/cli/hook_cmd.py +24 -0
codeatrium/cli/index_cmd.py +62 -0
codeatrium/cli/prime_cmd.py +90 -0
codeatrium/cli/search_cmd.py +128 -0
codeatrium/cli/server_cmd.py +122 -0
codeatrium/cli/show_cmd.py +151 -0
codeatrium/cli/status_cmd.py +59 -0
codeatrium/config.py +96 -0
codeatrium/db.py +135 -0
codeatrium/distiller.py +290 -0
codeatrium/embedder.py +168 -0
codeatrium/embedder_server.py +172 -0
codeatrium/hooks.py +156 -0
codeatrium/indexer.py +237 -0
codeatrium/llm.py +148 -0
codeatrium/models.py +53 -0
codeatrium/paths.py +74 -0
codeatrium/py.typed +0 -0
codeatrium/resolver.py +301 -0
codeatrium/search.py +273 -0
codeatrium-0.1.0.dist-info/METADATA +180 -0
codeatrium-0.1.0.dist-info/RECORD +29 -0
codeatrium-0.1.0.dist-info/WHEEL +4 -0
codeatrium-0.1.0.dist-info/entry_points.txt +2 -0
codeatrium-0.1.0.dist-info/licenses/LICENSE +21 -0

codeatrium/hooks.py ADDED Viewed

@@ -0,0 +1,156 @@
+"""Claude Code hook 設定の JSON 操作ロジック"""
+from __future__ import annotations
+import json
+import shlex
+from pathlib import Path
+from typing import Any, cast
+from codeatrium.config import DEFAULT_DISTILL_BATCH_LIMIT
+from codeatrium.paths import loci_bin
# Matcher shared by every SessionStart hook this module manages.
_SESSION_START_MATCHER = "startup|clear|resume|compact"


def _refresh_loci_hooks(
    entries: list[dict[str, Any]],
    keyword: str,
    command: str,
    *,
    matcher: str | None = None,
    require_async: bool = False,
) -> tuple[bool, bool]:
    """Bring existing loci hooks for *keyword* in line with *command*.

    A hook is treated as ours when its ``command`` string contains both
    ``"loci"`` and *keyword* (plain substring match).

    Args:
        entries: Hook entries of one event (e.g. the ``Stop`` list).
        keyword: Sub-command name to look for (``"index"``, ``"server"``, ...).
        command: The exact command line the hook should carry.
        matcher: When given, only entries whose ``matcher`` equals it are examined.
        require_async: When true, the hook must also have a truthy ``async`` flag;
            stale hooks get ``async=True`` and lose any ``nohup`` key.

    Returns:
        ``(found, updated)`` - whether a matching hook exists, and whether any
        hook was modified.
    """
    found = False
    updated = False
    for entry in entries:
        if matcher is not None and entry.get("matcher") != matcher:
            continue
        for hook in entry.get("hooks", []):
            current = hook.get("command", "")
            if "loci" not in current or keyword not in current:
                continue
            found = True
            stale = current != command or (require_async and not hook.get("async"))
            if not stale:
                continue
            hook["command"] = command
            if require_async:
                hook["async"] = True
                hook.pop("nohup", None)
            updated = True
    return found, updated


def _session_start_hook_list(entries: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the hook list of the first entry using our matcher, creating it if absent."""
    for entry in entries:
        if entry.get("matcher") == _SESSION_START_MATCHER:
            # setdefault: an existing entry may legitimately lack a "hooks" key.
            return entry.setdefault("hooks", [])
    created: dict[str, Any] = {"matcher": _SESSION_START_MATCHER, "hooks": []}
    entries.append(created)
    return created["hooks"]


def _drop_legacy_session_end(hooks: dict[str, Any]) -> bool:
    """Remove old ``SessionEnd`` entries that ran ``loci distill``.

    Returns True only when the settings were actually modified, so that an
    unrelated ``SessionEnd`` hook does not force a rewrite on every run.
    """
    entries = hooks.get("SessionEnd")
    if entries is None:
        return False
    kept = [
        entry
        for entry in entries
        if not any(
            "loci" in h.get("command", "") and "distill" in h.get("command", "")
            for h in entry.get("hooks", [])
        )
    ]
    if kept and len(kept) == len(entries):
        return False  # nothing of ours was present
    if kept:
        hooks["SessionEnd"] = kept
    else:
        # An empty event list is dropped entirely.
        del hooks["SessionEnd"]
    return True


def install_hooks(batch_limit: int = DEFAULT_DISTILL_BATCH_LIMIT) -> tuple[bool, str]:
    """Register loci in the Claude Code ``Stop`` and ``SessionStart`` hooks.

    Reads ``~/.claude/settings.json`` (if present), makes sure the following
    hooks exist and carry the current command lines, and writes the file back
    only when something changed:

    * ``Stop``: ``loci index`` with ``async: true``
    * ``SessionStart`` (matcher ``startup|clear|resume|compact``):
      ``loci server start`` and ``loci distill --limit N`` detached via
      ``nohup ... &``, plus ``loci prime`` run in the foreground.

    Old ``SessionEnd`` entries that ran ``loci distill`` are removed.

    Args:
        batch_limit: Value passed to ``loci distill --limit``.

    Returns:
        ``(changed, message)`` - whether the settings file was modified, and a
        human-readable result message.
    """
    settings_path = Path.home() / ".claude" / "settings.json"
    if settings_path.exists():
        # Explicit UTF-8: the file is JSON and is written below with ensure_ascii=False.
        with settings_path.open(encoding="utf-8") as f:
            settings: dict[str, Any] = json.load(f)
    else:
        settings = {}
    hooks = settings.setdefault("hooks", {})

    loci = shlex.quote(loci_bin())
    index_cmd = f"{loci} index"
    distill_cmd = f"nohup {loci} distill --limit {int(batch_limit)} > /dev/null 2>&1 &"
    server_cmd = f"nohup {loci} server start > /dev/null 2>&1 &"
    prime_cmd = f"{loci} prime"

    # --- Stop hook: loci index (async: true) ---
    stop_hooks: list[dict[str, Any]] = hooks.setdefault("Stop", [])
    stop_found, changed = _refresh_loci_hooks(
        stop_hooks, "index", index_cmd, require_async=True
    )
    if not stop_found:
        stop_hooks.append(
            {"hooks": [{"type": "command", "command": index_cmd, "async": True}]}
        )
        changed = True

    # --- SessionStart hooks: loci server start + loci distill (nohup detach) ---
    session_start_hooks: list[dict[str, Any]] = hooks.setdefault("SessionStart", [])
    server_found, updated = _refresh_loci_hooks(
        session_start_hooks, "server", server_cmd, matcher=_SESSION_START_MATCHER
    )
    changed = changed or updated
    distill_found, updated = _refresh_loci_hooks(
        session_start_hooks, "distill", distill_cmd, matcher=_SESSION_START_MATCHER
    )
    changed = changed or updated
    if not server_found:
        _session_start_hook_list(session_start_hooks).append(
            {"type": "command", "command": server_cmd}
        )
        changed = True
    if not distill_found:
        _session_start_hook_list(session_start_hooks).append(
            {"type": "command", "command": distill_cmd}
        )
        changed = True

    # --- SessionStart hook: loci prime (blocking; its stdout is injected into the context) ---
    prime_found, updated = _refresh_loci_hooks(
        session_start_hooks, "prime", prime_cmd, matcher=_SESSION_START_MATCHER
    )
    changed = changed or updated
    if not prime_found:
        _session_start_hook_list(session_start_hooks).append(
            {"type": "command", "command": prime_cmd}
        )
        changed = True

    # Remove stale SessionEnd entries that ran loci distill, if any.
    if _drop_legacy_session_end(hooks):
        changed = True

    if not changed:
        return False, "Hooks already up to date."

    settings_path.parent.mkdir(parents=True, exist_ok=True)
    with settings_path.open("w", encoding="utf-8") as f:
        json.dump(settings, f, ensure_ascii=False, indent=2)
    lines = [
        f"Hooks installed: {settings_path}",
        f"  Stop (async):       {index_cmd}",
        f"  SessionStart:       {server_cmd}",
        f"  SessionStart:       {distill_cmd}",
        f"  SessionStart:       {prime_cmd}",
        f"  (matcher: {_SESSION_START_MATCHER})",
    ]
    return True, "\n".join(lines)

codeatrium/indexer.py ADDED Viewed

@@ -0,0 +1,237 @@
+"""
+.jsonl パース・exchange 分割・DB 保存
+exchange 境界定義:
+  role="user" かつ isMeta!=true かつ実質的なテキスト発話を持つエントリから
+  次の同様エントリの直前まで。ツール呼び出し・中間応答は同一 exchange に含める。
+フィルタルール（SPEC Section 6 / 論文 Section 3.1 準拠）:
+  - 50文字未満の exchange は trivial として除外
+  - isMeta=True の user エントリは exchange 境界としない
+"""
+from __future__ import annotations
+import hashlib
+import json
+from dataclasses import dataclass
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
@dataclass
class Exchange:
    """Verbatim text of one exchange (a user utterance plus the agent output that follows)."""

    # Identifier of this exchange.
    id: str
    # Identifier of the conversation the exchange belongs to.
    conversation_id: str
    # Index of the first log entry covered by the exchange.
    ply_start: int
    # Index of the last log entry covered by the exchange (inclusive).
    ply_end: int
    # Plain text of the opening user entry.
    user_content: str
    # Plain text of the agent side of the exchange.
    agent_content: str
+# ---- 内部ヘルパー ----
def _sha256(text: str) -> str:
    """Return the hexadecimal SHA-256 digest of *text* (encoded with ``str.encode()``)."""
    digest = hashlib.sha256()
    digest.update(text.encode())
    return digest.hexdigest()
def _extract_text(content: Any) -> str:
    """Extract the plain text from a ``message.content`` value.

    A string is returned unchanged. For a list, the ``text`` of every dict
    block whose ``type`` is ``"text"`` is collected and the non-empty ones are
    joined with newlines; all other blocks (e.g. ``thinking``) are ignored.
    Any other value yields an empty string.
    """
    if isinstance(content, str):
        return content
    if not isinstance(content, list):
        return ""
    texts = [
        item.get("text", "")
        for item in content
        if isinstance(item, dict) and item.get("type") == "text"
    ]
    return "\n".join(piece for piece in texts if piece)
# Leading phrases of a compaction summary (the session hand-over text that CC generates automatically).
_COMPACT_PREFIXES = (
    "This session is being continued from a previous conversation",
    "前のセッションからの引き継ぎです",
    "このセッションは、以前の会話から引き継がれています",
)
# Leading phrase of the distillation prompt that `loci distill` passes to `claude --print`.
_DISTILL_PROMPT_PREFIX = "この対話のやり取りをJSONに蒸留してください"


def _is_compaction_summary(text: str) -> bool:
    """Return True when *text*, ignoring surrounding whitespace, starts with a compaction-summary prefix."""
    # str.startswith accepts a tuple and tests each prefix in turn.
    return text.strip().startswith(_COMPACT_PREFIXES)
def _is_real_user_entry(entry: dict) -> bool:
    """Return True when *entry* is a user entry carrying a substantive utterance.

    The entry is rejected when it is not of type ``user``, is flagged
    ``isMeta``, has a non-dict message or a message role other than ``user``,
    consists only of ``tool_result`` blocks, is a compaction summary, is the
    loci distillation prompt, or has no text at all.
    """
    if entry.get("type") != "user" or entry.get("isMeta", False):
        return False
    message = entry.get("message", {})
    if not isinstance(message, dict) or message.get("role") != "user":
        return False

    content = message.get("content", "")
    text = _extract_text(content).strip()

    # A list whose dict blocks are all tool_result carries no real utterance.
    if isinstance(content, list):
        dict_blocks = [b for b in content if isinstance(b, dict)]
        if all(b.get("type") == "tool_result" for b in dict_blocks):
            return False

    # Compaction summaries never open an exchange.
    if _is_compaction_summary(text):
        return False
    # Neither does the prompt that loci distill itself sends.
    if text.startswith(_DISTILL_PROMPT_PREFIX):
        return False
    return bool(text)
+# ---- 公開API ----
def parse_exchanges(jsonl_path: Path, min_chars: int = 50) -> list[Exchange]:
    """Read a .jsonl file and return the exchanges it contains.

    Blank lines and lines that are not valid JSON are skipped. An exchange
    starts at each entry accepted by ``_is_real_user_entry`` and runs up to the
    entry just before the next such entry (or to the last entry of the file).
    Exchanges whose combined user and agent text is shorter than *min_chars*
    characters are dropped as trivial.

    Args:
        jsonl_path: Path of the log file; it is read as UTF-8.
        min_chars: Minimum combined text length for an exchange to be kept.

    Returns:
        The exchanges in file order. ``ply_start`` / ``ply_end`` are indices
        into the list of successfully parsed entries.
    """
    records: list[dict] = []
    with jsonl_path.open(encoding="utf-8") as handle:
        for raw in handle:
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                records.append(json.loads(stripped))
            except json.JSONDecodeError:
                continue

    conversation_id = _sha256(str(jsonl_path))

    # Indices of the entries that open an exchange.
    starts = [idx for idx, rec in enumerate(records) if _is_real_user_entry(rec)]
    # Each exchange ends right before the next start; the last one runs to the final entry.
    ends = [following - 1 for following in starts[1:]] + [len(records) - 1]

    result: list[Exchange] = []
    for start, end in zip(starts, ends):
        opener = records[start]
        user_text = _extract_text(opener["message"]["content"])

        # Concatenate the assistant text, leaving out what follows a compaction summary.
        replies: list[str] = []
        skipping = False
        for rec in records[start + 1 : end + 1]:
            kind = rec.get("type")
            if kind == "user":
                body = rec.get("message", {})
                if isinstance(body, dict):
                    # Every user entry re-evaluates the flag, so a later
                    # non-summary user entry ends the skipped zone.
                    skipping = _is_compaction_summary(
                        _extract_text(body.get("content", ""))
                    )
                continue
            if kind != "assistant" or skipping:
                continue
            body = rec.get("message", {})
            if not isinstance(body, dict):
                continue
            reply = _extract_text(body.get("content", ""))
            if reply:
                replies.append(reply)
        agent_text = "\n".join(replies)

        # Trivial filter.
        if len(user_text + agent_text) < min_chars:
            continue

        # Fall back to the entry index when the entry has no uuid.
        user_uuid = opener.get("uuid", f"{start}")
        result.append(
            Exchange(
                id=_sha256(f"{conversation_id}:{user_uuid}"),
                conversation_id=conversation_id,
                ply_start=start,
                ply_end=end,
                user_content=user_text,
                agent_content=agent_text,
            )
        )
    return result
def index_file(jsonl_path: Path, db_path: Path, min_chars: int = 50) -> int:
    """Register the exchanges of a .jsonl file in the database.

    For a conversation that is already known, only exchanges that start after
    the stored ``last_ply_end`` are added.

    Args:
        jsonl_path: Log file to index. The conversation id is the SHA-256 of
            this path's string form.
        db_path: Database location handed to ``codeatrium.db.get_connection``.
        min_chars: Passed through to ``parse_exchanges``.

    Returns:
        The number of exchanges considered new. Rows are written with
        ``INSERT OR IGNORE``, so this can exceed the number actually inserted.
    """
    from codeatrium.db import get_connection

    conversation_id = _sha256(str(jsonl_path))
    con = get_connection(db_path)
    # try/finally: release the connection even if parsing, stat() or SQL raises.
    try:
        # Fetch last_ply_end of an existing conversation.
        # NOTE(review): row["last_ply_end"] assumes get_connection configures
        # name-based row access - confirm in codeatrium.db.
        row = con.execute(
            "SELECT last_ply_end FROM conversations WHERE id = ?", (conversation_id,)
        ).fetchone()
        last_ply_end = row["last_ply_end"] if row is not None else -1

        exchanges = parse_exchanges(jsonl_path, min_chars=min_chars)
        new_exchanges = [ex for ex in exchanges if ex.ply_start > last_ply_end]
        if not new_exchanges:
            return 0

        # Insert or update the conversations row.
        newest_ply_end = new_exchanges[-1].ply_end
        if row is None:
            # The file's modification time is stored as started_at.
            mtime = datetime.fromtimestamp(
                jsonl_path.stat().st_mtime, tz=UTC
            ).isoformat()
            con.execute(
                "INSERT INTO conversations (id, source_path, started_at, last_ply_end) "
                "VALUES (?, ?, ?, ?)",
                (conversation_id, str(jsonl_path), mtime, newest_ply_end),
            )
        else:
            con.execute(
                "UPDATE conversations SET last_ply_end = ? WHERE id = ?",
                (newest_ply_end, conversation_id),
            )

        for ex in new_exchanges:
            con.execute(
                """
            INSERT OR IGNORE INTO exchanges
                (id, conversation_id, ply_start, ply_end, user_content, agent_content)
            VALUES (?, ?, ?, ?, ?, ?)
            """,
                (
                    ex.id,
                    ex.conversation_id,
                    ex.ply_start,
                    ex.ply_end,
                    ex.user_content,
                    ex.agent_content,
                ),
            )
        con.commit()
        return len(new_exchanges)
    finally:
        con.close()

codeatrium/llm.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""LLM 呼び出しラッパー: claude --print でプロンプトを実行し JSON を返す"""
+from __future__ import annotations
+import json
+import subprocess
+from pathlib import Path
+from typing import Any
# ---- Prompt constants ----
# Distillation prompt; filled in with str.format(ply_start=..., ply_end=..., messages_text=...).
# The doubled braces render as literal braces in the formatted prompt.
DISTILL_PROMPT_TEMPLATE = """\
この対話のやり取りをJSONに蒸留してください：
- "exchange_core": 1-2文。何が達成または決定されましたか？\
やり取り内の特定の用語を使用してください。\
テキストに存在しない詳細を捏造しないでください。\
やり取りがほぼ空の場合は、簡潔にその旨を述べてください。
- "specific_context": テキストからの具体的な詳細1つ：\
数値、エラーメッセージ、パラメータ名、またはファイルパス。\
テキストから正確にコピーしてください。プロジェクトパスは使用しないでください。
- "room_assignments": 1-3個の部屋。各部屋はこのやり取りが属するトピックです。\
{{"room_type": "<file|concept|workflow>", "room_key": "<識別子>",\
 "room_label": "<短いラベル>", "relevance": <0.0-1.0>}}\
部屋は関連するやり取りをグループ化するのに十分具体的なものにしてください\
（例：「errors」ではなく「retry_timeout」）。
"files_touched"は含めないでください。
やり取り (メッセージ {ply_start}-{ply_end}): {messages_text}
JSONのみで回答してください。"""

# Schema of one element of "room_assignments".
_ROOM_ASSIGNMENT_SCHEMA = {
    "type": "object",
    "properties": {
        "room_type": {"type": "string", "enum": ["file", "concept", "workflow"]},
        "room_key": {"type": "string"},
        "room_label": {"type": "string"},
        "relevance": {"type": "number", "minimum": 0, "maximum": 1},
    },
    "required": ["room_type", "room_key", "room_label", "relevance"],
}

# JSON Schema (serialized to a string) passed to the claude CLI via --json-schema.
JSON_SCHEMA = json.dumps(
    {
        "type": "object",
        "properties": {
            "exchange_core": {"type": "string", "maxLength": 300},
            "specific_context": {"type": "string", "maxLength": 200},
            "room_assignments": {
                "type": "array",
                "maxItems": 3,
                "items": _ROOM_ASSIGNMENT_SCHEMA,
            },
        },
        "required": ["exchange_core", "specific_context", "room_assignments"],
    }
)
+# ---- 副作用制御 ----
def _session_dir() -> Path:
    """Return ``~/.claude/projects``, the directory where ``claude -p`` writes its JSONL files."""
    return Path.home().joinpath(".claude", "projects")
def _snapshot_jsonl(session_dir: Path) -> set[Path]:
    """Return every ``*.jsonl`` path found recursively under *session_dir* (empty set if it does not exist)."""
    return set(session_dir.rglob("*.jsonl")) if session_dir.exists() else set()
def _cleanup_side_effect_jsonls(session_dir: Path, before: set[Path]) -> None:
    """Delete the JSONL files under *session_dir* that are not part of *before*.

    *before* is a snapshot taken prior to a ``claude -p`` call; anything that
    appeared since is removed. Deletion is best-effort: a file that cannot be
    removed is left in place.
    """
    if not session_dir.exists():
        return
    # Materialize the listing first so nothing is deleted while the tree is being walked.
    created = [path for path in session_dir.rglob("*.jsonl") if path not in before]
    for path in created:
        try:
            path.unlink()
        except OSError:
            pass
+# ---- LLM 呼び出し ----
def call_claude(prompt: str, model: str | None = None) -> dict[str, Any]:
    """Run *prompt* through ``claude --print`` and return the decoded JSON (mocked in tests).

    Args:
        prompt: Text fed to the CLI on standard input.
        model: Model name; ``DEFAULT_DISTILL_MODEL`` is used when falsy.

    Returns:
        The ``structured_output`` of the CLI's JSON envelope when present and
        non-empty; otherwise the JSON decoded from the envelope's ``result``
        string (an optional Markdown code fence is stripped first); otherwise
        the decoded envelope itself.

    Raises:
        RuntimeError: If the ``claude`` executable is not on PATH, or it exits
            with a non-zero status.
    """
    import shutil

    from codeatrium.config import DEFAULT_DISTILL_MODEL

    cli = shutil.which("claude")
    if cli is None:
        raise RuntimeError("claude CLI not found in PATH")

    session_dir = _session_dir()
    before = _snapshot_jsonl(session_dir)
    argv = [
        cli,
        "--print",
        "--model",
        model or DEFAULT_DISTILL_MODEL,
        "--output-format",
        "json",
        "--json-schema",
        JSON_SCHEMA,
        "--no-session-persistence",
        "--setting-sources",
        "",
    ]
    try:
        result = subprocess.run(
            argv,
            input=prompt,
            capture_output=True,
            text=True,
            timeout=300,
        )
    finally:
        # NOTE(review): this removes every *.jsonl that appeared under the
        # session directory during the call, which would include files written
        # by any other session running at the same time - confirm acceptable.
        _cleanup_side_effect_jsonls(session_dir, before)

    if result.returncode != 0:
        raise RuntimeError(f"claude -p failed: {result.stderr}")

    envelope = json.loads(result.stdout)
    if not isinstance(envelope, dict):
        return envelope

    structured = envelope.get("structured_output")
    if structured:
        return structured

    payload = envelope.get("result", "")
    if not (isinstance(payload, str) and payload.strip()):
        return envelope

    body = payload.strip()
    if body.startswith("```"):
        # Drop the opening fence line, and the closing one when it is present.
        rows = body.splitlines()
        rows = rows[1:-1] if rows[-1].strip() == "```" else rows[1:]
        body = "\n".join(rows)
    return json.loads(body.strip())

codeatrium/models.py ADDED Viewed

@@ -0,0 +1,53 @@
+"""共有データクラス定義"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
@dataclass
class PalaceObject:
    """A distilled palace object."""

    # Short statement of the exchange's core.
    exchange_core: str
    # One concrete detail associated with the exchange.
    specific_context: str
    # Room assignment records, one dict per room.
    room_assignments: list[dict[str, Any]]
    # Touched file paths; every instance gets its own empty list by default.
    files_touched: list[str] = field(default_factory=list)
@dataclass
class BM25Result:
    """One result of the BM25 search over verbatim text."""

    # Identifier of the matched exchange.
    exchange_id: str
    # User-side text of the exchange.
    user_content: str
    # Agent-side text of the exchange.
    agent_content: str
    # BM25 score of the match.
    bm25_score: float
@dataclass
class HNSWPalaceResult:
    """One result of the HNSW search over distilled content."""

    # Identifier of the matched exchange.
    exchange_id: str
    # User-side text of the exchange.
    user_content: str
    # Agent-side text of the exchange.
    agent_content: str
    # Distilled core statement of the exchange.
    exchange_core: str
    # Distilled concrete detail of the exchange.
    specific_context: str
    # Distance reported for the match.
    distance: float
@dataclass
class FusedResult:
    """RRF-fused search result (the output format defined by the SPEC)."""

    # Identifier of the matched exchange.
    exchange_id: str
    # User-side text of the exchange.
    user_content: str
    # Agent-side text of the exchange.
    agent_content: str
    # Fused score.
    score: float
    # Distilled fields; None when not set.
    exchange_core: str | None = None
    specific_context: str | None = None
    # Reference to the verbatim source; None when not set.
    verbatim_ref: str | None = None
    # Room and symbol records; every instance gets its own empty list by default.
    rooms: list[dict[str, Any]] = field(default_factory=list)
    symbols: list[dict[str, Any]] = field(default_factory=list)

codeatrium/paths.py ADDED Viewed

@@ -0,0 +1,74 @@
+"""パス解決ヘルパー: プロジェクトルート・DB パス・Claude セッションログパスの解決"""
+from __future__ import annotations
+import subprocess
+from pathlib import Path
# Directory under the user's home that holds the Claude session logs, one sub-directory per project.
CLAUDE_PROJECTS_DIR = Path.home().joinpath(".claude", "projects")
# Name of the per-project directory that holds codeatrium's files.
CODEATRIUM_DIR = ".codeatrium"
# File name of the database inside CODEATRIUM_DIR.
DB_NAME = "memory.db"
def git_root() -> Path | None:
    """Return the repository root reported by ``git rev-parse --show-toplevel``.

    Returns:
        The top-level directory of the repository containing the current
        working directory, or None when the directory is not inside a
        repository or ``git`` cannot be executed at all.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "--show-toplevel"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: git ran and exited non-zero (not a repository).
        # OSError: git could not be started (e.g. FileNotFoundError when it is
        # not installed); that must not crash callers either.
        return None
    return Path(result.stdout.strip())
def find_project_root() -> Path:
    """Locate the project root by searching for a ``.codeatrium/`` directory.

    The search starts at the current working directory and walks up through
    its parents, stopping once the git root has been checked so that a
    database outside the project is not picked up. When nothing is found the
    git root is returned, or the current working directory when there is no
    git root.
    """
    here = Path.cwd()
    repo_root = git_root()
    for directory in (here, *here.parents):
        if (directory / CODEATRIUM_DIR).exists():
            return directory
        # Do not climb past the repository root.
        if repo_root and directory == repo_root:
            break
    return repo_root or here
def db_path(project_root: Path) -> Path:
    """Return the database file path inside *project_root*'s codeatrium directory."""
    data_dir = project_root / CODEATRIUM_DIR
    return data_dir / DB_NAME
def resolve_claude_projects_path(project_root: Path) -> Path | None:
    """Resolve the ``~/.claude/projects/<name>/`` directory that belongs to *project_root*.

    The directory name is derived by replacing every "/" in the path with "-".
    *project_root* is tried first, then the current working directory; the
    first derived directory that exists and contains at least one ``*.jsonl``
    file (searched recursively) is returned.

    Returns:
        The matching directory, or None when the projects directory does not
        exist or neither candidate qualifies.
    """
    if not CLAUDE_PROJECTS_DIR.exists():
        return None
    for base in (project_root, Path.cwd()):
        session_dir = CLAUDE_PROJECTS_DIR / str(base).replace("/", "-")
        if not session_dir.exists():
            continue
        # One hit is enough; stop scanning at the first .jsonl found.
        if next(session_dir.rglob("*.jsonl"), None) is not None:
            return session_dir
    return None
def sock_path(project_root: Path) -> Path:
    """Return the path of ``embedder.sock``, located next to the database file."""
    data_dir = db_path(project_root).parent
    return data_dir / "embedder.sock"
def server_pid_path(project_root: Path) -> Path:
    """Return the path of ``embedder.pid``, located next to the database file."""
    data_dir = db_path(project_root).parent
    return data_dir / "embedder.pid"
def loci_bin() -> str:
    """Return the full path of ``loci`` in the directory that holds ``sys.executable``.

    The result is derived from the running interpreter's location rather than
    from PATH; the file's existence is not checked.
    """
    import sys

    interpreter_dir = Path(sys.executable).parent
    return str(interpreter_dir / "loci")

codeatrium/py.typed ADDED Viewed

File without changes