codeatrium 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,122 @@
1
+ """loci server start/stop/status コマンド"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import typer
6
+
7
# Typer sub-application that groups the embedding-server management commands
# (start / stop / status) registered below.
server_app = typer.Typer(
    help="embedding サーバー管理",
)
8
+
9
+
10
@server_app.command("start")
def server_start() -> None:
    """Start the embedding server as a detached background process.

    If the socket file exists and a server answers the ping with "ok", nothing
    is started. A socket that cannot be confirmed is treated as stale and
    removed. After spawning, the command polls for the socket file for about
    30 seconds (150 polls, 0.2 s apart).

    Raises:
        typer.Exit: Code 1 when the project database does not exist, when the
            spawned process exits before creating its socket, or when the
            socket does not appear in time.
    """
    import json as _json
    import socket as _socket
    import subprocess
    import time

    from codeatrium.embedder import _loci_python
    from codeatrium.paths import db_path, find_project_root, server_pid_path, sock_path

    root = find_project_root()
    if not db_path(root).exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    sock = sock_path(root)

    if sock.exists():
        try:
            with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
                s.settimeout(1.0)
                s.connect(str(sock))
                s.sendall((_json.dumps({"type": "ping"}) + "\n").encode())
                alive = b"ok" in s.recv(256)
        except OSError:
            # Connection refused, timeout, etc.: nobody is serving this socket.
            alive = False

        if alive:
            typer.echo("Server is already running.")
            return

        # Remove the unconfirmed socket file so that the readiness loop below
        # only succeeds once the new server has created its own socket.
        sock.unlink(missing_ok=True)

    pid_path = server_pid_path(root)
    proc = subprocess.Popen(
        [_loci_python(), "-m", "codeatrium.embedder_server", str(sock)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
    )
    pid_path.write_text(str(proc.pid))

    for i in range(150):
        if sock.exists():
            typer.echo(f"Server started (PID {proc.pid})")
            return
        if proc.poll() is not None:
            # The spawned process is the one whose PID was just recorded; once
            # it has exited there is nothing left to wait for, and the PID
            # file would point at a dead process.
            pid_path.unlink(missing_ok=True)
            break
        time.sleep(0.2)
        if i % 25 == 24:
            # Progress hint roughly every 5 seconds.
            typer.echo(" Loading model...", err=True)

    typer.echo("Server failed to start.", err=True)
    raise typer.Exit(1)
61
+
62
+
63
@server_app.command("stop")
def server_stop() -> None:
    """Ask the embedding server to stop and remove its PID file.

    When no socket file exists the server is reported as not running. If the
    stop request cannot be delivered, the error is printed and the socket file
    is removed.
    """
    import json as _json
    import socket as _socket

    from codeatrium.paths import find_project_root, server_pid_path, sock_path

    project_root = find_project_root()
    socket_file = sock_path(project_root)

    if socket_file.exists():
        # Newline-terminated JSON message, same framing as the ping request.
        stop_request = _json.dumps({"type": "stop"}).encode() + b"\n"
        try:
            client = _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM)
            with client:
                client.settimeout(2.0)
                client.connect(str(socket_file))
                client.sendall(stop_request)
            typer.echo("Server stopped.")
        except Exception as exc:
            typer.echo(f"Could not connect to server: {exc}", err=True)
            socket_file.unlink(missing_ok=True)

        # The PID file is dropped whether or not the request got through.
        server_pid_path(project_root).unlink(missing_ok=True)
    else:
        typer.echo("Server is not running.")
89
+
90
+
91
@server_app.command("status")
def server_status() -> None:
    """Report whether the embedding server is running.

    Prints "stopped" when there is no socket file, the PID and socket path
    when the server answers a ping with "ok", and otherwise reports the socket
    as unresponsive and removes it.
    """
    import json as _json
    import socket as _socket

    from codeatrium.paths import find_project_root, server_pid_path, sock_path

    root = find_project_root()
    sock = sock_path(root)

    if not sock.exists():
        typer.echo("Server: stopped")
        return

    # Guard only the socket round-trip. Anything that goes wrong after a
    # successful ping (e.g. reading the PID file) must not be mistaken for an
    # unresponsive server, because that path deletes the socket file.
    try:
        with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
            s.settimeout(1.0)
            s.connect(str(sock))
            s.sendall((_json.dumps({"type": "ping"}) + "\n").encode())
            responding = b"ok" in s.recv(256)
    except OSError:
        responding = False

    if not responding:
        typer.echo("Server: socket exists but not responding")
        sock.unlink(missing_ok=True)
        return

    try:
        pid = server_pid_path(root).read_text().strip()
    except (OSError, ValueError):
        # Missing, unreadable or undecodable PID file: still report the server.
        pid = "unknown"

    typer.echo(f"Server: running (PID {pid})")
    typer.echo(f"Socket: {sock}")
@@ -0,0 +1,151 @@
1
+ """loci show / loci dump コマンド"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Annotated, Any
7
+
8
+ import typer
9
+
10
+
11
def show(
    ref: Annotated[str, typer.Argument(help="verbatim_ref (path:ply=N)")],
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Print the original text of the exchange identified by a verbatim_ref.

    The reference has the form ``<path>:ply=<N>``; the exchange is looked up by
    the conversation's source path and the exchange's ``ply_start``.

    Raises:
        typer.Exit: Code 1 when the reference is malformed or the project
            database does not exist.
    """
    from contextlib import closing

    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    # Split on the last ":ply=" so that paths containing ':' remain intact.
    path_part, separator, ply_part = ref.rpartition(":ply=")
    if not separator:
        typer.echo("Invalid ref format. Expected: <path>:ply=<N>", err=True)
        raise typer.Exit(1)
    try:
        ply = int(ply_part)
    except ValueError:
        typer.echo(f"Invalid ply value: {ply_part}", err=True)
        # The ValueError is already reported; do not chain it onto the exit.
        raise typer.Exit(1) from None

    root = find_project_root()
    db = db_path(root)
    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    # closing() guarantees the connection is released even if the query fails.
    with closing(get_connection(db)) as con:
        row = con.execute(
            """
            SELECT e.user_content, e.agent_content, e.ply_start, e.ply_end
            FROM exchanges e
            JOIN conversations c ON c.id = e.conversation_id
            WHERE c.source_path = ? AND e.ply_start = ?
            """,
            (path_part, ply),
        ).fetchone()

    if row is None:
        typer.echo("Exchange not found.")
        return

    if json_output:
        typer.echo(
            json.dumps(
                {
                    "user_content": row["user_content"],
                    "agent_content": row["agent_content"],
                    "ply_start": row["ply_start"],
                    "ply_end": row["ply_end"],
                },
                ensure_ascii=False,
                indent=2,
            )
        )
    else:
        typer.echo(f"[User] (ply {row['ply_start']}-{row['ply_end']})")
        typer.echo(row["user_content"])
        typer.echo("\n[Agent]")
        typer.echo(row["agent_content"])
69
+
70
+
71
def dump(
    distilled: Annotated[
        bool, typer.Option("--distilled", help="蒸留済み palace objects を出力")
    ] = False,
    limit: Annotated[int, typer.Option("--limit", "-n", help="最大件数")] = 1000,
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Print distilled palace objects, newest first.

    Intended for loading into context at the start of a session. Each object
    is printed with its rooms, ordered by relevance; the text output shows at
    most two rooms per object.

    Raises:
        typer.Exit: Code 1 when ``--distilled`` is not given or the project
            database does not exist.
    """
    from contextlib import closing

    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    if not distilled:
        typer.echo("Use --distilled to dump palace objects.", err=True)
        raise typer.Exit(1)

    root = find_project_root()
    db = db_path(root)
    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    # One bound parameter is used per palace id. SQLite caps the number of
    # parameters per statement (999 by default before 3.32.0), so the ids are
    # queried in chunks instead of a single IN (...) list.
    ids_per_query = 500

    # closing() guarantees the connection is released even if a query fails.
    with closing(get_connection(db)) as con:
        rows = con.execute(
            """
            SELECT p.id, p.exchange_id, p.exchange_core, p.specific_context,
                   e.distilled_at
            FROM palace_objects p
            JOIN exchanges e ON e.id = p.exchange_id
            ORDER BY e.distilled_at DESC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()

        palace_ids = [r["id"] for r in rows]
        room_rows: list[Any] = []
        for start in range(0, len(palace_ids), ids_per_query):
            chunk = palace_ids[start : start + ids_per_query]
            placeholders = ",".join("?" * len(chunk))
            # Only '?' placeholders are interpolated; the ids stay bound values.
            room_rows.extend(
                con.execute(
                    f"""
                    SELECT palace_object_id, room_type, room_key, room_label
                    FROM rooms
                    WHERE palace_object_id IN ({placeholders})
                    ORDER BY relevance DESC
                    """,
                    chunk,
                ).fetchall()
            )

    if not rows:
        typer.echo("No distilled objects found.")
        return

    # Group rooms per palace object. All rooms of one object come from the
    # same chunk, so the relevance ordering within each list is preserved.
    rooms_map: dict[str, list[Any]] = {}
    for r in room_rows:
        rooms_map.setdefault(r["palace_object_id"], []).append(
            {
                "room_type": r["room_type"],
                "room_key": r["room_key"],
                "room_label": r["room_label"],
            }
        )

    if json_output:
        output = [
            {
                "exchange_core": r["exchange_core"],
                "specific_context": r["specific_context"],
                "rooms": rooms_map.get(r["id"], []),
                # First 10 characters of the timestamp; empty when it is NULL.
                "date": (r["distilled_at"] or "")[:10],
            }
            for r in rows
        ]
        typer.echo(json.dumps(output, ensure_ascii=False, indent=2))
    else:
        for r in rows:
            date = (r["distilled_at"] or "")[:10]
            typer.echo(f"\n[{date}] {r['exchange_core']}")
            if r["specific_context"]:
                typer.echo(f" {r['specific_context']}")
            for rm in rooms_map.get(r["id"], [])[:2]:
                typer.echo(f" #{rm['room_key']}")
@@ -0,0 +1,59 @@
1
+ """loci status コマンド"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from typing import Annotated
7
+
8
+ import typer
9
+
10
+
11
def status(
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Show index statistics: exchange, distilled, palace and symbol counts plus DB size.

    Raises:
        typer.Exit: Code 1 when the project database does not exist.
    """
    from contextlib import closing

    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    root = find_project_root()
    db = db_path(root)

    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    # closing() guarantees the connection is released even if a query fails.
    with closing(get_connection(db)) as con:
        total = con.execute("SELECT COUNT(*) FROM exchanges").fetchone()[0]
        distilled = con.execute(
            "SELECT COUNT(*) FROM exchanges WHERE distilled_at IS NOT NULL"
        ).fetchone()[0]
        palace_count = con.execute("SELECT COUNT(*) FROM palace_objects").fetchone()[0]
        symbol_count = con.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]

    db_size_kb = db.stat().st_size / 1024

    if json_output:
        typer.echo(
            json.dumps(
                {
                    "db_path": str(db),
                    "exchanges": total,
                    "distilled": distilled,
                    "undistilled": total - distilled,
                    "palace_objects": palace_count,
                    "symbols": symbol_count,
                    "db_size_kb": round(db_size_kb, 1),
                },
                ensure_ascii=False,
                indent=2,
            )
        )
    else:
        typer.echo(f"DB: {db} ({db_size_kb:.1f} KB)")
        typer.echo(
            f"Exchanges : {total} total, {distilled} distilled, {total - distilled} pending"
        )
        typer.echo(f"Palace : {palace_count}")
        typer.echo(f"Symbols : {symbol_count}")
codeatrium/config.py ADDED
@@ -0,0 +1,96 @@
1
+ """設定ファイルの読み込み — .codeatrium/config.toml"""
2
+
3
+ from __future__ import annotations
4
+
5
+ import tomllib
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
# Name of the settings file read from <project_root>/.codeatrium/.
CONFIG_FILENAME = "config.toml"

# ---- Default values ----

# Fallback for `distill.model`.
DEFAULT_DISTILL_MODEL = "claude-haiku-4-5-20251001"
# Fallback for `distill.batch_limit`.
DEFAULT_DISTILL_BATCH_LIMIT = 20
# Fallback for `index.min_chars`.
DEFAULT_INDEX_MIN_CHARS = 50
# Fallback for `distill.min_chars`.
DEFAULT_DISTILL_MIN_CHARS = 100
18
+
19
+
20
@dataclass
class Config:
    """User settings; each field falls back to its module-level DEFAULT_* value."""

    # Value of `distill.model` in config.toml.
    distill_model: str = DEFAULT_DISTILL_MODEL
    # Value of `distill.batch_limit` in config.toml.
    distill_batch_limit: int = DEFAULT_DISTILL_BATCH_LIMIT
    # Value of `index.min_chars` in config.toml.
    index_min_chars: int = DEFAULT_INDEX_MIN_CHARS
    # Value of `distill.min_chars` in config.toml.
    distill_min_chars: int = DEFAULT_DISTILL_MIN_CHARS
28
+
29
+
30
def _warn(message: str) -> None:
    """Print a configuration warning to stderr."""
    import sys

    print(f"Warning: {message}", file=sys.stderr)


def _table(data: dict[str, Any], name: str) -> dict[str, Any]:
    """Return the TOML table *name*, or an empty dict when it is missing or not a table."""
    section = data.get(name, {})
    if not isinstance(section, dict):
        _warn(f"[{name}] must be a table, using defaults.")
        return {}
    return section


def _positive_int(section: dict[str, Any], key: str, label: str, default: int) -> int:
    """Return ``section[key]`` if it is an integer >= 1, otherwise warn and return *default*."""
    value = section.get(key, default)
    # bool is a subclass of int, so `key = true` must be rejected explicitly.
    if isinstance(value, bool) or not isinstance(value, int) or value < 1:
        _warn(f"{label} must be a positive integer, using default.")
        return default
    return value


def load_config(project_root: Path) -> Config:
    """Read ``project_root/.codeatrium/config.toml`` and return a Config.

    A missing file yields the defaults. A file that cannot be read or parsed,
    or an individual value of the wrong type or range, produces a warning on
    stderr and falls back to the corresponding default.

    Args:
        project_root: Directory that contains the ``.codeatrium`` folder.

    Returns:
        The validated configuration.
    """
    config_path = project_root / ".codeatrium" / CONFIG_FILENAME
    if not config_path.exists():
        return Config()

    try:
        with config_path.open("rb") as f:
            data = tomllib.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers tomllib.TOMLDecodeError and invalid UTF-8.
        _warn(f"failed to parse {config_path}: {e}")
        return Config()

    distill = _table(data, "distill")

    model = distill.get("model", DEFAULT_DISTILL_MODEL)
    if not isinstance(model, str) or not model.strip():
        _warn("distill.model must be a non-empty string, using default.")
        model = DEFAULT_DISTILL_MODEL

    batch_limit = _positive_int(
        distill, "batch_limit", "distill.batch_limit", DEFAULT_DISTILL_BATCH_LIMIT
    )

    index = _table(data, "index")
    min_chars = _positive_int(
        index, "min_chars", "index.min_chars", DEFAULT_INDEX_MIN_CHARS
    )

    distill_min_chars = _positive_int(
        distill, "min_chars", "distill.min_chars", DEFAULT_DISTILL_MIN_CHARS
    )

    return Config(
        distill_model=model,
        distill_batch_limit=batch_limit,
        index_min_chars=min_chars,
        distill_min_chars=distill_min_chars,
    )
codeatrium/db.py ADDED
@@ -0,0 +1,135 @@
1
+ """
2
+ SQLite DB の初期化・スキーマ定義・接続管理
3
+
4
+ テーブル構成:
5
+ conversations - .jsonl ファイル単位の会話記録(重複排除キャッシュ)
6
+ exchanges - exchange 単位の verbatim テキスト
7
+ exchanges_fts - exchanges の FTS5 仮想テーブル(BM25 verbatim 検索用)
8
+ vec_exchanges - sqlite-vec HNSW インデックス(Phase1 verbatim ベクトル検索用)
9
+ palace_objects - 蒸留済み palace object(exchange_core + specific_context)
10
+ rooms - palace object の room_assignments
11
+ vec_palace - sqlite-vec HNSW インデックス(Phase2 distilled ベクトル検索用)
12
+ symbols - tree-sitter 解決済みシンボル(Phase3 コード逆引き用)
13
+ """
14
+
15
+ import sqlite3
16
+ from pathlib import Path
17
+
18
+ import sqlite_vec
19
+
20
+
21
def get_connection(db_path: Path) -> sqlite3.Connection:
    """Open *db_path* and return a connection with the sqlite-vec extension loaded.

    Rows are returned as ``sqlite3.Row`` objects. Extension loading is only
    enabled while sqlite-vec is being loaded. If loading fails, the connection
    is closed before the error propagates.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        An open connection; the caller is responsible for closing it.
    """
    con = sqlite3.connect(db_path)
    try:
        con.enable_load_extension(True)
        try:
            sqlite_vec.load(con)
        finally:
            # Never leave extension loading switched on, even after a failure.
            con.enable_load_extension(False)
    except Exception:
        # Do not leak the handle when the extension cannot be loaded.
        con.close()
        raise
    con.row_factory = sqlite3.Row
    return con
29
+
30
+
31
def init_db(db_path: Path) -> None:
    """Create the database file and its schema (idempotent).

    Creates the parent directory if needed, then the regular tables, the FTS5
    table with its sync triggers, and the two sqlite-vec tables. Every
    statement uses IF NOT EXISTS, and the ``last_ply_end`` migration only runs
    when the column is missing, so calling this repeatedly is safe.

    Args:
        db_path: Path of the SQLite database file.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    con = get_connection(db_path)
    try:
        con.executescript("""
            CREATE TABLE IF NOT EXISTS conversations (
                id TEXT PRIMARY KEY, -- sha256(source_path)
                source_path TEXT NOT NULL UNIQUE,
                started_at TIMESTAMP,
                last_ply_end INT NOT NULL DEFAULT -1 -- 最後にインデックスした ply_end(差分用)
            );

            CREATE TABLE IF NOT EXISTS exchanges (
                id TEXT PRIMARY KEY, -- sha256(conversation_id + ":" + user_uuid)
                conversation_id TEXT NOT NULL,
                ply_start INT NOT NULL,
                ply_end INT NOT NULL,
                user_content TEXT NOT NULL,
                agent_content TEXT NOT NULL,
                distilled_at TIMESTAMP -- NULL = 未蒸留
            );

            CREATE VIRTUAL TABLE IF NOT EXISTS exchanges_fts USING fts5(
                user_content,
                agent_content,
                content=exchanges,
                content_rowid=rowid
            );

            CREATE TRIGGER IF NOT EXISTS exchanges_ai
            AFTER INSERT ON exchanges BEGIN
                INSERT INTO exchanges_fts(rowid, user_content, agent_content)
                VALUES (new.rowid, new.user_content, new.agent_content);
            END;

            CREATE TRIGGER IF NOT EXISTS exchanges_ad
            AFTER DELETE ON exchanges BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, user_content, agent_content)
                VALUES ('delete', old.rowid, old.user_content, old.agent_content);
            END;

            CREATE TRIGGER IF NOT EXISTS exchanges_au
            AFTER UPDATE ON exchanges BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, user_content, agent_content)
                VALUES ('delete', old.rowid, old.user_content, old.agent_content);
                INSERT INTO exchanges_fts(rowid, user_content, agent_content)
                VALUES (new.rowid, new.user_content, new.agent_content);
            END;

            CREATE TABLE IF NOT EXISTS palace_objects (
                id TEXT PRIMARY KEY,
                exchange_id TEXT NOT NULL,
                exchange_core TEXT NOT NULL,
                specific_context TEXT NOT NULL,
                distill_text TEXT NOT NULL -- exchange_core + newline + specific_context
            );

            CREATE TABLE IF NOT EXISTS rooms (
                id TEXT PRIMARY KEY,
                palace_object_id TEXT NOT NULL,
                room_type TEXT NOT NULL, -- "file" / "concept" / "workflow"
                room_key TEXT NOT NULL,
                room_label TEXT NOT NULL,
                relevance REAL NOT NULL,
                dedup_hash TEXT NOT NULL -- hash(room_type, room_key)
            );

            CREATE TABLE IF NOT EXISTS symbols (
                id TEXT PRIMARY KEY, -- sha256(symbol_name + file_path)
                palace_object_id TEXT NOT NULL,
                symbol_name TEXT NOT NULL, -- "AuthMiddleware.validate"
                symbol_kind TEXT NOT NULL, -- "function" / "class" / "method"
                file_path TEXT NOT NULL,
                signature TEXT NOT NULL,
                line INT NOT NULL,
                dedup_hash TEXT NOT NULL -- sha256(symbol_name + file_path)
            );
        """)

        # Migration: add last_ply_end to databases created before the column
        # existed. The column list is inspected first so that genuine errors
        # are not swallowed together with the expected "duplicate column" case.
        # Index 1 of each PRAGMA table_info row is the column name.
        existing_columns = {
            row[1] for row in con.execute("PRAGMA table_info(conversations)")
        }
        if "last_ply_end" not in existing_columns:
            con.execute(
                "ALTER TABLE conversations ADD COLUMN last_ply_end INT NOT NULL DEFAULT -1"
            )
            con.commit()

        # sqlite-vec virtual table for the Phase 1 verbatim embeddings.
        con.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS vec_exchanges USING vec0(
                exchange_id TEXT PRIMARY KEY,
                embedding FLOAT[384]
            )
        """)

        # sqlite-vec virtual table for the Phase 2 distilled embeddings.
        con.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS vec_palace USING vec0(
                palace_id TEXT PRIMARY KEY,
                embedding FLOAT[384]
            )
        """)

        con.commit()
    finally:
        # Release the connection even when schema creation fails part-way.
        con.close()