codeatrium 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeatrium/__init__.py +3 -0
- codeatrium/__main__.py +5 -0
- codeatrium/cli/__init__.py +295 -0
- codeatrium/cli/distill_cmd.py +76 -0
- codeatrium/cli/hook_cmd.py +24 -0
- codeatrium/cli/index_cmd.py +62 -0
- codeatrium/cli/prime_cmd.py +90 -0
- codeatrium/cli/search_cmd.py +128 -0
- codeatrium/cli/server_cmd.py +122 -0
- codeatrium/cli/show_cmd.py +151 -0
- codeatrium/cli/status_cmd.py +59 -0
- codeatrium/config.py +96 -0
- codeatrium/db.py +135 -0
- codeatrium/distiller.py +290 -0
- codeatrium/embedder.py +168 -0
- codeatrium/embedder_server.py +172 -0
- codeatrium/hooks.py +156 -0
- codeatrium/indexer.py +237 -0
- codeatrium/llm.py +148 -0
- codeatrium/models.py +53 -0
- codeatrium/paths.py +74 -0
- codeatrium/py.typed +0 -0
- codeatrium/resolver.py +301 -0
- codeatrium/search.py +273 -0
- codeatrium-0.1.0.dist-info/METADATA +180 -0
- codeatrium-0.1.0.dist-info/RECORD +29 -0
- codeatrium-0.1.0.dist-info/WHEEL +4 -0
- codeatrium-0.1.0.dist-info/entry_points.txt +2 -0
- codeatrium-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""loci server start/stop/status コマンド"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import typer
|
|
6
|
+
|
|
7
|
+
# Typer sub-application that groups the `start` / `stop` / `status` commands
# defined below for managing the embedding server.
server_app = typer.Typer(help="embedding サーバー管理")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@server_app.command("start")
def server_start() -> None:
    """Start the embedding server as a detached background process.

    Exits with status 1 when the project has not been initialized or when the
    server does not create its socket in time. If a server already answers a
    ping on the project's socket, nothing is started.
    """
    import json as _json
    import socket as _socket
    import subprocess
    import time

    from codeatrium.embedder import _loci_python
    from codeatrium.paths import db_path, find_project_root, server_pid_path, sock_path

    root = find_project_root()
    if not db_path(root).exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    sock = sock_path(root)

    if sock.exists():
        # Probe the existing socket: only a reply containing "ok" counts as a
        # live server.
        try:
            with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
                s.settimeout(1.0)
                s.connect(str(sock))
                s.sendall((_json.dumps({"type": "ping"}) + "\n").encode())
                resp = s.recv(256)
        except OSError:
            # Connection refused, timeout, vanished file, ... -> treat as stale.
            resp = b""
        if b"ok" in resp:
            typer.echo("Server is already running.")
            return
        # Remove the stale socket in every non-"ok" case; otherwise the wait
        # loop below would see the old file and report success immediately.
        sock.unlink(missing_ok=True)

    pid_path = server_pid_path(root)
    proc = subprocess.Popen(
        [_loci_python(), "-m", "codeatrium.embedder_server", str(sock)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        start_new_session=True,
    )
    pid_path.write_text(str(proc.pid))

    # Wait up to 150 * 0.2 s = 30 s for the server to create its socket.
    for i in range(150):
        if sock.exists():
            typer.echo(f"Server started (PID {proc.pid})")
            return
        # A non-zero exit code means the child crashed; stop waiting early.
        if proc.poll() not in (None, 0):
            break
        time.sleep(0.2)
        if i % 25 == 24:
            typer.echo(" Loading model...", err=True)

    # Only drop the PID file when the child is known to have failed; on a plain
    # timeout it may still be running and finish starting later.
    if proc.poll() not in (None, 0):
        pid_path.unlink(missing_ok=True)
    typer.echo("Server failed to start.", err=True)
    raise typer.Exit(1)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@server_app.command("stop")
def server_stop() -> None:
    """Ask the running embedding server to stop and remove its PID file.

    When the socket file is missing the server is reported as not running.
    When the socket exists but cannot be reached, the error is printed to
    stderr and the socket file is removed.
    """
    import json as _json
    import socket as _socket

    from codeatrium.paths import find_project_root, server_pid_path, sock_path

    project_root = find_project_root()
    socket_file = sock_path(project_root)

    if socket_file.exists():
        stop_request = (_json.dumps({"type": "stop"}) + "\n").encode()
        try:
            with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as client:
                client.settimeout(2.0)
                client.connect(str(socket_file))
                client.sendall(stop_request)
                typer.echo("Server stopped.")
        except Exception as exc:
            typer.echo(f"Could not connect to server: {exc}", err=True)
            socket_file.unlink(missing_ok=True)

        # The PID file is removed whether or not the stop request went through.
        server_pid_path(project_root).unlink(missing_ok=True)
    else:
        typer.echo("Server is not running.")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@server_app.command("status")
def server_status() -> None:
    """Report whether the embedding server is running.

    Prints "stopped" when no socket file exists, "running" (with PID and
    socket path) when the server answers a ping, and otherwise reports an
    unresponsive socket and removes the socket file.
    """
    import json as _json
    import socket as _socket

    from codeatrium.paths import find_project_root, server_pid_path, sock_path

    root = find_project_root()
    sock = sock_path(root)

    if not sock.exists():
        typer.echo("Server: stopped")
        return

    # Keep the try body limited to the socket round-trip so that unrelated
    # failures (e.g. reading the PID file) can never be mistaken for an
    # unresponsive server and lead to deleting a live server's socket.
    try:
        with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s:
            s.settimeout(1.0)
            s.connect(str(sock))
            s.sendall((_json.dumps({"type": "ping"}) + "\n").encode())
            resp = s.recv(256)
    except OSError:
        resp = b""

    if b"ok" in resp:
        try:
            pid = server_pid_path(root).read_text().strip()
        except (OSError, ValueError):
            # Missing or unreadable PID file: the server is still running.
            pid = "unknown"
        typer.echo(f"Server: running (PID {pid})")
        typer.echo(f"Socket: {sock}")
        return

    typer.echo("Server: socket exists but not responding")
    sock.unlink(missing_ok=True)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""loci show / loci dump コマンド"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Annotated, Any
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def show(
    ref: Annotated[str, typer.Argument(help="verbatim_ref (path:ply=N)")],
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Print the verbatim text of the exchange identified by a verbatim_ref.

    ``ref`` has the form ``<path>:ply=<N>``; the exchange is looked up by the
    conversation's source path and the exchange's starting ply. Exits with
    status 1 on a malformed ref or when the database does not exist.
    """
    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    if ":ply=" not in ref:
        typer.echo("Invalid ref format. Expected: <path>:ply=<N>", err=True)
        raise typer.Exit(1)
    # rsplit so that a path which itself contains ":ply=" is still split on
    # the last occurrence.
    path_part, ply_part = ref.rsplit(":ply=", 1)
    try:
        ply = int(ply_part)
    except ValueError:
        typer.echo(f"Invalid ply value: {ply_part}", err=True)
        raise typer.Exit(1)

    root = find_project_root()
    db = db_path(root)
    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    con = get_connection(db)
    try:
        row = con.execute(
            """
            SELECT e.user_content, e.agent_content, e.ply_start, e.ply_end
            FROM exchanges e
            JOIN conversations c ON c.id = e.conversation_id
            WHERE c.source_path = ? AND e.ply_start = ?
            """,
            (path_part, ply),
        ).fetchone()
    finally:
        # Close the connection even when the query raises.
        con.close()

    if row is None:
        typer.echo("Exchange not found.")
        return

    if json_output:
        typer.echo(
            json.dumps(
                {
                    "user_content": row["user_content"],
                    "agent_content": row["agent_content"],
                    "ply_start": row["ply_start"],
                    "ply_end": row["ply_end"],
                },
                ensure_ascii=False,
                indent=2,
            )
        )
    else:
        typer.echo(f"[User] (ply {row['ply_start']}-{row['ply_end']})")
        typer.echo(row["user_content"])
        typer.echo("\n[Agent]")
        typer.echo(row["agent_content"])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def dump(
    distilled: Annotated[
        bool, typer.Option("--distilled", help="蒸留済み palace objects を出力")
    ] = False,
    limit: Annotated[int, typer.Option("--limit", "-n", help="最大件数")] = 1000,
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Print distilled palace objects, newest first (for in-context loading at session start).

    Requires ``--distilled``; exits with status 1 without it or when the
    database does not exist. Each object is printed with its rooms ordered by
    descending relevance (text mode shows at most two rooms per object).
    """
    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    if not distilled:
        typer.echo("Use --distilled to dump palace objects.", err=True)
        raise typer.Exit(1)

    root = find_project_root()
    db = db_path(root)
    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    # SQLite builds before 3.32 allow at most 999 bound parameters per
    # statement, which the default limit of 1000 would exceed; look rooms up in
    # chunks well below that bound.
    chunk_size = 500

    con = get_connection(db)
    try:
        rows = con.execute(
            """
            SELECT p.id, p.exchange_id, p.exchange_core, p.specific_context,
            e.distilled_at
            FROM palace_objects p
            JOIN exchanges e ON e.id = p.exchange_id
            ORDER BY e.distilled_at DESC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()

        palace_ids = [r["id"] for r in rows]
        room_rows: list[Any] = []
        for start in range(0, len(palace_ids), chunk_size):
            chunk = palace_ids[start : start + chunk_size]
            placeholders = ",".join("?" * len(chunk))
            # All rooms of one palace object land in the same chunk, so the
            # per-object relevance ordering is preserved.
            room_rows.extend(
                con.execute(
                    f"""
                    SELECT palace_object_id, room_type, room_key, room_label
                    FROM rooms
                    WHERE palace_object_id IN ({placeholders})
                    ORDER BY relevance DESC
                    """,
                    chunk,
                ).fetchall()
            )
    finally:
        # Close the connection even when a query raises.
        con.close()

    if not rows:
        typer.echo("No distilled objects found.")
        return

    rooms_map: dict[str, list[Any]] = {}
    for r in room_rows:
        rooms_map.setdefault(r["palace_object_id"], []).append(
            {
                "room_type": r["room_type"],
                "room_key": r["room_key"],
                "room_label": r["room_label"],
            }
        )

    if json_output:
        output = [
            {
                "exchange_core": r["exchange_core"],
                "specific_context": r["specific_context"],
                "rooms": rooms_map.get(r["id"], []),
                # Keep only the first 10 characters of the timestamp.
                "date": (r["distilled_at"] or "")[:10],
            }
            for r in rows
        ]
        typer.echo(json.dumps(output, ensure_ascii=False, indent=2))
    else:
        for r in rows:
            date = (r["distilled_at"] or "")[:10]
            typer.echo(f"\n[{date}] {r['exchange_core']}")
            if r["specific_context"]:
                typer.echo(f" {r['specific_context']}")
            for rm in rooms_map.get(r["id"], [])[:2]:
                typer.echo(f" #{rm['room_key']}")
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""loci status コマンド"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Annotated
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def status(
    json_output: Annotated[bool, typer.Option("--json", help="JSON で出力")] = False,
) -> None:
    """Show index state: exchange count, distilled count, palace/symbol counts and DB size.

    Exits with status 1 when the database does not exist.
    """
    from codeatrium.db import get_connection
    from codeatrium.paths import db_path, find_project_root

    root = find_project_root()
    db = db_path(root)

    if not db.exists():
        typer.echo("Not initialized. Run `loci init` first.", err=True)
        raise typer.Exit(1)

    con = get_connection(db)
    try:
        total = con.execute("SELECT COUNT(*) FROM exchanges").fetchone()[0]
        distilled = con.execute(
            "SELECT COUNT(*) FROM exchanges WHERE distilled_at IS NOT NULL"
        ).fetchone()[0]
        palace_count = con.execute("SELECT COUNT(*) FROM palace_objects").fetchone()[0]
        symbol_count = con.execute("SELECT COUNT(*) FROM symbols").fetchone()[0]
    finally:
        # Close the connection even when one of the COUNT queries raises.
        con.close()

    db_size_bytes = db.stat().st_size
    db_size_kb = db_size_bytes / 1024

    if json_output:
        typer.echo(
            json.dumps(
                {
                    "db_path": str(db),
                    "exchanges": total,
                    "distilled": distilled,
                    "undistilled": total - distilled,
                    "palace_objects": palace_count,
                    "symbols": symbol_count,
                    "db_size_kb": round(db_size_kb, 1),
                },
                ensure_ascii=False,
                indent=2,
            )
        )
    else:
        typer.echo(f"DB: {db} ({db_size_kb:.1f} KB)")
        typer.echo(
            f"Exchanges : {total} total, {distilled} distilled, {total - distilled} pending"
        )
        typer.echo(f"Palace : {palace_count}")
        typer.echo(f"Symbols : {symbol_count}")
|
codeatrium/config.py
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""設定ファイルの読み込み — .codeatrium/config.toml"""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import tomllib
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
# File name of the user config, looked up under <project_root>/.codeatrium/.
CONFIG_FILENAME = "config.toml"

# ---- Default values ----
# Used when the config file is missing, unparsable, or a value is invalid.

# Default for Config.distill_model (TOML key distill.model).
DEFAULT_DISTILL_MODEL = "claude-haiku-4-5-20251001"
# Default for Config.distill_batch_limit (TOML key distill.batch_limit).
DEFAULT_DISTILL_BATCH_LIMIT = 20
# Default for Config.index_min_chars (TOML key index.min_chars).
DEFAULT_INDEX_MIN_CHARS = 50
# Default for Config.distill_min_chars (TOML key distill.min_chars).
DEFAULT_DISTILL_MIN_CHARS = 100
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class Config:
    """User settings, as produced by load_config().

    Every field has a default, so ``Config()`` yields the built-in defaults.
    """

    # Model identifier read from `distill.model`.
    distill_model: str = DEFAULT_DISTILL_MODEL
    # Positive integer read from `distill.batch_limit`.
    distill_batch_limit: int = DEFAULT_DISTILL_BATCH_LIMIT
    # Positive integer read from `index.min_chars`.
    index_min_chars: int = DEFAULT_INDEX_MIN_CHARS
    # Positive integer read from `distill.min_chars`.
    distill_min_chars: int = DEFAULT_DISTILL_MIN_CHARS
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _warn_config(message: str) -> None:
    """Print a configuration warning to stderr."""
    import sys

    print(f"Warning: {message}", file=sys.stderr)


def _config_table(data: dict[str, Any], name: str) -> dict[str, Any]:
    """Return the TOML table ``data[name]``, or ``{}`` if absent or not a table."""
    table = data.get(name, {})
    if not isinstance(table, dict):
        _warn_config(f"[{name}] must be a table, using defaults.")
        return {}
    return table


def _positive_int(table: dict[str, Any], key: str, label: str, default: int) -> int:
    """Return ``table[key]`` if it is an integer >= 1, else warn and return ``default``."""
    value = table.get(key, default)
    # bool is a subclass of int, so `key = true` has to be rejected explicitly.
    if isinstance(value, bool) or not isinstance(value, int) or value < 1:
        _warn_config(f"{label} must be a positive integer, using default.")
        return default
    return value


def load_config(project_root: Path) -> Config:
    """Read ``project_root/.codeatrium/config.toml`` and return a Config.

    Returns the defaults when the file does not exist or cannot be read or
    parsed. Each invalid value is reported on stderr and replaced by its
    default, so this function does not raise for malformed user input.
    """
    config_path = project_root / ".codeatrium" / CONFIG_FILENAME
    if not config_path.exists():
        return Config()

    try:
        with config_path.open("rb") as f:
            data = tomllib.load(f)
    except (OSError, ValueError) as e:
        # ValueError covers tomllib.TOMLDecodeError and undecodable bytes.
        _warn_config(f"failed to parse {config_path}: {e}")
        return Config()

    distill = _config_table(data, "distill")

    model = distill.get("model", DEFAULT_DISTILL_MODEL)
    if not isinstance(model, str) or not model.strip():
        _warn_config("distill.model must be a non-empty string, using default.")
        model = DEFAULT_DISTILL_MODEL

    batch_limit = _positive_int(
        distill, "batch_limit", "distill.batch_limit", DEFAULT_DISTILL_BATCH_LIMIT
    )

    index = _config_table(data, "index")
    min_chars = _positive_int(
        index, "min_chars", "index.min_chars", DEFAULT_INDEX_MIN_CHARS
    )

    distill_min_chars = _positive_int(
        distill, "min_chars", "distill.min_chars", DEFAULT_DISTILL_MIN_CHARS
    )

    return Config(
        distill_model=model,
        distill_batch_limit=batch_limit,
        index_min_chars=min_chars,
        distill_min_chars=distill_min_chars,
    )
|
codeatrium/db.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLite DB の初期化・スキーマ定義・接続管理
|
|
3
|
+
|
|
4
|
+
テーブル構成:
|
|
5
|
+
conversations - .jsonl ファイル単位の会話記録(重複排除キャッシュ)
|
|
6
|
+
exchanges - exchange 単位の verbatim テキスト
|
|
7
|
+
exchanges_fts - exchanges の FTS5 仮想テーブル(BM25 verbatim 検索用)
|
|
8
|
+
vec_exchanges - sqlite-vec HNSW インデックス(Phase1 verbatim ベクトル検索用)
|
|
9
|
+
palace_objects - 蒸留済み palace object(exchange_core + specific_context)
|
|
10
|
+
rooms - palace object の room_assignments
|
|
11
|
+
vec_palace - sqlite-vec HNSW インデックス(Phase2 distilled ベクトル検索用)
|
|
12
|
+
symbols - tree-sitter 解決済みシンボル(Phase3 コード逆引き用)
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import sqlite3
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
import sqlite_vec
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_connection(db_path: Path) -> sqlite3.Connection:
    """Open ``db_path`` and return a connection with the sqlite-vec extension loaded.

    Rows are returned as ``sqlite3.Row`` objects.
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row

    # Extension loading is enabled only for the duration of the sqlite-vec load.
    connection.enable_load_extension(True)
    sqlite_vec.load(connection)
    connection.enable_load_extension(False)

    return connection
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def init_db(db_path: Path) -> None:
    """Create the database file and its schema if they do not exist yet (idempotent).

    Creates the parent directory, the regular tables, the FTS5 table with its
    sync triggers, and the two sqlite-vec ``vec0`` tables. Also adds the
    ``conversations.last_ply_end`` column to databases created before that
    column existed.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    con = get_connection(db_path)
    try:
        con.executescript("""
            CREATE TABLE IF NOT EXISTS conversations (
                id TEXT PRIMARY KEY, -- sha256(source_path)
                source_path TEXT NOT NULL UNIQUE,
                started_at TIMESTAMP,
                last_ply_end INT NOT NULL DEFAULT -1 -- 最後にインデックスした ply_end(差分用)
            );

            CREATE TABLE IF NOT EXISTS exchanges (
                id TEXT PRIMARY KEY, -- sha256(conversation_id + ":" + user_uuid)
                conversation_id TEXT NOT NULL,
                ply_start INT NOT NULL,
                ply_end INT NOT NULL,
                user_content TEXT NOT NULL,
                agent_content TEXT NOT NULL,
                distilled_at TIMESTAMP -- NULL = 未蒸留
            );

            CREATE VIRTUAL TABLE IF NOT EXISTS exchanges_fts USING fts5(
                user_content,
                agent_content,
                content=exchanges,
                content_rowid=rowid
            );

            CREATE TRIGGER IF NOT EXISTS exchanges_ai
            AFTER INSERT ON exchanges BEGIN
                INSERT INTO exchanges_fts(rowid, user_content, agent_content)
                VALUES (new.rowid, new.user_content, new.agent_content);
            END;

            CREATE TRIGGER IF NOT EXISTS exchanges_ad
            AFTER DELETE ON exchanges BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, user_content, agent_content)
                VALUES ('delete', old.rowid, old.user_content, old.agent_content);
            END;

            CREATE TRIGGER IF NOT EXISTS exchanges_au
            AFTER UPDATE ON exchanges BEGIN
                INSERT INTO exchanges_fts(exchanges_fts, rowid, user_content, agent_content)
                VALUES ('delete', old.rowid, old.user_content, old.agent_content);
                INSERT INTO exchanges_fts(rowid, user_content, agent_content)
                VALUES (new.rowid, new.user_content, new.agent_content);
            END;

            CREATE TABLE IF NOT EXISTS palace_objects (
                id TEXT PRIMARY KEY,
                exchange_id TEXT NOT NULL,
                exchange_core TEXT NOT NULL,
                specific_context TEXT NOT NULL,
                distill_text TEXT NOT NULL -- exchange_core + newline + specific_context
            );

            CREATE TABLE IF NOT EXISTS rooms (
                id TEXT PRIMARY KEY,
                palace_object_id TEXT NOT NULL,
                room_type TEXT NOT NULL, -- "file" / "concept" / "workflow"
                room_key TEXT NOT NULL,
                room_label TEXT NOT NULL,
                relevance REAL NOT NULL,
                dedup_hash TEXT NOT NULL -- hash(room_type, room_key)
            );

            CREATE TABLE IF NOT EXISTS symbols (
                id TEXT PRIMARY KEY, -- sha256(symbol_name + file_path)
                palace_object_id TEXT NOT NULL,
                symbol_name TEXT NOT NULL, -- "AuthMiddleware.validate"
                symbol_kind TEXT NOT NULL, -- "function" / "class" / "method"
                file_path TEXT NOT NULL,
                signature TEXT NOT NULL,
                line INT NOT NULL,
                dedup_hash TEXT NOT NULL -- sha256(symbol_name + file_path)
            );
        """)

        # Migration: add last_ply_end to existing databases that lack it.
        # The column list is inspected first so that genuine ALTER TABLE
        # failures are not silently swallowed. Index 1 of each table_info row
        # is the column name.
        columns = {row[1] for row in con.execute("PRAGMA table_info(conversations)")}
        if "last_ply_end" not in columns:
            con.execute("ALTER TABLE conversations ADD COLUMN last_ply_end INT NOT NULL DEFAULT -1")
            con.commit()

        # sqlite-vec virtual table for Phase 1 verbatim embeddings.
        con.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS vec_exchanges USING vec0(
                exchange_id TEXT PRIMARY KEY,
                embedding FLOAT[384]
            )
        """)

        # sqlite-vec virtual table for Phase 2 distilled embeddings.
        con.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS vec_palace USING vec0(
                palace_id TEXT PRIMARY KEY,
                embedding FLOAT[384]
            )
        """)

        con.commit()
    finally:
        # Close the connection even when schema creation fails part-way.
        con.close()
|