codeatrium 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codeatrium-0.1.0/.gitignore +26 -0
- codeatrium-0.1.0/CHANGELOG.md +18 -0
- codeatrium-0.1.0/LICENSE +21 -0
- codeatrium-0.1.0/Makefile +22 -0
- codeatrium-0.1.0/PKG-INFO +180 -0
- codeatrium-0.1.0/README.ja.md +147 -0
- codeatrium-0.1.0/README.md +147 -0
- codeatrium-0.1.0/assets/architecture.svg +79 -0
- codeatrium-0.1.0/assets/interface.en.svg +65 -0
- codeatrium-0.1.0/assets/interface.ja.svg +65 -0
- codeatrium-0.1.0/pyproject.toml +97 -0
- codeatrium-0.1.0/src/codeatrium/__init__.py +3 -0
- codeatrium-0.1.0/src/codeatrium/__main__.py +5 -0
- codeatrium-0.1.0/src/codeatrium/cli/__init__.py +295 -0
- codeatrium-0.1.0/src/codeatrium/cli/distill_cmd.py +76 -0
- codeatrium-0.1.0/src/codeatrium/cli/hook_cmd.py +24 -0
- codeatrium-0.1.0/src/codeatrium/cli/index_cmd.py +62 -0
- codeatrium-0.1.0/src/codeatrium/cli/prime_cmd.py +90 -0
- codeatrium-0.1.0/src/codeatrium/cli/search_cmd.py +128 -0
- codeatrium-0.1.0/src/codeatrium/cli/server_cmd.py +122 -0
- codeatrium-0.1.0/src/codeatrium/cli/show_cmd.py +151 -0
- codeatrium-0.1.0/src/codeatrium/cli/status_cmd.py +59 -0
- codeatrium-0.1.0/src/codeatrium/config.py +96 -0
- codeatrium-0.1.0/src/codeatrium/db.py +135 -0
- codeatrium-0.1.0/src/codeatrium/distiller.py +290 -0
- codeatrium-0.1.0/src/codeatrium/embedder.py +168 -0
- codeatrium-0.1.0/src/codeatrium/embedder_server.py +172 -0
- codeatrium-0.1.0/src/codeatrium/hooks.py +156 -0
- codeatrium-0.1.0/src/codeatrium/indexer.py +237 -0
- codeatrium-0.1.0/src/codeatrium/llm.py +148 -0
- codeatrium-0.1.0/src/codeatrium/models.py +53 -0
- codeatrium-0.1.0/src/codeatrium/paths.py +74 -0
- codeatrium-0.1.0/src/codeatrium/py.typed +0 -0
- codeatrium-0.1.0/src/codeatrium/resolver.py +301 -0
- codeatrium-0.1.0/src/codeatrium/search.py +273 -0
- codeatrium-0.1.0/tests/__init__.py +0 -0
- codeatrium-0.1.0/tests/test_config.py +117 -0
- codeatrium-0.1.0/tests/test_db.py +69 -0
- codeatrium-0.1.0/tests/test_distiller.py +329 -0
- codeatrium-0.1.0/tests/test_embedder.py +47 -0
- codeatrium-0.1.0/tests/test_index_cmd.py +35 -0
- codeatrium-0.1.0/tests/test_indexer.py +303 -0
- codeatrium-0.1.0/tests/test_init.py +361 -0
- codeatrium-0.1.0/tests/test_resolver.py +139 -0
- codeatrium-0.1.0/tests/test_search_phase2.py +290 -0
- codeatrium-0.1.0/tests/test_security.py +184 -0
- codeatrium-0.1.0/tests/test_server_cmd.py +21 -0
- codeatrium-0.1.0/tests/test_show_dump.py +191 -0
- codeatrium-0.1.0/tests/test_status_hook.py +190 -0
- codeatrium-0.1.0/uv.lock +1349 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
dist/
|
|
6
|
+
build/
|
|
7
|
+
|
|
8
|
+
# Virtual environment
|
|
9
|
+
.venv/
|
|
10
|
+
|
|
11
|
+
# Test & lint caches
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
|
|
15
|
+
# Tool data (local only)
|
|
16
|
+
.codeatrium/
|
|
17
|
+
.logosyncx/
|
|
18
|
+
|
|
19
|
+
# Claude Code local settings
|
|
20
|
+
.claude/
|
|
21
|
+
|
|
22
|
+
# Internal docs (local only)
|
|
23
|
+
docs/internal/
|
|
24
|
+
|
|
25
|
+
# Research paper materials (local only)
|
|
26
|
+
mem/
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.0] - 2026-03-31
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- `loci init` — initialize `.codeatrium/` in project root
|
|
8
|
+
- `loci index` — parse `.jsonl` session logs, split into exchanges, index verbatim text with FTS5 for keyword search
|
|
9
|
+
- `loci distill` — distill exchanges via `claude --print` into palace objects (exchange_core, specific_context, room_assignments)
|
|
10
|
+
- `loci search` — cross-layer RRF fusion search (BM25 verbatim + HNSW distilled)
|
|
11
|
+
- `loci context` — reverse lookup: code symbol → past conversations
|
|
12
|
+
- `loci show` — fetch verbatim exchange by ref
|
|
13
|
+
- `loci status` — show index state
|
|
14
|
+
- `loci server start/stop/status` — Unix socket embedding server for <0.2s search
|
|
15
|
+
- `loci hook install` — register Claude Code SessionStart/Stop hooks
|
|
16
|
+
- `config.toml` support for distill model and batch limit
|
|
17
|
+
- tree-sitter symbol resolution (Python, TypeScript, Go)
|
|
18
|
+
- Bilingual support (Japanese + English) via multilingual-e5-small
|
codeatrium-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 senna-lang
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
VENV := .venv/bin
|
|
2
|
+
|
|
3
|
+
.PHONY: test lint fmt typecheck check hooks
|
|
4
|
+
|
|
5
|
+
test:
|
|
6
|
+
$(VENV)/pytest tests/ -v
|
|
7
|
+
|
|
8
|
+
lint:
|
|
9
|
+
$(VENV)/ruff check src/ tests/
|
|
10
|
+
|
|
11
|
+
fmt:
|
|
12
|
+
$(VENV)/ruff format src/ tests/
|
|
13
|
+
|
|
14
|
+
typecheck:
|
|
15
|
+
$(VENV)/pyright src/
|
|
16
|
+
|
|
17
|
+
check: lint typecheck test
|
|
18
|
+
|
|
19
|
+
hooks:
|
|
20
|
+
@echo '#!/bin/sh\nmake check' > .git/hooks/pre-commit
|
|
21
|
+
@chmod +x .git/hooks/pre-commit
|
|
22
|
+
@echo "pre-commit hook installed: runs make check before every commit"
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: codeatrium
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Memory palace for AI coding agents — index sessions, recall code context in <0.2s
|
|
5
|
+
Project-URL: Homepage, https://github.com/senna-lang/codeatrium
|
|
6
|
+
Project-URL: Repository, https://github.com/senna-lang/codeatrium
|
|
7
|
+
Project-URL: Issues, https://github.com/senna-lang/codeatrium/issues
|
|
8
|
+
Author-email: senna-lang <senna-lang@users.noreply.github.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,claude,cli,code-context,coding-agent,conversation-history,developer-tools,memory,semantic-search
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
22
|
+
Requires-Dist: sqlite-vec>=0.1.0
|
|
23
|
+
Requires-Dist: tree-sitter-go<0.24.0,>=0.23.0
|
|
24
|
+
Requires-Dist: tree-sitter-python<0.24.0,>=0.23.0
|
|
25
|
+
Requires-Dist: tree-sitter-typescript<0.24.0,>=0.23.0
|
|
26
|
+
Requires-Dist: tree-sitter<0.24.0,>=0.23.0
|
|
27
|
+
Requires-Dist: typer[all]>=0.12.0
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: pyright>=1.1.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: ruff>=0.4.0; extra == 'dev'
|
|
32
|
+
Description-Content-Type: text/markdown
|
|
33
|
+
|
|
34
|
+
# Codeatrium
|
|
35
|
+
|
|
36
|
+
[![CI](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml/badge.svg)](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml)
|
|
37
|
+
[![PyPI](https://img.shields.io/pypi/v/codeatrium)](https://pypi.org/project/codeatrium/)
|
|
38
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
39
|
+
|
|
40
|
+
A **memory palace** for AI coding agents.
|
|
41
|
+
|
|
42
|
+
English · [日本語](README.ja.md)
|
|
43
|
+
|
|
44
|
+
Codeatrium distills past conversations into *palace objects* and stores them in a searchable index, giving agents long-term memory. Past decisions, implementations, and code locations can be recalled in under 0.2 seconds.
|
|
45
|
+
|
|
46
|
+
The CLI command `loci` (from [Method of Loci](https://en.wikipedia.org/wiki/Method_of_loci)) is designed to be **called by the agent itself** — running `loci search "..." --json` from within a prompt.
|
|
47
|
+
|
|
48
|
+
The architecture extends the conversational memory model from [arXiv:2603.13017](https://arxiv.org/abs/2603.13017) for coding agents.
|
|
49
|
+
|
|
50
|
+
> **Note:** Currently [Claude Code](https://docs.anthropic.com/en/docs/claude-code) only. Session log format (`.jsonl`) and distillation (`claude --print`) depend on Claude Code.
|
|
51
|
+
|
|
52
|
+
## Simple Interface
|
|
53
|
+
|
|
54
|
+
Agents use two core commands:
|
|
55
|
+
|
|
56
|
+
- **Semantic search** — `loci search "query"` retrieves past conversations by semantic similarity
|
|
57
|
+
- **Reverse lookup from code** — `loci context --symbol "name"` recalls past conversations about a specific code symbol
|
|
58
|
+
- tree-sitter symbol resolution (Python / TypeScript / Go) lets agents understand implementation intent before editing
|
|
59
|
+
|
|
60
|
+
<img src="assets/interface.en.svg" alt="Simple Interface" width="800">
|
|
61
|
+
|
|
62
|
+
## How It Works
|
|
63
|
+
|
|
64
|
+
<img src="assets/architecture.svg" alt="Codeatrium Architecture" width="800">
|
|
65
|
+
|
|
66
|
+
1. **Index** — Splits agent session logs into exchanges (user utterance + agent response pairs) and indexes them with FTS5 for keyword search
|
|
67
|
+
2. **Distill** — An LLM (`claude --print`, default `claude-haiku-4-5`) summarizes each exchange into a palace object: `exchange_core` (what was done), `specific_context` (concrete details), `room_assignments` (topic tags). tree-sitter resolves touched files to symbol level (function/class/method + file + line + signature)
|
|
68
|
+
3. **Search** — Cross-layer search fusing BM25 on verbatim text with HNSW on distilled embeddings via RRF
|
|
69
|
+
|
|
70
|
+
Raw conversations are not embedded — only the condensed distilled text is embedded with `multilingual-e5-small` (384-dim), balancing semantic search quality with embedding cost. The embedding model runs as a **Unix socket server**, keeping search latency **under 0.2 seconds** after the first load.
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pipx install codeatrium
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Requires Python 3.11+.
|
|
79
|
+
|
|
80
|
+
## Quick Start
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Initialize in project root
|
|
84
|
+
loci init
|
|
85
|
+
|
|
86
|
+
# Install hooks for automatic indexing
|
|
87
|
+
loci hook install
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
When running `loci init`, if past session logs are detected, you'll be prompted with:
|
|
91
|
+
|
|
92
|
+
> [!IMPORTANT]
|
|
93
|
+
> When adopting this tool mid-project, a large number of exchanges may already exist. Distilling all of them will consume significant `claude --print` (Haiku) tokens. We recommend starting with `Skip all` or `Distill last 50`.
|
|
94
|
+
|
|
95
|
+
1. **Min chars threshold** — Minimum character filter for exchanges (default: 50). This controls how many exchanges become distillation candidates. Higher values exclude short conversations and reduce token usage; lower values include nearly everything.
|
|
96
|
+
2. **Handling existing exchanges** — Choose how much past history to distill:
|
|
97
|
+
- Skip all (no past session distillation)
|
|
98
|
+
- Distill last 50 (recent history only)
|
|
99
|
+
- Distill all (everything — high token cost)
|
|
100
|
+
- Custom (specify a number)
|
|
101
|
+
3. **Run distillation now?** — Choose No to defer to the next session start
|
|
102
|
+
|
|
103
|
+
## Agent Instructions
|
|
104
|
+
|
|
105
|
+
Agent instructions are injected automatically — no manual setup required:
|
|
106
|
+
|
|
107
|
+
- **`loci init`** — Inserts a marker section (`<!-- BEGIN CODEATRIUM -->...<!-- END CODEATRIUM -->`) into `CLAUDE.md`
|
|
108
|
+
- **`loci prime`** — Dynamically injects command usage into the context window at every session start via the SessionStart hook
|
|
109
|
+
|
|
110
|
+
## CLI Commands
|
|
111
|
+
|
|
112
|
+
| Command | Description |
|
|
113
|
+
|---------|-------------|
|
|
114
|
+
| `loci init` | Initialize `.codeatrium/` in project root |
|
|
115
|
+
| `loci index` | Index new session logs |
|
|
116
|
+
| `loci distill [--limit N]` | Distill undistilled exchanges via LLM |
|
|
117
|
+
| `loci search "query" --json` | Semantic search (agent-facing) |
|
|
118
|
+
| `loci context --symbol "name" --json` | Code symbol → past conversations |
|
|
119
|
+
| `loci show "<ref>" --json` | Retrieve verbatim conversation |
|
|
120
|
+
| `loci status` | Show index state |
|
|
121
|
+
| `loci server start/stop/status` | Embedding server management |
|
|
122
|
+
| `loci hook install` | Register hooks in Claude Code settings |
|
|
123
|
+
|
|
124
|
+
## Automation (Claude Code Hooks)
|
|
125
|
+
|
|
126
|
+
After `loci hook install`, everything runs automatically:
|
|
127
|
+
|
|
128
|
+
| Hook | Trigger | Command |
|
|
129
|
+
|------|---------|---------|
|
|
130
|
+
| Stop (async) | After every turn | `loci index` |
|
|
131
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci prime` |
|
|
132
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci server start` |
|
|
133
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci distill` |
|
|
134
|
+
|
|
135
|
+
- **`loci index`** — Runs asynchronously after every turn. Indexes only new exchanges, so it's fast even mid-session
|
|
136
|
+
- **`loci distill`** — Distills undistilled exchanges at session start via `claude --print`. Calls Haiku through the user's Claude Code (default: `claude-haiku-4-5`)
|
|
137
|
+
- **`loci server start`** — Keeps the embedding model (~500MB) resident in memory for sub-0.2s search latency
|
|
138
|
+
|
|
139
|
+
## Search Output
|
|
140
|
+
|
|
141
|
+
```json
|
|
142
|
+
[
|
|
143
|
+
{
|
|
144
|
+
"exchange_core": "Added connection pool with pool_size=5",
|
|
145
|
+
"specific_context": "pool_size=5, max_overflow=10",
|
|
146
|
+
"rooms": [
|
|
147
|
+
{ "room_type": "concept", "room_key": "db-pool", "room_label": "DB connection pooling" }
|
|
148
|
+
],
|
|
149
|
+
"symbols": [
|
|
150
|
+
{ "name": "create_pool", "file": "src/db.py", "line": 42, "signature": "def create_pool(...)" }
|
|
151
|
+
],
|
|
152
|
+
"verbatim_ref": "~/.claude/projects/.../session.jsonl:ply=42"
|
|
153
|
+
}
|
|
154
|
+
]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Configuration
|
|
158
|
+
|
|
159
|
+
`.codeatrium/config.toml` (generated by `loci init`):
|
|
160
|
+
|
|
161
|
+
```toml
|
|
162
|
+
[distill]
|
|
163
|
+
model = "claude-haiku-4-5-20251001" # Model for distillation (default)
|
|
164
|
+
batch_limit = 20 # Max distillations per hook run
|
|
165
|
+
|
|
166
|
+
[index]
|
|
167
|
+
min_chars = 50 # Skip exchanges shorter than this
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Acknowledgments
|
|
171
|
+
|
|
172
|
+
The palace object model, room-based topic grouping, and BM25+HNSW fusion search are based on:
|
|
173
|
+
|
|
174
|
+
> *Structured Distillation for Personalized Agent Memory*
|
|
175
|
+
> (arXiv:2603.13017)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Codeatrium
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml/badge.svg)](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI](https://img.shields.io/pypi/v/codeatrium)](https://pypi.org/project/codeatrium/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
6
|
+
|
|
7
|
+
[English](README.md) · 日本語
|
|
8
|
+
|
|
9
|
+
AI コーディングエージェントに**記憶の宮殿**を。
|
|
10
|
+
|
|
11
|
+
Codeatrium は過去の会話を *palace object* に蒸留し、検索可能なインデックスに保存することで、エージェントに長期記憶を与えます。過去の意思決定・実装・コード位置を 0.2 秒以内に想起できます。
|
|
12
|
+
|
|
13
|
+
CLI コマンド `loci`([Method of Loci=記憶の宮殿](https://ja.wikipedia.org/wiki/%E5%A0%B4%E6%89%80%E6%B3%95)に由来)は**エージェント自身が呼び出す**ことを想定しています。`loci search "..." --json` をプロンプト内から実行します。
|
|
14
|
+
|
|
15
|
+
アーキテクチャは [arXiv:2603.13017](https://arxiv.org/abs/2603.13017) の会話記憶モデルを、コーディングエージェント向けに拡張したものです。
|
|
16
|
+
|
|
17
|
+
> **Note:** 現在は [Claude Code](https://docs.anthropic.com/en/docs/claude-code) 専用です。セッションログ形式(`.jsonl`)と蒸留(`claude --print`)が Claude Code に依存しています。
|
|
18
|
+
|
|
19
|
+
## シンプルなインターフェース
|
|
20
|
+
|
|
21
|
+
エージェントが使用するコマンドは基本2つ。
|
|
22
|
+
|
|
23
|
+
- **セマンティック検索** — `loci search "クエリ"` でセマンティック類似度から過去の会話を検索
|
|
24
|
+
- **コードから逆引き** — `loci context --symbol "名前"` で特定のコードシンボルに関する過去の会話を想起
|
|
25
|
+
- tree-sitter(Python / TypeScript / Go)のシンボル解決により、エージェントは実装意図・背景を把握できる
|
|
26
|
+
|
|
27
|
+
<img src="assets/interface.ja.svg" alt="Simple Interface" width="800">
|
|
28
|
+
|
|
29
|
+
## 仕組み
|
|
30
|
+
|
|
31
|
+
<img src="assets/architecture.svg" alt="Codeatrium Architecture" width="800">
|
|
32
|
+
|
|
33
|
+
1. **Index** — エージェントのセッションログを exchange(ユーザー発話 + エージェント応答のペア)に分割し、FTS5 でキーワード検索可能にする
|
|
34
|
+
2. **Distill** — LLM(`claude --print`、デフォルトは `claude-haiku-4-5`)が各 exchange を palace object に要約: `exchange_core`(何をしたか)、`specific_context`(具体的な詳細)、`room_assignments`(トピックタグ)。tree-sitter で触れたファイルをシンボルレベル(関数・クラス・メソッド + ファイル + 行 + シグネチャ)に解決
|
|
35
|
+
3. **Search** — 会話原文の BM25 と蒸留済み埋め込みの HNSW を RRF で融合するクロスレイヤー検索
|
|
36
|
+
|
|
37
|
+
会話原文は埋め込まず、蒸留で濃縮されたテキストのみを `multilingual-e5-small`(384次元)で埋め込むことで、セマンティック検索の精度と埋め込みコストを両立しています。埋め込みモデルは **Unix ソケットサーバー**で常駐し、初回以降の検索は **0.2 秒以内**で返ります。
|
|
38
|
+
|
|
39
|
+
## インストール
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pipx install codeatrium
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Python 3.11 以上が必要です。
|
|
46
|
+
|
|
47
|
+
## クイックスタート
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# プロジェクトルートで初期化
|
|
51
|
+
loci init
|
|
52
|
+
|
|
53
|
+
# 自動インデックスのフックをインストール
|
|
54
|
+
loci hook install
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
`loci init` を実行すると、過去のセッションログが検出された場合に以下の質問が表示されます:
|
|
58
|
+
|
|
59
|
+
> [!IMPORTANT]
|
|
60
|
+
> 途中からこのツールを導入する場合、すでに大量の exchange が蓄積されています。全件蒸留すると `claude --print` (Haiku) のトークンが大量に消費されるため、まずは `Skip all` か `Distill last 50` で始めることを推奨します。
|
|
61
|
+
|
|
62
|
+
1. **Min chars threshold** — exchange の最小文字数フィルタ(デフォルト: 50文字)。この閾値で蒸留の母数(対象 exchange 数)が決まります。値を大きくすると短い会話が除外され蒸留対象が減り、小さくするとほぼ全ての会話が蒸留対象になりトークン消費が増えます。
|
|
63
|
+
2. **既存 exchange の扱い** — 過去のセッションをどこまで蒸留するか選択:
|
|
64
|
+
- Skip all(過去のセッション蒸留なし)
|
|
65
|
+
- Distill last 50(直近の履歴のみ)
|
|
66
|
+
- Distill all(全件 — トークン消費大)
|
|
67
|
+
- Custom(件数を指定)
|
|
68
|
+
3. **蒸留を今すぐ実行するか** — No を選ぶと次回セッション開始時に自動実行されます
|
|
69
|
+
|
|
70
|
+
## エージェント向けインストラクション
|
|
71
|
+
|
|
72
|
+
エージェントへのインストラクションは自動挿入されるので手動で書く必要はありません:
|
|
73
|
+
|
|
74
|
+
- **`loci init`** — `CLAUDE.md` にマーカー付きセクション(`<!-- BEGIN CODEATRIUM -->...<!-- END CODEATRIUM -->`)を挿入。
|
|
75
|
+
- **`loci prime`** — SessionStart Hook で毎セッション開始時にコマンドの使い方をコンテキストウィンドウに動的注入
|
|
76
|
+
|
|
77
|
+
## CLI コマンド
|
|
78
|
+
|
|
79
|
+
| コマンド | 説明 |
|
|
80
|
+
|---------|------|
|
|
81
|
+
| `loci init` | プロジェクトルートに `.codeatrium/` を初期化 |
|
|
82
|
+
| `loci index` | 新しいセッションログをインデックス |
|
|
83
|
+
| `loci distill [--limit N]` | 未蒸留の exchange を LLM で蒸留 |
|
|
84
|
+
| `loci search "クエリ" --json` | セマンティック検索(エージェント向け) |
|
|
85
|
+
| `loci context --symbol "名前" --json` | コードシンボル → 過去の会話 |
|
|
86
|
+
| `loci show "<ref>" --json` | 会話原文を取得 |
|
|
87
|
+
| `loci status` | インデックス状態を表示 |
|
|
88
|
+
| `loci server start/stop/status` | 埋め込みサーバー管理 |
|
|
89
|
+
| `loci hook install` | Claude Code の設定にフックを登録 |
|
|
90
|
+
|
|
91
|
+
## 自動化(Claude Code フック)
|
|
92
|
+
|
|
93
|
+
`loci hook install` 後、すべて自動で動作します:
|
|
94
|
+
|
|
95
|
+
| フック | トリガー | コマンド |
|
|
96
|
+
|--------|---------|---------|
|
|
97
|
+
| Stop (async) | 毎ラリー後 | `loci index` |
|
|
98
|
+
| SessionStart | 起動時 / `/clear` / `/resume` / `compact` | `loci prime` |
|
|
99
|
+
| SessionStart | 起動時 / `/clear` / `/resume` / `compact` | `loci server start` |
|
|
100
|
+
| SessionStart | 起動時 / `/clear` / `/resume` / `compact` | `loci distill` |
|
|
101
|
+
|
|
102
|
+
- **`loci index`** — 毎ラリー後に非同期で実行。セッション途中でも差分のみインデックスするので高速
|
|
103
|
+
- **`loci distill`** — セッション開始時に未蒸留の exchange を `claude --print` で蒸留。ユーザーの Claude Code で Haiku を呼び出します(デフォルト: `claude-haiku-4-5`)
|
|
104
|
+
- **`loci server start`** — 埋め込みモデル(約500MB)をメモリに常駐させ、以降の検索を 0.2 秒以内に
|
|
105
|
+
|
|
106
|
+
## 検索出力
|
|
107
|
+
|
|
108
|
+
```json
|
|
109
|
+
[
|
|
110
|
+
{
|
|
111
|
+
"exchange_core": "pool_size=5 でコネクションプールを追加した",
|
|
112
|
+
"specific_context": "pool_size=5, max_overflow=10",
|
|
113
|
+
"rooms": [
|
|
114
|
+
{ "room_type": "concept", "room_key": "db-pool", "room_label": "DB コネクションプーリング" }
|
|
115
|
+
],
|
|
116
|
+
"symbols": [
|
|
117
|
+
{ "name": "create_pool", "file": "src/db.py", "line": 42, "signature": "def create_pool(...)" }
|
|
118
|
+
],
|
|
119
|
+
"verbatim_ref": "~/.claude/projects/.../session.jsonl:ply=42"
|
|
120
|
+
}
|
|
121
|
+
]
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## 設定
|
|
125
|
+
|
|
126
|
+
`.codeatrium/config.toml`(`loci init` で生成):
|
|
127
|
+
|
|
128
|
+
```toml
|
|
129
|
+
[distill]
|
|
130
|
+
model = "claude-haiku-4-5-20251001" # 蒸留に使うモデル(デフォルト)
|
|
131
|
+
batch_limit = 20 # 1回あたりの蒸留上限
|
|
132
|
+
|
|
133
|
+
[index]
|
|
134
|
+
min_chars = 50 # この文字数未満の exchange をスキップ
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Acknowledgments
|
|
138
|
+
|
|
139
|
+
Palace object モデル、room ベースのトピックグルーピング、BM25+HNSW 融合検索は以下の論文に基づいています:
|
|
140
|
+
|
|
141
|
+
> *Structured Distillation for Personalized Agent Memory*
|
|
142
|
+
> (arXiv:2603.13017)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
## ライセンス
|
|
146
|
+
|
|
147
|
+
MIT
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# Codeatrium
|
|
2
|
+
|
|
3
|
+
[![CI](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml/badge.svg)](https://github.com/senna-lang/Codeatrium/actions/workflows/ci.yml)
|
|
4
|
+
[![PyPI](https://img.shields.io/pypi/v/codeatrium)](https://pypi.org/project/codeatrium/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
|
|
6
|
+
|
|
7
|
+
A **memory palace** for AI coding agents.
|
|
8
|
+
|
|
9
|
+
English · [日本語](README.ja.md)
|
|
10
|
+
|
|
11
|
+
Codeatrium distills past conversations into *palace objects* and stores them in a searchable index, giving agents long-term memory. Past decisions, implementations, and code locations can be recalled in under 0.2 seconds.
|
|
12
|
+
|
|
13
|
+
The CLI command `loci` (from [Method of Loci](https://en.wikipedia.org/wiki/Method_of_loci)) is designed to be **called by the agent itself** — running `loci search "..." --json` from within a prompt.
|
|
14
|
+
|
|
15
|
+
The architecture extends the conversational memory model from [arXiv:2603.13017](https://arxiv.org/abs/2603.13017) for coding agents.
|
|
16
|
+
|
|
17
|
+
> **Note:** Currently [Claude Code](https://docs.anthropic.com/en/docs/claude-code) only. Session log format (`.jsonl`) and distillation (`claude --print`) depend on Claude Code.
|
|
18
|
+
|
|
19
|
+
## Simple Interface
|
|
20
|
+
|
|
21
|
+
Agents use two core commands:
|
|
22
|
+
|
|
23
|
+
- **Semantic search** — `loci search "query"` retrieves past conversations by semantic similarity
|
|
24
|
+
- **Reverse lookup from code** — `loci context --symbol "name"` recalls past conversations about a specific code symbol
|
|
25
|
+
- tree-sitter symbol resolution (Python / TypeScript / Go) lets agents understand implementation intent before editing
|
|
26
|
+
|
|
27
|
+
<img src="assets/interface.en.svg" alt="Simple Interface" width="800">
|
|
28
|
+
|
|
29
|
+
## How It Works
|
|
30
|
+
|
|
31
|
+
<img src="assets/architecture.svg" alt="Codeatrium Architecture" width="800">
|
|
32
|
+
|
|
33
|
+
1. **Index** — Splits agent session logs into exchanges (user utterance + agent response pairs) and indexes them with FTS5 for keyword search
|
|
34
|
+
2. **Distill** — An LLM (`claude --print`, default `claude-haiku-4-5`) summarizes each exchange into a palace object: `exchange_core` (what was done), `specific_context` (concrete details), `room_assignments` (topic tags). tree-sitter resolves touched files to symbol level (function/class/method + file + line + signature)
|
|
35
|
+
3. **Search** — Cross-layer search fusing BM25 on verbatim text with HNSW on distilled embeddings via RRF
|
|
36
|
+
|
|
37
|
+
Raw conversations are not embedded — only the condensed distilled text is embedded with `multilingual-e5-small` (384-dim), balancing semantic search quality with embedding cost. The embedding model runs as a **Unix socket server**, keeping search latency **under 0.2 seconds** after the first load.
|
|
38
|
+
|
|
39
|
+
## Installation
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pipx install codeatrium
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Requires Python 3.11+.
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Initialize in project root
|
|
51
|
+
loci init
|
|
52
|
+
|
|
53
|
+
# Install hooks for automatic indexing
|
|
54
|
+
loci hook install
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
When running `loci init`, if past session logs are detected, you'll be prompted with:
|
|
58
|
+
|
|
59
|
+
> [!IMPORTANT]
|
|
60
|
+
> When adopting this tool mid-project, a large number of exchanges may already exist. Distilling all of them will consume significant `claude --print` (Haiku) tokens. We recommend starting with `Skip all` or `Distill last 50`.
|
|
61
|
+
|
|
62
|
+
1. **Min chars threshold** — Minimum character filter for exchanges (default: 50). This controls how many exchanges become distillation candidates. Higher values exclude short conversations and reduce token usage; lower values include nearly everything.
|
|
63
|
+
2. **Handling existing exchanges** — Choose how much past history to distill:
|
|
64
|
+
- Skip all (no past session distillation)
|
|
65
|
+
- Distill last 50 (recent history only)
|
|
66
|
+
- Distill all (everything — high token cost)
|
|
67
|
+
- Custom (specify a number)
|
|
68
|
+
3. **Run distillation now?** — Choose No to defer to the next session start
|
|
69
|
+
|
|
70
|
+
## Agent Instructions
|
|
71
|
+
|
|
72
|
+
Agent instructions are injected automatically — no manual setup required:
|
|
73
|
+
|
|
74
|
+
- **`loci init`** — Inserts a marker section (`<!-- BEGIN CODEATRIUM -->...<!-- END CODEATRIUM -->`) into `CLAUDE.md`
|
|
75
|
+
- **`loci prime`** — Dynamically injects command usage into the context window at every session start via the SessionStart hook
|
|
76
|
+
|
|
77
|
+
## CLI Commands
|
|
78
|
+
|
|
79
|
+
| Command | Description |
|
|
80
|
+
|---------|-------------|
|
|
81
|
+
| `loci init` | Initialize `.codeatrium/` in project root |
|
|
82
|
+
| `loci index` | Index new session logs |
|
|
83
|
+
| `loci distill [--limit N]` | Distill undistilled exchanges via LLM |
|
|
84
|
+
| `loci search "query" --json` | Semantic search (agent-facing) |
|
|
85
|
+
| `loci context --symbol "name" --json` | Code symbol → past conversations |
|
|
86
|
+
| `loci show "<ref>" --json` | Retrieve verbatim conversation |
|
|
87
|
+
| `loci status` | Show index state |
|
|
88
|
+
| `loci server start/stop/status` | Embedding server management |
|
|
89
|
+
| `loci hook install` | Register hooks in Claude Code settings |
|
|
90
|
+
|
|
91
|
+
## Automation (Claude Code Hooks)
|
|
92
|
+
|
|
93
|
+
After `loci hook install`, everything runs automatically:
|
|
94
|
+
|
|
95
|
+
| Hook | Trigger | Command |
|
|
96
|
+
|------|---------|---------|
|
|
97
|
+
| Stop (async) | After every turn | `loci index` |
|
|
98
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci prime` |
|
|
99
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci server start` |
|
|
100
|
+
| SessionStart | startup / `/clear` / `/resume` / `compact` | `loci distill` |
|
|
101
|
+
|
|
102
|
+
- **`loci index`** — Runs asynchronously after every turn. Indexes only new exchanges, so it's fast even mid-session
|
|
103
|
+
- **`loci distill`** — Distills undistilled exchanges at session start via `claude --print`. Calls Haiku through the user's Claude Code (default: `claude-haiku-4-5`)
|
|
104
|
+
- **`loci server start`** — Keeps the embedding model (~500MB) resident in memory for sub-0.2s search latency
|
|
105
|
+
|
|
106
|
+
## Search Output
|
|
107
|
+
|
|
108
|
+
```json
|
|
109
|
+
[
|
|
110
|
+
{
|
|
111
|
+
"exchange_core": "Added connection pool with pool_size=5",
|
|
112
|
+
"specific_context": "pool_size=5, max_overflow=10",
|
|
113
|
+
"rooms": [
|
|
114
|
+
{ "room_type": "concept", "room_key": "db-pool", "room_label": "DB connection pooling" }
|
|
115
|
+
],
|
|
116
|
+
"symbols": [
|
|
117
|
+
{ "name": "create_pool", "file": "src/db.py", "line": 42, "signature": "def create_pool(...)" }
|
|
118
|
+
],
|
|
119
|
+
"verbatim_ref": "~/.claude/projects/.../session.jsonl:ply=42"
|
|
120
|
+
}
|
|
121
|
+
]
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
`.codeatrium/config.toml` (generated by `loci init`):
|
|
127
|
+
|
|
128
|
+
```toml
|
|
129
|
+
[distill]
|
|
130
|
+
model = "claude-haiku-4-5-20251001" # Model for distillation (default)
|
|
131
|
+
batch_limit = 20 # Max distillations per hook run
|
|
132
|
+
|
|
133
|
+
[index]
|
|
134
|
+
min_chars = 50 # Skip exchanges shorter than this
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
## Acknowledgments
|
|
138
|
+
|
|
139
|
+
The palace object model, room-based topic grouping, and BM25+HNSW fusion search are based on:
|
|
140
|
+
|
|
141
|
+
> *Structured Distillation for Personalized Agent Memory*
|
|
142
|
+
> (arXiv:2603.13017)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
## License
|
|
146
|
+
|
|
147
|
+
MIT
|