sage-memory 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sage_memory-0.1.0/.gitignore +9 -0
- sage_memory-0.1.0/PKG-INFO +318 -0
- sage_memory-0.1.0/README.md +303 -0
- sage_memory-0.1.0/pyproject.toml +25 -0
- sage_memory-0.1.0/src/sage_memory/__init__.py +8 -0
- sage_memory-0.1.0/src/sage_memory/__main__.py +3 -0
- sage_memory-0.1.0/src/sage_memory/db.py +190 -0
- sage_memory-0.1.0/src/sage_memory/embedder.py +175 -0
- sage_memory-0.1.0/src/sage_memory/migrations/001_initial.sql +53 -0
- sage_memory-0.1.0/src/sage_memory/search.py +328 -0
- sage_memory-0.1.0/src/sage_memory/server.py +213 -0
- sage_memory-0.1.0/src/sage_memory/store.py +222 -0
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sage-memory
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Ultrafast local MCP memory for LLMs — project-aware, zero-config
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: mcp>=1.0.0
|
|
8
|
+
Requires-Dist: sqlite-vec>=0.1.6
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == 'dev'
|
|
11
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
12
|
+
Provides-Extra: neural
|
|
13
|
+
Requires-Dist: fastembed>=0.4.0; extra == 'neural'
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# sage-memory
|
|
17
|
+
|
|
18
|
+
Persistent memory for AI coding assistants. Project-aware, zero-config, sub-3ms search.
|
|
19
|
+
|
|
20
|
+
sage-memory is an [MCP](https://modelcontextprotocol.io) server that gives LLMs long-term memory scoped to your project. Your AI assistant stores what it learns about your codebase — architecture decisions, patterns, gotchas, conventions — and retrieves it in future sessions.
|
|
21
|
+
|
|
22
|
+
- **91% recall** on natural language queries (BM25 ranking with OR semantics)
|
|
23
|
+
- **Sub-3ms search**, ~1,000 writes/sec on real codebases
|
|
24
|
+
- **Project-isolated** — each codebase gets its own database at `.sage-memory/`
|
|
25
|
+
- **Dual-scope** — project knowledge + global preferences, merged and ranked automatically
|
|
26
|
+
- **Zero configuration** — auto-detects project root, creates database, routes queries
|
|
27
|
+
- **2 dependencies, ~1,100 lines** — lean, auditable, no ML stack required
|
|
28
|
+
- **Works with** Claude Code, Cursor, Windsurf, VS Code, and any MCP-compatible client
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
~/code/billing-service/
|
|
32
|
+
.sage-memory/memory.db ← this project's knowledge (auto-created)
|
|
33
|
+
src/
|
|
34
|
+
tests/
|
|
35
|
+
|
|
36
|
+
~/.sage-memory/memory.db ← your cross-project patterns & preferences
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Why
|
|
40
|
+
|
|
41
|
+
LLMs forget everything between sessions. Every time you open your editor, your AI assistant starts from scratch — re-reading files, re-discovering patterns, re-learning your codebase's quirks. sage-memory fixes this.
|
|
42
|
+
|
|
43
|
+
When your assistant figures out that "the billing service uses a saga pattern with compensating transactions," it stores that understanding. Next session, when you ask it to add a new payment method, it searches memory first and immediately has the architectural context it needs.
|
|
44
|
+
|
|
45
|
+
The knowledge lives *with your project*, not in a cloud service. Each project gets its own SQLite database. Your private codebase knowledge never leaves your machine.
|
|
46
|
+
|
|
47
|
+
## Setup
|
|
48
|
+
|
|
49
|
+
Add sage-memory to your MCP client config. With [uv](https://docs.astral.sh/uv/), it installs and runs automatically — no manual `pip install` needed.
|
|
50
|
+
|
|
51
|
+
> **Don't have uv?** It's a fast, modern Python package manager. Install it in one command:
|
|
52
|
+
>
|
|
53
|
+
> macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
|
54
|
+
>
|
|
55
|
+
> Windows: `powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"`
|
|
56
|
+
>
|
|
57
|
+
> See the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/) for more options.
|
|
58
|
+
|
|
59
|
+
### Claude Code
|
|
60
|
+
|
|
61
|
+
In `~/.claude.json` (or your project's `.claude.json`):
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"memory": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["sage-memory"]
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Cursor
|
|
75
|
+
|
|
76
|
+
In `.cursor/mcp.json`:
|
|
77
|
+
|
|
78
|
+
```json
|
|
79
|
+
{
|
|
80
|
+
"mcpServers": {
|
|
81
|
+
"memory": {
|
|
82
|
+
"command": "uvx",
|
|
83
|
+
"args": ["sage-memory"]
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
That's it. No paths, no tokens, no database URLs. sage-memory detects your project root automatically.
|
|
90
|
+
|
|
91
|
+
<details>
|
|
92
|
+
<summary><b>Alternative: install with pip</b></summary>
|
|
93
|
+
|
|
94
|
+
If you prefer managing the installation yourself:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install sage-memory
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Then use `"command": "sage-memory"` in your MCP config instead of `uvx`:
|
|
101
|
+
|
|
102
|
+
```json
|
|
103
|
+
{
|
|
104
|
+
"mcpServers": {
|
|
105
|
+
"memory": {
|
|
106
|
+
"command": "sage-memory"
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
For neural embeddings (higher recall on semantic queries):
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
pip install "sage-memory[neural]"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
</details>
|
|
119
|
+
|
|
120
|
+
## How It Works
|
|
121
|
+
|
|
122
|
+
### Two databases, automatic routing
|
|
123
|
+
|
|
124
|
+
sage-memory manages two databases transparently:
|
|
125
|
+
|
|
126
|
+
**Project DB** (`.sage-memory/memory.db` at your project root) stores knowledge specific to this codebase — architecture, patterns, domain logic, debugging insights.
|
|
127
|
+
|
|
128
|
+
**Global DB** (`~/.sage-memory/memory.db`) stores cross-project knowledge — your coding conventions, preferred tools, style preferences.
|
|
129
|
+
|
|
130
|
+
Every search query hits both databases. Project results rank higher. You never think about which database to use — `scope: "project"` (default) writes to the project DB, `scope: "global"` writes to global.
|
|
131
|
+
|
|
132
|
+
### Search pipeline
|
|
133
|
+
|
|
134
|
+
```
|
|
135
|
+
query ─── FTS5 BM25 (OR semantics, stopword removal, prefix matching)
|
|
136
|
+
│
|
|
137
|
+
├── term-frequency filtering (drops terms matching >20% of corpus)
|
|
138
|
+
│
|
|
139
|
+
├── optional: sqlite-vec cosine similarity (when neural embedder installed)
|
|
140
|
+
│
|
|
141
|
+
└── Reciprocal Rank Fusion → normalize [0,1]
|
|
142
|
+
→ project priority boost (+10%)
|
|
143
|
+
→ tag match boost (+3% each, cap 15%)
|
|
144
|
+
→ recency tiebreaker (14-day half-life)
|
|
145
|
+
→ deduplicate across DBs
|
|
146
|
+
→ top-k results
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
The key design choice: **FTS5 with OR semantics, not AND.** When you search "how does payment failure handling work," BM25 ranks documents by how many query terms match and how rare those terms are. A document matching 4 of 6 terms scores higher than one matching 1 of 6. AND semantics require ALL terms to match — which returns zero results for most natural language queries.
|
|
150
|
+
|
|
151
|
+
### Store pipeline
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
content ─── normalize ─── SHA-256 hash ─── dedup check
|
|
155
|
+
─── INSERT + FTS5 trigger ─── commit [< 1ms]
|
|
156
|
+
─── embed + vec INSERT (if neural backend) [deferred]
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Embedding is decoupled from storage. The memory is keyword-searchable the instant it's stored. Vector indexing happens separately and only when a neural embedder is installed. If embedding fails, nothing is lost.
|
|
160
|
+
|
|
161
|
+
## Tools
|
|
162
|
+
|
|
163
|
+
### memory_store
|
|
164
|
+
|
|
165
|
+
Store knowledge for later retrieval. The AI assistant calls this when it understands something worth remembering.
|
|
166
|
+
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"content": "The billing service uses a saga pattern for multi-step payment processing. PaymentOrchestrator coordinates between StripeGateway, LedgerService, and NotificationService. Failures at any step trigger compensating transactions defined in saga_rollback_handlers.",
|
|
170
|
+
"title": "Payment saga orchestration in billing service",
|
|
171
|
+
"tags": ["billing", "payments", "architecture"],
|
|
172
|
+
"scope": "project"
|
|
173
|
+
}
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Content is SHA-256 hashed for automatic deduplication. If the same content is stored twice, sage-memory returns the existing entry's ID instead of creating a duplicate.
|
|
177
|
+
|
|
178
|
+
### memory_search
|
|
179
|
+
|
|
180
|
+
Search across project and global knowledge using natural language.
|
|
181
|
+
|
|
182
|
+
```json
|
|
183
|
+
{
|
|
184
|
+
"query": "how does payment failure handling work",
|
|
185
|
+
"tags": ["billing"],
|
|
186
|
+
"limit": 5
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
Results include a relevance score, source database label (`project` or `global`), and the full stored content. The assistant uses this to ground its responses in project-specific context.
|
|
191
|
+
|
|
192
|
+
### memory_update
|
|
193
|
+
|
|
194
|
+
Update existing knowledge when understanding deepens or code changes. Only provide fields you want to change. Content changes automatically re-index for search.
|
|
195
|
+
|
|
196
|
+
### memory_delete
|
|
197
|
+
|
|
198
|
+
Remove knowledge by ID when it becomes outdated or incorrect.
|
|
199
|
+
|
|
200
|
+
### memory_list
|
|
201
|
+
|
|
202
|
+
Browse stored memories with pagination. Useful for auditing what the assistant has learned about your codebase.
|
|
203
|
+
|
|
204
|
+
## Best Workflow: Capture Knowledge
|
|
205
|
+
|
|
206
|
+
sage-memory is most effective with a deliberate knowledge-capture workflow:
|
|
207
|
+
|
|
208
|
+
1. **Explore**: Ask your AI assistant to analyze a module, service, or subsystem
|
|
209
|
+
2. **Understand**: It reads source code, traces dependencies, identifies patterns
|
|
210
|
+
3. **Store**: It persists its understanding via `memory_store` with a descriptive title, detailed content, and relevant tags
|
|
211
|
+
4. **Document** (optional): It creates a `docs/ai/knowledge-{name}.md` companion file in your repo for human reference
|
|
212
|
+
5. **Retrieve**: On future tasks, it searches memory first for relevant context before reading code
|
|
213
|
+
|
|
214
|
+
This works because the AI writes both the stored content and later queries using consistent domain vocabulary — making keyword search highly effective without neural embeddings.
|
|
215
|
+
|
|
216
|
+
Example prompt to trigger this workflow:
|
|
217
|
+
|
|
218
|
+
> Analyze the authentication system in this project. Understand how it works, what patterns it uses, and store your understanding in memory for future sessions.
|
|
219
|
+
|
|
220
|
+
## Performance
|
|
221
|
+
|
|
222
|
+
Benchmarked against 4 real Python codebases (FastAPI, Pydantic, httpx, Rich — 340K lines total) with 50 evaluation queries across 5 categories, including an adversarial set.
|
|
223
|
+
|
|
224
|
+
### Scale
|
|
225
|
+
|
|
226
|
+
| Memories | Store mean | Throughput | Search mean | Search P95 | Recall |
|
|
227
|
+
|----------|-----------|------------|-------------|------------|--------|
|
|
228
|
+
| 1,000 | 1.0ms | 1,000/s | 2.5ms | 9ms | 80% |
|
|
229
|
+
| 5,000 | 0.9ms | 1,100/s | 12ms | 56ms | 81% |
|
|
230
|
+
| 10,000 | 0.9ms | 1,055/s | 21ms | 72ms | 83% |
|
|
231
|
+
| 22,000 | 1.0ms | 1,000/s | 46ms | 101ms | 83% |
|
|
232
|
+
|
|
233
|
+
Store throughput holds steady at ~1,000 memories/sec regardless of database size. Search stays under 50ms for typical per-project databases (< 15K memories).
|
|
234
|
+
|
|
235
|
+
### Recall by query category
|
|
236
|
+
|
|
237
|
+
| Category | Recall | What it tests |
|
|
238
|
+
|---|---|---|
|
|
239
|
+
| Exact API lookups | 95% | Finding specific classes, functions, APIs by name |
|
|
240
|
+
| Scoped queries | 91% | Same query, different project scopes |
|
|
241
|
+
| Semantic paraphrases | 66%* | Natural language with no keyword overlap |
|
|
242
|
+
| Cross-codebase | 68% | Searching across multiple codebases |
|
|
243
|
+
| Adversarial | 60% | Typos, single words, very long queries, edge cases |
|
|
244
|
+
|
|
245
|
+
\* Semantic recall reaches 85%+ with the optional neural embedder installed (`pip install "sage-memory[neural]"`).
|
|
246
|
+
|
|
247
|
+
### LLM-authored content (the real use case)
|
|
248
|
+
|
|
249
|
+
When tested with genuine capture-knowledge content — LLM-written understanding of the httpx codebase, not raw code chunks — retrieval quality jumps significantly:
|
|
250
|
+
|
|
251
|
+
| Query type | Recall |
|
|
252
|
+
|---|---|
|
|
253
|
+
| Exact API lookups | 100% |
|
|
254
|
+
| Developer workflow questions | 100% |
|
|
255
|
+
| Architecture questions | 100% |
|
|
256
|
+
| Semantic paraphrases | 81% |
|
|
257
|
+
| Adversarial | 83% |
|
|
258
|
+
| **Overall** | **91%** |
|
|
259
|
+
|
|
260
|
+
LLM-authored knowledge retrieves better because the AI uses consistent domain vocabulary when writing both the stored content and later queries — a natural fit for BM25 keyword ranking.
|
|
261
|
+
|
|
262
|
+
## Optional: Neural Embeddings
|
|
263
|
+
|
|
264
|
+
The default installation uses a zero-dependency local embedder (character n-gram TF-IDF hashing) that handles morphological similarity — "authenticate" ↔ "authentication" ↔ "auth" produce similar vectors. This is effective for LLM-authored content where vocabulary is consistent.
|
|
265
|
+
|
|
266
|
+
For higher recall on semantic queries with vocabulary gaps, install the neural backend:
|
|
267
|
+
|
|
268
|
+
```bash
|
|
269
|
+
pip install "sage-memory[neural]"
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
This adds [fastembed](https://github.com/qdrant/fastembed) with a 30MB ONNX model. No code changes needed — sage-memory detects the backend automatically and enables hybrid search (FTS5 + vector similarity fused via Reciprocal Rank Fusion).
|
|
273
|
+
|
|
274
|
+
## Architecture
|
|
275
|
+
|
|
276
|
+
```
|
|
277
|
+
7 files · ~1,100 lines of Python · 2 required dependencies (mcp, sqlite-vec)
|
|
278
|
+
|
|
279
|
+
src/sage_memory/
|
|
280
|
+
├── server.py 213 lines MCP server, 5 tools, dict-based dispatch
|
|
281
|
+
├── search.py 328 lines Dual-DB search, FTS5 OR, RRF fusion, access tracking
|
|
282
|
+
├── store.py 222 lines Store, update, delete, list, deferred embedding
|
|
283
|
+
├── embedder.py 175 lines Embedder protocol + local + optional neural
|
|
284
|
+
├── db.py 190 lines Project detection, dual DB connections, migrations
|
|
285
|
+
└── migrations/
|
|
286
|
+
└── 001_initial.sql 53 lines Schema: memories + FTS5 + vec0 index
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### Design principles
|
|
290
|
+
|
|
291
|
+
**Lean.** 7 source files, ~1,100 lines. No frameworks, no abstractions beyond what's needed. Every line earns its place.
|
|
292
|
+
|
|
293
|
+
**Two dependencies.** `mcp` (the protocol) and `sqlite-vec` (vector search extension). The neural embedder is optional. No PyTorch, no heavy ML stack by default.
|
|
294
|
+
|
|
295
|
+
**Project-local databases.** Each project gets its own SQLite file. No cross-project noise. No scaling problems — a single project rarely exceeds 15K memories, which is FTS5's sweet spot.
|
|
296
|
+
|
|
297
|
+
**Zero configuration.** Auto-detects project root. Auto-creates database. Auto-routes to project or global scope. The developer adds one MCP config block and never thinks about it again.
|
|
298
|
+
|
|
299
|
+
**Correctness over cleverness.** FTS5 OR because AND breaks natural language queries. Normalized RRF scores because raw scores have incompatible scales. Content-hash dedup because LLMs will store the same insight repeatedly. Deferred embedding because the write path shouldn't depend on the slowest component.
|
|
300
|
+
|
|
301
|
+
## Development
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
git clone https://github.com/<your-org>/sage-memory.git
|
|
305
|
+
cd sage-memory
|
|
306
|
+
pip install -e ".[dev]"
|
|
307
|
+
pytest
|
|
308
|
+
```
|
|
309
|
+
|
|
310
|
+
Run the server locally:
|
|
311
|
+
|
|
312
|
+
```bash
|
|
313
|
+
python -m sage_memory
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
## License
|
|
317
|
+
|
|
318
|
+
MIT
|
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
# sage-memory
|
|
2
|
+
|
|
3
|
+
Persistent memory for AI coding assistants. Project-aware, zero-config, sub-3ms search.
|
|
4
|
+
|
|
5
|
+
sage-memory is an [MCP](https://modelcontextprotocol.io) server that gives LLMs long-term memory scoped to your project. Your AI assistant stores what it learns about your codebase — architecture decisions, patterns, gotchas, conventions — and retrieves it in future sessions.
|
|
6
|
+
|
|
7
|
+
- **91% recall** on natural language queries (BM25 ranking with OR semantics)
|
|
8
|
+
- **Sub-3ms search**, ~1,000 writes/sec on real codebases
|
|
9
|
+
- **Project-isolated** — each codebase gets its own database at `.sage-memory/`
|
|
10
|
+
- **Dual-scope** — project knowledge + global preferences, merged and ranked automatically
|
|
11
|
+
- **Zero configuration** — auto-detects project root, creates database, routes queries
|
|
12
|
+
- **2 dependencies, ~1,100 lines** — lean, auditable, no ML stack required
|
|
13
|
+
- **Works with** Claude Code, Cursor, Windsurf, VS Code, and any MCP-compatible client
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
~/code/billing-service/
|
|
17
|
+
.sage-memory/memory.db ← this project's knowledge (auto-created)
|
|
18
|
+
src/
|
|
19
|
+
tests/
|
|
20
|
+
|
|
21
|
+
~/.sage-memory/memory.db ← your cross-project patterns & preferences
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Why
|
|
25
|
+
|
|
26
|
+
LLMs forget everything between sessions. Every time you open your editor, your AI assistant starts from scratch — re-reading files, re-discovering patterns, re-learning your codebase's quirks. sage-memory fixes this.
|
|
27
|
+
|
|
28
|
+
When your assistant figures out that "the billing service uses a saga pattern with compensating transactions," it stores that understanding. Next session, when you ask it to add a new payment method, it searches memory first and immediately has the architectural context it needs.
|
|
29
|
+
|
|
30
|
+
The knowledge lives *with your project*, not in a cloud service. Each project gets its own SQLite database. Your private codebase knowledge never leaves your machine.
|
|
31
|
+
|
|
32
|
+
## Setup
|
|
33
|
+
|
|
34
|
+
Add sage-memory to your MCP client config. With [uv](https://docs.astral.sh/uv/), it installs and runs automatically — no manual `pip install` needed.
|
|
35
|
+
|
|
36
|
+
> **Don't have uv?** It's a fast, modern Python package manager. Install it in one command:
|
|
37
|
+
>
|
|
38
|
+
> macOS / Linux: `curl -LsSf https://astral.sh/uv/install.sh | sh`
|
|
39
|
+
>
|
|
40
|
+
> Windows: `powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 | iex"`
|
|
41
|
+
>
|
|
42
|
+
> See the [uv installation guide](https://docs.astral.sh/uv/getting-started/installation/) for more options.
|
|
43
|
+
|
|
44
|
+
### Claude Code
|
|
45
|
+
|
|
46
|
+
In `~/.claude.json` (or your project's `.claude.json`):
|
|
47
|
+
|
|
48
|
+
```json
|
|
49
|
+
{
|
|
50
|
+
"mcpServers": {
|
|
51
|
+
"memory": {
|
|
52
|
+
"command": "uvx",
|
|
53
|
+
"args": ["sage-memory"]
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Cursor
|
|
60
|
+
|
|
61
|
+
In `.cursor/mcp.json`:
|
|
62
|
+
|
|
63
|
+
```json
|
|
64
|
+
{
|
|
65
|
+
"mcpServers": {
|
|
66
|
+
"memory": {
|
|
67
|
+
"command": "uvx",
|
|
68
|
+
"args": ["sage-memory"]
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
That's it. No paths, no tokens, no database URLs. sage-memory detects your project root automatically.
|
|
75
|
+
|
|
76
|
+
<details>
|
|
77
|
+
<summary><b>Alternative: install with pip</b></summary>
|
|
78
|
+
|
|
79
|
+
If you prefer managing the installation yourself:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install sage-memory
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Then use `"command": "sage-memory"` in your MCP config instead of `uvx`:
|
|
86
|
+
|
|
87
|
+
```json
|
|
88
|
+
{
|
|
89
|
+
"mcpServers": {
|
|
90
|
+
"memory": {
|
|
91
|
+
"command": "sage-memory"
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
For neural embeddings (higher recall on semantic queries):
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
pip install "sage-memory[neural]"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
</details>
|
|
104
|
+
|
|
105
|
+
## How It Works
|
|
106
|
+
|
|
107
|
+
### Two databases, automatic routing
|
|
108
|
+
|
|
109
|
+
sage-memory manages two databases transparently:
|
|
110
|
+
|
|
111
|
+
**Project DB** (`.sage-memory/memory.db` at your project root) stores knowledge specific to this codebase — architecture, patterns, domain logic, debugging insights.
|
|
112
|
+
|
|
113
|
+
**Global DB** (`~/.sage-memory/memory.db`) stores cross-project knowledge — your coding conventions, preferred tools, style preferences.
|
|
114
|
+
|
|
115
|
+
Every search query hits both databases. Project results rank higher. You never think about which database to use — `scope: "project"` (default) writes to the project DB, `scope: "global"` writes to global.
|
|
116
|
+
|
|
117
|
+
### Search pipeline
|
|
118
|
+
|
|
119
|
+
```
|
|
120
|
+
query ─── FTS5 BM25 (OR semantics, stopword removal, prefix matching)
|
|
121
|
+
│
|
|
122
|
+
├── term-frequency filtering (drops terms matching >20% of corpus)
|
|
123
|
+
│
|
|
124
|
+
├── optional: sqlite-vec cosine similarity (when neural embedder installed)
|
|
125
|
+
│
|
|
126
|
+
└── Reciprocal Rank Fusion → normalize [0,1]
|
|
127
|
+
→ project priority boost (+10%)
|
|
128
|
+
→ tag match boost (+3% each, cap 15%)
|
|
129
|
+
→ recency tiebreaker (14-day half-life)
|
|
130
|
+
→ deduplicate across DBs
|
|
131
|
+
→ top-k results
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
The key design choice: **FTS5 with OR semantics, not AND.** When you search "how does payment failure handling work," BM25 ranks documents by how many query terms match and how rare those terms are. A document matching 4 of 6 terms scores higher than one matching 1 of 6. AND semantics require ALL terms to match — which returns zero results for most natural language queries.
|
|
135
|
+
|
|
136
|
+
### Store pipeline
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
content ─── normalize ─── SHA-256 hash ─── dedup check
|
|
140
|
+
─── INSERT + FTS5 trigger ─── commit [< 1ms]
|
|
141
|
+
─── embed + vec INSERT (if neural backend) [deferred]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Embedding is decoupled from storage. The memory is keyword-searchable the instant it's stored. Vector indexing happens separately and only when a neural embedder is installed. If embedding fails, nothing is lost.
|
|
145
|
+
|
|
146
|
+
## Tools
|
|
147
|
+
|
|
148
|
+
### memory_store
|
|
149
|
+
|
|
150
|
+
Store knowledge for later retrieval. The AI assistant calls this when it understands something worth remembering.
|
|
151
|
+
|
|
152
|
+
```json
|
|
153
|
+
{
|
|
154
|
+
"content": "The billing service uses a saga pattern for multi-step payment processing. PaymentOrchestrator coordinates between StripeGateway, LedgerService, and NotificationService. Failures at any step trigger compensating transactions defined in saga_rollback_handlers.",
|
|
155
|
+
"title": "Payment saga orchestration in billing service",
|
|
156
|
+
"tags": ["billing", "payments", "architecture"],
|
|
157
|
+
"scope": "project"
|
|
158
|
+
}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
Content is SHA-256 hashed for automatic deduplication. If the same content is stored twice, sage-memory returns the existing entry's ID instead of creating a duplicate.
|
|
162
|
+
|
|
163
|
+
### memory_search
|
|
164
|
+
|
|
165
|
+
Search across project and global knowledge using natural language.
|
|
166
|
+
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"query": "how does payment failure handling work",
|
|
170
|
+
"tags": ["billing"],
|
|
171
|
+
"limit": 5
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
Results include a relevance score, source database label (`project` or `global`), and the full stored content. The assistant uses this to ground its responses in project-specific context.
|
|
176
|
+
|
|
177
|
+
### memory_update
|
|
178
|
+
|
|
179
|
+
Update existing knowledge when understanding deepens or code changes. Only provide fields you want to change. Content changes automatically re-index for search.
|
|
180
|
+
|
|
181
|
+
### memory_delete
|
|
182
|
+
|
|
183
|
+
Remove knowledge by ID when it becomes outdated or incorrect.
|
|
184
|
+
|
|
185
|
+
### memory_list
|
|
186
|
+
|
|
187
|
+
Browse stored memories with pagination. Useful for auditing what the assistant has learned about your codebase.
|
|
188
|
+
|
|
189
|
+
## Best Workflow: Capture Knowledge
|
|
190
|
+
|
|
191
|
+
sage-memory is most effective with a deliberate knowledge-capture workflow:
|
|
192
|
+
|
|
193
|
+
1. **Explore**: Ask your AI assistant to analyze a module, service, or subsystem
|
|
194
|
+
2. **Understand**: It reads source code, traces dependencies, identifies patterns
|
|
195
|
+
3. **Store**: It persists its understanding via `memory_store` with a descriptive title, detailed content, and relevant tags
|
|
196
|
+
4. **Document** (optional): It creates a `docs/ai/knowledge-{name}.md` companion file in your repo for human reference
|
|
197
|
+
5. **Retrieve**: On future tasks, it searches memory first for relevant context before reading code
|
|
198
|
+
|
|
199
|
+
This works because the AI writes both the stored content and later queries using consistent domain vocabulary — making keyword search highly effective without neural embeddings.
|
|
200
|
+
|
|
201
|
+
Example prompt to trigger this workflow:
|
|
202
|
+
|
|
203
|
+
> Analyze the authentication system in this project. Understand how it works, what patterns it uses, and store your understanding in memory for future sessions.
|
|
204
|
+
|
|
205
|
+
## Performance
|
|
206
|
+
|
|
207
|
+
Benchmarked against 4 real Python codebases (FastAPI, Pydantic, httpx, Rich — 340K lines total) with 50 evaluation queries across 5 categories, including an adversarial set.
|
|
208
|
+
|
|
209
|
+
### Scale
|
|
210
|
+
|
|
211
|
+
| Memories | Store mean | Throughput | Search mean | Search P95 | Recall |
|
|
212
|
+
|----------|-----------|------------|-------------|------------|--------|
|
|
213
|
+
| 1,000 | 1.0ms | 1,000/s | 2.5ms | 9ms | 80% |
|
|
214
|
+
| 5,000 | 0.9ms | 1,100/s | 12ms | 56ms | 81% |
|
|
215
|
+
| 10,000 | 0.9ms | 1,055/s | 21ms | 72ms | 83% |
|
|
216
|
+
| 22,000 | 1.0ms | 1,000/s | 46ms | 101ms | 83% |
|
|
217
|
+
|
|
218
|
+
Store throughput holds steady at ~1,000 memories/sec regardless of database size. Search stays under 50ms for typical per-project databases (< 15K memories).
|
|
219
|
+
|
|
220
|
+
### Recall by query category
|
|
221
|
+
|
|
222
|
+
| Category | Recall | What it tests |
|
|
223
|
+
|---|---|---|
|
|
224
|
+
| Exact API lookups | 95% | Finding specific classes, functions, APIs by name |
|
|
225
|
+
| Scoped queries | 91% | Same query, different project scopes |
|
|
226
|
+
| Semantic paraphrases | 66%* | Natural language with no keyword overlap |
|
|
227
|
+
| Cross-codebase | 68% | Searching across multiple codebases |
|
|
228
|
+
| Adversarial | 60% | Typos, single words, very long queries, edge cases |
|
|
229
|
+
|
|
230
|
+
\* Semantic recall reaches 85%+ with the optional neural embedder installed (`pip install "sage-memory[neural]"`).
|
|
231
|
+
|
|
232
|
+
### LLM-authored content (the real use case)
|
|
233
|
+
|
|
234
|
+
When tested with genuine capture-knowledge content — LLM-written understanding of the httpx codebase, not raw code chunks — retrieval quality jumps significantly:
|
|
235
|
+
|
|
236
|
+
| Query type | Recall |
|
|
237
|
+
|---|---|
|
|
238
|
+
| Exact API lookups | 100% |
|
|
239
|
+
| Developer workflow questions | 100% |
|
|
240
|
+
| Architecture questions | 100% |
|
|
241
|
+
| Semantic paraphrases | 81% |
|
|
242
|
+
| Adversarial | 83% |
|
|
243
|
+
| **Overall** | **91%** |
|
|
244
|
+
|
|
245
|
+
LLM-authored knowledge retrieves better because the AI uses consistent domain vocabulary when writing both the stored content and later queries — a natural fit for BM25 keyword ranking.
|
|
246
|
+
|
|
247
|
+
## Optional: Neural Embeddings
|
|
248
|
+
|
|
249
|
+
The default installation uses a zero-dependency local embedder (character n-gram TF-IDF hashing) that handles morphological similarity — "authenticate" ↔ "authentication" ↔ "auth" produce similar vectors. This is effective for LLM-authored content where vocabulary is consistent.
|
|
250
|
+
|
|
251
|
+
For higher recall on semantic queries with vocabulary gaps, install the neural backend:
|
|
252
|
+
|
|
253
|
+
```bash
|
|
254
|
+
pip install "sage-memory[neural]"
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
This adds [fastembed](https://github.com/qdrant/fastembed) with a 30MB ONNX model. No code changes needed — sage-memory detects the backend automatically and enables hybrid search (FTS5 + vector similarity fused via Reciprocal Rank Fusion).
|
|
258
|
+
|
|
259
|
+
## Architecture
|
|
260
|
+
|
|
261
|
+
```
|
|
262
|
+
7 files · ~1,100 lines of Python · 2 required dependencies (mcp, sqlite-vec)
|
|
263
|
+
|
|
264
|
+
src/sage_memory/
|
|
265
|
+
├── server.py 213 lines MCP server, 5 tools, dict-based dispatch
|
|
266
|
+
├── search.py 328 lines Dual-DB search, FTS5 OR, RRF fusion, access tracking
|
|
267
|
+
├── store.py 222 lines Store, update, delete, list, deferred embedding
|
|
268
|
+
├── embedder.py 175 lines Embedder protocol + local + optional neural
|
|
269
|
+
├── db.py 190 lines Project detection, dual DB connections, migrations
|
|
270
|
+
└── migrations/
|
|
271
|
+
└── 001_initial.sql 53 lines Schema: memories + FTS5 + vec0 index
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Design principles
|
|
275
|
+
|
|
276
|
+
**Lean.** 7 source files, ~1,100 lines. No frameworks, no abstractions beyond what's needed. Every line earns its place.
|
|
277
|
+
|
|
278
|
+
**Two dependencies.** `mcp` (the protocol) and `sqlite-vec` (vector search extension). The neural embedder is optional. No PyTorch, no heavy ML stack by default.
|
|
279
|
+
|
|
280
|
+
**Project-local databases.** Each project gets its own SQLite file. No cross-project noise. No scaling problems — a single project rarely exceeds 15K memories, which is FTS5's sweet spot.
|
|
281
|
+
|
|
282
|
+
**Zero configuration.** Auto-detects project root. Auto-creates database. Auto-routes to project or global scope. The developer adds one MCP config block and never thinks about it again.
|
|
283
|
+
|
|
284
|
+
**Correctness over cleverness.** FTS5 OR because AND breaks natural language queries. Normalized RRF scores because raw scores have incompatible scales. Content-hash dedup because LLMs will store the same insight repeatedly. Deferred embedding because the write path shouldn't depend on the slowest component.
|
|
285
|
+
|
|
286
|
+
## Development
|
|
287
|
+
|
|
288
|
+
```bash
|
|
289
|
+
git clone https://github.com/<your-org>/sage-memory.git
|
|
290
|
+
cd sage-memory
|
|
291
|
+
pip install -e ".[dev]"
|
|
292
|
+
pytest
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
Run the server locally:
|
|
296
|
+
|
|
297
|
+
```bash
|
|
298
|
+
python -m sage_memory
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## License
|
|
302
|
+
|
|
303
|
+
MIT
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sage-memory"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Ultrafast local MCP memory for LLMs — project-aware, zero-config"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = { text = "MIT" }
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
dependencies = [
|
|
9
|
+
"mcp>=1.0.0",
|
|
10
|
+
"sqlite-vec>=0.1.6",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[project.optional-dependencies]
|
|
14
|
+
neural = ["fastembed>=0.4.0"]
|
|
15
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24"]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
sage-memory = "sage_memory:main"
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["hatchling"]
|
|
22
|
+
build-backend = "hatchling.build"
|
|
23
|
+
|
|
24
|
+
[tool.hatch.build.targets.wheel]
|
|
25
|
+
packages = ["src/sage_memory"]
|