memctl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memctl-0.1.0/LICENSE +21 -0
- memctl-0.1.0/PKG-INFO +528 -0
- memctl-0.1.0/README.md +485 -0
- memctl-0.1.0/memctl/__init__.py +36 -0
- memctl-0.1.0/memctl/cli.py +769 -0
- memctl-0.1.0/memctl/config.py +81 -0
- memctl-0.1.0/memctl/consolidate.py +275 -0
- memctl-0.1.0/memctl/extract.py +306 -0
- memctl-0.1.0/memctl/ingest.py +469 -0
- memctl-0.1.0/memctl/mcp/__init__.py +1 -0
- memctl-0.1.0/memctl/mcp/formatting.py +212 -0
- memctl-0.1.0/memctl/mcp/server.py +150 -0
- memctl-0.1.0/memctl/mcp/tools.py +479 -0
- memctl-0.1.0/memctl/policy.py +292 -0
- memctl-0.1.0/memctl/proposer.py +142 -0
- memctl-0.1.0/memctl/store.py +1228 -0
- memctl-0.1.0/memctl/types.py +325 -0
- memctl-0.1.0/memctl.egg-info/PKG-INFO +528 -0
- memctl-0.1.0/memctl.egg-info/SOURCES.txt +32 -0
- memctl-0.1.0/memctl.egg-info/dependency_links.txt +1 -0
- memctl-0.1.0/memctl.egg-info/entry_points.txt +2 -0
- memctl-0.1.0/memctl.egg-info/requires.txt +18 -0
- memctl-0.1.0/memctl.egg-info/top_level.txt +1 -0
- memctl-0.1.0/pyproject.toml +58 -0
- memctl-0.1.0/setup.cfg +4 -0
- memctl-0.1.0/tests/test_cli.py +431 -0
- memctl-0.1.0/tests/test_contracts.py +252 -0
- memctl-0.1.0/tests/test_extract.py +396 -0
- memctl-0.1.0/tests/test_forward_compat.py +279 -0
- memctl-0.1.0/tests/test_ingest.py +219 -0
- memctl-0.1.0/tests/test_pipe_compose.py +322 -0
- memctl-0.1.0/tests/test_policy.py +235 -0
- memctl-0.1.0/tests/test_store.py +411 -0
- memctl-0.1.0/tests/test_types.py +272 -0
memctl-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Dr. Olivier Vitrac, PhD, HDR - Adservio
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
memctl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: memctl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Unix-native memory control plane for LLM orchestration
|
|
5
|
+
Author-email: Olivier Vitrac <olivier.vitrac@adservio.fr>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/ovitrac/memctl
|
|
8
|
+
Project-URL: Repository, https://github.com/ovitrac/memctl
|
|
9
|
+
Project-URL: Issues, https://github.com/ovitrac/memctl/issues
|
|
10
|
+
Project-URL: Changelog, https://github.com/ovitrac/memctl/blob/main/CHANGELOG.md
|
|
11
|
+
Keywords: llm,memory,sqlite,fts5,mcp,rag,cli,ai-agents
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
23
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Requires-Python: >=3.10
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Provides-Extra: docs
|
|
29
|
+
Requires-Dist: python-docx>=1.0.0; extra == "docs"
|
|
30
|
+
Requires-Dist: python-pptx>=0.6.21; extra == "docs"
|
|
31
|
+
Requires-Dist: openpyxl>=3.1.0; extra == "docs"
|
|
32
|
+
Requires-Dist: odfpy>=1.4.1; extra == "docs"
|
|
33
|
+
Provides-Extra: mcp
|
|
34
|
+
Requires-Dist: mcp[cli]>=0.1.0; extra == "mcp"
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff; extra == "dev"
|
|
38
|
+
Provides-Extra: all
|
|
39
|
+
Requires-Dist: memctl[docs]; extra == "all"
|
|
40
|
+
Requires-Dist: memctl[mcp]; extra == "all"
|
|
41
|
+
Requires-Dist: memctl[dev]; extra == "all"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# memctl
|
|
45
|
+
|
|
46
|
+
**A Unix-native memory control plane for LLM orchestration.**
|
|
47
|
+
|
|
48
|
+
One file, one truth. Ingest files, recall with FTS5, pipe into any LLM.
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
pip install memctl
|
|
52
|
+
memctl init
|
|
53
|
+
memctl push "project architecture" --source src/ | llm "Summarize the architecture"
|
|
54
|
+
echo "The architecture uses event sourcing" | memctl pull --tags arch
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## Why memctl?
|
|
60
|
+
|
|
61
|
+
LLMs forget everything between turns. memctl gives them persistent, structured, policy-governed memory backed by a single SQLite file.
|
|
62
|
+
|
|
63
|
+
- **Zero dependencies** — stdlib only. No numpy, no torch, no compiled extensions.
|
|
64
|
+
- **One file** — Everything in `memory.db` (SQLite + FTS5 + WAL).
|
|
65
|
+
- **Unix composable** — `push` writes to stdout, `pull` reads from stdin. Pipe freely.
|
|
66
|
+
- **Policy-governed** — 30 detection patterns block secrets, injection, and instructional content before storage.
|
|
67
|
+
- **Content-addressed** — SHA-256 dedup ensures idempotent ingestion.
|
|
68
|
+
- **Forward-compatible** — Identical schema to [RAGIX](https://github.com/ovitrac/RAGIX). Upgrade seamlessly.
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pip install memctl
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
For Office/ODF document ingestion (.docx, .odt, .pptx, .odp, .xlsx, .ods):
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install memctl[docs]
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
For MCP server support (Claude Code / Claude Desktop):
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install memctl[mcp]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
For everything:
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
pip install memctl[all]
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Requirements:** Python 3.10+ (3.12 recommended). No compiled dependencies for core.
|
|
97
|
+
PDF extraction requires `pdftotext` from poppler-utils (`sudo apt install poppler-utils` or `brew install poppler`).
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Quickstart
|
|
102
|
+
|
|
103
|
+
### 1. Initialize a memory workspace
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
memctl init
|
|
107
|
+
# Creates .memory/memory.db, .memory/config.yaml, .memory/.gitignore
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Set the environment variable for convenience:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
eval "$(memctl init)"
|
|
114
|
+
# Sets MEMCTL_DB=.memory/memory.db
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### 2. Ingest files and recall
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
# Ingest source files + recall matching items → injection block on stdout
|
|
121
|
+
memctl push "authentication flow" --source src/auth/
|
|
122
|
+
|
|
123
|
+
# Ingest Office documents (requires memctl[docs])
|
|
124
|
+
memctl push "project status" --source reports/*.docx slides/*.pptx
|
|
125
|
+
|
|
126
|
+
# Ingest PDFs (requires pdftotext)
|
|
127
|
+
memctl push "specifications" --source specs/*.pdf
|
|
128
|
+
|
|
129
|
+
# Recall only (no ingestion)
|
|
130
|
+
memctl push "database schema"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### 3. Store LLM output
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
# Pipe LLM output into memory
|
|
137
|
+
echo "We chose JWT for stateless auth" | memctl pull --tags auth,decision --title "Auth decision"
|
|
138
|
+
|
|
139
|
+
# Or pipe from any LLM CLI
|
|
140
|
+
memctl push "API design" | llm "Analyze this" | memctl pull --tags api
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### 4. Search
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
# Human-readable
|
|
147
|
+
memctl search "authentication"
|
|
148
|
+
|
|
149
|
+
# JSON for scripts
|
|
150
|
+
memctl search "database" --json -k 5
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### 5. Inspect and manage
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
memctl show MEM-abc123def456 # Show item details
|
|
157
|
+
memctl stats # Store metrics
|
|
158
|
+
memctl stats --json # Machine-readable stats
|
|
159
|
+
memctl consolidate # Merge similar STM items
|
|
160
|
+
memctl consolidate --dry-run # Preview without writing
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## CLI Reference
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
memctl <command> [options]
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### Commands
|
|
172
|
+
|
|
173
|
+
| Command | Description |
|
|
174
|
+
|---------|-------------|
|
|
175
|
+
| `init [PATH]` | Initialize a memory workspace (default: `.memory`) |
|
|
176
|
+
| `push QUERY [--source ...]` | Ingest files + recall matching items to stdout |
|
|
177
|
+
| `pull [--tags T] [--title T]` | Read stdin, store as memory items |
|
|
178
|
+
| `search QUERY [-k N]` | FTS5 full-text search |
|
|
179
|
+
| `show ID` | Display a single memory item |
|
|
180
|
+
| `stats` | Store statistics |
|
|
181
|
+
| `consolidate [--dry-run]` | Deterministic merge of similar STM items |
|
|
182
|
+
| `serve` | Start MCP server (requires `memctl[mcp]`) |
|
|
183
|
+
|
|
184
|
+
### Global Flags
|
|
185
|
+
|
|
186
|
+
| Flag | Description |
|
|
187
|
+
|------|-------------|
|
|
188
|
+
| `--db PATH` | SQLite database path |
|
|
189
|
+
| `--json` | Machine-readable JSON output |
|
|
190
|
+
| `-q, --quiet` | Suppress stderr progress messages |
|
|
191
|
+
| `-v, --verbose` | Enable debug logging |
|
|
192
|
+
|
|
193
|
+
### Command Details
|
|
194
|
+
|
|
195
|
+
#### `memctl init`
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
memctl init [PATH] [--force] [--fts-tokenizer fr|en|raw|"TOKENIZER SPEC"]
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Creates the workspace directory, SQLite database with schema, `config.yaml`, and `.gitignore`. Prints `export MEMCTL_DB="..."` to stdout for eval.
|
|
202
|
+
|
|
203
|
+
Idempotent: running twice on the same path exits 0 without error.
|
|
204
|
+
|
|
205
|
+
#### `memctl push`
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
memctl push QUERY [--source FILE ...] [--budget N] [--tier TIER] [--tags T] [--scope S]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
Two-phase command:
|
|
212
|
+
1. **Ingest** (optional): processes `--source` files with SHA-256 dedup and paragraph chunking.
|
|
213
|
+
2. **Recall**: FTS5 search for QUERY, format matching items as an injection block on stdout.
|
|
214
|
+
|
|
215
|
+
stdout contains only the injection block (`format_version=1`). Progress goes to stderr.
|
|
216
|
+
|
|
217
|
+
#### `memctl pull`
|
|
218
|
+
|
|
219
|
+
```bash
|
|
220
|
+
echo "..." | memctl pull [--tags T] [--title T] [--scope S]
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
Reads text from stdin and stores it as memory items. Attempts structured proposal extraction first; falls back to single-note storage. All content passes through the policy engine before storage.
|
|
224
|
+
|
|
225
|
+
#### `memctl search`
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
memctl search QUERY [--tier TIER] [--type TYPE] [-k N] [--json]
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
FTS5 full-text search. Returns human-readable output by default, or JSON with `--json`.
|
|
232
|
+
|
|
233
|
+
#### `memctl consolidate`
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
memctl consolidate [--scope S] [--dry-run] [--json]
|
|
237
|
+
```
|
|
238
|
+
|
|
239
|
+
Deterministic consolidation: clusters STM items by type + tag overlap (Jaccard), merges each cluster (longest content wins), promotes to MTM. High-usage MTM items promote to LTM. No LLM calls.
|
|
240
|
+
|
|
241
|
+
---
|
|
242
|
+
|
|
243
|
+
## Environment Variables
|
|
244
|
+
|
|
245
|
+
| Variable | Default | Description |
|
|
246
|
+
|----------|---------|-------------|
|
|
247
|
+
| `MEMCTL_DB` | `.memory/memory.db` | Path to SQLite database |
|
|
248
|
+
| `MEMCTL_BUDGET` | `2200` | Token budget for injection blocks |
|
|
249
|
+
| `MEMCTL_FTS` | `fr` | FTS tokenizer preset (`fr`/`en`/`raw`) |
|
|
250
|
+
| `MEMCTL_TIER` | `stm` | Default write tier |
|
|
251
|
+
| `MEMCTL_SESSION` | *(unset)* | Session ID for audit provenance |
|
|
252
|
+
|
|
253
|
+
**Precedence:** `CLI --flag` > `MEMCTL_*` env var > compiled default. Always.
|
|
254
|
+
|
|
255
|
+
---
|
|
256
|
+
|
|
257
|
+
## Exit Codes
|
|
258
|
+
|
|
259
|
+
| Code | Meaning |
|
|
260
|
+
|------|---------|
|
|
261
|
+
| 0 | Success (including idempotent no-op) |
|
|
262
|
+
| 1 | Operational error (bad args, empty input, policy rejection) |
|
|
263
|
+
| 2 | Internal failure (unexpected exception, I/O error) |
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Shell Integration
|
|
268
|
+
|
|
269
|
+
Add to `.bashrc`, `.zshrc`, or your project's `env.sh`:
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
export MEMCTL_DB=.memory/memory.db
|
|
273
|
+
|
|
274
|
+
# Shortcuts
|
|
275
|
+
meminit() { memctl init "${1:-.memory}"; }
|
|
276
|
+
memq() { memctl push "$1"; } # recall only
|
|
277
|
+
memp() { memctl push "$1" ${2:+--source "$2"}; } # push with optional source
|
|
278
|
+
mempull() { memctl pull --tags "${1:-}" ${2:+--title "$2"}; }
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Pipe Recipes
|
|
282
|
+
|
|
283
|
+
```bash
|
|
284
|
+
# Ingest docs + recall + feed to LLM + store output
|
|
285
|
+
memctl push "API design" --source docs/ | llm "Summarize" | memctl pull --tags api
|
|
286
|
+
|
|
287
|
+
# Search and pipe to jq
|
|
288
|
+
memctl search "auth" --json | jq '.[].title'
|
|
289
|
+
|
|
290
|
+
# Batch ingest a directory
|
|
291
|
+
memctl push "project overview" --source src/ tests/ docs/ -q
|
|
292
|
+
|
|
293
|
+
# Export all items as JSONL
|
|
294
|
+
memctl search "" --json | jq -c '.[]'
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## MCP Server
|
|
300
|
+
|
|
301
|
+
memctl exposes 7 MCP tools for integration with Claude Code, Claude Desktop, VS Code, and any MCP-compatible client.
|
|
302
|
+
|
|
303
|
+
### Start the Server
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
memctl serve --db .memory/memory.db
|
|
307
|
+
# or
|
|
308
|
+
python -m memctl.mcp.server --db .memory/memory.db
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
### Claude Code Integration
|
|
312
|
+
|
|
313
|
+
Add to `.claude/settings.json`:
|
|
314
|
+
|
|
315
|
+
```json
|
|
316
|
+
{
|
|
317
|
+
"mcpServers": {
|
|
318
|
+
"memctl": {
|
|
319
|
+
"command": "memctl",
|
|
320
|
+
"args": ["serve", "--db", ".memory/memory.db"]
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### MCP Tools
|
|
327
|
+
|
|
328
|
+
| Tool | Description |
|
|
329
|
+
|------|-------------|
|
|
330
|
+
| `memory_recall` | Token-budgeted context injection (primary tool) |
|
|
331
|
+
| `memory_search` | Interactive FTS5 discovery |
|
|
332
|
+
| `memory_propose` | Store findings with policy governance |
|
|
333
|
+
| `memory_write` | Direct write (privileged/dev operations) |
|
|
334
|
+
| `memory_read` | Read items by ID |
|
|
335
|
+
| `memory_stats` | Store metrics |
|
|
336
|
+
| `memory_consolidate` | Trigger deterministic merge |
|
|
337
|
+
|
|
338
|
+
Tool names use the `memory_*` prefix for drop-in compatibility with RAGIX.
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## How It Works
|
|
343
|
+
|
|
344
|
+
### Architecture
|
|
345
|
+
|
|
346
|
+
```
|
|
347
|
+
memctl/
|
|
348
|
+
├── types.py Data model (MemoryItem, MemoryProposal, MemoryEvent, MemoryLink)
|
|
349
|
+
├── store.py SQLite + FTS5 + WAL backend (9 tables incl. schema_meta + 1 FTS5 virtual table)
|
|
350
|
+
├── extract.py Text extraction (text files + binary format dispatch)
|
|
351
|
+
├── ingest.py Paragraph chunking, SHA-256 dedup, source resolution
|
|
352
|
+
├── policy.py Write governance (30 patterns: secrets, injection, instructional)
|
|
353
|
+
├── config.py Dataclass configuration
|
|
354
|
+
├── cli.py 8 CLI commands
|
|
355
|
+
├── consolidate.py Deterministic merge (Jaccard clustering, no LLM)
|
|
356
|
+
├── proposer.py LLM output parsing (delimiter + regex)
|
|
357
|
+
└── mcp/
|
|
358
|
+
├── tools.py 7 MCP tools (memory_* prefix)
|
|
359
|
+
├── formatting.py Injection block format (format_version=1)
|
|
360
|
+
└── server.py FastMCP server entry point
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
14 source files. ~4,800 lines. Zero compiled dependencies for core.
|
|
364
|
+
|
|
365
|
+
### Memory Tiers
|
|
366
|
+
|
|
367
|
+
| Tier | Purpose | Lifecycle |
|
|
368
|
+
|------|---------|-----------|
|
|
369
|
+
| **STM** (Short-Term) | Recent observations, unverified facts | Created by `pull`. Consolidated or expired. |
|
|
370
|
+
| **MTM** (Medium-Term) | Verified, consolidated knowledge | Created by `consolidate`. Promoted by usage. |
|
|
371
|
+
| **LTM** (Long-Term) | Stable decisions, definitions, constraints | Promoted from MTM by usage count or type. |
|
|
372
|
+
|
|
373
|
+
### Policy Engine
|
|
374
|
+
|
|
375
|
+
Every write path passes through the policy engine. No exceptions.
|
|
376
|
+
|
|
377
|
+
**Hard blocks** (rejected):
|
|
378
|
+
- 10 secret detection patterns (API keys, tokens, passwords, private keys, JWTs)
|
|
379
|
+
- 8 injection patterns (prompt override, system prompt fragments)
|
|
380
|
+
- 8 instructional block patterns (tool invocation syntax, role fragments)
|
|
381
|
+
- Oversized content (>2000 chars for non-pointer types)
|
|
382
|
+
|
|
383
|
+
**Soft blocks** (quarantined to STM with expiry):
|
|
384
|
+
- 4 instructional quarantine patterns (imperative self-instructions)
|
|
385
|
+
- Missing provenance or justification
|
|
386
|
+
- Quarantined items are stored with `injectable=False` (excluded from injection blocks)
|
|
387
|
+
|
|
388
|
+
### FTS5 Tokenizer Presets
|
|
389
|
+
|
|
390
|
+
| Preset | Tokenizer | Use Case |
|
|
391
|
+
|--------|-----------|----------|
|
|
392
|
+
| `fr` | `unicode61 remove_diacritics 2` | French-safe default (accent normalization) |
|
|
393
|
+
| `en` | `porter unicode61 remove_diacritics 2` | English with Porter stemming |
|
|
394
|
+
| `raw` | `unicode61` | No diacritics removal, no stemming |
|
|
395
|
+
|
|
396
|
+
Expert override: `memctl init --fts-tokenizer "porter unicode61 remove_diacritics 2"`
|
|
397
|
+
|
|
398
|
+
### Supported Formats
|
|
399
|
+
|
|
400
|
+
| Category | Extensions | Requirement |
|
|
401
|
+
|----------|-----------|-------------|
|
|
402
|
+
| Text / Markup | `.md` `.txt` `.rst` `.csv` `.tsv` `.html` `.xml` `.json` `.yaml` `.toml` | None (stdlib) |
|
|
403
|
+
| Source Code | `.py` `.js` `.ts` `.jsx` `.tsx` `.java` `.go` `.rs` `.c` `.cpp` `.sh` `.sql` `.css` … | None (stdlib) |
|
|
404
|
+
| Office Documents | `.docx` `.odt` | `pip install memctl[docs]` |
|
|
405
|
+
| Presentations | `.pptx` `.odp` | `pip install memctl[docs]` |
|
|
406
|
+
| Spreadsheets | `.xlsx` `.ods` | `pip install memctl[docs]` |
|
|
407
|
+
| PDF | `.pdf` | `pdftotext` (poppler-utils) |
|
|
408
|
+
|
|
409
|
+
All formats are extracted to plain text before chunking and ingestion. Binary format libraries are lazy-imported — a missing library produces a clear `ImportError` with install instructions.
|
|
410
|
+
|
|
411
|
+
### Content Addressing
|
|
412
|
+
|
|
413
|
+
Every ingested file is hashed (SHA-256). Re-ingesting the same file is a no-op. Every memory item stores a `content_hash` for deduplication.
|
|
414
|
+
|
|
415
|
+
### Consolidation
|
|
416
|
+
|
|
417
|
+
Deterministic, no-LLM merge pipeline:
|
|
418
|
+
|
|
419
|
+
1. Collect non-archived STM items
|
|
420
|
+
2. Cluster by type + tag overlap (Jaccard similarity)
|
|
421
|
+
3. Merge each cluster: longest content wins; tie-break by earliest `created_at`, then lexicographic ID
|
|
422
|
+
4. Write merged items at MTM tier + `supersedes` links
|
|
423
|
+
5. Archive originals (`archived=True`)
|
|
424
|
+
6. Promote high-usage MTM items to LTM
|
|
425
|
+
|
|
426
|
+
---
|
|
427
|
+
|
|
428
|
+
## Database Schema
|
|
429
|
+
|
|
430
|
+
Single SQLite file with WAL mode. 9 tables + 1 FTS5 virtual table:
|
|
431
|
+
|
|
432
|
+
| Table | Purpose |
|
|
433
|
+
|-------|---------|
|
|
434
|
+
| `memory_items` | Core memory items (22 columns) |
|
|
435
|
+
| `memory_revisions` | Immutable revision history |
|
|
436
|
+
| `memory_events` | Audit log (every read/write/consolidate) |
|
|
437
|
+
| `memory_links` | Directional relationships (supersedes, supports, etc.) |
|
|
438
|
+
| `memory_embeddings` | Reserved for RAGIX (empty in memctl) |
|
|
439
|
+
| `corpus_hashes` | SHA-256 file dedup registry |
|
|
440
|
+
| `corpus_metadata` | Corpus-level metadata |
|
|
441
|
+
| `schema_meta` | Schema version, creation info |
|
|
442
|
+
| `memory_palace_locations` | Reserved for RAGIX |
|
|
443
|
+
| `memory_items_fts` | FTS5 virtual table for full-text search |
|
|
444
|
+
|
|
445
|
+
Schema version is tracked in `schema_meta`. Current: `SCHEMA_VERSION=1`.
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
## Migration to RAGIX
|
|
450
|
+
|
|
451
|
+
memctl is extracted from [RAGIX](https://github.com/ovitrac/RAGIX) and maintains schema-identical databases. To upgrade:
|
|
452
|
+
|
|
453
|
+
```bash
|
|
454
|
+
pip install ragix[all]
|
|
455
|
+
# Point at the same database — all items carry over
|
|
456
|
+
ragix memory stats --db .memory/memory.db
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
| Feature | memctl | RAGIX |
|
|
460
|
+
|---------|--------|-------|
|
|
461
|
+
| SQLite schema | Identical | Identical |
|
|
462
|
+
| Injection format | `format_version=1` | `format_version=1` |
|
|
463
|
+
| MCP tool names | `memory_*` | `memory_*` |
|
|
464
|
+
| FTS5 recall | Yes | Yes (+ hybrid embeddings) |
|
|
465
|
+
| Embeddings | No | Yes (FAISS + Ollama) |
|
|
466
|
+
| LLM-assisted merge | No | Yes |
|
|
467
|
+
| Graph-RAG | No | Yes |
|
|
468
|
+
| Reporting | No | Yes |
|
|
469
|
+
|
|
470
|
+
---
|
|
471
|
+
|
|
472
|
+
## Python API
|
|
473
|
+
|
|
474
|
+
```python
|
|
475
|
+
from memctl import MemoryStore, MemoryItem, MemoryPolicy
|
|
476
|
+
|
|
477
|
+
# Open or create a store
|
|
478
|
+
store = MemoryStore(db_path=".memory/memory.db")
|
|
479
|
+
|
|
480
|
+
# Write an item
|
|
481
|
+
item = MemoryItem(
|
|
482
|
+
title="Architecture decision",
|
|
483
|
+
content="We chose event sourcing for state management",
|
|
484
|
+
tier="stm",
|
|
485
|
+
type="decision",
|
|
486
|
+
tags=["architecture", "event-sourcing"],
|
|
487
|
+
)
|
|
488
|
+
store.write_item(item, reason="manual")
|
|
489
|
+
|
|
490
|
+
# Search
|
|
491
|
+
results = store.search_fulltext("event sourcing", limit=10)
|
|
492
|
+
for r in results:
|
|
493
|
+
print(f"[{r.tier}] {r.title}: {r.content[:80]}")
|
|
494
|
+
|
|
495
|
+
# Policy check
|
|
496
|
+
policy = MemoryPolicy()
|
|
497
|
+
from memctl.types import MemoryProposal
|
|
498
|
+
proposal = MemoryProposal(
|
|
499
|
+
title="Config", content="Some content",
|
|
500
|
+
why_store="Important finding",
|
|
501
|
+
provenance_hint={"source_kind": "doc", "source_id": "design.md"},
|
|
502
|
+
)
|
|
503
|
+
verdict = policy.evaluate_proposal(proposal)
|
|
504
|
+
print(verdict.action) # "accept", "quarantine", or "reject"
|
|
505
|
+
|
|
506
|
+
store.close()
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
---
|
|
510
|
+
|
|
511
|
+
## Testing
|
|
512
|
+
|
|
513
|
+
```bash
|
|
514
|
+
pip install memctl[dev]
|
|
515
|
+
pytest tests/ -v
|
|
516
|
+
```
|
|
517
|
+
|
|
518
|
+
210 tests covering types, store, policy, ingest, text extraction, forward compatibility, contracts, CLI (subprocess), and pipe composition.
|
|
519
|
+
|
|
520
|
+
---
|
|
521
|
+
|
|
522
|
+
## License
|
|
523
|
+
|
|
524
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
525
|
+
|
|
526
|
+
---
|
|
527
|
+
|
|
528
|
+
**Author:** Olivier Vitrac, PhD, HDR | [olivier.vitrac@adservio.fr](mailto:olivier.vitrac@adservio.fr) | Adservio Innovation Lab
|