routing-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-2026 Ahmet Yigit Sertel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,12 @@
1
+ include LICENSE
2
+ include README.md
3
+ include pyproject.toml
4
+
5
+ recursive-include rm *.py py.typed
6
+ recursive-exclude experiments *
7
+ recursive-exclude tests *
8
+ recursive-exclude *.egg-info *
9
+
10
+ global-exclude __pycache__
11
+ global-exclude *.py[cod]
12
+ global-exclude .DS_Store
@@ -0,0 +1,212 @@
1
+ Metadata-Version: 2.4
2
+ Name: routing-memory
3
+ Version: 0.1.0
4
+ Summary: Lightweight long-term memory for LLM agents via vector-quantized routing
5
+ Author: Ahmet Yigit Sertel
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/AhmetYSertel/routing-memory
8
+ Project-URL: Repository, https://github.com/AhmetYSertel/routing-memory
9
+ Project-URL: Issues, https://github.com/AhmetYSertel/routing-memory/issues
10
+ Keywords: llm,memory,retrieval,vector-quantization,agent,rag
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.9
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: numpy>=1.24
26
+ Requires-Dist: scikit-learn>=1.3
27
+ Requires-Dist: sentence-transformers>=2.2
28
+ Dynamic: license-file
29
+
30
+ # Routing Memory (RM)
31
+
32
+ **Lightweight long-term memory for LLM agents via vector-quantized routing.**
33
+
34
+ RM replaces brute-force dense retrieval with a VQ codebook that compresses N items into K centroid buckets. Queries probe only the top-n centroids, rerank by dot product, and return results — achieving **768x per-item compression** and **99%+ recall** at a fraction of the latency and memory cost.
35
+
36
+ ```
37
+ pip install routing-memory
38
+ ```
39
+
40
+ ## Quick Start
41
+
42
+ ```python
43
+ from rm import RoutingMemory
44
+
45
+ memory = RoutingMemory()
46
+
47
+ # Store memories
48
+ memory.add("User prefers dark mode for all applications")
49
+ memory.add("Meeting with Alice scheduled for March 15 at 2pm")
50
+ memory.add("Project deadline is end of Q1 2026")
51
+
52
+ # Search
53
+ results = memory.search("what are the user's UI preferences?", top_k=3)
54
+ for r in results:
55
+ print(f" [{r['score']:.3f}] {r['text']}")
56
+ ```
57
+
58
+ ## Features
59
+
60
+ | Feature | Description |
61
+ |---------|-------------|
62
+ | **VQ Codebook** | MiniBatchKMeans clustering with adaptive K = ceil(N/B_target) |
63
+ | **Multi-probe retrieval** | Query top-n centroids, collect candidates, rerank by dot product |
64
+ | **Score filtering** | Threshold-based filtering saves tokens by dropping low-relevance results |
65
+ | **Drift detection** | Rolling quantization-error (qerr) monitoring with an automatic alarm when the distribution shifts |
66
+ | **Online adaptation** | EMA centroid updates, bucket splits, idle centroid pruning |
67
+ | **Persistence** | SQLite backend for durable storage across sessions |
68
+ | **Pluggable backends** | Swap embedding models or storage engines via clean interfaces |
69
+
70
+ ## Architecture
71
+
72
+ ```
73
+ Query ──> Encode ──> Top-n Centroids ──> Collect Candidates ──> Dot-Product Rerank ──> Filter ──> Results
74
+ | |
75
+ VQ Codebook Score Threshold
76
+ (K centroids) (tau >= 0.3)
77
+ ```
78
+
79
+ **Compression**: Each item needs only a 2-byte centroid assignment vs 384x4 = 1536 bytes for dense fp32. That's **768x compression**.
80
+
81
+ **Recall**: With n=4 probes on 5K items: R@5 = 0.9916 (99.2% of dense baseline).
82
+
83
+ ## API Reference
84
+
85
+ ### `RoutingMemory`
86
+
87
+ ```python
88
+ RoutingMemory(
89
+ db_path="rm_memory.db", # SQLite path (None for in-memory)
90
+ embedding_model="all-MiniLM-L6-v2", # any sentence-transformers model
91
+ n_probes=3, # centroids to probe per query
92
+ score_threshold=0.3, # minimum retrieval score
93
+ seed=42, # random seed
94
+ )
95
+ ```
96
+
97
+ **Methods:**
98
+
99
+ | Method | Description |
100
+ |--------|-------------|
101
+ | `add(text, item_id=None, metadata=None)` | Store a memory item, returns item ID |
102
+ | `search(query, top_k=5, threshold=None)` | Semantic search, returns list of dicts |
103
+ | `search_with_signals(query, top_k=5)` | Search with routing signals (confidence, margin, qerr) |
104
+ | `stats()` | Memory statistics (item count, K, compression, drift) |
105
+ | `codebook_info()` | Codebook details (K, dim, Gini, dead codes) |
106
+ | `save()` | Persist codebook state |
107
+ | `close()` | Close storage connection |
108
+
109
+ ### Low-level Components
110
+
111
+ ```python
112
+ from rm import Codebook, L1Retriever, DriftMonitor
113
+
114
+ # Direct codebook access
115
+ cb = Codebook(dim=384, seed=42)
116
+ cb.fit(embeddings, item_ids)
117
+ centroid_id, qerr = cb.encode(query_embedding)
118
+ conf = cb.conf(query_embedding)
119
+ margin = cb.margin(query_embedding)
120
+
121
+ # Retriever
122
+ retriever = L1Retriever(cb, n_probes=4, top_k=10, score_threshold=0.3)
123
+ result = retriever.query(query_embedding) # returns L1Result
124
+
125
+ # Drift monitor
126
+ monitor = DriftMonitor()
127
+ alarm = monitor.record(qerr, margin) # returns DriftAlarm or None
128
+ ```
129
+
130
+ ## Experiment Suite
131
+
132
+ RM ships with 13 reproducible experiments (7 hypothesis tests + 6 application benchmarks).
133
+
134
+ ```bash
135
+ # Run all experiments
136
+ python -m rm.experiments.run_all
137
+
138
+ # Run specific experiments
139
+ python -m rm.experiments.run_all --select H1 H2 A4
140
+ ```
141
+
142
+ ### Results Summary
143
+
144
+ | Exp | Name | Key Metric | Result |
145
+ |-----|------|-----------|--------|
146
+ | H1 | Codebook Fundamentals | Fidelity@K=64 | 0.7415 |
147
+ | H2 | Retrieval Quality | R@5 (n=4 probes) | 0.9916 |
148
+ | H3 | Score-Based Filtering | Savings@tau=0.7 | 59.1% (R@5=0.958) |
149
+ | H4 | Adaptive K Heuristics | Best heuristic | sqrtN (lowest Gini) |
150
+ | H5 | Drift Detection | Alarm latency | 11 episodes |
151
+ | H6 | Multi-Encoder Robustness | RM/Dense ratio spread | 0.0036 |
152
+ | H7 | Storage & Latency | Per-item compression | 768x vs fp32 |
153
+ | A1 | MS-MARCO Passage Retrieval | R@5 (n=4) | 0.9585 |
154
+ | A2 | LoCoMo Conversational Memory | R@5 | 0.9934 |
155
+ | A3 | Enrichment Generalization | Delta RM | +0.053 |
156
+ | A4 | Million-Scale (1M items) | R@5 | 0.8556 |
157
+ | A5 | Pareto Frontier | RM dominant at n>=4 | 91.8% R@5 @ 2.8ms |
158
+ | A6 | Bucket Imbalance | Gini (100K, K=256) | 0.3628 |
159
+
160
+ All experiments use real embeddings (all-MiniLM-L6-v2, d=384) and seed=42.
161
+
162
+ ## Project Structure
163
+
164
+ ```
165
+ rm/
166
+ rm/ # Core package
167
+ __init__.py
168
+ codebook.py # VQ codebook (MiniBatchKMeans, adaptive K)
169
+ retrieval.py # Multi-probe retrieval with dot-product rerank
170
+ filtering.py # Score-based result filtering
171
+ drift.py # Distribution drift detection
172
+ memory.py # RoutingMemory high-level API
173
+ embeddings/ # Pluggable embedding backends
174
+ base.py # Abstract interface
175
+ local.py # sentence-transformers wrapper
176
+ storage/ # Pluggable storage backends
177
+ base.py # Abstract interface
178
+ sqlite.py # SQLite persistence
179
+ experiments/ # 13 reproducible experiments
180
+ run_all.py # Experiment runner (--select support)
181
+ shared/ # Data generation, plotting utilities
182
+ h1_codebook/ .. h7_storage/ # Hypothesis tests
183
+ a1_msmarco/ .. a6_imbalance/ # Application benchmarks
184
+ tests/ # pytest test suite
185
+ pyproject.toml # Package configuration
186
+ LICENSE # MIT
187
+ ```
188
+
189
+ ## Development
190
+
191
+ ```bash
192
+ git clone https://github.com/AhmetYSertel/routing-memory.git
193
+ cd routing-memory
194
+ pip install -e . pytest
195
+ pytest tests/ -v
196
+ ```
197
+
198
+ ## Citation
199
+
200
+ If you use RM in your research, please cite the Hybrid Governance Architecture (HGA) paper:
201
+
202
+ ```bibtex
203
+ @article{sertel2026hga,
204
+ title={Hybrid Governance Architecture: Structured Memory and Adaptive Routing for LLM Agents},
205
+ author={Sertel, Ahmet Yigit},
206
+ year={2026}
207
+ }
208
+ ```
209
+
210
+ ## License
211
+
212
+ MIT
@@ -0,0 +1,183 @@
1
+ # Routing Memory (RM)
2
+
3
+ **Lightweight long-term memory for LLM agents via vector-quantized routing.**
4
+
5
+ RM replaces brute-force dense retrieval with a VQ codebook that compresses N items into K centroid buckets. Queries probe only the top-n centroids, rerank by dot product, and return results — achieving **768x per-item compression** and **99%+ recall** at a fraction of the latency and memory cost.
6
+
7
+ ```
8
+ pip install routing-memory
9
+ ```
10
+
11
+ ## Quick Start
12
+
13
+ ```python
14
+ from rm import RoutingMemory
15
+
16
+ memory = RoutingMemory()
17
+
18
+ # Store memories
19
+ memory.add("User prefers dark mode for all applications")
20
+ memory.add("Meeting with Alice scheduled for March 15 at 2pm")
21
+ memory.add("Project deadline is end of Q1 2026")
22
+
23
+ # Search
24
+ results = memory.search("what are the user's UI preferences?", top_k=3)
25
+ for r in results:
26
+ print(f" [{r['score']:.3f}] {r['text']}")
27
+ ```
28
+
29
+ ## Features
30
+
31
+ | Feature | Description |
32
+ |---------|-------------|
33
+ | **VQ Codebook** | MiniBatchKMeans clustering with adaptive K = ceil(N/B_target) |
34
+ | **Multi-probe retrieval** | Query top-n centroids, collect candidates, rerank by dot product |
35
+ | **Score filtering** | Threshold-based filtering saves tokens by dropping low-relevance results |
36
+ | **Drift detection** | Rolling quantization-error (qerr) monitoring with an automatic alarm when the distribution shifts |
37
+ | **Online adaptation** | EMA centroid updates, bucket splits, idle centroid pruning |
38
+ | **Persistence** | SQLite backend for durable storage across sessions |
39
+ | **Pluggable backends** | Swap embedding models or storage engines via clean interfaces |
40
+
41
+ ## Architecture
42
+
43
+ ```
44
+ Query ──> Encode ──> Top-n Centroids ──> Collect Candidates ──> Dot-Product Rerank ──> Filter ──> Results
45
+ | |
46
+ VQ Codebook Score Threshold
47
+ (K centroids) (tau >= 0.3)
48
+ ```
49
+
50
+ **Compression**: Each item needs only a 2-byte centroid assignment vs 384x4 = 1536 bytes for dense fp32. That's **768x compression**.
51
+
52
+ **Recall**: With n=4 probes on 5K items: R@5 = 0.9916 (99.2% of dense baseline).
53
+
54
+ ## API Reference
55
+
56
+ ### `RoutingMemory`
57
+
58
+ ```python
59
+ RoutingMemory(
60
+ db_path="rm_memory.db", # SQLite path (None for in-memory)
61
+ embedding_model="all-MiniLM-L6-v2", # any sentence-transformers model
62
+ n_probes=3, # centroids to probe per query
63
+ score_threshold=0.3, # minimum retrieval score
64
+ seed=42, # random seed
65
+ )
66
+ ```
67
+
68
+ **Methods:**
69
+
70
+ | Method | Description |
71
+ |--------|-------------|
72
+ | `add(text, item_id=None, metadata=None)` | Store a memory item, returns item ID |
73
+ | `search(query, top_k=5, threshold=None)` | Semantic search, returns list of dicts |
74
+ | `search_with_signals(query, top_k=5)` | Search with routing signals (confidence, margin, qerr) |
75
+ | `stats()` | Memory statistics (item count, K, compression, drift) |
76
+ | `codebook_info()` | Codebook details (K, dim, Gini, dead codes) |
77
+ | `save()` | Persist codebook state |
78
+ | `close()` | Close storage connection |
79
+
80
+ ### Low-level Components
81
+
82
+ ```python
83
+ from rm import Codebook, L1Retriever, DriftMonitor
84
+
85
+ # Direct codebook access
86
+ cb = Codebook(dim=384, seed=42)
87
+ cb.fit(embeddings, item_ids)
88
+ centroid_id, qerr = cb.encode(query_embedding)
89
+ conf = cb.conf(query_embedding)
90
+ margin = cb.margin(query_embedding)
91
+
92
+ # Retriever
93
+ retriever = L1Retriever(cb, n_probes=4, top_k=10, score_threshold=0.3)
94
+ result = retriever.query(query_embedding) # returns L1Result
95
+
96
+ # Drift monitor
97
+ monitor = DriftMonitor()
98
+ alarm = monitor.record(qerr, margin) # returns DriftAlarm or None
99
+ ```
100
+
101
+ ## Experiment Suite
102
+
103
+ RM ships with 13 reproducible experiments (7 hypothesis tests + 6 application benchmarks).
104
+
105
+ ```bash
106
+ # Run all experiments
107
+ python -m rm.experiments.run_all
108
+
109
+ # Run specific experiments
110
+ python -m rm.experiments.run_all --select H1 H2 A4
111
+ ```
112
+
113
+ ### Results Summary
114
+
115
+ | Exp | Name | Key Metric | Result |
116
+ |-----|------|-----------|--------|
117
+ | H1 | Codebook Fundamentals | Fidelity@K=64 | 0.7415 |
118
+ | H2 | Retrieval Quality | R@5 (n=4 probes) | 0.9916 |
119
+ | H3 | Score-Based Filtering | Savings@tau=0.7 | 59.1% (R@5=0.958) |
120
+ | H4 | Adaptive K Heuristics | Best heuristic | sqrtN (lowest Gini) |
121
+ | H5 | Drift Detection | Alarm latency | 11 episodes |
122
+ | H6 | Multi-Encoder Robustness | RM/Dense ratio spread | 0.0036 |
123
+ | H7 | Storage & Latency | Per-item compression | 768x vs fp32 |
124
+ | A1 | MS-MARCO Passage Retrieval | R@5 (n=4) | 0.9585 |
125
+ | A2 | LoCoMo Conversational Memory | R@5 | 0.9934 |
126
+ | A3 | Enrichment Generalization | Delta RM | +0.053 |
127
+ | A4 | Million-Scale (1M items) | R@5 | 0.8556 |
128
+ | A5 | Pareto Frontier | RM dominant at n>=4 | 91.8% R@5 @ 2.8ms |
129
+ | A6 | Bucket Imbalance | Gini (100K, K=256) | 0.3628 |
130
+
131
+ All experiments use real embeddings (all-MiniLM-L6-v2, d=384) and seed=42.
132
+
133
+ ## Project Structure
134
+
135
+ ```
136
+ rm/
137
+ rm/ # Core package
138
+ __init__.py
139
+ codebook.py # VQ codebook (MiniBatchKMeans, adaptive K)
140
+ retrieval.py # Multi-probe retrieval with dot-product rerank
141
+ filtering.py # Score-based result filtering
142
+ drift.py # Distribution drift detection
143
+ memory.py # RoutingMemory high-level API
144
+ embeddings/ # Pluggable embedding backends
145
+ base.py # Abstract interface
146
+ local.py # sentence-transformers wrapper
147
+ storage/ # Pluggable storage backends
148
+ base.py # Abstract interface
149
+ sqlite.py # SQLite persistence
150
+ experiments/ # 13 reproducible experiments
151
+ run_all.py # Experiment runner (--select support)
152
+ shared/ # Data generation, plotting utilities
153
+ h1_codebook/ .. h7_storage/ # Hypothesis tests
154
+ a1_msmarco/ .. a6_imbalance/ # Application benchmarks
155
+ tests/ # pytest test suite
156
+ pyproject.toml # Package configuration
157
+ LICENSE # MIT
158
+ ```
159
+
160
+ ## Development
161
+
162
+ ```bash
163
+ git clone https://github.com/AhmetYSertel/routing-memory.git
164
+ cd routing-memory
165
+ pip install -e . pytest
166
+ pytest tests/ -v
167
+ ```
168
+
169
+ ## Citation
170
+
171
+ If you use RM in your research, please cite the Hybrid Governance Architecture (HGA) paper:
172
+
173
+ ```bibtex
174
+ @article{sertel2026hga,
175
+ title={Hybrid Governance Architecture: Structured Memory and Adaptive Routing for LLM Agents},
176
+ author={Sertel, Ahmet Yigit},
177
+ year={2026}
178
+ }
179
+ ```
180
+
181
+ ## License
182
+
183
+ MIT
@@ -0,0 +1,42 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "routing-memory"
7
+ version = "0.1.0"
8
+ description = "Lightweight long-term memory for LLM agents via vector-quantized routing"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [{name = "Ahmet Yigit Sertel"}]
13
+ keywords = ["llm", "memory", "retrieval", "vector-quantization", "agent", "rag"]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Science/Research",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Operating System :: OS Independent",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ ]
27
+ dependencies = [
28
+ "numpy>=1.24",
29
+ "scikit-learn>=1.3",
30
+ "sentence-transformers>=2.2",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/AhmetYSertel/routing-memory"
35
+ Repository = "https://github.com/AhmetYSertel/routing-memory"
36
+ Issues = "https://github.com/AhmetYSertel/routing-memory/issues"
37
+
38
+ [tool.setuptools.packages.find]
39
+ include = ["rm*"]
40
+
41
+ [tool.setuptools.package-data]
42
+ rm = ["py.typed"]
@@ -0,0 +1,28 @@
1
+ """Routing Memory — Lightweight long-term memory via vector-quantized routing."""
2
+
3
+ from .memory import RoutingMemory
4
+ from .codebook import Codebook, CentroidBucket
5
+ from .retrieval import L1Retriever, L1Result, RetrievalResult
6
+ from .filtering import filter_by_score, filter_top_n
7
+ from .drift import DriftMonitor, DriftAlarm
8
+ from .embeddings import LocalEmbeddings, EmbeddingBackend
9
+ from .storage import RMSQLiteBackend, RMStorageBackend
10
+
11
+ __all__ = [
12
+ "RoutingMemory",
13
+ "Codebook",
14
+ "CentroidBucket",
15
+ "L1Retriever",
16
+ "L1Result",
17
+ "RetrievalResult",
18
+ "filter_by_score",
19
+ "filter_top_n",
20
+ "DriftMonitor",
21
+ "DriftAlarm",
22
+ "LocalEmbeddings",
23
+ "EmbeddingBackend",
24
+ "RMSQLiteBackend",
25
+ "RMStorageBackend",
26
+ ]
27
+
28
+ __version__ = "0.1.0"