PyPI - routing-memory - Versions diffs - 0.1.0__tar.gz - Mend

routing-memory 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

routing_memory-0.1.0/LICENSE +21 -0
routing_memory-0.1.0/MANIFEST.in +12 -0
routing_memory-0.1.0/PKG-INFO +212 -0
routing_memory-0.1.0/README.md +183 -0
routing_memory-0.1.0/pyproject.toml +42 -0
routing_memory-0.1.0/rm/__init__.py +28 -0
routing_memory-0.1.0/rm/codebook.py +337 -0
routing_memory-0.1.0/rm/drift.py +145 -0
routing_memory-0.1.0/rm/embeddings/__init__.py +4 -0
routing_memory-0.1.0/rm/embeddings/base.py +22 -0
routing_memory-0.1.0/rm/embeddings/local.py +56 -0
routing_memory-0.1.0/rm/filtering.py +15 -0
routing_memory-0.1.0/rm/memory.py +351 -0
routing_memory-0.1.0/rm/py.typed +0 -0
routing_memory-0.1.0/rm/retrieval.py +90 -0
routing_memory-0.1.0/rm/storage/__init__.py +4 -0
routing_memory-0.1.0/rm/storage/base.py +46 -0
routing_memory-0.1.0/rm/storage/sqlite.py +122 -0
routing_memory-0.1.0/routing_memory.egg-info/SOURCES.txt +17 -0
routing_memory-0.1.0/setup.cfg +4 -0

routing_memory-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024-2026 Ahmet Yigit Sertel
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

routing_memory-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,12 @@
+include LICENSE
+include README.md
+include pyproject.toml
+recursive-include rm *.py py.typed
+recursive-exclude experiments *
+recursive-exclude tests *
+recursive-exclude *.egg-info *
+global-exclude __pycache__
+global-exclude *.py[cod]
+global-exclude .DS_Store

routing_memory-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,212 @@
+Metadata-Version: 2.4
+Name: routing-memory
+Version: 0.1.0
+Summary: Lightweight long-term memory for LLM agents via vector-quantized routing
+Author: Ahmet Yigit Sertel
+License: MIT
+Project-URL: Homepage, https://github.com/AhmetYSertel/routing-memory
+Project-URL: Repository, https://github.com/AhmetYSertel/routing-memory
+Project-URL: Issues, https://github.com/AhmetYSertel/routing-memory/issues
+Keywords: llm,memory,retrieval,vector-quantization,agent,rag
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Science/Research
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.9
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.24
+Requires-Dist: scikit-learn>=1.3
+Requires-Dist: sentence-transformers>=2.2
+Dynamic: license-file
+# Routing Memory (RM)
+**Lightweight long-term memory for LLM agents via vector-quantized routing.**
+RM replaces brute-force dense retrieval with a VQ codebook that compresses N items into K centroid buckets. Queries probe only the top-n centroids, rerank the collected candidates by dot product, and return the results — achieving **768x per-item compression** and **99%+ recall relative to the dense baseline** (n=4 probes, 5K items) at a fraction of the latency and memory cost.
+```
+pip install routing-memory
+```
+## Quick Start
+```python
+from rm import RoutingMemory
+memory = RoutingMemory()
+# Store memories
+memory.add("User prefers dark mode for all applications")
+memory.add("Meeting with Alice scheduled for March 15 at 2pm")
+memory.add("Project deadline is end of Q1 2026")
+# Search
+results = memory.search("what are the user's UI preferences?", top_k=3)
+for r in results:
+    print(f"  [{r['score']:.3f}] {r['text']}")
+```
+## Features
+| Feature | Description |
+|---------|-------------|
+| **VQ Codebook** | MiniBatchKMeans clustering with adaptive K = ceil(N/B_target) |
+| **Multi-probe retrieval** | Query top-n centroids, collect candidates, rerank by dot product |
+| **Score filtering** | Threshold-based filtering saves tokens by dropping low-relevance results |
+| **Drift detection** | Rolling qerr monitoring with automatic alarm when distribution shifts |
+| **Online adaptation** | EMA centroid updates, bucket splits, idle centroid pruning |
+| **Persistence** | SQLite backend for durable storage across sessions |
+| **Pluggable backends** | Swap embedding models or storage engines via clean interfaces |
+## Architecture
+```
+Query ──> Encode ──> Top-n Centroids ──> Collect Candidates ──> Dot-Product Rerank ──> Filter ──> Results
+                          |                                           |
+                     VQ Codebook                               Score Threshold
+                     (K centroids)                               (tau >= 0.3)
+```
+**Compression**: Each item needs only a 2-byte centroid assignment vs 384x4 = 1536 bytes for dense fp32. That's **768x compression**.
+**Recall**: With n=4 probes on 5K items: R@5 = 0.9916 (99.2% of dense baseline).
+## API Reference
+### `RoutingMemory`
+```python
+RoutingMemory(
+    db_path="rm_memory.db",      # SQLite path (None for in-memory)
+    embedding_model="all-MiniLM-L6-v2",  # any sentence-transformers model
+    n_probes=3,                  # centroids to probe per query
+    score_threshold=0.3,         # minimum retrieval score
+    seed=42,                     # random seed
+)
+```
+**Methods:**
+| Method | Description |
+|--------|-------------|
+| `add(text, item_id=None, metadata=None)` | Store a memory item, returns item ID |
+| `search(query, top_k=5, threshold=None)` | Semantic search, returns list of dicts |
+| `search_with_signals(query, top_k=5)` | Search with routing signals (confidence, margin, qerr) |
+| `stats()` | Memory statistics (item count, K, compression, drift) |
+| `codebook_info()` | Codebook details (K, dim, Gini, dead codes) |
+| `save()` | Persist codebook state |
+| `close()` | Close storage connection |
+### Low-level Components
+```python
+from rm import Codebook, L1Retriever, DriftMonitor
+# Direct codebook access
+cb = Codebook(dim=384, seed=42)
+cb.fit(embeddings, item_ids)
+centroid_id, qerr = cb.encode(query_embedding)
+conf = cb.conf(query_embedding)
+margin = cb.margin(query_embedding)
+# Retriever
+retriever = L1Retriever(cb, n_probes=4, top_k=10, score_threshold=0.3)
+result = retriever.query(query_embedding)  # returns L1Result
+# Drift monitor
+monitor = DriftMonitor()
+alarm = monitor.record(qerr, margin)  # returns DriftAlarm or None
+```
+## Experiment Suite
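+The RM repository includes 13 reproducible experiments (7 hypothesis tests + 6 application benchmarks). They are excluded from the PyPI distribution, so run them from a clone of the repository.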
+```bash
+# Run all experiments
+python -m rm.experiments.run_all
+# Run specific experiments
+python -m rm.experiments.run_all --select H1 H2 A4
+```
+### Results Summary
+| Exp | Name | Key Metric | Result |
+|-----|------|-----------|--------|
+| H1 | Codebook Fundamentals | Fidelity@K=64 | 0.7415 |
+| H2 | Retrieval Quality | R@5 (n=4 probes) | 0.9916 |
+| H3 | Score-Based Filtering | Savings@tau=0.7 | 59.1% (R@5=0.958) |
+| H4 | Adaptive K Heuristics | Best heuristic | sqrtN (lowest Gini) |
+| H5 | Drift Detection | Alarm latency | 11 episodes |
+| H6 | Multi-Encoder Robustness | RM/Dense ratio spread | 0.0036 |
+| H7 | Storage & Latency | Per-item compression | 768x vs fp32 |
+| A1 | MS-MARCO Passage Retrieval | R@5 (n=4) | 0.9585 |
+| A2 | LoCoMo Conversational Memory | R@5 | 0.9934 |
+| A3 | Enrichment Generalization | Delta RM | +0.053 |
+| A4 | Million-Scale (1M items) | R@5 | 0.8556 |
+| A5 | Pareto Frontier | RM dominant at n>=4 | 91.8% R@5 @ 2.8ms |
+| A6 | Bucket Imbalance | Gini (100K, K=256) | 0.3628 |
+All experiments use real embeddings (all-MiniLM-L6-v2, d=384) and seed=42.
+## Project Structure
+```
+rm/
+  rm/                     # Core package
+    __init__.py
+    codebook.py           # VQ codebook (MiniBatchKMeans, adaptive K)
+    retrieval.py          # Multi-probe retrieval with dot-product rerank
+    filtering.py          # Score-based result filtering
+    drift.py              # Distribution drift detection
+    memory.py             # RoutingMemory high-level API
+    embeddings/           # Pluggable embedding backends
+      base.py             # Abstract interface
+      local.py            # sentence-transformers wrapper
+    storage/              # Pluggable storage backends
+      base.py             # Abstract interface
+      sqlite.py           # SQLite persistence
+  experiments/            # 13 reproducible experiments
+    run_all.py            # Experiment runner (--select support)
+    shared/               # Data generation, plotting utilities
+    h1_codebook/ .. h7_storage/   # Hypothesis tests
+    a1_msmarco/ .. a6_imbalance/  # Application benchmarks
+  tests/                  # pytest test suite
+  pyproject.toml          # Package configuration
+  LICENSE                 # MIT
+```
+## Development
+```bash
+git clone https://github.com/AhmetYSertel/routing-memory.git
+cd routing-memory
+pip install -e . pytest
+pytest tests/ -v
+```
+## Citation
+If you use RM in your research, please cite the Hybrid Governance Architecture (HGA) paper:
+```bibtex
+@article{sertel2026hga,
+  title={Hybrid Governance Architecture: Structured Memory and Adaptive Routing for LLM Agents},
+  author={Sertel, Ahmet Yigit},
+  year={2026}
+}
+```
+## License
+MIT

routing_memory-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,183 @@
+# Routing Memory (RM)
+**Lightweight long-term memory for LLM agents via vector-quantized routing.**
+RM replaces brute-force dense retrieval with a VQ codebook that compresses N items into K centroid buckets. Queries probe only the top-n centroids, rerank the collected candidates by dot product, and return the results — achieving **768x per-item compression** and **99%+ recall relative to the dense baseline** (n=4 probes, 5K items) at a fraction of the latency and memory cost.
+```
+pip install routing-memory
+```
+## Quick Start
+```python
+from rm import RoutingMemory
+memory = RoutingMemory()
+# Store memories
+memory.add("User prefers dark mode for all applications")
+memory.add("Meeting with Alice scheduled for March 15 at 2pm")
+memory.add("Project deadline is end of Q1 2026")
+# Search
+results = memory.search("what are the user's UI preferences?", top_k=3)
+for r in results:
+    print(f"  [{r['score']:.3f}] {r['text']}")
+```
+## Features
+| Feature | Description |
+|---------|-------------|
+| **VQ Codebook** | MiniBatchKMeans clustering with adaptive K = ceil(N/B_target) |
+| **Multi-probe retrieval** | Query top-n centroids, collect candidates, rerank by dot product |
+| **Score filtering** | Threshold-based filtering saves tokens by dropping low-relevance results |
+| **Drift detection** | Rolling qerr monitoring with automatic alarm when distribution shifts |
+| **Online adaptation** | EMA centroid updates, bucket splits, idle centroid pruning |
+| **Persistence** | SQLite backend for durable storage across sessions |
+| **Pluggable backends** | Swap embedding models or storage engines via clean interfaces |
+## Architecture
+```
+Query ──> Encode ──> Top-n Centroids ──> Collect Candidates ──> Dot-Product Rerank ──> Filter ──> Results
+                          |                                           |
+                     VQ Codebook                               Score Threshold
+                     (K centroids)                               (tau >= 0.3)
+```
+**Compression**: Each item needs only a 2-byte centroid assignment vs 384x4 = 1536 bytes for dense fp32. That's **768x compression**.
+**Recall**: With n=4 probes on 5K items: R@5 = 0.9916 (99.2% of dense baseline).
+## API Reference
+### `RoutingMemory`
+```python
+RoutingMemory(
+    db_path="rm_memory.db",      # SQLite path (None for in-memory)
+    embedding_model="all-MiniLM-L6-v2",  # any sentence-transformers model
+    n_probes=3,                  # centroids to probe per query
+    score_threshold=0.3,         # minimum retrieval score
+    seed=42,                     # random seed
+)
+```
+**Methods:**
+| Method | Description |
+|--------|-------------|
+| `add(text, item_id=None, metadata=None)` | Store a memory item, returns item ID |
+| `search(query, top_k=5, threshold=None)` | Semantic search, returns list of dicts |
+| `search_with_signals(query, top_k=5)` | Search with routing signals (confidence, margin, qerr) |
+| `stats()` | Memory statistics (item count, K, compression, drift) |
+| `codebook_info()` | Codebook details (K, dim, Gini, dead codes) |
+| `save()` | Persist codebook state |
+| `close()` | Close storage connection |
+### Low-level Components
+```python
+from rm import Codebook, L1Retriever, DriftMonitor
+# Direct codebook access
+cb = Codebook(dim=384, seed=42)
+cb.fit(embeddings, item_ids)
+centroid_id, qerr = cb.encode(query_embedding)
+conf = cb.conf(query_embedding)
+margin = cb.margin(query_embedding)
+# Retriever
+retriever = L1Retriever(cb, n_probes=4, top_k=10, score_threshold=0.3)
+result = retriever.query(query_embedding)  # returns L1Result
+# Drift monitor
+monitor = DriftMonitor()
+alarm = monitor.record(qerr, margin)  # returns DriftAlarm or None
+```
+## Experiment Suite
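+The RM repository includes 13 reproducible experiments (7 hypothesis tests + 6 application benchmarks). They are excluded from the PyPI distribution, so run them from a clone of the repository.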
+```bash
+# Run all experiments
+python -m rm.experiments.run_all
+# Run specific experiments
+python -m rm.experiments.run_all --select H1 H2 A4
+```
+### Results Summary
+| Exp | Name | Key Metric | Result |
+|-----|------|-----------|--------|
+| H1 | Codebook Fundamentals | Fidelity@K=64 | 0.7415 |
+| H2 | Retrieval Quality | R@5 (n=4 probes) | 0.9916 |
+| H3 | Score-Based Filtering | Savings@tau=0.7 | 59.1% (R@5=0.958) |
+| H4 | Adaptive K Heuristics | Best heuristic | sqrtN (lowest Gini) |
+| H5 | Drift Detection | Alarm latency | 11 episodes |
+| H6 | Multi-Encoder Robustness | RM/Dense ratio spread | 0.0036 |
+| H7 | Storage & Latency | Per-item compression | 768x vs fp32 |
+| A1 | MS-MARCO Passage Retrieval | R@5 (n=4) | 0.9585 |
+| A2 | LoCoMo Conversational Memory | R@5 | 0.9934 |
+| A3 | Enrichment Generalization | Delta RM | +0.053 |
+| A4 | Million-Scale (1M items) | R@5 | 0.8556 |
+| A5 | Pareto Frontier | RM dominant at n>=4 | 91.8% R@5 @ 2.8ms |
+| A6 | Bucket Imbalance | Gini (100K, K=256) | 0.3628 |
+All experiments use real embeddings (all-MiniLM-L6-v2, d=384) and seed=42.
+## Project Structure
+```
+rm/
+  rm/                     # Core package
+    __init__.py
+    codebook.py           # VQ codebook (MiniBatchKMeans, adaptive K)
+    retrieval.py          # Multi-probe retrieval with dot-product rerank
+    filtering.py          # Score-based result filtering
+    drift.py              # Distribution drift detection
+    memory.py             # RoutingMemory high-level API
+    embeddings/           # Pluggable embedding backends
+      base.py             # Abstract interface
+      local.py            # sentence-transformers wrapper
+    storage/              # Pluggable storage backends
+      base.py             # Abstract interface
+      sqlite.py           # SQLite persistence
+  experiments/            # 13 reproducible experiments
+    run_all.py            # Experiment runner (--select support)
+    shared/               # Data generation, plotting utilities
+    h1_codebook/ .. h7_storage/   # Hypothesis tests
+    a1_msmarco/ .. a6_imbalance/  # Application benchmarks
+  tests/                  # pytest test suite
+  pyproject.toml          # Package configuration
+  LICENSE                 # MIT
+```
+## Development
+```bash
+git clone https://github.com/AhmetYSertel/routing-memory.git
+cd routing-memory
+pip install -e . pytest
+pytest tests/ -v
+```
+## Citation
+If you use RM in your research, please cite the Hybrid Governance Architecture (HGA) paper:
+```bibtex
+@article{sertel2026hga,
+  title={Hybrid Governance Architecture: Structured Memory and Adaptive Routing for LLM Agents},
+  author={Sertel, Ahmet Yigit},
+  year={2026}
+}
+```
+## License
+MIT

routing_memory-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,42 @@
+[build-system]
+requires = ["setuptools>=68.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "routing-memory"
+version = "0.1.0"
+description = "Lightweight long-term memory for LLM agents via vector-quantized routing"
+readme = "README.md"
+license = {text = "MIT"}
+requires-python = ">=3.9"
+authors = [{name = "Ahmet Yigit Sertel"}]
+keywords = ["llm", "memory", "retrieval", "vector-quantization", "agent", "rag"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+    "numpy>=1.24",
+    "scikit-learn>=1.3",
+    "sentence-transformers>=2.2",
+]
+[project.urls]
+Homepage = "https://github.com/AhmetYSertel/routing-memory"
+Repository = "https://github.com/AhmetYSertel/routing-memory"
+Issues = "https://github.com/AhmetYSertel/routing-memory/issues"
+[tool.setuptools.packages.find]
+include = ["rm*"]
+[tool.setuptools.package-data]
+rm = ["py.typed"]

routing_memory-0.1.0/rm/__init__.py ADDED Viewed

@@ -0,0 +1,28 @@
+"""Routing Memory — Lightweight long-term memory via vector-quantized routing."""
+from .memory import RoutingMemory
+from .codebook import Codebook, CentroidBucket
+from .retrieval import L1Retriever, L1Result, RetrievalResult
+from .filtering import filter_by_score, filter_top_n
+from .drift import DriftMonitor, DriftAlarm
+from .embeddings import LocalEmbeddings, EmbeddingBackend
+from .storage import RMSQLiteBackend, RMStorageBackend
+__all__ = [
+    "RoutingMemory",
+    "Codebook",
+    "CentroidBucket",
+    "L1Retriever",
+    "L1Result",
+    "RetrievalResult",
+    "filter_by_score",
+    "filter_top_n",
+    "DriftMonitor",
+    "DriftAlarm",
+    "LocalEmbeddings",
+    "EmbeddingBackend",
+    "RMSQLiteBackend",
+    "RMStorageBackend",
+]
+__version__ = "0.1.0"