lattice-memory-e8 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lattice_memory_e8-0.2.0/LICENSE +21 -0
- lattice_memory_e8-0.2.0/PKG-INFO +492 -0
- lattice_memory_e8-0.2.0/README.md +459 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/PKG-INFO +492 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/SOURCES.txt +91 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/dependency_links.txt +1 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/entry_points.txt +5 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/requires.txt +28 -0
- lattice_memory_e8-0.2.0/lattice_memory_e8.egg-info/top_level.txt +1 -0
- lattice_memory_e8-0.2.0/latticememory/__init__.py +174 -0
- lattice_memory_e8-0.2.0/latticememory/agent_memory.py +207 -0
- lattice_memory_e8-0.2.0/latticememory/agent_sync.py +315 -0
- lattice_memory_e8-0.2.0/latticememory/calibrate_proxy.py +216 -0
- lattice_memory_e8-0.2.0/latticememory/cli.py +742 -0
- lattice_memory_e8-0.2.0/latticememory/dedup.py +91 -0
- lattice_memory_e8-0.2.0/latticememory/dns.py +171 -0
- lattice_memory_e8-0.2.0/latticememory/dual_encoder.py +799 -0
- lattice_memory_e8-0.2.0/latticememory/event_store.py +594 -0
- lattice_memory_e8-0.2.0/latticememory/fallbacks.py +156 -0
- lattice_memory_e8-0.2.0/latticememory/flywheel.py +739 -0
- lattice_memory_e8-0.2.0/latticememory/hamming_router.py +470 -0
- lattice_memory_e8-0.2.0/latticememory/ide/__init__.py +1 -0
- lattice_memory_e8-0.2.0/latticememory/ide/cli.py +178 -0
- lattice_memory_e8-0.2.0/latticememory/ide/config.py +72 -0
- lattice_memory_e8-0.2.0/latticememory/ide/lattice_ops.py +50 -0
- lattice_memory_e8-0.2.0/latticememory/ide/providers.py +58 -0
- lattice_memory_e8-0.2.0/latticememory/ide/vscode.py +37 -0
- lattice_memory_e8-0.2.0/latticememory/ide/workspace.py +15 -0
- lattice_memory_e8-0.2.0/latticememory/index.py +179 -0
- lattice_memory_e8-0.2.0/latticememory/integrations/__init__.py +1 -0
- lattice_memory_e8-0.2.0/latticememory/integrations/hf_datasets.py +94 -0
- lattice_memory_e8-0.2.0/latticememory/integrations/langchain.py +57 -0
- lattice_memory_e8-0.2.0/latticememory/integrations/llamaindex.py +123 -0
- lattice_memory_e8-0.2.0/latticememory/memory.py +889 -0
- lattice_memory_e8-0.2.0/latticememory/moe.py +229 -0
- lattice_memory_e8-0.2.0/latticememory/multi_cache.py +133 -0
- lattice_memory_e8-0.2.0/latticememory/observability.py +279 -0
- lattice_memory_e8-0.2.0/latticememory/observatory.py +1288 -0
- lattice_memory_e8-0.2.0/latticememory/pipeline.py +151 -0
- lattice_memory_e8-0.2.0/latticememory/proxy.py +1146 -0
- lattice_memory_e8-0.2.0/latticememory/proxy_server.py +81 -0
- lattice_memory_e8-0.2.0/latticememory/qa_bot.py +450 -0
- lattice_memory_e8-0.2.0/latticememory/rag/__init__.py +1 -0
- lattice_memory_e8-0.2.0/latticememory/rag/e8_retriever.py +490 -0
- lattice_memory_e8-0.2.0/latticememory/redis_store.py +234 -0
- lattice_memory_e8-0.2.0/latticememory/semantic_cache.py +369 -0
- lattice_memory_e8-0.2.0/latticememory/service.py +1016 -0
- lattice_memory_e8-0.2.0/latticememory/snap_trainer.py +1226 -0
- lattice_memory_e8-0.2.0/latticememory/sqlite_store.py +124 -0
- lattice_memory_e8-0.2.0/latticememory/stream.py +169 -0
- lattice_memory_e8-0.2.0/latticememory/text_runtime.py +178 -0
- lattice_memory_e8-0.2.0/latticememory/train_msmarco.py +11 -0
- lattice_memory_e8-0.2.0/latticememory/training.py +1510 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/__init__.py +21 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/clause_coder.py +149 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/content_moderator.py +144 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/edge_memory.py +170 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/private_sync.py +120 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/prompt_firewall.py +268 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/rate_limiter.py +250 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/soc_monitor.py +134 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/ticket_analyzer.py +109 -0
- lattice_memory_e8-0.2.0/latticememory/verticals/training_cleaner.py +256 -0
- lattice_memory_e8-0.2.0/pyproject.toml +47 -0
- lattice_memory_e8-0.2.0/setup.cfg +4 -0
- lattice_memory_e8-0.2.0/tests/test_agent_swarm.py +195 -0
- lattice_memory_e8-0.2.0/tests/test_agent_sync.py +173 -0
- lattice_memory_e8-0.2.0/tests/test_benchmarks.py +1023 -0
- lattice_memory_e8-0.2.0/tests/test_browser_extension.py +105 -0
- lattice_memory_e8-0.2.0/tests/test_cross_model_dns.py +107 -0
- lattice_memory_e8-0.2.0/tests/test_fallback_quantization.py +232 -0
- lattice_memory_e8-0.2.0/tests/test_flywheel.py +336 -0
- lattice_memory_e8-0.2.0/tests/test_gap_fixes.py +678 -0
- lattice_memory_e8-0.2.0/tests/test_hamming_router.py +389 -0
- lattice_memory_e8-0.2.0/tests/test_hf_datasets_integration.py +50 -0
- lattice_memory_e8-0.2.0/tests/test_ide_cli.py +89 -0
- lattice_memory_e8-0.2.0/tests/test_ide_config.py +45 -0
- lattice_memory_e8-0.2.0/tests/test_ide_providers.py +53 -0
- lattice_memory_e8-0.2.0/tests/test_ide_vscode.py +58 -0
- lattice_memory_e8-0.2.0/tests/test_langchain_cache.py +83 -0
- lattice_memory_e8-0.2.0/tests/test_lattice_index.py +154 -0
- lattice_memory_e8-0.2.0/tests/test_llamaindex_store.py +92 -0
- lattice_memory_e8-0.2.0/tests/test_new_features.py +533 -0
- lattice_memory_e8-0.2.0/tests/test_observatory.py +893 -0
- lattice_memory_e8-0.2.0/tests/test_phase0b_features.py +54 -0
- lattice_memory_e8-0.2.0/tests/test_pipeline.py +89 -0
- lattice_memory_e8-0.2.0/tests/test_proxy.py +917 -0
- lattice_memory_e8-0.2.0/tests/test_snap_trainer.py +715 -0
- lattice_memory_e8-0.2.0/tests/test_sqlite_persistence.py +146 -0
- lattice_memory_e8-0.2.0/tests/test_stream.py +156 -0
- lattice_memory_e8-0.2.0/tests/test_training_pipeline.py +541 -0
- lattice_memory_e8-0.2.0/tests/test_verticals.py +1008 -0
- lattice_memory_e8-0.2.0/tests/test_wasm_parity.py +42 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dfrokido
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,492 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lattice-memory-e8
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: LatticeMemory — E8 lattice semantic cache and LLM proxy. Calibrated Hamming routing, zero-false-positive intent caching, compliance mode.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/sangmorg1-debug/latticememory
|
|
7
|
+
Project-URL: HuggingFace Space, https://huggingface.co/spaces/dfrokido/LatticeMemory
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: torch>=2.0.0
|
|
12
|
+
Requires-Dist: transformers>=4.30.0
|
|
13
|
+
Requires-Dist: sentence-transformers>=3.0.0
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Provides-Extra: langchain
|
|
16
|
+
Requires-Dist: langchain-core>=0.1.0; extra == "langchain"
|
|
17
|
+
Provides-Extra: faiss
|
|
18
|
+
Requires-Dist: faiss-cpu; extra == "faiss"
|
|
19
|
+
Provides-Extra: llamaindex
|
|
20
|
+
Requires-Dist: llama-index-core; extra == "llamaindex"
|
|
21
|
+
Provides-Extra: hf
|
|
22
|
+
Requires-Dist: datasets>=2.0.0; extra == "hf"
|
|
23
|
+
Provides-Extra: proxy
|
|
24
|
+
Requires-Dist: fastapi; extra == "proxy"
|
|
25
|
+
Requires-Dist: uvicorn; extra == "proxy"
|
|
26
|
+
Requires-Dist: httpx; extra == "proxy"
|
|
27
|
+
Provides-Extra: redis
|
|
28
|
+
Requires-Dist: redis>=4.0.0; extra == "redis"
|
|
29
|
+
Provides-Extra: training
|
|
30
|
+
Requires-Dist: datasets>=2.0.0; extra == "training"
|
|
31
|
+
Requires-Dist: tqdm; extra == "training"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# LatticeMemory
|
|
35
|
+
|
|
36
|
+
**Semantic cache, dedup, and hybrid memory — 32× compressed E8 keys for instant repeat-query hits, dense fallback for novel retrieval.**
|
|
37
|
+
|
|
38
|
+
LatticeMemory uses the [E8 lattice](https://en.wikipedia.org/wiki/E8_lattice) — the densest sphere packing in 8 dimensions — as a deterministic address space for text embeddings. Every 1024-dim embedding snaps to a 128-byte E8 key. Identical or near-identical text lands on the same key; novel queries fall through to a dense float32/Int8 fallback.
|
|
39
|
+
|
|
40
|
+
[**Live Demo →**](https://huggingface.co/spaces/dfrokido/LatticeMemory) | [**Model →**](https://huggingface.co/dfrokido/bge-large-e8-snap) | [**GitHub →**](https://github.com/sangmorg1-debug/latticememory)
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## What it's for
|
|
45
|
+
|
|
46
|
+
| Workload | E8 path | Fallback needed? |
|
|
47
|
+
| --- | --- | --- |
|
|
48
|
+
| Repeat / paraphrase LLM queries (cache) | ✅ O(1) exact or Hamming hit | No |
|
|
49
|
+
| Semantic deduplication, near-duplicate detection | ✅ Key collision = duplicate | No |
|
|
50
|
+
| Dataset quality filtering, semantic sharding | ✅ Stable cluster addresses | No |
|
|
51
|
+
| IoT/command normalization (symmetric vocab) | ✅ Fixed command set → fixed keys | No |
|
|
52
|
+
| **Asymmetric QA/passage search (RAG)** | ❌ Query ≠ passage in E8 space | **Yes — Int8 or float32 required** |
|
|
53
|
+
|
|
54
|
+
E8 keys route fast for content that is semantically identical or near-identical. They are not a replacement for vector search on asymmetric workloads where the query text and the correct passage are structurally different.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## Benchmarks
|
|
59
|
+
|
|
60
|
+
**Compression (bge-large 1024-dim):**
|
|
61
|
+
|
|
62
|
+
| Method | Compression | Index / 1M docs | Retrieval p50 @ 100K docs |
|
|
63
|
+
| --- | ---: | ---: | ---: |
|
|
64
|
+
| Float32 | 1× | 4.1 GB | 20.8 ms |
|
|
65
|
+
| **LatticeMemory E8 keys** | **32×** | **0.13 GB** | O(1) on key hit |
|
|
66
|
+
|
|
67
|
+
**Fallback quality (1K docs, 100 paraphrase queries, recall vs float32):**
|
|
68
|
+
|
|
69
|
+
| Fallback | Compression vs float32 | Recall@10 overlap | Top-1 agreement | Search p50 |
|
|
70
|
+
| --- | ---: | ---: | ---: | ---: |
|
|
71
|
+
| Float32 | 1× | 100.0% | 100.0% | 0.14 ms |
|
|
72
|
+
| Int8 | 4× | 95.1% | 91.0% | 1.97 ms |
|
|
73
|
+
| Int4 | 8× | 12.1% | 1.0% | 4.21 ms |
|
|
74
|
+
|
|
75
|
+
- **Int8 fallback** is the recommended fallback for RAG/QA — 4× smaller than float32, with ~95% Recall@10 overlap against float32.
|
|
76
|
+
- **STS quality:** `bge-large-e8-snap` scores 0.8714 vs 0.8637 float baseline (+0.0077).
|
|
77
|
+
|
|
78
|
+
> **Compression basis:** 1 address byte per 8-dim block × 128 blocks = 128 bytes for 1024-dim vs 4,096 bytes float32 = 32×. This applies to E8 key storage only; hybrid mode also stores the dense index.
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Install
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install lattice-memory-e8
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
The PyPI distribution is named `lattice-memory-e8` (the plain `latticememory` name
|
|
89
|
+
collides with an unrelated existing package on PyPI) — the import name is unaffected:
|
|
90
|
+
`import latticememory` works exactly as shown throughout this README.
|
|
91
|
+
|
|
92
|
+
Optional extras:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
pip install 'lattice-memory-e8[proxy]' # FastAPI proxy server (fastapi, uvicorn, httpx)
|
|
96
|
+
pip install 'lattice-memory-e8[redis]' # Redis backend for multi-instance caches
|
|
97
|
+
pip install 'lattice-memory-e8[hf]' # HuggingFace datasets integration
|
|
98
|
+
pip install 'lattice-memory-e8[faiss]' # FAISS vector fallback
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## Quickstart
|
|
104
|
+
|
|
105
|
+
### Semantic cache (the primary use case)
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from latticememory import LatticeIndex
|
|
109
|
+
|
|
110
|
+
index = LatticeIndex() # downloads dfrokido/bge-large-e8-snap on first run (~500MB)
|
|
111
|
+
|
|
112
|
+
index.add([
|
|
113
|
+
"What is the refund policy?",
|
|
114
|
+
"How do I reset my password?",
|
|
115
|
+
"Where is my order?",
|
|
116
|
+
])
|
|
117
|
+
|
|
118
|
+
# Exact text → guaranteed O(1) lattice_exact hit
|
|
119
|
+
result = index.search("What is the refund policy?", top_k=1)
|
|
120
|
+
print(result[0].retrieval_path) # lattice_exact
|
|
121
|
+
|
|
122
|
+
# Near-paraphrase → lattice_exact or Hamming hit (same E8 neighborhood)
|
|
123
|
+
result2 = index.search("What's your return policy?", top_k=1)
|
|
124
|
+
print(result2[0].retrieval_path) # lattice_exact or lattice_hamming
|
|
125
|
+
|
|
126
|
+
print(index.stats())
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Semantic cache with answer lookup
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from latticememory import RFSnapSemanticCache, RFSnapTextMemory, RFSnapLatticeMemory
|
|
133
|
+
from sentence_transformers import SentenceTransformer
|
|
134
|
+
|
|
135
|
+
encoder = SentenceTransformer("dfrokido/bge-large-e8-snap")
|
|
136
|
+
lm = RFSnapLatticeMemory(d_model=1024)
|
|
137
|
+
rt = RFSnapTextMemory(encoder=encoder, d_model=1024, memory=lm)
|
|
138
|
+
cache = RFSnapSemanticCache(runtime=rt)
|
|
139
|
+
|
|
140
|
+
cache.put("What is the refund policy?", value="30-day returns, full refund.")
|
|
141
|
+
result = cache.get("What's your return policy?") # paraphrase hit
|
|
142
|
+
print(result.hit) # True
|
|
143
|
+
print(result.value) # "30-day returns, full refund."
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Hybrid RAG / document search
|
|
147
|
+
|
|
148
|
+
For asymmetric search (user questions against document passages), use hybrid mode — E8 for cache hits, dense fallback for novel queries:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from latticememory import LatticeIndex
|
|
152
|
+
|
|
153
|
+
index = LatticeIndex(mode="hybrid") # Int8 fallback enabled automatically
|
|
154
|
+
index.add([
|
|
155
|
+
"The refund window is 30 days from purchase date.",
|
|
156
|
+
"Password resets are sent to your registered email.",
|
|
157
|
+
"Orders ship within 2 business days.",
|
|
158
|
+
])
|
|
159
|
+
|
|
160
|
+
# Novel query → routes through E8, misses, falls back to Int8 dense search
|
|
161
|
+
result = index.search("Can I return something after a month?", top_k=1)
|
|
162
|
+
print(result[0].retrieval_path) # fallback
|
|
163
|
+
print(result[0].text) # The refund window is 30 days...
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
## HammingRouter — Catch Paraphrases at Scale
|
|
169
|
+
|
|
170
|
+
`HammingRouter` caches full Q&A pairs and matches incoming queries by Hamming distance on their E8 keys. A maximum-distance threshold of 70–111 differing blocks (out of 128) catches paraphrases while controlling false positives.
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
from latticememory import HammingRouter
|
|
174
|
+
|
|
175
|
+
router = HammingRouter(threshold=100) # tune per domain
|
|
176
|
+
|
|
177
|
+
# Index known Q&A pairs
|
|
178
|
+
router.add("What is your cancellation policy?", answer="Cancel anytime, no fee.", intent="cancel")
|
|
179
|
+
router.add("How do I cancel my subscription?", answer="Cancel anytime, no fee.", intent="cancel")
|
|
180
|
+
|
|
181
|
+
# Match a paraphrase
|
|
182
|
+
match = router.match("Can I cancel at any time?")
|
|
183
|
+
if match:
|
|
184
|
+
print(match.answer) # "Cancel anytime, no fee."
|
|
185
|
+
print(match.hamming_distance) # e.g. 97
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
**Threshold guidance (BANKING77 benchmark):**
|
|
189
|
+
|
|
190
|
+
| Threshold | Recall | FP rate | Use case |
|
|
191
|
+
| --- | --- | --- | --- |
|
|
192
|
+
| 70 | 4.5% | 0.0% | Proxy default — zero false positives |
|
|
193
|
+
| 100 | 52.5% | 0.0% | Practical helpdesk operating point |
|
|
194
|
+
| 111 | 84.0% | 4.5% | Router default — calibrate per domain |
|
|
195
|
+
|
|
196
|
+
---
|
|
197
|
+
|
|
198
|
+
## LLM Cache Proxy
|
|
199
|
+
|
|
200
|
+
Drop-in OpenAI-compatible HTTP proxy. The same prompt or a near-paraphrase returns the cached response without hitting the upstream model.
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
pip install 'lattice-memory-e8[proxy]'
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
```bash
|
|
207
|
+
lattice serve --key sk-... --cache helpdesk.db --miss-log misses.jsonl --port 8000
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Or with Docker:
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
OPENAI_API_KEY=sk-... docker-compose up
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
Point your OpenAI client at `http://localhost:8000` — no other code changes needed.
|
|
217
|
+
|
|
218
|
+
**Features:**
|
|
219
|
+
|
|
220
|
+
- `X-Lattice-Cache: HIT/MISS` and `X-Lattice-Savings-USD` headers on every response
|
|
221
|
+
- Streaming SSE + non-streaming JSON
|
|
222
|
+
- SQLite persistence — survives process restart
|
|
223
|
+
- HammingRouter approximate cache in `shadow` or `serve` mode
|
|
224
|
+
- Per-entry TTL expiry
|
|
225
|
+
- Compliance mode — only serve pre-approved responses (for regulated industries)
|
|
226
|
+
- Admin CRUD API gated by `X-Lattice-Admin-Key`
|
|
227
|
+
- Warm-start from CSV/JSON/JSONL
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## LangChain Integration
|
|
232
|
+
|
|
233
|
+
```bash
|
|
234
|
+
pip install lattice-memory-e8 langchain-core langchain-openai
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from langchain_openai import ChatOpenAI
|
|
239
|
+
from langchain_core.globals import set_llm_cache
|
|
240
|
+
from latticememory.integrations.langchain import LatticeMemoryCache
|
|
241
|
+
|
|
242
|
+
set_llm_cache(LatticeMemoryCache())
|
|
243
|
+
llm = ChatOpenAI(model="gpt-4o")
|
|
244
|
+
|
|
245
|
+
llm.invoke("What is the capital of France?") # miss — calls API
|
|
246
|
+
llm.invoke("What is the capital of France?") # hit — O(1) key match
|
|
247
|
+
llm.invoke("Which city is France's capital?") # likely hit — same E8 neighborhood
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
## Deduplication
|
|
253
|
+
|
|
254
|
+
```python
|
|
255
|
+
from latticememory import LatticeTrainingCleaner, RFSnapSemanticCache
|
|
256
|
+
|
|
257
|
+
# batch dedup
|
|
258
|
+
cleaner = LatticeTrainingCleaner(cache)  # cache: an RFSnapSemanticCache, built as in "Semantic cache with answer lookup"
|
|
259
|
+
result = cleaner.clean([
|
|
260
|
+
"The quick brown fox jumps over the lazy dog.",
|
|
261
|
+
"A fast brown fox leaped over a sleeping dog.", # near-duplicate
|
|
262
|
+
"Machine learning is a branch of artificial intelligence.",
|
|
263
|
+
])
|
|
264
|
+
print(result.kept_count) # 2
|
|
265
|
+
print(result.duplicate_count) # 1
|
|
266
|
+
print(result.dedup_rate) # 0.333...
|
|
267
|
+
|
|
268
|
+
# streaming dedup (generator)
|
|
269
|
+
for unique_text in cleaner.stream(iter(large_corpus)):
|
|
270
|
+
process(unique_text)
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
Or via CLI:
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
lattice dedup corpus.jsonl --text-col text --output corpus_deduped.jsonl
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
---
|
|
280
|
+
|
|
281
|
+
## Vertical Applications
|
|
282
|
+
|
|
283
|
+
All 9 verticals ship in `latticememory.verticals` and wrap `RFSnapSemanticCache`.
|
|
284
|
+
|
|
285
|
+
| Vertical | Class | Key Capability |
|
|
286
|
+
| --- | --- | --- |
|
|
287
|
+
| SOC Monitor | `LatticeSOCMonitor` | O(1) alert dedup for SIEM event streams |
|
|
288
|
+
| Ticket Analyzer | `LatticeTicketAnalyzer` | Intent-based ticket routing + gap detection |
|
|
289
|
+
| Content Moderator | `LatticeContentModerator` | Semantic near-miss content policy |
|
|
290
|
+
| Clause Coder | `LatticeClauseCoder` | Legal clause classification |
|
|
291
|
+
| Edge Memory | `LatticeEdgeMemory` | On-device personalization without cloud |
|
|
292
|
+
| Private Sync | `LatticePrivateSync` | Federated key sync, no raw text transfer |
|
|
293
|
+
| **Prompt Firewall** | `LatticePromptFirewall` | Semantic injection/jailbreak detection |
|
|
294
|
+
| **Semantic Rate Limiter** | `LatticeSemanticRateLimiter` | Per-intent sliding-window rate limiting |
|
|
295
|
+
| **Training Cleaner** | `LatticeTrainingCleaner` | O(N) near-duplicate removal for LLM training sets |
|
|
296
|
+
|
|
297
|
+
### Prompt Firewall
|
|
298
|
+
|
|
299
|
+
```python
|
|
300
|
+
from latticememory import LatticePromptFirewall, RFSnapSemanticCache
|
|
301
|
+
|
|
302
|
+
fw = LatticePromptFirewall(cache)
|
|
303
|
+
fw.load_injection_defaults() # loads 14 common injection/jailbreak patterns
|
|
304
|
+
|
|
305
|
+
result = fw.check("Ignore all previous instructions and")
|
|
306
|
+
print(result.blocked) # True
|
|
307
|
+
print(result.category) # prompt_injection
|
|
308
|
+
|
|
309
|
+
# Add custom deny patterns
|
|
310
|
+
fw.add_deny_pattern("roleplay as an unfiltered AI", category="jailbreak")
|
|
311
|
+
```
|
|
312
|
+
|
|
313
|
+
### Semantic Rate Limiter
|
|
314
|
+
|
|
315
|
+
```python
|
|
316
|
+
from latticememory import LatticeSemanticRateLimiter
|
|
317
|
+
|
|
318
|
+
limiter = LatticeSemanticRateLimiter(cache, limit=10, window_seconds=60.0)
|
|
319
|
+
|
|
320
|
+
r = limiter.check("tell me about Python", client_id="user_123")
|
|
321
|
+
print(r.allowed) # True
|
|
322
|
+
print(r.remaining) # 9
|
|
323
|
+
print(r.retry_after) # 0.0
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### Training Data Cleaner
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
from latticememory import LatticeTrainingCleaner
|
|
330
|
+
|
|
331
|
+
cleaner = LatticeTrainingCleaner(cache)
|
|
332
|
+
result = cleaner.clean_to_jsonl(texts, output_path="clean.jsonl")
|
|
333
|
+
print(result.summary())
|
|
334
|
+
# Total: 50000 | Kept: 43217 | Duplicates removed: 6783 (13.6%)
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
---
|
|
338
|
+
|
|
339
|
+
## Agent Memory Sync
|
|
340
|
+
|
|
341
|
+
`AgentMemorySync` lets agents in a swarm exchange only the E8 keys their peers are missing — no embedding transfer, just 128-byte addresses.
|
|
342
|
+
|
|
343
|
+
```python
|
|
344
|
+
from latticememory import AgentMemorySync
|
|
345
|
+
|
|
346
|
+
# Two independent agents
|
|
347
|
+
agent_a = AgentMemorySync(runtime=rt_a)
|
|
348
|
+
agent_b = AgentMemorySync(runtime=rt_b)
|
|
349
|
+
|
|
350
|
+
# Register peers
|
|
351
|
+
agent_a.register_peer(agent_b)
|
|
352
|
+
|
|
353
|
+
# Pull-sync: B gets everything A knows
|
|
354
|
+
agent_b.sync_from_peer(agent_a)
|
|
355
|
+
|
|
356
|
+
# Push-broadcast: A broadcasts a new key to all registered peers
|
|
357
|
+
new_key = next(iter(agent_a.get_known_keys()))
|
|
358
|
+
agent_a.share(new_key) # agent_b receives it immediately
|
|
359
|
+
|
|
360
|
+
# Diff: check what each side is missing
|
|
361
|
+
diff = agent_a.diff(agent_b.get_known_keys())
|
|
362
|
+
# {"extra": set(), "missing": set()} ← fully in sync
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
See `examples/agent_swarm_demo.py` for a complete end-to-end scenario.
|
|
366
|
+
|
|
367
|
+
---
|
|
368
|
+
|
|
369
|
+
## Active Learning Flywheel
|
|
370
|
+
|
|
371
|
+
Every proxy cache miss can be logged. `LatticeFlywheel` clusters miss logs by E8 key proximity to surface emerging intent gaps — groups of queries the cache doesn't cover yet.
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
from latticememory import LatticeFlywheel
|
|
375
|
+
|
|
376
|
+
fw = LatticeFlywheel("misses.jsonl")
|
|
377
|
+
|
|
378
|
+
# From your proxy, log each miss:
|
|
379
|
+
fw.log_miss("How do I bulk export my contacts?", e8_key_hex=e8_key)
|
|
380
|
+
|
|
381
|
+
# Detect drifting intents (new query patterns emerging):
|
|
382
|
+
drifting = fw.detect_drift(window_seconds=7*86400, min_delta=5)
|
|
383
|
+
for cluster in drifting:
|
|
384
|
+
print(f"+{cluster['delta']} queries: {cluster['representative']!r}")
|
|
385
|
+
|
|
386
|
+
# Check if re-training is warranted:
|
|
387
|
+
if fw.should_finetune():
|
|
388
|
+
print("Recommend: add Q&A pairs for these new intent clusters")
|
|
389
|
+
```
|
|
390
|
+
|
|
391
|
+
Or via CLI:
|
|
392
|
+
|
|
393
|
+
```bash
|
|
394
|
+
lattice drift --log misses.jsonl --window 604800 --export drift_report.json
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
---
|
|
398
|
+
|
|
399
|
+
## CLI Reference
|
|
400
|
+
|
|
401
|
+
| Command | What it does |
|
|
402
|
+
| --- | --- |
|
|
403
|
+
| `lattice populate` | Load Q&A pairs from CSV/JSON into a SQLite cache |
|
|
404
|
+
| `lattice inspect` | Print cache statistics |
|
|
405
|
+
| `lattice export` | Export all cache entries to a portable JSONL file |
|
|
406
|
+
| `lattice import` | Re-import a JSONL export into a new cache |
|
|
407
|
+
| `lattice gaps` | Show top miss clusters (unmet query intents) |
|
|
408
|
+
| `lattice drift` | Detect drifting intents + finetune recommendation |
|
|
409
|
+
| `lattice dedup` | Deduplicate a text file using E8 lattice hashing |
|
|
410
|
+
| `lattice serve` | Start the proxy server |
|
|
411
|
+
| `lattice analytics` | Fetch live analytics from a running proxy |
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## CLI IDE
|
|
416
|
+
|
|
417
|
+
`lattice ide` opens a local terminal command center for bring-your-own-key (BYOK) AI chat, cache operations,
|
|
418
|
+
proxy diagnostics, vertical discovery, and VS Code CLI bridging.
|
|
419
|
+
|
|
420
|
+
```bash
|
|
421
|
+
export LATTICE_IDE_BASE_URL=https://api.openai.com/v1
|
|
422
|
+
export LATTICE_IDE_MODEL=gpt-4o-mini
|
|
423
|
+
export LATTICE_IDE_API_KEY=sk-...
|
|
424
|
+
|
|
425
|
+
lattice ide chat "Summarize the current cache analytics"
|
|
426
|
+
lattice ide cache inspect --cache helpdesk.db
|
|
427
|
+
lattice ide proxy doctor --port 8000
|
|
428
|
+
lattice ide verticals list
|
|
429
|
+
lattice ide vscode status
|
|
430
|
+
```
|
|
431
|
+
|
|
432
|
+
Run `lattice ide` with no arguments for an interactive `lm>` shell. The first IDE
|
|
433
|
+
slice uses OpenAI-compatible chat endpoints, so it works with OpenAI and compatible
|
|
434
|
+
BYOK gateways. VS Code integration uses the installed `code` command; it does not
|
|
435
|
+
require a VS Code extension.
|
|
436
|
+
|
|
437
|
+
---
|
|
438
|
+
|
|
439
|
+
## How It Works
|
|
440
|
+
|
|
441
|
+
```text
|
|
442
|
+
float32 embedding [1024-dim]
|
|
443
|
+
→ 128 blocks of 8 floats
|
|
444
|
+
→ each block → nearest E8 Shell-1 point (240 possible addresses)
|
|
445
|
+
→ 1-byte address per block = 128-byte E8 key ← used for cache routing (32× vs float32)
|
|
446
|
+
→ optional 2-byte scale per block = full 384-byte quantized representation
|
|
447
|
+
|
|
448
|
+
query → same key → O(1) lattice_exact lookup
|
|
449
|
+
query → Hamming-N neighbor → O(1) HammingRouter lookup
|
|
450
|
+
query → no neighbor found → dense fallback (Int8 or float32 ANN)
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
The E8 key is a **deterministic hash of meaning** — not an approximation. Two texts that are semantically identical land on the same key every time, without cosine threshold tuning.
|
|
454
|
+
|
|
455
|
+
---
|
|
456
|
+
|
|
457
|
+
## Redis Backend
|
|
458
|
+
|
|
459
|
+
For multi-instance deployments sharing a single cache:
|
|
460
|
+
|
|
461
|
+
```python
|
|
462
|
+
from latticememory import LatticeRedisStore, RFSnapSemanticCache, patch_cache_with_redis
|
|
463
|
+
|
|
464
|
+
cache = RFSnapSemanticCache(...)
|
|
465
|
+
patch_cache_with_redis(cache, redis_url="redis://localhost:6379", namespace="helpdesk")
|
|
466
|
+
# Now cache._entries reads/writes Redis instead of the in-memory dict
|
|
467
|
+
```
|
|
468
|
+
|
|
469
|
+
---
|
|
470
|
+
|
|
471
|
+
## Test Suite
|
|
472
|
+
|
|
473
|
+
508 tests, all passing:
|
|
474
|
+
|
|
475
|
+
```bash
|
|
476
|
+
python -m pytest tests/ -q
|
|
477
|
+
# 508 passed in ~70s
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
---
|
|
481
|
+
|
|
482
|
+
## Design Partners
|
|
483
|
+
|
|
484
|
+
We're looking for 3 teams with high-repetition LLM workloads (support bots, document QA, internal search) to pilot semantic cache + dedup at no cost.
|
|
485
|
+
|
|
486
|
+
**[dfrokido@gmail.com](mailto:dfrokido@gmail.com)**
|
|
487
|
+
|
|
488
|
+
---
|
|
489
|
+
|
|
490
|
+
## License
|
|
491
|
+
|
|
492
|
+
MIT
|