lix-open-cache 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lix_open_cache-0.1.0/PKG-INFO +10 -0
- lix_open_cache-0.1.0/README.md +489 -0
- lix_open_cache-0.1.0/lix_open_cache/__init__.py +20 -0
- lix_open_cache-0.1.0/lix_open_cache/config.py +60 -0
- lix_open_cache-0.1.0/lix_open_cache/context_window.py +59 -0
- lix_open_cache-0.1.0/lix_open_cache/conversation_archive.py +236 -0
- lix_open_cache-0.1.0/lix_open_cache/coordinator.py +94 -0
- lix_open_cache-0.1.0/lix_open_cache/huffman_codec.py +166 -0
- lix_open_cache-0.1.0/lix_open_cache/hybrid_cache.py +333 -0
- lix_open_cache-0.1.0/lix_open_cache/redis_pool.py +57 -0
- lix_open_cache-0.1.0/lix_open_cache/semantic_cache.py +186 -0
- lix_open_cache-0.1.0/lix_open_cache.egg-info/PKG-INFO +10 -0
- lix_open_cache-0.1.0/lix_open_cache.egg-info/SOURCES.txt +16 -0
- lix_open_cache-0.1.0/lix_open_cache.egg-info/dependency_links.txt +1 -0
- lix_open_cache-0.1.0/lix_open_cache.egg-info/requires.txt +6 -0
- lix_open_cache-0.1.0/lix_open_cache.egg-info/top_level.txt +1 -0
- lix_open_cache-0.1.0/pyproject.toml +20 -0
- lix_open_cache-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lix-open-cache
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Reusable session management and multi-layer Redis caching for conversational AI
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: redis>=5.0
|
|
7
|
+
Requires-Dist: numpy>=1.24
|
|
8
|
+
Requires-Dist: loguru>=0.7
|
|
9
|
+
Provides-Extra: lz4
|
|
10
|
+
Requires-Dist: lz4>=4.0; extra == "lz4"
|
|
@@ -0,0 +1,489 @@
|
|
|
1
|
+
# lix_open_cache
|
|
2
|
+
|
|
3
|
+
Standalone multi-layer caching and session management for conversational AI. Extracted from [lixSearch](https://github.com/pollinations/lixsearch) into a reusable, pip-installable package.
|
|
4
|
+
|
|
5
|
+
Drop it into any chatbot, search assistant, or RAG pipeline to get production-grade session memory, semantic caching, and compressed disk archival out of the box.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
pip install lix-open-cache
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Research Paper
|
|
12
|
+
|
|
13
|
+
This library is described in detail in our research paper:
|
|
14
|
+
|
|
15
|
+
> **A Three-Layer Caching Architecture for Low-Latency LLM Web Search on Commodity CPU Hardware**
|
|
16
|
+
> Ayushman Bhattacharya (Pollinations.ai), 2026
|
|
17
|
+
> [Read the paper (PDF)](https://github.com/pollinations/lixsearch/blob/main/docs/paper/lix_cache_paper.pdf)
|
|
18
|
+
|
|
19
|
+
The paper covers the origin story (building a cost-effective alternative to SearchGPT), the architecture and design decisions behind each caching layer, production evaluation on an 8-vCPU server (89.3% hit rate, 0.1ms latency, 1,000x cost reduction), and the Huffman compression scheme for conversation archival.
|
|
20
|
+
|
|
21
|
+
If you use this library in your research, please cite:
|
|
22
|
+
|
|
23
|
+
```bibtex
|
|
24
|
+
@article{bhattacharya2026lixcache,
|
|
25
|
+
title={A Three-Layer Caching Architecture for Low-Latency LLM Web Search on Commodity CPU Hardware},
|
|
26
|
+
author={Bhattacharya, Ayushman},
|
|
27
|
+
year={2026},
|
|
28
|
+
url={https://github.com/pollinations/lixsearch/blob/main/docs/paper/lix_cache_paper.pdf},
|
|
29
|
+
note={Licensed under CC BY-NC-ND 4.0}
|
|
30
|
+
}
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## What it solves
|
|
34
|
+
|
|
35
|
+
| Problem | Layer | Solution |
|
|
36
|
+
|---|---|---|
|
|
37
|
+
| "What did we just talk about?" | Session Context Window (Redis DB 2) | Rolling window of 20 messages in Redis, overflow to Huffman-compressed disk |
|
|
38
|
+
| "Didn't we already answer this?" | Semantic Query Cache (Redis DB 0) | Cache LLM responses keyed by embedding similarity (cosine ≥ 0.90) |
|
|
39
|
+
| "We already embedded this URL" | URL Embedding Cache (Redis DB 1) | Global cache of URL → embedding vector, shared across sessions |
|
|
40
|
+
|
|
41
|
+
## Architecture
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
User message arrives
|
|
45
|
+
│
|
|
46
|
+
├─ ① SessionContextWindow (Redis DB 2)
|
|
47
|
+
│ ├─ get_context() → last 20 messages from Redis
|
|
48
|
+
│ ├─ If Redis empty → load from .huff archive → re-hydrate
|
|
49
|
+
│ └─ Inject into LLM prompt as conversation history
|
|
50
|
+
│
|
|
51
|
+
├─ ② SemanticCacheRedis (Redis DB 0)
|
|
52
|
+
│ ├─ Compute query embedding vector
|
|
53
|
+
│ ├─ cosine_similarity(cached, new) ≥ 0.90?
|
|
54
|
+
│ │ ├─ HIT → return cached response (skip LLM)
|
|
55
|
+
│ │ └─ MISS → continue pipeline
|
|
56
|
+
│ └─ After LLM: cache (embedding, response) for 5 min
|
|
57
|
+
│
|
|
58
|
+
├─ ③ URLEmbeddingCache (Redis DB 1)
|
|
59
|
+
│ ├─ Before embedding a URL: check Redis
|
|
60
|
+
│ │ ├─ HIT → use cached vector (~0ms vs ~200ms)
|
|
61
|
+
│ │ └─ MISS → compute, cache for 24h
|
|
62
|
+
│ └─ Global (shared across all sessions)
|
|
63
|
+
│
|
|
64
|
+
└─ HybridConversationCache (backing store)
|
|
65
|
+
├─ Hot: Redis ordered list (LPUSH/RPOP, 20-msg window)
|
|
66
|
+
├─ Cold: Huffman-compressed .huff files on disk
|
|
67
|
+
├─ Overflow: oldest messages spill hot → cold
|
|
68
|
+
└─ LRU daemon: idle 2h → migrate all to disk, free Redis
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Package structure
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
lix_open_cache/
|
|
75
|
+
├── pyproject.toml
|
|
76
|
+
├── README.md
|
|
77
|
+
└── lix_open_cache/
|
|
78
|
+
├── __init__.py # public API
|
|
79
|
+
├── config.py # CacheConfig dataclass
|
|
80
|
+
├── redis_pool.py # Connection-pooled Redis factory
|
|
81
|
+
├── huffman_codec.py # Canonical Huffman encoder/decoder
|
|
82
|
+
├── conversation_archive.py # .huff disk persistence
|
|
83
|
+
├── hybrid_cache.py # Redis hot + disk cold + LRU eviction
|
|
84
|
+
├── semantic_cache.py # SemanticCacheRedis + URLEmbeddingCache
|
|
85
|
+
├── context_window.py # SessionContextWindow (wraps hybrid_cache)
|
|
86
|
+
└── coordinator.py # CacheCoordinator (orchestrates all 3)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Installation
|
|
90
|
+
|
|
91
|
+
**From PyPI** (once published):
|
|
92
|
+
```bash
|
|
93
|
+
pip install lix-open-cache
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**From source:**
|
|
97
|
+
```bash
|
|
98
|
+
git clone https://github.com/pollinations/lixsearch.git
|
|
99
|
+
cd lixsearch/lix_open_cache
|
|
100
|
+
pip install -e .
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
**Dependencies:**
|
|
104
|
+
|
|
105
|
+
| Package | Version | Why |
|
|
106
|
+
|---|---|---|
|
|
107
|
+
| redis | ≥ 5.0 | All three cache layers |
|
|
108
|
+
| numpy | ≥ 1.24 | Embedding vectors, cosine similarity |
|
|
109
|
+
| loguru | ≥ 0.7 | Structured logging |
|
|
110
|
+
| lz4 (optional) | ≥ 4.0 | Alternative compression method |
|
|
111
|
+
|
|
112
|
+
## Quick start
|
|
113
|
+
|
|
114
|
+
### Full 3-layer setup
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from lix_open_cache import CacheConfig, CacheCoordinator
|
|
118
|
+
|
|
119
|
+
config = CacheConfig(
|
|
120
|
+
redis_host="localhost",
|
|
121
|
+
redis_port=6379,
|
|
122
|
+
redis_key_prefix="mychat",
|
|
123
|
+
archive_dir="./data/conversations",
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
cache = CacheCoordinator(session_id="user-abc", config=config)
|
|
127
|
+
|
|
128
|
+
# Store messages
|
|
129
|
+
cache.add_message_to_context("user", "What's the weather in Tokyo?")
|
|
130
|
+
cache.add_message_to_context("assistant", "It's 22°C and sunny.")
|
|
131
|
+
|
|
132
|
+
# Retrieve context for next LLM call
|
|
133
|
+
history = cache.get_context_messages()
|
|
134
|
+
|
|
135
|
+
# Check semantic cache before calling LLM
|
|
136
|
+
import numpy as np
|
|
137
|
+
query_embedding = np.random.rand(384).astype(np.float32)
|
|
138
|
+
cached = cache.get_semantic_response("https://weather.com", query_embedding)
|
|
139
|
+
if cached:
|
|
140
|
+
print("Cache hit — skip LLM")
|
|
141
|
+
else:
|
|
142
|
+
response = {"answer": "22°C and sunny", "sources": ["..."]}
|
|
143
|
+
cache.cache_semantic_response("https://weather.com", query_embedding, response)
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Session memory only (no semantic cache)
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from lix_open_cache import HybridConversationCache, CacheConfig
|
|
150
|
+
|
|
151
|
+
config = CacheConfig(redis_host="localhost", redis_port=6379)
|
|
152
|
+
cache = HybridConversationCache("session-123", config=config)
|
|
153
|
+
|
|
154
|
+
cache.add_message("user", "hello")
|
|
155
|
+
cache.add_message("assistant", "hey there!")
|
|
156
|
+
|
|
157
|
+
messages = cache.get_context() # last 20 from Redis
|
|
158
|
+
|
|
159
|
+
# Smart retrieval: recent + semantically relevant from disk
|
|
160
|
+
context = cache.smart_context(
|
|
161
|
+
query="what did we talk about yesterday?",
|
|
162
|
+
query_embedding=your_embedding,
|
|
163
|
+
recent_k=10,
|
|
164
|
+
disk_k=5,
|
|
165
|
+
)
|
|
166
|
+
# → {"recent": [...last 10...], "relevant": [...5 from disk archive...]}
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Disk-only (no Redis)
|
|
170
|
+
|
|
171
|
+
```python
|
|
172
|
+
from lix_open_cache import ConversationArchive
|
|
173
|
+
|
|
174
|
+
archive = ConversationArchive("./data/chats", session_ttl_days=30)
|
|
175
|
+
|
|
176
|
+
archive.append_turn("sess-1", {"role": "user", "content": "hello"})
|
|
177
|
+
archive.append_turn("sess-1", {"role": "assistant", "content": "hi!"})
|
|
178
|
+
|
|
179
|
+
turns = archive.load_all("sess-1")
|
|
180
|
+
recent = archive.load_recent("sess-1", 5)
|
|
181
|
+
results = archive.search_by_text("sess-1", "hello", top_k=3)
|
|
182
|
+
|
|
183
|
+
archive.cleanup_expired()
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Just the Huffman codec
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
from lix_open_cache import HuffmanCodec
|
|
190
|
+
from lix_open_cache.huffman_codec import encode_str, decode_bytes
|
|
191
|
+
|
|
192
|
+
text = "The quick brown fox jumps over the lazy dog" * 100
|
|
193
|
+
compressed = encode_str(text)
|
|
194
|
+
restored = decode_bytes(compressed)
|
|
195
|
+
assert restored == text
|
|
196
|
+
print(f"{len(text)}B → {len(compressed)}B ({len(compressed)/len(text)*100:.0f}%)")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Configuration
|
|
200
|
+
|
|
201
|
+
All tunables live in a single `CacheConfig` dataclass. No global state, no scattered constants.
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from lix_open_cache import CacheConfig
|
|
205
|
+
|
|
206
|
+
config = CacheConfig(
|
|
207
|
+
# Redis connection
|
|
208
|
+
redis_host="redis.internal",
|
|
209
|
+
redis_port=6379,
|
|
210
|
+
redis_password="secret",
|
|
211
|
+
redis_key_prefix="mychat",
|
|
212
|
+
redis_pool_size=50,
|
|
213
|
+
|
|
214
|
+
# Session context window (Redis DB 2)
|
|
215
|
+
session_redis_db=2,
|
|
216
|
+
session_ttl_seconds=86400, # 24h
|
|
217
|
+
hot_window_size=20, # messages kept in Redis
|
|
218
|
+
session_max_tokens=None, # no token limit
|
|
219
|
+
|
|
220
|
+
# Semantic query cache (Redis DB 0)
|
|
221
|
+
semantic_redis_db=0,
|
|
222
|
+
semantic_ttl_seconds=300, # 5 min
|
|
223
|
+
semantic_similarity_threshold=0.90, # cosine similarity threshold
|
|
224
|
+
semantic_max_items_per_url=50,
|
|
225
|
+
|
|
226
|
+
# URL embedding cache (Redis DB 1)
|
|
227
|
+
url_cache_redis_db=1,
|
|
228
|
+
url_cache_ttl_seconds=86400, # 24h
|
|
229
|
+
|
|
230
|
+
# Disk archive
|
|
231
|
+
archive_dir="./data/conversations",
|
|
232
|
+
disk_ttl_days=14, # purge after 14 days
|
|
233
|
+
|
|
234
|
+
# LRU eviction
|
|
235
|
+
evict_after_minutes=120, # 2h idle → migrate to disk
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
# Or from environment variables (12-factor apps):
|
|
239
|
+
# Reads MYAPP_REDIS_HOST, MYAPP_REDIS_PORT, MYAPP_SEMANTIC_TTL_SECONDS, etc.
|
|
240
|
+
config = CacheConfig.from_env("MYAPP")
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Redis DB layout
|
|
244
|
+
|
|
245
|
+
Three logical databases on a single Redis server:
|
|
246
|
+
|
|
247
|
+
| DB | Layer | TTL | Scope | What it stores |
|
|
248
|
+
|---|---|---|---|---|
|
|
249
|
+
| 0 | Semantic query cache | 5 min | Per-session | `(query_embedding, LLM response)` pairs per URL |
|
|
250
|
+
| 1 | URL embedding cache | 24h | Global | URL → float32 embedding vector |
|
|
251
|
+
| 2 | Session context window | 24h | Per-session | Last 20 conversation messages |
|
|
252
|
+
|
|
253
|
+
Separate DBs instead of key prefixes so you can `FLUSHDB` one layer without touching others, and monitor each independently via `DBSIZE`.
|
|
254
|
+
|
|
255
|
+
## How each layer works
|
|
256
|
+
|
|
257
|
+
### Session Context Window
|
|
258
|
+
|
|
259
|
+
```
|
|
260
|
+
add_message("user", "hello")
|
|
261
|
+
│
|
|
262
|
+
├─ LPUSH message_id to Redis ordered list
|
|
263
|
+
├─ SETEX message JSON with TTL
|
|
264
|
+
│
|
|
265
|
+
└─ Window > 20?
|
|
266
|
+
├─ Yes → RPOP oldest
|
|
267
|
+
│ ├─ Append to .huff disk archive
|
|
268
|
+
│ └─ DELETE from Redis
|
|
269
|
+
└─ No → done
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
```
|
|
273
|
+
get_context()
|
|
274
|
+
│
|
|
275
|
+
├─ Redis has messages?
|
|
276
|
+
│ ├─ Yes → return them, refresh all TTLs
|
|
277
|
+
│ └─ No → session was evicted
|
|
278
|
+
│ ├─ Load from .huff archive
|
|
279
|
+
│ ├─ Re-hydrate Redis with last 20
|
|
280
|
+
│ └─ Return full history
|
|
281
|
+
│
|
|
282
|
+
└─ Redis down?
|
|
283
|
+
└─ Read everything from disk (graceful fallback)
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
**LRU eviction daemon:** Background thread, checks every 60s. Session idle > `evict_after_minutes` → migrate all Redis messages to disk, free memory. When user returns, `get_context()` re-hydrates transparently.
|
|
287
|
+
|
|
288
|
+
**smart_context():** Returns `{"recent": [...], "relevant": [...]}` — recent messages from Redis plus semantically relevant messages from the disk archive (matched by embedding cosine similarity).
|
|
289
|
+
|
|
290
|
+
### Semantic Query Cache
|
|
291
|
+
|
|
292
|
+
Keyed by `(session_id, URL, query_embedding)`. Each URL stores up to 50 `(embedding, response)` pairs.
|
|
293
|
+
|
|
294
|
+
On lookup: compute cosine similarity between the new query embedding and all cached embeddings for that URL. If any exceed 0.90 → cache hit, return the cached response, skip the LLM.
|
|
295
|
+
|
|
296
|
+
- Per-session isolation (privacy)
|
|
297
|
+
- 5-minute TTL (freshness)
|
|
298
|
+
- Catches rephrasings: "weather Tokyo" vs "Tokyo weather forecast" → cosine ~0.94 → HIT
|
|
299
|
+
|
|
300
|
+
### URL Embedding Cache
|
|
301
|
+
|
|
302
|
+
Global (shared across all sessions), 24h TTL. Maps URL → raw float32 bytes in Redis.
|
|
303
|
+
|
|
304
|
+
Computing embeddings costs ~200ms per URL. This cache means the embedding model only runs once per URL per day, regardless of how many sessions fetch it.
|
|
305
|
+
|
|
306
|
+
## Hybrid storage: hot + cold
|
|
307
|
+
|
|
308
|
+
The two-tier architecture:
|
|
309
|
+
|
|
310
|
+
**Hot (Redis):** Ordered list of message IDs. Each message stored as a separate key with TTL. Fast reads (~1ms). Limited to `hot_window_size` messages per session.
|
|
311
|
+
|
|
312
|
+
**Cold (Disk):** Huffman-compressed `.huff` files. One file per session at `{archive_dir}/{session_id}.huff`. Self-contained binary format with a 24-byte header you can read without decompressing.
|
|
313
|
+
|
|
314
|
+
### .huff file format
|
|
315
|
+
|
|
316
|
+
```
|
|
317
|
+
Offset Size Field
|
|
318
|
+
0 4B Magic: "CAv1"
|
|
319
|
+
4 8B created_at (float64 LE, unix timestamp)
|
|
320
|
+
12 8B updated_at (float64 LE, unix timestamp)
|
|
321
|
+
20 4B num_turns (uint32 LE)
|
|
322
|
+
24 var Huffman-compressed JSON array of turn objects
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
### Why Huffman over gzip?
|
|
326
|
+
|
|
327
|
+
Conversation text has very skewed byte frequencies (~18% spaces, ~13% 'e', ~0.07% 'z'). Huffman assigns shorter bit codes to frequent bytes. For small payloads (<100KB), this beats gzip because there's no dictionary overhead. ~54% compression ratio on typical conversation text. Pure Python, zero native dependencies.
|
|
328
|
+
|
|
329
|
+
## Connection pooling
|
|
330
|
+
|
|
331
|
+
`create_redis_client()` maintains a global pool keyed by `(host, port, db)`:
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
from lix_open_cache import create_redis_client, CacheConfig
|
|
335
|
+
|
|
336
|
+
config = CacheConfig(redis_host="localhost", redis_port=6379)
|
|
337
|
+
|
|
338
|
+
# First call: creates ConnectionPool, pings, returns client
|
|
339
|
+
client = create_redis_client(host="localhost", port=6379, db=2, config=config)
|
|
340
|
+
|
|
341
|
+
# Same (host, port, db): reuses existing pool
|
|
342
|
+
client = create_redis_client(host="localhost", port=6379, db=2, config=config)
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
Handles auth gracefully — tries with password first, falls back to no-auth on `AuthenticationError`.
|
|
346
|
+
|
|
347
|
+
## API reference
|
|
348
|
+
|
|
349
|
+
### CacheConfig
|
|
350
|
+
|
|
351
|
+
| Method | Description |
|
|
352
|
+
|---|---|
|
|
353
|
+
| `CacheConfig(**kwargs)` | Create config with explicit values |
|
|
354
|
+
| `CacheConfig.from_env(prefix)` | Load from env vars: `{PREFIX}_REDIS_HOST`, etc. |
|
|
355
|
+
|
|
356
|
+
### CacheCoordinator
|
|
357
|
+
|
|
358
|
+
| Method | Description |
|
|
359
|
+
|---|---|
|
|
360
|
+
| `__init__(session_id, config?)` | Initialize all 3 layers |
|
|
361
|
+
| `add_message_to_context(role, content, metadata?)` | Add to session window |
|
|
362
|
+
| `get_context_messages()` | Get rolling window |
|
|
363
|
+
| `get_formatted_context(max_lines?)` | Get as formatted string |
|
|
364
|
+
| `get_semantic_response(url, query_embedding)` | Check semantic cache |
|
|
365
|
+
| `cache_semantic_response(url, query_embedding, response)` | Store in semantic cache |
|
|
366
|
+
| `get_url_embedding(url)` | Get cached URL embedding |
|
|
367
|
+
| `cache_url_embedding(url, embedding)` | Cache URL embedding |
|
|
368
|
+
| `batch_cache_url_embeddings(dict)` | Batch cache |
|
|
369
|
+
| `clear_session_cache()` | Clear semantic + context |
|
|
370
|
+
| `clear_context()` | Clear context only |
|
|
371
|
+
| `get_stats()` | Stats from all 3 layers |
|
|
372
|
+
|
|
373
|
+
### SessionContextWindow
|
|
374
|
+
|
|
375
|
+
| Method | Description |
|
|
376
|
+
|---|---|
|
|
377
|
+
| `__init__(session_id, config?, **kwargs)` | Create context window |
|
|
378
|
+
| `add_message(role, content, metadata?)` | Add a message |
|
|
379
|
+
| `get_context()` | Get hot window messages |
|
|
380
|
+
| `get_full_history()` | All messages (Redis + disk) |
|
|
381
|
+
| `smart_context(query, embedding?, recent_k?, disk_k?)` | Recent + relevant from disk |
|
|
382
|
+
| `get_formatted_context(max_lines?)` | As formatted string |
|
|
383
|
+
| `flush_to_disk()` | Force migrate Redis → disk |
|
|
384
|
+
| `clear()` | Wipe Redis hot window |
|
|
385
|
+
| `get_stats()` | Session statistics |
|
|
386
|
+
|
|
387
|
+
### HybridConversationCache
|
|
388
|
+
|
|
389
|
+
| Method | Description |
|
|
390
|
+
|---|---|
|
|
391
|
+
| `__init__(session_id, config?, **kwargs)` | Create hybrid cache |
|
|
392
|
+
| `add_message(role, content, metadata?, embedding?)` | Add message (auto-evicts overflow) |
|
|
393
|
+
| `get_context()` | Hot window (auto re-hydrates from disk) |
|
|
394
|
+
| `get_full()` | Merge hot + cold |
|
|
395
|
+
| `smart_context(query, embedding?, recent_k?, disk_k?)` | Recent + relevant |
|
|
396
|
+
| `flush_to_disk()` | Migrate Redis → disk |
|
|
397
|
+
| `clear()` | Clear Redis keys |
|
|
398
|
+
| `delete_session()` | Delete from Redis + disk |
|
|
399
|
+
| `get_stats()` | Hot count, disk turns, sizes |
|
|
400
|
+
|
|
401
|
+
### ConversationArchive
|
|
402
|
+
|
|
403
|
+
| Method | Description |
|
|
404
|
+
|---|---|
|
|
405
|
+
| `__init__(archive_dir, session_ttl_days?)` | Create archive |
|
|
406
|
+
| `append_turn(session_id, turn)` | Append single turn |
|
|
407
|
+
| `append_turns(session_id, turns)` | Batch append |
|
|
408
|
+
| `load_all(session_id)` | Load all turns |
|
|
409
|
+
| `load_recent(session_id, n)` | Load last N turns |
|
|
410
|
+
| `search_by_text(session_id, query, top_k?)` | Text overlap search |
|
|
411
|
+
| `search_by_embedding(session_id, embedding, top_k?)` | Cosine similarity search |
|
|
412
|
+
| `delete_session(session_id)` | Delete archive file |
|
|
413
|
+
| `session_exists(session_id)` | Check if .huff exists |
|
|
414
|
+
| `get_metadata(session_id)` | Read header without decompressing |
|
|
415
|
+
| `cleanup_expired()` | Purge sessions older than TTL |
|
|
416
|
+
| `list_sessions()` | List all archived sessions |
|
|
417
|
+
|
|
418
|
+
### SemanticCacheRedis
|
|
419
|
+
|
|
420
|
+
| Method | Description |
|
|
421
|
+
|---|---|
|
|
422
|
+
| `__init__(session_id, config?, **kwargs)` | Create semantic cache |
|
|
423
|
+
| `get(url, query_embedding)` | Check for cached response |
|
|
424
|
+
| `set(url, query_embedding, response)` | Cache a response |
|
|
425
|
+
| `clear_session()` | Delete all entries for this session |
|
|
426
|
+
| `get_stats()` | Cache statistics |
|
|
427
|
+
|
|
428
|
+
### URLEmbeddingCache
|
|
429
|
+
|
|
430
|
+
| Method | Description |
|
|
431
|
+
|---|---|
|
|
432
|
+
| `__init__(session_id, config?, **kwargs)` | Create embedding cache |
|
|
433
|
+
| `get(url)` | Get cached embedding (np.ndarray or None) |
|
|
434
|
+
| `set(url, embedding)` | Cache an embedding |
|
|
435
|
+
| `batch_set(url_embeddings)` | Batch cache |
|
|
436
|
+
| `get_stats()` | Cache statistics |
|
|
437
|
+
|
|
438
|
+
### HuffmanCodec
|
|
439
|
+
|
|
440
|
+
| Method | Description |
|
|
441
|
+
|---|---|
|
|
442
|
+
| `HuffmanCodec.encode(data: bytes)` | Compress bytes → bytes |
|
|
443
|
+
| `HuffmanCodec.decode(data: bytes)` | Decompress bytes → bytes |
|
|
444
|
+
| `encode_str(text: str)` | Compress string → bytes |
|
|
445
|
+
| `decode_bytes(data: bytes)` | Decompress bytes → string |
|
|
446
|
+
|
|
447
|
+
## Publishing to PyPI
|
|
448
|
+
|
|
449
|
+
```bash
|
|
450
|
+
cd lix_open_cache
|
|
451
|
+
pip install build twine
|
|
452
|
+
|
|
453
|
+
# Build
|
|
454
|
+
python -m build
|
|
455
|
+
|
|
456
|
+
# Test on TestPyPI first
|
|
457
|
+
twine upload --repository testpypi dist/*
|
|
458
|
+
pip install --index-url https://test.pypi.org/simple/ lix-open-cache
|
|
459
|
+
|
|
460
|
+
# Publish to production PyPI
|
|
461
|
+
twine upload dist/*
|
|
462
|
+
```
|
|
463
|
+
|
|
464
|
+
For CI/CD, add a GitHub Actions workflow triggered on release:
|
|
465
|
+
|
|
466
|
+
```yaml
|
|
467
|
+
name: Publish to PyPI
|
|
468
|
+
on:
|
|
469
|
+
release:
|
|
470
|
+
types: [published]
|
|
471
|
+
jobs:
|
|
472
|
+
publish:
|
|
473
|
+
runs-on: ubuntu-latest
|
|
474
|
+
steps:
|
|
475
|
+
- uses: actions/checkout@v4
|
|
476
|
+
- uses: actions/setup-python@v5
|
|
477
|
+
with:
|
|
478
|
+
python-version: "3.11"
|
|
479
|
+
- run: pip install build twine
|
|
480
|
+
- run: cd lix_open_cache && python -m build
|
|
481
|
+
- run: cd lix_open_cache && twine upload dist/*
|
|
482
|
+
env:
|
|
483
|
+
TWINE_USERNAME: __token__
|
|
484
|
+
TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
|
|
485
|
+
```
|
|
486
|
+
|
|
487
|
+
## License
|
|
488
|
+
|
|
489
|
+
The code is licensed under MIT — same as lixSearch. (The accompanying research paper is licensed separately under CC BY-NC-ND 4.0.)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from lix_open_cache.config import CacheConfig
|
|
2
|
+
from lix_open_cache.redis_pool import create_redis_client
|
|
3
|
+
from lix_open_cache.huffman_codec import HuffmanCodec
|
|
4
|
+
from lix_open_cache.conversation_archive import ConversationArchive
|
|
5
|
+
from lix_open_cache.hybrid_cache import HybridConversationCache
|
|
6
|
+
from lix_open_cache.semantic_cache import SemanticCacheRedis, URLEmbeddingCache
|
|
7
|
+
from lix_open_cache.context_window import SessionContextWindow
|
|
8
|
+
from lix_open_cache.coordinator import CacheCoordinator
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"CacheConfig",
|
|
12
|
+
"create_redis_client",
|
|
13
|
+
"HuffmanCodec",
|
|
14
|
+
"ConversationArchive",
|
|
15
|
+
"HybridConversationCache",
|
|
16
|
+
"SemanticCacheRedis",
|
|
17
|
+
"URLEmbeddingCache",
|
|
18
|
+
"SessionContextWindow",
|
|
19
|
+
"CacheCoordinator",
|
|
20
|
+
]
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
class CacheConfig:
    """All tunables for the three Redis cache layers plus disk archival.

    A single instance is shared by every component (session context window,
    semantic query cache, URL embedding cache, hybrid store).  Construct it
    directly with keyword arguments, or via :meth:`from_env` for 12-factor
    deployments.
    """

    # --- Redis connection -------------------------------------------------
    redis_host: str = "localhost"
    redis_port: int = 6379
    redis_password: Optional[str] = None  # None => no AUTH attempted
    redis_key_prefix: str = "lix"  # namespace prefix for all keys
    redis_socket_connect_timeout: int = 5  # seconds
    redis_socket_keepalive: bool = True
    redis_pool_size: int = 50  # max connections per (host, port, db) pool

    # --- Session context window — Redis DB 2 (hot messages) ---------------
    session_redis_db: int = 2
    session_ttl_seconds: int = 86400  # 24h
    hot_window_size: int = 20  # messages kept in Redis
    session_max_tokens: Optional[int] = None  # None => no token budget

    # --- Semantic query cache — Redis DB 0 ---------------------------------
    semantic_redis_db: int = 0
    semantic_ttl_seconds: int = 300  # 5 min
    semantic_similarity_threshold: float = 0.90  # cosine hit threshold
    semantic_max_items_per_url: int = 50

    # --- URL embedding cache — Redis DB 1 ----------------------------------
    url_cache_redis_db: int = 1
    url_cache_ttl_seconds: int = 86400  # 24h
    url_cache_batch_size: int = 100

    # --- Disk archive (Huffman-compressed .huff files) ---------------------
    archive_dir: str = "./data/conversations"
    disk_ttl_days: int = 14

    # --- LRU eviction daemon -----------------------------------------------
    evict_after_minutes: int = 120

    @classmethod
    def from_env(cls, prefix: str = "") -> "CacheConfig":
        """Build a config from environment variables.

        Each field reads ``{PREFIX}_{FIELD_NAME_UPPER}`` (bare name when
        *prefix* is empty); unset variables fall back to the dataclass
        defaults.  Previously several tunables (socket timeout/keepalive,
        ``session_max_tokens``, ``semantic_max_items_per_url``,
        ``url_cache_batch_size``) were silently ignored here; they are now
        honored, backward-compatibly.

        Args:
            prefix: optional application prefix, e.g. ``"MYAPP"`` reads
                ``MYAPP_REDIS_HOST`` and so on.

        Returns:
            A fully populated :class:`CacheConfig`.
        """
        p = prefix.upper() + "_" if prefix else ""

        def _int(name: str, default: int) -> int:
            # Hoist the repeated int(os.getenv(...)) pattern.
            return int(os.getenv(f"{p}{name}", str(default)))

        def _bool(name: str, default: bool) -> bool:
            raw = os.getenv(f"{p}{name}")
            if raw is None:
                return default
            return raw.strip().lower() in ("1", "true", "yes", "on")

        max_tokens_raw = os.getenv(f"{p}SESSION_MAX_TOKENS")
        return cls(
            redis_host=os.getenv(f"{p}REDIS_HOST", "localhost"),
            redis_port=_int("REDIS_PORT", 6379),
            # Empty string counts as "no password".
            redis_password=os.getenv(f"{p}REDIS_PASSWORD") or None,
            redis_key_prefix=os.getenv(f"{p}REDIS_KEY_PREFIX", "lix"),
            redis_socket_connect_timeout=_int("REDIS_SOCKET_CONNECT_TIMEOUT", 5),
            redis_socket_keepalive=_bool("REDIS_SOCKET_KEEPALIVE", True),
            redis_pool_size=_int("REDIS_POOL_SIZE", 50),
            session_redis_db=_int("SESSION_REDIS_DB", 2),
            session_ttl_seconds=_int("SESSION_TTL_SECONDS", 86400),
            hot_window_size=_int("HOT_WINDOW_SIZE", 20),
            session_max_tokens=int(max_tokens_raw) if max_tokens_raw else None,
            semantic_redis_db=_int("SEMANTIC_REDIS_DB", 0),
            semantic_ttl_seconds=_int("SEMANTIC_TTL_SECONDS", 300),
            semantic_similarity_threshold=float(
                os.getenv(f"{p}SEMANTIC_SIMILARITY_THRESHOLD", "0.90")
            ),
            semantic_max_items_per_url=_int("SEMANTIC_MAX_ITEMS_PER_URL", 50),
            url_cache_redis_db=_int("URL_CACHE_REDIS_DB", 1),
            url_cache_ttl_seconds=_int("URL_CACHE_TTL_SECONDS", 86400),
            url_cache_batch_size=_int("URL_CACHE_BATCH_SIZE", 100),
            archive_dir=os.getenv(f"{p}ARCHIVE_DIR", "./data/conversations"),
            disk_ttl_days=_int("DISK_TTL_DAYS", 14),
            evict_after_minutes=_int("EVICT_AFTER_MINUTES", 120),
        )
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from typing import Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
from lix_open_cache.config import CacheConfig
|
|
6
|
+
from lix_open_cache.hybrid_cache import HybridConversationCache
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SessionContextWindow:
    """Per-session rolling conversation window.

    Thin facade over :class:`HybridConversationCache`: Redis holds the hot
    window of recent messages, overflow spills to the compressed disk
    archive.  All storage work is delegated; this class only binds the
    session-level settings (window size, TTL, token budget).
    """

    def __init__(self, session_id: str, config: Optional[CacheConfig] = None, **kwargs):
        cfg = config if config is not None else CacheConfig()
        self._config = cfg
        self.session_id = session_id

        # Per-instance keyword overrides win over config defaults.
        self.window_size = kwargs.get("window_size", cfg.hot_window_size)
        self.ttl_seconds = kwargs.get("ttl_seconds", cfg.session_ttl_seconds)
        self.max_tokens = kwargs.get("max_tokens", cfg.session_max_tokens)

        self._hybrid = HybridConversationCache(
            session_id=session_id,
            config=cfg,
            redis_host=kwargs.get("redis_host", cfg.redis_host),
            redis_port=kwargs.get("redis_port", cfg.redis_port),
            redis_db=kwargs.get("redis_db", cfg.session_redis_db),
            hot_window_size=self.window_size,
            redis_ttl=self.ttl_seconds,
        )
        logger.info(
            f"[SessionContextWindow] session={session_id} initialized "
            f"(hot_window={self.window_size}, ttl={self.ttl_seconds}s)"
        )

    def add_message(self, role: str, content: str, metadata: Optional[Dict] = None) -> int:
        """Append a message to the window; returns the backing store's count."""
        return self._hybrid.add_message(role, content, metadata)

    def get_context(self) -> List[Dict]:
        """Return the hot-window messages."""
        return self._hybrid.get_context()

    def get_full_history(self) -> List[Dict]:
        """Return every message, merging Redis hot window and disk archive."""
        return self._hybrid.get_full()

    def smart_context(self, query: str, query_embedding=None, recent_k: int = 10, disk_k: int = 5) -> Dict:
        """Return recent messages plus semantically relevant ones from disk."""
        return self._hybrid.smart_context(query, query_embedding, recent_k, disk_k)

    def get_formatted_context(self, max_lines: int = 50) -> str:
        """Return the context rendered as a formatted string."""
        return self._hybrid.get_formatted_context(max_lines)

    def clear(self) -> bool:
        """Wipe the Redis hot window for this session."""
        return self._hybrid.clear()

    def flush_to_disk(self) -> bool:
        """Force-migrate all hot messages to the disk archive."""
        return self._hybrid.flush_to_disk()

    def get_stats(self) -> Dict:
        """Return backing-store stats augmented with session-level settings."""
        return {
            **self._hybrid.get_stats(),
            "ttl_seconds": self.ttl_seconds,
            "max_tokens": self.max_tokens,
        }
|