rnsr-0.1.0-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
- rnsr/__init__.py +118 -0
- rnsr/__main__.py +242 -0
- rnsr/agent/__init__.py +218 -0
- rnsr/agent/cross_doc_navigator.py +767 -0
- rnsr/agent/graph.py +1557 -0
- rnsr/agent/llm_cache.py +575 -0
- rnsr/agent/navigator_api.py +497 -0
- rnsr/agent/provenance.py +772 -0
- rnsr/agent/query_clarifier.py +617 -0
- rnsr/agent/reasoning_memory.py +736 -0
- rnsr/agent/repl_env.py +709 -0
- rnsr/agent/rlm_navigator.py +2108 -0
- rnsr/agent/self_reflection.py +602 -0
- rnsr/agent/variable_store.py +308 -0
- rnsr/benchmarks/__init__.py +118 -0
- rnsr/benchmarks/comprehensive_benchmark.py +733 -0
- rnsr/benchmarks/evaluation_suite.py +1210 -0
- rnsr/benchmarks/finance_bench.py +147 -0
- rnsr/benchmarks/pdf_merger.py +178 -0
- rnsr/benchmarks/performance.py +321 -0
- rnsr/benchmarks/quality.py +321 -0
- rnsr/benchmarks/runner.py +298 -0
- rnsr/benchmarks/standard_benchmarks.py +995 -0
- rnsr/client.py +560 -0
- rnsr/document_store.py +394 -0
- rnsr/exceptions.py +74 -0
- rnsr/extraction/__init__.py +172 -0
- rnsr/extraction/candidate_extractor.py +357 -0
- rnsr/extraction/entity_extractor.py +581 -0
- rnsr/extraction/entity_linker.py +825 -0
- rnsr/extraction/grounded_extractor.py +722 -0
- rnsr/extraction/learned_types.py +599 -0
- rnsr/extraction/models.py +232 -0
- rnsr/extraction/relationship_extractor.py +600 -0
- rnsr/extraction/relationship_patterns.py +511 -0
- rnsr/extraction/relationship_validator.py +392 -0
- rnsr/extraction/rlm_extractor.py +589 -0
- rnsr/extraction/rlm_unified_extractor.py +990 -0
- rnsr/extraction/tot_validator.py +610 -0
- rnsr/extraction/unified_extractor.py +342 -0
- rnsr/indexing/__init__.py +60 -0
- rnsr/indexing/knowledge_graph.py +1128 -0
- rnsr/indexing/kv_store.py +313 -0
- rnsr/indexing/persistence.py +323 -0
- rnsr/indexing/semantic_retriever.py +237 -0
- rnsr/indexing/semantic_search.py +320 -0
- rnsr/indexing/skeleton_index.py +395 -0
- rnsr/ingestion/__init__.py +161 -0
- rnsr/ingestion/chart_parser.py +569 -0
- rnsr/ingestion/document_boundary.py +662 -0
- rnsr/ingestion/font_histogram.py +334 -0
- rnsr/ingestion/header_classifier.py +595 -0
- rnsr/ingestion/hierarchical_cluster.py +515 -0
- rnsr/ingestion/layout_detector.py +356 -0
- rnsr/ingestion/layout_model.py +379 -0
- rnsr/ingestion/ocr_fallback.py +177 -0
- rnsr/ingestion/pipeline.py +936 -0
- rnsr/ingestion/semantic_fallback.py +417 -0
- rnsr/ingestion/table_parser.py +799 -0
- rnsr/ingestion/text_builder.py +460 -0
- rnsr/ingestion/tree_builder.py +402 -0
- rnsr/ingestion/vision_retrieval.py +965 -0
- rnsr/ingestion/xy_cut.py +555 -0
- rnsr/llm.py +733 -0
- rnsr/models.py +167 -0
- rnsr/py.typed +2 -0
- rnsr-0.1.0.dist-info/METADATA +592 -0
- rnsr-0.1.0.dist-info/RECORD +72 -0
- rnsr-0.1.0.dist-info/WHEEL +5 -0
- rnsr-0.1.0.dist-info/entry_points.txt +2 -0
- rnsr-0.1.0.dist-info/licenses/LICENSE +21 -0
- rnsr-0.1.0.dist-info/top_level.txt +1 -0
rnsr/agent/llm_cache.py
ADDED
@@ -0,0 +1,575 @@
"""
RNSR LLM Response Cache

Caches LLM responses for significant performance and cost improvement.

Features:
- Semantic-aware caching (similar prompts hit same cache)
- TTL-based expiration
- Cache warming from LearnedQueryPatterns
- Thread-safe with optional persistence

Storage: SQLite for persistence, in-memory for speed
"""

from __future__ import annotations

import hashlib
import json
import os
import sqlite3
import time
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from threading import Lock
from typing import Any, Callable

import structlog

logger = structlog.get_logger(__name__)


# Default cache location
DEFAULT_CACHE_PATH = Path.home() / ".rnsr" / "llm_cache.db"


# =============================================================================
# Cache Entry
# =============================================================================


@dataclass
class CacheEntry:
    """A cached LLM response."""

    key: str
    prompt_hash: str
    prompt_preview: str  # First 200 chars for debugging
    response: str
    created_at: float
    expires_at: float
    hit_count: int = 0
    last_hit_at: float | None = None

    # Metadata
    model: str = ""
    token_count: int = 0

    def is_expired(self) -> bool:
        """Check if entry has expired."""
        return time.time() > self.expires_at

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "key": self.key,
            "prompt_hash": self.prompt_hash,
            "prompt_preview": self.prompt_preview,
            "response": self.response,
            "created_at": self.created_at,
            "expires_at": self.expires_at,
            "hit_count": self.hit_count,
            "last_hit_at": self.last_hit_at,
            "model": self.model,
            "token_count": self.token_count,
        }


# =============================================================================
# LLM Cache
# =============================================================================


class LLMCache:
    """
    Thread-safe LLM response cache.

    Uses a combination of:
    1. Exact prompt hash matching (fast)
    2. Normalized prompt matching (handles whitespace/formatting)
    3. Optional semantic similarity (slower but more hits)
    """

    def __init__(
        self,
        storage_path: Path | str | None = None,
        default_ttl_seconds: int = 3600,  # 1 hour
        max_entries: int = 10000,
        enable_persistence: bool = True,
        enable_semantic_matching: bool = False,
    ):
        """
        Initialize the LLM cache.

        Args:
            storage_path: Path to SQLite cache file.
            default_ttl_seconds: Default time-to-live for entries.
            max_entries: Maximum cache entries.
            enable_persistence: Whether to persist to disk.
            enable_semantic_matching: Enable semantic similarity matching.
        """
        self.storage_path = Path(storage_path) if storage_path else DEFAULT_CACHE_PATH
        self.default_ttl_seconds = default_ttl_seconds
        self.max_entries = max_entries
        self.enable_persistence = enable_persistence
        self.enable_semantic_matching = enable_semantic_matching

        self._lock = Lock()
        self._memory_cache: dict[str, CacheEntry] = {}
        self._stats = {
            "hits": 0,
            "misses": 0,
            "evictions": 0,
        }

        if enable_persistence:
            self._init_db()
            self._load_from_db()

    def _init_db(self) -> None:
        """Initialize SQLite database."""
        try:
            self.storage_path.parent.mkdir(parents=True, exist_ok=True)

            conn = sqlite3.connect(str(self.storage_path))
            cursor = conn.cursor()

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS cache (
                    key TEXT PRIMARY KEY,
                    prompt_hash TEXT NOT NULL,
                    prompt_preview TEXT,
                    response TEXT NOT NULL,
                    created_at REAL NOT NULL,
                    expires_at REAL NOT NULL,
                    hit_count INTEGER DEFAULT 0,
                    last_hit_at REAL,
                    model TEXT,
                    token_count INTEGER DEFAULT 0
                )
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_prompt_hash ON cache(prompt_hash)
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_expires_at ON cache(expires_at)
            """)

            conn.commit()
            conn.close()

            logger.debug("llm_cache_db_initialized", path=str(self.storage_path))

        except Exception as e:
            logger.warning("llm_cache_db_init_failed", error=str(e))
            self.enable_persistence = False

    def _load_from_db(self) -> None:
        """Load non-expired entries from database."""
        if not self.enable_persistence:
            return

        try:
            conn = sqlite3.connect(str(self.storage_path))
            cursor = conn.cursor()

            # Load only non-expired entries
            now = time.time()
            cursor.execute("""
                SELECT key, prompt_hash, prompt_preview, response,
                       created_at, expires_at, hit_count, last_hit_at,
                       model, token_count
                FROM cache
                WHERE expires_at > ?
                ORDER BY last_hit_at DESC
                LIMIT ?
            """, (now, self.max_entries))

            rows = cursor.fetchall()

            for row in rows:
                entry = CacheEntry(
                    key=row[0],
                    prompt_hash=row[1],
                    prompt_preview=row[2] or "",
                    response=row[3],
                    created_at=row[4],
                    expires_at=row[5],
                    hit_count=row[6] or 0,
                    last_hit_at=row[7],
                    model=row[8] or "",
                    token_count=row[9] or 0,
                )
                self._memory_cache[entry.key] = entry

            conn.close()

            logger.info("llm_cache_loaded", entries=len(self._memory_cache))

        except Exception as e:
            logger.warning("llm_cache_load_failed", error=str(e))

    def _compute_key(self, prompt: str) -> tuple[str, str]:
        """Compute cache key and hash for a prompt."""
        # Normalize prompt
        normalized = self._normalize_prompt(prompt)

        # Compute hash
        prompt_hash = hashlib.sha256(normalized.encode()).hexdigest()[:32]

        # Key includes hash
        key = f"llm_{prompt_hash}"

        return key, prompt_hash

    def _normalize_prompt(self, prompt: str) -> str:
        """Normalize prompt for consistent hashing."""
        # Remove excessive whitespace
        normalized = " ".join(prompt.split())

        # Lowercase for case-insensitive matching
        normalized = normalized.lower()

        return normalized

    def get(self, prompt: str) -> str | None:
        """
        Get cached response for a prompt.

        Args:
            prompt: The LLM prompt.

        Returns:
            Cached response or None if not found.
        """
        key, prompt_hash = self._compute_key(prompt)

        with self._lock:
            entry = self._memory_cache.get(key)

            if entry is None:
                self._stats["misses"] += 1
                return None

            if entry.is_expired():
                # Remove expired entry
                del self._memory_cache[key]
                self._stats["misses"] += 1
                return None

            # Update hit stats
            entry.hit_count += 1
            entry.last_hit_at = time.time()
            self._stats["hits"] += 1

            logger.debug(
                "cache_hit",
                key=key[:16],
                hit_count=entry.hit_count,
            )

            return entry.response

    def set(
        self,
        prompt: str,
        response: str,
        ttl_seconds: int | None = None,
        model: str = "",
        token_count: int = 0,
    ) -> None:
        """
        Cache an LLM response.

        Args:
            prompt: The LLM prompt.
            response: The LLM response.
            ttl_seconds: Time-to-live (uses default if not specified).
            model: Model name for tracking.
            token_count: Token count for tracking.
        """
        key, prompt_hash = self._compute_key(prompt)
        ttl = ttl_seconds or self.default_ttl_seconds

        now = time.time()

        entry = CacheEntry(
            key=key,
            prompt_hash=prompt_hash,
            prompt_preview=prompt[:200],
            response=response,
            created_at=now,
            expires_at=now + ttl,
            model=model,
            token_count=token_count,
        )

        with self._lock:
            # Evict if at capacity
            if len(self._memory_cache) >= self.max_entries:
                self._evict_oldest()

            self._memory_cache[key] = entry

        # Persist asynchronously
        if self.enable_persistence:
            self._persist_entry(entry)

        logger.debug(
            "cache_set",
            key=key[:16],
            ttl=ttl,
        )

    def _evict_oldest(self) -> None:
        """Evict oldest entries when at capacity."""
        if not self._memory_cache:
            return

        # Find entries to evict (oldest 10%)
        entries = list(self._memory_cache.items())
        entries.sort(key=lambda x: x[1].last_hit_at or x[1].created_at)

        evict_count = max(1, len(entries) // 10)

        for i in range(evict_count):
            key = entries[i][0]
            del self._memory_cache[key]
            self._stats["evictions"] += 1

        logger.debug("cache_evicted", count=evict_count)

    def _persist_entry(self, entry: CacheEntry) -> None:
        """Persist entry to database."""
        if not self.enable_persistence:
            return

        try:
            conn = sqlite3.connect(str(self.storage_path))
            cursor = conn.cursor()

            cursor.execute("""
                INSERT OR REPLACE INTO cache
                (key, prompt_hash, prompt_preview, response, created_at,
                 expires_at, hit_count, last_hit_at, model, token_count)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                entry.key,
                entry.prompt_hash,
                entry.prompt_preview,
                entry.response,
                entry.created_at,
                entry.expires_at,
                entry.hit_count,
                entry.last_hit_at,
                entry.model,
                entry.token_count,
            ))

            conn.commit()
            conn.close()

        except Exception as e:
            logger.warning("cache_persist_failed", error=str(e))

    def invalidate(self, prompt: str) -> bool:
        """
        Invalidate a cached entry.

        Args:
            prompt: The prompt to invalidate.

        Returns:
            True if entry was found and removed.
        """
        key, _ = self._compute_key(prompt)

        with self._lock:
            if key in self._memory_cache:
                del self._memory_cache[key]
                return True

        return False

    def clear(self) -> None:
        """Clear all cached entries."""
        with self._lock:
            self._memory_cache.clear()

        if self.enable_persistence:
            try:
                conn = sqlite3.connect(str(self.storage_path))
                cursor = conn.cursor()
                cursor.execute("DELETE FROM cache")
                conn.commit()
                conn.close()
            except Exception as e:
                logger.warning("cache_clear_failed", error=str(e))

        logger.info("cache_cleared")

    def cleanup_expired(self) -> int:
        """Remove expired entries."""
        now = time.time()
        expired_keys = []

        with self._lock:
            for key, entry in self._memory_cache.items():
                if entry.is_expired():
                    expired_keys.append(key)

            for key in expired_keys:
                del self._memory_cache[key]

        if self.enable_persistence and expired_keys:
            try:
                conn = sqlite3.connect(str(self.storage_path))
                cursor = conn.cursor()
                cursor.execute("DELETE FROM cache WHERE expires_at < ?", (now,))
                conn.commit()
                conn.close()
            except Exception:
                pass

        if expired_keys:
            logger.info("expired_entries_cleaned", count=len(expired_keys))

        return len(expired_keys)

    def get_stats(self) -> dict[str, Any]:
        """Get cache statistics."""
        with self._lock:
            total = self._stats["hits"] + self._stats["misses"]
            hit_rate = self._stats["hits"] / total if total > 0 else 0.0

            return {
                "entries": len(self._memory_cache),
                "hits": self._stats["hits"],
                "misses": self._stats["misses"],
                "evictions": self._stats["evictions"],
                "hit_rate": hit_rate,
                "max_entries": self.max_entries,
            }


# =============================================================================
# Cached LLM Wrapper
# =============================================================================


class CachedLLM:
    """
    Wrapper that adds caching to any LLM function.

    Usage:
        cached_llm = CachedLLM(llm.complete)
        response = cached_llm("What is 2+2?")  # Calls LLM
        response = cached_llm("What is 2+2?")  # Returns cached
    """

    def __init__(
        self,
        llm_fn: Callable[[str], str],
        cache: LLMCache | None = None,
        ttl_seconds: int = 3600,
        model_name: str = "",
    ):
        """
        Initialize cached LLM wrapper.

        Args:
            llm_fn: The underlying LLM function.
            cache: Cache instance (creates default if None).
            ttl_seconds: Default TTL for cached responses.
            model_name: Model name for tracking.
        """
        self.llm_fn = llm_fn
        self.cache = cache or get_global_cache()
        self.ttl_seconds = ttl_seconds
        self.model_name = model_name

    def __call__(self, prompt: str, use_cache: bool = True) -> str:
        """
        Call LLM with caching.

        Args:
            prompt: The prompt to send.
            use_cache: Whether to use cache (default True).

        Returns:
            LLM response (possibly cached).
        """
        if use_cache:
            cached = self.cache.get(prompt)
            if cached is not None:
                return cached

        # Call underlying LLM
        response = self.llm_fn(prompt)
        response_str = str(response) if not isinstance(response, str) else response

        # Cache the response
        if use_cache:
            self.cache.set(
                prompt=prompt,
                response=response_str,
                ttl_seconds=self.ttl_seconds,
                model=self.model_name,
            )

        return response_str

    def complete(self, prompt: str, use_cache: bool = True) -> str:
        """Alias for __call__ for compatibility."""
        return self(prompt, use_cache)

    def get_stats(self) -> dict[str, Any]:
        """Get cache statistics."""
        return self.cache.get_stats()


# =============================================================================
# Global Cache
# =============================================================================

_global_cache: LLMCache | None = None


def get_global_cache() -> LLMCache:
    """Get or create the global LLM cache."""
    global _global_cache

    if _global_cache is None:
        custom_path = os.getenv("RNSR_LLM_CACHE_PATH")
        _global_cache = LLMCache(
            storage_path=custom_path if custom_path else None
        )

    return _global_cache


def wrap_llm_with_cache(
    llm_fn: Callable[[str], str],
    ttl_seconds: int = 3600,
    model_name: str = "",
) -> CachedLLM:
    """
    Wrap an LLM function with caching.

    Args:
        llm_fn: The LLM function to wrap.
        ttl_seconds: Cache TTL.
        model_name: Model name for tracking.

    Returns:
        CachedLLM wrapper.
    """
    return CachedLLM(
        llm_fn=llm_fn,
        cache=get_global_cache(),
        ttl_seconds=ttl_seconds,
        model_name=model_name,
    )
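
For readers skimming this diff, the following is a minimal usage sketch based on the public functions added above (wrap_llm_with_cache, CachedLLM.get_stats). It is not part of the package; the call_my_llm function is a hypothetical stand-in for a real LLM call, and the "demo" model name and 600-second TTL are arbitrary illustration values.

# Usage sketch (illustrative only, not shipped with rnsr).
from rnsr.agent.llm_cache import wrap_llm_with_cache


def call_my_llm(prompt: str) -> str:
    # Hypothetical placeholder standing in for a real LLM completion call.
    return f"echo: {prompt}"


cached = wrap_llm_with_cache(call_my_llm, ttl_seconds=600, model_name="demo")

print(cached("What is 2+2?"))   # first call invokes call_my_llm and stores the result
print(cached("what is  2+2?"))  # whitespace/case-normalized prompt hits the same entry
print(cached.get_stats())       # e.g. entries=1, hits=1, misses=1, hit_rate=0.5

Because _normalize_prompt collapses whitespace and lowercases before hashing, the second call resolves to the same cache key as the first, which is the "normalized prompt matching" behavior described in the LLMCache docstring.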