nc1709-1.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/performance/cache.py
@@ -0,0 +1,705 @@
"""
NC1709 Performance - Multi-Level Intelligent Caching

Implements a 3-level cache system inspired by CPU cache architecture:
- L1: Exact match cache (<1ms lookup)
- L2: Semantic similarity cache (~10ms lookup)
- L3: Template/pattern cache (~50ms lookup)

Target: 30-40% cache hit rate for significant latency reduction.
"""

import hashlib
import time
import json
import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Tuple, NamedTuple
from dataclasses import dataclass, field
from collections import OrderedDict
import threading

logger = logging.getLogger(__name__)

# Optional: sentence-transformers for semantic cache
try:
    import numpy as np
    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False
    np = None

try:
    from sentence_transformers import SentenceTransformer
    SEMANTIC_AVAILABLE = True
except ImportError:
    SEMANTIC_AVAILABLE = False
    SentenceTransformer = None


@dataclass
class CacheEntry:
    """A cached response entry"""
    prompt: str
    context_hash: str
    response: str
    model_used: str
    created_at: float
    access_count: int = 0
    last_accessed: float = 0
    tokens_saved: int = 0
    embedding: Optional[Any] = None  # numpy array if available

    def touch(self):
        """Update access statistics"""
        self.access_count += 1
        self.last_accessed = time.time()


@dataclass
class CacheStats:
    """Cache performance statistics"""
    l1_hits: int = 0
    l2_hits: int = 0
    l3_hits: int = 0
    misses: int = 0
    total_time_saved_ms: float = 0
    total_tokens_saved: int = 0

    @property
    def total_queries(self) -> int:
        return self.l1_hits + self.l2_hits + self.l3_hits + self.misses

    @property
    def hit_rate(self) -> float:
        if self.total_queries == 0:
            return 0.0
        return (self.l1_hits + self.l2_hits + self.l3_hits) / self.total_queries

    @property
    def l1_rate(self) -> float:
        if self.total_queries == 0:
            return 0.0
        return self.l1_hits / self.total_queries

    def to_dict(self) -> Dict[str, Any]:
        return {
            "l1_hits": self.l1_hits,
            "l2_hits": self.l2_hits,
            "l3_hits": self.l3_hits,
            "misses": self.misses,
            "total_queries": self.total_queries,
            "hit_rate": round(self.hit_rate * 100, 2),
            "total_time_saved_ms": round(self.total_time_saved_ms, 2),
            "total_tokens_saved": self.total_tokens_saved,
        }


class CacheResult(NamedTuple):
    """Result from cache lookup"""
    hit: bool
    response: Optional[str]
    level: Optional[str]  # "L1", "L2", "L3", or None
    similarity: Optional[float]  # For L2/L3 hits
    time_ms: float


class L1ExactCache:
    """
    Level 1: Exact Match Cache

    Fastest lookup - requires identical prompt + context hash.
    Uses LRU eviction policy.

    Lookup time: <1ms
    """

    def __init__(self, max_size: int = 1000):
        self.max_size = max_size
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
        self._lock = threading.RLock()

    def _make_key(self, prompt: str, context_hash: str) -> str:
        """Create cache key from prompt and context"""
        combined = f"{prompt.strip().lower()}:{context_hash}"
        return hashlib.sha256(combined.encode()).hexdigest()[:32]

    def get(self, prompt: str, context_hash: str) -> Optional[CacheEntry]:
        """Look up exact match"""
        key = self._make_key(prompt, context_hash)

        with self._lock:
            if key in self._cache:
                entry = self._cache[key]
                entry.touch()
                # Move to end (most recently used)
                self._cache.move_to_end(key)
                return entry

        return None

    def set(self, prompt: str, context_hash: str, response: str,
            model_used: str, tokens_saved: int = 0) -> None:
        """Store in cache"""
        key = self._make_key(prompt, context_hash)

        with self._lock:
            # Evict if at capacity
            while len(self._cache) >= self.max_size:
                self._cache.popitem(last=False)  # Remove oldest

            self._cache[key] = CacheEntry(
                prompt=prompt,
                context_hash=context_hash,
                response=response,
                model_used=model_used,
                created_at=time.time(),
                last_accessed=time.time(),
                tokens_saved=tokens_saved,
            )

    def clear(self) -> int:
        """Clear cache, return number of entries cleared"""
        with self._lock:
            count = len(self._cache)
            self._cache.clear()
            return count

    def __len__(self) -> int:
        return len(self._cache)


class L2SemanticCache:
    """
    Level 2: Semantic Similarity Cache

    Finds cached responses for semantically similar prompts.
    Uses sentence embeddings and cosine similarity.

    Example matches:
    - "write a function to reverse a string" ≈ "create a string reversal function"
    - "fix the bug in login" ≈ "debug the authentication issue"

    Lookup time: ~10ms
    """

    def __init__(
        self,
        similarity_threshold: float = 0.92,
        max_size: int = 500,
        model_name: str = 'all-MiniLM-L6-v2'
    ):
        self.threshold = similarity_threshold
        self.max_size = max_size
        self._lock = threading.RLock()

        # Initialize encoder if available
        self._encoder = None
        self._encoder_ready = False

        if SEMANTIC_AVAILABLE and NUMPY_AVAILABLE:
            try:
                self._encoder = SentenceTransformer(model_name)
                self._encoder_ready = True
                logger.info(f"L2 semantic cache initialized with {model_name}")
            except Exception as e:
                logger.warning(f"Failed to load sentence transformer: {e}")
        else:
            logger.info("L2 semantic cache disabled (sentence-transformers not installed)")

        self._entries: List[CacheEntry] = []
        self._embeddings: Optional[Any] = None  # numpy array

    @property
    def available(self) -> bool:
        return self._encoder_ready

    def get(self, prompt: str, context_hash: str) -> Optional[Tuple[CacheEntry, float]]:
        """
        Find semantically similar cached response.
        Returns (entry, similarity_score) or None.
        """
        if not self._encoder_ready or not self._entries:
            return None

        with self._lock:
            try:
                # Encode query
                query_embedding = self._encoder.encode(
                    prompt,
                    convert_to_numpy=True,
                    show_progress_bar=False
                )

                # Compute cosine similarities
                # Normalize query
                query_norm = query_embedding / np.linalg.norm(query_embedding)

                # Compute dot products (embeddings are already normalized)
                similarities = np.dot(self._embeddings, query_norm)

                # Find best match
                best_idx = np.argmax(similarities)
                best_score = float(similarities[best_idx])

                if best_score >= self.threshold:
                    entry = self._entries[best_idx]

                    # Verify context also matches
                    if entry.context_hash == context_hash:
                        entry.touch()
                        return (entry, best_score)

            except Exception as e:
                logger.warning(f"L2 cache lookup error: {e}")

        return None

    def set(self, prompt: str, context_hash: str, response: str,
            model_used: str, tokens_saved: int = 0) -> None:
        """Store in semantic cache"""
        if not self._encoder_ready:
            return

        with self._lock:
            try:
                # Evict if at capacity
                while len(self._entries) >= self.max_size:
                    self._evict_lru()

                # Compute and normalize embedding
                embedding = self._encoder.encode(
                    prompt,
                    convert_to_numpy=True,
                    show_progress_bar=False
                )
                embedding = embedding / np.linalg.norm(embedding)

                entry = CacheEntry(
                    prompt=prompt,
                    context_hash=context_hash,
                    response=response,
                    model_used=model_used,
                    created_at=time.time(),
                    last_accessed=time.time(),
                    tokens_saved=tokens_saved,
                    embedding=embedding,
                )

                self._entries.append(entry)

                # Update embeddings matrix
                if self._embeddings is None:
                    self._embeddings = embedding.reshape(1, -1)
                else:
                    self._embeddings = np.vstack([self._embeddings, embedding])

            except Exception as e:
                logger.warning(f"L2 cache set error: {e}")

    def _evict_lru(self) -> None:
        """Evict least recently used entry"""
        if not self._entries:
            return

        # Find LRU entry
        lru_idx = min(
            range(len(self._entries)),
            key=lambda i: self._entries[i].last_accessed
        )

        # Remove entry and corresponding embedding row
        del self._entries[lru_idx]
        if self._embeddings is not None:
            self._embeddings = np.delete(self._embeddings, lru_idx, axis=0)

    def clear(self) -> int:
        """Clear cache"""
        with self._lock:
            count = len(self._entries)
            self._entries.clear()
            self._embeddings = None
            return count

    def __len__(self) -> int:
        return len(self._entries)


class L3TemplateCache:
    """
    Level 3: Template/Pattern Cache

    Caches responses for common patterns that can be parameterized.
    Useful for repetitive tasks with slight variations.

    Examples:
    - "write tests for {function}" → cached test template
    - "explain {concept}" → cached explanation pattern
    - "add logging to {file}" → cached modification pattern

    Lookup time: ~50ms
    """

    # Common templates to detect
    TEMPLATES = [
        # Test writing
        (r"(write|create|add)\s+(unit\s+)?tests?\s+for\s+(.+)", "test_function"),
        (r"test\s+(.+)\s+(function|class|module)", "test_function"),

        # Explanations
        (r"(explain|describe|what\s+is)\s+(.+)", "explanation"),
        (r"how\s+does\s+(.+)\s+work", "explanation"),

        # Documentation
        (r"(document|add\s+docs?\s+to|docstring\s+for)\s+(.+)", "documentation"),

        # Refactoring
        (r"refactor\s+(.+)", "refactoring"),
        (r"(clean\s+up|improve)\s+(.+)", "refactoring"),

        # Type hints
        (r"add\s+type\s+hints?\s+to\s+(.+)", "type_hints"),

        # Logging
        (r"add\s+logging\s+to\s+(.+)", "logging"),

        # Error handling
        (r"add\s+error\s+handling\s+to\s+(.+)", "error_handling"),
    ]

    def __init__(self, max_size: int = 200):
        self.max_size = max_size
        self._cache: Dict[str, List[CacheEntry]] = {}  # template_type -> entries
        self._lock = threading.RLock()

        # Compile regex patterns
        import re
        self._patterns = [
            (re.compile(pattern, re.IGNORECASE), template_type)
            for pattern, template_type in self.TEMPLATES
        ]

    def _detect_template(self, prompt: str) -> Optional[Tuple[str, str]]:
        """
        Detect if prompt matches a template pattern.
        Returns (template_type, extracted_entity) or None.
        """
        for pattern, template_type in self._patterns:
            match = pattern.search(prompt)
            if match:
                # Extract the variable part (last group)
                entity = match.groups()[-1] if match.groups() else ""
                return (template_type, entity.strip())
        return None

    def get(self, prompt: str, context_hash: str) -> Optional[Tuple[CacheEntry, float]]:
        """
        Find template-matched cached response.
        Returns (entry, confidence) or None.
        """
        template_match = self._detect_template(prompt)
        if not template_match:
            return None

        template_type, entity = template_match

        with self._lock:
            if template_type not in self._cache:
                return None

            entries = self._cache[template_type]

            # Find best match by context similarity
            for entry in entries:
                if entry.context_hash == context_hash:
                    entry.touch()
                    return (entry, 0.85)  # Template match confidence

            # If no exact context match, return most recent with lower confidence
            if entries:
                entry = max(entries, key=lambda e: e.last_accessed)
                entry.touch()
                return (entry, 0.70)

        return None

    def set(self, prompt: str, context_hash: str, response: str,
            model_used: str, tokens_saved: int = 0) -> None:
        """Store in template cache if prompt matches a template"""
        template_match = self._detect_template(prompt)
        if not template_match:
            return

        template_type, _ = template_match

        with self._lock:
            if template_type not in self._cache:
                self._cache[template_type] = []

            entries = self._cache[template_type]

            # Limit entries per template
            max_per_template = self.max_size // len(self.TEMPLATES)
            while len(entries) >= max_per_template:
                # Remove oldest
                entries.sort(key=lambda e: e.last_accessed)
                entries.pop(0)

            entries.append(CacheEntry(
                prompt=prompt,
                context_hash=context_hash,
                response=response,
                model_used=model_used,
                created_at=time.time(),
                last_accessed=time.time(),
                tokens_saved=tokens_saved,
            ))

    def clear(self) -> int:
        """Clear cache"""
        with self._lock:
            count = sum(len(entries) for entries in self._cache.values())
            self._cache.clear()
            return count

    def __len__(self) -> int:
        return sum(len(entries) for entries in self._cache.values())


class LayeredCache:
    """
    Unified multi-level cache manager.

    Queries flow through L1 → L2 → L3 until a hit is found.
    Responses are stored in all applicable cache levels.

    Usage:
        cache = LayeredCache()

        # Lookup
        result = cache.get(prompt, context_hash)
        if result.hit:
            return result.response  # Cache hit!

        # After generating response
        cache.set(prompt, context_hash, response, model_used)
    """

    def __init__(
        self,
        l1_size: int = 1000,
        l2_size: int = 500,
        l2_threshold: float = 0.92,
        l3_size: int = 200,
        persist_path: Optional[Path] = None
    ):
        self.l1 = L1ExactCache(max_size=l1_size)
        self.l2 = L2SemanticCache(
            similarity_threshold=l2_threshold,
            max_size=l2_size
        )
        self.l3 = L3TemplateCache(max_size=l3_size)

        self.stats = CacheStats()
        self.persist_path = persist_path

        # Average response time for savings calculation (ms)
        self._avg_response_time = 5000  # 5 seconds default

    def get(self, prompt: str, context_hash: str = "") -> CacheResult:
        """
        Look up in all cache levels.
        Returns CacheResult with hit status and response.
        """
        start = time.time()

        # L1: Exact match (fastest)
        entry = self.l1.get(prompt, context_hash)
        if entry:
            elapsed = (time.time() - start) * 1000
            self.stats.l1_hits += 1
            self.stats.total_time_saved_ms += self._avg_response_time
            self.stats.total_tokens_saved += entry.tokens_saved

            logger.debug(f"L1 cache hit in {elapsed:.2f}ms")
            return CacheResult(
                hit=True,
                response=entry.response,
                level="L1",
                similarity=1.0,
                time_ms=elapsed
            )

        # L2: Semantic match
        if self.l2.available:
            result = self.l2.get(prompt, context_hash)
            if result:
                entry, similarity = result
                elapsed = (time.time() - start) * 1000
                self.stats.l2_hits += 1
                self.stats.total_time_saved_ms += self._avg_response_time
                self.stats.total_tokens_saved += entry.tokens_saved

                logger.debug(f"L2 cache hit (sim={similarity:.3f}) in {elapsed:.2f}ms")
                return CacheResult(
                    hit=True,
                    response=entry.response,
                    level="L2",
                    similarity=similarity,
                    time_ms=elapsed
                )

        # L3: Template match
        result = self.l3.get(prompt, context_hash)
        if result:
            entry, confidence = result
            elapsed = (time.time() - start) * 1000
            self.stats.l3_hits += 1
            self.stats.total_time_saved_ms += self._avg_response_time * 0.5  # Partial savings
            self.stats.total_tokens_saved += entry.tokens_saved

            logger.debug(f"L3 cache hit (conf={confidence:.3f}) in {elapsed:.2f}ms")
            return CacheResult(
                hit=True,
                response=entry.response,
                level="L3",
                similarity=confidence,
                time_ms=elapsed
            )

        # Cache miss
        elapsed = (time.time() - start) * 1000
        self.stats.misses += 1

        return CacheResult(
            hit=False,
            response=None,
            level=None,
            similarity=None,
            time_ms=elapsed
        )

    def set(
        self,
        prompt: str,
        context_hash: str,
        response: str,
        model_used: str,
        tokens_saved: int = 0
    ) -> None:
        """Store response in all applicable cache levels"""
        # Always store in L1
        self.l1.set(prompt, context_hash, response, model_used, tokens_saved)

        # Store in L2 if available
        if self.l2.available:
            self.l2.set(prompt, context_hash, response, model_used, tokens_saved)

        # Store in L3 if matches a template
        self.l3.set(prompt, context_hash, response, model_used, tokens_saved)

    def get_stats(self) -> Dict[str, Any]:
        """Get cache statistics"""
        stats = self.stats.to_dict()
        stats["sizes"] = {
            "l1": len(self.l1),
            "l2": len(self.l2),
            "l3": len(self.l3),
        }
        stats["l2_available"] = self.l2.available
        return stats

    def clear(self) -> Dict[str, int]:
        """Clear all caches"""
        return {
            "l1_cleared": self.l1.clear(),
            "l2_cleared": self.l2.clear(),
            "l3_cleared": self.l3.clear(),
        }

    def save(self) -> bool:
        """Persist L1 cache to disk"""
        if not self.persist_path:
            return False

        try:
            self.persist_path.parent.mkdir(parents=True, exist_ok=True)

            # Serialize L1 cache (L2/L3 are rebuilt from L1)
            data = {
                "entries": [
                    {
                        "prompt": entry.prompt,
                        "context_hash": entry.context_hash,
                        "response": entry.response,
                        "model_used": entry.model_used,
                        "created_at": entry.created_at,
                        "access_count": entry.access_count,
                        "tokens_saved": entry.tokens_saved,
                    }
                    for entry in self.l1._cache.values()
                ],
                "stats": self.stats.to_dict(),
            }

            with open(self.persist_path, 'w') as f:
                json.dump(data, f)

            logger.info(f"Cache saved to {self.persist_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to save cache: {e}")
            return False

    def load(self) -> bool:
        """Load cache from disk"""
        if not self.persist_path or not self.persist_path.exists():
            return False

        try:
            with open(self.persist_path) as f:
                data = json.load(f)

            for entry_data in data.get("entries", []):
                self.l1.set(
                    entry_data["prompt"],
                    entry_data["context_hash"],
                    entry_data["response"],
                    entry_data["model_used"],
                    entry_data.get("tokens_saved", 0)
                )

            logger.info(f"Loaded {len(self.l1)} entries from cache")
            return True

        except Exception as e:
            logger.error(f"Failed to load cache: {e}")
            return False


# Convenience functions
def make_context_hash(context: Dict[str, Any]) -> str:
    """Create hash from context dictionary"""
    # Sort keys for consistent hashing
    serialized = json.dumps(context, sort_keys=True, default=str)
    return hashlib.sha256(serialized.encode()).hexdigest()[:16]


# Singleton instance
_cache: Optional[LayeredCache] = None


def get_cache(
    persist_path: Optional[Path] = None,
    **kwargs
) -> LayeredCache:
    """Get or create the global cache instance"""
    global _cache
    if _cache is None:
        _cache = LayeredCache(
            persist_path=persist_path or Path.home() / ".nc1709" / "cache.json",
            **kwargs
        )
        _cache.load()  # Try to load persisted cache
    return _cache