nc1709-1.15.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. nc1709/__init__.py +13 -0
  2. nc1709/agent/__init__.py +36 -0
  3. nc1709/agent/core.py +505 -0
  4. nc1709/agent/mcp_bridge.py +245 -0
  5. nc1709/agent/permissions.py +298 -0
  6. nc1709/agent/tools/__init__.py +21 -0
  7. nc1709/agent/tools/base.py +440 -0
  8. nc1709/agent/tools/bash_tool.py +367 -0
  9. nc1709/agent/tools/file_tools.py +454 -0
  10. nc1709/agent/tools/notebook_tools.py +516 -0
  11. nc1709/agent/tools/search_tools.py +322 -0
  12. nc1709/agent/tools/task_tool.py +284 -0
  13. nc1709/agent/tools/web_tools.py +555 -0
  14. nc1709/agents/__init__.py +17 -0
  15. nc1709/agents/auto_fix.py +506 -0
  16. nc1709/agents/test_generator.py +507 -0
  17. nc1709/checkpoints.py +372 -0
  18. nc1709/cli.py +3380 -0
  19. nc1709/cli_ui.py +1080 -0
  20. nc1709/cognitive/__init__.py +149 -0
  21. nc1709/cognitive/anticipation.py +594 -0
  22. nc1709/cognitive/context_engine.py +1046 -0
  23. nc1709/cognitive/council.py +824 -0
  24. nc1709/cognitive/learning.py +761 -0
  25. nc1709/cognitive/router.py +583 -0
  26. nc1709/cognitive/system.py +519 -0
  27. nc1709/config.py +155 -0
  28. nc1709/custom_commands.py +300 -0
  29. nc1709/executor.py +333 -0
  30. nc1709/file_controller.py +354 -0
  31. nc1709/git_integration.py +308 -0
  32. nc1709/github_integration.py +477 -0
  33. nc1709/image_input.py +446 -0
  34. nc1709/linting.py +519 -0
  35. nc1709/llm_adapter.py +667 -0
  36. nc1709/logger.py +192 -0
  37. nc1709/mcp/__init__.py +18 -0
  38. nc1709/mcp/client.py +370 -0
  39. nc1709/mcp/manager.py +407 -0
  40. nc1709/mcp/protocol.py +210 -0
  41. nc1709/mcp/server.py +473 -0
  42. nc1709/memory/__init__.py +20 -0
  43. nc1709/memory/embeddings.py +325 -0
  44. nc1709/memory/indexer.py +474 -0
  45. nc1709/memory/sessions.py +432 -0
  46. nc1709/memory/vector_store.py +451 -0
  47. nc1709/models/__init__.py +86 -0
  48. nc1709/models/detector.py +377 -0
  49. nc1709/models/formats.py +315 -0
  50. nc1709/models/manager.py +438 -0
  51. nc1709/models/registry.py +497 -0
  52. nc1709/performance/__init__.py +343 -0
  53. nc1709/performance/cache.py +705 -0
  54. nc1709/performance/pipeline.py +611 -0
  55. nc1709/performance/tiering.py +543 -0
  56. nc1709/plan_mode.py +362 -0
  57. nc1709/plugins/__init__.py +17 -0
  58. nc1709/plugins/agents/__init__.py +18 -0
  59. nc1709/plugins/agents/django_agent.py +912 -0
  60. nc1709/plugins/agents/docker_agent.py +623 -0
  61. nc1709/plugins/agents/fastapi_agent.py +887 -0
  62. nc1709/plugins/agents/git_agent.py +731 -0
  63. nc1709/plugins/agents/nextjs_agent.py +867 -0
  64. nc1709/plugins/base.py +359 -0
  65. nc1709/plugins/manager.py +411 -0
  66. nc1709/plugins/registry.py +337 -0
  67. nc1709/progress.py +443 -0
  68. nc1709/prompts/__init__.py +22 -0
  69. nc1709/prompts/agent_system.py +180 -0
  70. nc1709/prompts/task_prompts.py +340 -0
  71. nc1709/prompts/unified_prompt.py +133 -0
  72. nc1709/reasoning_engine.py +541 -0
  73. nc1709/remote_client.py +266 -0
  74. nc1709/shell_completions.py +349 -0
  75. nc1709/slash_commands.py +649 -0
  76. nc1709/task_classifier.py +408 -0
  77. nc1709/version_check.py +177 -0
  78. nc1709/web/__init__.py +8 -0
  79. nc1709/web/server.py +950 -0
  80. nc1709/web/templates/index.html +1127 -0
  81. nc1709-1.15.4.dist-info/METADATA +858 -0
  82. nc1709-1.15.4.dist-info/RECORD +86 -0
  83. nc1709-1.15.4.dist-info/WHEEL +5 -0
  84. nc1709-1.15.4.dist-info/entry_points.txt +2 -0
  85. nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
  86. nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/performance/cache.py
@@ -0,0 +1,705 @@
+"""
+NC1709 Performance - Multi-Level Intelligent Caching
+
+Implements a 3-level cache system inspired by CPU cache architecture:
+- L1: Exact match cache (<1ms lookup)
+- L2: Semantic similarity cache (~10ms lookup)
+- L3: Template/pattern cache (~50ms lookup)
+
+Target: 30-40% cache hit rate for significant latency reduction.
+"""
+
+import hashlib
+import time
+import json
+import logging
+from pathlib import Path
+from typing import Optional, Dict, Any, List, Tuple, NamedTuple
+from dataclasses import dataclass, field
+from collections import OrderedDict
+import threading
+
+logger = logging.getLogger(__name__)
+
+# Optional: sentence-transformers for semantic cache
+try:
+    import numpy as np
+    NUMPY_AVAILABLE = True
+except ImportError:
+    NUMPY_AVAILABLE = False
+    np = None
+
+try:
+    from sentence_transformers import SentenceTransformer
+    SEMANTIC_AVAILABLE = True
+except ImportError:
+    SEMANTIC_AVAILABLE = False
+    SentenceTransformer = None
+
+
+@dataclass
+class CacheEntry:
+    """A cached response entry"""
+    prompt: str
+    context_hash: str
+    response: str
+    model_used: str
+    created_at: float
+    access_count: int = 0
+    last_accessed: float = 0
+    tokens_saved: int = 0
+    embedding: Optional[Any] = None  # numpy array if available
+
+    def touch(self):
+        """Update access statistics"""
+        self.access_count += 1
+        self.last_accessed = time.time()
+
+
+@dataclass
+class CacheStats:
+    """Cache performance statistics"""
+    l1_hits: int = 0
+    l2_hits: int = 0
+    l3_hits: int = 0
+    misses: int = 0
+    total_time_saved_ms: float = 0
+    total_tokens_saved: int = 0
+
+    @property
+    def total_queries(self) -> int:
+        return self.l1_hits + self.l2_hits + self.l3_hits + self.misses
+
+    @property
+    def hit_rate(self) -> float:
+        if self.total_queries == 0:
+            return 0.0
+        return (self.l1_hits + self.l2_hits + self.l3_hits) / self.total_queries
+
+    @property
+    def l1_rate(self) -> float:
+        if self.total_queries == 0:
+            return 0.0
+        return self.l1_hits / self.total_queries
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "l1_hits": self.l1_hits,
+            "l2_hits": self.l2_hits,
+            "l3_hits": self.l3_hits,
+            "misses": self.misses,
+            "total_queries": self.total_queries,
+            "hit_rate": round(self.hit_rate * 100, 2),
+            "total_time_saved_ms": round(self.total_time_saved_ms, 2),
+            "total_tokens_saved": self.total_tokens_saved,
+        }
+
+
+class CacheResult(NamedTuple):
+    """Result from cache lookup"""
+    hit: bool
+    response: Optional[str]
+    level: Optional[str]  # "L1", "L2", "L3", or None
+    similarity: Optional[float]  # For L2/L3 hits
+    time_ms: float
+
+
+class L1ExactCache:
+    """
+    Level 1: Exact Match Cache
+
+    Fastest lookup - requires identical prompt + context hash.
+    Uses LRU eviction policy.
+
+    Lookup time: <1ms
+    """
+
+    def __init__(self, max_size: int = 1000):
+        self.max_size = max_size
+        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
+        self._lock = threading.RLock()
+
+    def _make_key(self, prompt: str, context_hash: str) -> str:
+        """Create cache key from prompt and context"""
+        combined = f"{prompt.strip().lower()}:{context_hash}"
+        return hashlib.sha256(combined.encode()).hexdigest()[:32]
+
+    def get(self, prompt: str, context_hash: str) -> Optional[CacheEntry]:
+        """Look up exact match"""
+        key = self._make_key(prompt, context_hash)
+
+        with self._lock:
+            if key in self._cache:
+                entry = self._cache[key]
+                entry.touch()
+                # Move to end (most recently used)
+                self._cache.move_to_end(key)
+                return entry
+
+        return None
+
+    def set(self, prompt: str, context_hash: str, response: str,
+            model_used: str, tokens_saved: int = 0) -> None:
+        """Store in cache"""
+        key = self._make_key(prompt, context_hash)
+
+        with self._lock:
+            # Evict if at capacity
+            while len(self._cache) >= self.max_size:
+                self._cache.popitem(last=False)  # Remove oldest
+
+            self._cache[key] = CacheEntry(
+                prompt=prompt,
+                context_hash=context_hash,
+                response=response,
+                model_used=model_used,
+                created_at=time.time(),
+                last_accessed=time.time(),
+                tokens_saved=tokens_saved,
+            )
+
+    def clear(self) -> int:
+        """Clear cache, return number of entries cleared"""
+        with self._lock:
+            count = len(self._cache)
+            self._cache.clear()
+            return count
+
+    def __len__(self) -> int:
+        return len(self._cache)
+
+
+class L2SemanticCache:
+    """
+    Level 2: Semantic Similarity Cache
+
+    Finds cached responses for semantically similar prompts.
+    Uses sentence embeddings and cosine similarity.
+
+    Example matches:
+    - "write a function to reverse a string" ≈ "create a string reversal function"
+    - "fix the bug in login" ≈ "debug the authentication issue"
+
+    Lookup time: ~10ms
+    """
+
+    def __init__(
+        self,
+        similarity_threshold: float = 0.92,
+        max_size: int = 500,
+        model_name: str = 'all-MiniLM-L6-v2'
+    ):
+        self.threshold = similarity_threshold
+        self.max_size = max_size
+        self._lock = threading.RLock()
+
+        # Initialize encoder if available
+        self._encoder = None
+        self._encoder_ready = False
+
+        if SEMANTIC_AVAILABLE and NUMPY_AVAILABLE:
+            try:
+                self._encoder = SentenceTransformer(model_name)
+                self._encoder_ready = True
+                logger.info(f"L2 semantic cache initialized with {model_name}")
+            except Exception as e:
+                logger.warning(f"Failed to load sentence transformer: {e}")
+        else:
+            logger.info("L2 semantic cache disabled (sentence-transformers not installed)")
+
+        self._entries: List[CacheEntry] = []
+        self._embeddings: Optional[Any] = None  # numpy array
+
+    @property
+    def available(self) -> bool:
+        return self._encoder_ready
+
+    def get(self, prompt: str, context_hash: str) -> Optional[Tuple[CacheEntry, float]]:
+        """
+        Find semantically similar cached response.
+        Returns (entry, similarity_score) or None.
+        """
+        if not self._encoder_ready or not self._entries:
+            return None
+
+        with self._lock:
+            try:
+                # Encode query
+                query_embedding = self._encoder.encode(
+                    prompt,
+                    convert_to_numpy=True,
+                    show_progress_bar=False
+                )
+
+                # Compute cosine similarities
+                # Normalize query
+                query_norm = query_embedding / np.linalg.norm(query_embedding)
+
+                # Compute dot products (embeddings are already normalized)
+                similarities = np.dot(self._embeddings, query_norm)
+
+                # Find best match
+                best_idx = np.argmax(similarities)
+                best_score = float(similarities[best_idx])
+
+                if best_score >= self.threshold:
+                    entry = self._entries[best_idx]
+
+                    # Verify context also matches
+                    if entry.context_hash == context_hash:
+                        entry.touch()
+                        return (entry, best_score)
+
+            except Exception as e:
+                logger.warning(f"L2 cache lookup error: {e}")
+
+        return None
+
+    def set(self, prompt: str, context_hash: str, response: str,
+            model_used: str, tokens_saved: int = 0) -> None:
+        """Store in semantic cache"""
+        if not self._encoder_ready:
+            return
+
+        with self._lock:
+            try:
+                # Evict if at capacity
+                while len(self._entries) >= self.max_size:
+                    self._evict_lru()
+
+                # Compute and normalize embedding
+                embedding = self._encoder.encode(
+                    prompt,
+                    convert_to_numpy=True,
+                    show_progress_bar=False
+                )
+                embedding = embedding / np.linalg.norm(embedding)
+
+                entry = CacheEntry(
+                    prompt=prompt,
+                    context_hash=context_hash,
+                    response=response,
+                    model_used=model_used,
+                    created_at=time.time(),
+                    last_accessed=time.time(),
+                    tokens_saved=tokens_saved,
+                    embedding=embedding,
+                )
+
+                self._entries.append(entry)
+
+                # Update embeddings matrix
+                if self._embeddings is None:
+                    self._embeddings = embedding.reshape(1, -1)
+                else:
+                    self._embeddings = np.vstack([self._embeddings, embedding])
+
+            except Exception as e:
+                logger.warning(f"L2 cache set error: {e}")
+
+    def _evict_lru(self) -> None:
+        """Evict least recently used entry"""
+        if not self._entries:
+            return
+
+        # Find LRU entry
+        lru_idx = min(
+            range(len(self._entries)),
+            key=lambda i: self._entries[i].last_accessed
+        )
+
+        # Remove entry and corresponding embedding row
+        del self._entries[lru_idx]
+        if self._embeddings is not None:
+            self._embeddings = np.delete(self._embeddings, lru_idx, axis=0)
+
+    def clear(self) -> int:
+        """Clear cache"""
+        with self._lock:
+            count = len(self._entries)
+            self._entries.clear()
+            self._embeddings = None
+            return count
+
+    def __len__(self) -> int:
+        return len(self._entries)
+
+
+class L3TemplateCache:
+    """
+    Level 3: Template/Pattern Cache
+
+    Caches responses for common patterns that can be parameterized.
+    Useful for repetitive tasks with slight variations.
+
+    Examples:
+    - "write tests for {function}" → cached test template
+    - "explain {concept}" → cached explanation pattern
+    - "add logging to {file}" → cached modification pattern
+
+    Lookup time: ~50ms
+    """
+
+    # Common templates to detect
+    TEMPLATES = [
+        # Test writing
+        (r"(write|create|add)\s+(unit\s+)?tests?\s+for\s+(.+)", "test_function"),
+        (r"test\s+(.+)\s+(function|class|module)", "test_function"),
+
+        # Explanations
+        (r"(explain|describe|what\s+is)\s+(.+)", "explanation"),
+        (r"how\s+does\s+(.+)\s+work", "explanation"),
+
+        # Documentation
+        (r"(document|add\s+docs?\s+to|docstring\s+for)\s+(.+)", "documentation"),
+
+        # Refactoring
+        (r"refactor\s+(.+)", "refactoring"),
+        (r"(clean\s+up|improve)\s+(.+)", "refactoring"),
+
+        # Type hints
+        (r"add\s+type\s+hints?\s+to\s+(.+)", "type_hints"),
+
+        # Logging
+        (r"add\s+logging\s+to\s+(.+)", "logging"),
+
+        # Error handling
+        (r"add\s+error\s+handling\s+to\s+(.+)", "error_handling"),
+    ]
+
+    def __init__(self, max_size: int = 200):
+        self.max_size = max_size
+        self._cache: Dict[str, List[CacheEntry]] = {}  # template_type -> entries
+        self._lock = threading.RLock()
+
+        # Compile regex patterns
+        import re
+        self._patterns = [
+            (re.compile(pattern, re.IGNORECASE), template_type)
+            for pattern, template_type in self.TEMPLATES
+        ]
+
+    def _detect_template(self, prompt: str) -> Optional[Tuple[str, str]]:
+        """
+        Detect if prompt matches a template pattern.
+        Returns (template_type, extracted_entity) or None.
+        """
+        for pattern, template_type in self._patterns:
+            match = pattern.search(prompt)
+            if match:
+                # Extract the variable part (last group)
+                entity = match.groups()[-1] if match.groups() else ""
+                return (template_type, entity.strip())
+        return None
+
+    def get(self, prompt: str, context_hash: str) -> Optional[Tuple[CacheEntry, float]]:
+        """
+        Find template-matched cached response.
+        Returns (entry, confidence) or None.
+        """
+        template_match = self._detect_template(prompt)
+        if not template_match:
+            return None
+
+        template_type, entity = template_match
+
+        with self._lock:
+            if template_type not in self._cache:
+                return None
+
+            entries = self._cache[template_type]
+
+            # Find best match by context similarity
+            for entry in entries:
+                if entry.context_hash == context_hash:
+                    entry.touch()
+                    return (entry, 0.85)  # Template match confidence
+
+            # If no exact context match, return most recent with lower confidence
+            if entries:
+                entry = max(entries, key=lambda e: e.last_accessed)
+                entry.touch()
+                return (entry, 0.70)
+
+        return None
+
+    def set(self, prompt: str, context_hash: str, response: str,
+            model_used: str, tokens_saved: int = 0) -> None:
+        """Store in template cache if prompt matches a template"""
+        template_match = self._detect_template(prompt)
+        if not template_match:
+            return
+
+        template_type, _ = template_match
+
+        with self._lock:
+            if template_type not in self._cache:
+                self._cache[template_type] = []
+
+            entries = self._cache[template_type]
+
+            # Limit entries per template
+            max_per_template = self.max_size // len(self.TEMPLATES)
+            while len(entries) >= max_per_template:
+                # Remove oldest
+                entries.sort(key=lambda e: e.last_accessed)
+                entries.pop(0)
+
+            entries.append(CacheEntry(
+                prompt=prompt,
+                context_hash=context_hash,
+                response=response,
+                model_used=model_used,
+                created_at=time.time(),
+                last_accessed=time.time(),
+                tokens_saved=tokens_saved,
+            ))
+
+    def clear(self) -> int:
+        """Clear cache"""
+        with self._lock:
+            count = sum(len(entries) for entries in self._cache.values())
+            self._cache.clear()
+            return count
+
+    def __len__(self) -> int:
+        return sum(len(entries) for entries in self._cache.values())
+
+
+class LayeredCache:
+    """
+    Unified multi-level cache manager.
+
+    Queries flow through L1 → L2 → L3 until a hit is found.
+    Responses are stored in all applicable cache levels.
+
+    Usage:
+        cache = LayeredCache()
+
+        # Lookup
+        result = cache.get(prompt, context_hash)
+        if result.hit:
+            return result.response  # Cache hit!
+
+        # After generating response
+        cache.set(prompt, context_hash, response, model_used)
+    """
+
+    def __init__(
+        self,
+        l1_size: int = 1000,
+        l2_size: int = 500,
+        l2_threshold: float = 0.92,
+        l3_size: int = 200,
+        persist_path: Optional[Path] = None
+    ):
+        self.l1 = L1ExactCache(max_size=l1_size)
+        self.l2 = L2SemanticCache(
+            similarity_threshold=l2_threshold,
+            max_size=l2_size
+        )
+        self.l3 = L3TemplateCache(max_size=l3_size)
+
+        self.stats = CacheStats()
+        self.persist_path = persist_path
+
+        # Average response time for savings calculation (ms)
+        self._avg_response_time = 5000  # 5 seconds default
+
+    def get(self, prompt: str, context_hash: str = "") -> CacheResult:
+        """
+        Look up in all cache levels.
+        Returns CacheResult with hit status and response.
+        """
+        start = time.time()
+
+        # L1: Exact match (fastest)
+        entry = self.l1.get(prompt, context_hash)
+        if entry:
+            elapsed = (time.time() - start) * 1000
+            self.stats.l1_hits += 1
+            self.stats.total_time_saved_ms += self._avg_response_time
+            self.stats.total_tokens_saved += entry.tokens_saved
+
+            logger.debug(f"L1 cache hit in {elapsed:.2f}ms")
+            return CacheResult(
+                hit=True,
+                response=entry.response,
+                level="L1",
+                similarity=1.0,
+                time_ms=elapsed
+            )
+
+        # L2: Semantic match
+        if self.l2.available:
+            result = self.l2.get(prompt, context_hash)
+            if result:
+                entry, similarity = result
+                elapsed = (time.time() - start) * 1000
+                self.stats.l2_hits += 1
+                self.stats.total_time_saved_ms += self._avg_response_time
+                self.stats.total_tokens_saved += entry.tokens_saved
+
+                logger.debug(f"L2 cache hit (sim={similarity:.3f}) in {elapsed:.2f}ms")
+                return CacheResult(
+                    hit=True,
+                    response=entry.response,
+                    level="L2",
+                    similarity=similarity,
+                    time_ms=elapsed
+                )
+
+        # L3: Template match
+        result = self.l3.get(prompt, context_hash)
+        if result:
+            entry, confidence = result
+            elapsed = (time.time() - start) * 1000
+            self.stats.l3_hits += 1
+            self.stats.total_time_saved_ms += self._avg_response_time * 0.5  # Partial savings
+            self.stats.total_tokens_saved += entry.tokens_saved
+
+            logger.debug(f"L3 cache hit (conf={confidence:.3f}) in {elapsed:.2f}ms")
+            return CacheResult(
+                hit=True,
+                response=entry.response,
+                level="L3",
+                similarity=confidence,
+                time_ms=elapsed
+            )
+
+        # Cache miss
+        elapsed = (time.time() - start) * 1000
+        self.stats.misses += 1
+
+        return CacheResult(
+            hit=False,
+            response=None,
+            level=None,
+            similarity=None,
+            time_ms=elapsed
+        )
+
+    def set(
+        self,
+        prompt: str,
+        context_hash: str,
+        response: str,
+        model_used: str,
+        tokens_saved: int = 0
+    ) -> None:
+        """Store response in all applicable cache levels"""
+        # Always store in L1
+        self.l1.set(prompt, context_hash, response, model_used, tokens_saved)
+
+        # Store in L2 if available
+        if self.l2.available:
+            self.l2.set(prompt, context_hash, response, model_used, tokens_saved)
+
+        # Store in L3 if matches a template
+        self.l3.set(prompt, context_hash, response, model_used, tokens_saved)
+
+    def get_stats(self) -> Dict[str, Any]:
+        """Get cache statistics"""
+        stats = self.stats.to_dict()
+        stats["sizes"] = {
+            "l1": len(self.l1),
+            "l2": len(self.l2),
+            "l3": len(self.l3),
+        }
+        stats["l2_available"] = self.l2.available
+        return stats
+
+    def clear(self) -> Dict[str, int]:
+        """Clear all caches"""
+        return {
+            "l1_cleared": self.l1.clear(),
+            "l2_cleared": self.l2.clear(),
+            "l3_cleared": self.l3.clear(),
+        }
+
+    def save(self) -> bool:
+        """Persist L1 cache to disk"""
+        if not self.persist_path:
+            return False
+
+        try:
+            self.persist_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Serialize L1 cache (L2/L3 are rebuilt from L1)
+            data = {
+                "entries": [
+                    {
+                        "prompt": entry.prompt,
+                        "context_hash": entry.context_hash,
+                        "response": entry.response,
+                        "model_used": entry.model_used,
+                        "created_at": entry.created_at,
+                        "access_count": entry.access_count,
+                        "tokens_saved": entry.tokens_saved,
+                    }
+                    for entry in self.l1._cache.values()
+                ],
+                "stats": self.stats.to_dict(),
+            }
+
+            with open(self.persist_path, 'w') as f:
+                json.dump(data, f)
+
+            logger.info(f"Cache saved to {self.persist_path}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to save cache: {e}")
+            return False
+
+    def load(self) -> bool:
+        """Load cache from disk"""
+        if not self.persist_path or not self.persist_path.exists():
+            return False
+
+        try:
+            with open(self.persist_path) as f:
+                data = json.load(f)
+
+            for entry_data in data.get("entries", []):
+                self.l1.set(
+                    entry_data["prompt"],
+                    entry_data["context_hash"],
+                    entry_data["response"],
+                    entry_data["model_used"],
+                    entry_data.get("tokens_saved", 0)
+                )
+
+            logger.info(f"Loaded {len(self.l1)} entries from cache")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to load cache: {e}")
+            return False
+
+
+# Convenience functions
+def make_context_hash(context: Dict[str, Any]) -> str:
+    """Create hash from context dictionary"""
+    # Sort keys for consistent hashing
+    serialized = json.dumps(context, sort_keys=True, default=str)
+    return hashlib.sha256(serialized.encode()).hexdigest()[:16]
+
+
+# Singleton instance
+_cache: Optional[LayeredCache] = None
+
+
+def get_cache(
+    persist_path: Optional[Path] = None,
+    **kwargs
+) -> LayeredCache:
+    """Get or create the global cache instance"""
+    global _cache
+    if _cache is None:
+        _cache = LayeredCache(
+            persist_path=persist_path or Path.home() / ".nc1709" / "cache.json",
+            **kwargs
+        )
+        _cache.load()  # Try to load persisted cache
+    return _cache
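
The LayeredCache docstring above outlines the intended call pattern. A minimal end-to-end sketch of that flow, assuming the module is importable as nc1709.performance.cache (the path shown in the file list); call_model is a hypothetical stand-in for the real model call:

    from nc1709.performance.cache import get_cache, make_context_hash

    def call_model(prompt: str) -> str:
        # Hypothetical placeholder for the actual LLM call
        return f"response for: {prompt}"

    cache = get_cache()  # module-level singleton; loads ~/.nc1709/cache.json if present
    ctx = make_context_hash({"cwd": "/tmp/project", "model": "local"})

    prompt = "explain list comprehensions"
    result = cache.get(prompt, ctx)
    if result.hit:
        answer = result.response       # served from L1, L2, or L3 (see result.level)
    else:
        answer = call_model(prompt)
        cache.set(prompt, ctx, answer, model_used="local")

    print(cache.get_stats())           # per-level hits, hit_rate, sizes
    cache.save()                       # persist L1 entries to disk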
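
On the L2 lookup: both the stored embeddings (in set()) and the query embedding (in get()) are normalized to unit length, so cosine similarity reduces to a plain dot product. A numpy-only illustration of that step, using made-up 3-dimensional vectors rather than real sentence-transformer embeddings:

    import numpy as np

    stored = np.array([[0.1, 0.9, 0.2],     # one row per cached prompt embedding
                       [0.8, 0.1, 0.1]])
    stored = stored / np.linalg.norm(stored, axis=1, keepdims=True)  # normalized at set() time

    query = np.array([0.12, 0.85, 0.25])
    query = query / np.linalg.norm(query)                            # normalized at get() time

    similarities = np.dot(stored, query)    # unit-vector dot products == cosine similarities
    best_idx = int(np.argmax(similarities))
    best_score = float(similarities[best_idx])
    print(best_idx, round(best_score, 3))   # entry 0 is the closest match (score ≈ 0.997)

A hit is only returned when best_score clears the threshold (0.92 by default) and the matched entry's context_hash equals the caller's context hash.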
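
On the L3 lookup: a prompt participates only if it matches one of the TEMPLATES regexes; _detect_template maps it to a (template_type, entity) pair, and the returned confidence is fixed at 0.85 for a matching context hash or 0.70 otherwise. A short illustration that calls the private helper directly, purely to show the mapping:

    from nc1709.performance.cache import L3TemplateCache

    l3 = L3TemplateCache()
    print(l3._detect_template("write unit tests for parse_config"))
    # ('test_function', 'parse_config')
    print(l3._detect_template("add logging to cli.py"))
    # ('logging', 'cli.py')
    print(l3._detect_template("tell me a joke"))
    # None (non-template prompts bypass the L3 cache entirely)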