headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/cache/compression_store.py
@@ -0,0 +1,814 @@
"""Compression Store for CCR (Compress-Cache-Retrieve) architecture.

This module implements reversible compression: when SmartCrusher compresses
tool outputs, the original data is cached here for on-demand retrieval.

Key insight from research: REVERSIBLE compression beats irreversible compression.
If the LLM needs data that was compressed away, it can retrieve it instantly.

Features:
- Thread-safe in-memory storage with TTL expiration
- BM25-based search within cached content
- Retrieval event tracking for feedback loop
- Automatic eviction when capacity is reached

Usage:
    store = get_compression_store()

    # Store compressed content
    hash_key = store.store(
        original=original_json,
        compressed=compressed_json,
        original_tokens=1000,
        compressed_tokens=100,
        tool_name="search_api",
    )

    # Retrieve later
    entry = store.retrieve(hash_key)

    # Or search within
    results = store.search(hash_key, "user query")
"""

from __future__ import annotations

import hashlib
import heapq
import json
import logging
import re
import threading
import time
from dataclasses import dataclass, field, replace
from typing import Any

from ..relevance.bm25 import BM25Scorer

logger = logging.getLogger(__name__)


@dataclass
class CompressionEntry:
    """A cached compression entry with metadata for retrieval and feedback."""

    hash: str
    original_content: str
    compressed_content: str
    original_tokens: int
    compressed_tokens: int
    original_item_count: int
    compressed_item_count: int
    tool_name: str | None
    tool_call_id: str | None
    query_context: str | None
    created_at: float
    ttl: int = 300  # 5 minutes default

    # TOIN integration: Store the tool signature hash for retrieval correlation
    # This MUST match the hash used by SmartCrusher when recording compression
    tool_signature_hash: str | None = None
    compression_strategy: str | None = None  # Strategy used for compression

    # Feedback tracking
    retrieval_count: int = 0
    search_queries: list[str] = field(default_factory=list)
    last_accessed: float | None = None

    def is_expired(self) -> bool:
        """Check if this entry has expired."""
        return time.time() - self.created_at > self.ttl

    def record_access(self, query: str | None = None) -> None:
        """Record an access to this entry for feedback tracking."""
        self.retrieval_count += 1
        self.last_accessed = time.time()
        if query and query not in self.search_queries:
            self.search_queries.append(query)
            # Keep only last 10 queries
            if len(self.search_queries) > 10:
                self.search_queries = self.search_queries[-10:]


@dataclass
class RetrievalEvent:
    """Event logged when content is retrieved from cache."""

    hash: str
    query: str | None
    items_retrieved: int
    total_items: int
    tool_name: str | None
    timestamp: float
    retrieval_type: str  # "full" or "search"
    tool_signature_hash: str | None = None  # For TOIN correlation


class CompressionStore:
    """Thread-safe store for compressed content with retrieval support.

    This is the core of the CCR architecture. When SmartCrusher compresses
    an array, the original content is stored here. If the LLM needs more
    data, it can retrieve it from this cache instantly.

    Design principles:
    - Zero external dependencies (pure Python)
    - Thread-safe for concurrent access
    - TTL-based expiration (default 5 minutes)
    - Oldest-first (FIFO) eviction when capacity is reached
    - Built-in BM25 search for filtering
    """

    def __init__(
        self,
        max_entries: int = 1000,
        default_ttl: int = 300,
        enable_feedback: bool = True,
    ):
        """Initialize the compression store.

        Args:
            max_entries: Maximum number of entries to store.
            default_ttl: Default TTL in seconds (5 minutes).
            enable_feedback: Whether to track retrieval events.
        """
        self._store: dict[str, CompressionEntry] = {}
        self._lock = threading.Lock()
        self._max_entries = max_entries
        self._default_ttl = default_ttl
        self._enable_feedback = enable_feedback

        # Feedback tracking
        self._retrieval_events: list[RetrievalEvent] = []
        self._max_events = 1000  # Keep last 1000 events
        self._pending_feedback_events: list[RetrievalEvent] = []

        # MEDIUM FIX #16: Use a min-heap for O(log n) eviction instead of O(n)
        # Heap entries are (created_at, hash_key) tuples
        self._eviction_heap: list[tuple[float, str]] = []
        # CRITICAL FIX: Track stale entries count to know when heap cleanup is needed
        self._stale_heap_entries = 0
        # Threshold for triggering heap rebuild (when 50% are stale)
        self._heap_rebuild_threshold = 0.5

        # BM25 scorer for search
        self._scorer = BM25Scorer()

    def store(
        self,
        original: str,
        compressed: str,
        *,
        original_tokens: int = 0,
        compressed_tokens: int = 0,
        original_item_count: int = 0,
        compressed_item_count: int = 0,
        tool_name: str | None = None,
        tool_call_id: str | None = None,
        query_context: str | None = None,
        tool_signature_hash: str | None = None,
        compression_strategy: str | None = None,
        ttl: int | None = None,
    ) -> str:
        """Store compressed content and return hash for retrieval.

        Args:
            original: Original JSON content before compression.
            compressed: Compressed JSON content.
            original_tokens: Token count of original content.
            compressed_tokens: Token count of compressed content.
            original_item_count: Number of items in original array.
            compressed_item_count: Number of items after compression.
            tool_name: Name of the tool that produced this output.
            tool_call_id: ID of the tool call.
            query_context: User query context for relevance matching.
            tool_signature_hash: Hash from ToolSignature for TOIN correlation.
            compression_strategy: Strategy used for compression.
            ttl: Custom TTL in seconds (uses default if not specified).

        Returns:
            Hash key for retrieving this content.
        """
        # Generate hash from original content
        # CRITICAL FIX #5: Use 24 chars (96 bits) instead of 16 (64 bits) for better
        # collision resistance. Birthday paradox: 50% collision at sqrt(2^n) entries.
        # - 64 bits: ~4 billion entries for 50% collision
        # - 96 bits: ~280 trillion entries for 50% collision
        hash_key = hashlib.sha256(original.encode()).hexdigest()[:24]
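
        # Worked numbers behind the comment above (illustrative arithmetic,
        # not in the original source): each hex char carries 4 bits, and the
        # birthday bound reaches ~50% collision probability near sqrt(2**n):
        #   16 hex chars = 64 bits -> sqrt(2**64) = 2**32 ~ 4.3e9  (~4 billion)
        #   24 hex chars = 96 bits -> sqrt(2**96) = 2**48 ~ 2.8e14 (~280 trillion)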

        entry = CompressionEntry(
            hash=hash_key,
            original_content=original,
            compressed_content=compressed,
            original_tokens=original_tokens,
            compressed_tokens=compressed_tokens,
            original_item_count=original_item_count,
            compressed_item_count=compressed_item_count,
            tool_name=tool_name,
            tool_call_id=tool_call_id,
            query_context=query_context,
            created_at=time.time(),
            ttl=ttl if ttl is not None else self._default_ttl,
            tool_signature_hash=tool_signature_hash,
            compression_strategy=compression_strategy,
        )

        # Process pending feedback BEFORE acquiring lock for eviction.
        # This ensures feedback from entries about to be evicted is captured.
        if self._enable_feedback:
            self.process_pending_feedback()

        with self._lock:
            self._evict_if_needed()

            # CRITICAL FIX: Hash collision detection
            # If hash already exists with DIFFERENT content, log a warning.
            # This indicates either a hash collision or duplicate store calls.
            existing = self._store.get(hash_key)
            if existing is not None:
                if existing.original_content != original:
                    # True hash collision - different content, same hash
                    # This is extremely rare with SHA256[:24] but should be logged
                    logger.warning(
                        "Hash collision detected: hash=%s tool=%s (existing_len=%d, new_len=%d)",
                        hash_key,
                        tool_name,
                        len(existing.original_content),
                        len(original),
                    )
                else:
                    # Same content being stored again - this is fine, just update
                    logger.debug(
                        "Duplicate store for hash=%s, updating entry",
                        hash_key,
                    )
                # Mark old heap entry as stale since we're replacing
                self._stale_heap_entries += 1

            self._store[hash_key] = entry
            # MEDIUM FIX #16: Add to eviction heap for O(log n) eviction
            heapq.heappush(self._eviction_heap, (entry.created_at, hash_key))

        return hash_key

    def retrieve(
        self,
        hash_key: str,
        query: str | None = None,
    ) -> CompressionEntry | None:
        """Retrieve original content by hash.

        Args:
            hash_key: Hash key returned by store().
            query: Optional query for feedback tracking.

        Returns:
            CompressionEntry if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                # CRITICAL FIX: Track stale heap entry
                self._stale_heap_entries += 1
                return None

            # Track access for feedback
            entry.record_access(query)

            # Log retrieval event
            if self._enable_feedback:
                self._log_retrieval(
                    hash_key=hash_key,
                    query=query,
                    items_retrieved=entry.original_item_count,
                    total_items=entry.original_item_count,
                    tool_name=entry.tool_name,
                    retrieval_type="full",
                    tool_signature_hash=entry.tool_signature_hash,
                )

            # CRITICAL: Make a deep copy to return
            # (entry could be modified/evicted after lock release)
            # The entry contains mutable fields (search_queries list) that must be copied
            result_entry = replace(entry, search_queries=list(entry.search_queries))

        # Process feedback immediately to ensure TOIN learns in real-time
        if self._enable_feedback:
            self.process_pending_feedback()

        return result_entry

    def get_metadata(
        self,
        hash_key: str,
    ) -> dict[str, Any] | None:
        """Get metadata about a stored entry without retrieving full content.

        Useful for context tracking to know what was compressed without
        fetching the entire original content.

        Args:
            hash_key: Hash key returned by store().

        Returns:
            Dict with metadata if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                self._stale_heap_entries += 1
                return None

            return {
                "hash": entry.hash,
                "tool_name": entry.tool_name,
                "original_item_count": entry.original_item_count,
                "compressed_item_count": entry.compressed_item_count,
                "query_context": entry.query_context,
                "compressed_content": entry.compressed_content,
                "created_at": entry.created_at,
                "ttl": entry.ttl,
            }

    def search(
        self,
        hash_key: str,
        query: str,
        max_results: int = 20,
        score_threshold: float = 0.3,
    ) -> list[dict[str, Any]]:
        """Search within cached content using BM25.

        Args:
            hash_key: Hash key of cached content.
            query: Search query.
            max_results: Maximum number of results to return.
            score_threshold: Minimum BM25 score to include.

        Returns:
            List of matching items from original content.
        """
        # Get entry without logging (we'll log the search separately)
        entry = self._get_entry_for_search(hash_key, query)
        if entry is None:
            return []

        try:
            items = json.loads(entry.original_content)
            if not isinstance(items, list):
                return []
        except json.JSONDecodeError:
            return []

        if not items:
            return []

        # Score each item using BM25
        item_strs = [json.dumps(item, default=str) for item in items]
        scores = self._scorer.score_batch(item_strs, query)

        # Filter and sort by score
        scored_items = [
            (items[i], scores[i].score)
            for i in range(len(items))
            if scores[i].score >= score_threshold
        ]
        scored_items.sort(key=lambda x: x[1], reverse=True)

        results = [item for item, _ in scored_items[:max_results]]

        # Log retrieval event
        if self._enable_feedback:
            with self._lock:
                self._log_retrieval(
                    hash_key=hash_key,
                    query=query,
                    items_retrieved=len(results),
                    total_items=len(items),
                    tool_name=entry.tool_name,
                    retrieval_type="search",
                    tool_signature_hash=entry.tool_signature_hash,
                )
            # Process feedback immediately to ensure TOIN learns in real-time
            self.process_pending_feedback()

        return results
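
    # Illustrative use of search() (a sketch; the tool name and query string
    # are made-up values, following the module docstring's example):
    #
    #     hash_key = store.store(original_json, compressed_json, tool_name="search_api")
    #     top_hits = store.search(hash_key, "timeout error", max_results=5)
    #
    # This returns at most 5 items from the ORIGINAL array whose BM25 score
    # against the query is >= score_threshold, best matches first.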

    def _get_entry_for_search(
        self,
        hash_key: str,
        query: str | None = None,
    ) -> CompressionEntry | None:
        """Get entry without logging retrieval (used by search to avoid double-logging).

        CRITICAL FIX #4: Returns a copy of the entry to prevent race conditions.
        The caller may use the entry after we release the lock, and another thread
        could modify or evict the original entry.

        Args:
            hash_key: Hash key returned by store().
            query: Optional query for access tracking.

        Returns:
            CompressionEntry copy if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                # CRITICAL FIX: Track stale heap entry
                self._stale_heap_entries += 1
                return None

            # Track access but don't log retrieval event (search will log separately)
            entry.record_access(query)

            # CRITICAL FIX #4: Return a copy to prevent race conditions
            # The entry contains mutable fields (search_queries list) that could be
            # modified by other threads after we release the lock
            return replace(entry, search_queries=list(entry.search_queries))

    def exists(self, hash_key: str, clean_expired: bool = False) -> bool:
        """Check if a hash key exists and is not expired.

        Args:
            hash_key: The hash key to check.
            clean_expired: If True, delete the entry if expired.
                LOW FIX #20: Default False to make this a pure check.

        Returns:
            True if the entry exists and is not expired.
        """
        with self._lock:
            entry = self._store.get(hash_key)
            if entry is None:
                return False
            if entry.is_expired():
                # LOW FIX #20: Only delete if explicitly requested
                # This makes exists() a pure check by default
                if clean_expired:
                    del self._store[hash_key]
                    # CRITICAL FIX: Track stale heap entry
                    self._stale_heap_entries += 1
                return False
            return True

    def get_stats(self) -> dict[str, Any]:
        """Get store statistics for monitoring."""
        with self._lock:
            # Clean expired entries
            self._clean_expired()

            total_original_tokens = sum(e.original_tokens for e in self._store.values())
            total_compressed_tokens = sum(e.compressed_tokens for e in self._store.values())
            total_retrievals = sum(e.retrieval_count for e in self._store.values())

            return {
                "entry_count": len(self._store),
                "max_entries": self._max_entries,
                "total_original_tokens": total_original_tokens,
                "total_compressed_tokens": total_compressed_tokens,
                "total_retrievals": total_retrievals,
                "event_count": len(self._retrieval_events),
            }
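
    # Shape of the get_stats() result, with values invented for illustration:
    #
    #     {"entry_count": 42, "max_entries": 1000,
    #      "total_original_tokens": 120_000, "total_compressed_tokens": 14_500,
    #      "total_retrievals": 7, "event_count": 31}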

    def get_retrieval_events(
        self,
        limit: int = 100,
        tool_name: str | None = None,
    ) -> list[RetrievalEvent]:
        """Get recent retrieval events for feedback analysis.

        Args:
            limit: Maximum number of events to return.
            tool_name: Filter by tool name if specified.

        Returns:
            List of recent retrieval events (copies to prevent mutation).
        """
        with self._lock:
            # MEDIUM FIX #17: Take a slice copy immediately to avoid race conditions
            # if another thread modifies _retrieval_events after we release the lock
            events_copy = list(self._retrieval_events)

        # Filter and slice outside lock (safe since we have a copy)
        if tool_name:
            events_copy = [e for e in events_copy if e.tool_name == tool_name]

        return list(reversed(events_copy[-limit:]))

    def clear(self) -> None:
        """Clear all entries. Mainly for testing."""
        with self._lock:
            self._store.clear()
            self._retrieval_events.clear()
            self._pending_feedback_events.clear()
            self._eviction_heap.clear()  # MEDIUM FIX #16: Clear heap too
            self._stale_heap_entries = 0  # CRITICAL FIX: Reset stale counter

    def _evict_if_needed(self) -> None:
        """Evict old entries if at capacity. Must be called with lock held.

        MEDIUM FIX #16: Use heap for O(log n) eviction instead of O(n) scan.
        CRITICAL FIX: Track and clean stale heap entries to prevent memory leak.
        """
        # First, remove expired entries
        self._clean_expired()

        # CRITICAL FIX: Rebuild heap if too many stale entries
        # This prevents unbounded heap growth when entries are deleted/replaced
        heap_size = len(self._eviction_heap)
        if heap_size > 0:
            stale_ratio = self._stale_heap_entries / heap_size
            if stale_ratio >= self._heap_rebuild_threshold:
                self._rebuild_heap()

        # If still at capacity, remove oldest entries using heap
        while len(self._store) >= self._max_entries and self._eviction_heap:
            # Pop oldest from heap (O(log n))
            created_at, hash_key = heapq.heappop(self._eviction_heap)

            # Check if entry still exists and matches timestamp
            # (entry might have been deleted or replaced)
            entry = self._store.get(hash_key)
            if entry is not None and entry.created_at == created_at:
                # HIGH FIX: Track eviction as "successful compression" if never retrieved
                # This prevents state divergence between store and feedback loop
                if self._enable_feedback and entry.retrieval_count == 0:
                    # Entry was never retrieved = compression was successful
                    # Notify feedback system so it knows this strategy worked
                    self._record_eviction_success(entry)
                del self._store[hash_key]
            else:
                # CRITICAL FIX: This was a stale entry, decrement counter
                # (we already popped it, so the stale entry is now gone)
                if self._stale_heap_entries > 0:
                    self._stale_heap_entries -= 1
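
    # Worked trace of the lazy-deletion scheme above (illustrative): suppose
    # an entry is stored at created_at=1.0 and replaced at created_at=2.0.
    # The heap then holds both (1.0, key) and (2.0, key), and
    # _stale_heap_entries == 1. When (1.0, key) is popped, the live entry's
    # created_at is 2.0 != 1.0, so the pop is discarded and the stale counter
    # is decremented instead of evicting the live entry.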

    def _clean_expired(self) -> None:
        """Remove expired entries. Must be called with lock held.

        CRITICAL FIX: Track stale heap entries when deleting to prevent memory leak.
        """
        expired_keys = [key for key, entry in self._store.items() if entry.is_expired()]
        for key in expired_keys:
            del self._store[key]
            # CRITICAL FIX: Increment stale counter - the heap still has an entry
            # for this key that will be stale when we try to evict
            self._stale_heap_entries += 1

    def _rebuild_heap(self) -> None:
        """Rebuild heap from current store entries. Must be called with lock held.

        CRITICAL FIX: This removes stale heap entries that accumulate when entries
        are deleted or replaced. Without this, the heap grows unboundedly.
        """
        # Build new heap from current store entries only
        self._eviction_heap = [
            (entry.created_at, hash_key) for hash_key, entry in self._store.items()
        ]
        heapq.heapify(self._eviction_heap)
        # Reset stale counter - heap is now clean
        self._stale_heap_entries = 0
        logger.debug(
            "Rebuilt eviction heap: %d entries",
            len(self._eviction_heap),
        )

    def _record_eviction_success(self, entry: CompressionEntry) -> None:
        """Record successful compression when an entry is evicted without retrieval.

        HIGH FIX: State divergence on eviction.
        When an entry is evicted and was NEVER retrieved, this indicates the
        compression was fully successful - the LLM never needed the original data.
        We notify the feedback system so it can learn from this success.

        Must be called with lock held (entry data access).
        Actual feedback notification happens outside lock.

        Args:
            entry: The entry being evicted.
        """
        # Capture entry data while we have the lock
        tool_name = entry.tool_name
        sig_hash = entry.tool_signature_hash
        strategy = entry.compression_strategy

        # We can't call feedback while holding the lock (would cause deadlock)
        # Instead, queue this for deferred processing
        if sig_hash is not None and strategy is not None:
            # Create a synthetic "success" event that we'll process later
            # Use a special retrieval type to indicate this was an eviction success
            success_event = RetrievalEvent(
                hash=entry.hash,
                query=None,
                items_retrieved=0,  # No retrieval happened
                total_items=entry.original_item_count,
                tool_name=tool_name,
                timestamp=time.time(),
                retrieval_type="eviction_success",  # Special marker
                tool_signature_hash=sig_hash,
            )
            self._pending_feedback_events.append(success_event)
            logger.debug(
                "Recorded eviction success: hash=%s strategy=%s",
                entry.hash[:8],
                strategy,
            )

    def _log_retrieval(
        self,
        hash_key: str,
        query: str | None,
        items_retrieved: int,
        total_items: int,
        tool_name: str | None,
        retrieval_type: str,
        tool_signature_hash: str | None = None,
    ) -> None:
        """Log a retrieval event. Must be called with lock held."""
        event = RetrievalEvent(
            hash=hash_key,
            query=query,
            items_retrieved=items_retrieved,
            total_items=total_items,
            tool_name=tool_name,
            timestamp=time.time(),
            retrieval_type=retrieval_type,
            tool_signature_hash=tool_signature_hash,
        )

        self._retrieval_events.append(event)

        # Keep only recent events
        if len(self._retrieval_events) > self._max_events:
            self._retrieval_events = self._retrieval_events[-self._max_events :]

        # Queue event for feedback processing (will be processed after lock release)
        # This is safe because process_pending_feedback() uses the lock to atomically
        # swap out the pending list before processing
        self._pending_feedback_events.append(event)

    def process_pending_feedback(self) -> None:
        """Process pending feedback events.

        Forwards events to:
        1. CompressionFeedback - for learning compression hints
        2. TelemetryCollector - for the data flywheel
        3. TOIN - for the cross-user intelligence network

        This is called automatically on each retrieval to ensure the
        feedback loop operates in real-time.
        """
        from ..telemetry import get_telemetry_collector
        from ..telemetry.toin import get_toin
        from .compression_feedback import get_compression_feedback

        # Get pending events and related entry data atomically
        with self._lock:
            events = self._pending_feedback_events
            self._pending_feedback_events = []

            # Gather entry data while holding lock to avoid race conditions
            # Tuple: (event, tool_name, sig_hash, strategy, compressed_content)
            event_data: list[
                tuple[RetrievalEvent, str | None, str | None, str | None, str | None]
            ] = []
            for event in events:
                entry = self._store.get(event.hash)
                if entry:
                    # Use the ACTUAL tool_signature_hash stored during compression
                    # This MUST match the hash used by SmartCrusher
                    event_data.append(
                        (
                            event,
                            entry.tool_name,
                            entry.tool_signature_hash,  # The correct hash!
                            entry.compression_strategy,
                            entry.compressed_content,  # For TOIN field-level learning
                        )
                    )
                else:
                    event_data.append((event, None, None, None, None))

        # Process outside lock
        if event_data:
            feedback = get_compression_feedback()
            telemetry = get_telemetry_collector()
            toin = get_toin()

            for event, _tool_name, sig_hash, strategy, compressed_content in event_data:
                # Notify feedback system (pass strategy for success rate tracking)
                feedback.record_retrieval(event, strategy=strategy)

                # Extract query fields if present
                query_fields = None
                if event.query:
                    # Extract field:value patterns
                    query_fields = re.findall(r"(\w+)[=:]", event.query)

                # Notify telemetry for data flywheel
                try:
                    if sig_hash is not None:
                        telemetry.record_retrieval(
                            tool_signature_hash=sig_hash,
                            retrieval_type=event.retrieval_type,
                            query_fields=query_fields,
                        )
                except Exception:
                    # Telemetry should never break the feedback loop
                    logger.debug("Telemetry record_retrieval failed", exc_info=True)

                # Parse compressed content to extract items for TOIN field-level learning
                retrieved_items: list[dict[str, Any]] | None = None
                if compressed_content:
                    try:
                        parsed = json.loads(compressed_content)
                        # Handle both direct arrays and wrapped arrays
                        if isinstance(parsed, list):
                            # Filter to dicts only (field learning needs dict items)
                            retrieved_items = [item for item in parsed if isinstance(item, dict)]
                        elif isinstance(parsed, dict):
                            # Check for common wrapper patterns: {"items": [...], "results": [...]}
                            for key in ("items", "results", "data", "records"):
                                if key in parsed and isinstance(parsed[key], list):
                                    retrieved_items = [
                                        item for item in parsed[key] if isinstance(item, dict)
                                    ]
                                    break
                    except (json.JSONDecodeError, TypeError):
                        # Invalid JSON - skip field learning for this retrieval
                        pass

                # Notify TOIN for cross-user learning
                try:
                    if sig_hash is not None:
                        toin.record_retrieval(
                            tool_signature_hash=sig_hash,
                            retrieval_type=event.retrieval_type,
                            query=event.query,
                            query_fields=query_fields,
                            strategy=strategy,  # Pass strategy for success rate tracking
                            retrieved_items=retrieved_items,  # For field-level learning
                        )
                except Exception:
                    # TOIN should never break the feedback loop
                    logger.debug("TOIN record_retrieval failed", exc_info=True)


# Global store instance (lazy initialization)
_compression_store: CompressionStore | None = None
_store_lock = threading.Lock()


def get_compression_store(
    max_entries: int = 1000,
    default_ttl: int = 300,
) -> CompressionStore:
    """Get the global compression store instance.

    Uses lazy initialization with the singleton pattern.

    Args:
        max_entries: Maximum entries (only used on first call).
        default_ttl: Default TTL (only used on first call).

    Returns:
        Global CompressionStore instance.
    """
    global _compression_store

    if _compression_store is None:
        with _store_lock:
            # Double-check after acquiring lock
            if _compression_store is None:
                _compression_store = CompressionStore(
                    max_entries=max_entries,
                    default_ttl=default_ttl,
                )

    return _compression_store


def reset_compression_store() -> None:
    """Reset the global compression store. Mainly for testing."""
    global _compression_store

    with _store_lock:
        if _compression_store is not None:
            _compression_store.clear()
        _compression_store = None
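
To tie the new module together, here is a minimal end-to-end sketch of the API this file introduces. The tool output below is invented for illustration; store(), retrieve(), search(), get_stats(), and reset_compression_store() all come from the source above, and the import path follows the file's location in the package.

    import json

    from headroom.cache.compression_store import (
        get_compression_store,
        reset_compression_store,
    )

    store = get_compression_store()

    # Pretend SmartCrusher kept 1 of 3 items from a tool response.
    original = json.dumps([
        {"id": 1, "status": "ok"},
        {"id": 2, "status": "timeout"},
        {"id": 3, "status": "ok"},
    ])
    compressed = json.dumps([{"id": 2, "status": "timeout"}])

    hash_key = store.store(
        original,
        compressed,
        original_item_count=3,
        compressed_item_count=1,
        tool_name="search_api",
    )

    entry = store.retrieve(hash_key)            # full original, or None if expired
    hits = store.search(hash_key, "status: ok")  # BM25-filtered original items
    print(store.get_stats()["entry_count"])      # 1

    reset_compression_store()  # mainly for tests

Note that every store(), retrieve(), and search() call also feeds the retrieval-event pipeline (CompressionFeedback, telemetry, TOIN) described in process_pending_feedback() above.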