headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/cache/compression_store.py
@@ -0,0 +1,814 @@
"""Compression Store for CCR (Compress-Cache-Retrieve) architecture.

This module implements reversible compression: when SmartCrusher compresses
tool outputs, the original data is cached here for on-demand retrieval.

Key insight from research: REVERSIBLE compression beats irreversible compression.
If the LLM needs data that was compressed away, it can retrieve it instantly.

Features:
- Thread-safe in-memory storage with TTL expiration
- BM25-based search within cached content
- Retrieval event tracking for feedback loop
- Automatic eviction when capacity is reached

Usage:
    store = get_compression_store()

    # Store compressed content
    hash_key = store.store(
        original=original_json,
        compressed=compressed_json,
        original_tokens=1000,
        compressed_tokens=100,
        tool_name="search_api",
    )

    # Retrieve later
    entry = store.retrieve(hash_key)

    # Or search within
    results = store.search(hash_key, "user query")
"""

from __future__ import annotations

import hashlib
import heapq
import json
import logging
import re
import threading
import time
from dataclasses import dataclass, field, replace
from typing import Any

from ..relevance.bm25 import BM25Scorer

logger = logging.getLogger(__name__)
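
# Illustrative round trip (the payload, token counts, and tool name below are
# hypothetical; the calls mirror the public API defined in this module):
#
#     store = get_compression_store()
#     key = store.store(
#         original='[{"id": 1, "name": "alice"}, {"id": 2, "name": "bob"}]',
#         compressed='[{"id": 1, "name": "alice"}]',
#         original_tokens=24,
#         compressed_tokens=12,
#         original_item_count=2,
#         compressed_item_count=1,
#         tool_name="search_api",
#     )
#     entry = store.retrieve(key)      # full original, logged as a "full" retrieval
#     hits = store.search(key, "bob")  # BM25-filtered items, logged as "search"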


@dataclass
class CompressionEntry:
    """A cached compression entry with metadata for retrieval and feedback."""

    hash: str
    original_content: str
    compressed_content: str
    original_tokens: int
    compressed_tokens: int
    original_item_count: int
    compressed_item_count: int
    tool_name: str | None
    tool_call_id: str | None
    query_context: str | None
    created_at: float
    ttl: int = 300  # 5 minutes default

    # TOIN integration: Store the tool signature hash for retrieval correlation
    # This MUST match the hash used by SmartCrusher when recording compression
    tool_signature_hash: str | None = None
    compression_strategy: str | None = None  # Strategy used for compression

    # Feedback tracking
    retrieval_count: int = 0
    search_queries: list[str] = field(default_factory=list)
    last_accessed: float | None = None

    def is_expired(self) -> bool:
        """Check if this entry has expired."""
        return time.time() - self.created_at > self.ttl

    def record_access(self, query: str | None = None) -> None:
        """Record an access to this entry for feedback tracking."""
        self.retrieval_count += 1
        self.last_accessed = time.time()
        if query and query not in self.search_queries:
            self.search_queries.append(query)
            # Keep only last 10 queries
            if len(self.search_queries) > 10:
                self.search_queries = self.search_queries[-10:]
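
    # Illustrative expiry check (field values are hypothetical): expiry is a
    # function of wall-clock age relative to `created_at`, not of access time:
    #
    #     e = CompressionEntry(
    #         hash="a" * 24, original_content="[]", compressed_content="[]",
    #         original_tokens=0, compressed_tokens=0,
    #         original_item_count=0, compressed_item_count=0,
    #         tool_name=None, tool_call_id=None, query_context=None,
    #         created_at=time.time() - 301,  # older than the 300 s default TTL
    #     )
    #     e.is_expired()  # -> True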


@dataclass
class RetrievalEvent:
    """Event logged when content is retrieved from cache."""

    hash: str
    query: str | None
    items_retrieved: int
    total_items: int
    tool_name: str | None
    timestamp: float
    retrieval_type: str  # "full" or "search"
    tool_signature_hash: str | None = None  # For TOIN correlation


class CompressionStore:
    """Thread-safe store for compressed content with retrieval support.

    This is the core of the CCR architecture. When SmartCrusher compresses
    an array, the original content is stored here. If the LLM needs more
    data, it can retrieve from this cache instantly.

    Design principles:
    - Zero external dependencies (pure Python)
    - Thread-safe for concurrent access
    - TTL-based expiration (default 5 minutes)
    - LRU-style eviction when capacity is reached
    - Built-in BM25 search for filtering
    """

    def __init__(
        self,
        max_entries: int = 1000,
        default_ttl: int = 300,
        enable_feedback: bool = True,
    ):
        """Initialize the compression store.

        Args:
            max_entries: Maximum number of entries to store.
            default_ttl: Default TTL in seconds (5 minutes).
            enable_feedback: Whether to track retrieval events.
        """
        self._store: dict[str, CompressionEntry] = {}
        self._lock = threading.Lock()
        self._max_entries = max_entries
        self._default_ttl = default_ttl
        self._enable_feedback = enable_feedback

        # Feedback tracking
        self._retrieval_events: list[RetrievalEvent] = []
        self._max_events = 1000  # Keep last 1000 events
        self._pending_feedback_events: list[RetrievalEvent] = []

        # MEDIUM FIX #16: Use a min-heap for O(log n) eviction instead of O(n)
        # Heap entries are (created_at, hash_key) tuples
        self._eviction_heap: list[tuple[float, str]] = []
        # CRITICAL FIX: Track stale entries count to know when heap cleanup is needed
        self._stale_heap_entries = 0
        # Threshold for triggering heap rebuild (when 50% are stale)
        self._heap_rebuild_threshold = 0.5

        # BM25 scorer for search
        self._scorer = BM25Scorer()

    def store(
        self,
        original: str,
        compressed: str,
        *,
        original_tokens: int = 0,
        compressed_tokens: int = 0,
        original_item_count: int = 0,
        compressed_item_count: int = 0,
        tool_name: str | None = None,
        tool_call_id: str | None = None,
        query_context: str | None = None,
        tool_signature_hash: str | None = None,
        compression_strategy: str | None = None,
        ttl: int | None = None,
    ) -> str:
        """Store compressed content and return hash for retrieval.

        Args:
            original: Original JSON content before compression.
            compressed: Compressed JSON content.
            original_tokens: Token count of original content.
            compressed_tokens: Token count of compressed content.
            original_item_count: Number of items in original array.
            compressed_item_count: Number of items after compression.
            tool_name: Name of the tool that produced this output.
            tool_call_id: ID of the tool call.
            query_context: User query context for relevance matching.
            tool_signature_hash: Hash from ToolSignature for TOIN correlation.
            compression_strategy: Strategy used for compression.
            ttl: Custom TTL in seconds (uses default if not specified).

        Returns:
            Hash key for retrieving this content.
        """
        # Generate hash from original content
        # CRITICAL FIX #5: Use 24 chars (96 bits) instead of 16 (64 bits) for better
        # collision resistance. Birthday paradox: 50% collision at sqrt(2^n) entries.
        # - 64 bits: ~4 billion entries for 50% collision
        # - 96 bits: ~280 trillion entries for 50% collision
        hash_key = hashlib.sha256(original.encode()).hexdigest()[:24]

        entry = CompressionEntry(
            hash=hash_key,
            original_content=original,
            compressed_content=compressed,
            original_tokens=original_tokens,
            compressed_tokens=compressed_tokens,
            original_item_count=original_item_count,
            compressed_item_count=compressed_item_count,
            tool_name=tool_name,
            tool_call_id=tool_call_id,
            query_context=query_context,
            created_at=time.time(),
            ttl=ttl if ttl is not None else self._default_ttl,
            tool_signature_hash=tool_signature_hash,
            compression_strategy=compression_strategy,
        )

        # Process pending feedback BEFORE acquiring lock for eviction.
        # This ensures feedback from entries about to be evicted is captured.
        if self._enable_feedback:
            self.process_pending_feedback()

        with self._lock:
            self._evict_if_needed()

            # CRITICAL FIX: Hash collision detection
            # If hash already exists with DIFFERENT content, log a warning.
            # This indicates either a hash collision or duplicate store calls.
            existing = self._store.get(hash_key)
            if existing is not None:
                if existing.original_content != original:
                    # True hash collision - different content, same hash
                    # This is extremely rare with SHA256[:24] but should be logged
                    logger.warning(
                        "Hash collision detected: hash=%s tool=%s (existing_len=%d, new_len=%d)",
                        hash_key,
                        tool_name,
                        len(existing.original_content),
                        len(original),
                    )
                else:
                    # Same content being stored again - this is fine, just update
                    logger.debug(
                        "Duplicate store for hash=%s, updating entry",
                        hash_key,
                    )
                # Mark old heap entry as stale since we're replacing
                self._stale_heap_entries += 1

            self._store[hash_key] = entry
            # MEDIUM FIX #16: Add to eviction heap for O(log n) eviction
            heapq.heappush(self._eviction_heap, (entry.created_at, hash_key))

        return hash_key
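
    # Worked numbers behind the collision-resistance comment above (illustrative):
    # a truncated hash of n bits reaches ~50% collision probability near
    # sqrt(2**n) entries, so
    #     64 bits: sqrt(2**64) = 2**32 ≈ 4.3e9   (~4 billion entries)
    #     96 bits: sqrt(2**96) = 2**48 ≈ 2.8e14  (~280 trillion entries)
    # which is why the key is hexdigest()[:24] (24 hex chars × 4 bits = 96 bits).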

    def retrieve(
        self,
        hash_key: str,
        query: str | None = None,
    ) -> CompressionEntry | None:
        """Retrieve original content by hash.

        Args:
            hash_key: Hash key returned by store().
            query: Optional query for feedback tracking.

        Returns:
            CompressionEntry if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                # CRITICAL FIX: Track stale heap entry
                self._stale_heap_entries += 1
                return None

            # Track access for feedback
            entry.record_access(query)

            # Log retrieval event
            if self._enable_feedback:
                self._log_retrieval(
                    hash_key=hash_key,
                    query=query,
                    items_retrieved=entry.original_item_count,
                    total_items=entry.original_item_count,
                    tool_name=entry.tool_name,
                    retrieval_type="full",
                    tool_signature_hash=entry.tool_signature_hash,
                )

            # CRITICAL: Make a deep copy to return
            # (entry could be modified/evicted after lock release)
            # The entry contains mutable fields (search_queries list) that must be copied
            result_entry = replace(entry, search_queries=list(entry.search_queries))

        # Process feedback immediately to ensure TOIN learns in real-time
        if self._enable_feedback:
            self.process_pending_feedback()

        return result_entry
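
    # Illustrative consequence of the copy-on-return above (`key` is a
    # hypothetical hash from store()): the caller gets a snapshot, so mutating
    # it cannot corrupt the cached entry:
    #
    #     snap = store.retrieve(key)
    #     snap.search_queries.append("scratch")  # changes only the copy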

    def get_metadata(
        self,
        hash_key: str,
    ) -> dict[str, Any] | None:
        """Get metadata about a stored entry without retrieving full content.

        Useful for context tracking to know what was compressed without
        fetching the entire original content.

        Args:
            hash_key: Hash key returned by store().

        Returns:
            Dict with metadata if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                self._stale_heap_entries += 1
                return None

            return {
                "hash": entry.hash,
                "tool_name": entry.tool_name,
                "original_item_count": entry.original_item_count,
                "compressed_item_count": entry.compressed_item_count,
                "query_context": entry.query_context,
                "compressed_content": entry.compressed_content,
                "created_at": entry.created_at,
                "ttl": entry.ttl,
            }

    def search(
        self,
        hash_key: str,
        query: str,
        max_results: int = 20,
        score_threshold: float = 0.3,
    ) -> list[dict[str, Any]]:
        """Search within cached content using BM25.

        Args:
            hash_key: Hash key of cached content.
            query: Search query.
            max_results: Maximum number of results to return.
            score_threshold: Minimum BM25 score to include.

        Returns:
            List of matching items from original content.
        """
        # Get entry without logging (we'll log the search separately)
        entry = self._get_entry_for_search(hash_key, query)
        if entry is None:
            return []

        try:
            items = json.loads(entry.original_content)
            if not isinstance(items, list):
                return []
        except json.JSONDecodeError:
            return []

        if not items:
            return []

        # Score each item using BM25
        item_strs = [json.dumps(item, default=str) for item in items]
        scores = self._scorer.score_batch(item_strs, query)

        # Filter and sort by score
        scored_items = [
            (items[i], scores[i].score)
            for i in range(len(items))
            if scores[i].score >= score_threshold
        ]
        scored_items.sort(key=lambda x: x[1], reverse=True)

        results = [item for item, _ in scored_items[:max_results]]

        # Log retrieval event
        if self._enable_feedback:
            with self._lock:
                self._log_retrieval(
                    hash_key=hash_key,
                    query=query,
                    items_retrieved=len(results),
                    total_items=len(items),
                    tool_name=entry.tool_name,
                    retrieval_type="search",
                    tool_signature_hash=entry.tool_signature_hash,
                )
            # Process feedback immediately to ensure TOIN learns in real-time
            self.process_pending_feedback()

        return results
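
    # Illustrative search flow (payload and key are hypothetical): for cached
    # content '[{"name": "alice"}, {"name": "bob"}]', search(key, "bob")
    # JSON-encodes each item, BM25-scores it against "bob", drops scores below
    # score_threshold, and returns the surviving items best-first:
    #
    #     store.search(key, "bob")
    #     # -> [{"name": "bob"}]  (assuming its score clears the 0.3 threshold)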

    def _get_entry_for_search(
        self,
        hash_key: str,
        query: str | None = None,
    ) -> CompressionEntry | None:
        """Get entry without logging retrieval (used by search to avoid double-logging).

        CRITICAL FIX #4: Returns a copy of the entry to prevent race conditions.
        The caller may use the entry after we release the lock, and another thread
        could modify or evict the original entry.

        Args:
            hash_key: Hash key returned by store().
            query: Optional query for access tracking.

        Returns:
            CompressionEntry copy if found and not expired, None otherwise.
        """
        with self._lock:
            entry = self._store.get(hash_key)

            if entry is None:
                return None

            if entry.is_expired():
                del self._store[hash_key]
                # CRITICAL FIX: Track stale heap entry
                self._stale_heap_entries += 1
                return None

            # Track access but don't log retrieval event (search will log separately)
            entry.record_access(query)

            # CRITICAL FIX #4: Return a copy to prevent race conditions
            # The entry contains mutable fields (search_queries list) that could be
            # modified by other threads after we release the lock
            return replace(entry, search_queries=list(entry.search_queries))

    def exists(self, hash_key: str, clean_expired: bool = False) -> bool:
        """Check if a hash key exists and is not expired.

        Args:
            hash_key: The hash key to check.
            clean_expired: If True, delete the entry if expired.
                LOW FIX #20: Default False to make this a pure check.

        Returns:
            True if the entry exists and is not expired.
        """
        with self._lock:
            entry = self._store.get(hash_key)
            if entry is None:
                return False
            if entry.is_expired():
                # LOW FIX #20: Only delete if explicitly requested
                # This makes exists() a pure check by default
                if clean_expired:
                    del self._store[hash_key]
                    # CRITICAL FIX: Track stale heap entry
                    self._stale_heap_entries += 1
                return False
            return True
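
    # Illustrative semantics of the pure-check default (`key` is hypothetical):
    #
    #     store.exists(key)                      # False once expired; entry kept
    #     store.exists(key, clean_expired=True)  # False, and the entry is deleted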

    def get_stats(self) -> dict[str, Any]:
        """Get store statistics for monitoring."""
        with self._lock:
            # Clean expired entries
            self._clean_expired()

            total_original_tokens = sum(e.original_tokens for e in self._store.values())
            total_compressed_tokens = sum(e.compressed_tokens for e in self._store.values())
            total_retrievals = sum(e.retrieval_count for e in self._store.values())

            return {
                "entry_count": len(self._store),
                "max_entries": self._max_entries,
                "total_original_tokens": total_original_tokens,
                "total_compressed_tokens": total_compressed_tokens,
                "total_retrievals": total_retrievals,
                "event_count": len(self._retrieval_events),
            }

    def get_retrieval_events(
        self,
        limit: int = 100,
        tool_name: str | None = None,
    ) -> list[RetrievalEvent]:
        """Get recent retrieval events for feedback analysis.

        Args:
            limit: Maximum number of events to return.
            tool_name: Filter by tool name if specified.

        Returns:
            List of recent retrieval events (copies to prevent mutation).
        """
        with self._lock:
            # MEDIUM FIX #17: Take a slice copy immediately to avoid race conditions
            # if another thread modifies _retrieval_events after we release the lock
            events_copy = list(self._retrieval_events)

        # Filter and slice outside lock (safe since we have a copy)
        if tool_name:
            events_copy = [e for e in events_copy if e.tool_name == tool_name]

        return list(reversed(events_copy[-limit:]))

    def clear(self) -> None:
        """Clear all entries. Mainly for testing."""
        with self._lock:
            self._store.clear()
            self._retrieval_events.clear()
            self._pending_feedback_events.clear()
            self._eviction_heap.clear()  # MEDIUM FIX #16: Clear heap too
            self._stale_heap_entries = 0  # CRITICAL FIX: Reset stale counter

    def _evict_if_needed(self) -> None:
        """Evict old entries if at capacity. Must be called with lock held.

        MEDIUM FIX #16: Use heap for O(log n) eviction instead of O(n) scan.
        CRITICAL FIX: Track and clean stale heap entries to prevent memory leak.
        """
        # First, remove expired entries
        self._clean_expired()

        # CRITICAL FIX: Rebuild heap if too many stale entries
        # This prevents unbounded heap growth when entries are deleted/replaced
        heap_size = len(self._eviction_heap)
        if heap_size > 0:
            stale_ratio = self._stale_heap_entries / heap_size
            if stale_ratio >= self._heap_rebuild_threshold:
                self._rebuild_heap()

        # If still at capacity, remove oldest entries using heap
        while len(self._store) >= self._max_entries and self._eviction_heap:
            # Pop oldest from heap (O(log n))
            created_at, hash_key = heapq.heappop(self._eviction_heap)

            # Check if entry still exists and matches timestamp
            # (entry might have been deleted or replaced)
            entry = self._store.get(hash_key)
            if entry is not None and entry.created_at == created_at:
                # HIGH FIX: Track eviction as "successful compression" if never retrieved
                # This prevents state divergence between store and feedback loop
                if self._enable_feedback and entry.retrieval_count == 0:
                    # Entry was never retrieved = compression was successful
                    # Notify feedback system so it knows this strategy worked
                    self._record_eviction_success(entry)
                del self._store[hash_key]
            else:
                # CRITICAL FIX: This was a stale entry, decrement counter
                # (we already popped it, so the stale entry is now gone)
                if self._stale_heap_entries > 0:
                    self._stale_heap_entries -= 1
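
    # Worked example of the stale-entry accounting (illustrative, with
    # hypothetical hashes h_A/h_B): with max_entries=2, storing A then B leaves
    # the heap [(t_A, h_A), (t_B, h_B)]. If A expires and _clean_expired()
    # removes it, the dict drops h_A but the heap tuple survives, so
    # _stale_heap_entries becomes 1 (ratio 1/2 >= 0.5) and the next
    # _evict_if_needed() rebuilds the heap to [(t_B, h_B)] rather than popping
    # a dangling key.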

    def _clean_expired(self) -> None:
        """Remove expired entries. Must be called with lock held.

        CRITICAL FIX: Track stale heap entries when deleting to prevent memory leak.
        """
        expired_keys = [key for key, entry in self._store.items() if entry.is_expired()]
        for key in expired_keys:
            del self._store[key]
            # CRITICAL FIX: Increment stale counter - the heap still has an entry
            # for this key that will be stale when we try to evict
            self._stale_heap_entries += 1

    def _rebuild_heap(self) -> None:
        """Rebuild heap from current store entries. Must be called with lock held.

        CRITICAL FIX: This removes stale heap entries that accumulate when entries
        are deleted or replaced. Without this, the heap grows unboundedly.
        """
        # Build new heap from current store entries only
        self._eviction_heap = [
            (entry.created_at, hash_key) for hash_key, entry in self._store.items()
        ]
        heapq.heapify(self._eviction_heap)
        # Reset stale counter - heap is now clean
        self._stale_heap_entries = 0
        logger.debug(
            "Rebuilt eviction heap: %d entries",
            len(self._eviction_heap),
        )

    def _record_eviction_success(self, entry: CompressionEntry) -> None:
        """Record successful compression when an entry is evicted without retrieval.

        HIGH FIX: State divergence on eviction
        When an entry is evicted and was NEVER retrieved, this indicates the
        compression was fully successful - the LLM never needed the original data.
        We notify the feedback system so it can learn from this success.

        Must be called with lock held (entry data access).
        Actual feedback notification happens outside lock.

        Args:
            entry: The entry being evicted.
        """
        # Capture entry data while we have the lock
        tool_name = entry.tool_name
        sig_hash = entry.tool_signature_hash
        strategy = entry.compression_strategy

        # We can't call feedback while holding the lock (would cause deadlock)
        # Instead, queue this for deferred processing
        if sig_hash is not None and strategy is not None:
            # Create a synthetic "success" event that we'll process later
            # Use a special retrieval type to indicate this was an eviction success
            success_event = RetrievalEvent(
                hash=entry.hash,
                query=None,
                items_retrieved=0,  # No retrieval happened
                total_items=entry.original_item_count,
                tool_name=tool_name,
                timestamp=time.time(),
                retrieval_type="eviction_success",  # Special marker
                tool_signature_hash=sig_hash,
            )
            self._pending_feedback_events.append(success_event)
            logger.debug(
                "Recorded eviction success: hash=%s strategy=%s",
                entry.hash[:8],
                strategy,
            )

    def _log_retrieval(
        self,
        hash_key: str,
        query: str | None,
        items_retrieved: int,
        total_items: int,
        tool_name: str | None,
        retrieval_type: str,
        tool_signature_hash: str | None = None,
    ) -> None:
        """Log a retrieval event. Must be called with lock held."""
        event = RetrievalEvent(
            hash=hash_key,
            query=query,
            items_retrieved=items_retrieved,
            total_items=total_items,
            tool_name=tool_name,
            timestamp=time.time(),
            retrieval_type=retrieval_type,
            tool_signature_hash=tool_signature_hash,
        )

        self._retrieval_events.append(event)

        # Keep only recent events
        if len(self._retrieval_events) > self._max_events:
            self._retrieval_events = self._retrieval_events[-self._max_events :]

        # Queue event for feedback processing (will be processed after lock release)
        # This is safe because process_pending_feedback() uses the lock to atomically
        # swap out the pending list before processing
        self._pending_feedback_events.append(event)

    def process_pending_feedback(self) -> None:
        """Process pending feedback events.

        Forwards events to:
        1. CompressionFeedback - for learning compression hints
        2. TelemetryCollector - for the data flywheel
        3. TOIN - for cross-user intelligence network

        This is called automatically on each retrieval to ensure the
        feedback loop operates in real-time.
        """
        from ..telemetry import get_telemetry_collector
        from ..telemetry.toin import get_toin
        from .compression_feedback import get_compression_feedback

        # Get pending events and related entry data atomically
        with self._lock:
            events = self._pending_feedback_events
            self._pending_feedback_events = []

            # Gather entry data while holding lock to avoid race conditions
            # Tuple: (event, tool_name, sig_hash, strategy, compressed_content)
            event_data: list[
                tuple[RetrievalEvent, str | None, str | None, str | None, str | None]
            ] = []
            for event in events:
                entry = self._store.get(event.hash)
                if entry:
                    # Use the ACTUAL tool_signature_hash stored during compression
                    # This MUST match the hash used by SmartCrusher
                    event_data.append(
                        (
                            event,
                            entry.tool_name,
                            entry.tool_signature_hash,  # The correct hash!
                            entry.compression_strategy,
                            entry.compressed_content,  # For TOIN field-level learning
                        )
                    )
                else:
                    event_data.append((event, None, None, None, None))

        # Process outside lock
        if event_data:
            feedback = get_compression_feedback()
            telemetry = get_telemetry_collector()
            toin = get_toin()

            for event, _tool_name, sig_hash, strategy, compressed_content in event_data:
                # Notify feedback system (pass strategy for success rate tracking)
                feedback.record_retrieval(event, strategy=strategy)

                # Extract query fields if present
                query_fields = None
                if event.query:
                    # Extract field:value patterns
                    query_fields = re.findall(r"(\w+)[=:]", event.query)

                # Notify telemetry for data flywheel
                try:
                    if sig_hash is not None:
                        telemetry.record_retrieval(
                            tool_signature_hash=sig_hash,
                            retrieval_type=event.retrieval_type,
                            query_fields=query_fields,
                        )
                except Exception:
                    # Telemetry should never break the feedback loop
                    logger.debug("Telemetry record_retrieval failed", exc_info=True)

                # Parse compressed content to extract items for TOIN field-level learning
                retrieved_items: list[dict[str, Any]] | None = None
                if compressed_content:
                    try:
                        parsed = json.loads(compressed_content)
                        # Handle both direct arrays and wrapped arrays
                        if isinstance(parsed, list):
                            # Filter to dicts only (field learning needs dict items)
                            retrieved_items = [item for item in parsed if isinstance(item, dict)]
                        elif isinstance(parsed, dict):
                            # Check for common wrapper patterns: {"items": [...], "results": [...]}
                            for key in ("items", "results", "data", "records"):
                                if key in parsed and isinstance(parsed[key], list):
                                    retrieved_items = [
                                        item for item in parsed[key] if isinstance(item, dict)
                                    ]
                                    break
                    except (json.JSONDecodeError, TypeError):
                        # Invalid JSON - skip field learning for this retrieval
                        pass

                # Notify TOIN for cross-user learning
                try:
                    if sig_hash is not None:
                        toin.record_retrieval(
                            tool_signature_hash=sig_hash,
                            retrieval_type=event.retrieval_type,
                            query=event.query,
                            query_fields=query_fields,
                            strategy=strategy,  # Pass strategy for success rate tracking
                            retrieved_items=retrieved_items,  # For field-level learning
                        )
                except Exception:
                    # TOIN should never break the feedback loop
                    logger.debug("TOIN record_retrieval failed", exc_info=True)


# Global store instance (lazy initialization)
_compression_store: CompressionStore | None = None
_store_lock = threading.Lock()


def get_compression_store(
    max_entries: int = 1000,
    default_ttl: int = 300,
) -> CompressionStore:
    """Get the global compression store instance.

    Uses lazy initialization with singleton pattern.

    Args:
        max_entries: Maximum entries (only used on first call).
        default_ttl: Default TTL (only used on first call).

    Returns:
        Global CompressionStore instance.
    """
    global _compression_store

    if _compression_store is None:
        with _store_lock:
            # Double-check after acquiring lock
            if _compression_store is None:
                _compression_store = CompressionStore(
                    max_entries=max_entries,
                    default_ttl=default_ttl,
                )

    return _compression_store
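
# Illustrative consequence of the singleton pattern: sizing arguments only
# take effect on the first call; later callers get the same instance unchanged:
#
#     a = get_compression_store(max_entries=50)
#     b = get_compression_store(max_entries=9999)  # same object, still 50
#     assert a is b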


def reset_compression_store() -> None:
    """Reset the global compression store. Mainly for testing."""
    global _compression_store

    with _store_lock:
        if _compression_store is not None:
            _compression_store.clear()
        _compression_store = None