headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
|
@@ -0,0 +1,613 @@
|
|
|
1
|
+
"""Compression Feedback Loop for learning optimal compression strategies.
|
|
2
|
+
|
|
3
|
+
This module analyzes retrieval patterns from the CompressionStore to learn
|
|
4
|
+
what kinds of compression work well and what doesn't. It provides hints to
|
|
5
|
+
SmartCrusher to improve compression over time.
|
|
6
|
+
|
|
7
|
+
Key insight from ACON research: Learn compression guidelines by analyzing failures.
|
|
8
|
+
When compression causes the LLM to retrieve more data, that's a signal that
|
|
9
|
+
we compressed too aggressively.
|
|
10
|
+
|
|
11
|
+
Features:
|
|
12
|
+
- Track retrieval rates per tool type
|
|
13
|
+
- Learn common search queries for each tool
|
|
14
|
+
- Adjust compression aggressiveness based on patterns
|
|
15
|
+
- Provide hints: max_items, fields to preserve, etc.
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
feedback = CompressionFeedback(compression_store)
|
|
19
|
+
|
|
20
|
+
# Get hints before compressing
|
|
21
|
+
hints = feedback.get_compression_hints("github_search_repos")
|
|
22
|
+
# hints = {"max_items": 50, "preserve_fields": ["id", "name"], ...}
|
|
23
|
+
|
|
24
|
+
# Apply hints in SmartCrusher config
|
|
25
|
+
config = SmartCrusherConfig(max_items=hints.get("max_items", 15))
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import re
|
|
31
|
+
import threading
|
|
32
|
+
import time
|
|
33
|
+
from dataclasses import dataclass, field
|
|
34
|
+
from typing import TYPE_CHECKING, Any
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from .compression_store import CompressionStore, RetrievalEvent
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class LocalToolPattern:
    """Per-tool compression/retrieval statistics learned from local feedback.

    MEDIUM FIX #18: Renamed from ToolPattern to avoid confusion with
    headroom.telemetry.toin.ToolPattern which serves a different purpose:
    - LocalToolPattern: Local feedback patterns keyed by tool_name
    - toin.ToolPattern: Cross-user TOIN patterns keyed by tool_signature_hash
    """

    tool_name: str

    # Retrieval statistics
    total_compressions: int = 0
    total_retrievals: int = 0
    full_retrievals: int = 0  # Retrieved entire original content
    search_retrievals: int = 0  # Used search within content

    # Query analysis
    common_queries: dict[str, int] = field(default_factory=dict)
    queried_fields: dict[str, int] = field(default_factory=dict)

    # Strategy analysis - track which strategies work for this tool
    strategy_compressions: dict[str, int] = field(default_factory=dict)
    strategy_retrievals: dict[str, int] = field(default_factory=dict)

    # Signature hash tracking - correlate with TOIN patterns
    signature_hashes: set[str] = field(default_factory=set)

    # Timing
    last_compression: float = 0.0
    last_retrieval: float = 0.0

    # Calculated metrics
    @property
    def retrieval_rate(self) -> float:
        """Fraction of compressions that resulted in retrieval."""
        total = self.total_compressions
        return self.total_retrievals / total if total else 0.0

    @property
    def full_retrieval_rate(self) -> float:
        """Fraction of retrievals that were full (not search)."""
        total = self.total_retrievals
        return self.full_retrievals / total if total else 0.0

    @property
    def search_rate(self) -> float:
        """Fraction of retrievals that used search."""
        total = self.total_retrievals
        return self.search_retrievals / total if total else 0.0

    def strategy_retrieval_rate(self, strategy: str) -> float:
        """Get retrieval rate for a specific compression strategy."""
        used = self.strategy_compressions.get(strategy, 0)
        if not used:
            return 0.0
        return self.strategy_retrievals.get(strategy, 0) / used

    def best_strategy(self) -> str | None:
        """Find the strategy with lowest retrieval rate (most successful)."""
        # Only strategies with enough samples (>= 3 compressions) are eligible.
        eligible = [
            name
            for name, count in self.strategy_compressions.items()
            if count >= 3
        ]
        if not eligible:
            return None
        # min() returns the first minimum in iteration order, which preserves
        # the original first-wins tie-breaking over dict insertion order.
        winner = min(eligible, key=self.strategy_retrieval_rate)
        # A rate of 1.0 (every compression led to a retrieval) never wins.
        return winner if self.strategy_retrieval_rate(winner) < 1.0 else None
@dataclass
class CompressionHints:
    """Hints for optimizing compression of a specific tool's output.

    Produced by CompressionFeedback.get_compression_hints(); consumers feed
    these values into SmartCrusher configuration. All fields have defaults,
    so a default-constructed instance means "no learned adjustments".
    """

    # Item count hints
    max_items: int = 15  # Default from SmartCrusher
    min_items: int = 3
    suggested_items: int | None = None  # Calculated optimal

    # Field preservation: top queried field names observed in retrievals
    preserve_fields: list[str] = field(default_factory=list)

    # Compression aggressiveness (0.0 = aggressive, 1.0 = conservative)
    aggressiveness: float = 0.7

    # Reasoning: human-readable explanation for why these hints were chosen
    reason: str = ""

    # Whether to skip compression entirely
    skip_compression: bool = False

    # Recommended compression strategy based on local learning
    recommended_strategy: str | None = None
class CompressionFeedback:
    """Learn from retrieval patterns to improve compression.

    This class analyzes retrieval events from CompressionStore and builds
    tool-specific patterns. These patterns inform compression decisions.

    Design principles:
    - High retrieval rate (>50%) → compress less aggressively
    - Full retrieval dominates → data is unique, skip compression
    - Search retrieval dominates → keep compressed, add search capability
    - Frequent queries → preserve fields mentioned in queries

    Thread safety: all mutable state is guarded by ``self._lock``; the store
    is only accessed outside the lock (it has its own locking).
    """

    # Thresholds for adjusting compression
    HIGH_RETRIEVAL_THRESHOLD = 0.5  # 50% retrieval = too aggressive
    MEDIUM_RETRIEVAL_THRESHOLD = 0.2  # 20% retrieval = acceptable
    MIN_SAMPLES_FOR_HINTS = 5  # Need at least 5 events to make recommendations

    def __init__(
        self,
        store: CompressionStore | None = None,
        enable_learning: bool = True,
        analysis_interval: float = 60.0,
    ):
        """Initialize feedback analyzer.

        Args:
            store: CompressionStore to analyze. If None, uses global store.
            enable_learning: Whether to update patterns from events.
            analysis_interval: Interval in seconds between re-analyzing store events.
        """
        self._store = store
        self._enable_learning = enable_learning
        self._lock = threading.Lock()

        # Learned patterns per tool
        self._tool_patterns: dict[str, LocalToolPattern] = {}

        # Time-based tracking
        self._last_analysis: float = 0.0
        self._analysis_interval: float = analysis_interval
        self._last_event_timestamp: float = (
            0.0  # Track last processed event to avoid double-counting
        )

        # Global statistics
        self._total_compressions: int = 0
        self._total_retrievals: int = 0

    @property
    def store(self) -> CompressionStore:
        """Get the compression store (lazy load global if not set)."""
        if self._store is None:
            from .compression_store import get_compression_store

            self._store = get_compression_store()
        return self._store

    def record_compression(
        self,
        tool_name: str | None,
        original_count: int,
        compressed_count: int,
        strategy: str | None = None,
        tool_signature_hash: str | None = None,
    ) -> None:
        """Record that a compression occurred.

        Called by SmartCrusher after compressing to track compression events.

        Args:
            tool_name: Name of the tool whose output was compressed.
            original_count: Original item count.
            compressed_count: Compressed item count.
            strategy: Compression strategy used (e.g., "SMART_SAMPLE", "TOP_N").
            tool_signature_hash: Hash from ToolSignature for correlation with TOIN.
        """
        if not self._enable_learning or not tool_name:
            return

        with self._lock:
            self._total_compressions += 1

            if tool_name not in self._tool_patterns:
                self._tool_patterns[tool_name] = LocalToolPattern(tool_name=tool_name)

            pattern = self._tool_patterns[tool_name]
            pattern.total_compressions += 1
            pattern.last_compression = time.time()

            # Track strategy usage
            if strategy:
                pattern.strategy_compressions[strategy] = (
                    pattern.strategy_compressions.get(strategy, 0) + 1
                )

                # CRITICAL FIX: When truncating strategy dicts, keep them in sync
                # to prevent desync between compressions and retrievals.
                # Both dicts must have the same keys for accurate retrieval rate calculation.
                if len(pattern.strategy_compressions) > 50:
                    self._truncate_strategy_dicts(pattern)

            # Track signature hash for TOIN correlation
            if tool_signature_hash:
                pattern.signature_hashes.add(tool_signature_hash)
                # CRITICAL FIX: Use deterministic truncation for signature_hashes
                # Sort lexicographically to ensure consistent behavior across runs
                if len(pattern.signature_hashes) > 100:
                    sorted_hashes = sorted(pattern.signature_hashes)[:100]
                    pattern.signature_hashes = set(sorted_hashes)

    def record_retrieval(
        self,
        event: RetrievalEvent,
        strategy: str | None = None,
    ) -> None:
        """Record a retrieval event for pattern learning.

        Called by CompressionStore when content is retrieved.

        Args:
            event: The retrieval event to record.
            strategy: Compression strategy that was used (for tracking success rates).
        """
        if not self._enable_learning:
            return

        tool_name = event.tool_name
        if not tool_name:
            return

        with self._lock:
            self._total_retrievals += 1

            if tool_name not in self._tool_patterns:
                self._tool_patterns[tool_name] = LocalToolPattern(tool_name=tool_name)

            pattern = self._tool_patterns[tool_name]
            pattern.total_retrievals += 1
            pattern.last_retrieval = time.time()

            if event.retrieval_type == "full":
                pattern.full_retrievals += 1
            else:
                pattern.search_retrievals += 1

            # Track strategy retrievals (for success rate calculation)
            if strategy:
                pattern.strategy_retrievals[strategy] = (
                    pattern.strategy_retrievals.get(strategy, 0) + 1
                )

                # CRITICAL FIX: When truncating strategy dicts, keep them in sync
                # to prevent desync between compressions and retrievals.
                if len(pattern.strategy_retrievals) > 50:
                    self._truncate_strategy_dicts(pattern)

            # Track query patterns
            if event.query:
                query_lower = event.query.lower()
                pattern.common_queries[query_lower] = pattern.common_queries.get(query_lower, 0) + 1

                # HIGH: Limit common_queries dict to prevent unbounded growth
                if len(pattern.common_queries) > 100:
                    sorted_queries = sorted(
                        pattern.common_queries.items(),
                        key=lambda x: x[1],
                        reverse=True,
                    )[:100]
                    pattern.common_queries = dict(sorted_queries)

                # Extract potential field names from query
                self._extract_field_hints(pattern, event.query)

    def _truncate_strategy_dicts(self, pattern: LocalToolPattern) -> None:
        """Truncate strategy_compressions and strategy_retrievals in sync.

        CRITICAL FIX: Both dicts must have the same keys for accurate retrieval
        rate calculation. When truncating, we keep the union of top strategies
        from both dicts, then truncate both to the same key set.
        """
        # Get top 40 strategies from each dict (using 40 to allow union to stay under 50)
        top_compressions = {
            k
            for k, _ in sorted(
                pattern.strategy_compressions.items(),
                key=lambda x: x[1],
                reverse=True,
            )[:40]
        }
        top_retrievals = {
            k
            for k, _ in sorted(
                pattern.strategy_retrievals.items(),
                key=lambda x: x[1],
                reverse=True,
            )[:40]
        }

        # Keep union of top strategies from both
        keys_to_keep = top_compressions | top_retrievals

        # Truncate both dicts to same keys
        pattern.strategy_compressions = {
            k: v for k, v in pattern.strategy_compressions.items() if k in keys_to_keep
        }
        pattern.strategy_retrievals = {
            k: v for k, v in pattern.strategy_retrievals.items() if k in keys_to_keep
        }

    def _extract_field_hints(self, pattern: LocalToolPattern, query: str) -> None:
        """Extract potential field names from search queries.

        Common patterns:
        - "field:value" or "field=value"
        - JSON field names like "status", "error", "id"
        """
        # Look for field:value patterns
        field_patterns = re.findall(r"(\w+)[=:]", query)
        for field_name in field_patterns:
            pattern.queried_fields[field_name] = pattern.queried_fields.get(field_name, 0) + 1

        # Look for common JSON field names
        common_fields = [
            "id",
            "name",
            "status",
            "error",
            "message",
            "type",
            "code",
            "result",
            "value",
            "data",
            "items",
            "count",
        ]
        query_lower = query.lower()
        for common_field in common_fields:
            # FIX: match on word boundaries instead of raw substring containment.
            # Substring checks produced false positives ("id" matched "provider"
            # and "identify", "code" matched "encoded"), inflating queried_fields
            # counts and polluting the preserve_fields hints derived from them.
            if re.search(rf"\b{common_field}\b", query_lower):
                pattern.queried_fields[common_field] = (
                    pattern.queried_fields.get(common_field, 0) + 1
                )

        # HIGH: Limit queried_fields dict to prevent unbounded growth
        if len(pattern.queried_fields) > 50:
            sorted_fields = sorted(
                pattern.queried_fields.items(),
                key=lambda x: x[1],
                reverse=True,
            )[:50]
            pattern.queried_fields = dict(sorted_fields)

    def get_compression_hints(
        self,
        tool_name: str | None,
    ) -> CompressionHints:
        """Get compression hints for a specific tool based on learned patterns.

        Args:
            tool_name: Name of the tool to get hints for.

        Returns:
            CompressionHints with recommended settings.
        """
        hints = CompressionHints()

        if not tool_name:
            hints.reason = "No tool name provided, using defaults"
            return hints

        with self._lock:
            pattern = self._tool_patterns.get(tool_name)

            if pattern is None:
                hints.reason = f"No pattern data for {tool_name}, using defaults"
                return hints

            # Need minimum samples for reliable hints
            if pattern.total_compressions < self.MIN_SAMPLES_FOR_HINTS:
                hints.reason = (
                    f"Insufficient data ({pattern.total_compressions} samples), "
                    f"need {self.MIN_SAMPLES_FOR_HINTS}"
                )
                return hints

            # Calculate hints based on retrieval rate
            retrieval_rate = pattern.retrieval_rate

            if retrieval_rate > self.HIGH_RETRIEVAL_THRESHOLD:
                # High retrieval = compress less aggressively
                if pattern.full_retrieval_rate > 0.8:
                    # Almost all retrievals are full → skip compression
                    hints.skip_compression = True
                    hints.reason = (
                        f"Very high full retrieval rate ({pattern.full_retrieval_rate:.0%}), "
                        f"recommending skip compression"
                    )
                else:
                    # Mix of full and search → increase items
                    hints.max_items = 50
                    hints.suggested_items = 40
                    hints.aggressiveness = 0.3
                    hints.reason = (
                        f"High retrieval rate ({retrieval_rate:.0%}), "
                        f"recommending less aggressive compression"
                    )

            elif retrieval_rate > self.MEDIUM_RETRIEVAL_THRESHOLD:
                # Medium retrieval = slightly less aggressive
                hints.max_items = 30
                hints.suggested_items = 25
                hints.aggressiveness = 0.5
                hints.reason = (
                    f"Medium retrieval rate ({retrieval_rate:.0%}), "
                    f"recommending moderate compression"
                )

            else:
                # Low retrieval = current compression is working
                hints.max_items = 15
                hints.suggested_items = 10
                hints.aggressiveness = 0.7
                hints.reason = (
                    f"Low retrieval rate ({retrieval_rate:.0%}), current compression is effective"
                )

            # Add field preservation hints based on common queries
            if pattern.queried_fields:
                # Get top 5 most queried fields
                sorted_fields = sorted(
                    pattern.queried_fields.items(),
                    key=lambda x: x[1],
                    reverse=True,
                )[:5]
                hints.preserve_fields = [f for f, _ in sorted_fields]

            # Recommend the best strategy based on local retrieval patterns
            best = pattern.best_strategy()
            if best:
                hints.recommended_strategy = best

            return hints

    def get_all_patterns(self) -> dict[str, LocalToolPattern]:
        """Get all learned tool patterns.

        Returns:
            Dict mapping tool names to their patterns.
            HIGH FIX: Returns deep copies to prevent external mutation of internal state.
        """
        import copy as copy_module

        with self._lock:
            # Deep copy to prevent external code from modifying internal state
            return copy_module.deepcopy(self._tool_patterns)

    def get_stats(self) -> dict[str, Any]:
        """Get feedback statistics for monitoring.

        Returns:
            Dict with feedback statistics.
        """
        with self._lock:
            return {
                "total_compressions": self._total_compressions,
                "total_retrievals": self._total_retrievals,
                "global_retrieval_rate": (
                    self._total_retrievals / self._total_compressions
                    if self._total_compressions > 0
                    else 0.0
                ),
                "tools_tracked": len(self._tool_patterns),
                "tool_patterns": {
                    name: {
                        "compressions": p.total_compressions,
                        "retrievals": p.total_retrievals,
                        "retrieval_rate": p.retrieval_rate,
                        "full_rate": p.full_retrieval_rate,
                        "search_rate": p.search_rate,
                        "common_queries": list(p.common_queries.keys())[:5],
                        "queried_fields": list(p.queried_fields.keys())[:5],
                    }
                    for name, p in self._tool_patterns.items()
                },
            }

    def analyze_from_store(self) -> None:
        """Analyze retrieval events from the store.

        This pulls recent events from CompressionStore and updates patterns.
        Useful for catching up after restart or periodic refresh.

        HIGH FIX: All timestamp reads/writes happen under lock to prevent race
        conditions where another thread could cause events to be missed or
        double-counted.
        """
        if not self._enable_learning:
            return

        # Rate limit analysis - check under lock for thread safety
        now = time.time()
        with self._lock:
            if now - self._last_analysis < self._analysis_interval:
                return
            # Mark that we're starting analysis (prevents concurrent analysis)
            self._last_analysis = now
            last_ts = self._last_event_timestamp

        # Fetch events outside lock (store has its own lock)
        events = self.store.get_retrieval_events(limit=1000)

        # Filter events to only process new ones (avoid double-counting)
        new_events = [e for e in events if e.timestamp > last_ts]

        if new_events:
            # Find the maximum timestamp from new events
            max_timestamp = max(e.timestamp for e in new_events)

            for event in new_events:
                self.record_retrieval(event)

            # Update the timestamp AFTER processing - under lock for atomicity
            with self._lock:
                # Only update if our max_timestamp is greater than current
                # (another thread may have processed newer events)
                if max_timestamp > self._last_event_timestamp:
                    self._last_event_timestamp = max_timestamp

    def clear(self) -> None:
        """Clear all learned patterns. Mainly for testing."""
        with self._lock:
            self._tool_patterns.clear()
            self._total_compressions = 0
            self._total_retrievals = 0
            self._last_analysis = 0.0
            self._last_event_timestamp = 0.0
|
|
585
|
+
# Global feedback instance (lazy initialization)
_compression_feedback: CompressionFeedback | None = None
_feedback_lock = threading.Lock()


def get_compression_feedback() -> CompressionFeedback:
    """Get the global compression feedback instance.

    Returns:
        Global CompressionFeedback instance.
    """
    global _compression_feedback

    # Fast path: already initialized, no locking needed.
    if _compression_feedback is not None:
        return _compression_feedback

    with _feedback_lock:
        # Re-check under the lock: another thread may have won the race.
        if _compression_feedback is None:
            _compression_feedback = CompressionFeedback()
    return _compression_feedback


def reset_compression_feedback() -> None:
    """Reset the global compression feedback. Mainly for testing."""
    global _compression_feedback

    with _feedback_lock:
        instance = _compression_feedback
        if instance is not None:
            instance.clear()
        _compression_feedback = None