headroom-ai 0.2.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- headroom/__init__.py +212 -0
- headroom/cache/__init__.py +76 -0
- headroom/cache/anthropic.py +517 -0
- headroom/cache/base.py +342 -0
- headroom/cache/compression_feedback.py +613 -0
- headroom/cache/compression_store.py +814 -0
- headroom/cache/dynamic_detector.py +1026 -0
- headroom/cache/google.py +884 -0
- headroom/cache/openai.py +584 -0
- headroom/cache/registry.py +175 -0
- headroom/cache/semantic.py +451 -0
- headroom/ccr/__init__.py +77 -0
- headroom/ccr/context_tracker.py +582 -0
- headroom/ccr/mcp_server.py +319 -0
- headroom/ccr/response_handler.py +772 -0
- headroom/ccr/tool_injection.py +415 -0
- headroom/cli.py +219 -0
- headroom/client.py +977 -0
- headroom/compression/__init__.py +42 -0
- headroom/compression/detector.py +424 -0
- headroom/compression/handlers/__init__.py +22 -0
- headroom/compression/handlers/base.py +219 -0
- headroom/compression/handlers/code_handler.py +506 -0
- headroom/compression/handlers/json_handler.py +418 -0
- headroom/compression/masks.py +345 -0
- headroom/compression/universal.py +465 -0
- headroom/config.py +474 -0
- headroom/exceptions.py +192 -0
- headroom/integrations/__init__.py +159 -0
- headroom/integrations/agno/__init__.py +53 -0
- headroom/integrations/agno/hooks.py +345 -0
- headroom/integrations/agno/model.py +625 -0
- headroom/integrations/agno/providers.py +154 -0
- headroom/integrations/langchain/__init__.py +106 -0
- headroom/integrations/langchain/agents.py +326 -0
- headroom/integrations/langchain/chat_model.py +1002 -0
- headroom/integrations/langchain/langsmith.py +324 -0
- headroom/integrations/langchain/memory.py +319 -0
- headroom/integrations/langchain/providers.py +200 -0
- headroom/integrations/langchain/retriever.py +371 -0
- headroom/integrations/langchain/streaming.py +341 -0
- headroom/integrations/mcp/__init__.py +37 -0
- headroom/integrations/mcp/server.py +533 -0
- headroom/memory/__init__.py +37 -0
- headroom/memory/extractor.py +390 -0
- headroom/memory/fast_store.py +621 -0
- headroom/memory/fast_wrapper.py +311 -0
- headroom/memory/inline_extractor.py +229 -0
- headroom/memory/store.py +434 -0
- headroom/memory/worker.py +260 -0
- headroom/memory/wrapper.py +321 -0
- headroom/models/__init__.py +39 -0
- headroom/models/registry.py +687 -0
- headroom/parser.py +293 -0
- headroom/pricing/__init__.py +51 -0
- headroom/pricing/anthropic_prices.py +81 -0
- headroom/pricing/litellm_pricing.py +113 -0
- headroom/pricing/openai_prices.py +91 -0
- headroom/pricing/registry.py +188 -0
- headroom/providers/__init__.py +61 -0
- headroom/providers/anthropic.py +621 -0
- headroom/providers/base.py +131 -0
- headroom/providers/cohere.py +362 -0
- headroom/providers/google.py +427 -0
- headroom/providers/litellm.py +297 -0
- headroom/providers/openai.py +566 -0
- headroom/providers/openai_compatible.py +521 -0
- headroom/proxy/__init__.py +19 -0
- headroom/proxy/server.py +2683 -0
- headroom/py.typed +0 -0
- headroom/relevance/__init__.py +124 -0
- headroom/relevance/base.py +106 -0
- headroom/relevance/bm25.py +255 -0
- headroom/relevance/embedding.py +255 -0
- headroom/relevance/hybrid.py +259 -0
- headroom/reporting/__init__.py +5 -0
- headroom/reporting/generator.py +549 -0
- headroom/storage/__init__.py +41 -0
- headroom/storage/base.py +125 -0
- headroom/storage/jsonl.py +220 -0
- headroom/storage/sqlite.py +289 -0
- headroom/telemetry/__init__.py +91 -0
- headroom/telemetry/collector.py +764 -0
- headroom/telemetry/models.py +880 -0
- headroom/telemetry/toin.py +1579 -0
- headroom/tokenizer.py +80 -0
- headroom/tokenizers/__init__.py +75 -0
- headroom/tokenizers/base.py +210 -0
- headroom/tokenizers/estimator.py +198 -0
- headroom/tokenizers/huggingface.py +317 -0
- headroom/tokenizers/mistral.py +245 -0
- headroom/tokenizers/registry.py +398 -0
- headroom/tokenizers/tiktoken_counter.py +248 -0
- headroom/transforms/__init__.py +106 -0
- headroom/transforms/base.py +57 -0
- headroom/transforms/cache_aligner.py +357 -0
- headroom/transforms/code_compressor.py +1313 -0
- headroom/transforms/content_detector.py +335 -0
- headroom/transforms/content_router.py +1158 -0
- headroom/transforms/llmlingua_compressor.py +638 -0
- headroom/transforms/log_compressor.py +529 -0
- headroom/transforms/pipeline.py +297 -0
- headroom/transforms/rolling_window.py +350 -0
- headroom/transforms/search_compressor.py +365 -0
- headroom/transforms/smart_crusher.py +2682 -0
- headroom/transforms/text_compressor.py +259 -0
- headroom/transforms/tool_crusher.py +338 -0
- headroom/utils.py +215 -0
- headroom_ai-0.2.13.dist-info/METADATA +315 -0
- headroom_ai-0.2.13.dist-info/RECORD +114 -0
- headroom_ai-0.2.13.dist-info/WHEEL +4 -0
- headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
- headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
- headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/compression/masks.py
@@ -0,0 +1,345 @@
"""Structure mask system for compression.

A StructureMask identifies which parts of content are "structural" (should be
preserved) vs "compressible" (can be compressed by LLMLingua).

This separates the concerns of:
1. Structure detection (handlers) - What tokens are navigational?
2. Content compression (LLMLingua) - What tokens can be removed?

The mask is content-agnostic - it's just a boolean array aligned to tokens.
"""

from __future__ import annotations

from collections.abc import Callable, Sequence
from dataclasses import dataclass, field


@dataclass
class StructureMask:
    """A mask identifying structural vs compressible tokens.

    The mask is aligned to a token sequence. True means "preserve this token"
    (it's structural/navigational), False means "compressible" (LLMLingua can
    potentially remove it).

    Attributes:
        tokens: The tokenized content (list of strings or token IDs).
        mask: Boolean array, True = preserve, False = compressible.
        metadata: Optional handler-specific metadata.
    """

    tokens: Sequence[str | int]
    mask: list[bool]
    metadata: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate mask alignment."""
        if len(self.tokens) != len(self.mask):
            raise ValueError(
                f"Mask length ({len(self.mask)}) must match tokens length ({len(self.tokens)})"
            )

    @property
    def preservation_ratio(self) -> float:
        """Fraction of tokens marked for preservation."""
        if not self.mask:
            return 0.0
        return sum(self.mask) / len(self.mask)

    @property
    def structural_count(self) -> int:
        """Number of structural (preserved) tokens."""
        return sum(self.mask)

    @property
    def compressible_count(self) -> int:
        """Number of compressible tokens."""
        return len(self.mask) - sum(self.mask)

    def get_structural_tokens(self) -> list[str | int]:
        """Get list of tokens marked as structural."""
        return [t for t, m in zip(self.tokens, self.mask) if m]

    def get_compressible_tokens(self) -> list[str | int]:
        """Get list of tokens marked as compressible."""
        return [t for t, m in zip(self.tokens, self.mask) if not m]

    @classmethod
    def empty(cls, tokens: Sequence[str | int]) -> StructureMask:
        """Create a mask with no structural tokens (all compressible)."""
        return cls(tokens=tokens, mask=[False] * len(tokens))

    @classmethod
    def full(cls, tokens: Sequence[str | int]) -> StructureMask:
        """Create a mask preserving all tokens (nothing compressible)."""
        return cls(tokens=tokens, mask=[True] * len(tokens))

    def union(self, other: StructureMask) -> StructureMask:
        """Combine masks - preserve if EITHER mask says preserve.

        Useful when combining multiple structure detection strategies.

        Args:
            other: Another mask to combine with.

        Returns:
            New mask with union of preserved tokens.

        Raises:
            ValueError: If masks have different lengths.
        """
        if len(self.mask) != len(other.mask):
            raise ValueError("Cannot union masks of different lengths")

        return StructureMask(
            tokens=self.tokens,
            mask=[a or b for a, b in zip(self.mask, other.mask)],
            metadata={"source": "union", **self.metadata, **other.metadata},
        )

    def intersection(self, other: StructureMask) -> StructureMask:
        """Combine masks - preserve only if BOTH masks say preserve.

        Useful for being more aggressive with compression.

        Args:
            other: Another mask to combine with.

        Returns:
            New mask with intersection of preserved tokens.

        Raises:
            ValueError: If masks have different lengths.
        """
        if len(self.mask) != len(other.mask):
            raise ValueError("Cannot intersect masks of different lengths")

        return StructureMask(
            tokens=self.tokens,
            mask=[a and b for a, b in zip(self.mask, other.mask)],
            metadata={"source": "intersection", **self.metadata, **other.metadata},
        )


@dataclass
class MaskSpan:
    """A contiguous span in the mask.

    Useful for applying different compression strategies to different
    parts of the content.
    """

    start: int
    end: int
    is_structural: bool
    label: str = ""  # Optional label (e.g., "key", "value", "signature")

    @property
    def length(self) -> int:
        """Length of the span."""
        return self.end - self.start


def mask_to_spans(mask: StructureMask) -> list[MaskSpan]:
    """Convert a mask to a list of contiguous spans.

    This is useful for processing structural and compressible regions
    separately.

    Args:
        mask: The structure mask.

    Returns:
        List of MaskSpan objects representing contiguous regions.

    Example:
        >>> tokens = ["def", " ", "foo", "(", ")", ":", " ", "pass"]
        >>> mask = StructureMask(tokens, [True, True, True, True, True, True, False, False])
        >>> spans = mask_to_spans(mask)
        >>> [(s.start, s.end, s.is_structural) for s in spans]
        [(0, 6, True), (6, 8, False)]
    """
    if not mask.mask:
        return []

    spans = []
    current_start = 0
    current_structural = mask.mask[0]

    for i, is_structural in enumerate(mask.mask[1:], start=1):
        if is_structural != current_structural:
            spans.append(
                MaskSpan(
                    start=current_start,
                    end=i,
                    is_structural=current_structural,
                )
            )
            current_start = i
            current_structural = is_structural

    # Don't forget the last span
    spans.append(
        MaskSpan(
            start=current_start,
            end=len(mask.mask),
            is_structural=current_structural,
        )
    )

    return spans


def apply_mask_to_text(
    text: str,
    mask: StructureMask,
    compress_fn: Callable[[str], str],
    tokenizer_decode: Callable[[Sequence[str | int]], str] | None = None,
) -> str:
    """Apply compression to non-structural regions of text.

    This is the core function that enables structure-preserving compression.
    Structural regions are kept verbatim, non-structural regions are
    passed to the compression function.

    Args:
        text: Original text.
        mask: Structure mask aligned to tokens.
        compress_fn: Function to compress text (e.g., LLMLingua).
        tokenizer_decode: Optional function to decode tokens to text.
            If not provided, assumes tokens are strings and joins them.

    Returns:
        Text with non-structural regions compressed.
    """
    spans = mask_to_spans(mask)
    result_parts = []

    if tokenizer_decode is None:
        # Default: assume tokens are strings
        def tokenizer_decode(tokens: Sequence[str | int]) -> str:
            return "".join(str(t) for t in tokens)

    for span in spans:
        span_tokens = mask.tokens[span.start : span.end]
        span_text = tokenizer_decode(span_tokens)

        if span.is_structural:
            # Keep structural regions verbatim
            result_parts.append(span_text)
        else:
            # Compress non-structural regions
            compressed = compress_fn(span_text)
            result_parts.append(compressed)

    return "".join(result_parts)


@dataclass
class EntropyScore:
    """Entropy-based preservation signal.

    High entropy content (UUIDs, hashes, random strings) should generally
    be preserved because:
    1. They're information-dense (can't be reconstructed)
    2. They're often identifiers (semantically important)
    3. LLMLingua may mangle them

    This is a self-signal - no external classifier needed.
    """

    value: float  # 0.0 to 1.0, normalized entropy
    should_preserve: bool  # True if entropy above threshold

    @classmethod
    def compute(cls, text: str, threshold: float = 0.85) -> EntropyScore:
        """Compute entropy score for text.

        Args:
            text: Text to analyze.
            threshold: Entropy threshold for preservation (0.0-1.0).
                Higher = more selective.

        Returns:
            EntropyScore with value and preservation recommendation.
        """
        if not text:
            return cls(value=0.0, should_preserve=False)

        # Calculate character entropy
        import math
        from collections import Counter

        # Count character frequencies
        counter = Counter(text)
        total = len(text)

        # Calculate Shannon entropy
        entropy = 0.0
        for count in counter.values():
            if count > 0:
                p = count / total
                entropy -= p * math.log2(p)

        # Normalize to 0-1 range
        # Maximum possible entropy for this alphabet size
        max_entropy = math.log2(len(counter)) if len(counter) > 1 else 1.0
        normalized = entropy / max_entropy if max_entropy > 0 else 0.0

        return cls(
            value=normalized,
            should_preserve=normalized >= threshold,
        )


def compute_entropy_mask(
    tokens: Sequence[str],
    threshold: float = 0.85,
    min_token_length: int = 8,
) -> StructureMask:
    """Create a mask preserving high-entropy tokens.

    This is a self-signal that doesn't require content classification.
    High-entropy tokens (UUIDs, hashes, etc.) are marked for preservation.

    Args:
        tokens: List of string tokens.
        threshold: Entropy threshold (0.0-1.0). Higher = more selective.
        min_token_length: Only check tokens this long or longer.
            Short tokens rarely have meaningful entropy.

    Returns:
        StructureMask with high-entropy tokens marked for preservation.

    Example:
        >>> tokens = ["user", ":", " ", "8f14e45f-ceea-4123-8f14-e45fceea4123"]
        >>> mask = compute_entropy_mask(tokens)
        >>> mask.mask
        [False, False, False, True]  # UUID preserved
    """
    mask = []

    for token in tokens:
        if isinstance(token, int):
            # Token ID, can't compute entropy
            mask.append(False)
            continue

        token_str = str(token)

        # Skip short tokens
        if len(token_str) < min_token_length:
            mask.append(False)
            continue

        # Compute entropy
        score = EntropyScore.compute(token_str, threshold)
        mask.append(score.should_preserve)

    return StructureMask(
        tokens=tokens,
        mask=mask,
        metadata={"source": "entropy", "threshold": threshold},
    )