headroom_ai-0.2.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,365 @@
+"""Search results compressor for grep/ripgrep output.
+
+This module compresses search results (grep, ripgrep, ag), which are among
+the most common outputs in coding tasks. Typical compression: 5-10x.
+
+Input Format (grep -n style):
+    src/utils.py:42:def process_data(items):
+    src/utils.py:43:    \"\"\"Process items with validation.\"\"\"
+    src/models.py:15:class DataProcessor:
+
+Compression Strategy:
+    1. Parse into {file: [(line, content), ...]} structure
+    2. Group by file
+    3. For each file: keep first match, last match, context-relevant matches
+    4. Deduplicate near-identical lines
+    5. Add summary: [... and N more matches in file.py]
+
+Integrates with CCR for reversible compression.
+"""
+
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass, field
+
+
+@dataclass
+class SearchMatch:
+    """A single search match."""
+
+    file: str
+    line_number: int
+    content: str
+    score: float = 0.0  # Relevance score
+
+
+@dataclass
+class FileMatches:
+    """All matches in a single file."""
+
+    file: str
+    matches: list[SearchMatch] = field(default_factory=list)
+
+    @property
+    def first(self) -> SearchMatch | None:
+        return self.matches[0] if self.matches else None
+
+    @property
+    def last(self) -> SearchMatch | None:
+        return self.matches[-1] if self.matches else None
+
+
+@dataclass
+class SearchCompressorConfig:
+    """Configuration for search result compression."""
+
+    # Per-file limits
+    max_matches_per_file: int = 5
+    always_keep_first: bool = True
+    always_keep_last: bool = True
+
+    # Global limits
+    max_total_matches: int = 30
+    max_files: int = 15
+
+    # Context matching
+    context_keywords: list[str] = field(default_factory=list)
+    boost_errors: bool = True
+
+    # CCR integration
+    enable_ccr: bool = True
+    min_matches_for_ccr: int = 10
+
+
+class SearchCompressor:
+    """Compresses grep/ripgrep search results.
+
+    Example:
+        >>> compressor = SearchCompressor()
+        >>> result = compressor.compress(search_output, context="find error handlers")
+        >>> print(result.compressed)  # Reduced output with summary
+    """
+
+    # Pattern to parse grep-style output: file:line:content
+    _GREP_PATTERN = re.compile(r"^([^:]+):(\d+):(.*)$")
+
+    # Pattern for ripgrep with context (file-line-content or file:line:content)
+    _RG_CONTEXT_PATTERN = re.compile(r"^([^:-]+)[:-](\d+)[:-](.*)$")
+
+    # Error/important patterns to prioritize
+    _PRIORITY_PATTERNS = [
+        re.compile(r"\b(error|exception|fail|fatal)\b", re.IGNORECASE),
+        re.compile(r"\b(warn|warning)\b", re.IGNORECASE),
+        re.compile(r"\b(todo|fixme|hack|xxx)\b", re.IGNORECASE),
+    ]
+
+    def __init__(self, config: SearchCompressorConfig | None = None):
+        """Initialize search compressor.
+
+        Args:
+            config: Compression configuration.
+        """
+        self.config = config or SearchCompressorConfig()
+
+    def compress(
+        self,
+        content: str,
+        context: str = "",
+    ) -> SearchCompressionResult:
+        """Compress search results.
+
+        Args:
+            content: Raw grep/ripgrep output.
+            context: User query context for relevance scoring.
+
+        Returns:
+            SearchCompressionResult with compressed output and metadata.
+        """
+        # Parse search results
+        file_matches = self._parse_search_results(content)
+
+        if not file_matches:
+            return SearchCompressionResult(
+                compressed=content,
+                original=content,
+                original_match_count=0,
+                compressed_match_count=0,
+                files_affected=0,
+                compression_ratio=1.0,
+            )
+
+        # Count original matches
+        original_count = sum(len(fm.matches) for fm in file_matches.values())
+
+        # Score matches by relevance
+        self._score_matches(file_matches, context)
+
+        # Select top matches per file
+        selected = self._select_matches(file_matches)
+
+        # Format compressed output
+        compressed, summaries = self._format_output(selected, file_matches)
+
+        # Count compressed matches
+        compressed_count = sum(len(fm.matches) for fm in selected.values())
+
+        # Calculate compression ratio
+        ratio = len(compressed) / max(len(content), 1)
+
+        # Store in CCR if significant compression
+        cache_key = None
+        if (
+            self.config.enable_ccr
+            and original_count >= self.config.min_matches_for_ccr
+            and ratio < 0.8
+        ):
+            cache_key = self._store_in_ccr(content, compressed, original_count)
+            if cache_key:
+                compressed += f"\n[{original_count} matches compressed. hash={cache_key}]"
+
+        return SearchCompressionResult(
+            compressed=compressed,
+            original=content,
+            original_match_count=original_count,
+            compressed_match_count=compressed_count,
+            files_affected=len(file_matches),
+            compression_ratio=ratio,
+            cache_key=cache_key,
+            summaries=summaries,
+        )
+
+    def _parse_search_results(self, content: str) -> dict[str, FileMatches]:
+        """Parse grep-style output into structured data."""
+        file_matches: dict[str, FileMatches] = {}
+
+        for line in content.split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+
+            # Try grep pattern first
+            match = self._GREP_PATTERN.match(line)
+            if not match:
+                match = self._RG_CONTEXT_PATTERN.match(line)
+
+            if match:
+                file_path, line_num, match_content = match.groups()
+
+                if file_path not in file_matches:
+                    file_matches[file_path] = FileMatches(file=file_path)
+
+                file_matches[file_path].matches.append(
+                    SearchMatch(
+                        file=file_path,
+                        line_number=int(line_num),
+                        content=match_content,
+                    )
+                )
+
+        return file_matches
+
+    def _score_matches(
+        self,
+        file_matches: dict[str, FileMatches],
+        context: str,
+    ) -> None:
+        """Score matches by relevance to context."""
+        context_lower = context.lower()
+        context_words = set(context_lower.split())
+
+        for fm in file_matches.values():
+            for match in fm.matches:
+                score = 0.0
+                content_lower = match.content.lower()
+
+                # Score by context word overlap
+                for word in context_words:
+                    if len(word) > 2 and word in content_lower:
+                        score += 0.3
+
+                # Boost error/warning patterns
+                if self.config.boost_errors:
+                    for i, pattern in enumerate(self._PRIORITY_PATTERNS):
+                        if pattern.search(match.content):
+                            score += 0.5 - (i * 0.1)  # Higher boost for errors
+
+                # Boost for keyword matches
+                for keyword in self.config.context_keywords:
+                    if keyword.lower() in content_lower:
+                        score += 0.4
+
+                match.score = min(1.0, score)
+
+    def _select_matches(
+        self,
+        file_matches: dict[str, FileMatches],
+    ) -> dict[str, FileMatches]:
+        """Select top matches per file and globally."""
+        selected: dict[str, FileMatches] = {}
+
+        # Sort files by total match score (highest first)
+        sorted_files = sorted(
+            file_matches.items(),
+            key=lambda x: sum(m.score for m in x[1].matches),
+            reverse=True,
+        )
+
+        # Limit number of files
+        sorted_files = sorted_files[: self.config.max_files]
+
+        total_selected = 0
+        for file_path, fm in sorted_files:
+            if total_selected >= self.config.max_total_matches:
+                break
+
+            # Sort matches by score
+            sorted_matches = sorted(fm.matches, key=lambda m: m.score, reverse=True)
+
+            # Select matches for this file
+            file_selected: list[SearchMatch] = []
+            remaining_slots = min(
+                self.config.max_matches_per_file,
+                self.config.max_total_matches - total_selected,
+            )
+
+            # Always include first and last if configured
+            if self.config.always_keep_first and fm.first:
+                file_selected.append(fm.first)
+                remaining_slots -= 1
+
+            if (
+                self.config.always_keep_last
+                and fm.last
+                and fm.last != fm.first
+                and remaining_slots > 0
+            ):
+                file_selected.append(fm.last)
+                remaining_slots -= 1
+
+            # Fill remaining slots with highest-scoring matches
+            for match in sorted_matches:
+                if remaining_slots <= 0:
+                    break
+                if match not in file_selected:
+                    file_selected.append(match)
+                    remaining_slots -= 1
+
+            # Sort by line number for output
+            file_selected.sort(key=lambda m: m.line_number)
+
+            selected[file_path] = FileMatches(file=file_path, matches=file_selected)
+            total_selected += len(file_selected)
+
+        return selected
+
+    def _format_output(
+        self,
+        selected: dict[str, FileMatches],
+        original: dict[str, FileMatches],
+    ) -> tuple[str, dict[str, str]]:
+        """Format selected matches back to grep-style output."""
+        lines: list[str] = []
+        summaries: dict[str, str] = {}
+
+        for file_path, fm in sorted(selected.items()):
+            for match in fm.matches:
+                lines.append(f"{match.file}:{match.line_number}:{match.content}")
+
+            # Add summary if matches were omitted
+            original_fm = original.get(file_path)
+            if original_fm and len(original_fm.matches) > len(fm.matches):
+                omitted = len(original_fm.matches) - len(fm.matches)
+                summary = f"[... and {omitted} more matches in {file_path}]"
+                lines.append(summary)
+                summaries[file_path] = summary
+
+        return "\n".join(lines), summaries
+
+    def _store_in_ccr(
+        self,
+        original: str,
+        compressed: str,
+        original_count: int,
+    ) -> str | None:
+        """Store original in CCR for later retrieval."""
+        try:
+            from ..cache.compression_store import get_compression_store
+
+            store = get_compression_store()
+            return store.store(
+                original,
+                compressed,
+                original_item_count=original_count,
+            )
+        except ImportError:
+            # CCR not available
+            return None
+        except Exception:
+            # Silently fail CCR storage
+            return None
+
+
+@dataclass
+class SearchCompressionResult:
+    """Result of search result compression."""
+
+    compressed: str
+    original: str
+    original_match_count: int
+    compressed_match_count: int
+    files_affected: int
+    compression_ratio: float
+    cache_key: str | None = None
+    summaries: dict[str, str] = field(default_factory=dict)
+
+    @property
+    def tokens_saved_estimate(self) -> int:
+        """Estimate tokens saved (rough: 1 token per 4 chars)."""
+        chars_saved = len(self.original) - len(self.compressed)
+        return max(0, chars_saved // 4)
+
+    @property
+    def matches_omitted(self) -> int:
+        """Number of matches omitted."""
+        return self.original_match_count - self.compressed_match_count
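
A minimal usage sketch of the compressor above, assuming the wheel is installed (pip install headroom-ai) and the public import path mirrors the file listing (headroom/transforms/search_compressor.py). The grep output and the tightened limits are invented for illustration; the defaults are max_matches_per_file=5 and max_total_matches=30.

    from headroom.transforms.search_compressor import (
        SearchCompressor,
        SearchCompressorConfig,
    )

    # Invented grep -n style input: five matches across two files.
    search_output = "\n".join([
        "src/models.py:15:class DataProcessor:",
        "src/models.py:99:    # TODO: handle empty batches",
        "src/utils.py:42:def process_data(items):",
        "src/utils.py:88:    raise ValueError('process_data failed')",
        "src/utils.py:130:    return processed",
    ])

    # Tight limits so even this small input gets trimmed.
    config = SearchCompressorConfig(max_matches_per_file=2, max_total_matches=3)
    compressor = SearchCompressor(config)

    result = compressor.compress(search_output, context="find error handlers")
    print(result.compressed)             # kept matches plus "[... and N more matches in <file>]" summaries
    print(result.matches_omitted)        # 2 (of the 5 parsed matches)
    print(result.tokens_saved_estimate)  # (len(original) - len(compressed)) // 4, floored at 0

Because only five matches are parsed, original_count stays below min_matches_for_ccr (10), so the optional CCR compression store is never touched and the sketch runs standalone.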