headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
@@ -0,0 +1,506 @@
1
+ """Code structure handler using AST parsing.
2
+
3
+ Extracts structural elements from source code:
4
+ - Import statements
5
+ - Function/method signatures
6
+ - Class definitions
7
+ - Type annotations
8
+ - Decorators
9
+
10
+ Function bodies are marked as compressible while preserving signatures.
11
+ This enables the LLM to see all available functions/methods while body
12
+ implementations are compressed.
13
+
14
+ Uses tree-sitter for parsing when available, falls back to regex patterns.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import re
21
+ import threading
22
+ from dataclasses import dataclass
23
+ from enum import Enum
24
+ from typing import Any
25
+
26
+ from headroom.compression.handlers.base import BaseStructureHandler, HandlerResult
27
+ from headroom.compression.masks import StructureMask
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Lazy-loaded tree-sitter
32
+ _tree_sitter_available: bool | None = None
33
+ _tree_sitter_parsers: dict[str, Any] = {}
34
+ _tree_sitter_lock = threading.Lock()
35
+
36
+
37
def _check_tree_sitter() -> bool:
    """Report whether the optional tree-sitter language pack can be imported.

    The import is attempted at most once per process; the outcome is stored in
    the module-level ``_tree_sitter_available`` flag and reused afterwards.

    Returns:
        True if ``tree_sitter_language_pack`` imported successfully, else False.
    """
    global _tree_sitter_available

    # A previous call already settled the answer.
    if _tree_sitter_available is not None:
        return _tree_sitter_available

    try:
        import tree_sitter_language_pack  # noqa: F401
    except ImportError:
        _tree_sitter_available = False
    else:
        _tree_sitter_available = True

    return _tree_sitter_available
48
+
49
+
50
def _get_parser(language: str) -> Any:
    """Return the tree-sitter parser for ``language``, creating it on first use.

    Parsers are kept in the module-level ``_tree_sitter_parsers`` dict; lookups
    and insertions happen while holding ``_tree_sitter_lock``.

    Args:
        language: Language name passed through to ``get_parser``.

    Returns:
        The parser object produced by ``tree_sitter_language_pack.get_parser``.

    Raises:
        ImportError: If tree-sitter-language-pack is not installed.
    """
    if not _check_tree_sitter():
        raise ImportError("tree-sitter-language-pack not installed")

    with _tree_sitter_lock:
        if language in _tree_sitter_parsers:
            return _tree_sitter_parsers[language]

        # First request for this language: build the parser and remember it.
        from tree_sitter_language_pack import get_parser

        parser = get_parser(language)  # type: ignore[arg-type]
        _tree_sitter_parsers[language] = parser
        return parser
64
+
65
+
66
class CodeLanguage(Enum):
    """Programming languages known to the code handler.

    Each member's value is the lowercase language name.
    """

    # Scripting / dynamically typed languages
    PYTHON = "python"
    JAVASCRIPT = "javascript"
    TYPESCRIPT = "typescript"

    # Compiled languages
    GO = "go"
    RUST = "rust"
    JAVA = "java"
    C = "c"
    CPP = "cpp"
77
+
78
+
79
@dataclass
class CodeSpan:
    """A half-open ``[start, end)`` region of source code and its role.

    Attributes:
        start: Offset where the region begins.
        end: Offset just past the end of the region.
        role: Label for the region, e.g. "import", "signature", "body",
            "decorator".
        is_structural: True if the region must be preserved, False if it may
            be compressed.
    """

    start: int
    end: int
    role: str
    is_structural: bool
87
+
88
+
89
# Per-language tree-sitter node types treated as structural.
# Function/method-like entries are split by the handler into a preserved
# signature and a compressible body; every other entry is preserved whole.
_STRUCTURAL_NODE_TYPES: dict[str, set[str]] = {
    "python": {
        # imports
        "import_statement",
        "import_from_statement",
        # definitions
        "function_definition",  # signature part only
        "class_definition",
        "decorated_definition",
        "type_alias_statement",
    },
    "javascript": {
        # module boundary
        "import_statement",
        "export_statement",
        # definitions
        "function_declaration",
        "class_declaration",
        "method_definition",
        "arrow_function",  # signature part only
    },
    "typescript": {
        # module boundary
        "import_statement",
        "export_statement",
        # definitions
        "function_declaration",
        "class_declaration",
        "method_definition",
        # type-level declarations
        "interface_declaration",
        "type_alias_declaration",
    },
    "go": {
        "import_declaration",
        "function_declaration",
        "method_declaration",
        "type_declaration",
        "interface_type",
    },
    "rust": {
        "use_declaration",
        "function_item",
        "impl_item",
        "struct_item",
        "enum_item",
        "trait_item",
    },
    "java": {
        "import_declaration",
        "class_declaration",
        "method_declaration",
        "interface_declaration",
        "annotation",
    },
}
139
+
140
# Fallback signature detection used when tree-sitter is not in play.
# Raw pattern sources are listed per language and compiled in one place so
# every pattern gets the same re.MULTILINE flag.
_SIGNATURE_PATTERNS: dict[str, list[re.Pattern[str]]] = {
    lang: [re.compile(source, re.MULTILINE) for source in sources]
    for lang, sources in {
        "python": [
            # def / async def, optional return annotation
            r"^\s*(async\s+)?def\s+\w+\s*\([^)]*\)\s*(->\s*[^:]+)?:",
            # class header, optional bases
            r"^\s*class\s+\w+(\([^)]*\))?:",
            # decorator on its own line
            r"^\s*@\w+(\([^)]*\))?\s*$",
        ],
        "javascript": [
            r"^\s*(async\s+)?function\s+\w+\s*\([^)]*\)",
            r"^\s*class\s+\w+(\s+extends\s+\w+)?",
            # arrow function bound to const/let/var
            r"^\s*(const|let|var)\s+\w+\s*=\s*(async\s+)?\([^)]*\)\s*=>",
        ],
        "typescript": [
            r"^\s*(async\s+)?function\s+\w+\s*(<[^>]+>)?\s*\([^)]*\)",
            r"^\s*class\s+\w+(<[^>]+>)?(\s+extends\s+\w+)?",
            r"^\s*interface\s+\w+(<[^>]+>)?",
            r"^\s*type\s+\w+(<[^>]+>)?\s*=",
        ],
        "go": [
            # func with optional receiver
            r"^\s*func\s+(\([^)]+\)\s+)?\w+\s*\([^)]*\)",
            r"^\s*type\s+\w+\s+(struct|interface)",
        ],
        "rust": [
            r"^\s*(pub\s+)?(async\s+)?fn\s+\w+\s*(<[^>]+>)?\s*\([^)]*\)",
            r"^\s*(pub\s+)?struct\s+\w+",
            r"^\s*(pub\s+)?enum\s+\w+",
            r"^\s*(pub\s+)?trait\s+\w+",
            r"^\s*impl(<[^>]+>)?\s+\w+",
        ],
        "java": [
            # method header with optional visibility / static
            r"^\s*(public|private|protected)?\s*(static\s+)?\w+\s+\w+\s*\([^)]*\)",
            r"^\s*(public\s+)?(class|interface|enum)\s+\w+",
            # annotation on its own line
            r"^\s*@\w+(\([^)]*\))?\s*$",
        ],
    }.items()
}
177
+
178
# Import detection for the regex fallback. Each pattern only needs to match
# the START of an import line; the handler extends the span to end-of-line.
_IMPORT_PATTERNS: dict[str, re.Pattern[str]] = {
    # ``[\w.]+`` (rather than ``\w+``) so that dotted and relative modules are
    # recognised: ``from a.b import c``, ``from . import x``, ``from ..p import y``.
    "python": re.compile(r"^\s*(import\s+\w+|from\s+[\w.]+\s+import)", re.MULTILINE),
    # ES-module imports, bare ``require(...)`` calls, and the common CommonJS
    # form ``const x = require(...)`` (also ``let`` / ``var`` / destructuring).
    "javascript": re.compile(
        r"^\s*(import\s+.*from|(?:(?:const|let|var)\s+.+?=\s*)?require\s*\()",
        re.MULTILINE,
    ),
    "typescript": re.compile(
        r"^\s*(import\s+.*from|(?:(?:const|let|var)\s+.+?=\s*)?require\s*\()",
        re.MULTILINE,
    ),
    "go": re.compile(r'^\s*import\s+(\(|")', re.MULTILINE),
    "rust": re.compile(r"^\s*use\s+\w+", re.MULTILINE),
    # Also accepts ``import static pkg.Class.member;`` and wildcard
    # ``import pkg.*;`` in addition to plain ``import pkg.Class;``.
    "java": re.compile(r"^\s*import\s+(?:static\s+)?[\w.]+(?:\.\*)?\s*;", re.MULTILINE),
}
187
+
188
+
189
class CodeStructureHandler(BaseStructureHandler):
    """Handler for source code.

    Preserves:
    - Import/use statements
    - Function/method signatures (not bodies)
    - Class/struct/interface definitions
    - Type declarations
    - Decorators/annotations

    Marks as compressible:
    - Function/method bodies
    - Comments (optionally preserved)
    - Whitespace

    Example:
        >>> handler = CodeStructureHandler()
        >>> code = '''
        ... def hello(name: str) -> str:
        ...     message = f"Hello, {name}!"
        ...     return message
        ... '''
        >>> result = handler.get_mask(code, language="python")
        >>> # Signature "def hello(name: str) -> str:" preserved
        >>> # Body content compressed
    """

    def __init__(
        self,
        preserve_comments: bool = False,
        use_tree_sitter: bool = True,
        default_language: str = "python",
    ):
        """Initialize the code handler.

        Args:
            preserve_comments: Whether to preserve comments as structural.
            use_tree_sitter: Whether to use tree-sitter for parsing.
                Falls back to regex if False or unavailable.
            default_language: Default language when detection fails.
        """
        super().__init__(name="code")
        self.preserve_comments = preserve_comments
        self.use_tree_sitter = use_tree_sitter
        self.default_language = default_language

    def can_handle(self, content: str) -> bool:
        """Check if content looks like source code.

        This is a cheap substring heuristic: it returns True as soon as any
        common definition/import keyword (followed by a space) occurs in
        ``content``.
        """
        code_indicators = (
            "def ",
            "class ",
            "function ",
            "import ",
            "const ",
            "let ",
            "var ",
            "func ",
            "fn ",
            "pub ",
            "package ",
            "struct ",
            "interface ",
        )
        return any(indicator in content for indicator in code_indicators)

    def _extract_mask(
        self,
        content: str,
        tokens: list[str],
        language: str | None = None,
        **kwargs: Any,
    ) -> HandlerResult:
        """Extract structure mask from code.

        Args:
            content: Source code content.
            tokens: Character-level tokens.
            language: Programming language (auto-detected if None).
            **kwargs: Additional options (unused here).

        Returns:
            HandlerResult with mask marking structural elements.
        """
        if language is None:
            language = self._detect_language(content)

        # Prefer the AST-based extraction. Any failure there (missing grammar,
        # encoding error, parser error) is deliberately best-effort: it is
        # logged at debug level and the regex fallback is used instead.
        if self.use_tree_sitter and _check_tree_sitter():
            try:
                return self._extract_with_tree_sitter(content, tokens, language)
            except Exception as e:
                logger.debug("Tree-sitter parsing failed, using fallback: %s", e)

        return self._extract_with_regex(content, tokens, language)

    @staticmethod
    def _byte_to_char_offsets(content: str) -> list[int] | None:
        """Build a UTF-8 byte-offset -> character-index lookup table.

        Args:
            content: The text that was UTF-8 encoded for parsing.

        Returns:
            None when ``content`` is pure ASCII (byte offsets already equal
            character indices). Otherwise a list with one entry per encoded
            byte plus a final entry equal to ``len(content)``, so an
            end-of-input byte offset maps to the end-of-text index.
        """
        if content.isascii():
            return None

        offsets: list[int] = []
        for char_index, char in enumerate(content):
            # Every byte of a multi-byte character maps back to that character.
            offsets.extend([char_index] * len(char.encode("utf-8")))
        offsets.append(len(content))
        return offsets

    def _extract_with_tree_sitter(
        self,
        content: str,
        tokens: list[str],
        language: str,
    ) -> HandlerResult:
        """Extract structure using tree-sitter AST.

        The parser is fed UTF-8 bytes, so node positions are byte offsets.
        They are converted to character indices before being stored in spans,
        because the resulting mask has one entry per character of ``content``.

        Args:
            content: Source code.
            tokens: Character tokens.
            language: Language name.

        Returns:
            HandlerResult with mask.
        """
        parser = _get_parser(language)
        tree = parser.parse(content.encode("utf-8"))

        offsets = self._byte_to_char_offsets(content)

        def to_char(byte_offset: int) -> int:
            """Translate a parser byte offset into a character index."""
            if offsets is None:
                return byte_offset
            return offsets[min(byte_offset, len(offsets) - 1)]

        # Loop-invariant lookups hoisted out of the AST walk.
        structural_types = _STRUCTURAL_NODE_TYPES.get(language, set())
        body_types = ("block", "statement_block", "compound_statement")

        spans: list[CodeSpan] = []

        def add_span(start_byte: int, end_byte: int, role: str, is_structural: bool) -> None:
            spans.append(
                CodeSpan(
                    start=to_char(start_byte),
                    end=to_char(end_byte),
                    role=role,
                    is_structural=is_structural,
                )
            )

        # Iterative pre-order walk (parents are recorded before their
        # descendants). An explicit stack avoids hitting the interpreter's
        # recursion limit on deeply nested syntax trees.
        pending = [tree.root_node]
        while pending:
            node = pending.pop()
            node_type = node.type

            if node_type in structural_types:
                body_node = None
                if "function" in node_type or "method" in node_type:
                    # For callables only the signature is structural; look for
                    # the direct child that holds the body.
                    body_node = next(
                        (child for child in node.children if child.type in body_types),
                        None,
                    )

                if body_node is not None:
                    add_span(node.start_byte, body_node.start_byte, "signature", True)
                    add_span(body_node.start_byte, body_node.end_byte, "body", False)
                else:
                    # Non-callable structural node, or a callable without a
                    # recognised body: preserve the whole node.
                    add_span(node.start_byte, node.end_byte, node_type, True)
            elif node_type == "comment" and self.preserve_comments:
                add_span(node.start_byte, node.end_byte, "comment", True)

            # Reverse so that children are popped in source order.
            pending.extend(reversed(node.children))

        mask = self._spans_to_mask(spans, len(content))

        return HandlerResult(
            mask=StructureMask(tokens=tokens, mask=mask),
            handler_name=self.name,
            confidence=0.95,
            metadata={
                "language": language,
                "parser": "tree-sitter",
                "structural_spans": sum(1 for span in spans if span.is_structural),
            },
        )

    def _extract_with_regex(
        self,
        content: str,
        tokens: list[str],
        language: str,
    ) -> HandlerResult:
        """Extract structure using regex patterns (fallback).

        Import matches are extended to the end of their line; signature
        matches are used as-is. Languages without registered patterns yield
        an all-compressible mask.

        Args:
            content: Source code.
            tokens: Character tokens.
            language: Language name.

        Returns:
            HandlerResult with mask.
        """
        spans: list[CodeSpan] = []

        import_pattern = _IMPORT_PATTERNS.get(language)
        if import_pattern:
            for match in import_pattern.finditer(content):
                # The pattern only anchors the start of the statement; keep
                # everything up to (not including) the next newline.
                line_end = content.find("\n", match.end())
                if line_end == -1:
                    line_end = len(content)
                spans.append(
                    CodeSpan(
                        start=match.start(),
                        end=line_end,
                        role="import",
                        is_structural=True,
                    )
                )

        for pattern in _SIGNATURE_PATTERNS.get(language, []):
            spans.extend(
                CodeSpan(
                    start=match.start(),
                    end=match.end(),
                    role="signature",
                    is_structural=True,
                )
                for match in pattern.finditer(content)
            )

        mask = self._spans_to_mask(spans, len(content))

        return HandlerResult(
            mask=StructureMask(tokens=tokens, mask=mask),
            handler_name=self.name,
            confidence=0.7,  # Lower confidence for regex
            metadata={
                "language": language,
                "parser": "regex",
                "structural_spans": len(spans),
            },
        )

    def _spans_to_mask(self, spans: list[CodeSpan], length: int) -> list[bool]:
        """Convert spans to character-level mask.

        Spans are applied in list order and later spans override earlier
        ones. Structural spans set their range to True; non-structural spans
        (function bodies) reset their range to False. This is what makes a
        method body compressible even when an enclosing class, decorator or
        export span was recorded as structural before it.

        Args:
            spans: List of code spans, outermost first.
            length: Total content length.

        Returns:
            Boolean mask aligned to characters.
        """
        mask = [False] * length

        for span in spans:
            # Clamp to the mask so out-of-range offsets can neither raise nor
            # wrap around via negative indexing.
            start = max(span.start, 0)
            end = min(span.end, length)
            if start < end:
                mask[start:end] = [span.is_structural] * (end - start)

        return mask

    def _detect_language(self, content: str) -> str:
        """Detect programming language from content.

        Each language scores one point per marker substring found in
        ``content``. The highest score wins; ties go to the language listed
        first. If nothing matches, ``default_language`` is returned.

        Args:
            content: Source code content.

        Returns:
            Language name (lowercase).
        """
        markers = {
            "python": ["def ", "import ", "from ", "class ", "async def"],
            "javascript": ["function ", "const ", "let ", "var ", "=>"],
            "typescript": ["interface ", "type ", ": string", ": number"],
            "go": ["func ", "package ", "import (", "type "],
            "rust": ["fn ", "let mut", "impl ", "pub fn", "use "],
            "java": ["public class", "private ", "protected ", "void "],
        }

        scores: dict[str, int] = {
            lang: sum(1 for marker in patterns if marker in content)
            for lang, patterns in markers.items()
        }

        if not scores or max(scores.values()) == 0:
            return self.default_language

        return max(scores, key=lambda k: scores[k])
502
+
503
+
504
def is_tree_sitter_available() -> bool:
    """Public wrapper that reports the result of ``_check_tree_sitter()``."""
    available = _check_tree_sitter()
    return available