headroom-ai 0.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their public registries.
Files changed (114)
  1. headroom/__init__.py +212 -0
  2. headroom/cache/__init__.py +76 -0
  3. headroom/cache/anthropic.py +517 -0
  4. headroom/cache/base.py +342 -0
  5. headroom/cache/compression_feedback.py +613 -0
  6. headroom/cache/compression_store.py +814 -0
  7. headroom/cache/dynamic_detector.py +1026 -0
  8. headroom/cache/google.py +884 -0
  9. headroom/cache/openai.py +584 -0
  10. headroom/cache/registry.py +175 -0
  11. headroom/cache/semantic.py +451 -0
  12. headroom/ccr/__init__.py +77 -0
  13. headroom/ccr/context_tracker.py +582 -0
  14. headroom/ccr/mcp_server.py +319 -0
  15. headroom/ccr/response_handler.py +772 -0
  16. headroom/ccr/tool_injection.py +415 -0
  17. headroom/cli.py +219 -0
  18. headroom/client.py +977 -0
  19. headroom/compression/__init__.py +42 -0
  20. headroom/compression/detector.py +424 -0
  21. headroom/compression/handlers/__init__.py +22 -0
  22. headroom/compression/handlers/base.py +219 -0
  23. headroom/compression/handlers/code_handler.py +506 -0
  24. headroom/compression/handlers/json_handler.py +418 -0
  25. headroom/compression/masks.py +345 -0
  26. headroom/compression/universal.py +465 -0
  27. headroom/config.py +474 -0
  28. headroom/exceptions.py +192 -0
  29. headroom/integrations/__init__.py +159 -0
  30. headroom/integrations/agno/__init__.py +53 -0
  31. headroom/integrations/agno/hooks.py +345 -0
  32. headroom/integrations/agno/model.py +625 -0
  33. headroom/integrations/agno/providers.py +154 -0
  34. headroom/integrations/langchain/__init__.py +106 -0
  35. headroom/integrations/langchain/agents.py +326 -0
  36. headroom/integrations/langchain/chat_model.py +1002 -0
  37. headroom/integrations/langchain/langsmith.py +324 -0
  38. headroom/integrations/langchain/memory.py +319 -0
  39. headroom/integrations/langchain/providers.py +200 -0
  40. headroom/integrations/langchain/retriever.py +371 -0
  41. headroom/integrations/langchain/streaming.py +341 -0
  42. headroom/integrations/mcp/__init__.py +37 -0
  43. headroom/integrations/mcp/server.py +533 -0
  44. headroom/memory/__init__.py +37 -0
  45. headroom/memory/extractor.py +390 -0
  46. headroom/memory/fast_store.py +621 -0
  47. headroom/memory/fast_wrapper.py +311 -0
  48. headroom/memory/inline_extractor.py +229 -0
  49. headroom/memory/store.py +434 -0
  50. headroom/memory/worker.py +260 -0
  51. headroom/memory/wrapper.py +321 -0
  52. headroom/models/__init__.py +39 -0
  53. headroom/models/registry.py +687 -0
  54. headroom/parser.py +293 -0
  55. headroom/pricing/__init__.py +51 -0
  56. headroom/pricing/anthropic_prices.py +81 -0
  57. headroom/pricing/litellm_pricing.py +113 -0
  58. headroom/pricing/openai_prices.py +91 -0
  59. headroom/pricing/registry.py +188 -0
  60. headroom/providers/__init__.py +61 -0
  61. headroom/providers/anthropic.py +621 -0
  62. headroom/providers/base.py +131 -0
  63. headroom/providers/cohere.py +362 -0
  64. headroom/providers/google.py +427 -0
  65. headroom/providers/litellm.py +297 -0
  66. headroom/providers/openai.py +566 -0
  67. headroom/providers/openai_compatible.py +521 -0
  68. headroom/proxy/__init__.py +19 -0
  69. headroom/proxy/server.py +2683 -0
  70. headroom/py.typed +0 -0
  71. headroom/relevance/__init__.py +124 -0
  72. headroom/relevance/base.py +106 -0
  73. headroom/relevance/bm25.py +255 -0
  74. headroom/relevance/embedding.py +255 -0
  75. headroom/relevance/hybrid.py +259 -0
  76. headroom/reporting/__init__.py +5 -0
  77. headroom/reporting/generator.py +549 -0
  78. headroom/storage/__init__.py +41 -0
  79. headroom/storage/base.py +125 -0
  80. headroom/storage/jsonl.py +220 -0
  81. headroom/storage/sqlite.py +289 -0
  82. headroom/telemetry/__init__.py +91 -0
  83. headroom/telemetry/collector.py +764 -0
  84. headroom/telemetry/models.py +880 -0
  85. headroom/telemetry/toin.py +1579 -0
  86. headroom/tokenizer.py +80 -0
  87. headroom/tokenizers/__init__.py +75 -0
  88. headroom/tokenizers/base.py +210 -0
  89. headroom/tokenizers/estimator.py +198 -0
  90. headroom/tokenizers/huggingface.py +317 -0
  91. headroom/tokenizers/mistral.py +245 -0
  92. headroom/tokenizers/registry.py +398 -0
  93. headroom/tokenizers/tiktoken_counter.py +248 -0
  94. headroom/transforms/__init__.py +106 -0
  95. headroom/transforms/base.py +57 -0
  96. headroom/transforms/cache_aligner.py +357 -0
  97. headroom/transforms/code_compressor.py +1313 -0
  98. headroom/transforms/content_detector.py +335 -0
  99. headroom/transforms/content_router.py +1158 -0
  100. headroom/transforms/llmlingua_compressor.py +638 -0
  101. headroom/transforms/log_compressor.py +529 -0
  102. headroom/transforms/pipeline.py +297 -0
  103. headroom/transforms/rolling_window.py +350 -0
  104. headroom/transforms/search_compressor.py +365 -0
  105. headroom/transforms/smart_crusher.py +2682 -0
  106. headroom/transforms/text_compressor.py +259 -0
  107. headroom/transforms/tool_crusher.py +338 -0
  108. headroom/utils.py +215 -0
  109. headroom_ai-0.2.13.dist-info/METADATA +315 -0
  110. headroom_ai-0.2.13.dist-info/RECORD +114 -0
  111. headroom_ai-0.2.13.dist-info/WHEEL +4 -0
  112. headroom_ai-0.2.13.dist-info/entry_points.txt +2 -0
  113. headroom_ai-0.2.13.dist-info/licenses/LICENSE +190 -0
  114. headroom_ai-0.2.13.dist-info/licenses/NOTICE +43 -0
headroom/compression/universal.py
@@ -0,0 +1,465 @@
+ """Universal compressor with ML-based detection and structure preservation.
+
+ This is the main entry point for compression. It:
+ 1. Detects the content type using Magika (ML)
+ 2. Extracts structure using the appropriate handler
+ 3. Compresses non-structural content with LLMLingua
+ 4. Optionally stores the original in CCR for retrieval
+
+ Usage:
+     compressor = UniversalCompressor()
+     result = compressor.compress(content)
+
+     # Result contains:
+     # - compressed: The compressed content
+     # - compression_ratio: compressed_length / original_length
+     # - content_type: Detected content type
+     # - preservation_ratio: Fraction of content preserved as structure
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from collections.abc import Callable
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ from headroom.compression.detector import (
+     ContentType,
+     DetectionResult,
+     FallbackDetector,
+     get_detector,
+ )
+ from headroom.compression.handlers.base import (
+     NoOpHandler,
+     StructureHandler,
+ )
+ from headroom.compression.handlers.code_handler import CodeStructureHandler
+ from headroom.compression.handlers.json_handler import JSONStructureHandler
+ from headroom.compression.masks import (
+     StructureMask,
+     compute_entropy_mask,
+     mask_to_spans,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class UniversalCompressorConfig:
+     """Configuration for UniversalCompressor.
+
+     Attributes:
+         use_magika: Use ML-based detection (requires the magika package).
+         use_llmlingua: Use LLMLingua for content compression.
+         use_entropy_preservation: Preserve high-entropy tokens (UUIDs, etc.).
+         entropy_threshold: Threshold for entropy-based preservation.
+         min_content_length: Minimum content length to compress.
+         compression_ratio_target: Target compression ratio (0.0-1.0).
+         ccr_enabled: Store originals in CCR for retrieval.
+     """
+
+     use_magika: bool = True
+     use_llmlingua: bool = True
+     use_entropy_preservation: bool = True
+     entropy_threshold: float = 0.85
+     min_content_length: int = 100
+     compression_ratio_target: float = 0.3  # Target a 70% reduction
+     ccr_enabled: bool = True
+
+
+ @dataclass
+ class CompressionResult:
+     """Result from compression.
+
+     Attributes:
+         compressed: The compressed content.
+         original: The original content (for reference).
+         compression_ratio: compressed_length / original_length.
+         tokens_before: Estimated token count before compression.
+         tokens_after: Estimated token count after compression.
+         content_type: Detected content type.
+         detection_confidence: Confidence of the content type detection.
+         handler_used: Name of the structure handler used.
+         preservation_ratio: Fraction of content marked as structural.
+         ccr_key: CCR storage key (if CCR is enabled).
+         metadata: Additional metadata.
+     """
+
+     compressed: str
+     original: str
+     compression_ratio: float
+     tokens_before: int
+     tokens_after: int
+     content_type: ContentType
+     detection_confidence: float
+     handler_used: str
+     preservation_ratio: float
+     ccr_key: str | None = None
+     metadata: dict = field(default_factory=dict)
+
+     @property
+     def tokens_saved(self) -> int:
+         """Number of tokens saved."""
+         return max(0, self.tokens_before - self.tokens_after)
+
+     @property
+     def savings_percentage(self) -> float:
+         """Percentage of tokens saved."""
+         if self.tokens_before == 0:
+             return 0.0
+         return (self.tokens_saved / self.tokens_before) * 100
+
+
+ class UniversalCompressor:
+     """Universal compressor with ML detection and structure preservation.
+
+     This compressor automatically:
+     1. Detects the content type (JSON, code, logs, text) using ML
+     2. Extracts structure (keys, signatures, templates)
+     3. Preserves structure while compressing content
+     4. Stores the original for CCR retrieval
+
+     Example:
+         >>> compressor = UniversalCompressor()
+         >>> result = compressor.compress('{"users": [{"id": 1, "name": "Alice"}]}')
+         >>> print(result.content_type)  # ContentType.JSON
+         >>> print(result.compressed)  # Structure preserved, values compressed
+     """
+
+     def __init__(
+         self,
+         config: UniversalCompressorConfig | None = None,
+         handlers: dict[ContentType, StructureHandler] | None = None,
+         compress_fn: Callable[[str], str] | None = None,
+     ):
+         """Initialize the compressor.
+
+         Args:
+             config: Compression configuration.
+             handlers: Custom handlers for content types.
+             compress_fn: Custom compression function. If None, uses
+                 LLMLingua when available, else simple truncation.
+         """
+         self.config = config or UniversalCompressorConfig()
+
+         # Initialize the detector
+         if self.config.use_magika:
+             self._detector = get_detector(prefer_magika=True)
+         else:
+             self._detector = FallbackDetector()
+
+         # Initialize handlers
+         self._handlers: dict[ContentType, StructureHandler] = handlers or {
+             ContentType.JSON: JSONStructureHandler(),
+             ContentType.CODE: CodeStructureHandler(),
+         }
+         self._noop_handler = NoOpHandler()
+
+         # Initialize the compression function
+         self._compress_fn = compress_fn or self._get_default_compress_fn()
+
+         # CCR store (lazily initialized)
+         self._ccr_store: Any | None = None
+
+     def _get_default_compress_fn(self) -> Callable[[str], str]:
+         """Get the default compression function.
+
+         Returns an LLMLingua wrapper if available, else simple truncation.
+         """
+         if self.config.use_llmlingua:
+             try:
+                 # Probe the import eagerly so an ImportError is raised
+                 # here and the fallback below actually engages.
+                 from headroom.transforms.llmlingua_compressor import compress_with_llmlingua  # noqa: F401
+
+                 return self._llmlingua_compress
+             except ImportError:
+                 logger.info("LLMLingua not available, using simple compression")
+
+         return self._simple_compress
+
+     def _llmlingua_compress(self, text: str) -> str:
+         """Compress using LLMLingua.
+
+         Args:
+             text: Text to compress.
+
+         Returns:
+             Compressed text.
+         """
+         try:
+             from headroom.transforms.llmlingua_compressor import compress_with_llmlingua
+
+             return compress_with_llmlingua(
+                 text,
+                 compression_rate=self.config.compression_ratio_target,
+             )
+         except ImportError:
+             return self._simple_compress(text)
+         except Exception as e:
+             logger.warning("LLMLingua compression failed: %s", e)
+             return self._simple_compress(text)
+
+     def _simple_compress(self, text: str) -> str:
+         """Simple compression fallback (truncation with an indicator).
+
+         Args:
+             text: Text to compress.
+
+         Returns:
+             Truncated text with an indicator.
+         """
+         target_len = int(len(text) * self.config.compression_ratio_target)
+         if len(text) <= target_len:
+             return text
+
+         # Keep the first two-thirds and the last third of the budget
+         keep_start = target_len * 2 // 3
+         keep_end = target_len // 3
+
+         return text[:keep_start] + "\n...[compressed]...\n" + text[-keep_end:]
+
+     def compress(
+         self,
+         content: str,
+         content_type: ContentType | None = None,
+         **kwargs: Any,
+     ) -> CompressionResult:
+         """Compress content with structure preservation.
+
+         Args:
+             content: Content to compress.
+             content_type: Override content type detection.
+             **kwargs: Handler-specific options.
+
+         Returns:
+             CompressionResult with compressed content and metadata.
+         """
+         # Handle empty/short content
+         if not content or len(content) < self.config.min_content_length:
+             return CompressionResult(
+                 compressed=content,
+                 original=content,
+                 compression_ratio=1.0,
+                 tokens_before=self._estimate_tokens(content),
+                 tokens_after=self._estimate_tokens(content),
+                 content_type=ContentType.UNKNOWN,
+                 detection_confidence=0.0,
+                 handler_used="none",
+                 preservation_ratio=1.0,
+                 metadata={"skipped": "content too short"},
+             )
+
+         # Detect the content type
+         if content_type is None:
+             detection = self._detector.detect(content)
+         else:
+             detection = DetectionResult(
+                 content_type=content_type,
+                 confidence=1.0,
+                 raw_label="override",
+             )
+
+         # Get the handler for the content type
+         handler = self._handlers.get(detection.content_type, self._noop_handler)
+
+         # Tokenize the content (character-level for masks)
+         tokens = list(content)
+
+         # Get the structure mask from the handler
+         handler_result = handler.get_mask(content, tokens, **kwargs)
+         structure_mask = handler_result.mask
+
+         # Optionally add entropy-based preservation
+         if self.config.use_entropy_preservation:
+             entropy_mask = compute_entropy_mask(
+                 tokens,
+                 threshold=self.config.entropy_threshold,
+             )
+             # Union: preserve if either mask says preserve
+             structure_mask = structure_mask.union(entropy_mask)
+
+         # Apply compression to the non-structural parts
+         compressed = self._compress_with_mask(content, structure_mask)
+
+         # Estimate tokens
+         tokens_before = self._estimate_tokens(content)
+         tokens_after = self._estimate_tokens(compressed)
+
+         # Store in CCR if enabled
+         ccr_key = None
+         if self.config.ccr_enabled:
+             ccr_key = self._store_in_ccr(content, compressed)
+
+         return CompressionResult(
+             compressed=compressed,
+             original=content,
+             compression_ratio=len(compressed) / len(content) if content else 1.0,
+             tokens_before=tokens_before,
+             tokens_after=tokens_after,
+             content_type=detection.content_type,
+             detection_confidence=detection.confidence,
+             handler_used=handler_result.handler_name,
+             preservation_ratio=structure_mask.preservation_ratio,
+             ccr_key=ccr_key,
+             metadata={
+                 "detection": {
+                     "raw_label": detection.raw_label,
+                     "language": detection.language,
+                 },
+                 "handler": handler_result.metadata,
+             },
+         )
+
+     def _compress_with_mask(self, content: str, mask: StructureMask) -> str:
+         """Apply compression while respecting the structure mask.
+
+         Args:
+             content: Original content.
+             mask: Structure mask.
+
+         Returns:
+             Compressed content with structure preserved.
+         """
+         spans = mask_to_spans(mask)
+         result_parts: list[str] = []
+
+         for span in spans:
+             span_content = content[span.start : span.end]
+
+             if span.is_structural:
+                 # Preserve structural content
+                 result_parts.append(span_content)
+             else:
+                 # Compress non-structural content
+                 if len(span_content) > 50:  # Only compress substantial spans
+                     compressed = self._compress_fn(span_content)
+                     result_parts.append(compressed)
+                 else:
+                     result_parts.append(span_content)
+
+         return "".join(result_parts)
+
+     def _estimate_tokens(self, text: str) -> int:
+         """Estimate the token count.
+
+         Uses a simple heuristic: ~4 characters per token.
+
+         Args:
+             text: Text to estimate.
+
+         Returns:
+             Estimated token count.
+         """
+         if not text:
+             return 0
+         # Simple estimation: ~4 chars per token on average
+         return len(text) // 4
+
+     def _store_in_ccr(self, original: str, compressed: str) -> str | None:
+         """Store the original in CCR for retrieval.
+
+         Args:
+             original: Original content.
+             compressed: Compressed content.
+
+         Returns:
+             CCR key if stored, None otherwise.
+         """
+         try:
+             if self._ccr_store is None:
+                 from headroom.cache.compression_store import CompressionStore
+
+                 self._ccr_store = CompressionStore()
+
+             key = self._ccr_store.store(
+                 original,
+                 compressed,
+                 original_tokens=self._estimate_tokens(original),
+                 compressed_tokens=self._estimate_tokens(compressed),
+             )
+             return key
+         except ImportError:
+             logger.debug("CCR store not available")
+             return None
+         except Exception as e:
+             logger.warning("Failed to store in CCR: %s", e)
+             return None
+
+     def compress_batch(
+         self,
+         contents: list[str],
+         **kwargs: Any,
+     ) -> list[CompressionResult]:
+         """Compress multiple contents.
+
+         More efficient than calling compress() in a loop because
+         ML detection can run over the whole batch at once.
+
+         Args:
+             contents: List of contents to compress.
+             **kwargs: Handler-specific options.
+
+         Returns:
+             List of CompressionResults.
+         """
+         if not contents:
+             return []
+
+         # Batch detection
+         if hasattr(self._detector, "detect_batch"):
+             detections = self._detector.detect_batch(contents)
+         else:
+             detections = [self._detector.detect(c) for c in contents]
+
+         # Compress each item with its detected type
+         results = []
+         for content, detection in zip(contents, detections):
+             result = self.compress(
+                 content,
+                 content_type=detection.content_type,
+                 **kwargs,
+             )
+             results.append(result)
+
+         return results
+
+     def get_handler(self, content_type: ContentType) -> StructureHandler:
+         """Get the handler for a content type.
+
+         Args:
+             content_type: Content type.
+
+         Returns:
+             Handler for the content type.
+         """
+         return self._handlers.get(content_type, self._noop_handler)
+
+     def register_handler(
+         self,
+         content_type: ContentType,
+         handler: StructureHandler,
+     ) -> None:
+         """Register a custom handler for a content type.
+
+         Args:
+             content_type: Content type to handle.
+             handler: Handler instance.
+         """
+         self._handlers[content_type] = handler
+
+
+ def compress(content: str, **kwargs: Any) -> CompressionResult:
+     """Convenience function for one-off compression.
+
+     Args:
+         content: Content to compress.
+         **kwargs: Passed to UniversalCompressor.compress().
+
+     Returns:
+         CompressionResult.
+
+     Example:
+         >>> from headroom.compression import compress
+         >>> result = compress('{"users": [{"id": 1}, {"id": 2}]}')
+         >>> print(result.compressed)
+     """
+     compressor = UniversalCompressor()
+     return compressor.compress(content, **kwargs)
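
A minimal usage sketch (not part of the wheel), relying only on APIs defined in the file above and assuming headroom-ai 0.2.13 is installed. The synthetic JSON payload and the truncating compress_fn lambda are illustrative stand-ins; use_magika=False selects the heuristic FallbackDetector and ccr_enabled=False skips the CompressionStore, so no ML extras are required:

from headroom.compression.universal import (
    UniversalCompressor,
    UniversalCompressorConfig,
)

# Heuristic detection, no CCR storage, and a trivial truncating
# compress_fn (an illustrative stand-in for LLMLingua).
config = UniversalCompressorConfig(use_magika=False, ccr_enabled=False)
compressor = UniversalCompressor(
    config=config,
    compress_fn=lambda text: text[:80] + "...",
)

# Synthetic payload, large enough to clear min_content_length (100 chars).
payload = '{"users": [' + ", ".join(
    f'{{"id": {i}, "name": "user-{i}"}}' for i in range(50)
) + "]}"

result = compressor.compress(payload)
print(result.content_type, result.handler_used)
print(f"{result.tokens_before} -> {result.tokens_after} tokens "
      f"({result.savings_percentage:.1f}% saved)")

# compress_batch() shares one detection pass across inputs; short
# inputs come back unchanged with metadata={"skipped": ...}.
for r in compressor.compress_batch([payload, "short"]):
    print(r.content_type, r.compression_ratio)

Custom handlers can be attached per content type with register_handler(); get_handler() falls back to the NoOpHandler for types without a registered handler.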