abstractcore: 2.5.2-py3-none-any.whl → 2.6.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +19 -1
- abstractcore/architectures/detection.py +252 -6
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +533 -10
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +64 -0
- abstractcore/config/manager.py +100 -5
- abstractcore/core/retry.py +2 -2
- abstractcore/core/session.py +193 -7
- abstractcore/download.py +253 -0
- abstractcore/embeddings/manager.py +2 -2
- abstractcore/events/__init__.py +113 -2
- abstractcore/exceptions/__init__.py +49 -2
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/office_processor.py +2 -2
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/media/utils/image_scaler.py +2 -2
- abstractcore/media/vision_fallback.py +2 -2
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +228 -8
- abstractcore/providers/base.py +378 -11
- abstractcore/providers/huggingface_provider.py +563 -23
- abstractcore/providers/lmstudio_provider.py +284 -4
- abstractcore/providers/mlx_provider.py +27 -2
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +282 -6
- abstractcore/providers/openai_provider.py +286 -8
- abstractcore/providers/registry.py +85 -13
- abstractcore/providers/streaming.py +2 -2
- abstractcore/server/app.py +91 -81
- abstractcore/tools/common_tools.py +2 -2
- abstractcore/tools/handler.py +2 -2
- abstractcore/tools/parser.py +2 -2
- abstractcore/tools/registry.py +2 -2
- abstractcore/tools/syntax_rewriter.py +2 -2
- abstractcore/tools/tag_rewriter.py +3 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/self_fixes.py +2 -2
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
- abstractcore-2.6.0.dist-info/RECORD +108 -0
- abstractcore-2.5.2.dist-info/RECORD +0 -90
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0
abstractcore/exceptions/__init__.py

@@ -106,10 +106,55 @@ def format_model_error(provider: str, invalid_model: str, available_models: list
     return message.rstrip()


+def format_auth_error(provider: str, reason: str = None) -> str:
+    """
+    Format actionable authentication error with setup instructions.
+
+    Args:
+        provider: Provider name (e.g., "openai", "anthropic")
+        reason: Optional reason for auth failure
+
+    Returns:
+        Formatted error message with fix instructions
+    """
+    urls = {
+        "openai": "https://platform.openai.com/api-keys",
+        "anthropic": "https://console.anthropic.com/settings/keys",
+    }
+    msg = f"{provider.upper()} authentication failed"
+    if reason:
+        msg += f": {reason}"
+    msg += f"\nFix: abstractcore --set-api-key {provider} YOUR_KEY"
+    if provider.lower() in urls:
+        msg += f"\nGet key: {urls[provider.lower()]}"
+    return msg
+
+
+def format_provider_error(provider: str, reason: str) -> str:
+    """
+    Format actionable provider unavailability error with setup instructions.
+
+    Args:
+        provider: Provider name (e.g., "ollama", "lmstudio")
+        reason: Reason for unavailability (e.g., "Connection refused")
+
+    Returns:
+        Formatted error message with setup instructions
+    """
+    instructions = {
+        "ollama": "Install: https://ollama.com/download\nStart: ollama serve",
+        "lmstudio": "Install: https://lmstudio.ai/\nEnable API in settings",
+    }
+    msg = f"Provider '{provider}' unavailable: {reason}"
+    if provider.lower() in instructions:
+        msg += f"\n{instructions[provider.lower()]}"
+    return msg
+
+
 # Export all exceptions for easy importing
 __all__ = [
     'AbstractCoreError',
-    'ProviderError',
+    'ProviderError',
     'ProviderAPIError',
     'AuthenticationError',
     'Authentication',  # Backward compatibility alias

@@ -121,5 +166,7 @@ __all__ = [
     'SessionError',
     'ConfigurationError',
     'ModelNotFoundError',
-    'format_model_error'
+    'format_model_error',
+    'format_auth_error',
+    'format_provider_error'
 ]
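For reference, a quick sketch of the messages these new helpers produce, derived directly from the string-building logic in the hunk above:

```python
from abstractcore.exceptions import format_auth_error, format_provider_error

print(format_auth_error("openai", "invalid API key"))
# OPENAI authentication failed: invalid API key
# Fix: abstractcore --set-api-key openai YOUR_KEY
# Get key: https://platform.openai.com/api-keys

print(format_provider_error("ollama", "Connection refused"))
# Provider 'ollama' unavailable: Connection refused
# Install: https://ollama.com/download
# Start: ollama serve
```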
abstractcore/media/auto_handler.py

@@ -13,6 +13,25 @@ from typing import Dict, Any, Optional, List
 from .base import BaseMediaHandler
 from .types import MediaContent, MediaType, ContentFormat, detect_media_type
 from .processors import ImageProcessor, TextProcessor, PDFProcessor, OfficeProcessor
+from ..exceptions import UnsupportedFeatureError
+
+# Import Glyph compression support
+try:
+    from ..compression.orchestrator import CompressionOrchestrator
+    from ..compression.config import GlyphConfig
+    GLYPH_AVAILABLE = True
+except ImportError:
+    CompressionOrchestrator = None
+    GlyphConfig = None
+    GLYPH_AVAILABLE = False
+
+# Import vision detection
+try:
+    from ..architectures.detection import supports_vision
+    VISION_DETECTION_AVAILABLE = True
+except ImportError:
+    supports_vision = None
+    VISION_DETECTION_AVAILABLE = False


 class AutoMediaHandler(BaseMediaHandler):
@@ -41,6 +60,11 @@ class AutoMediaHandler(BaseMediaHandler):
         self._text_processor = None
         self._pdf_processor = None
         self._office_processor = None
+
+        # Initialize Glyph compression support
+        self._compression_orchestrator = None
+        self.glyph_config = kwargs.get('glyph_config')
+        self.enable_compression = kwargs.get('enable_glyph_compression', GLYPH_AVAILABLE)

         # Track which processors are available
         self._available_processors = self._check_processor_availability()
@@ -74,6 +98,20 @@ class AutoMediaHandler(BaseMediaHandler):
             availability['office'] = True
         except ImportError:
             availability['office'] = False
+
+        # GlyphProcessor (requires reportlab and pdf2image)
+        glyph_deps_available = True
+        if GLYPH_AVAILABLE and self.enable_compression:
+            # Check actual dependencies
+            try:
+                import reportlab
+                import pdf2image
+            except ImportError:
+                glyph_deps_available = False
+        else:
+            glyph_deps_available = False
+
+        availability['glyph'] = glyph_deps_available

         return availability
@@ -100,6 +138,13 @@ class AutoMediaHandler(BaseMediaHandler):
         if self._office_processor is None:
             self._office_processor = OfficeProcessor(**self.processor_config)
         return self._office_processor
+
+    def _get_compression_orchestrator(self) -> 'CompressionOrchestrator':
+        """Get or create CompressionOrchestrator instance."""
+        if self._compression_orchestrator is None and GLYPH_AVAILABLE:
+            config = self.glyph_config or GlyphConfig.from_abstractcore_config()
+            self._compression_orchestrator = CompressionOrchestrator(config)
+        return self._compression_orchestrator

     def _select_processor(self, file_path: Path, media_type: MediaType) -> Optional[BaseMediaHandler]:
         """
@@ -167,6 +212,20 @@ class AutoMediaHandler(BaseMediaHandler):
         Returns:
             MediaContent object with processed content
         """
+        # Check if Glyph compression should be applied
+        provider = kwargs.get('provider')
+        model = kwargs.get('model')
+        glyph_compression = kwargs.get('glyph_compression', 'auto')
+
+        if self._should_apply_compression(file_path, media_type, provider, model, glyph_compression):
+            try:
+                # Remove provider and model from kwargs to avoid duplicate arguments
+                compression_kwargs = {k: v for k, v in kwargs.items() if k not in ['provider', 'model']}
+                return self._apply_compression(file_path, provider, model, **compression_kwargs)
+            except Exception as e:
+                self.logger.warning(f"Glyph compression failed, falling back to standard processing: {e}")
+                # Continue with standard processing
+
         # Select the appropriate processor
         processor = self._select_processor(file_path, media_type)
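The compression gate reads `provider`, `model`, and `glyph_compression` out of `**kwargs`, so callers opt in per call. A minimal usage sketch; the public entry-point name `process()` is an assumption, only the keyword arguments are confirmed by this diff:

```python
from pathlib import Path
from abstractcore.media.auto_handler import AutoMediaHandler

handler = AutoMediaHandler(enable_glyph_compression=True)

# provider/model/glyph_compression are consumed by the gate above;
# 'auto' defers the final decision to the CompressionOrchestrator.
result = handler.process(  # hypothetical method name
    Path("report.pdf"),
    provider="openai",
    model="gpt-4o-mini",
    glyph_compression="auto",
)
```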
@@ -218,6 +277,221 @@ class AutoMediaHandler(BaseMediaHandler):
             fallback_processing=True,
             available_processors=list(self._available_processors.keys())
         )
+
+    def _should_apply_compression(self, file_path: Path, media_type: MediaType,
+                                  provider: str, model: str, glyph_compression: str) -> bool:
+        """
+        Check if Glyph compression should be applied.
+
+        ⚠️ EXPERIMENTAL FEATURE: Glyph compression requires vision-capable models.
+
+        Raises:
+            UnsupportedFeatureError: When glyph_compression="always" but model lacks vision support
+        """
+        # Check if Glyph is available
+        if not self._available_processors.get('glyph', False):
+            if glyph_compression == "always":
+                # User explicitly requested compression but it's not available
+                self._log_compression_unavailable_warning()
+            return False
+
+        if glyph_compression == "never":
+            return False
+
+        # Check vision support for compression
+        model_supports_vision = self._check_vision_support(model)
+
+        if glyph_compression == "always":
+            # Explicit compression request - enforce vision requirement
+            if not model_supports_vision:
+                raise UnsupportedFeatureError(
+                    f"Glyph compression requires a vision-capable model. "
+                    f"Model '{model}' does not support vision. "
+                    f"Vision-capable models include: gpt-4o, gpt-4o-mini, claude-3-5-sonnet, "
+                    f"llama3.2-vision, qwen2-vl, gemini-1.5-pro, gemini-1.5-flash, etc."
+                )
+            return True
+
+        # Auto-decision logic
+        if not provider or not model:
+            return False
+
+        # Only compress text-based content
+        if media_type not in [MediaType.TEXT, MediaType.DOCUMENT]:
+            return False
+
+        # Auto mode: check vision support and warn if not supported
+        if not model_supports_vision:
+            self.logger.warning(
+                f"Glyph compression skipped: model '{model}' does not support vision. "
+                f"Use a vision-capable model to enable compression."
+            )
+            return False
+
+        try:
+            orchestrator = self._get_compression_orchestrator()
+            if orchestrator:
+                return orchestrator.should_compress(file_path, provider, model, glyph_compression)
+        except Exception as e:
+            self.logger.debug(f"Compression decision failed: {e}")
+
+        return False
+
+    def _check_vision_support(self, model: str) -> bool:
+        """
+        Check if the model supports vision capabilities.
+
+        Args:
+            model: Model name to check
+
+        Returns:
+            True if model supports vision, False otherwise
+        """
+        if not model or not VISION_DETECTION_AVAILABLE:
+            # Conservative approach: assume no vision if detection unavailable
+            return False
+
+        try:
+            return supports_vision(model)
+        except Exception as e:
+            self.logger.debug(f"Failed to check vision support for model '{model}': {e}")
+            return False
+
+    def _log_compression_unavailable_warning(self):
+        """Log detailed warning about why Glyph compression is unavailable."""
+        self.logger.warning("Glyph compression requested but not available")
+
+        # Check specific reasons
+        if not GLYPH_AVAILABLE:
+            self.logger.warning("Glyph compression modules could not be imported")
+
+        # Check dependencies
+        missing_deps = []
+        try:
+            import reportlab
+        except ImportError:
+            missing_deps.append("reportlab")
+
+        try:
+            import pdf2image
+        except ImportError:
+            missing_deps.append("pdf2image")
+
+        if missing_deps:
+            deps_str = ", ".join(missing_deps)
+            self.logger.warning(f"Missing Glyph dependencies: {deps_str}")
+            self.logger.warning(f"Install with: pip install {' '.join(missing_deps)}")
+
+        if not self.enable_compression:
+            self.logger.warning("Glyph compression is disabled in AutoMediaHandler configuration")
+
+    def _apply_compression(self, file_path: Path, provider: str, model: str, **kwargs) -> MediaContent:
+        """Apply Glyph compression to the file."""
+        media_type = detect_media_type(file_path)
+
+        # For PDF files, use direct PDF-to-image conversion (no text extraction!)
+        if media_type == MediaType.DOCUMENT and file_path.suffix.lower() == '.pdf':
+            try:
+                from .processors.direct_pdf_processor import DirectPDFProcessor
+
+                # Configure for optimal compression (2 pages per image)
+                direct_processor = DirectPDFProcessor(
+                    pages_per_image=2,    # 16 pages → 8 images
+                    dpi=150,              # Good quality for VLM processing
+                    layout='horizontal',  # Side-by-side like open book
+                    gap=20,               # Small gap between pages
+                    **kwargs
+                )
+
+                # Get all combined images
+                combined_images = direct_processor.get_combined_image_paths(file_path)
+
+                # Get session info for metadata from DirectPDFProcessor
+                from ..config import get_config_manager
+                import hashlib
+                config_manager = get_config_manager()
+                glyph_cache_base = Path(config_manager.config.cache.glyph_cache_dir).expanduser()
+                pdf_hash = hashlib.md5(str(file_path).encode()).hexdigest()[:8]
+                session_id = f"pdf_{pdf_hash}_{len(combined_images)}pages"
+
+                # Create MediaContent objects for each combined image
+                media_contents = []
+                for i, img_path in enumerate(combined_images):
+                    with open(img_path, 'rb') as f:
+                        image_data = f.read()
+
+                    import base64
+                    encoded_data = base64.b64encode(image_data).decode('utf-8')
+
+                    media_content = MediaContent(
+                        media_type=MediaType.IMAGE,
+                        content=encoded_data,
+                        content_format=ContentFormat.BASE64,
+                        mime_type="image/png",
+                        metadata={
+                            'compression_used': True,
+                            'compression_method': 'direct_pdf_conversion',
+                            'pages_per_image': 2,
+                            'image_index': i,
+                            'total_images': len(combined_images),
+                            'original_file': str(file_path),
+                            'glyph_session_id': session_id,
+                            'glyph_cache_dir': str(glyph_cache_base / session_id),
+                            'processing_method': 'direct_pdf_conversion'  # For compatibility with test script
+                        }
+                    )
+                    media_contents.append(media_content)
+
+                self.logger.info(f"Direct PDF conversion: {len(combined_images)} combined images created")
+
+                # Return first image (in full implementation, would handle multiple)
+                if media_contents:
+                    return media_contents[0]
+                else:
+                    raise Exception("No combined images created")
+
+            except Exception as e:
+                self.logger.warning(f"DirectPDFProcessor failed: {e}, falling back to text extraction")
+                # Fall back to text extraction method
+                pass
+
+        # Fallback: text extraction method (for non-PDF or if direct method fails)
+        orchestrator = self._get_compression_orchestrator()
+        if not orchestrator:
+            raise Exception("Compression orchestrator not available")
+
+        if media_type == MediaType.DOCUMENT and file_path.suffix.lower() == '.pdf':
+            processor = self._get_pdf_processor()
+        elif media_type == MediaType.DOCUMENT:
+            processor = self._get_office_processor()
+        else:
+            processor = self._get_text_processor()
+
+        # Extract text content
+        extracted_content = processor._process_internal(file_path, media_type, **kwargs)
+        text_content = extracted_content.content
+
+        # Compress the extracted text content
+        glyph_compression = kwargs.get('glyph_compression', 'auto')
+        compressed_content = orchestrator.compress_content(text_content, provider, model, glyph_compression)
+
+        if compressed_content and len(compressed_content) > 0:
+            # Return first compressed image as primary content
+            # Additional images can be accessed through metadata
+            primary_content = compressed_content[0]
+
+            # Add information about additional images
+            if len(compressed_content) > 1:
+                primary_content.metadata['additional_images'] = len(compressed_content) - 1
+                primary_content.metadata['total_compressed_images'] = len(compressed_content)
+
+            # Add compression metadata
+            primary_content.metadata['compression_used'] = True
+            primary_content.metadata['original_file'] = str(file_path)
+
+            return primary_content
+        else:
+            raise Exception("No compressed content generated")

     def supports_media_type(self, media_type: MediaType) -> bool:
         """
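A short sketch of the mode contract encoded in `_should_apply_compression`, continuing the hypothetical `handler`/`process()` names from the earlier sketch:

```python
from pathlib import Path
from abstractcore.exceptions import UnsupportedFeatureError

# 'never'  -> always skip compression.
# 'auto'   -> skip (with a warning) if the model lacks vision support,
#             otherwise defer to CompressionOrchestrator.should_compress().
# 'always' -> raise if the model lacks vision support.
try:
    handler.process(Path("notes.txt"), provider="ollama",
                    model="llama3.1", glyph_compression="always")
except UnsupportedFeatureError as exc:
    print(exc)  # "Glyph compression requires a vision-capable model. ..."
```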
@@ -259,9 +533,9 @@ class AutoMediaHandler(BaseMediaHandler):
             return format_ext.lower() in image_formats

         elif media_type == MediaType.TEXT:
-            #
-
-            return
+            # TextProcessor can handle ANY text file through its plain text fallback
+            # This is always available and supports all text-based files
+            return True

         elif media_type == MediaType.DOCUMENT:
             # PDF support
@@ -272,9 +546,9 @@ class AutoMediaHandler(BaseMediaHandler):
             if format_ext.lower() in {'docx', 'xlsx', 'pptx'}:
                 return self._available_processors.get('office', False) or True  # Fallback to text

-            #
-
-            return
+            # Any other document type can be handled by text processor as fallback
+            # This allows processing of unknown document formats
+            return True

         return False
@@ -282,27 +556,47 @@ class AutoMediaHandler(BaseMediaHandler):
         """
         Get supported formats organized by media type.

+        Returns comprehensive list of all supported file extensions.
+        Note: TEXT type supports ANY text-based file through content detection
+        and fallback processing, not just the listed extensions.
+
         Returns:
             Dictionary mapping media type to list of supported extensions
+
+        Example:
+            >>> handler = AutoMediaHandler()
+            >>> formats = handler.get_supported_formats()
+            >>> len(formats['text'])  # 70+ text extensions
+            70+
+            >>> 'r' in formats['text']  # R scripts supported
+            True
         """
-
+        from .types import get_all_supported_extensions

-        #
-
-        formats['image'] = ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'tiff', 'webp']
+        # Get comprehensive list from FILE_TYPE_MAPPINGS
+        all_formats = get_all_supported_extensions()

-        #
-
+        # Filter based on available processors
+        result = {}

-
-
+        # Image formats (requires PIL)
+        if self._available_processors.get('image', False):
+            result['image'] = all_formats.get('image', [])

-
-
+        # Text formats (always available - TextProcessor has built-in fallback)
+        # Note: This includes 70+ extensions + unknown text files via content detection
+        result['text'] = all_formats.get('text', [])
+
+        # Document formats (includes PDFs, Office docs, and text fallbacks)
+        result['document'] = all_formats.get('document', [])

-
+        # Audio/Video (not yet implemented but listed for completeness)
+        if 'audio' in all_formats:
+            result['audio'] = all_formats['audio']
+        if 'video' in all_formats:
+            result['video'] = all_formats['video']

-        return
+        return result

     def get_processor_info(self) -> Dict[str, Any]:
         """
abstractcore/media/handlers/local_handler.py

@@ -412,12 +412,24 @@ class LocalMediaHandler(BaseProviderMediaHandler):
         if media_content.media_type == MediaType.IMAGE and self.can_handle_media(media_content):
             if media_content.content_format == ContentFormat.BASE64:
                 data_url = f"data:{media_content.mime_type};base64,{media_content.content}"
-
+                image_obj = {
                     "type": "image_url",
                     "image_url": {
                         "url": data_url
                     }
-                }
+                }
+
+                # Add detail level if specified in metadata (for Qwen models)
+                detail_level = media_content.metadata.get('detail_level', 'auto')
+                self.logger.debug(f"MediaContent metadata: {media_content.metadata}")
+                self.logger.debug(f"Found detail_level: {detail_level}")
+                if detail_level in ['low', 'high', 'auto']:
+                    image_obj["image_url"]["detail"] = detail_level
+                    self.logger.info(f"Setting detail level to '{detail_level}' for LMStudio image")
+                else:
+                    self.logger.warning(f"Invalid detail level '{detail_level}', skipping")
+
+                content.append(image_obj)
             else:
                 self.logger.warning(f"LMStudio requires base64 image format, got {media_content.content_format}")
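When a valid `detail_level` is present in the metadata, the appended entry takes the OpenAI-compatible shape below (values illustrative):

```python
image_obj = {
    "type": "image_url",
    "image_url": {
        "url": "data:image/png;base64,iVBORw0KGgo...",  # data URL built above
        "detail": "low",  # injected from media_content.metadata['detail_level']
    },
}
```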
abstractcore/media/handlers/openai_handler.py

@@ -30,6 +30,9 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
         """
         super().__init__("openai", model_capabilities, **kwargs)

+        # Store model name for Qwen-specific optimizations
+        self.model_name = kwargs.get('model_name', '')
+
         # OpenAI-specific configuration
         self.max_image_size = kwargs.get('max_image_size', 20 * 1024 * 1024)  # 20MB
         self.supported_image_detail = kwargs.get('supported_image_detail', ['auto', 'low', 'high'])
@@ -118,11 +121,64 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
         # Add detail level if supported by model
         if self.model_capabilities.get('vision_support'):
             detail_level = media_content.metadata.get('detail_level', 'auto')
+            self.logger.debug(f"OpenAI Handler - MediaContent metadata: {media_content.metadata}")
+            self.logger.debug(f"OpenAI Handler - Found detail_level: {detail_level}")
+
+            # Auto-adjust detail level for Qwen models to prevent context overflow
+            if self._is_qwen_model() and detail_level == 'auto':
+                detail_level = self._get_optimal_detail_for_qwen(media_content)
+                self.logger.debug(f"OpenAI Handler - Qwen auto-adjusted detail_level: {detail_level}")
+
             if detail_level in self.supported_image_detail:
                 image_obj["image_url"]["detail"] = detail_level
+                self.logger.info(f"OpenAI Handler - Setting detail level to '{detail_level}' for image")
+            else:
+                self.logger.warning(f"OpenAI Handler - Invalid detail level '{detail_level}', supported: {self.supported_image_detail}")

         return image_obj

+    def _is_qwen_model(self) -> bool:
+        """Check if the current model is a Qwen vision model."""
+        if not hasattr(self, 'model_name') or not self.model_name:
+            return False
+
+        model_name_lower = self.model_name.lower()
+        return any(qwen_variant in model_name_lower for qwen_variant in [
+            'qwen3-vl', 'qwen2.5-vl', 'qwen-vl', 'qwen/qwen3-vl', 'qwen/qwen2.5-vl'
+        ])
+
+    def _get_optimal_detail_for_qwen(self, media_content: MediaContent) -> str:
+        """
+        Determine optimal detail level for Qwen models based on context constraints.
+
+        According to SiliconFlow documentation:
+        - detail=low: 256 tokens per image (448x448 resize)
+        - detail=high: Variable tokens based on resolution (can be 24,576+ tokens)
+
+        For Qwen3-VL-30B with 131,072 token context limit, we should use detail=low
+        when processing multiple images to avoid context overflow.
+        """
+        # Get model context limit
+        max_tokens = self.model_capabilities.get('max_tokens', 32768)
+        max_image_tokens = self.model_capabilities.get('max_image_tokens', 24576)
+
+        # Estimate how many images we might be processing
+        # This is a heuristic - in practice we'd need the full batch context
+        estimated_images = getattr(self, '_estimated_image_count', 1)
+
+        # Calculate potential token usage with high detail
+        high_detail_tokens = estimated_images * max_image_tokens
+
+        # Use low detail if high detail would consume >60% of context
+        context_threshold = max_tokens * 0.6
+
+        if high_detail_tokens > context_threshold:
+            self.logger.info(f"Using detail=low for Qwen model: {estimated_images} images would consume "
+                             f"{high_detail_tokens:,} tokens (>{context_threshold:,} threshold)")
+            return 'low'
+        else:
+            return 'high'
+
     def _format_text_for_openai(self, media_content: MediaContent) -> Dict[str, Any]:
         """
         Format text/document content for OpenAI API.
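To make the 60% threshold concrete: with a 131,072-token Qwen3-VL context and 24,576 tokens per high-detail image, the cutoff is 78,643 tokens, so one image stays at high detail while four images force low detail. (With the fallback defaults above, max_tokens=32768, even a single high-detail image exceeds the threshold.) A minimal sketch of the same arithmetic:

```python
def qwen_detail(estimated_images: int, max_tokens: int = 131_072,
                max_image_tokens: int = 24_576) -> str:
    # Mirrors _get_optimal_detail_for_qwen: fall back to low detail when
    # high-detail images would consume more than 60% of the context window.
    return 'low' if estimated_images * max_image_tokens > max_tokens * 0.6 else 'high'

print(qwen_detail(1))  # 'high' (24,576 <= 78,643.2)
print(qwen_detail(4))  # 'low'  (98,304 >  78,643.2)
```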
@@ -226,12 +282,15 @@ class OpenAIMediaHandler(BaseProviderMediaHandler):
             Estimated token count
         """
         if media_content.media_type == MediaType.IMAGE:
-            #
-            # Base cost varies by detail level and image size
+            # Image token estimation varies by model
             detail_level = media_content.metadata.get('detail_level', 'auto')

             if detail_level == 'low':
-
+                # Qwen models use 256 tokens for low detail, OpenAI uses 85
+                if self._is_qwen_model():
+                    return 256  # Qwen low detail token count
+                else:
+                    return 85  # OpenAI low detail token count
             else:
                 # High detail calculation based on image dimensions
                 width = media_content.metadata.get('final_size', [512, 512])[0]
abstractcore/media/processors/__init__.py

@@ -10,4 +10,14 @@ from .text_processor import TextProcessor
 from .pdf_processor import PDFProcessor
 from .office_processor import OfficeProcessor

-
+# Import Glyph processor if available
+try:
+    from ...compression.glyph_processor import GlyphProcessor
+    GLYPH_AVAILABLE = True
+except ImportError:
+    GlyphProcessor = None
+    GLYPH_AVAILABLE = False
+
+__all__ = ['ImageProcessor', 'TextProcessor', 'PDFProcessor', 'OfficeProcessor']
+if GLYPH_AVAILABLE:
+    __all__.append('GlyphProcessor')