PyPI - abstractcore - Versions diffs - 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl - Mend

abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

abstractcore/__init__.py +12 -0
abstractcore/architectures/detection.py +250 -4
abstractcore/assets/architecture_formats.json +14 -1
abstractcore/assets/model_capabilities.json +533 -10
abstractcore/compression/__init__.py +29 -0
abstractcore/compression/analytics.py +420 -0
abstractcore/compression/cache.py +250 -0
abstractcore/compression/config.py +279 -0
abstractcore/compression/exceptions.py +30 -0
abstractcore/compression/glyph_processor.py +381 -0
abstractcore/compression/optimizer.py +388 -0
abstractcore/compression/orchestrator.py +380 -0
abstractcore/compression/pil_text_renderer.py +818 -0
abstractcore/compression/quality.py +226 -0
abstractcore/compression/text_formatter.py +666 -0
abstractcore/compression/vision_compressor.py +371 -0
abstractcore/config/main.py +64 -0
abstractcore/config/manager.py +100 -5
abstractcore/core/session.py +61 -6
abstractcore/events/__init__.py +1 -1
abstractcore/media/auto_handler.py +312 -18
abstractcore/media/handlers/local_handler.py +14 -2
abstractcore/media/handlers/openai_handler.py +62 -3
abstractcore/media/processors/__init__.py +11 -1
abstractcore/media/processors/direct_pdf_processor.py +210 -0
abstractcore/media/processors/glyph_pdf_processor.py +227 -0
abstractcore/media/processors/image_processor.py +7 -1
abstractcore/media/processors/text_processor.py +18 -3
abstractcore/media/types.py +164 -7
abstractcore/providers/__init__.py +18 -0
abstractcore/providers/anthropic_provider.py +28 -2
abstractcore/providers/base.py +278 -6
abstractcore/providers/huggingface_provider.py +563 -23
abstractcore/providers/lmstudio_provider.py +38 -2
abstractcore/providers/mlx_provider.py +27 -2
abstractcore/providers/model_capabilities.py +352 -0
abstractcore/providers/ollama_provider.py +38 -4
abstractcore/providers/openai_provider.py +28 -2
abstractcore/providers/registry.py +85 -13
abstractcore/server/app.py +91 -81
abstractcore/utils/__init__.py +4 -1
abstractcore/utils/trace_export.py +287 -0
abstractcore/utils/version.py +1 -1
abstractcore/utils/vlm_token_calculator.py +655 -0
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/METADATA +107 -6
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/RECORD +50 -33
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +0 -0
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
{abstractcore-2.5.2.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0

abstractcore/providers/registry.py CHANGED Viewed

@@ -202,10 +202,14 @@ class ProviderRegistry:
         Args:
             provider_name: Name of the provider
-            **kwargs: Provider-specific parameters (e.g., api_key, base_url)
+            **kwargs: Provider-specific parameters including:
+                - api_key: API key for authentication (if required)
+                - base_url: Base URL for API endpoint (if applicable)
+                - input_capabilities: List of ModelInputCapability enums to filter by input capability
+                - output_capabilities: List of ModelOutputCapability enums to filter by output capability
         Returns:
-            List of available model names
+            List of available model names, optionally filtered by capabilities
         """
         try:
             provider_class = self.get_provider_class(provider_name)
@@ -285,13 +289,64 @@ class ProviderRegistry:
             for provider_name in self.list_provider_names()
         ]
-    def get_providers_with_models(self) -> List[Dict[str, Any]]:
-        """Get only providers that have available models."""
-        all_providers = self.get_all_providers_status()
-        return [
-            provider for provider in all_providers
-            if provider.get("status") == "available" and provider.get("model_count", 0) > 0
-        ]
+    def get_providers_with_models(self, include_models: bool = True) -> List[Dict[str, Any]]:
+        """
+        Get only providers that have available models.
+        Args:
+            include_models: If True, include actual model lists (slower).
+                           If False, return metadata only (much faster). Default: True.
+        """
+        if include_models:
+            # Original behavior - get full status including model lists
+            all_providers = self.get_all_providers_status()
+            return [
+                provider for provider in all_providers
+                if provider.get("status") == "available" and provider.get("model_count", 0) > 0
+            ]
+        else:
+            # Fast path - get all provider metadata without model enumeration
+            # Note: We return all providers since we can't quickly determine which have models
+            return self.get_providers_metadata_only()
+    def get_providers_metadata_only(self) -> List[Dict[str, Any]]:
+        """
+        Get provider metadata without enumerating models (fast path).
+        This method returns provider information without making API calls
+        or scanning for models, making it extremely fast for UI discovery.
+        """
+        providers_metadata = []
+        for provider_name in self.list_provider_names():
+            provider_info = self.get_provider_info(provider_name)
+            if not provider_info:
+                continue
+            # Basic availability check without model enumeration
+            try:
+                provider_class = self.get_provider_class(provider_name)
+                status = "available"  # Assume available if class can be imported
+            except Exception:
+                status = "error"
+            metadata = {
+                "name": provider_info.name,
+                "display_name": provider_info.display_name,
+                "type": provider_info.provider_type,
+                "model_count": "unknown",  # Don't enumerate models
+                "status": status,
+                "description": provider_info.description,
+                "local_provider": provider_info.local_provider,
+                "authentication_required": provider_info.authentication_required,
+                "supported_features": provider_info.supported_features,
+                "installation_extras": provider_info.installation_extras,
+                "models": []  # Empty list for fast response
+            }
+            providers_metadata.append(metadata)
+        return providers_metadata
     def create_provider_instance(self, provider_name: str, model: Optional[str] = None, **kwargs):
         """
@@ -348,7 +403,7 @@ def is_provider_available(provider_name: str) -> bool:
     return get_provider_registry().is_provider_available(provider_name)
-def get_all_providers_with_models() -> List[Dict[str, Any]]:
+def get_all_providers_with_models(include_models: bool = True) -> List[Dict[str, Any]]:
     """
     Get comprehensive information about all providers with available models.
@@ -356,14 +411,18 @@ def get_all_providers_with_models() -> List[Dict[str, Any]]:
     for provider discovery and information. It replaces the manual provider
     lists in factory.py and server/app.py.
+    Args:
+        include_models: If True, include actual model lists (slower).
+                       If False, return metadata only (much faster). Default: True.
     Returns:
         List of provider dictionaries with comprehensive metadata including:
         - name, display_name, type, description
         - model_count, status, supported_features
         - local_provider, authentication_required
-        - installation_extras, sample models
+        - installation_extras, sample models (if include_models=True)
     """
-    return get_provider_registry().get_providers_with_models()
+    return get_provider_registry().get_providers_with_models(include_models=include_models)
 def get_all_providers_status() -> List[Dict[str, Any]]:
@@ -386,5 +445,18 @@ def create_provider(provider_name: str, model: Optional[str] = None, **kwargs):
 def get_available_models_for_provider(provider_name: str, **kwargs) -> List[str]:
-    """Get available models for a specific provider."""
+    """
+    Get available models for a specific provider.
+    Args:
+        provider_name: Name of the provider
+        **kwargs: Provider-specific parameters including:
+            - api_key: API key for authentication (if required)
+            - base_url: Base URL for API endpoint (if applicable)
+            - input_capabilities: List of ModelInputCapability enums to filter by input capability
+            - output_capabilities: List of ModelOutputCapability enums to filter by output capability
+    Returns:
+        List of available model names, optionally filtered by capabilities
+    """
     return get_provider_registry().get_available_models(provider_name, **kwargs)

abstractcore/server/app.py CHANGED Viewed

@@ -261,53 +261,43 @@ async def general_exception_handler(request: Request, exc: Exception):
 # Model Type Detection
 # ============================================================================
-class ModelType(str, Enum):
-    """Model type enumeration for filtering"""
-    TEXT_GENERATION = "text-generation"
-    TEXT_EMBEDDING = "text-embedding"
+# Import the core capability enums directly
+from ..providers.model_capabilities import ModelInputCapability, ModelOutputCapability
-def is_embedding_model(model_name: str) -> bool:
-    """
-    Detect if a model is an embedding model based on naming heuristics.
-    Args:
-        model_name: The model name to check
-    Returns:
-        True if the model appears to be an embedding model
-    """
-    model_lower = model_name.lower()
-    # Heuristics for embedding models
-    embedding_patterns = [
-        "embed",           # Most embedding models contain "embed"
-        "all-minilm",      # Sentence-transformers MiniLM models
-        "all-mpnet",       # Sentence-transformers MPNet models
-        "nomic-embed",     # Nomic embedding models
-        "bert-",           # BERT models (e.g., bert-base-uncased)
-        "-bert",           # BERT-based embedding models (e.g., nomic-bert-2048)
-        "bge-",            # BAAI BGE embedding models
-        "gte-",            # GTE embedding models
-        "e5-",             # E5 embedding models
-        "instructor-",     # Instructor embedding models
-        "granite-embedding", # IBM Granite embedding models
-    ]
-    return any(pattern in model_lower for pattern in embedding_patterns)
 # ============================================================================
 # Provider Model Discovery (Using Centralized Registry)
 # ============================================================================
-def get_models_from_provider(provider_name: str) -> List[str]:
-    """Get available models from a specific provider using the centralized provider registry."""
+def get_models_from_provider(
+    provider_name: str,
+    input_capabilities=None,
+    output_capabilities=None
+) -> List[str]:
+    """
+    Get available models from a specific provider using the centralized provider registry.
+    Args:
+        provider_name: Name of the provider
+        input_capabilities: Optional list of ModelInputCapability enums
+        output_capabilities: Optional list of ModelOutputCapability enums
+    Returns:
+        List of model names from the provider, optionally filtered
+    """
     try:
         from ..providers.registry import get_available_models_for_provider
-        return get_available_models_for_provider(provider_name)
+        return get_available_models_for_provider(
+            provider_name,
+            input_capabilities=input_capabilities,
+            output_capabilities=output_capabilities
+        )
     except Exception as e:
         logger.debug(f"Failed to get models from provider {provider_name}: {e}")
         return []
 # ============================================================================
 # OpenAI Responses API Models (100% Compatible)
 # ============================================================================
@@ -994,43 +984,47 @@ async def list_models(
         description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
         example=""
     ),
-    type: Optional[ModelType] = Query(
+    input_type: Optional[ModelInputCapability] = Query(
         None,
-        description="Filter by model type: 'text-generation' for chat/completion models, 'text-embedding' for embedding models",
-        example="text-generation"
-    )
+        description="Filter by input capability: 'text', 'image', 'audio', 'video'"
+    ),
+    output_type: Optional[ModelOutputCapability] = Query(
+        None,
+        description="Filter by output capability: 'text', 'embeddings'"
+    ),
 ):
     """
     List available models from AbstractCore providers.
-    Returns a list of all available models, optionally filtered by provider and/or model type.
-    **Filters:**
-    - `provider`: Limit results to a specific provider
-    - `type`: Limit results to a specific model type (text-generation or text-embedding)
+    Returns a list of all available models, optionally filtered by provider and/or capabilities.
+    **Filtering System:**
+    - `input_type`: Filter by what INPUT the model can process (text, image, audio, video)
+    - `output_type`: Filter by what OUTPUT the model generates (text, embeddings)
     **Examples:**
     - `/v1/models` - All models from all providers
-    - `/v1/models?type=text-embedding` - Only embedding models
-    - `/v1/models?type=text-generation` - Only text generation models
-    - `/v1/models?provider=ollama` - Only Ollama models
-    - `/v1/models?provider=ollama&type=text-embedding` - Ollama embedding models only
+    - `/v1/models?output_type=embeddings` - Only embedding models
+    - `/v1/models?input_type=text&output_type=text` - Text-only models that generate text
+    - `/v1/models?input_type=image` - Models that can analyze images
+    - `/v1/models?provider=ollama&input_type=image` - Ollama vision models only
     """
     try:
         models_data = []
+        # Use the capability enums directly
+        input_capabilities = [input_type] if input_type else None
+        output_capabilities = [output_type] if output_type else None
         if provider:
-            # Get models from specific provider
-            models = get_models_from_provider(provider.lower())
+            # Get models from specific provider with optional filtering
+            models = get_models_from_provider(
+                provider.lower(),
+                input_capabilities=input_capabilities,
+                output_capabilities=output_capabilities
+            )
             for model in models:
-                # Apply type filter if specified
-                if type:
-                    is_embedding = is_embedding_model(model)
-                    if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                        continue  # Skip non-embedding models
-                    if type == ModelType.TEXT_GENERATION and is_embedding:
-                        continue  # Skip embedding models
                 model_id = f"{provider.lower()}/{model}"
                 models_data.append({
                     "id": model_id,
@@ -1040,23 +1034,25 @@ async def list_models(
                     "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                 })
-            filter_msg = f" (type={type.value})" if type else ""
+            filter_parts = []
+            if input_type:
+                filter_parts.append(f"input_type={input_type.value}")
+            if output_type:
+                filter_parts.append(f"output_type={output_type.value}")
+            filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
             logger.info(f"Listed {len(models_data)} models for provider {provider}{filter_msg}")
         else:
             # Get models from all providers using centralized registry
             from ..providers.registry import list_available_providers
             providers = list_available_providers()
             for prov in providers:
-                models = get_models_from_provider(prov)
+                models = get_models_from_provider(
+                    prov,
+                    input_capabilities=input_capabilities,
+                    output_capabilities=output_capabilities
+                )
                 for model in models:
-                    # Apply type filter if specified
-                    if type:
-                        is_embedding = is_embedding_model(model)
-                        if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                            continue  # Skip non-embedding models
-                        if type == ModelType.TEXT_GENERATION and is_embedding:
-                            continue  # Skip embedding models
                     model_id = f"{prov}/{model}"
                     models_data.append({
                         "id": model_id,
@@ -1066,7 +1062,13 @@ async def list_models(
                         "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                     })
-            filter_msg = f" (type={type.value})" if type else ""
+            filter_parts = []
+            if input_type:
+                filter_parts.append(f"input_type={input_type.value}")
+            if output_type:
+                filter_parts.append(f"output_type={output_type.value}")
+            filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
             logger.info(f"Listed {len(models_data)} models from all providers{filter_msg}")
         return {
@@ -1082,18 +1084,31 @@ async def list_models(
         }
 @app.get("/providers")
-async def list_providers():
+async def list_providers(
+    include_models: bool = Query(
+        False,
+        description="Include model lists for each provider. Set to true for full information (slower)."
+    )
+):
     """
     List all available AbstractCore providers and their capabilities.
     Returns comprehensive information about all registered LLM providers, including:
     - Provider name, display name, and type
-    - Number of available models and sample models
+    - Number of available models and sample models (if include_models=True)
     - Current availability status and detailed error information
     - Provider description and supported features
     - Authentication requirements and installation instructions
     - Local vs. cloud provider designation
+    **Query Parameters:**
+    - `include_models` (bool, default=False): Include model lists for each provider.
+      Set to `true` for full information (slower).
+    **Performance:**
+    - `include_models=false`: Metadata only (very fast, ~15ms) - **DEFAULT**
+    - `include_models=true`: Full information including model lists (slower, ~800ms)
     **Supported Providers:**
     - **OpenAI**: Commercial API with GPT-4, GPT-3.5, and embedding models
     - **Anthropic**: Commercial API with Claude 3 family models
@@ -1103,24 +1118,19 @@ async def list_providers():
     - **HuggingFace**: Access to HuggingFace models (transformers and embeddings)
     **Use Cases:**
-    - Discover available providers before making requests
-    - Check provider availability and model counts
+    - Fast provider discovery: `GET /providers` (default, very fast)
+    - Full provider information: `GET /providers?include_models=true`
     - Build dynamic provider selection UIs
     - Monitor provider status and troubleshoot issues
     - Get installation instructions for missing dependencies
-    **Enhanced Information:**
-    This endpoint now uses the centralized provider registry to provide
-    comprehensive information including supported features, authentication
-    requirements, and detailed status information.
     **Returns:** A list of provider objects with comprehensive metadata.
     """
     try:
         from ..providers.registry import get_all_providers_with_models, get_all_providers_status
         # Get providers with models (available providers)
-        available_providers = get_all_providers_with_models()
+        available_providers = get_all_providers_with_models(include_models=include_models)
         # Optionally include all providers (even those with issues) for debugging
         # Uncomment the next line if you want to see providers with errors too:

abstractcore/utils/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ from .token_utils import (
     ContentType
 )
 from .message_preprocessor import MessagePreprocessor, parse_files, has_files
+from .trace_export import export_traces, summarize_traces
 __all__ = [
     'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
     'ContentType',
     'MessagePreprocessor',
     'parse_files',
-    'has_files'
+    'has_files',
+    'export_traces',
+    'summarize_traces'
 ]

abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl

abstractcore 2.5.2__py3-none-any.whl → 2.5.3__py3-none-any.whl