abstractcore-2.5.2-py3-none-any.whl → abstractcore-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. abstractcore/__init__.py +19 -1
  2. abstractcore/architectures/detection.py +252 -6
  3. abstractcore/assets/architecture_formats.json +14 -1
  4. abstractcore/assets/model_capabilities.json +533 -10
  5. abstractcore/compression/__init__.py +29 -0
  6. abstractcore/compression/analytics.py +420 -0
  7. abstractcore/compression/cache.py +250 -0
  8. abstractcore/compression/config.py +279 -0
  9. abstractcore/compression/exceptions.py +30 -0
  10. abstractcore/compression/glyph_processor.py +381 -0
  11. abstractcore/compression/optimizer.py +388 -0
  12. abstractcore/compression/orchestrator.py +380 -0
  13. abstractcore/compression/pil_text_renderer.py +818 -0
  14. abstractcore/compression/quality.py +226 -0
  15. abstractcore/compression/text_formatter.py +666 -0
  16. abstractcore/compression/vision_compressor.py +371 -0
  17. abstractcore/config/main.py +64 -0
  18. abstractcore/config/manager.py +100 -5
  19. abstractcore/core/retry.py +2 -2
  20. abstractcore/core/session.py +193 -7
  21. abstractcore/download.py +253 -0
  22. abstractcore/embeddings/manager.py +2 -2
  23. abstractcore/events/__init__.py +113 -2
  24. abstractcore/exceptions/__init__.py +49 -2
  25. abstractcore/media/auto_handler.py +312 -18
  26. abstractcore/media/handlers/local_handler.py +14 -2
  27. abstractcore/media/handlers/openai_handler.py +62 -3
  28. abstractcore/media/processors/__init__.py +11 -1
  29. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  30. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  31. abstractcore/media/processors/image_processor.py +7 -1
  32. abstractcore/media/processors/office_processor.py +2 -2
  33. abstractcore/media/processors/text_processor.py +18 -3
  34. abstractcore/media/types.py +164 -7
  35. abstractcore/media/utils/image_scaler.py +2 -2
  36. abstractcore/media/vision_fallback.py +2 -2
  37. abstractcore/providers/__init__.py +18 -0
  38. abstractcore/providers/anthropic_provider.py +228 -8
  39. abstractcore/providers/base.py +378 -11
  40. abstractcore/providers/huggingface_provider.py +563 -23
  41. abstractcore/providers/lmstudio_provider.py +284 -4
  42. abstractcore/providers/mlx_provider.py +27 -2
  43. abstractcore/providers/model_capabilities.py +352 -0
  44. abstractcore/providers/ollama_provider.py +282 -6
  45. abstractcore/providers/openai_provider.py +286 -8
  46. abstractcore/providers/registry.py +85 -13
  47. abstractcore/providers/streaming.py +2 -2
  48. abstractcore/server/app.py +91 -81
  49. abstractcore/tools/common_tools.py +2 -2
  50. abstractcore/tools/handler.py +2 -2
  51. abstractcore/tools/parser.py +2 -2
  52. abstractcore/tools/registry.py +2 -2
  53. abstractcore/tools/syntax_rewriter.py +2 -2
  54. abstractcore/tools/tag_rewriter.py +3 -3
  55. abstractcore/utils/__init__.py +4 -1
  56. abstractcore/utils/self_fixes.py +2 -2
  57. abstractcore/utils/trace_export.py +287 -0
  58. abstractcore/utils/version.py +1 -1
  59. abstractcore/utils/vlm_token_calculator.py +655 -0
  60. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/METADATA +207 -8
  61. abstractcore-2.6.0.dist-info/RECORD +108 -0
  62. abstractcore-2.5.2.dist-info/RECORD +0 -90
  63. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/WHEEL +0 -0
  64. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/entry_points.txt +0 -0
  65. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/licenses/LICENSE +0 -0
  66. {abstractcore-2.5.2.dist-info → abstractcore-2.6.0.dist-info}/top_level.txt +0 -0

abstractcore/server/app.py

@@ -261,53 +261,43 @@ async def general_exception_handler(request: Request, exc: Exception):
  # Model Type Detection
  # ============================================================================

- class ModelType(str, Enum):
-     """Model type enumeration for filtering"""
-     TEXT_GENERATION = "text-generation"
-     TEXT_EMBEDDING = "text-embedding"
+ # Import the core capability enums directly
+ from ..providers.model_capabilities import ModelInputCapability, ModelOutputCapability

- def is_embedding_model(model_name: str) -> bool:
-     """
-     Detect if a model is an embedding model based on naming heuristics.
-
-     Args:
-         model_name: The model name to check
-
-     Returns:
-         True if the model appears to be an embedding model
-     """
-     model_lower = model_name.lower()
-
-     # Heuristics for embedding models
-     embedding_patterns = [
-         "embed",              # Most embedding models contain "embed"
-         "all-minilm",         # Sentence-transformers MiniLM models
-         "all-mpnet",          # Sentence-transformers MPNet models
-         "nomic-embed",        # Nomic embedding models
-         "bert-",              # BERT models (e.g., bert-base-uncased)
-         "-bert",              # BERT-based embedding models (e.g., nomic-bert-2048)
-         "bge-",               # BAAI BGE embedding models
-         "gte-",               # GTE embedding models
-         "e5-",                # E5 embedding models
-         "instructor-",        # Instructor embedding models
-         "granite-embedding",  # IBM Granite embedding models
-     ]
-
-     return any(pattern in model_lower for pattern in embedding_patterns)

  # ============================================================================
  # Provider Model Discovery (Using Centralized Registry)
  # ============================================================================

- def get_models_from_provider(provider_name: str) -> List[str]:
-     """Get available models from a specific provider using the centralized provider registry."""
+ def get_models_from_provider(
+     provider_name: str,
+     input_capabilities=None,
+     output_capabilities=None
+ ) -> List[str]:
+     """
+     Get available models from a specific provider using the centralized provider registry.
+
+     Args:
+         provider_name: Name of the provider
+         input_capabilities: Optional list of ModelInputCapability enums
+         output_capabilities: Optional list of ModelOutputCapability enums
+
+     Returns:
+         List of model names from the provider, optionally filtered
+     """
      try:
          from ..providers.registry import get_available_models_for_provider
-         return get_available_models_for_provider(provider_name)
+         return get_available_models_for_provider(
+             provider_name,
+             input_capabilities=input_capabilities,
+             output_capabilities=output_capabilities
+         )
      except Exception as e:
          logger.debug(f"Failed to get models from provider {provider_name}: {e}")
          return []

+
+
  # ============================================================================
  # OpenAI Responses API Models (100% Compatible)
  # ============================================================================
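
The heuristic is_embedding_model() name matching is replaced by capability metadata resolved through the provider registry. A minimal sketch of the new filtering path, assuming the enum members ModelInputCapability.IMAGE and ModelOutputCapability.EMBEDDINGS exist (only the string values 'image' and 'embeddings' appear in this diff); both keyword arguments are passed explicitly since their defaults are not shown:

    from abstractcore.providers.model_capabilities import (
        ModelInputCapability,
        ModelOutputCapability,
    )
    from abstractcore.providers.registry import get_available_models_for_provider

    # Ollama models that accept image input (vision models).
    vision_models = get_available_models_for_provider(
        "ollama",
        input_capabilities=[ModelInputCapability.IMAGE],  # assumed member name
        output_capabilities=None,
    )

    # Ollama models that produce embeddings.
    embedding_models = get_available_models_for_provider(
        "ollama",
        input_capabilities=None,
        output_capabilities=[ModelOutputCapability.EMBEDDINGS],  # assumed member name
    )

    print(vision_models, embedding_models)
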
@@ -994,43 +984,47 @@ async def list_models(
          description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
          example=""
      ),
-     type: Optional[ModelType] = Query(
+     input_type: Optional[ModelInputCapability] = Query(
          None,
-         description="Filter by model type: 'text-generation' for chat/completion models, 'text-embedding' for embedding models",
-         example="text-generation"
-     )
+         description="Filter by input capability: 'text', 'image', 'audio', 'video'"
+     ),
+     output_type: Optional[ModelOutputCapability] = Query(
+         None,
+         description="Filter by output capability: 'text', 'embeddings'"
+     ),
  ):
      """
      List available models from AbstractCore providers.
-
-     Returns a list of all available models, optionally filtered by provider and/or model type.
-
-     **Filters:**
-     - `provider`: Limit results to a specific provider
-     - `type`: Limit results to a specific model type (text-generation or text-embedding)
-
+
+     Returns a list of all available models, optionally filtered by provider and/or capabilities.
+
+     **Filtering System:**
+     - `input_type`: Filter by what INPUT the model can process (text, image, audio, video)
+     - `output_type`: Filter by what OUTPUT the model generates (text, embeddings)
+
      **Examples:**
      - `/v1/models` - All models from all providers
-     - `/v1/models?type=text-embedding` - Only embedding models
-     - `/v1/models?type=text-generation` - Only text generation models
-     - `/v1/models?provider=ollama` - Only Ollama models
-     - `/v1/models?provider=ollama&type=text-embedding` - Ollama embedding models only
+     - `/v1/models?output_type=embeddings` - Only embedding models
+     - `/v1/models?input_type=text&output_type=text` - Text-only models that generate text
+     - `/v1/models?input_type=image` - Models that can analyze images
+     - `/v1/models?provider=ollama&input_type=image` - Ollama vision models only
      """
      try:
          models_data = []

+         # Use the capability enums directly
+         input_capabilities = [input_type] if input_type else None
+         output_capabilities = [output_type] if output_type else None
+
+
          if provider:
-             # Get models from specific provider
-             models = get_models_from_provider(provider.lower())
+             # Get models from specific provider with optional filtering
+             models = get_models_from_provider(
+                 provider.lower(),
+                 input_capabilities=input_capabilities,
+                 output_capabilities=output_capabilities
+             )
              for model in models:
-                 # Apply type filter if specified
-                 if type:
-                     is_embedding = is_embedding_model(model)
-                     if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                         continue  # Skip non-embedding models
-                     if type == ModelType.TEXT_GENERATION and is_embedding:
-                         continue  # Skip embedding models
-
                  model_id = f"{provider.lower()}/{model}"
                  models_data.append({
                      "id": model_id,
@@ -1040,23 +1034,25 @@ async def list_models(
                      "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                  })

-             filter_msg = f" (type={type.value})" if type else ""
+             filter_parts = []
+             if input_type:
+                 filter_parts.append(f"input_type={input_type.value}")
+             if output_type:
+                 filter_parts.append(f"output_type={output_type.value}")
+
+             filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
              logger.info(f"Listed {len(models_data)} models for provider {provider}{filter_msg}")
          else:
              # Get models from all providers using centralized registry
              from ..providers.registry import list_available_providers
              providers = list_available_providers()
              for prov in providers:
-                 models = get_models_from_provider(prov)
+                 models = get_models_from_provider(
+                     prov,
+                     input_capabilities=input_capabilities,
+                     output_capabilities=output_capabilities
+                 )
                  for model in models:
-                     # Apply type filter if specified
-                     if type:
-                         is_embedding = is_embedding_model(model)
-                         if type == ModelType.TEXT_EMBEDDING and not is_embedding:
-                             continue  # Skip non-embedding models
-                         if type == ModelType.TEXT_GENERATION and is_embedding:
-                             continue  # Skip embedding models
-
                      model_id = f"{prov}/{model}"
                      models_data.append({
                          "id": model_id,
@@ -1066,7 +1062,13 @@ async def list_models(
                          "permission": [{"allow_create_engine": False, "allow_sampling": True}]
                      })

-             filter_msg = f" (type={type.value})" if type else ""
+             filter_parts = []
+             if input_type:
+                 filter_parts.append(f"input_type={input_type.value}")
+             if output_type:
+                 filter_parts.append(f"output_type={output_type.value}")
+
+             filter_msg = f" ({', '.join(filter_parts)})" if filter_parts else ""
              logger.info(f"Listed {len(models_data)} models from all providers{filter_msg}")

          return {
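
Taken together, these hunks replace the single `type` query parameter on /v1/models with independent `input_type` and `output_type` filters. A hypothetical client-side sketch, assuming the AbstractCore server is running at http://localhost:8000 (host and port are not part of this diff):

    import requests

    BASE = "http://localhost:8000"  # assumed local server address

    # Only embedding models, across all providers (was ?type=text-embedding in 2.5.2).
    embedding_models = requests.get(f"{BASE}/v1/models", params={"output_type": "embeddings"})

    # Ollama models that can analyze images.
    ollama_vision = requests.get(
        f"{BASE}/v1/models",
        params={"provider": "ollama", "input_type": "image"},
    )

    print(embedding_models.json())
    print(ollama_vision.json())
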
@@ -1082,18 +1084,31 @@ async def list_models(
          }

  @app.get("/providers")
- async def list_providers():
+ async def list_providers(
+     include_models: bool = Query(
+         False,
+         description="Include model lists for each provider. Set to true for full information (slower)."
+     )
+ ):
      """
      List all available AbstractCore providers and their capabilities.

      Returns comprehensive information about all registered LLM providers, including:
      - Provider name, display name, and type
-     - Number of available models and sample models
+     - Number of available models and sample models (if include_models=True)
      - Current availability status and detailed error information
      - Provider description and supported features
      - Authentication requirements and installation instructions
      - Local vs. cloud provider designation

+     **Query Parameters:**
+     - `include_models` (bool, default=False): Include model lists for each provider.
+       Set to `true` for full information (slower).
+
+     **Performance:**
+     - `include_models=false`: Metadata only (very fast, ~15ms) - **DEFAULT**
+     - `include_models=true`: Full information including model lists (slower, ~800ms)
+
      **Supported Providers:**
      - **OpenAI**: Commercial API with GPT-4, GPT-3.5, and embedding models
      - **Anthropic**: Commercial API with Claude 3 family models
@@ -1103,24 +1118,19 @@ async def list_providers():
      - **HuggingFace**: Access to HuggingFace models (transformers and embeddings)

      **Use Cases:**
-     - Discover available providers before making requests
-     - Check provider availability and model counts
+     - Fast provider discovery: `GET /providers` (default, very fast)
+     - Full provider information: `GET /providers?include_models=true`
      - Build dynamic provider selection UIs
      - Monitor provider status and troubleshoot issues
      - Get installation instructions for missing dependencies

-     **Enhanced Information:**
-     This endpoint now uses the centralized provider registry to provide
-     comprehensive information including supported features, authentication
-     requirements, and detailed status information.
-
      **Returns:** A list of provider objects with comprehensive metadata.
      """
      try:
          from ..providers.registry import get_all_providers_with_models, get_all_providers_status

          # Get providers with models (available providers)
-         available_providers = get_all_providers_with_models()
+         available_providers = get_all_providers_with_models(include_models=include_models)

          # Optionally include all providers (even those with issues) for debugging
          # Uncomment the next line if you want to see providers with errors too:
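
A short sketch of the two retrieval modes the new include_models flag enables, again assuming a local server at http://localhost:8000:

    import requests

    BASE = "http://localhost:8000"  # assumed local server address

    # Default: fast, metadata-only listing (~15ms per the docstring above).
    providers_meta = requests.get(f"{BASE}/providers").json()

    # Full listing including per-provider model lists (~800ms per the docstring above).
    providers_full = requests.get(f"{BASE}/providers", params={"include_models": "true"}).json()
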

abstractcore/tools/common_tools.py

@@ -12,7 +12,6 @@ import subprocess
  import requests
  from pathlib import Path
  from typing import Optional, Dict, Any, Union
- import logging
  import platform
  import re
  import time
@@ -43,8 +42,9 @@

  # Import our enhanced tool decorator
  from abstractcore.tools.core import tool
+ from abstractcore.utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)

  # File Operations
  @tool(
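
The same two-line migration recurs across the tools and utils modules in the hunks below: the standard library logger is replaced by AbstractCore's structured logger, while existing call sites such as logger.debug(...) are left unchanged. A before/after sketch:

    # Before (2.5.2): stdlib logging
    import logging
    logger = logging.getLogger(__name__)

    # After (2.6.0): structured logging helper shipped with AbstractCore
    from abstractcore.utils.structured_logging import get_logger
    logger = get_logger(__name__)

    logger.debug("call sites keep the familiar logging-style API")
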

abstractcore/tools/handler.py

@@ -6,14 +6,14 @@ across all models, whether they have native tool APIs or require prompting.
  """

  import json
- import logging
  from typing import List, Dict, Any, Optional, Union, Callable

  from ..architectures import detect_architecture, get_model_capabilities, get_architecture_format
  from .core import ToolDefinition, ToolCall, ToolCallResponse, ToolResult
  from .parser import detect_tool_calls, parse_tool_calls, format_tool_prompt
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class UniversalToolHandler:

abstractcore/tools/parser.py

@@ -7,14 +7,14 @@ responses based on their architecture.

  import re
  import json
- import logging
  from typing import List, Optional, Dict, Any
  from enum import Enum

  from .core import ToolCall, ToolDefinition
  from ..architectures import detect_architecture, get_architecture_format
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class ToolFormat(Enum):

abstractcore/tools/registry.py

@@ -5,15 +5,15 @@ This module provides a centralized registry for managing available tools
  and executing them safely.
  """

- import logging
  import time
  from typing import Dict, List, Any, Callable, Optional, Union
  from functools import wraps

  from .core import ToolDefinition, ToolCall, ToolResult
  from ..events import EventType, emit_global, create_tool_event
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class ToolRegistry:

abstractcore/tools/syntax_rewriter.py

@@ -8,15 +8,15 @@ Supports multiple target formats including OpenAI, Codex, and custom agent formats.
  import re
  import json
  import uuid
- import logging
  from typing import List, Dict, Any, Optional, Union
  from dataclasses import dataclass
  from enum import Enum

  from .core import ToolCall
  from .parser import parse_tool_calls
+ from ..utils.structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  class SyntaxFormat(Enum):

abstractcore/tools/tag_rewriter.py

@@ -9,6 +9,9 @@ import re
  import json
  from typing import Dict, Any, Optional, Tuple, List
  from dataclasses import dataclass
+ from ..utils.structured_logging import get_logger
+
+ logger = get_logger(__name__)


  @dataclass
@@ -161,9 +164,6 @@ class ToolCallTagRewriter:
          Returns:
              Text with rewritten tool call tags
          """
-         import logging
-         logger = logging.getLogger(__name__)
-
          logger.debug(f"rewrite_text called with text: {text[:100] if text else None}")
          logger.debug(f"Target output tags: start='{self._output_start_tag}', end='{self._output_end_tag}'")


abstractcore/utils/__init__.py

@@ -13,6 +13,7 @@ from .token_utils import (
      ContentType
  )
  from .message_preprocessor import MessagePreprocessor, parse_files, has_files
+ from .trace_export import export_traces, summarize_traces

  __all__ = [
      'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
      'ContentType',
      'MessagePreprocessor',
      'parse_files',
-     'has_files'
+     'has_files',
+     'export_traces',
+     'summarize_traces'
  ]
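
Only the re-export is visible in this diff; the implementations live in the new abstractcore/utils/trace_export.py (file 57 above) and their signatures are not shown, so no call arguments are illustrated:

    # The two trace helpers become importable from the package's utils namespace.
    from abstractcore.utils import export_traces, summarize_traces
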

abstractcore/utils/self_fixes.py

@@ -8,9 +8,9 @@ before giving up on parsing.
  import json
  import re
  from typing import Optional
- import logging
+ from .structured_logging import get_logger

- logger = logging.getLogger(__name__)
+ logger = get_logger(__name__)


  def fix_json(text: str) -> Optional[str]:
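
The fix_json signature appears here only as context for the logging change. An illustrative call, with the input string made up for the example and the None-on-failure behaviour inferred from the Optional[str] return type and the module docstring ("before giving up on parsing"):

    from abstractcore.utils.self_fixes import fix_json

    # Hypothetical malformed JSON (trailing comma); fix_json is expected to return
    # a repaired string, or None if it cannot repair the input.
    repaired = fix_json('{"name": "demo", "tags": ["a", "b",]}')
    print(repaired)
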