msgmodel 3.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
msgmodel/core.py ADDED
@@ -0,0 +1,506 @@
1
+ """
2
+ msgmodel.core
3
+ ~~~~~~~~~~~~~
4
+
5
+ Core API for the msgmodel library.
6
+
7
+ Provides a unified interface to query any supported LLM provider.
8
+ """
9
+
10
+ import os
11
+ import io
12
+ import base64
13
+ import mimetypes
14
+ import logging
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import Optional, Dict, Any, Iterator, Union
18
+
19
+ from .config import (
20
+ Provider,
21
+ OpenAIConfig,
22
+ GeminiConfig,
23
+ ProviderConfig,
24
+ get_default_config,
25
+ OPENAI_API_KEY_ENV,
26
+ GEMINI_API_KEY_ENV,
27
+ OPENAI_API_KEY_FILE,
28
+ GEMINI_API_KEY_FILE,
29
+ )
30
+ from .exceptions import (
31
+ MsgModelError,
32
+ ConfigurationError,
33
+ AuthenticationError,
34
+ FileError,
35
+ APIError,
36
+ )
37
+ from .providers.openai import OpenAIProvider
38
+ from .providers.gemini import GeminiProvider
39
+
40
logger = logging.getLogger(__name__)

# MIME type constants
# NOTE(review): MIME_TYPE_PDF is not referenced in this module -- it may be
# used elsewhere in the package; confirm before removing.
MIME_TYPE_PDF = "application/pdf"
MIME_TYPE_OCTET_STREAM = "application/octet-stream"
# Text encoding used when reading API key files from disk.
FILE_ENCODING = "utf-8"
46
+
47
+
48
@dataclass
class LLMResponse:
    """
    Structured response from an LLM provider.

    Returned by :func:`query`; bundles the extracted text with the raw
    provider payload so callers can inspect provider-specific details.

    Attributes:
        text: The extracted text response
        raw_response: The complete raw API response
        model: The model that generated the response
        provider: The provider that was used
        usage: Token usage information (if available)
    """
    text: str                                  # extracted text of the reply
    raw_response: Dict[str, Any]               # full provider API payload
    model: str                                 # model that produced the reply
    provider: str                              # provider name (e.g. 'openai')
    usage: Optional[Dict[str, int]] = None     # token counts, when reported
65
+
66
+
67
def _get_api_key(
    provider: Provider,
    api_key: Optional[str] = None
) -> str:
    """
    Resolve the API key for a provider.

    Resolution priority:
        1. Directly provided ``api_key`` parameter
        2. Provider-specific environment variable
        3. Provider-specific key file in the current directory

    Args:
        provider: The LLM provider
        api_key: Optional directly provided API key

    Returns:
        The API key string

    Raises:
        AuthenticationError: If no API key can be found, or the key file
            exists but cannot be read
    """
    # 1. An explicit parameter always wins.
    if api_key:
        return api_key

    # Map providers to their env vars and files
    env_vars = {
        Provider.OPENAI: OPENAI_API_KEY_ENV,
        Provider.GEMINI: GEMINI_API_KEY_ENV,
    }

    key_files = {
        Provider.OPENAI: OPENAI_API_KEY_FILE,
        Provider.GEMINI: GEMINI_API_KEY_FILE,
    }

    # 2. Environment variable.
    env_var = env_vars[provider]
    key = os.environ.get(env_var)
    if key:
        return key

    # 3. Key file in the current working directory.
    key_file = key_files[provider]
    if Path(key_file).exists():
        try:
            with open(key_file, "r", encoding=FILE_ENCODING) as f:
                return f.read().strip()
        except IOError as e:
            # Chain the original error so the root cause is preserved.
            raise AuthenticationError(
                f"Failed to read API key file {key_file}: {e}"
            ) from e

    raise AuthenticationError(
        f"No API key found for {provider.value}. "
        f"Provide api_key parameter, set {env_var} environment variable, "
        f"or create {key_file} file."
    )
122
+
123
+
124
+ def _infer_mime_type(file_like: io.BytesIO, filename: Optional[str] = None) -> str:
125
+ """
126
+ Infer MIME type from filename or file content with fallback magic byte detection.
127
+
128
+ v3.2.1 Enhancement: Detects MIME type using multiple strategies:
129
+ 1. Filename-based detection (fastest, most reliable)
130
+ 2. Magic byte detection (fallback for files without extensions)
131
+ 3. Safe default (application/octet-stream)
132
+
133
+ Args:
134
+ file_like: BytesIO object to inspect
135
+ filename: Optional filename hint for MIME type detection
136
+
137
+ Returns:
138
+ MIME type string (e.g., 'image/png', 'application/pdf')
139
+ """
140
+ # Strategy 1: Try filename-based detection
141
+ if filename:
142
+ mime_type, _ = mimetypes.guess_type(filename)
143
+ if mime_type:
144
+ return mime_type
145
+
146
+ # Strategy 2: Magic byte detection for common file formats
147
+ try:
148
+ current_pos = file_like.tell()
149
+ file_like.seek(0)
150
+ magic_bytes = file_like.read(512)
151
+ file_like.seek(current_pos)
152
+
153
+ # Magic byte signatures for common formats
154
+ signatures = {
155
+ b'%PDF': 'application/pdf',
156
+ b'\x89PNG\r\n\x1a\n': 'image/png',
157
+ b'\xff\xd8\xff': 'image/jpeg',
158
+ b'GIF8': 'image/gif',
159
+ b'BM': 'image/bmp',
160
+ b'RIFF': 'audio/wav',
161
+ b'ID3': 'audio/mpeg',
162
+ b'PK\x03\x04': 'application/zip',
163
+ b'\x50\x4b\x03\x04': 'application/zip',
164
+ b'\xef\xbb\xbf<?xml': 'application/xml',
165
+ b'<?xml': 'application/xml',
166
+ }
167
+
168
+ for sig, mime_type in signatures.items():
169
+ if magic_bytes.startswith(sig):
170
+ return mime_type
171
+ except (AttributeError, IOError):
172
+ pass
173
+
174
+ # Strategy 3: Safe default
175
+ return MIME_TYPE_OCTET_STREAM
176
+
177
+
178
def _prepare_file_data(file_path: str) -> Dict[str, Any]:
    """
    Prepare file data from disk for API submission.

    Args:
        file_path: Path to the file on disk

    Returns:
        Dictionary with keys: 'mime_type', 'data' (base64 string),
        'filename', and 'is_file_like' (always False for disk files)

    Raises:
        FileError: If the file cannot be read
    """
    # Build the Path once and reuse it (the original re-parsed file_path
    # three times).
    path = Path(file_path)
    try:
        with open(path, "rb") as f:
            binary_content = f.read()
    except (FileNotFoundError, IOError, OSError) as e:
        # Chain the original error so the root cause is preserved.
        raise FileError(f"Failed to read file {file_path}: {e}") from e

    # MIME type inference: filename hint first, then magic bytes.
    mime_type = _infer_mime_type(io.BytesIO(binary_content), filename=path.name)

    encoded_data = base64.b64encode(binary_content).decode("utf-8")

    return {
        "mime_type": mime_type,
        "data": encoded_data,
        "filename": path.name,
        "is_file_like": False,  # Mark as disk file
    }
209
+
210
+
211
def _prepare_file_like_data(file_like: io.BytesIO, filename: Optional[str] = None) -> Dict[str, Any]:
    """
    Prepare file-like object data for API submission.

    Processes a BytesIO object entirely in memory (never touches disk).
    This is the only supported method for file upload in msgmodel v3.2.1+
    to ensure privacy and stateless operation.

    Args:
        file_like: An io.BytesIO object containing binary data
        filename: Optional filename hint (defaults to 'upload.bin')

    Returns:
        Dictionary with keys: 'mime_type', 'data' (base64 string),
        'filename', and 'is_file_like' (always True for in-memory uploads)

    Raises:
        FileError: If the file-like object cannot be read
    """
    try:
        # Read from the start so a partially-consumed stream still uploads
        # its full content.
        file_like.seek(0)
        binary_content = file_like.read()
        # Rewind so the caller can reuse the object afterwards.
        file_like.seek(0)
    except (AttributeError, IOError, OSError) as e:
        # Chain the original error so the root cause is preserved.
        raise FileError(f"Failed to read from file-like object: {e}") from e

    # MIME type inference: filename hint first, then magic bytes.
    mime_type = _infer_mime_type(file_like, filename)

    encoded_data = base64.b64encode(binary_content).decode("utf-8")

    return {
        "mime_type": mime_type,
        "data": encoded_data,
        "filename": filename or "upload.bin",
        "is_file_like": True,  # Mark as in-memory file
    }
251
+
252
+
253
+ def _validate_max_tokens(max_tokens: int) -> None:
254
+ """Validate max_tokens parameter."""
255
+ if max_tokens < 1:
256
+ raise ConfigurationError("max_tokens must be at least 1")
257
+ if max_tokens > 1000000:
258
+ logger.warning(f"max_tokens={max_tokens} is very large and may cause issues")
259
+
260
+
261
def query(
    provider: Union[str, Provider],
    prompt: str,
    api_key: Optional[str] = None,
    system_instruction: Optional[str] = None,
    file_like: Optional[io.BytesIO] = None,
    filename: Optional[str] = None,
    config: Optional[ProviderConfig] = None,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
) -> LLMResponse:
    """
    Query an LLM provider and return a structured response.

    This is the main entry point for the library. It provides a unified
    interface to all supported LLM providers.

    Args:
        provider: The LLM provider ('openai' or 'gemini', or 'o', 'g')
        prompt: The user prompt text
        api_key: API key (optional if set via env var or file)
        system_instruction: Optional system instruction/prompt
        file_like: Optional file-like object (io.BytesIO) - must be seekable
            This is the only method for file upload. Files are base64-encoded
            and embedded in prompts for privacy and stateless operation.
            Limited to practical API constraints (~15-20MB for OpenAI, ~22MB for Gemini).
        filename: Optional filename hint for MIME type detection when using file_like.
            If not provided, attempts to use file_like.name attribute. Defaults to 'upload.bin'
        config: Optional provider-specific configuration object
        max_tokens: Override for max tokens (convenience parameter)
        model: Override for model (convenience parameter)
        temperature: Override for temperature (convenience parameter)

    Returns:
        LLMResponse containing the text response and metadata

    Raises:
        ConfigurationError: For invalid configuration
        AuthenticationError: For API key issues
        FileError: For file-related issues
        APIError: For API call failures

    Examples:
        >>> # Simple query with env var API key
        >>> response = query("openai", "Hello, world!")
        >>> print(response.text)

        >>> # Query with in-memory file (privacy-focused, no disk access)
        >>> import io
        >>> file_obj = io.BytesIO(binary_content)
        >>> response = query(
        ...     "openai",
        ...     "Analyze this document",
        ...     file_like=file_obj,
        ...     filename="document.pdf",  # Enables proper MIME type detection
        ...     system_instruction="You are a document analyst"
        ... )

        >>> # Using .name attribute on BytesIO (alternative to filename param)
        >>> file_obj = io.BytesIO(binary_content)
        >>> file_obj.name = "image.png"  # Set name attribute for MIME detection
        >>> response = query("gemini", "Describe this image", file_like=file_obj)
    """
    # Accept both enum members and shorthand strings ('o', 'g', ...).
    if isinstance(provider, str):
        provider = Provider.from_string(provider)

    # Get API key
    key = _get_api_key(provider, api_key)

    # Get or create config
    if config is None:
        config = get_default_config(provider)

    # Apply convenience overrides.
    # NOTE(review): these mutate the caller-supplied config object in
    # place -- confirm callers do not reuse one config across calls.
    if max_tokens is not None:
        _validate_max_tokens(max_tokens)
        config.max_tokens = max_tokens
    if model is not None:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    # Encode the in-memory file (if any) for embedding in the request.
    file_data = None
    if file_like:
        # Use provided filename, fall back to .name attribute, then default
        file_hint = filename or getattr(file_like, 'name', 'upload.bin')
        file_data = _prepare_file_like_data(file_like, filename=file_hint)

    # Dispatch to the concrete provider. Only construction differs per
    # branch, so the identical query/extract calls are hoisted below.
    if provider == Provider.OPENAI:
        assert isinstance(config, OpenAIConfig)
        prov = OpenAIProvider(key, config)
    elif provider == Provider.GEMINI:
        assert isinstance(config, GeminiConfig)
        prov = GeminiProvider(key, config)
    else:
        # Should never reach here due to Provider enum, but maintain type safety
        raise ConfigurationError(f"Unsupported provider: {provider}")

    raw_response = prov.query(prompt, system_instruction, file_data)
    text = prov.extract_text(raw_response)

    return LLMResponse(
        text=text,
        raw_response=raw_response,
        model=config.model,
        provider=provider.value,
        usage=raw_response.get("usage"),  # None when the provider omits usage
    )
381
+
382
+
383
def stream(
    provider: Union[str, Provider],
    prompt: str,
    api_key: Optional[str] = None,
    system_instruction: Optional[str] = None,
    file_path: Optional[str] = None,
    file_like: Optional[io.BytesIO] = None,
    filename: Optional[str] = None,
    config: Optional[ProviderConfig] = None,
    max_tokens: Optional[int] = None,
    model: Optional[str] = None,
    temperature: Optional[float] = None,
    timeout: float = 300,
    on_chunk: Optional[Any] = None,
) -> Iterator[str]:
    """
    Stream a response from an LLM provider.

    Similar to query(), but yields text chunks as they arrive instead
    of waiting for the complete response.

    Args:
        provider: The LLM provider ('openai' or 'gemini', or 'o', 'g').
            (Fix: the previous docstring also listed 'claude'/'c', which
            this function does not support -- any other provider raises
            ConfigurationError below.)
        prompt: The user prompt text
        api_key: API key (optional if set via env var or file)
        system_instruction: Optional system instruction/prompt
        file_path: Optional path to a file (image, PDF, etc.)
        file_like: Optional file-like object (io.BytesIO) - must be seekable
        filename: Optional filename hint for MIME type detection when using file_like.
            If not provided, attempts to use file_like.name attribute. Defaults to 'upload.bin'
        config: Optional provider-specific configuration object
        max_tokens: Override for max tokens (convenience parameter)
        model: Override for model (convenience parameter)
        temperature: Override for temperature (convenience parameter)
        timeout: Timeout in seconds for streaming connection (default: 300s/5min). v3.2.1+
        on_chunk: Optional callback(chunk) -> bool. Return False to abort stream. v3.2.1+

    Yields:
        Text chunks as they arrive from the API

    Raises:
        ConfigurationError: For invalid configuration or file conflicts
        AuthenticationError: For API key issues
        FileError: For file-related issues
        APIError: For API call failures
        StreamingError: For streaming-specific issues

    Examples:
        >>> # Stream response to prompt
        >>> for chunk in stream("openai", "Tell me a story"):
        ...     print(chunk, end="", flush=True)

        >>> # Stream with file attachment from disk
        >>> for chunk in stream("gemini", "Summarize this PDF", file_path="document.pdf"):
        ...     print(chunk, end="", flush=True)

        >>> # Stream with in-memory file (privacy-focused, no disk access)
        >>> import io
        >>> file_obj = io.BytesIO(uploaded_file_bytes)
        >>> for chunk in stream(
        ...     "openai",
        ...     "Analyze this uploaded file",
        ...     file_like=file_obj,
        ...     filename="document.pdf",  # Enables proper MIME type detection
        ...     system_instruction="Provide detailed analysis"
        ... ):
        ...     print(chunk, end="", flush=True)
    """
    # Accept both enum members and shorthand strings ('o', 'g', ...).
    if isinstance(provider, str):
        provider = Provider.from_string(provider)

    # Check for mutually exclusive file parameters
    if file_path is not None and file_like is not None:
        raise ConfigurationError(
            "Cannot specify both file_path and file_like. "
            "Use file_path for disk files or file_like for in-memory BytesIO objects, not both."
        )

    # Get API key
    key = _get_api_key(provider, api_key)

    # Get or create config
    if config is None:
        config = get_default_config(provider)

    # Apply convenience overrides.
    # NOTE(review): these mutate the caller-supplied config object in
    # place -- confirm callers do not reuse one config across calls.
    if max_tokens is not None:
        _validate_max_tokens(max_tokens)
        config.max_tokens = max_tokens
    if model is not None:
        config.model = model
    if temperature is not None:
        config.temperature = temperature

    # Prepare file data from disk or from the in-memory object.
    file_data = None
    if file_path:
        file_data = _prepare_file_data(file_path)
    elif file_like:
        # Use provided filename, fall back to .name attribute, then default
        file_hint = filename or getattr(file_like, 'name', 'upload.bin')
        file_data = _prepare_file_like_data(file_like, filename=file_hint)

    # Dispatch to the concrete provider. Only construction differs per
    # branch, so the identical streaming call is hoisted below.
    if provider == Provider.OPENAI:
        assert isinstance(config, OpenAIConfig)
        prov = OpenAIProvider(key, config)
    elif provider == Provider.GEMINI:
        assert isinstance(config, GeminiConfig)
        prov = GeminiProvider(key, config)
    else:
        # Should never reach here due to Provider enum, but maintain type safety
        raise ConfigurationError(f"Unsupported provider: {provider}")

    yield from prov.stream(prompt, system_instruction, file_data, timeout=timeout, on_chunk=on_chunk)
msgmodel/exceptions.py ADDED
@@ -0,0 +1,93 @@
1
+ """
2
+ msgmodel.exceptions
3
+ ~~~~~~~~~~~~~~~~~~~
4
+
5
+ Custom exceptions for the msgmodel library.
6
+
7
+ All exceptions inherit from MsgModelError, allowing callers to catch
8
+ all library-specific errors with a single except clause.
9
+ """
10
+
11
+
12
class MsgModelError(Exception):
    """Root of the msgmodel exception hierarchy.

    Catching this single type handles every error the library raises.
    """
15
+
16
+
17
class ConfigurationError(MsgModelError):
    """Invalid or incomplete configuration.

    Typical triggers include an unknown provider name, an out-of-range
    max_tokens value, or a missing required parameter.
    """
27
+
28
+
29
class AuthenticationError(MsgModelError):
    """API authentication failure.

    Typical triggers include a missing or invalid API key, or an API key
    file that cannot be found or read.
    """
39
+
40
+
41
class FileError(MsgModelError):
    """File operation failure.

    Typical triggers include a missing file, an unreadable file, or an
    invalid file format.
    """
51
+
52
+
53
class APIError(MsgModelError):
    """Failure reported by (or while calling) a provider API.

    Attributes:
        status_code: HTTP status code of the failed response, if known.
        response_text: Raw body of the failed response, if captured.
    """

    def __init__(
        self,
        message: str,
        status_code: int | None = None,
        response_text: str | None = None
    ):
        # Keep the extra context on the instance so callers can branch on
        # the status code; the message itself lives on the base Exception.
        self.status_code = status_code
        self.response_text = response_text
        super().__init__(message)
71
+
72
+
73
class ProviderError(MsgModelError):
    """Provider-specific failure.

    Typical triggers include a file type the provider cannot accept, a
    provider-side validation failure, or a missing provider dependency
    (e.g. the anthropic package).
    """
83
+
84
+
85
class StreamingError(MsgModelError):
    """Streaming-specific failure.

    Typical triggers include a connection interrupted mid-stream or an
    invalid streaming response format.
    """
@@ -0,0 +1,11 @@
1
+ """
2
+ msgmodel.providers
3
+ ~~~~~~~~~~~~~~~~~~
4
+
5
+ Provider-specific implementations for LLM API calls.
6
+ """
7
+
8
+ from .openai import OpenAIProvider
9
+ from .gemini import GeminiProvider
10
+
11
+ __all__ = ["OpenAIProvider", "GeminiProvider"]