lumen-resources 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+ """
2
+ Configuration validator for Lumen services.
3
+
4
+ Provides validation utilities for YAML configuration files against
5
+ the Lumen configuration schema.
6
+ """
7
+
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ import yaml
12
+ from jsonschema import Draft7Validator
13
+ from pydantic import ValidationError
14
+
15
+ from .lumen_config import LumenConfig
16
+ from .exceptions import ConfigError
17
+
18
+
19
class ConfigValidator:
    """Validator for Lumen configuration files.

    Validates YAML configuration files using either JSON Schema
    (flexible) or Pydantic models (strict, with custom validators).

    Attributes:
        schema: Loaded JSON Schema used for validation.
        validator: Draft7Validator instance built from ``schema``.

    Example:
        >>> validator = ConfigValidator()
        >>> is_valid, errors = validator.validate_file("config.yaml")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(f"Validation error: {error}")
    """

    def __init__(self, schema_path: Path | None = None):
        """Initialize validator with optional custom schema.

        Args:
            schema_path: Optional path to a JSON Schema file. If None, uses
                the bundled ``schemas/config-schema.yaml`` next to this module.

        Raises:
            FileNotFoundError: If the schema file is not found.
            yaml.YAMLError: If the schema file is invalid YAML.

        Example:
            >>> validator = ConfigValidator()  # Uses bundled schema
            >>> validator = ConfigValidator(Path("custom-schema.yaml"))
        """
        if schema_path is None:
            # Fall back to the schema shipped alongside this module.
            schema_path = Path(__file__).parent / "schemas" / "config-schema.yaml"

        if not schema_path.exists():
            raise FileNotFoundError(f"Schema file not found: {schema_path}")

        with open(schema_path, "r", encoding="utf-8") as f:
            self.schema = yaml.safe_load(f)

        self.validator = Draft7Validator(self.schema)

    def _load_yaml(self, config_path: Path) -> tuple[Any, list[str]]:
        """Parse a YAML file, returning ``(data, errors)``.

        On success ``errors`` is empty; on failure ``data`` is None and
        ``errors`` holds a single human-readable message. Centralizing the
        load avoids parsing the same file twice in ``validate_and_load``.
        """
        if not config_path.exists():
            return None, [f"Configuration file not found: {config_path}"]

        try:
            with open(config_path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f), []
        except yaml.YAMLError as e:
            return None, [f"Invalid YAML syntax: {e}"]
        except Exception as e:
            return None, [f"Failed to load file: {e}"]

    def validate_file(
        self, config_path: Path | str, strict: bool = True
    ) -> tuple[bool, list[str]]:
        """Validate a configuration file against the schema.

        Args:
            config_path: Path to the configuration YAML file.
            strict: If True, use Pydantic validation with custom validators.
                If False, use JSON Schema validation only.

        Returns:
            Tuple of ``(is_valid, error_messages)``. ``is_valid`` is True if
            the configuration passes validation; ``error_messages`` contains
            detailed validation error messages otherwise.

        Example:
            >>> validator = ConfigValidator()
            >>> is_valid, errors = validator.validate_file("config.yaml", strict=True)
            >>> if not is_valid:
            ...     for error in errors:
            ...         print(f"Error: {error}")
        """
        config_data, load_errors = self._load_yaml(Path(config_path))
        if load_errors:
            return False, load_errors

        if strict:
            # Pydantic validation is stricter and runs custom validators.
            return self._validate_with_pydantic(config_data)
        return self._validate_with_jsonschema(config_data)

    def _validate_with_jsonschema(
        self, config_data: dict[str, Any]
    ) -> tuple[bool, list[str]]:
        """Validate configuration data using JSON Schema.

        Less strict than Pydantic validation but provides good basic
        structural validation.

        Args:
            config_data: Parsed configuration data dictionary.

        Returns:
            Tuple of ``(is_valid, error_messages)``.
        """
        errors = sorted(
            self.validator.iter_errors(config_data),
            # Stringify path elements before sorting: raw error paths mix
            # str keys and int array indices, which are not mutually
            # orderable and would raise TypeError under direct comparison.
            key=lambda err: [str(part) for part in err.path],
        )

        if not errors:
            return True, []

        error_messages = []
        for error in errors:
            path = ".".join(str(p) for p in error.path) if error.path else "root"
            error_messages.append(f"{error.message} (at: {path})")

        return False, error_messages

    def _validate_with_pydantic(
        self, config_data: dict[str, Any]
    ) -> tuple[bool, list[str]]:
        """Validate configuration data using Pydantic models.

        The most comprehensive validation: type checking, pattern matching,
        and business-logic validators defined on LumenConfig.

        Args:
            config_data: Parsed configuration data dictionary.

        Returns:
            Tuple of ``(is_valid, error_messages)``.
        """
        try:
            LumenConfig(**config_data)
            return True, []
        except ValidationError as e:
            # Flatten pydantic's structured errors into readable strings.
            error_messages = []
            for error in e.errors():
                loc = ".".join(str(loc_part) for loc_part in error["loc"])
                msg = error["msg"]
                error_messages.append(f"{msg} (at: {loc})")
            return False, error_messages
        except Exception as e:
            return False, [f"Validation error: {e}"]

    def validate_and_load(self, config_path: Path | str) -> LumenConfig:
        """Validate and load a configuration file.

        Performs strict Pydantic validation and returns a validated
        LumenConfig instance. Recommended entry point for production code.
        The file is parsed exactly once (the previous implementation read
        and parsed it a second time after validating).

        Args:
            config_path: Path to the configuration YAML file.

        Returns:
            Validated LumenConfig instance.

        Raises:
            ConfigError: If validation fails or the file cannot be loaded.

        Example:
            >>> validator = ConfigValidator()
            >>> config = validator.validate_and_load("config.yaml")
            >>> print(config.metadata.version)
            '1.0.0'
        """
        config_data, errors = self._load_yaml(Path(config_path))
        if not errors:
            is_valid, errors = self._validate_with_pydantic(config_data)
        else:
            is_valid = False

        if not is_valid:
            error_msg = "Configuration validation failed:\n" + "\n".join(
                f"  - {err}" for err in errors
            )
            raise ConfigError(error_msg)

        return LumenConfig(**config_data)
214
+
215
+
216
def validate_config_file(
    config_path: Path | str, schema_path: Path | str | None = None
) -> tuple[bool, list[str]]:
    """Validate a configuration file in a single call.

    Convenience wrapper around ConfigValidator using strict (Pydantic)
    validation, with either the default bundled schema or a custom one.

    Args:
        config_path: Path to the configuration YAML file.
        schema_path: Optional path to a custom JSON Schema file.

    Returns:
        Tuple of (is_valid, error_messages): is_valid is True when the
        configuration passes validation; otherwise error_messages lists
        the detailed validation errors.

    Example:
        >>> is_valid, errors = validate_config_file("config.yaml")
        >>> if not is_valid:
        ...     for error in errors:
        ...         print(f"Error: {error}")
    """
    resolved_schema: Path | None = None
    if schema_path:
        resolved_schema = Path(schema_path)
    return ConfigValidator(resolved_schema).validate_file(config_path, strict=True)
242
+
243
+
244
def load_and_validate_config(config_path: Path | str) -> LumenConfig:
    """Load and validate a configuration file.

    The recommended way to load configuration in production: validation
    and loading happen in one operation, so only configurations that pass
    strict validation are ever returned.

    Args:
        config_path: Path to the configuration YAML file.

    Returns:
        Validated LumenConfig instance, fully typed and checked against
        the bundled schema.

    Raises:
        ConfigError: If validation fails or the file cannot be loaded.
        FileNotFoundError: If the configuration file does not exist.
        yaml.YAMLError: If the configuration file contains invalid YAML.

    Example:
        >>> from lumen_resources.lumen_config_validator import load_and_validate_config
        >>> config = load_and_validate_config("config.yaml")
        >>> print(config.metadata.cache_dir)
        '/models'
    """
    default_validator = ConfigValidator()
    return default_validator.validate_and_load(config_path)
@@ -0,0 +1,233 @@
1
+ # generated by datamodel-codegen:
2
+ # filename: model_info-schema.json
3
+ # timestamp: 2025-10-19T06:58:43+00:00
4
+
5
+ from __future__ import annotations
6
+
7
+ from datetime import date
8
+ from enum import Enum
9
+
10
+ from pydantic import AwareDatetime, BaseModel, ConfigDict, Field
11
+
12
+
13
class Format(Enum):
    """Supported model source platforms.

    Each member identifies the platform a model is fetched from; the
    platform determines the loading mechanism and repository layout.

    Attributes:
        huggingface: Hugging Face Hub model format.
        openclip: OpenCLIP model format.
        modelscope: ModelScope model format.
        custom: Custom model format.

    Example:
        >>> source = Source(format=Format.huggingface, repo_id="openai/clip-vit-base-patch32")
        >>> print(source.format.value)
        'huggingface'
    """

    huggingface = "huggingface"
    openclip = "openclip"
    modelscope = "modelscope"
    custom = "custom"
35
+
36
+
37
class Source(BaseModel):
    """Where and how to obtain a model.

    Pairs the source platform/format with the repository identifier
    needed to fetch the model. Unknown keys are rejected.

    Attributes:
        format: Source platform (huggingface, openclip, modelscope, custom).
        repo_id: Repository identifier on that platform (non-empty).

    Example:
        >>> source = Source(
        ...     format=Format.huggingface,
        ...     repo_id="openai/clip-vit-base-patch32"
        ... )
        >>> print(source.repo_id)
        'openai/clip-vit-base-patch32'
    """

    model_config = ConfigDict(extra="forbid")

    format: Format
    repo_id: str = Field(
        ..., description="Repository identifier for model source", min_length=1
    )
63
+
64
+
65
class Requirements(BaseModel):
    """Python environment requirements for a model runtime.

    Captures the Python version constraint and package dependencies a
    runtime needs. Both fields are optional.

    Attributes:
        python: Python version requirement string (e.g. ">=3.8").
        dependencies: Required Python package names.

    Example:
        >>> req = Requirements(
        ...     python=">=3.8",
        ...     dependencies=["torch", "transformers", "pillow"]
        ... )
        >>> print(req.python)
        '>=3.8'
    """

    # NOTE(review): unlike the sibling models, no extra="forbid" is set here,
    # so unknown keys are accepted — presumably intentional in the generating
    # schema (model_info-schema.json); confirm before tightening.

    python: str | None = None
    dependencies: list[str] | None = None
86
+
87
+
88
class Runtimes(BaseModel):
    """Configuration of one model execution environment (e.g. torch, onnx, rknn).

    Describes whether the runtime is usable, which files it needs, which
    devices it supports, and its Python environment requirements. Unknown
    keys are rejected.

    Attributes:
        available: Whether this runtime is available for the model.
        files: Required files — either a flat list, or a mapping from a
            runtime variant to its file list.
        devices: Devices this runtime is compatible with.
        requirements: Python environment required by this runtime.

    Example:
        >>> runtime = Runtimes(
        ...     available=True,
        ...     files=["model.pt", "config.json"],
        ...     devices=["cuda", "cpu"],
        ...     requirements=Requirements(python=">=3.8", dependencies=["torch"])
        ... )
        >>> print(runtime.available)
        True
    """

    model_config = ConfigDict(extra="forbid")

    available: bool
    files: list[str] | dict[str, list[str]] | None = None
    devices: list[str] | None = None
    requirements: Requirements | None = None
118
+
119
+
120
class Datasets(BaseModel):
    """Label/embedding dataset pair for model evaluation and inference.

    Binds a model to the datasets used for zero-shot classification or
    other dataset-specific operations. Both fields are required; unknown
    keys are rejected.

    Attributes:
        labels: Identifier of the class-label dataset.
        embeddings: Identifier of the embeddings dataset.

    Example:
        >>> dataset = Datasets(
        ...     labels="imagenet1k_labels",
        ...     embeddings="imagenet1k_embeddings"
        ... )
        >>> print(dataset.labels)
        'imagenet1k_labels'
    """

    model_config = ConfigDict(extra="forbid")

    labels: str
    embeddings: str
144
+
145
+
146
class Metadata(BaseModel):
    """Descriptive metadata about a model.

    Licensing, authorship, dates, and categorization tags. All fields are
    optional; unknown keys are rejected.

    Attributes:
        license: License identifier for the model.
        author: Author or publishing organization.
        created_at: Model creation date.
        updated_at: Timezone-aware timestamp of the last update.
        tags: Descriptive tags for categorization.

    Example:
        >>> metadata = Metadata(
        ...     license="MIT",
        ...     author="OpenAI",
        ...     tags=["computer-vision", "multimodal", "clip"]
        ... )
        >>> print(metadata.license)
        'MIT'
    """

    model_config = ConfigDict(extra="forbid")

    license: str | None = None
    author: str | None = None
    created_at: date | None = None
    updated_at: AwareDatetime | None = None
    tags: list[str] | None = None
177
+
178
+
179
class ModelInfo(BaseModel):
    """Top-level schema for Lumen model_info.json files.

    Bundles a model's identity, source, per-runtime configuration,
    optional dataset bindings, and optional descriptive metadata.
    Unknown keys are rejected.

    Attributes:
        name: Model name identifier; also the OpenCLIP identifier when the
            source format is openclip.
        version: Semantic version string (X.Y.Z).
        description: Human-readable description of the model's purpose.
        model_type: Type/category of the model.
        embedding_dim: Dimension of the model's embedding space.
        source: Source platform and repository information.
        runtimes: Mapping of runtime name to its configuration.
        datasets: Optional mapping of dataset name to its configuration.
        metadata: Optional licensing/authorship metadata.

    Example:
        >>> model_info = ModelInfo(
        ...     name="ViT-B-32",
        ...     version="1.0.0",
        ...     description="Vision Transformer for CLIP",
        ...     model_type="vision-transformer",
        ...     embedding_dim=512,
        ...     source=Source(format=Format.huggingface, repo_id="openai/clip-vit-base-patch32"),
        ...     runtimes={"torch": Runtimes(available=True)}
        ... )
        >>> print(model_info.name)
        'ViT-B-32'
    """

    model_config = ConfigDict(extra="forbid")

    name: str = Field(
        ...,
        description="Model name identifier, this is also openclip model identifier if openclip is set as source format",
        min_length=1,
        max_length=100,
    )
    version: str = Field(
        ..., description="Model version", pattern="^[0-9]+\\.[0-9]+\\.[0-9]+$"
    )
    description: str = Field(
        ..., description="Model description and purpose", min_length=1, max_length=500
    )
    model_type: str = Field(..., description="Type of the model")
    embedding_dim: int = Field(
        ..., description="Dimension of the embedding space", ge=1, le=100000
    )
    source: Source
    runtimes: dict[str, Runtimes]
    datasets: dict[str, Datasets] | None = None
    metadata: Metadata | None = None