lumen-resources 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lumen_resources/__init__.py +89 -0
- lumen_resources/cli.py +402 -0
- lumen_resources/downloader.py +449 -0
- lumen_resources/exceptions.py +110 -0
- lumen_resources/lumen_config.py +459 -0
- lumen_resources/lumen_config_validator.py +270 -0
- lumen_resources/model_info.py +233 -0
- lumen_resources/model_info_validator.py +257 -0
- lumen_resources/platform.py +270 -0
- lumen_resources/result_schemas/README.md +14 -0
- lumen_resources/result_schemas/__init__.py +14 -0
- lumen_resources/result_schemas/embedding_v1.py +29 -0
- lumen_resources/result_schemas/face_v1.py +55 -0
- lumen_resources/result_schemas/labels_v1.py +39 -0
- lumen_resources/schemas/config-schema.yaml +249 -0
- lumen_resources/schemas/model_info-schema.json +166 -0
- lumen_resources/schemas/result_schemas/embedding_v1.json +35 -0
- lumen_resources/schemas/result_schemas/face_v1.json +61 -0
- lumen_resources/schemas/result_schemas/labels_v1.json +49 -0
- lumen_resources-0.2.0.dist-info/METADATA +133 -0
- lumen_resources-0.2.0.dist-info/RECORD +24 -0
- lumen_resources-0.2.0.dist-info/WHEEL +5 -0
- lumen_resources-0.2.0.dist-info/entry_points.txt +2 -0
- lumen_resources-0.2.0.dist-info/top_level.txt +1 -0

lumen_resources/model_info_validator.py
@@ -0,0 +1,257 @@
+"""
+Validator for model_info.json files using JSON Schema and Pydantic.
+"""
+
+import json
+from pathlib import Path
+from typing import Any
+
+from jsonschema import Draft7Validator
+from pydantic import ValidationError
+
+from .model_info import ModelInfo
+
+
+class ModelInfoValidator:
+    """Validator for model_info.json files.
+
+    Provides comprehensive validation for model information JSON files using
+    both JSON Schema and Pydantic models. Ensures model metadata integrity
+    and compliance with the Lumen model specification.
+
+    Attributes:
+        schema: Loaded JSON Schema for validation.
+        validator: Draft7Validator instance for JSON Schema validation.
+
+    Example:
+        >>> validator = ModelInfoValidator()
+        >>> is_valid, errors = validator.validate_file("model_info.json")
+        >>> if not is_valid:
+        ...     for error in errors:
+        ...         print(f"Validation error: {error}")
+    """
+
+    def __init__(self, schema_path: str | Path | None = None):
+        """Initialize validator with JSON schema.
+
+        Args:
+            schema_path: Path to model_info-schema.json file. If None,
+                uses the bundled schema from docs/schemas/model_info-schema.json.
+
+        Raises:
+            FileNotFoundError: If the schema file is not found.
+            json.JSONDecodeError: If the schema file contains invalid JSON.
+
+        Example:
+            >>> validator = ModelInfoValidator()  # Uses bundled schema
+            >>> validator = ModelInfoValidator(Path("custom-schema.json"))
+        """
+        if schema_path is None:
+            schema_path = Path(__file__).parent / "schemas" / "model_info-schema.json"
+        else:
+            schema_path = Path(schema_path)
+
+        if not schema_path.exists():
+            raise FileNotFoundError(f"Schema file not found: {schema_path}")
+
+        with open(schema_path, "r", encoding="utf-8") as f:
+            self.schema: dict[str, Any] = json.load(f)
+
+        self.validator = Draft7Validator(self.schema)
+
+    def validate_file(
+        self, path: str | Path, strict: bool = True
+    ) -> tuple[bool, list[str]]:
+        """Validate a model_info.json file.
+
+        Performs validation of model information JSON files using either
+        JSON Schema validation (flexible) or Pydantic validation (strict).
+
+        Args:
+            path: Path to model_info.json file.
+            strict: If True, use Pydantic validation with custom validators.
+                If False, use JSON Schema validation only.
+
+        Returns:
+            Tuple of (is_valid, error_messages) where is_valid indicates
+            if the file passes validation, and error_messages contains
+            detailed validation error messages.
+
+        Example:
+            >>> validator = ModelInfoValidator()
+            >>> is_valid, errors = validator.validate_file("model_info.json", strict=True)
+            >>> if not is_valid:
+            ...     for error in errors:
+            ...         print(f"Error: {error}")
+        """
+        path = Path(path)
+        if not path.exists():
+            return False, [f"File not found: {path}"]
+
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except json.JSONDecodeError as e:
+            return False, [f"Invalid JSON: {e}"]
+        except Exception as e:
+            return False, [f"Error reading file: {e}"]
+
+        if strict:
+            return self._validate_with_pydantic(data)
+        else:
+            return self._validate_with_jsonschema(data)
+
+    def _validate_with_jsonschema(self, data: dict[str, Any]) -> tuple[bool, list[str]]:
+        """Validate model info data using JSON Schema.
+
+        Performs flexible validation using the JSON Schema specification.
+        This method provides basic structural validation for model information.
+
+        Args:
+            data: Parsed model_info.json data dictionary.
+
+        Returns:
+            Tuple of (is_valid, error_messages) where is_valid indicates
+            if the data passes JSON Schema validation.
+
+        Example:
+            >>> validator = ModelInfoValidator()
+            >>> is_valid, errors = validator._validate_with_jsonschema(data)
+        """
+        errors = sorted(self.validator.iter_errors(data), key=lambda e: e.path)
+
+        if not errors:
+            return True, []
+
+        error_messages = []
+        for error in errors:
+            path = ".".join(str(p) for p in error.path) if error.path else "root"
+            error_messages.append(f"{error.message} (at: {path})")
+
+        return False, error_messages
+
+    def _validate_with_pydantic(self, data: dict[str, Any]) -> tuple[bool, list[str]]:
+        """Validate model info data using Pydantic models.
+
+        Performs strict validation using Pydantic models with custom validators.
+        This provides comprehensive validation including type checking,
+        pattern matching, and model-specific business rules.
+
+        Args:
+            data: Parsed model_info.json data dictionary.
+
+        Returns:
+            Tuple of (is_valid, error_messages) where is_valid indicates
+            if the data passes Pydantic model validation.
+
+        Example:
+            >>> validator = ModelInfoValidator()
+            >>> is_valid, errors = validator._validate_with_pydantic(data)
+        """
+        try:
+            ModelInfo.model_validate(data)
+            return True, []
+        except ValidationError as e:
+            # Parse pydantic validation errors
+            error_messages = []
+            for error in e.errors():
+                loc = ".".join(str(loc_part) for loc_part in error["loc"])
+                msg = error["msg"]
+                error_messages.append(f"{msg} (at: {loc})")
+            return False, error_messages
+        except Exception as e:
+            return False, [f"Validation error: {e}"]
+
+    def validate_and_load(self, path: str | Path) -> ModelInfo:
+        """Validate and load model_info.json file.
+
+        Performs strict validation using Pydantic models and returns a validated
+        ModelInfo instance if successful. This is the recommended method
+        for loading model information in production code.
+
+        Args:
+            path: Path to model_info.json file.
+
+        Returns:
+            Validated ModelInfo instance with all data properly typed
+            and validated.
+
+        Raises:
+            ValueError: If validation fails or file cannot be loaded.
+            FileNotFoundError: If the model_info.json file does not exist.
+            json.JSONDecodeError: If the file contains invalid JSON.
+
+        Example:
+            >>> validator = ModelInfoValidator()
+            >>> model_info = validator.validate_and_load("model_info.json")
+            >>> print(model_info.name)
+            'ViT-B-32'
+        """
+        path = Path(path)
+        is_valid, errors = self.validate_file(path, strict=True)
+
+        if not is_valid:
+            error_msg = "Model info validation failed:\n" + "\n".join(
+                f" - {err}" for err in errors
+            )
+            raise ValueError(error_msg)
+
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        return ModelInfo.model_validate(data)
+
+
+def load_and_validate_model_info(path: str | Path) -> ModelInfo:
+    """Load and validate a model_info.json file.
+
+    This is the recommended way to load model information in production.
+    Combines validation and loading into a single operation for convenience
+    and ensures that only validated model information is returned.
+
+    Args:
+        path: Path to model_info.json file.
+
+    Returns:
+        Validated ModelInfo instance with all data properly typed and
+        validated against the model specification.
+
+    Raises:
+        ValueError: If validation fails or file cannot be loaded.
+        FileNotFoundError: If the model_info.json file does not exist.
+        json.JSONDecodeError: If the file contains invalid JSON.
+
+    Example:
+        >>> from lumen_resources.model_info_validator import load_and_validate_model_info
+        >>> model_info = load_and_validate_model_info("model_info.json")
+        >>> print(model_info.version)
+        '1.0.0'
+    """
+    validator = ModelInfoValidator()
+    return validator.validate_and_load(path)
+
+
+def validate_file(path: str | Path, strict: bool = True) -> tuple[bool, list[str]]:
+    """Convenience function to validate a model_info.json file.
+
+    Simple one-line function for validating model information JSON files.
+    Uses strict validation by default for maximum reliability.
+
+    Args:
+        path: Path to model_info.json file.
+        strict: If True, use Pydantic validation with custom validators.
+            If False, use JSON Schema validation only. Defaults to True.
+
+    Returns:
+        Tuple of (is_valid, error_messages) where is_valid is True if
+        the file passes validation, and error_messages contains detailed
+        validation errors if validation fails.
+
+    Example:
+        >>> is_valid, errors = validate_file("model_info.json")
+        >>> if not is_valid:
+        ...     for error in errors:
+        ...         print(f"Error: {error}")
+    """
+    validator = ModelInfoValidator()
+    return validator.validate_file(path, strict=strict)
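
For orientation, here is a minimal usage sketch of the validator API added above. It is not part of the package: the model_info.json path is hypothetical, and it assumes the package is installed together with its jsonschema and pydantic dependencies.

```python
from pathlib import Path

from lumen_resources.model_info_validator import (
    ModelInfoValidator,
    load_and_validate_model_info,
)

# Hypothetical location of a model_info.json file.
info_path = Path("models/clip-vit-base-patch32/model_info.json")

validator = ModelInfoValidator()  # uses the bundled JSON Schema

# Lenient structural check first (JSON Schema only) ...
is_valid, errors = validator.validate_file(info_path, strict=False)
if not is_valid:
    for err in errors:
        print(f"schema error: {err}")
else:
    # ... then strict Pydantic validation plus loading in one step.
    model_info = load_and_validate_model_info(info_path)  # raises ValueError on failure
    print(type(model_info).__name__)
```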

lumen_resources/platform.py
@@ -0,0 +1,270 @@
+"""
+Platform Adapter for Model Repository Access
+
+This module provides a unified interface for downloading models from HuggingFace Hub
+and ModelScope Hub with efficient file filtering capabilities.
+
+Features:
+- Unified API for both HuggingFace and ModelScope platforms
+- File pattern filtering during download (not post-download)
+- Automatic cache management and file organization
+- Force download and cache invalidation support
+- Supports two-phase dataset downloads used by the Downloader:
+  1) First pass downloads runtime-specific files plus JSON metadata (model_info.json).
+  2) Second pass optionally fetches dataset files using the exact relative path from
+     model_info.json's "datasets" mapping via allow_patterns=[relative_path].
+
+@requires: Platform-specific SDK installed (huggingface_hub or modelscope)
+@returns: Downloaded model files in local cache with filtering applied
+@errors: DownloadError, PlatformUnavailableError
+"""
+
+import shutil
+from enum import Enum
+from pathlib import Path
+from types import ModuleType
+
+from .exceptions import DownloadError, PlatformUnavailableError
+
+
+class PlatformType(str, Enum):
+    """Supported model repository platforms.
+
+    Defines the platforms that can be used for downloading models.
+    Each platform has its own SDK and API requirements.
+
+    Attributes:
+        HUGGINGFACE: Hugging Face Hub platform.
+        MODELSCOPE: ModelScope Hub platform.
+
+    Example:
+        >>> platform_type = PlatformType.HUGGINGFACE
+        >>> print(platform_type.value)
+        'huggingface'
+    """
+
+    HUGGINGFACE = "huggingface"
+    MODELSCOPE = "modelscope"
+
+
+class Platform:
+    """Unified platform adapter for HuggingFace and ModelScope.
+
+    Provides a consistent interface for downloading models from different
+    repositories while handling platform-specific requirements and optimizations.
+    Supports efficient file filtering during download to minimize bandwidth usage.
+
+    Attributes:
+        platform_type: The type of platform (HUGGINGFACE or MODELSCOPE).
+        owner: Organization/owner name for model repositories.
+
+    Example:
+        >>> platform = Platform(PlatformType.HUGGINGFACE, "openai")
+        >>> model_path = platform.download_model(
+        ...     repo_name="clip-vit-base-patch32",
+        ...     cache_dir=Path("/cache"),
+        ...     allow_patterns=["*.json", "*.pt"]
+        ... )
+    """
+
+    def __init__(self, platform_type: PlatformType, owner: str):
+        """Initialize platform adapter.
+
+        Args:
+            platform_type: Type of platform (HUGGINGFACE or MODELSCOPE).
+            owner: Organization/owner name on the platform.
+
+        Raises:
+            PlatformUnavailableError: If required SDK is not installed.
+        """
+        self.platform_type: PlatformType = platform_type
+        self.owner: str = owner
+        self._check_availability()
+
+    def _check_availability(self) -> None:
+        """Check if the required platform SDK is available.
+
+        Validates that the appropriate SDK (huggingface_hub or modelscope)
+        is installed and imports the necessary functions for the platform.
+
+        Raises:
+            PlatformUnavailableError: If the required SDK is not installed.
+
+        Example:
+            >>> platform = Platform(PlatformType.HUGGINGFACE, "owner")
+            >>> # If huggingface_hub is not installed, raises PlatformUnavailableError
+        """
+        if self.platform_type == PlatformType.HUGGINGFACE:
+            try:
+                import huggingface_hub
+
+                self._hf_hub: ModuleType = huggingface_hub
+            except ImportError:
+                raise PlatformUnavailableError(
+                    "HuggingFace Hub SDK not available. "
+                    + "Install with: pip install huggingface_hub"
+                )
+        elif self.platform_type == PlatformType.MODELSCOPE:
+            try:
+                from modelscope.hub.snapshot_download import snapshot_download
+
+                self._ms_snapshot_download = snapshot_download
+            except ImportError:
+                raise PlatformUnavailableError(
+                    "ModelScope SDK not available. Install with: pip install modelscope"
+                )
+
+    def download_model(
+        self,
+        repo_name: str,
+        cache_dir: Path,
+        allow_patterns: list[str],
+        force: bool = False,
+    ) -> Path:
+        """Download model files from the platform with efficient filtering.
+
+        Downloads model files using pattern-based filtering to minimize bandwidth
+        usage. Supports both HuggingFace and ModelScope platforms with their
+        respective SDKs while providing a unified interface.
+
+        Args:
+            repo_name: Repository name (without owner prefix).
+            cache_dir: Local cache directory for storing downloaded models.
+            allow_patterns: List of glob patterns for files to download.
+                Examples: ['*.json', '*.bin', 'tokenizer/*', 'model_info.json'].
+            force: Force re-download even if cached.
+                - HuggingFace: Uses native force_download parameter.
+                - ModelScope: Clears cache directory before download.
+
+        Returns:
+            Path to the downloaded model directory.
+
+        Raises:
+            DownloadError: If download fails for any reason.
+
+        Example:
+            >>> platform = Platform(PlatformType.HUGGINGFACE, "openai")
+            >>> model_path = platform.download_model(
+            ...     repo_name="clip-vit-base-patch32",
+            ...     cache_dir=Path("/cache"),
+            ...     allow_patterns=["*.json", "*.pt"],
+            ...     force=True
+            ... )
+            >>> print(model_path.name)
+            'clip-vit-base-patch32'
+        """
+        repo_id = f"{self.owner}/{repo_name}"
+        target_dir = cache_dir / "models" / repo_name
+
+        try:
+            if self.platform_type == PlatformType.HUGGINGFACE:
+                return self._download_from_huggingface(
+                    repo_id, target_dir, allow_patterns, force
+                )
+            elif self.platform_type == PlatformType.MODELSCOPE:
+                return self._download_from_modelscope(
+                    repo_id, target_dir, allow_patterns, force
+                )
+            else:
+                raise DownloadError(f"Unsupported platform type: {self.platform_type}")
+        except Exception as e:
+            raise DownloadError(f"Failed to download {repo_id}: {e}") from e
+
+    def _download_from_huggingface(
+        self,
+        repo_id: str,
+        cache_dir: Path,
+        allow_patterns: list[str],
+        force: bool,
+    ) -> Path:
+        """Download from HuggingFace Hub.
+
+        Uses the huggingface_hub library to download model files with
+        pattern-based filtering and optional force re-download.
+
+        Args:
+            repo_id: Full repository ID (owner/repo).
+            cache_dir: Local cache directory for storing files.
+            allow_patterns: File patterns to download.
+            force: Whether to force re-download ignoring cache.
+
+        Returns:
+            Path to the downloaded model directory.
+
+        Example:
+            >>> platform = Platform(PlatformType.HUGGINGFACE, "owner")
+            >>> path = platform._download_from_huggingface(
+            ...     "owner/repo", Path("/cache"), ["*.json"], False
+            ... )
+        """
+        _ = self._hf_hub.snapshot_download(
+            repo_id=repo_id,
+            allow_patterns=allow_patterns,
+            local_dir=cache_dir,
+            local_files_only=False,
+            force_download=force,
+        )
+
+        return cache_dir
+
+    def _download_from_modelscope(
+        self,
+        repo_id: str,
+        cache_dir: Path,
+        allow_patterns: list[str],
+        force: bool,
+    ) -> Path:
+        """Download from ModelScope Hub.
+
+        Uses the ModelScope SDK to download model files with pattern-based filtering.
+        Implements force download by clearing the cache directory before download.
+
+        Args:
+            repo_id: Full repository ID (owner/repo).
+            cache_dir: Local cache directory for storing files.
+            allow_patterns: File patterns to download.
+            force: Force re-download by clearing cache first.
+
+        Returns:
+            Path to the downloaded model directory.
+
+        Example:
+            >>> platform = Platform(PlatformType.MODELSCOPE, "owner")
+            >>> path = platform._download_from_modelscope(
+            ...     "owner/repo", Path("/cache"), ["*.json"], False
+            ... )
+        """
+
+        # Handle force download by clearing ModelScope cache
+        if force:
+            if cache_dir.exists():
+                shutil.rmtree(cache_dir)
+
+        # ModelScope supports allow_patterns parameter (HuggingFace compatible)
+        _ = self._ms_snapshot_download(
+            model_id=repo_id,
+            local_dir=str(cache_dir),
+            allow_patterns=allow_patterns,
+            local_files_only=False,
+        )
+
+        return cache_dir
+
+    def cleanup_model(self, repo_name: str, cache_dir: Path) -> None:
+        """Remove a model directory from cache.
+
+        Used for cleanup when download/validation fails or for manual cache management.
+        Removes the entire model directory including all downloaded files.
+
+        Args:
+            repo_name: Repository name (without owner prefix).
+            cache_dir: Base cache directory containing models.
+
+        Example:
+            >>> platform = Platform(PlatformType.HUGGINGFACE, "owner")
+            >>> platform.cleanup_model("model-name", Path("/cache"))
+            >>> # Model directory removed if it existed
+        """
+        target_dir = cache_dir / "models" / repo_name
+        if target_dir.exists():
+            shutil.rmtree(target_dir)
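
The sketch below shows how the adapter above might drive the two-phase flow described in the module docstring. It is not from the package: the owner, repository, and cache path are hypothetical, it assumes huggingface_hub is installed and the network is reachable, and the layout of model_info.json's "datasets" mapping (relative paths as values) is inferred from the docstring rather than shown in this diff.

```python
import json
from pathlib import Path

from lumen_resources.platform import Platform, PlatformType

cache = Path("/tmp/lumen-cache")                       # hypothetical cache root
platform = Platform(PlatformType.HUGGINGFACE, "acme")  # hypothetical owner

# Phase 1: runtime-specific files plus the JSON metadata (model_info.json).
model_dir = platform.download_model(
    repo_name="clip-vit-base-patch32",                 # hypothetical repository
    cache_dir=cache,
    allow_patterns=["model_info.json", "*.onnx"],
)

# Phase 2 (optional): fetch dataset files by their exact relative paths,
# assumed here to be the values of model_info.json's "datasets" mapping.
info = json.loads((model_dir / "model_info.json").read_text(encoding="utf-8"))
for relative_path in info.get("datasets", {}).values():
    platform.download_model(
        repo_name="clip-vit-base-patch32",
        cache_dir=cache,
        allow_patterns=[relative_path],
    )
```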

lumen_resources/result_schemas/README.md
@@ -0,0 +1,14 @@
+```bash
+datamodel-codegen \
+  --input src/lumen_resources/schemas/result_schemas \
+  --output src/lumen_resources/result_schemas/ \
+  --use-schema-description \
+  --use-field-description \
+  --target-python-version 3.10 \
+  --use-standard-collections \
+  --use-union-operator \
+  --output-model-type pydantic_v2.BaseModel \
+  --field-constraints \
+  --input-file-type jsonschema
+```
+

lumen_resources/result_schemas/__init__.py
@@ -0,0 +1,14 @@
+# generated by datamodel-codegen:
+#   filename: result_schemas
+#   timestamp: 2025-11-28T17:04:43+00:00
+
+
+from .embedding_v1 import EmbeddingV1
+from .face_v1 import FaceV1
+from .labels_v1 import LabelsV1
+
+__all__ = [
+    "FaceV1",
+    "EmbeddingV1",
+    "LabelsV1",
+]

lumen_resources/result_schemas/embedding_v1.py
@@ -0,0 +1,29 @@
+# generated by datamodel-codegen:
+#   filename: embedding_v1.json
+#   timestamp: 2025-11-28T17:04:43+00:00
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class EmbeddingV1(BaseModel):
+    """
+    Universal schema for embedding responses across all Lumen services (face, clip, ocr, etc.)
+    """
+
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    vector: list[float] = Field(..., min_length=1)
+    """
+    Embedding vector
+    """
+    dim: int = Field(..., ge=1)
+    """
+    Embedding dimension
+    """
+    model_id: str = Field(..., min_length=1)
+    """
+    Model identifier that generated the embedding
+    """
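
As a quick illustration (not part of the package), constructing the generated model enforces the field constraints above; the vector values and model identifier are made up.

```python
from pydantic import ValidationError

from lumen_resources.result_schemas import EmbeddingV1

emb = EmbeddingV1(vector=[0.12, -0.03, 0.88], dim=3, model_id="ViT-B-32")
print(emb.model_dump())  # {'vector': [0.12, -0.03, 0.88], 'dim': 3, 'model_id': 'ViT-B-32'}

try:
    EmbeddingV1(vector=[], dim=0, model_id="")  # violates min_length / ge constraints
except ValidationError as exc:
    print(exc.error_count(), "validation errors")
```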

lumen_resources/result_schemas/face_v1.py
@@ -0,0 +1,55 @@
+# generated by datamodel-codegen:
+#   filename: face_v1.json
+#   timestamp: 2025-11-28T17:04:43+00:00
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field, RootModel
+
+
+class BboxItem(RootModel[float]):
+    root: float = Field(..., ge=0.0)
+
+
+class Face(BaseModel):
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    bbox: list[BboxItem] = Field(..., max_length=4, min_length=4)
+    """
+    Bounding box coordinates [x1, y1, x2, y2] where (x1, y1) is top-left corner and (x2, y2) is bottom-right corner
+    """
+    confidence: float = Field(..., ge=0.0, le=1.0)
+    """
+    Detection confidence score
+    """
+    landmarks: list[float] | None = None
+    """
+    Facial landmark points (optional)
+    """
+    embedding: list[float] | None = None
+    """
+    Face embedding vector for recognition/comparison (optional)
+    """
+
+
+class FaceV1(BaseModel):
+    """
+    Universal schema for face detection and embedding responses across Lumen services
+    """
+
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    faces: list[Face]
+    """
+    Detected faces with bounding boxes and metadata
+    """
+    count: int = Field(..., ge=0)
+    """
+    Number of detected faces
+    """
+    model_id: str = Field(..., min_length=1)
+    """
+    Model identifier
+    """
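
A hedged example of validating a face-detection payload against the model above; the payload values and model identifier are invented, and only fields visible in the schema are used.

```python
from pydantic import ValidationError

from lumen_resources.result_schemas import FaceV1

payload = {
    "faces": [{"bbox": [12.0, 8.0, 96.0, 120.0], "confidence": 0.97}],
    "count": 1,
    "model_id": "retinaface-r50",  # hypothetical model identifier
}
result = FaceV1.model_validate(payload)
print(result.faces[0].bbox[0].root)  # bbox entries are RootModel[float] wrappers -> 12.0

try:
    FaceV1.model_validate({**payload, "unexpected": True})  # extra='forbid' rejects unknown keys
except ValidationError as exc:
    print("rejected extra field:", exc.error_count(), "error")
```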

lumen_resources/result_schemas/labels_v1.py
@@ -0,0 +1,39 @@
+# generated by datamodel-codegen:
+#   filename: labels_v1.json
+#   timestamp: 2025-11-28T17:04:43+00:00
+
+from __future__ import annotations
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class Label(BaseModel):
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    label: str
+    """
+    The classification label or class name
+    """
+    score: float
+    """
+    Confidence score for this label
+    """
+
+
+class LabelsV1(BaseModel):
+    """
+    Classification response schema for Lumen services. Returns ranked labels with confidence scores.
+    """
+
+    model_config = ConfigDict(
+        extra='forbid',
+    )
+    labels: list[Label]
+    """
+    Array of classification results
+    """
+    model_id: str = Field(..., min_length=1)
+    """
+    Identifier of the model that generated the classification
+    """
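
To close the loop, a small sketch of parsing a classification response with the generated model; the JSON payload and model identifier are invented.

```python
from lumen_resources.result_schemas import LabelsV1

raw = (
    '{"labels": [{"label": "cat", "score": 0.92}, {"label": "dog", "score": 0.05}],'
    ' "model_id": "vit-b-32-classifier"}'
)
response = LabelsV1.model_validate_json(raw)
top = max(response.labels, key=lambda item: item.score)
print(top.label, top.score)  # cat 0.92
```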