gen_worker-0.1.4-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. gen_worker/__init__.py +19 -0
  2. gen_worker/decorators.py +66 -0
  3. gen_worker/default_model_manager/__init__.py +5 -0
  4. gen_worker/downloader.py +84 -0
  5. gen_worker/entrypoint.py +135 -0
  6. gen_worker/errors.py +10 -0
  7. gen_worker/model_interface.py +48 -0
  8. gen_worker/pb/__init__.py +27 -0
  9. gen_worker/pb/frontend_pb2.py +53 -0
  10. gen_worker/pb/frontend_pb2_grpc.py +189 -0
  11. gen_worker/pb/worker_scheduler_pb2.py +69 -0
  12. gen_worker/pb/worker_scheduler_pb2_grpc.py +100 -0
  13. gen_worker/py.typed +0 -0
  14. gen_worker/testing/__init__.py +1 -0
  15. gen_worker/testing/stub_manager.py +69 -0
  16. gen_worker/torch_manager/__init__.py +4 -0
  17. gen_worker/torch_manager/manager.py +2059 -0
  18. gen_worker/torch_manager/utils/base_types/architecture.py +145 -0
  19. gen_worker/torch_manager/utils/base_types/common.py +52 -0
  20. gen_worker/torch_manager/utils/base_types/config.py +46 -0
  21. gen_worker/torch_manager/utils/config.py +321 -0
  22. gen_worker/torch_manager/utils/db/database.py +46 -0
  23. gen_worker/torch_manager/utils/device.py +26 -0
  24. gen_worker/torch_manager/utils/diffusers_fix.py +10 -0
  25. gen_worker/torch_manager/utils/flashpack_loader.py +262 -0
  26. gen_worker/torch_manager/utils/globals.py +59 -0
  27. gen_worker/torch_manager/utils/load_models.py +238 -0
  28. gen_worker/torch_manager/utils/local_cache.py +340 -0
  29. gen_worker/torch_manager/utils/model_downloader.py +763 -0
  30. gen_worker/torch_manager/utils/parse_cli.py +98 -0
  31. gen_worker/torch_manager/utils/paths.py +22 -0
  32. gen_worker/torch_manager/utils/repository.py +141 -0
  33. gen_worker/torch_manager/utils/utils.py +43 -0
  34. gen_worker/types.py +47 -0
  35. gen_worker/worker.py +1720 -0
  36. gen_worker-0.1.4.dist-info/METADATA +113 -0
  37. gen_worker-0.1.4.dist-info/RECORD +38 -0
  38. gen_worker-0.1.4.dist-info/WHEEL +4 -0
gen_worker/torch_manager/utils/flashpack_loader.py
@@ -0,0 +1,262 @@
+ """
+ FlashPack Loading Integration for DefaultModelManager
+
+ This module provides FlashPack loading capability to the model manager.
+ It checks if a FlashPack version of a model exists and loads from it
+ for faster loading times (2-4s vs 8-12s for safetensors).
+
+ Now with local cache support - copies models from NFS to local NVMe first.
+
+ Integration:
+ 1. Add this import to manager.py:
+    from .utils.flashpack_loader import FlashPackLoader
+
+ 2. Initialize in DefaultModelManager.__init__():
+    self.flashpack_loader = FlashPackLoader()
+
+ 3. Modify _load_model_by_source() to try FlashPack first (see integration code below)
+ """
+
+ import os
+ import logging
+ from pathlib import Path
+ from typing import Optional, Tuple, Type, Union, Dict, List
+ import hashlib
+ import asyncio
+
+ import torch
+ from diffusers import DiffusionPipeline
+
+ logger = logging.getLogger(__name__)
+
+ # FlashPack suffix for directories
+ FLASHPACK_SUFFIX = ".flashpack"
+
+ # Components that can be loaded from FlashPack
+ FLASHPACK_COMPONENTS = ["unet", "vae", "text_encoder", "text_encoder_2", "transformer"]
+
+ # NFS paths
+ NFS_COZY_MODELS = "/workspace/.cozy-creator/models"
+ NFS_HF_CACHE = "/workspace/.cache/huggingface/hub"
+
+
+ class FlashPackLoader:
+     """
+     Handles loading models from FlashPack format with local cache support.
+
+     FlashPack provides 2-4x faster loading compared to safetensors.
+     Local cache copies models from NFS to local NVMe for additional speedup.
+     """
+
+     def __init__(
+         self,
+         cozy_models_dir: str = NFS_COZY_MODELS,
+         hf_cache_dir: str = NFS_HF_CACHE,
+         use_local_cache: bool = True,
+     ):
+         self.cozy_models_dir = Path(cozy_models_dir)
+         self.hf_cache_dir = Path(hf_cache_dir)
+         self._flashpack_available = self._check_flashpack_installed()
+
+         # Initialize local cache if enabled
+         self.local_cache = None
+         if use_local_cache:
+             try:
+                 from .local_cache import LocalModelCache
+                 self.local_cache = LocalModelCache()
+                 logger.info("✓ Local NVMe cache enabled")
+             except ImportError:
+                 logger.warning("LocalModelCache not available, using NFS directly")
+
+     def _check_flashpack_installed(self) -> bool:
+         """Check if flashpack library is available"""
+         try:
+             from flashpack import assign_from_file
+             return True
+         except ImportError:
+             logger.warning("FlashPack not installed. Using standard loading.")
+             return False
+
+     def get_flashpack_path(self, model_id: str, source: str) -> Optional[Path]:
+         """
+         Get the FlashPack directory path for a model if it exists.
+         Checks local cache first, then NFS.
+
+         Args:
+             model_id: Model identifier (e.g., "pony.realism")
+             source: Source string from pipeline_defs
+
+         Returns:
+             Path to FlashPack directory or None if not found
+         """
+         if not self._flashpack_available:
+             return None
+
+         # Check local cache first
+         if self.local_cache:
+             local_path = self.local_cache.get_local_path_if_cached(model_id, source)
+             if local_path and local_path.exists() and FLASHPACK_SUFFIX in local_path.name:
+                 logger.info(f"⚡ FlashPack found in local cache for {model_id}")
+                 return local_path
+
+         # Check NFS
+         if source.startswith("hf:"):
+             base_path = self._get_hf_flashpack_path(source[3:])
+         else:
+             base_path = self._get_civitai_flashpack_path(model_id, source)
+
+         if base_path and base_path.exists():
+             if (base_path / "pipeline").exists():
+                 logger.info(f"⚡ FlashPack found on NFS for {model_id}: {base_path}")
+                 return base_path
+
+         return None
+
+     def _get_hf_flashpack_path(self, repo_id: str) -> Optional[Path]:
+         """Get FlashPack path for HuggingFace model"""
+         folder_name = f"models--{repo_id.replace('/', '--')}"
+         flashpack_path = self.hf_cache_dir / (folder_name + FLASHPACK_SUFFIX)
+         return flashpack_path
+
+     def _get_civitai_flashpack_path(self, model_id: str, source: str) -> Optional[Path]:
+         """Get FlashPack path for Civitai model"""
+         safe_name = model_id.replace("/", "-")
+
+         # Find the original model directory
+         matching_dirs = list(self.cozy_models_dir.glob(f"{safe_name}--*"))
+         if not matching_dirs:
+             # Try finding by URL hash
+             url_hash = hashlib.md5(source.encode()).hexdigest()[:8]
+             matching_dirs = list(self.cozy_models_dir.glob(f"{safe_name}--{url_hash}"))
+
+         if not matching_dirs:
+             return None
+
+         # Get the FlashPack sibling directory
+         original_dir = matching_dirs[0]
+         flashpack_path = original_dir.parent / (original_dir.name + FLASHPACK_SUFFIX)
+         return flashpack_path
+
+     async def load_from_flashpack(
+         self,
+         model_id: str,
+         flashpack_path: Path,
+         pipeline_class: Type[DiffusionPipeline],
+     ) -> Optional[DiffusionPipeline]:
+         """
+         Load a model from FlashPack format.
+         Copies to local cache first if enabled.
+
+         Args:
+             model_id: Model identifier
+             flashpack_path: Path to FlashPack directory (on NFS)
+             pipeline_class: Pipeline class to instantiate
+
+         Returns:
+             Loaded pipeline or None if loading failed
+         """
+         try:
+             from flashpack import assign_from_file
+
+             # Copy to local cache first if enabled
+             load_path = flashpack_path
+             if self.local_cache:
+                 # Get source for cache lookup
+                 source = self._infer_source_from_path(flashpack_path)
+                 local_path = await self.local_cache.ensure_local(
+                     model_id, source, priority=True
+                 )
+                 if local_path:
+                     load_path = local_path
+                     logger.info(f"⚡ Loading {model_id} from local cache")
+                 else:
+                     logger.warning(f"Local cache failed, loading from NFS")
+
+             logger.info(f"⚡ Loading {model_id} from FlashPack at {load_path}...")
+
+             # Determine dtype based on model type
+             torch_dtype = torch.bfloat16 if "flux" in model_id.lower() else torch.float16
+
+             # Load pipeline config (scheduler, tokenizer, etc.)
+             pipeline_config_dir = load_path / "pipeline"
+
+             # Load base pipeline from config (this creates the model structure)
+             pipeline = await asyncio.to_thread(
+                 pipeline_class.from_pretrained,
+                 str(pipeline_config_dir),
+             )
+
+             # Assign FlashPack weights to each component
+             for component_name in FLASHPACK_COMPONENTS:
+                 fp_file = load_path / f"{component_name}.flashpack"
+                 if fp_file.exists() and hasattr(pipeline, component_name):
+                     component = getattr(pipeline, component_name)
+                     if component is not None:
+                         logger.info(f" Assigning {component_name} from FlashPack...")
+                         await asyncio.to_thread(
+                             assign_from_file,
+                             component,
+                             str(fp_file)
+                         )
+
+             # Move to cuda with correct dtype
+             pipeline.to("cuda", dtype=torch_dtype)
+
+             logger.info(f"✅ Successfully loaded {model_id} from FlashPack")
+             return pipeline
+
+         except Exception as e:
+             logger.error(f"❌ FlashPack loading failed for {model_id}: {e}")
+             logger.exception("Full traceback:")
+             return None
+
+     def _infer_source_from_path(self, flashpack_path: Path) -> str:
+         """Infer source string from FlashPack path for cache lookup"""
+         path_str = str(flashpack_path)
+
+         if "models--" in path_str:
+             # HuggingFace model
+             # Extract repo_id from models--org--name.flashpack
+             name = flashpack_path.name.replace(FLASHPACK_SUFFIX, "")
+             repo_id = name.replace("models--", "").replace("--", "/")
+             return f"hf:{repo_id}"
+         else:
+             # Civitai model - return path as source
+             return path_str
+
+     def has_flashpack(self, model_id: str, source: str) -> bool:
+         """Check if FlashPack version exists for a model"""
+         return self.get_flashpack_path(model_id, source) is not None
+
+     async def prefetch_deployment_models(
+         self,
+         model_ids: List[str],
+         sources: Dict[str, str],
+         exclude_model_id: Optional[str] = None
+     ):
+         """
+         Background prefetch models for a deployment to local cache.
+
+         Args:
+             model_ids: List of model IDs from deployment
+             sources: Dict mapping model_id → source string
+             exclude_model_id: Model to skip (already being loaded)
+         """
+         if not self.local_cache:
+             return
+
+         # Filter out the model already being loaded
+         models_to_prefetch = [
+             mid for mid in model_ids
+             if mid != exclude_model_id
+         ]
+
+         if models_to_prefetch:
+             logger.info(f"🔄 Starting background prefetch for {len(models_to_prefetch)} models")
+             await self.local_cache.prefetch_models(models_to_prefetch, sources)
+
+     def get_cache_stats(self) -> Optional[Dict]:
+         """Get local cache statistics"""
+         if self.local_cache:
+             return self.local_cache.get_cache_stats()
+         return None
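For orientation, the integration described in the module docstring above could look roughly like this. This is a minimal sketch, not code shipped in the package: `DefaultModelManager` here is a stand-in for the class in `manager.py`, and `_pipeline_class_for` / `_load_standard` are hypothetical helpers representing whatever the manager actually uses to pick a pipeline class and perform its normal loading path.

```python
# Hypothetical sketch of docstring steps 1-3; only FlashPackLoader and its
# methods come from flashpack_loader.py above. The manager class and the
# _pipeline_class_for / _load_standard helpers are assumed placeholders.
from diffusers import DiffusionPipeline

from .utils.flashpack_loader import FlashPackLoader  # step 1: import in manager.py


class DefaultModelManager:
    def __init__(self):
        self.flashpack_loader = FlashPackLoader()  # step 2: create the loader once

    async def _load_model_by_source(self, model_id: str, source: str) -> DiffusionPipeline:
        # Step 3: try FlashPack first, then fall back to the standard path.
        fp_path = self.flashpack_loader.get_flashpack_path(model_id, source)
        if fp_path is not None:
            pipeline = await self.flashpack_loader.load_from_flashpack(
                model_id, fp_path, self._pipeline_class_for(model_id)
            )
            if pipeline is not None:
                return pipeline  # loaded from .flashpack files (local NVMe cache if enabled)
        return await self._load_standard(model_id, source)  # safetensors fallback
```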
gen_worker/torch_manager/utils/globals.py
@@ -0,0 +1,59 @@
+ from typing import Type, Any
+
+ from .base_types.architecture import Architecture
+ from .base_types.common import TorchDevice
+ from .device import get_torch_device
+
+
+ _available_torch_device: TorchDevice = get_torch_device()
+
+ # Model Memory Manager
+ _MODEL_MEMORY_MANAGER = None
+
+ # Model Downloader
+ _MODEL_DOWNLOADER = None
+
+
+ _ARCHITECTURES: dict[str, type[Architecture[Any]]] = {}
+ """
+ Global class containing all architecture definitions
+ """
+
+
+ def get_model_downloader():
+     """Get or create the global ModelManager instance"""
+     global _MODEL_DOWNLOADER
+     if _MODEL_DOWNLOADER is None:
+         from .model_downloader import ModelManager
+
+         _MODEL_DOWNLOADER = ModelManager()
+     return _MODEL_DOWNLOADER
+
+
+ def get_model_memory_manager():
+     global _MODEL_MEMORY_MANAGER
+     if _MODEL_MEMORY_MANAGER is None:
+         from ..manager import ModelMemoryManager
+
+         _MODEL_MEMORY_MANAGER = ModelMemoryManager()
+     return _MODEL_MEMORY_MANAGER
+
+
+ def update_architectures(architectures: dict[str, Type["Architecture"]]):
+     global _ARCHITECTURES
+     _ARCHITECTURES.update(architectures)
+
+
+ def get_architectures() -> dict[str, Type["Architecture"]]:
+     return _ARCHITECTURES
+
+
+ def get_available_torch_device():
+     global _available_torch_device
+     return _available_torch_device
+
+
+ def set_available_torch_device(device: TorchDevice):
+     print("Setting device", device)
+     global _available_torch_device
+     _available_torch_device = device
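A quick usage sketch for the lazy singletons above, assuming the package is installed and importable under `gen_worker.torch_manager.utils.globals`; the getters build their objects on first use and return the same instance afterwards, while the device is resolved once at import time.

```python
# Hedged sketch of how the module-level singletons are typically consumed.
from gen_worker.torch_manager.utils.globals import (
    get_available_torch_device,
    get_model_downloader,
    get_architectures,
)

device = get_available_torch_device()        # TorchDevice chosen by get_torch_device()
downloader = get_model_downloader()          # lazily constructs ModelManager, then caches it
assert downloader is get_model_downloader()  # repeat calls return the same object
print(device, list(get_architectures()))     # registry is empty until update_architectures() is called
```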
gen_worker/torch_manager/utils/load_models.py
@@ -0,0 +1,238 @@
+ from __future__ import annotations
+ import os
+ from pathlib import Path
+ import torch
+ import struct
+ import json
+ from typing import Type, Optional, Any
+ from safetensors.torch import load_file as safetensors_load_file
+ from spandrel import canonicalize_state_dict
+ from spandrel.__helpers.unpickler import (
+     RestrictedUnpickle,
+ )  # probably shouldn't import from private modules...
+
+ from .base_types.architecture import (
+     Architecture,
+     StateDict,
+     TorchDevice,
+     ComponentMetadata,
+ )
+
+ METADATA_HEADER_SIZE = 8
+
+
+ # TO DO: make this more efficient; we don't want to have to evaluate EVERY architecture
+ # for EVERY file. ALSO we need stop multiple architectures from claiming the same
+ # keys; i.e., if there are 5 architecture definitions for stable-diffusion-1 installed,
+ # then only the first one should get to claim those keys, otherwise it gets confusing
+ # on which model it should use
+ def from_file(
+     path: str | Path,
+     device: Optional[TorchDevice] = None,
+     registry: dict[str, Type[Architecture]] = None,
+ ) -> dict[str, Architecture]:
+     """
+     Loads a model from a file path. It detects the architecture, instantiates the
+     architecture, and loads the state dict into the PyTorch class.
+
+     Throws a `ValueError` if the file extension is not supported.
+     Returns an empty dictionary if no supported model architecture is found.
+     """
+     state_dict = load_state_dict_from_file(path, device=device)
+
+     metadata = read_safetensors_metadata(path)
+
+     return from_state_dict(state_dict, metadata, device, registry)
+
+
+ def from_state_dict(
+     state_dict: StateDict,
+     metadata: dict[str, Any] = {},
+     device: Optional[TorchDevice] = None,
+     registry: dict[str, Type[Architecture]] = None,
+ ) -> dict[str, Architecture]:
+     """
+     Load a model from the given state dict.
+
+     Returns an empty dictionary if no supported model architecture is found.
+     """
+     # Fetch class instances
+     components = components_from_state_dict(state_dict, metadata, registry)
+
+     # Load the state dict into the class instance, and move to device
+     for _arch_id, architecture in components.items():
+         try:
+             architecture.load(state_dict, device)
+         except Exception as e:
+             print(e)
+
+     return components
+
+
+ def components_from_state_dict(
+     state_dict: StateDict,
+     metadata: dict,
+     registry: Optional[dict[str, Type[Architecture]]] = None,
+ ) -> dict[str, Architecture]:
+     """
+     Detect all models present inside of a state dict; does not load the state-dict into
+     memory however; it only calls the Architecture's constructor to return a class instance.
+     """
+     components: dict[str, Architecture] = {}
+
+     if registry is None:
+         from .globals import _ARCHITECTURES
+
+         registry = _ARCHITECTURES
+
+     for arch_id, architecture in registry.items():  # Iterate through all architectures
+         try:
+             # print("Now in load model")
+             # print(metadata)
+             # print(architecture)
+             # print("Done above")
+
+             checkpoint_metadata = architecture.detect(
+                 state_dict=state_dict, metadata=metadata
+             )
+             # print(checkpoint_metadata)
+             # print("Done in load model")
+             # detect_signature = inspect.signature(architecture.detect)
+             # if 'state_dict' in detect_signature.parameters and 'metadata' in detect_signature.parameters:
+             #     checkpoint_metadata = architecture.detect(state_dict=state_dict, metadata=metadata)
+             # elif 'state_dict' in detect_signature.parameters:
+             #     checkpoint_metadata = architecture.detect(state_dict=state_dict)
+             # elif 'metadata' in detect_signature.parameters:
+             #     checkpoint_metadata = architecture.detect(metadata=metadata)
+             # else:
+             #     continue
+         except Exception:
+             checkpoint_metadata = None
+
+         if checkpoint_metadata is not None:
+             model = architecture(metadata=metadata)
+             components.update({arch_id: model})
+
+     return components
+
+
+ def load_state_dict_from_file(
+     path: str | Path, device: Optional[TorchDevice] = None
+ ) -> StateDict:
+     """
+     Load the state dict of a model from the given file path.
+
+     State dicts are typically only useful to pass them into the `load`
+     function of a specific architecture.
+
+     Throws a `ValueError` if the file extension is not supported.
+     """
+     extension = os.path.splitext(path)[1].lower()
+     if isinstance(device, str):
+         device = torch.device(device)  # make pyright type-checker happy
+
+     state_dict: StateDict
+     if extension == ".pt":
+         try:
+             state_dict = _load_torchscript(path, device)
+         except RuntimeError:
+             # If torchscript loading fails, try loading as a normal state dict
+             try:
+                 pth_state_dict = _load_pth(path, device)
+             except Exception:
+                 pth_state_dict = None
+
+             if pth_state_dict is None:
+                 # the file was likely a torchscript file, but failed to load
+                 # re-raise the original error, so the user knows what went wrong
+                 raise
+
+             state_dict = pth_state_dict
+
+     elif extension == ".pth" or extension == ".ckpt":
+         state_dict = _load_pth(path, device)
+     elif extension == ".safetensors":
+         state_dict = _load_safetensors(path, device)
+     else:
+         raise ValueError(
+             f"Unsupported model file extension {extension}. Please try a supported model type."
+         )
+
+     return canonicalize_state_dict(state_dict)
+
+
+ def _load_pth(path: str | Path, device: Optional[torch.device] = None) -> StateDict:
+     return torch.load(
+         f=path,
+         map_location=device,
+         pickle_module=RestrictedUnpickle,
+     )
+
+
+ def _load_torchscript(
+     path: str | Path, device: Optional[torch.device] = None
+ ) -> StateDict:
+     return torch.jit.load(path, map_location=device).state_dict()
+
+
+ def _load_safetensors(
+     path: str | Path, device: Optional[TorchDevice] = None
+ ) -> StateDict:
+     if device is not None:
+         if isinstance(device, torch.device):
+             device = str(device)
+         return safetensors_load_file(path, device=device)
+     else:
+         return safetensors_load_file(path)
+
+
+ def read_safetensors_metadata(file_path: str | Path) -> dict[str, Any]:
+     if not str(file_path).endswith(".safetensors"):
+         print(f"Error: File '{file_path}' is not a '.safetensors' file.")
+         return {}
+     if not os.path.isfile(file_path):
+         print(f"Error: File '{file_path}' not found.")
+         return {}
+
+     with open(file_path, "rb") as file:
+         header_size_bytes = file.read(METADATA_HEADER_SIZE)
+         header_size = struct.unpack("<Q", header_size_bytes)[0]
+         if header_size is None or header_size == 0:
+             return {}
+         header_bytes = file.read(header_size)
+         header = json.loads(header_bytes)
+
+     return header.get("__metadata__", {})
+
+
+ def find_component_models(
+     state_dict: StateDict,
+     metadata: Optional[dict] = None,
+     registry: dict[str, Type[Architecture]] = None,
+ ) -> dict[str, ComponentMetadata]:
+     """
+     Detect all models present inside of a state dict, and return a dict. The keys of
+     the dict are the architecture's unique identifier that can be instantiated using
+     this state-dict, and the value is the metadata of the corresponding architecture
+     if it were instantiated using this same state-dict + metadata.
+     """
+     components: dict[str, ComponentMetadata] = {}
+
+     if registry is None:
+         from .globals import _ARCHITECTURES
+
+         registry = _ARCHITECTURES
+
+     for arch_id, architecture in registry.items():  # Iterate through all architectures
+         try:
+             checkpoint_metadata = architecture.detect(
+                 state_dict=state_dict, metadata=metadata
+             )
+
+             if checkpoint_metadata is not None:
+                 # this will overwrite previous architectures with the same id
+                 components.update({arch_id: checkpoint_metadata})
+         except Exception as e:
+             print(f"Encountered error running architecture.detect for {arch_id}: {e}")
+
+     return components
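Finally, a short sketch of how load_models.py is typically driven. This is a hedged example: the checkpoint path is a placeholder, and nothing will be detected unless architecture definitions have been registered (the registry argument defaults to the global `_ARCHITECTURES` populated via `globals.update_architectures()`).

```python
# Hedged usage sketch for from_file / find_component_models; the checkpoint
# path below is hypothetical and only .pt/.pth/.ckpt/.safetensors are accepted.
from gen_worker.torch_manager.utils.load_models import (
    from_file,
    load_state_dict_from_file,
    find_component_models,
)

checkpoint = "/workspace/.cozy-creator/models/example.safetensors"  # hypothetical path

# Inspect which registered architectures claim this checkpoint (no weights loaded into modules).
state_dict = load_state_dict_from_file(checkpoint, device="cpu")
print(find_component_models(state_dict))

# Detect, instantiate, and load weights in one call; returns {} if nothing matches.
for arch_id, arch in from_file(checkpoint, device="cpu").items():
    print(arch_id, type(arch).__name__)
```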