PyPI - diffusers - Versions diffs - 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl - Mend

diffusers 0.33.1py3-none-any.whl → 0.35.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (551) hide show

diffusers/loaders/peft.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import inspect
+import json
 import os
 from functools import partial
 from pathlib import Path
@@ -28,13 +29,13 @@ from ..utils import (
     convert_unet_state_dict_to_peft,
     delete_adapter_layers,
     get_adapter_name,
-    get_peft_kwargs,
     is_peft_available,
     is_peft_version,
     logging,
     set_adapter_layers,
     set_weights_and_activate_adapters,
 )
+from ..utils.peft_utils import _create_lora_config, _maybe_warn_for_unhandled_keys
 from .lora_base import _fetch_state_dict, _func_optionally_disable_offloading
 from .unet_loader_utils import _maybe_expand_lora_scales
@@ -52,32 +53,18 @@ _SET_ADAPTER_SCALE_FN_MAPPING = {
     "HunyuanVideoTransformer3DModel": lambda model_cls, weights: weights,
     "LTXVideoTransformer3DModel": lambda model_cls, weights: weights,
     "SanaTransformer2DModel": lambda model_cls, weights: weights,
+    "AuraFlowTransformer2DModel": lambda model_cls, weights: weights,
     "Lumina2Transformer2DModel": lambda model_cls, weights: weights,
     "WanTransformer3DModel": lambda model_cls, weights: weights,
     "CogView4Transformer2DModel": lambda model_cls, weights: weights,
+    "HiDreamImageTransformer2DModel": lambda model_cls, weights: weights,
+    "HunyuanVideoFramepackTransformer3DModel": lambda model_cls, weights: weights,
+    "WanVACETransformer3DModel": lambda model_cls, weights: weights,
+    "ChromaTransformer2DModel": lambda model_cls, weights: weights,
+    "QwenImageTransformer2DModel": lambda model_cls, weights: weights,
 }
-def _maybe_raise_error_for_ambiguity(config):
-    rank_pattern = config["rank_pattern"].copy()
-    target_modules = config["target_modules"]
-    for key in list(rank_pattern.keys()):
-        # try to detect ambiguity
-        # `target_modules` can also be a str, in which case this loop would loop
-        # over the chars of the str. The technically correct way to match LoRA keys
-        # in PEFT is to use LoraModel._check_target_module_exists (lora_config, key).
-        # But this cuts it for now.
-        exact_matches = [mod for mod in target_modules if mod == key]
-        substring_matches = [mod for mod in target_modules if key in mod and mod != key]
-        if exact_matches and substring_matches:
-            if is_peft_version("<", "0.14.1"):
-                raise ValueError(
-                    "There are ambiguous keys present in this LoRA. To load it, please update your `peft` installation - `pip install -U peft`."
-                )
 class PeftAdapterMixin:
     """
     A class containing all functions for loading and using adapters weights that are supported in PEFT library. For
@@ -99,17 +86,6 @@ class PeftAdapterMixin:
     @classmethod
     # Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading
     def _optionally_disable_offloading(cls, _pipeline):
-        """
-        Optionally removes offloading in case the pipeline has been already sequentially offloaded to CPU.
-        Args:
-            _pipeline (`DiffusionPipeline`):
-                The pipeline to disable offloading for.
-        Returns:
-            tuple:
-                A tuple indicating if `is_model_cpu_offload` or `is_sequential_cpu_offload` is True.
-        """
         return _func_optionally_disable_offloading(_pipeline=_pipeline)
     def load_lora_adapter(
@@ -181,10 +157,15 @@ class PeftAdapterMixin:
                 Note that hotswapping adapters of the text encoder is not yet supported. There are some further
                 limitations to this technique, which are documented here:
                 https://huggingface.co/docs/peft/main/en/package_reference/hotswap
+            metadata:
+                LoRA adapter metadata. When supplied, the metadata inferred through the state dict isn't used to
+                initialize `LoraConfig`.
         """
-        from peft import LoraConfig, inject_adapter_in_model, set_peft_model_state_dict
+        from peft import inject_adapter_in_model, set_peft_model_state_dict
         from peft.tuners.tuners_utils import BaseTunerLayer
+        from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
         proxies = kwargs.pop("proxies", None)
@@ -198,6 +179,7 @@ class PeftAdapterMixin:
         network_alphas = kwargs.pop("network_alphas", None)
         _pipeline = kwargs.pop("_pipeline", None)
         low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
+        metadata = kwargs.pop("metadata", None)
         allow_pickle = False
         if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"):
@@ -205,12 +187,8 @@ class PeftAdapterMixin:
                 "`low_cpu_mem_usage=True` is not compatible with this `peft` version. Please update it with `pip install -U peft`."
             )
-        user_agent = {
-            "file_type": "attn_procs_weights",
-            "framework": "pytorch",
-        }
-        state_dict = _fetch_state_dict(
+        user_agent = {"file_type": "attn_procs_weights", "framework": "pytorch"}
+        state_dict, metadata = _fetch_state_dict(
             pretrained_model_name_or_path_or_dict=pretrained_model_name_or_path_or_dict,
             weight_name=weight_name,
             use_safetensors=use_safetensors,
@@ -223,12 +201,17 @@ class PeftAdapterMixin:
             subfolder=subfolder,
             user_agent=user_agent,
             allow_pickle=allow_pickle,
+            metadata=metadata,
         )
         if network_alphas is not None and prefix is None:
             raise ValueError("`network_alphas` cannot be None when `prefix` is None.")
+        if network_alphas and metadata:
+            raise ValueError("Both `network_alphas` and `metadata` cannot be specified.")
         if prefix is not None:
-            state_dict = {k[len(f"{prefix}.") :]: v for k, v in state_dict.items() if k.startswith(f"{prefix}.")}
+            state_dict = {k.removeprefix(f"{prefix}."): v for k, v in state_dict.items() if k.startswith(f"{prefix}.")}
+            if metadata is not None:
+                metadata = {k.removeprefix(f"{prefix}."): v for k, v in metadata.items() if k.startswith(f"{prefix}.")}
         if len(state_dict) > 0:
             if adapter_name in getattr(self, "peft_config", {}) and not hotswap:
@@ -248,7 +231,7 @@ class PeftAdapterMixin:
             rank = {}
             for key, val in state_dict.items():
-                # Cannot figure out rank from lora layers that don't have atleast 2 dimensions.
+                # Cannot figure out rank from lora layers that don't have at least 2 dimensions.
                 # Bias layers in LoRA only have a single dimension
                 if "lora_B" in key and val.ndim > 1:
                     # Check out https://github.com/huggingface/peft/pull/2419 for the `^` symbol.
@@ -259,44 +242,33 @@ class PeftAdapterMixin:
             if network_alphas is not None and len(network_alphas) >= 1:
                 alpha_keys = [k for k in network_alphas.keys() if k.startswith(f"{prefix}.")]
-                network_alphas = {k.replace(f"{prefix}.", ""): v for k, v in network_alphas.items() if k in alpha_keys}
-            lora_config_kwargs = get_peft_kwargs(rank, network_alpha_dict=network_alphas, peft_state_dict=state_dict)
-            _maybe_raise_error_for_ambiguity(lora_config_kwargs)
+                network_alphas = {
+                    k.removeprefix(f"{prefix}."): v for k, v in network_alphas.items() if k in alpha_keys
+                }
-            if "use_dora" in lora_config_kwargs:
-                if lora_config_kwargs["use_dora"]:
-                    if is_peft_version("<", "0.9.0"):
-                        raise ValueError(
-                            "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
-                        )
-                else:
-                    if is_peft_version("<", "0.9.0"):
-                        lora_config_kwargs.pop("use_dora")
-            if "lora_bias" in lora_config_kwargs:
-                if lora_config_kwargs["lora_bias"]:
-                    if is_peft_version("<=", "0.13.2"):
-                        raise ValueError(
-                            "You need `peft` 0.14.0 at least to use `lora_bias` in LoRAs. Please upgrade your installation of `peft`."
-                        )
-                else:
-                    if is_peft_version("<=", "0.13.2"):
-                        lora_config_kwargs.pop("lora_bias")
-            lora_config = LoraConfig(**lora_config_kwargs)
             # adapter_name
             if adapter_name is None:
                 adapter_name = get_adapter_name(self)
+            # create LoraConfig
+            lora_config = _create_lora_config(
+                state_dict,
+                network_alphas,
+                metadata,
+                rank,
+                model_state_dict=self.state_dict(),
+                adapter_name=adapter_name,
+            )
             # <Unsafe code
             # We can be sure that the following works as it just sets attention processors, lora layers and puts all in the same dtype
             # Now we remove any existing hooks to `_pipeline`.
             # In case the pipeline has been already offloaded to CPU - temporarily remove the hooks
-            # otherwise loading LoRA weights will lead to an error
-            is_model_cpu_offload, is_sequential_cpu_offload = self._optionally_disable_offloading(_pipeline)
+            # otherwise loading LoRA weights will lead to an error.
+            is_model_cpu_offload, is_sequential_cpu_offload, is_group_offload = self._optionally_disable_offloading(
+                _pipeline
+            )
             peft_kwargs = {}
             if is_peft_version(">=", "0.13.1"):
                 peft_kwargs["low_cpu_mem_usage"] = low_cpu_mem_usage
@@ -328,7 +300,7 @@ class PeftAdapterMixin:
                         new_sd[k] = v
                     return new_sd
-            # To handle scenarios where we cannot successfully set state dict. If it's unsucessful,
+            # To handle scenarios where we cannot successfully set state dict. If it's unsuccessful,
             # we should also delete the `peft_config` associated to the `adapter_name`.
             try:
                 if hotswap:
@@ -342,13 +314,15 @@ class PeftAdapterMixin:
                             config=lora_config,
                         )
                     except Exception as e:
-                        logger.error(f"Hotswapping {adapter_name} was unsucessful with the following error: \n{e}")
+                        logger.error(f"Hotswapping {adapter_name} was unsuccessful with the following error: \n{e}")
                         raise
                     # the hotswap function raises if there are incompatible keys, so if we reach this point we can set
                     # it to None
                     incompatible_keys = None
                 else:
-                    inject_adapter_in_model(lora_config, self, adapter_name=adapter_name, **peft_kwargs)
+                    inject_adapter_in_model(
+                        lora_config, self, adapter_name=adapter_name, state_dict=state_dict, **peft_kwargs
+                    )
                     incompatible_keys = set_peft_model_state_dict(self, state_dict, adapter_name, **peft_kwargs)
                     if self._prepare_lora_hotswap_kwargs is not None:
@@ -377,46 +351,28 @@ class PeftAdapterMixin:
                                     module.delete_adapter(adapter_name)
                     self.peft_config.pop(adapter_name)
-                logger.error(f"Loading {adapter_name} was unsucessful with the following error: \n{e}")
+                logger.error(f"Loading {adapter_name} was unsuccessful with the following error: \n{e}")
                 raise
-            warn_msg = ""
-            if incompatible_keys is not None:
-                # Check only for unexpected keys.
-                unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
-                if unexpected_keys:
-                    lora_unexpected_keys = [k for k in unexpected_keys if "lora_" in k and adapter_name in k]
-                    if lora_unexpected_keys:
-                        warn_msg = (
-                            f"Loading adapter weights from state_dict led to unexpected keys found in the model:"
-                            f" {', '.join(lora_unexpected_keys)}. "
-                        )
-                # Filter missing keys specific to the current adapter.
-                missing_keys = getattr(incompatible_keys, "missing_keys", None)
-                if missing_keys:
-                    lora_missing_keys = [k for k in missing_keys if "lora_" in k and adapter_name in k]
-                    if lora_missing_keys:
-                        warn_msg += (
-                            f"Loading adapter weights from state_dict led to missing keys in the model:"
-                            f" {', '.join(lora_missing_keys)}."
-                        )
-            if warn_msg:
-                logger.warning(warn_msg)
+            _maybe_warn_for_unhandled_keys(incompatible_keys, adapter_name)
             # Offload back.
             if is_model_cpu_offload:
                 _pipeline.enable_model_cpu_offload()
             elif is_sequential_cpu_offload:
                 _pipeline.enable_sequential_cpu_offload()
+            elif is_group_offload:
+                for component in _pipeline.components.values():
+                    if isinstance(component, torch.nn.Module):
+                        _maybe_remove_and_reapply_group_offloading(component)
             # Unsafe code />
         if prefix is not None and not state_dict:
+            model_class_name = self.__class__.__name__
             logger.warning(
-                f"No LoRA keys associated to {self.__class__.__name__} found with the {prefix=}. "
+                f"No LoRA keys associated to {model_class_name} found with the {prefix=}. "
                 "This is safe to ignore if LoRA state dict didn't originally have any "
-                f"{self.__class__.__name__} related params. You can also try specifying `prefix=None` "
+                f"{model_class_name} related params. You can also try specifying `prefix=None` "
                 "to resolve the warning. Otherwise, open an issue if you think it's unexpected: "
                 "https://github.com/huggingface/diffusers/issues/new"
             )
@@ -439,17 +395,13 @@ class PeftAdapterMixin:
                 underlying model has multiple adapters loaded.
             upcast_before_saving (`bool`, defaults to `False`):
                 Whether to cast the underlying model to `torch.float32` before serialization.
-            save_function (`Callable`):
-                The function to use to save the state dictionary. Useful during distributed training when you need to
-                replace `torch.save` with another method. Can be configured with the environment variable
-                `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `True`):
                 Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
             weight_name: (`str`, *optional*, defaults to `None`): Name of the file to serialize the state dict with.
         """
         from peft.utils import get_peft_model_state_dict
-        from .lora_base import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE
+        from .lora_base import LORA_ADAPTER_METADATA_KEY, LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE
         if adapter_name is None:
             adapter_name = get_adapter_name(self)
@@ -457,6 +409,8 @@ class PeftAdapterMixin:
         if adapter_name not in getattr(self, "peft_config", {}):
             raise ValueError(f"Adapter name {adapter_name} not found in the model.")
+        lora_adapter_metadata = self.peft_config[adapter_name].to_dict()
         lora_layers_to_save = get_peft_model_state_dict(
             self.to(dtype=torch.float32 if upcast_before_saving else None), adapter_name=adapter_name
         )
@@ -466,7 +420,15 @@ class PeftAdapterMixin:
         if safe_serialization:
             def save_function(weights, filename):
-                return safetensors.torch.save_file(weights, filename, metadata={"format": "pt"})
+                # Inject framework format.
+                metadata = {"format": "pt"}
+                if lora_adapter_metadata is not None:
+                    for key, value in lora_adapter_metadata.items():
+                        if isinstance(value, set):
+                            lora_adapter_metadata[key] = list(value)
+                    metadata[LORA_ADAPTER_METADATA_KEY] = json.dumps(lora_adapter_metadata, indent=2, sort_keys=True)
+                return safetensors.torch.save_file(weights, filename, metadata=metadata)
         else:
             save_function = torch.save
@@ -479,7 +441,6 @@ class PeftAdapterMixin:
             else:
                 weight_name = LORA_WEIGHT_NAME
-        # TODO: we could consider saving the `peft_config` as well.
         save_path = Path(save_directory, weight_name).as_posix()
         save_function(lora_layers_to_save, save_path)
         logger.info(f"Model weights saved in {save_path}")
@@ -490,7 +451,7 @@ class PeftAdapterMixin:
         weights: Optional[Union[float, Dict, List[float], List[Dict], List[None]]] = None,
     ):
         """
-        Set the currently active adapters for use in the UNet.
+        Set the currently active adapters for use in the diffusion network (e.g. unet, transformer, etc.).
         Args:
             adapter_names (`List[str]` or `str`):
@@ -512,7 +473,7 @@ class PeftAdapterMixin:
             "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
         )
         pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
-        pipeline.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
+        pipeline.unet.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
         ```
         """
         if not USE_PEFT_BACKEND:
@@ -710,7 +671,7 @@ class PeftAdapterMixin:
             if self.lora_scale != 1.0:
                 module.scale_layer(self.lora_scale)
-            # For BC with prevous PEFT versions, we need to check the signature
+            # For BC with previous PEFT versions, we need to check the signature
             # of the `merge` method to see if it supports the `adapter_names` argument.
             supported_merge_kwargs = list(inspect.signature(module.merge).parameters)
             if "adapter_names" in supported_merge_kwargs:
@@ -738,11 +699,16 @@ class PeftAdapterMixin:
         if not USE_PEFT_BACKEND:
             raise ValueError("PEFT backend is required for `unload_lora()`.")
+        from ..hooks.group_offloading import _maybe_remove_and_reapply_group_offloading
         from ..utils import recurse_remove_peft_layers
         recurse_remove_peft_layers(self)
         if hasattr(self, "peft_config"):
             del self.peft_config
+        if hasattr(self, "_hf_peft_config_loaded"):
+            self._hf_peft_config_loaded = None
+        _maybe_remove_and_reapply_group_offloading(self)
     def disable_lora(self):
         """
@@ -760,7 +726,7 @@ class PeftAdapterMixin:
         pipeline.load_lora_weights(
             "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
         )
-        pipeline.disable_lora()
+        pipeline.unet.disable_lora()
         ```
         """
         if not USE_PEFT_BACKEND:
@@ -783,7 +749,7 @@ class PeftAdapterMixin:
         pipeline.load_lora_weights(
             "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
         )
-        pipeline.enable_lora()
+        pipeline.unet.enable_lora()
         ```
         """
         if not USE_PEFT_BACKEND:
@@ -810,7 +776,7 @@ class PeftAdapterMixin:
         pipeline.load_lora_weights(
             "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic"
         )
-        pipeline.delete_adapters("cinematic")
+        pipeline.unet.delete_adapters("cinematic")
         ```
         """
         if not USE_PEFT_BACKEND:

diffusers/loaders/single_file.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -453,7 +453,7 @@ class FromSingleFileMixin:
                     logger.warning(
                         "Detected legacy `from_single_file` loading behavior. Attempting to create the pipeline based on inferred components.\n"
                         "This may lead to errors if the model components are not correctly inferred. \n"
-                        "To avoid this warning, please explicity pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n"
+                        "To avoid this warning, please explicitly pass the `config` argument to `from_single_file` with a path to a local diffusers model repo \n"
                         "e.g. `from_single_file(<my model checkpoint path>, config=<path to local diffusers model repo>) \n"
                         "or run `from_single_file` with `local_files_only=False` first to update the local cache directory with "
                         "the necessary config files.\n"

diffusers/loaders/single_file_model.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -21,15 +21,20 @@ import torch
 from huggingface_hub.utils import validate_hf_hub_args
 from typing_extensions import Self
+from .. import __version__
 from ..quantizers import DiffusersAutoQuantizer
-from ..utils import deprecate, is_accelerate_available, logging
+from ..utils import deprecate, is_accelerate_available, is_torch_version, logging
+from ..utils.torch_utils import empty_device_cache
 from .single_file_utils import (
     SingleFileComponentError,
     convert_animatediff_checkpoint_to_diffusers,
     convert_auraflow_transformer_checkpoint_to_diffusers,
     convert_autoencoder_dc_checkpoint_to_diffusers,
+    convert_chroma_transformer_checkpoint_to_diffusers,
     convert_controlnet_checkpoint,
+    convert_cosmos_transformer_checkpoint_to_diffusers,
     convert_flux_transformer_checkpoint_to_diffusers,
+    convert_hidream_transformer_to_diffusers,
     convert_hunyuan_video_transformer_to_diffusers,
     convert_ldm_unet_checkpoint,
     convert_ldm_vae_checkpoint,
@@ -57,8 +62,12 @@ logger = logging.get_logger(__name__)
 if is_accelerate_available():
     from accelerate import dispatch_model, init_empty_weights
-    from ..models.modeling_utils import load_model_dict_into_meta
+    from ..models.model_loading_utils import load_model_dict_into_meta
+if is_torch_version(">=", "1.9.0") and is_accelerate_available():
+    _LOW_CPU_MEM_USAGE_DEFAULT = True
+else:
+    _LOW_CPU_MEM_USAGE_DEFAULT = False
 SINGLE_FILE_LOADABLE_CLASSES = {
     "StableCascadeUNet": {
@@ -95,6 +104,10 @@ SINGLE_FILE_LOADABLE_CLASSES = {
         "checkpoint_mapping_fn": convert_flux_transformer_checkpoint_to_diffusers,
         "default_subfolder": "transformer",
     },
+    "ChromaTransformer2DModel": {
+        "checkpoint_mapping_fn": convert_chroma_transformer_checkpoint_to_diffusers,
+        "default_subfolder": "transformer",
+    },
     "LTXVideoTransformer3DModel": {
         "checkpoint_mapping_fn": convert_ltx_transformer_checkpoint_to_diffusers,
         "default_subfolder": "transformer",
@@ -128,13 +141,33 @@ SINGLE_FILE_LOADABLE_CLASSES = {
         "checkpoint_mapping_fn": convert_wan_transformer_to_diffusers,
         "default_subfolder": "transformer",
     },
+    "WanVACETransformer3DModel": {
+        "checkpoint_mapping_fn": convert_wan_transformer_to_diffusers,
+        "default_subfolder": "transformer",
+    },
     "AutoencoderKLWan": {
         "checkpoint_mapping_fn": convert_wan_vae_to_diffusers,
         "default_subfolder": "vae",
     },
+    "HiDreamImageTransformer2DModel": {
+        "checkpoint_mapping_fn": convert_hidream_transformer_to_diffusers,
+        "default_subfolder": "transformer",
+    },
+    "CosmosTransformer3DModel": {
+        "checkpoint_mapping_fn": convert_cosmos_transformer_checkpoint_to_diffusers,
+        "default_subfolder": "transformer",
+    },
+    "QwenImageTransformer2DModel": {
+        "checkpoint_mapping_fn": lambda x: x,
+        "default_subfolder": "transformer",
+    },
 }
+def _should_convert_state_dict_to_diffusers(model_state_dict, checkpoint_state_dict):
+    return not set(model_state_dict.keys()).issubset(set(checkpoint_state_dict.keys()))
 def _get_single_file_loadable_mapping_class(cls):
     diffusers_module = importlib.import_module(__name__.split(".")[0])
     for loadable_class_str in SINGLE_FILE_LOADABLE_CLASSES:
@@ -186,9 +219,8 @@ class FromOriginalModelMixin:
             original_config (`str`, *optional*):
                 Dict or path to a yaml file containing the configuration for the model in its original format.
                     If a dict is provided, it will be used to initialize the model configuration.
-            torch_dtype (`str` or `torch.dtype`, *optional*):
-                Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
-                dtype is automatically derived from the model's weights.
+            torch_dtype (`torch.dtype`, *optional*):
+                Override the default `torch.dtype` and load the model with another dtype.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
@@ -208,6 +240,11 @@ class FromOriginalModelMixin:
             revision (`str`, *optional*, defaults to `"main"`):
                 The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                 allowed by Git.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 and
+                is_accelerate_available() else `False`): Speed up model loading only loading the pretrained weights and
+                not initializing the weights. This also tries to not use more than 1x model size in CPU memory
+                (including peak memory) while loading the model. Only supported for PyTorch >= 1.9.0. If you are using
+                an older version of PyTorch, setting this argument to `True` will raise an error.
             disable_mmap ('bool', *optional*, defaults to 'False'):
                 Whether to disable mmap when loading a Safetensors model. This option can perform better when the model
                 is on a network mount or hard drive, which may not handle the seeky-ness of mmap very well.
@@ -257,9 +294,15 @@ class FromOriginalModelMixin:
         config_revision = kwargs.pop("config_revision", None)
         torch_dtype = kwargs.pop("torch_dtype", None)
         quantization_config = kwargs.pop("quantization_config", None)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         device = kwargs.pop("device", None)
         disable_mmap = kwargs.pop("disable_mmap", False)
+        user_agent = {"diffusers": __version__, "file_type": "single_file", "framework": "pytorch"}
+        # In order to ensure popular quantization methods are supported. Can be disable with `disable_telemetry`
+        if quantization_config is not None:
+            user_agent["quant"] = quantization_config.quant_method.value
         if torch_dtype is not None and not isinstance(torch_dtype, torch.dtype):
             torch_dtype = torch.float32
             logger.warning(
@@ -278,6 +321,7 @@ class FromOriginalModelMixin:
                 local_files_only=local_files_only,
                 revision=revision,
                 disable_mmap=disable_mmap,
+                user_agent=user_agent,
             )
         if quantization_config is not None:
             hf_quantizer = DiffusersAutoQuantizer.from_config(quantization_config)
@@ -355,19 +399,23 @@ class FromOriginalModelMixin:
             model_kwargs = {k: kwargs.get(k) for k in kwargs if k in expected_kwargs or k in optional_kwargs}
             diffusers_model_config.update(model_kwargs)
+        ctx = init_empty_weights if low_cpu_mem_usage else nullcontext
+        with ctx():
+            model = cls.from_config(diffusers_model_config)
         checkpoint_mapping_kwargs = _get_mapping_function_kwargs(checkpoint_mapping_fn, **kwargs)
-        diffusers_format_checkpoint = checkpoint_mapping_fn(
-            config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs
-        )
+        if _should_convert_state_dict_to_diffusers(model.state_dict(), checkpoint):
+            diffusers_format_checkpoint = checkpoint_mapping_fn(
+                config=diffusers_model_config, checkpoint=checkpoint, **checkpoint_mapping_kwargs
+            )
+        else:
+            diffusers_format_checkpoint = checkpoint
         if not diffusers_format_checkpoint:
             raise SingleFileComponentError(
                 f"Failed to load {mapping_class_name}. Weights for this component appear to be missing in the checkpoint."
             )
-        ctx = init_empty_weights if is_accelerate_available() else nullcontext
-        with ctx():
-            model = cls.from_config(diffusers_model_config)
         # Check if `_keep_in_fp32_modules` is not None
         use_keep_in_fp32_modules = (cls._keep_in_fp32_modules is not None) and (
             (torch_dtype == torch.float16) or hasattr(hf_quantizer, "use_keep_in_fp32_modules")
@@ -389,7 +437,7 @@ class FromOriginalModelMixin:
             )
         device_map = None
-        if is_accelerate_available():
+        if low_cpu_mem_usage:
             param_device = torch.device(device) if device else torch.device("cpu")
             empty_state_dict = model.state_dict()
             unexpected_keys = [
@@ -405,6 +453,7 @@ class FromOriginalModelMixin:
                 keep_in_fp32_modules=keep_in_fp32_modules,
                 unexpected_keys=unexpected_keys,
             )
+            empty_device_cache()
         else:
             _, unexpected_keys = model.load_state_dict(diffusers_format_checkpoint, strict=False)

diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

diffusers 0.33.1py3-none-any.whl → 0.35.0py3-none-any.whl