diffusers 0.27.1__py3-none-any.whl → 0.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +18 -1
- diffusers/callbacks.py +156 -0
- diffusers/commands/env.py +110 -6
- diffusers/configuration_utils.py +16 -11
- diffusers/dependency_versions_table.py +2 -1
- diffusers/image_processor.py +158 -45
- diffusers/loaders/__init__.py +2 -5
- diffusers/loaders/autoencoder.py +4 -4
- diffusers/loaders/controlnet.py +4 -4
- diffusers/loaders/ip_adapter.py +80 -22
- diffusers/loaders/lora.py +134 -20
- diffusers/loaders/lora_conversion_utils.py +46 -43
- diffusers/loaders/peft.py +4 -3
- diffusers/loaders/single_file.py +401 -170
- diffusers/loaders/single_file_model.py +290 -0
- diffusers/loaders/single_file_utils.py +616 -672
- diffusers/loaders/textual_inversion.py +41 -20
- diffusers/loaders/unet.py +168 -115
- diffusers/loaders/unet_loader_utils.py +163 -0
- diffusers/models/__init__.py +2 -0
- diffusers/models/activations.py +11 -3
- diffusers/models/attention.py +10 -11
- diffusers/models/attention_processor.py +367 -148
- diffusers/models/autoencoders/autoencoder_asym_kl.py +14 -16
- diffusers/models/autoencoders/autoencoder_kl.py +18 -19
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -11
- diffusers/models/autoencoders/autoencoder_tiny.py +16 -16
- diffusers/models/autoencoders/consistency_decoder_vae.py +36 -11
- diffusers/models/autoencoders/vae.py +23 -24
- diffusers/models/controlnet.py +12 -9
- diffusers/models/controlnet_flax.py +4 -4
- diffusers/models/controlnet_xs.py +1915 -0
- diffusers/models/downsampling.py +17 -18
- diffusers/models/embeddings.py +147 -24
- diffusers/models/model_loading_utils.py +149 -0
- diffusers/models/modeling_flax_pytorch_utils.py +2 -1
- diffusers/models/modeling_flax_utils.py +4 -4
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +118 -98
- diffusers/models/resnet.py +18 -23
- diffusers/models/transformer_temporal.py +3 -3
- diffusers/models/transformers/dual_transformer_2d.py +4 -4
- diffusers/models/transformers/prior_transformer.py +7 -7
- diffusers/models/transformers/t5_film_transformer.py +17 -19
- diffusers/models/transformers/transformer_2d.py +272 -156
- diffusers/models/transformers/transformer_temporal.py +10 -10
- diffusers/models/unets/unet_1d.py +5 -5
- diffusers/models/unets/unet_1d_blocks.py +29 -29
- diffusers/models/unets/unet_2d.py +6 -6
- diffusers/models/unets/unet_2d_blocks.py +137 -128
- diffusers/models/unets/unet_2d_condition.py +20 -15
- diffusers/models/unets/unet_2d_condition_flax.py +6 -5
- diffusers/models/unets/unet_3d_blocks.py +79 -77
- diffusers/models/unets/unet_3d_condition.py +13 -9
- diffusers/models/unets/unet_i2vgen_xl.py +14 -13
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +114 -14
- diffusers/models/unets/unet_spatio_temporal_condition.py +15 -14
- diffusers/models/unets/unet_stable_cascade.py +16 -13
- diffusers/models/upsampling.py +17 -20
- diffusers/models/vq_model.py +16 -15
- diffusers/pipelines/__init__.py +25 -3
- diffusers/pipelines/amused/pipeline_amused.py +12 -12
- diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
- diffusers/pipelines/animatediff/__init__.py +2 -0
- diffusers/pipelines/animatediff/pipeline_animatediff.py +24 -46
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1284 -0
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +82 -72
- diffusers/pipelines/animatediff/pipeline_output.py +3 -2
- diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +54 -35
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +120 -36
- diffusers/pipelines/auto_pipeline.py +21 -17
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -5
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +5 -5
- diffusers/pipelines/controlnet/multicontrolnet.py +4 -8
- diffusers/pipelines/controlnet/pipeline_controlnet.py +87 -52
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +50 -43
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +52 -40
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +80 -47
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +147 -49
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +89 -55
- diffusers/pipelines/controlnet_xs/__init__.py +68 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +911 -0
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1115 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +14 -28
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +18 -33
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +21 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +20 -36
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +23 -39
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +17 -32
- diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +43 -20
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +36 -18
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +12 -12
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +18 -21
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +20 -15
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +30 -25
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +69 -59
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
- diffusers/pipelines/dit/pipeline_dit.py +3 -0
- diffusers/pipelines/free_init_utils.py +39 -38
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +23 -20
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +32 -29
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
- diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +20 -33
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +24 -35
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +48 -30
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +50 -28
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +11 -11
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +61 -67
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +70 -69
- diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
- diffusers/pipelines/marigold/__init__.py +50 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +561 -0
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
- diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
- diffusers/pipelines/pia/pipeline_pia.py +39 -125
- diffusers/pipelines/pipeline_flax_utils.py +4 -4
- diffusers/pipelines/pipeline_loading_utils.py +268 -23
- diffusers/pipelines/pipeline_utils.py +266 -37
- diffusers/pipelines/pixart_alpha/__init__.py +8 -1
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +65 -75
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +880 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +10 -5
- diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
- diffusers/pipelines/shap_e/renderer.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +36 -22
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +33 -32
- diffusers/pipelines/stable_diffusion/__init__.py +0 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +18 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +73 -39
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +24 -17
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +66 -36
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +82 -46
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +123 -28
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +16 -16
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +24 -19
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +37 -31
- diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +23 -15
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +44 -42
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +23 -18
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +19 -14
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +20 -15
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -19
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +65 -32
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +274 -38
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +92 -25
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +88 -44
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +108 -56
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +96 -51
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -25
- diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +59 -30
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +71 -42
- diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +18 -41
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +21 -85
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -19
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +39 -33
- diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
- diffusers/pipelines/unidiffuser/modeling_uvit.py +9 -9
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +23 -23
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +4 -6
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +10 -10
- diffusers/schedulers/__init__.py +2 -2
- diffusers/schedulers/deprecated/__init__.py +1 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
- diffusers/schedulers/scheduling_amused.py +5 -5
- diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
- diffusers/schedulers/scheduling_consistency_models.py +23 -25
- diffusers/schedulers/scheduling_ddim.py +22 -24
- diffusers/schedulers/scheduling_ddim_flax.py +2 -1
- diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
- diffusers/schedulers/scheduling_ddim_parallel.py +28 -30
- diffusers/schedulers/scheduling_ddpm.py +20 -22
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +26 -28
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
- diffusers/schedulers/scheduling_deis_multistep.py +46 -42
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +107 -77
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +46 -46
- diffusers/schedulers/scheduling_dpmsolver_sde.py +26 -22
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +90 -65
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +78 -53
- diffusers/schedulers/scheduling_edm_euler.py +53 -30
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +26 -28
- diffusers/schedulers/scheduling_euler_discrete.py +163 -67
- diffusers/schedulers/scheduling_heun_discrete.py +60 -38
- diffusers/schedulers/scheduling_ipndm.py +8 -8
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +22 -18
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +22 -18
- diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
- diffusers/schedulers/scheduling_lcm.py +21 -23
- diffusers/schedulers/scheduling_lms_discrete.py +27 -25
- diffusers/schedulers/scheduling_pndm.py +20 -20
- diffusers/schedulers/scheduling_repaint.py +20 -20
- diffusers/schedulers/scheduling_sasolver.py +55 -54
- diffusers/schedulers/scheduling_sde_ve.py +19 -19
- diffusers/schedulers/scheduling_tcd.py +39 -30
- diffusers/schedulers/scheduling_unclip.py +15 -15
- diffusers/schedulers/scheduling_unipc_multistep.py +115 -41
- diffusers/schedulers/scheduling_utils.py +14 -5
- diffusers/schedulers/scheduling_utils_flax.py +3 -3
- diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
- diffusers/training_utils.py +56 -1
- diffusers/utils/__init__.py +7 -0
- diffusers/utils/doc_utils.py +1 -0
- diffusers/utils/dummy_pt_objects.py +30 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +90 -0
- diffusers/utils/dynamic_modules_utils.py +24 -11
- diffusers/utils/hub_utils.py +3 -2
- diffusers/utils/import_utils.py +91 -0
- diffusers/utils/loading_utils.py +2 -2
- diffusers/utils/logging.py +1 -1
- diffusers/utils/peft_utils.py +32 -5
- diffusers/utils/state_dict_utils.py +11 -2
- diffusers/utils/testing_utils.py +71 -6
- diffusers/utils/torch_utils.py +1 -0
- diffusers/video_processor.py +113 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/METADATA +7 -7
- diffusers-0.28.0.dist-info/RECORD +414 -0
- diffusers-0.27.1.dist-info/RECORD +0 -399
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/LICENSE +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/WHEEL +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.27.1.dist-info → diffusers-0.28.0.dist-info}/top_level.txt +0 -0
diffusers/loaders/lora.py
CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import copy
 import inspect
 import os
 from pathlib import Path
@@ -25,7 +26,7 @@ from packaging import version
 from torch import nn

 from .. import __version__
-from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_state_dict
 from ..utils import (
     USE_PEFT_BACKEND,
     _get_model_file,
@@ -36,6 +37,7 @@ from ..utils import (
     get_adapter_name,
     get_peft_kwargs,
     is_accelerate_available,
+    is_peft_version,
     is_transformers_available,
     logging,
     recurse_remove_peft_layers,
@@ -113,7 +115,7 @@ class LoraLoaderMixin:
         # First, ensure that the checkpoint is a compatible one and can be successfully loaded.
         state_dict, network_alphas = self.lora_state_dict(pretrained_model_name_or_path_or_dict, **kwargs)

-        is_correct_format = all("lora" in key for key in state_dict.keys())
+        is_correct_format = all("lora" in key or "dora_scale" in key for key in state_dict.keys())
         if not is_correct_format:
             raise ValueError("Invalid LoRA checkpoint.")

@@ -174,9 +176,9 @@ class LoraLoaderMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download
-
-
+            resume_download:
+                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
+                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -206,7 +208,7 @@ class LoraLoaderMixin:
         # UNet and text encoder or both.
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download",
+        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -281,7 +283,7 @@ class LoraLoaderMixin:
                     subfolder=subfolder,
                     user_agent=user_agent,
                 )
-                state_dict =
+                state_dict = load_state_dict(model_file)
         else:
             state_dict = pretrained_model_name_or_path_or_dict

@@ -361,13 +363,17 @@ class LoraLoaderMixin:
         is_model_cpu_offload = False
         is_sequential_cpu_offload = False

-        if _pipeline is not None:
+        if _pipeline is not None and _pipeline.hf_device_map is None:
             for _, component in _pipeline.components.items():
                 if isinstance(component, nn.Module) and hasattr(component, "_hf_hook"):
                     if not is_model_cpu_offload:
                         is_model_cpu_offload = isinstance(component._hf_hook, CpuOffload)
                     if not is_sequential_cpu_offload:
-                        is_sequential_cpu_offload =
+                        is_sequential_cpu_offload = (
+                            isinstance(component._hf_hook, AlignDevicesHook)
+                            or hasattr(component._hf_hook, "hooks")
+                            and isinstance(component._hf_hook.hooks[0], AlignDevicesHook)
+                        )

                     logger.info(
                         "Accelerate hooks detected. Since you have called `load_lora_weights()`, the previous hooks will be first removed. Then the LoRA parameters will be loaded and the hooks will be applied again."
@@ -451,6 +457,15 @@ class LoraLoaderMixin:
                     rank[key] = val.shape[1]

             lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict, is_unet=True)
+            if "use_dora" in lora_config_kwargs:
+                if lora_config_kwargs["use_dora"]:
+                    if is_peft_version("<", "0.9.0"):
+                        raise ValueError(
+                            "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
+                        )
+                else:
+                    if is_peft_version("<", "0.9.0"):
+                        lora_config_kwargs.pop("use_dora")
             lora_config = LoraConfig(**lora_config_kwargs)

             # adapter_name
@@ -572,6 +587,15 @@ class LoraLoaderMixin:
             }

             lora_config_kwargs = get_peft_kwargs(rank, network_alphas, text_encoder_lora_state_dict, is_unet=False)
+            if "use_dora" in lora_config_kwargs:
+                if lora_config_kwargs["use_dora"]:
+                    if is_peft_version("<", "0.9.0"):
+                        raise ValueError(
+                            "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
+                        )
+                else:
+                    if is_peft_version("<", "0.9.0"):
+                        lora_config_kwargs.pop("use_dora")
             lora_config = LoraConfig(**lora_config_kwargs)

             # adapter_name
@@ -654,6 +678,13 @@ class LoraLoaderMixin:
                 rank[key] = val.shape[1]

         lora_config_kwargs = get_peft_kwargs(rank, network_alphas, state_dict)
+        if "use_dora" in lora_config_kwargs:
+            if lora_config_kwargs["use_dora"] and is_peft_version("<", "0.9.0"):
+                raise ValueError(
+                    "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
+                )
+            else:
+                lora_config_kwargs.pop("use_dora")
         lora_config = LoraConfig(**lora_config_kwargs)

         # adapter_name
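The three hunks above gate DoRA-style checkpoints (which carry `dora_scale` / `lora_magnitude_vector` entries) on `peft` >= 0.9.0. A minimal usage sketch, assuming that requirement is met; the repository and weight-file names are hypothetical placeholders:

    # Loading a DoRA-enabled LoRA goes through the same entry point as a plain LoRA;
    # the version check above raises if the installed peft is older than 0.9.0.
    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")
    pipe.load_lora_weights("some-user/sdxl-dora-lora", weight_name="pytorch_lora_weights.safetensors")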
@@ -959,7 +990,7 @@ class LoraLoaderMixin:
         self,
         adapter_names: Union[List[str], str],
         text_encoder: Optional["PreTrainedModel"] = None,  # noqa: F821
-        text_encoder_weights: List[float] = None,
+        text_encoder_weights: Optional[Union[float, List[float], List[None]]] = None,
     ):
         """
         Sets the adapter layers for the text encoder.
@@ -977,15 +1008,20 @@ class LoraLoaderMixin:
             raise ValueError("PEFT backend is required for this method.")

         def process_weights(adapter_names, weights):
-
-
-
-                weights = [weights]
+            # Expand weights into a list, one entry per adapter
+            # e.g. for 2 adapters:  7 -> [7,7] ; [3, None] -> [3, None]
+            if not isinstance(weights, list):
+                weights = [weights] * len(adapter_names)

             if len(adapter_names) != len(weights):
                 raise ValueError(
                     f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(weights)}"
                 )
+
+            # Set None values to default of 1.0
+            # e.g. [7,7] -> [7,7] ; [3, None] -> [3,1]
+            weights = [w if w is not None else 1.0 for w in weights]
+
             return weights

         adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
@@ -1033,17 +1069,77 @@ class LoraLoaderMixin:
     def set_adapters(
         self,
         adapter_names: Union[List[str], str],
-        adapter_weights: Optional[List[float]] = None,
+        adapter_weights: Optional[Union[float, Dict, List[float], List[Dict]]] = None,
     ):
+        adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
+
+        adapter_weights = copy.deepcopy(adapter_weights)
+
+        # Expand weights into a list, one entry per adapter
+        if not isinstance(adapter_weights, list):
+            adapter_weights = [adapter_weights] * len(adapter_names)
+
+        if len(adapter_names) != len(adapter_weights):
+            raise ValueError(
+                f"Length of adapter names {len(adapter_names)} is not equal to the length of the weights {len(adapter_weights)}"
+            )
+
+        # Decompose weights into weights for unet, text_encoder and text_encoder_2
+        unet_lora_weights, text_encoder_lora_weights, text_encoder_2_lora_weights = [], [], []
+
+        list_adapters = self.get_list_adapters()  # eg {"unet": ["adapter1", "adapter2"], "text_encoder": ["adapter2"]}
+        all_adapters = {
+            adapter for adapters in list_adapters.values() for adapter in adapters
+        }  # eg ["adapter1", "adapter2"]
+        invert_list_adapters = {
+            adapter: [part for part, adapters in list_adapters.items() if adapter in adapters]
+            for adapter in all_adapters
+        }  # eg {"adapter1": ["unet"], "adapter2": ["unet", "text_encoder"]}
+
+        for adapter_name, weights in zip(adapter_names, adapter_weights):
+            if isinstance(weights, dict):
+                unet_lora_weight = weights.pop("unet", None)
+                text_encoder_lora_weight = weights.pop("text_encoder", None)
+                text_encoder_2_lora_weight = weights.pop("text_encoder_2", None)
+
+                if len(weights) > 0:
+                    raise ValueError(
+                        f"Got invalid key '{weights.keys()}' in lora weight dict for adapter {adapter_name}."
+                    )
+
+                if text_encoder_2_lora_weight is not None and not hasattr(self, "text_encoder_2"):
+                    logger.warning(
+                        "Lora weight dict contains text_encoder_2 weights but will be ignored because pipeline does not have text_encoder_2."
+                    )
+
+                # warn if adapter doesn't have parts specified by adapter_weights
+                for part_weight, part_name in zip(
+                    [unet_lora_weight, text_encoder_lora_weight, text_encoder_2_lora_weight],
+                    ["unet", "text_encoder", "text_encoder_2"],
+                ):
+                    if part_weight is not None and part_name not in invert_list_adapters[adapter_name]:
+                        logger.warning(
+                            f"Lora weight dict for adapter '{adapter_name}' contains {part_name}, but this will be ignored because {adapter_name} does not contain weights for {part_name}. Valid parts for {adapter_name} are: {invert_list_adapters[adapter_name]}."
+                        )
+
+            else:
+                unet_lora_weight = weights
+                text_encoder_lora_weight = weights
+                text_encoder_2_lora_weight = weights
+
+            unet_lora_weights.append(unet_lora_weight)
+            text_encoder_lora_weights.append(text_encoder_lora_weight)
+            text_encoder_2_lora_weights.append(text_encoder_2_lora_weight)
+
         unet = getattr(self, self.unet_name) if not hasattr(self, "unet") else self.unet
         # Handle the UNET
-        unet.set_adapters(adapter_names,
+        unet.set_adapters(adapter_names, unet_lora_weights)

         # Handle the Text Encoder
         if hasattr(self, "text_encoder"):
-            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder,
+            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder, text_encoder_lora_weights)
         if hasattr(self, "text_encoder_2"):
-            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2,
+            self.set_adapters_for_text_encoder(adapter_names, self.text_encoder_2, text_encoder_2_lora_weights)

     def disable_lora(self):
         if not USE_PEFT_BACKEND:
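The widened `adapter_weights` signature above accepts either a single float for all components or a per-component dict with `unet`, `text_encoder`, and `text_encoder_2` keys. A small hedged sketch, assuming two adapters (the names "pixel" and "toy" are hypothetical) were loaded earlier via `load_lora_weights(..., adapter_name=...)`:

    # Per-component LoRA scales via the new dict form of adapter_weights.
    pipe.set_adapters(
        ["pixel", "toy"],
        adapter_weights=[
            {"unet": 1.0, "text_encoder": 0.5},  # dict: different scale per sub-model
            0.8,                                 # plain float: same scale everywhere
        ],
    )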
@@ -1175,6 +1271,11 @@ class LoraLoaderMixin:
                 for adapter_name in adapter_names:
                     unet_module.lora_A[adapter_name].to(device)
                     unet_module.lora_B[adapter_name].to(device)
+                    # this is a param, not a module, so device placement is not in-place -> re-assign
+                    if hasattr(unet_module, "lora_magnitude_vector") and unet_module.lora_magnitude_vector is not None:
+                        unet_module.lora_magnitude_vector[adapter_name] = unet_module.lora_magnitude_vector[
+                            adapter_name
+                        ].to(device)

         # Handle the text encoder
         modules_to_process = []
@@ -1191,6 +1292,14 @@ class LoraLoaderMixin:
                     for adapter_name in adapter_names:
                         text_encoder_module.lora_A[adapter_name].to(device)
                         text_encoder_module.lora_B[adapter_name].to(device)
+                        # this is a param, not a module, so device placement is not in-place -> re-assign
+                        if (
+                            hasattr(text_encoder_module, "lora_magnitude_vector")
+                            and text_encoder_module.lora_magnitude_vector is not None
+                        ):
+                            text_encoder_module.lora_magnitude_vector[
+                                adapter_name
+                            ] = text_encoder_module.lora_magnitude_vector[adapter_name].to(device)


 class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):
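These two device-placement hunks sit in the pipeline-level helper that moves LoRA layers between devices (the hunk headers only show the class, but this appears to be `set_lora_device`); DoRA magnitude vectors are parameters rather than modules, so the `.to(device)` result must be re-assigned. A brief hedged usage sketch:

    # Move a specific adapter's LoRA (and DoRA magnitude) weights to a device;
    # assumes the surrounding method is set_lora_device, which the hunk headers do not show.
    pipe.set_lora_device(adapter_names=["pixel"], device="cuda:0")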
@@ -1243,7 +1352,7 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):
             unet_config=self.unet.config,
             **kwargs,
         )
-        is_correct_format = all("lora" in key for key in state_dict.keys())
+        is_correct_format = all("lora" in key or "dora_scale" in key for key in state_dict.keys())
         if not is_correct_format:
             raise ValueError("Invalid LoRA checkpoint.")

@@ -1297,6 +1406,9 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):
             text_encoder_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
                 State dict of the LoRA layers corresponding to the `text_encoder`. Must explicitly pass the text
                 encoder LoRA state dict because it comes from 🤗 Transformers.
+            text_encoder_2_lora_layers (`Dict[str, torch.nn.Module]` or `Dict[str, torch.Tensor]`):
+                State dict of the LoRA layers corresponding to the `text_encoder_2`. Must explicitly pass the text
+                encoder LoRA state dict because it comes from 🤗 Transformers.
             is_main_process (`bool`, *optional*, defaults to `True`):
                 Whether the process calling this is the main process or not. Useful during distributed training and you
                 need to call this function on all processes. In this case, set `is_main_process=True` only on the main
@@ -1323,8 +1435,10 @@ class StableDiffusionXLLoraLoaderMixin(LoraLoaderMixin):
         if unet_lora_layers:
             state_dict.update(pack_weights(unet_lora_layers, "unet"))

-        if text_encoder_lora_layers
+        if text_encoder_lora_layers:
             state_dict.update(pack_weights(text_encoder_lora_layers, "text_encoder"))
+
+        if text_encoder_2_lora_layers:
             state_dict.update(pack_weights(text_encoder_2_lora_layers, "text_encoder_2"))

         cls.write_lora_layers(
diffusers/loaders/lora_conversion_utils.py
CHANGED
@@ -14,7 +14,7 @@

 import re

-from ..utils import logging
+from ..utils import is_peft_version, logging


 logger = logging.get_logger(__name__)
@@ -128,6 +128,15 @@ def _convert_kohya_lora_to_diffusers(state_dict, unet_name="unet", text_encoder_
     te_state_dict = {}
     te2_state_dict = {}
     network_alphas = {}
+    is_unet_dora_lora = any("dora_scale" in k and "lora_unet_" in k for k in state_dict)
+    is_te_dora_lora = any("dora_scale" in k and ("lora_te_" in k or "lora_te1_" in k) for k in state_dict)
+    is_te2_dora_lora = any("dora_scale" in k and "lora_te2_" in k for k in state_dict)
+
+    if is_unet_dora_lora or is_te_dora_lora or is_te2_dora_lora:
+        if is_peft_version("<", "0.9.0"):
+            raise ValueError(
+                "You need `peft` 0.9.0 at least to use DoRA-enabled LoRAs. Please upgrade your installation of `peft`."
+            )

     # every down weight has a corresponding up weight and potentially an alpha weight
     lora_keys = [k for k in state_dict.keys() if k.endswith("lora_down.weight")]
@@ -198,46 +207,19 @@ def _convert_kohya_lora_to_diffusers(state_dict, unet_name="unet", text_encoder_
             unet_state_dict[diffusers_name] = state_dict.pop(key)
             unet_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)

-
-
-
-
-
-            diffusers_name = diffusers_name.replace("k.proj.lora", "to_k_lora")
-            diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora")
-            diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora")
-            if "self_attn" in diffusers_name:
-                te_state_dict[diffusers_name] = state_dict.pop(key)
-                te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
-            elif "mlp" in diffusers_name:
-                # Be aware that this is the new diffusers convention and the rest of the code might
-                # not utilize it yet.
-                diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.")
-                te_state_dict[diffusers_name] = state_dict.pop(key)
-                te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
+            if is_unet_dora_lora:
+                dora_scale_key_to_replace = "_lora.down." if "_lora.down." in diffusers_name else ".lora.down."
+                unet_state_dict[
+                    diffusers_name.replace(dora_scale_key_to_replace, ".lora_magnitude_vector.")
+                ] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))

-
-
-
-
-
-            diffusers_name = diffusers_name.replace("q.proj.lora", "to_q_lora")
-            diffusers_name = diffusers_name.replace("k.proj.lora", "to_k_lora")
-            diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora")
-            diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora")
-            if "self_attn" in diffusers_name:
-                te_state_dict[diffusers_name] = state_dict.pop(key)
-                te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
-            elif "mlp" in diffusers_name:
-                # Be aware that this is the new diffusers convention and the rest of the code might
-                # not utilize it yet.
-                diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.")
-                te_state_dict[diffusers_name] = state_dict.pop(key)
-                te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
+        elif lora_name.startswith(("lora_te_", "lora_te1_", "lora_te2_")):
+            if lora_name.startswith(("lora_te_", "lora_te1_")):
+                key_to_replace = "lora_te_" if lora_name.startswith("lora_te_") else "lora_te1_"
+            else:
+                key_to_replace = "lora_te2_"

-
-        elif lora_name.startswith("lora_te2_"):
-            diffusers_name = key.replace("lora_te2_", "").replace("_", ".")
+            diffusers_name = key.replace(key_to_replace, "").replace("_", ".")
             diffusers_name = diffusers_name.replace("text.model", "text_model")
             diffusers_name = diffusers_name.replace("self.attn", "self_attn")
             diffusers_name = diffusers_name.replace("q.proj.lora", "to_q_lora")
@@ -245,14 +227,35 @@ def _convert_kohya_lora_to_diffusers(state_dict, unet_name="unet", text_encoder_
             diffusers_name = diffusers_name.replace("v.proj.lora", "to_v_lora")
             diffusers_name = diffusers_name.replace("out.proj.lora", "to_out_lora")
             if "self_attn" in diffusers_name:
-
-
+                if lora_name.startswith(("lora_te_", "lora_te1_")):
+                    te_state_dict[diffusers_name] = state_dict.pop(key)
+                    te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
+                else:
+                    te2_state_dict[diffusers_name] = state_dict.pop(key)
+                    te2_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
             elif "mlp" in diffusers_name:
                 # Be aware that this is the new diffusers convention and the rest of the code might
                 # not utilize it yet.
                 diffusers_name = diffusers_name.replace(".lora.", ".lora_linear_layer.")
-
-
+                if lora_name.startswith(("lora_te_", "lora_te1_")):
+                    te_state_dict[diffusers_name] = state_dict.pop(key)
+                    te_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
+                else:
+                    te2_state_dict[diffusers_name] = state_dict.pop(key)
+                    te2_state_dict[diffusers_name.replace(".down.", ".up.")] = state_dict.pop(lora_name_up)
+
+            if (is_te_dora_lora or is_te2_dora_lora) and lora_name.startswith(("lora_te_", "lora_te1_", "lora_te2_")):
+                dora_scale_key_to_replace_te = (
+                    "_lora.down." if "_lora.down." in diffusers_name else ".lora_linear_layer."
+                )
+                if lora_name.startswith(("lora_te_", "lora_te1_")):
+                    te_state_dict[
+                        diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
+                    ] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))
+                elif lora_name.startswith("lora_te2_"):
+                    te2_state_dict[
+                        diffusers_name.replace(dora_scale_key_to_replace_te, ".lora_magnitude_vector.")
+                    ] = state_dict.pop(key.replace("lora_down.weight", "dora_scale"))

         # Rename the alphas so that they can be mapped appropriately.
         if lora_name_alpha in state_dict:
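The converter shown in these hunk headers is `_convert_kohya_lora_to_diffusers`, which `load_lora_weights` calls internally for Kohya-format files; the new branches additionally move each `dora_scale` tensor under a `.lora_magnitude_vector.` key. A hedged sketch of invoking the (private) helper directly, assuming safetensors is installed and using a placeholder file path:

    # Convert a Kohya-format (optionally DoRA) LoRA state dict by hand.
    import safetensors.torch
    from diffusers.loaders.lora_conversion_utils import _convert_kohya_lora_to_diffusers

    kohya_sd = safetensors.torch.load_file("my_kohya_lora.safetensors")  # placeholder path
    diffusers_sd, network_alphas = _convert_kohya_lora_to_diffusers(kohya_sd)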
diffusers/loaders/peft.py
CHANGED
@@ -20,7 +20,8 @@ from ..utils import MIN_PEFT_VERSION, check_peft_version, is_peft_available
 class PeftAdapterMixin:
     """
     A class containing all functions for loading and using adapters weights that are supported in PEFT library. For
-    more details about adapters and injecting them in a transformer-based model, check out the PEFT
+    more details about adapters and injecting them in a transformer-based model, check out the PEFT
+    [documentation](https://huggingface.co/docs/peft/index).

     Install the latest version of PEFT, and use this mixin to:

@@ -143,8 +144,8 @@ class PeftAdapterMixin:

     def enable_adapters(self) -> None:
         """
-        Enable adapters that are attached to the model. The model uses `self.active_adapters()` to retrieve the
-
+        Enable adapters that are attached to the model. The model uses `self.active_adapters()` to retrieve the list of
+        adapters to enable.

         If you are not familiar with adapters and PEFT methods, we invite you to read more about them on the PEFT
         [documentation](https://huggingface.co/docs/peft).