PyPI - diffusers - Versions diffs - 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl - Mend

diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (220) hide show

diffusers/loaders/unet.py CHANGED Viewed

@@ -11,13 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import inspect
 import os
 from collections import defaultdict
 from contextlib import nullcontext
-from functools import partial
 from pathlib import Path
-from typing import Callable, Dict, List, Optional, Union
+from typing import Callable, Dict, Union
 import safetensors
 import torch
@@ -38,18 +36,14 @@ from ..utils import (
     USE_PEFT_BACKEND,
     _get_model_file,
     convert_unet_state_dict_to_peft,
-    delete_adapter_layers,
     get_adapter_name,
     get_peft_kwargs,
     is_accelerate_available,
     is_peft_version,
     is_torch_version,
     logging,
-    set_adapter_layers,
-    set_weights_and_activate_adapters,
 )
-from .lora import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
-from .unet_loader_utils import _maybe_expand_lora_scales
+from .lora_pipeline import LORA_WEIGHT_NAME, LORA_WEIGHT_NAME_SAFE, TEXT_ENCODER_NAME, UNET_NAME
 from .utils import AttnProcsLayers
@@ -97,9 +91,7 @@ class UNet2DConditionLoadersMixin:
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
-            resume_download:
-                Deprecated and ignored. All downloads are now resumed by default when possible. Will be removed in v1
-                of Diffusers.
             proxies (`Dict[str, str]`, *optional*):
                 A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
@@ -140,7 +132,6 @@ class UNet2DConditionLoadersMixin:
         """
         cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
-        resume_download = kwargs.pop("resume_download", None)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", None)
         token = kwargs.pop("token", None)
@@ -174,7 +165,6 @@ class UNet2DConditionLoadersMixin:
                         weights_name=weight_name or LORA_WEIGHT_NAME_SAFE,
                         cache_dir=cache_dir,
                         force_download=force_download,
-                        resume_download=resume_download,
                         proxies=proxies,
                         local_files_only=local_files_only,
                         token=token,
@@ -194,7 +184,6 @@ class UNet2DConditionLoadersMixin:
                     weights_name=weight_name or LORA_WEIGHT_NAME,
                     cache_dir=cache_dir,
                     force_download=force_download,
-                    resume_download=resume_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
                     token=token,
@@ -362,7 +351,7 @@ class UNet2DConditionLoadersMixin:
         return is_model_cpu_offload, is_sequential_cpu_offload
     @classmethod
-    # Copied from diffusers.loaders.lora.LoraLoaderMixin._optionally_disable_offloading
+    # Copied from diffusers.loaders.lora_base.LoraBaseMixin._optionally_disable_offloading
     def _optionally_disable_offloading(cls, _pipeline):
         """
         Optionally removes offloading in case the pipeline has been already sequentially offloaded to CPU.
@@ -457,6 +446,15 @@ class UNet2DConditionLoadersMixin:
         )
         if is_custom_diffusion:
             state_dict = self._get_custom_diffusion_state_dict()
+            if save_function is None and safe_serialization:
+                # safetensors does not support saving dicts with non-tensor values
+                empty_state_dict = {k: v for k, v in state_dict.items() if not isinstance(v, torch.Tensor)}
+                if len(empty_state_dict) > 0:
+                    logger.warning(
+                        f"Safetensors does not support saving dicts with non-tensor values. "
+                        f"The following keys will be ignored: {empty_state_dict.keys()}"
+                    )
+                state_dict = {k: v for k, v in state_dict.items() if isinstance(v, torch.Tensor)}
         else:
             if not USE_PEFT_BACKEND:
                 raise ValueError("PEFT backend is required for saving LoRAs using the `save_attn_procs()` method.")
@@ -515,194 +513,6 @@ class UNet2DConditionLoadersMixin:
         return state_dict
-    def fuse_lora(self, lora_scale=1.0, safe_fusing=False, adapter_names=None):
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for `fuse_lora()`.")
-        self.lora_scale = lora_scale
-        self._safe_fusing = safe_fusing
-        self.apply(partial(self._fuse_lora_apply, adapter_names=adapter_names))
-    def _fuse_lora_apply(self, module, adapter_names=None):
-        from peft.tuners.tuners_utils import BaseTunerLayer
-        merge_kwargs = {"safe_merge": self._safe_fusing}
-        if isinstance(module, BaseTunerLayer):
-            if self.lora_scale != 1.0:
-                module.scale_layer(self.lora_scale)
-            # For BC with prevous PEFT versions, we need to check the signature
-            # of the `merge` method to see if it supports the `adapter_names` argument.
-            supported_merge_kwargs = list(inspect.signature(module.merge).parameters)
-            if "adapter_names" in supported_merge_kwargs:
-                merge_kwargs["adapter_names"] = adapter_names
-            elif "adapter_names" not in supported_merge_kwargs and adapter_names is not None:
-                raise ValueError(
-                    "The `adapter_names` argument is not supported with your PEFT version. Please upgrade"
-                    " to the latest version of PEFT. `pip install -U peft`"
-                )
-            module.merge(**merge_kwargs)
-    def unfuse_lora(self):
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for `unfuse_lora()`.")
-        self.apply(self._unfuse_lora_apply)
-    def _unfuse_lora_apply(self, module):
-        from peft.tuners.tuners_utils import BaseTunerLayer
-        if isinstance(module, BaseTunerLayer):
-            module.unmerge()
-    def unload_lora(self):
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for `unload_lora()`.")
-        from ..utils import recurse_remove_peft_layers
-        recurse_remove_peft_layers(self)
-        if hasattr(self, "peft_config"):
-            del self.peft_config
-    def set_adapters(
-        self,
-        adapter_names: Union[List[str], str],
-        weights: Optional[Union[float, Dict, List[float], List[Dict], List[None]]] = None,
-    ):
-        """
-        Set the currently active adapters for use in the UNet.
-        Args:
-            adapter_names (`List[str]` or `str`):
-                The names of the adapters to use.
-            adapter_weights (`Union[List[float], float]`, *optional*):
-                The adapter(s) weights to use with the UNet. If `None`, the weights are set to `1.0` for all the
-                adapters.
-        Example:
-        ```py
-        from diffusers import AutoPipelineForText2Image
-        import torch
-        pipeline = AutoPipelineForText2Image.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to("cuda")
-        pipeline.load_lora_weights(
-            "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
-        )
-        pipeline.load_lora_weights("nerijs/pixel-art-xl", weight_name="pixel-art-xl.safetensors", adapter_name="pixel")
-        pipeline.set_adapters(["cinematic", "pixel"], adapter_weights=[0.5, 0.5])
-        ```
-        """
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for `set_adapters()`.")
-        adapter_names = [adapter_names] if isinstance(adapter_names, str) else adapter_names
-        # Expand weights into a list, one entry per adapter
-        # examples for e.g. 2 adapters:  [{...}, 7] -> [7,7] ; None -> [None, None]
-        if not isinstance(weights, list):
-            weights = [weights] * len(adapter_names)
-        if len(adapter_names) != len(weights):
-            raise ValueError(
-                f"Length of adapter names {len(adapter_names)} is not equal to the length of their weights {len(weights)}."
-            )
-        # Set None values to default of 1.0
-        # e.g. [{...}, 7] -> [{...}, 7] ; [None, None] -> [1.0, 1.0]
-        weights = [w if w is not None else 1.0 for w in weights]
-        # e.g. [{...}, 7] -> [{expanded dict...}, 7]
-        weights = _maybe_expand_lora_scales(self, weights)
-        set_weights_and_activate_adapters(self, adapter_names, weights)
-    def disable_lora(self):
-        """
-        Disable the UNet's active LoRA layers.
-        Example:
-        ```py
-        from diffusers import AutoPipelineForText2Image
-        import torch
-        pipeline = AutoPipelineForText2Image.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to("cuda")
-        pipeline.load_lora_weights(
-            "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
-        )
-        pipeline.disable_lora()
-        ```
-        """
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for this method.")
-        set_adapter_layers(self, enabled=False)
-    def enable_lora(self):
-        """
-        Enable the UNet's active LoRA layers.
-        Example:
-        ```py
-        from diffusers import AutoPipelineForText2Image
-        import torch
-        pipeline = AutoPipelineForText2Image.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to("cuda")
-        pipeline.load_lora_weights(
-            "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_name="cinematic"
-        )
-        pipeline.enable_lora()
-        ```
-        """
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for this method.")
-        set_adapter_layers(self, enabled=True)
-    def delete_adapters(self, adapter_names: Union[List[str], str]):
-        """
-        Delete an adapter's LoRA layers from the UNet.
-        Args:
-            adapter_names (`Union[List[str], str]`):
-                The names (single string or list of strings) of the adapter to delete.
-        Example:
-        ```py
-        from diffusers import AutoPipelineForText2Image
-        import torch
-        pipeline = AutoPipelineForText2Image.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
-        ).to("cuda")
-        pipeline.load_lora_weights(
-            "jbilcke-hf/sdxl-cinematic-1", weight_name="pytorch_lora_weights.safetensors", adapter_names="cinematic"
-        )
-        pipeline.delete_adapters("cinematic")
-        ```
-        """
-        if not USE_PEFT_BACKEND:
-            raise ValueError("PEFT backend is required for this method.")
-        if isinstance(adapter_names, str):
-            adapter_names = [adapter_names]
-        for adapter_name in adapter_names:
-            delete_adapter_layers(self, adapter_name)
-            # Pop also the corresponding adapter from the config
-            if hasattr(self, "peft_config"):
-                self.peft_config.pop(adapter_name, None)
     def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
         if low_cpu_mem_usage:
             if is_accelerate_available():
@@ -922,8 +732,6 @@ class UNet2DConditionLoadersMixin:
     def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
         from ..models.attention_processor import (
-            AttnProcessor,
-            AttnProcessor2_0,
             IPAdapterAttnProcessor,
             IPAdapterAttnProcessor2_0,
         )
@@ -963,9 +771,7 @@ class UNet2DConditionLoadersMixin:
                 hidden_size = self.config.block_out_channels[block_id]
             if cross_attention_dim is None or "motion_modules" in name:
-                attn_processor_class = (
-                    AttnProcessor2_0 if hasattr(F, "scaled_dot_product_attention") else AttnProcessor
-                )
+                attn_processor_class = self.attn_processors[name].__class__
                 attn_procs[name] = attn_processor_class()
             else:
@@ -1017,6 +823,15 @@ class UNet2DConditionLoadersMixin:
     def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]
+        # Kolors Unet already has a `encoder_hid_proj`
+        if (
+            self.encoder_hid_proj is not None
+            and self.config.encoder_hid_dim_type == "text_proj"
+            and not hasattr(self, "text_encoder_hid_proj")
+        ):
+            self.text_encoder_hid_proj = self.encoder_hid_proj
         # Set encoder_hid_proj after loading ip_adapter weights,
         # because `IPAdapterPlusImageProjection` also has `attn_processors`.
         self.encoder_hid_proj = None

diffusers/models/__init__.py CHANGED Viewed

@@ -28,22 +28,32 @@ if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
     _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
     _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
+    _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
     _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
+    _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
     _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
     _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
     _import_structure["autoencoders.vq_model"] = ["VQModel"]
     _import_structure["controlnet"] = ["ControlNetModel"]
+    _import_structure["controlnet_hunyuan"] = ["HunyuanDiT2DControlNetModel", "HunyuanDiT2DMultiControlNetModel"]
     _import_structure["controlnet_sd3"] = ["SD3ControlNetModel", "SD3MultiControlNetModel"]
+    _import_structure["controlnet_sparsectrl"] = ["SparseControlNetModel"]
     _import_structure["controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
     _import_structure["embeddings"] = ["ImageProjection"]
     _import_structure["modeling_utils"] = ["ModelMixin"]
+    _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
+    _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
     _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
     _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
     _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
+    _import_structure["transformers.latte_transformer_3d"] = ["LatteTransformer3DModel"]
+    _import_structure["transformers.lumina_nextdit2d"] = ["LuminaNextDiT2DModel"]
     _import_structure["transformers.pixart_transformer_2d"] = ["PixArtTransformer2DModel"]
     _import_structure["transformers.prior_transformer"] = ["PriorTransformer"]
+    _import_structure["transformers.stable_audio_transformer"] = ["StableAudioDiTModel"]
     _import_structure["transformers.t5_film_transformer"] = ["T5FilmDecoder"]
     _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
+    _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
     _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
     _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
     _import_structure["unets.unet_1d"] = ["UNet1DModel"]
@@ -69,23 +79,33 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .autoencoders import (
             AsymmetricAutoencoderKL,
             AutoencoderKL,
+            AutoencoderKLCogVideoX,
             AutoencoderKLTemporalDecoder,
+            AutoencoderOobleck,
             AutoencoderTiny,
             ConsistencyDecoderVAE,
             VQModel,
         )
         from .controlnet import ControlNetModel
+        from .controlnet_hunyuan import HunyuanDiT2DControlNetModel, HunyuanDiT2DMultiControlNetModel
         from .controlnet_sd3 import SD3ControlNetModel, SD3MultiControlNetModel
+        from .controlnet_sparsectrl import SparseControlNetModel
         from .controlnet_xs import ControlNetXSAdapter, UNetControlNetXSModel
         from .embeddings import ImageProjection
         from .modeling_utils import ModelMixin
         from .transformers import (
+            AuraFlowTransformer2DModel,
+            CogVideoXTransformer3DModel,
             DiTTransformer2DModel,
             DualTransformer2DModel,
+            FluxTransformer2DModel,
             HunyuanDiT2DModel,
+            LatteTransformer3DModel,
+            LuminaNextDiT2DModel,
             PixArtTransformer2DModel,
             PriorTransformer,
             SD3Transformer2DModel,
+            StableAudioDiTModel,
             T5FilmDecoder,
             Transformer2DModel,
             TransformerTemporalModel,

diffusers/models/activations.py CHANGED Viewed

@@ -123,6 +123,28 @@ class GEGLU(nn.Module):
             return hidden_states * self.gelu(gate)
+class SwiGLU(nn.Module):
+    r"""
+    A [variant](https://arxiv.org/abs/2002.05202) of the gated linear unit activation function. It's similar to `GEGLU`
+    but uses SiLU / Swish instead of GeLU.
+    Parameters:
+        dim_in (`int`): The number of channels in the input.
+        dim_out (`int`): The number of channels in the output.
+        bias (`bool`, defaults to True): Whether to use a bias in the linear layer.
+    """
+    def __init__(self, dim_in: int, dim_out: int, bias: bool = True):
+        super().__init__()
+        self.proj = nn.Linear(dim_in, dim_out * 2, bias=bias)
+        self.activation = nn.SiLU()
+    def forward(self, hidden_states):
+        hidden_states = self.proj(hidden_states)
+        hidden_states, gate = hidden_states.chunk(2, dim=-1)
+        return hidden_states * self.activation(gate)
 class ApproximateGELU(nn.Module):
     r"""
     The approximate form of the Gaussian Error Linear Unit (GELU). For more details, see section 2 of this

diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl

diffusers 0.29.2__py3-none-any.whl → 0.30.1__py3-none-any.whl