PyPI - diffusers - Versions diffs - 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl - Mend

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

Files changed (238) hide show

diffusers/models/lora.py CHANGED Viewed

@@ -12,19 +12,60 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# IMPORTANT:                                                      #
+###################################################################
+# ----------------------------------------------------------------#
+# This file is deprecated and will be removed soon                #
+# (as soon as PEFT will become a required dependency for LoRA)    #
+# ----------------------------------------------------------------#
+###################################################################
 from typing import Optional, Tuple, Union
 import torch
 import torch.nn.functional as F
 from torch import nn
-from ..loaders import PatchedLoraProjection, text_encoder_attn_modules, text_encoder_mlp_modules
 from ..utils import logging
+from ..utils.import_utils import is_transformers_available
+if is_transformers_available():
+    from transformers import CLIPTextModel, CLIPTextModelWithProjection
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+def text_encoder_attn_modules(text_encoder):
+    attn_modules = []
+    if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)):
+        for i, layer in enumerate(text_encoder.text_model.encoder.layers):
+            name = f"text_model.encoder.layers.{i}.self_attn"
+            mod = layer.self_attn
+            attn_modules.append((name, mod))
+    else:
+        raise ValueError(f"do not know how to get attention modules for: {text_encoder.__class__.__name__}")
+    return attn_modules
+def text_encoder_mlp_modules(text_encoder):
+    mlp_modules = []
+    if isinstance(text_encoder, (CLIPTextModel, CLIPTextModelWithProjection)):
+        for i, layer in enumerate(text_encoder.text_model.encoder.layers):
+            mlp_mod = layer.mlp
+            name = f"text_model.encoder.layers.{i}.mlp"
+            mlp_modules.append((name, mlp_mod))
+    else:
+        raise ValueError(f"do not know how to get mlp modules for: {text_encoder.__class__.__name__}")
+    return mlp_modules
 def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0):
     for _, attn_module in text_encoder_attn_modules(text_encoder):
         if isinstance(attn_module.q_proj, PatchedLoraProjection):
@@ -39,6 +80,95 @@ def adjust_lora_scale_text_encoder(text_encoder, lora_scale: float = 1.0):
             mlp_module.fc2.lora_scale = lora_scale
+class PatchedLoraProjection(torch.nn.Module):
+    def __init__(self, regular_linear_layer, lora_scale=1, network_alpha=None, rank=4, dtype=None):
+        super().__init__()
+        from ..models.lora import LoRALinearLayer
+        self.regular_linear_layer = regular_linear_layer
+        device = self.regular_linear_layer.weight.device
+        if dtype is None:
+            dtype = self.regular_linear_layer.weight.dtype
+        self.lora_linear_layer = LoRALinearLayer(
+            self.regular_linear_layer.in_features,
+            self.regular_linear_layer.out_features,
+            network_alpha=network_alpha,
+            device=device,
+            dtype=dtype,
+            rank=rank,
+        )
+        self.lora_scale = lora_scale
+    # overwrite PyTorch's `state_dict` to be sure that only the 'regular_linear_layer' weights are saved
+    # when saving the whole text encoder model and when LoRA is unloaded or fused
+    def state_dict(self, *args, destination=None, prefix="", keep_vars=False):
+        if self.lora_linear_layer is None:
+            return self.regular_linear_layer.state_dict(
+                *args, destination=destination, prefix=prefix, keep_vars=keep_vars
+            )
+        return super().state_dict(*args, destination=destination, prefix=prefix, keep_vars=keep_vars)
+    def _fuse_lora(self, lora_scale=1.0, safe_fusing=False):
+        if self.lora_linear_layer is None:
+            return
+        dtype, device = self.regular_linear_layer.weight.data.dtype, self.regular_linear_layer.weight.data.device
+        w_orig = self.regular_linear_layer.weight.data.float()
+        w_up = self.lora_linear_layer.up.weight.data.float()
+        w_down = self.lora_linear_layer.down.weight.data.float()
+        if self.lora_linear_layer.network_alpha is not None:
+            w_up = w_up * self.lora_linear_layer.network_alpha / self.lora_linear_layer.rank
+        fused_weight = w_orig + (lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0])
+        if safe_fusing and torch.isnan(fused_weight).any().item():
+            raise ValueError(
+                "This LoRA weight seems to be broken. "
+                f"Encountered NaN values when trying to fuse LoRA weights for {self}."
+                "LoRA weights will not be fused."
+            )
+        self.regular_linear_layer.weight.data = fused_weight.to(device=device, dtype=dtype)
+        # we can drop the lora layer now
+        self.lora_linear_layer = None
+        # offload the up and down matrices to CPU to not blow the memory
+        self.w_up = w_up.cpu()
+        self.w_down = w_down.cpu()
+        self.lora_scale = lora_scale
+    def _unfuse_lora(self):
+        if not (getattr(self, "w_up", None) is not None and getattr(self, "w_down", None) is not None):
+            return
+        fused_weight = self.regular_linear_layer.weight.data
+        dtype, device = fused_weight.dtype, fused_weight.device
+        w_up = self.w_up.to(device=device).float()
+        w_down = self.w_down.to(device).float()
+        unfused_weight = fused_weight.float() - (self.lora_scale * torch.bmm(w_up[None, :], w_down[None, :])[0])
+        self.regular_linear_layer.weight.data = unfused_weight.to(device=device, dtype=dtype)
+        self.w_up = None
+        self.w_down = None
+    def forward(self, input):
+        if self.lora_scale is None:
+            self.lora_scale = 1.0
+        if self.lora_linear_layer is None:
+            return self.regular_linear_layer(input)
+        return self.regular_linear_layer(input) + (self.lora_scale * self.lora_linear_layer(input))
 class LoRALinearLayer(nn.Module):
     r"""
     A linear layer that is used with LoRA.

diffusers/models/modeling_flax_utils.py CHANGED Viewed

@@ -24,13 +24,17 @@ from flax.core.frozen_dict import FrozenDict, unfreeze
 from flax.serialization import from_bytes, to_bytes
 from flax.traverse_util import flatten_dict, unflatten_dict
 from huggingface_hub import create_repo, hf_hub_download
-from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
+from huggingface_hub.utils import (
+    EntryNotFoundError,
+    RepositoryNotFoundError,
+    RevisionNotFoundError,
+    validate_hf_hub_args,
+)
 from requests import HTTPError
 from .. import __version__, is_torch_available
 from ..utils import (
     CONFIG_NAME,
-    DIFFUSERS_CACHE,
     FLAX_WEIGHTS_NAME,
     HUGGINGFACE_CO_RESOLVE_ENDPOINT,
     WEIGHTS_NAME,
@@ -52,6 +56,7 @@ class FlaxModelMixin(PushToHubMixin):
         - **config_name** ([`str`]) -- Filename to save a model to when calling [`~FlaxModelMixin.save_pretrained`].
     """
     config_name = CONFIG_NAME
     _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
     _flax_internal_args = ["name", "parent", "dtype"]
@@ -196,6 +201,7 @@ class FlaxModelMixin(PushToHubMixin):
         raise NotImplementedError(f"init_weights method has to be implemented for {self}")
     @classmethod
+    @validate_hf_hub_args
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: Union[str, os.PathLike],
@@ -287,13 +293,13 @@ class FlaxModelMixin(PushToHubMixin):
         ```
         """
         config = kwargs.pop("config", None)
-        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+        cache_dir = kwargs.pop("cache_dir", None)
         force_download = kwargs.pop("force_download", False)
         from_pt = kwargs.pop("from_pt", False)
         resume_download = kwargs.pop("resume_download", False)
         proxies = kwargs.pop("proxies", None)
         local_files_only = kwargs.pop("local_files_only", False)
-        use_auth_token = kwargs.pop("use_auth_token", None)
+        token = kwargs.pop("token", None)
         revision = kwargs.pop("revision", None)
         subfolder = kwargs.pop("subfolder", None)
@@ -313,7 +319,7 @@ class FlaxModelMixin(PushToHubMixin):
                 resume_download=resume_download,
                 proxies=proxies,
                 local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
+                token=token,
                 revision=revision,
                 subfolder=subfolder,
                 **kwargs,
@@ -358,7 +364,7 @@ class FlaxModelMixin(PushToHubMixin):
                     proxies=proxies,
                     resume_download=resume_download,
                     local_files_only=local_files_only,
-                    use_auth_token=use_auth_token,
+                    token=token,
                     user_agent=user_agent,
                     subfolder=subfolder,
                     revision=revision,
@@ -368,7 +374,7 @@ class FlaxModelMixin(PushToHubMixin):
                 raise EnvironmentError(
                     f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
                     "listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
-                    "token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
+                    "token having permission to this repo with `token` or log in with `huggingface-cli "
                     "login`."
                 )
             except RevisionNotFoundError:
@@ -436,7 +442,7 @@ class FlaxModelMixin(PushToHubMixin):
             # make sure all arrays are stored as jnp.ndarray
             # NOTE: This is to prevent a bug this will be fixed in Flax >= v0.3.4:
             # https://github.com/google/flax/issues/1261
-        state = jax.tree_util.tree_map(lambda x: jax.device_put(x, jax.devices("cpu")[0]), state)
+        state = jax.tree_util.tree_map(lambda x: jax.device_put(x, jax.local_devices(backend="cpu")[0]), state)
         # flatten dicts
         state = flatten_dict(state)

diffusers/models/modeling_outputs.py ADDED Viewed

@@ -0,0 +1,17 @@
+from dataclasses import dataclass
+from ..utils import BaseOutput
+@dataclass
+class AutoencoderKLOutput(BaseOutput):
+    """
+    Output of AutoencoderKL encoding method.
+    Args:
+        latent_dist (`DiagonalGaussianDistribution`):
+            Encoded outputs of `Encoder` represented as the mean and logvar of `DiagonalGaussianDistribution`.
+            `DiagonalGaussianDistribution` allows for sampling latents from the distribution.
+    """
+    latent_dist: "DiagonalGaussianDistribution"  # noqa: F821

diffusers/models/modeling_utils.py CHANGED Viewed

@@ -18,20 +18,20 @@ import inspect
 import itertools
 import os
 import re
+from collections import OrderedDict
 from functools import partial
 from typing import Any, Callable, List, Optional, Tuple, Union
 import safetensors
 import torch
 from huggingface_hub import create_repo
-from torch import Tensor, device, nn
+from huggingface_hub.utils import validate_hf_hub_args
+from torch import Tensor, nn
 from .. import __version__
 from ..utils import (
     CONFIG_NAME,
-    DIFFUSERS_CACHE,
     FLAX_WEIGHTS_NAME,
-    HF_HUB_OFFLINE,
     MIN_PEFT_VERSION,
     SAFETENSORS_WEIGHTS_NAME,
     WEIGHTS_NAME,
@@ -61,7 +61,7 @@ if is_accelerate_available():
     from accelerate.utils.versions import is_torch_version
-def get_parameter_device(parameter: torch.nn.Module):
+def get_parameter_device(parameter: torch.nn.Module) -> torch.device:
     try:
         parameters_and_buffers = itertools.chain(parameter.parameters(), parameter.buffers())
         return next(parameters_and_buffers).device
@@ -77,7 +77,7 @@ def get_parameter_device(parameter: torch.nn.Module):
         return first_tuple[1].device
-def get_parameter_dtype(parameter: torch.nn.Module):
+def get_parameter_dtype(parameter: torch.nn.Module) -> torch.dtype:
     try:
         params = tuple(parameter.parameters())
         if len(params) > 0:
@@ -130,7 +130,13 @@ def load_state_dict(checkpoint_file: Union[str, os.PathLike], variant: Optional[
             )
-def load_model_dict_into_meta(model, state_dict, device=None, dtype=None, model_name_or_path=None):
+def load_model_dict_into_meta(
+    model,
+    state_dict: OrderedDict,
+    device: Optional[Union[str, torch.device]] = None,
+    dtype: Optional[Union[str, torch.dtype]] = None,
+    model_name_or_path: Optional[str] = None,
+) -> List[str]:
     device = device or torch.device("cpu")
     dtype = dtype or torch.float32
@@ -156,7 +162,7 @@ def load_model_dict_into_meta(model, state_dict, device=None, dtype=None, model_
     return unexpected_keys
-def _load_state_dict_into_model(model_to_load, state_dict):
+def _load_state_dict_into_model(model_to_load, state_dict: OrderedDict) -> List[str]:
     # Convert old format to new format if needed from a PyTorch state_dict
     # copy state_dict so _load_from_state_dict can modify it
     state_dict = state_dict.copy()
@@ -164,7 +170,7 @@ def _load_state_dict_into_model(model_to_load, state_dict):
     # PyTorch's `_load_from_state_dict` does not copy parameters in a module's descendants
     # so we need to apply the function recursively.
-    def load(module: torch.nn.Module, prefix=""):
+    def load(module: torch.nn.Module, prefix: str = ""):
         args = (state_dict, prefix, {}, True, [], [], error_msgs)
         module._load_from_state_dict(*args)
@@ -186,6 +192,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         - **config_name** ([`str`]) -- Filename to save a model to when calling [`~models.ModelMixin.save_pretrained`].
     """
     config_name = CONFIG_NAME
     _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
     _supports_gradient_checkpointing = False
@@ -220,7 +227,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         """
         return any(hasattr(m, "gradient_checkpointing") and m.gradient_checkpointing for m in self.modules())
-    def enable_gradient_checkpointing(self):
+    def enable_gradient_checkpointing(self) -> None:
         """
         Activates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or
         *checkpoint activations* in other frameworks).
@@ -229,7 +236,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             raise ValueError(f"{self.__class__.__name__} does not support gradient checkpointing.")
         self.apply(partial(self._set_gradient_checkpointing, value=True))
-    def disable_gradient_checkpointing(self):
+    def disable_gradient_checkpointing(self) -> None:
         """
         Deactivates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or
         *checkpoint activations* in other frameworks).
@@ -254,7 +261,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             if isinstance(module, torch.nn.Module):
                 fn_recursive_set_mem_eff(module)
-    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
+    def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None) -> None:
         r"""
         Enable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/).
@@ -290,7 +297,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         """
         self.set_use_memory_efficient_attention_xformers(True, attention_op)
-    def disable_xformers_memory_efficient_attention(self):
+    def disable_xformers_memory_efficient_attention(self) -> None:
         r"""
         Disable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/).
         """
@@ -447,7 +454,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         self,
         save_directory: Union[str, os.PathLike],
         is_main_process: bool = True,
-        save_function: Callable = None,
+        save_function: Optional[Callable] = None,
         safe_serialization: bool = True,
         variant: Optional[str] = None,
         push_to_hub: bool = False,
@@ -527,6 +534,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             )
     @classmethod
+    @validate_hf_hub_args
     def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
         r"""
         Instantiate a pretrained PyTorch model from a pretrained model configuration.
@@ -563,7 +571,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             local_files_only(`bool`, *optional*, defaults to `False`):
                 Whether to only load local model weights and configuration files or not. If set to `True`, the model
                 won't be downloaded from the Hub.
-            use_auth_token (`str` or *bool*, *optional*):
+            token (`str` or *bool*, *optional*):
                 The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                 `diffusers-cli login` (stored in `~/.huggingface`) is used.
             revision (`str`, *optional*, defaults to `"main"`):
@@ -632,15 +640,15 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
         ```
         """
-        cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
+        cache_dir = kwargs.pop("cache_dir", None)
         ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False)
         force_download = kwargs.pop("force_download", False)
         from_flax = kwargs.pop("from_flax", False)
         resume_download = kwargs.pop("resume_download", False)
         proxies = kwargs.pop("proxies", None)
         output_loading_info = kwargs.pop("output_loading_info", False)
-        local_files_only = kwargs.pop("local_files_only", HF_HUB_OFFLINE)
-        use_auth_token = kwargs.pop("use_auth_token", None)
+        local_files_only = kwargs.pop("local_files_only", None)
+        token = kwargs.pop("token", None)
         revision = kwargs.pop("revision", None)
         torch_dtype = kwargs.pop("torch_dtype", None)
         subfolder = kwargs.pop("subfolder", None)
@@ -710,7 +718,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             resume_download=resume_download,
             proxies=proxies,
             local_files_only=local_files_only,
-            use_auth_token=use_auth_token,
+            token=token,
             revision=revision,
             subfolder=subfolder,
             device_map=device_map,
@@ -732,7 +740,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                 resume_download=resume_download,
                 proxies=proxies,
                 local_files_only=local_files_only,
-                use_auth_token=use_auth_token,
+                token=token,
                 revision=revision,
                 subfolder=subfolder,
                 user_agent=user_agent,
@@ -755,7 +763,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                         resume_download=resume_download,
                         proxies=proxies,
                         local_files_only=local_files_only,
-                        use_auth_token=use_auth_token,
+                        token=token,
                         revision=revision,
                         subfolder=subfolder,
                         user_agent=user_agent,
@@ -774,7 +782,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
                     resume_download=resume_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
-                    use_auth_token=use_auth_token,
+                    token=token,
                     revision=revision,
                     subfolder=subfolder,
                     user_agent=user_agent,
@@ -910,10 +918,10 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
     def _load_pretrained_model(
         cls,
         model,
-        state_dict,
+        state_dict: OrderedDict,
         resolved_archive_file,
-        pretrained_model_name_or_path,
-        ignore_mismatched_sizes=False,
+        pretrained_model_name_or_path: Union[str, os.PathLike],
+        ignore_mismatched_sizes: bool = False,
     ):
         # Retrieve missing & unexpected_keys
         model_state_dict = model.state_dict()
@@ -1011,7 +1019,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         return model, missing_keys, unexpected_keys, mismatched_keys, error_msgs
     @property
-    def device(self) -> device:
+    def device(self) -> torch.device:
         """
         `torch.device`: The device on which the module is (assuming that all the module parameters are on the same
         device).
@@ -1063,7 +1071,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
         else:
             return sum(p.numel() for p in self.parameters() if p.requires_grad or not only_trainable)
-    def _convert_deprecated_attention_blocks(self, state_dict):
+    def _convert_deprecated_attention_blocks(self, state_dict: OrderedDict) -> None:
         deprecated_attention_block_paths = []
         def recursive_find_attn_block(name, module):
@@ -1107,7 +1115,7 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             if f"{path}.proj_attn.bias" in state_dict:
                 state_dict[f"{path}.to_out.0.bias"] = state_dict.pop(f"{path}.proj_attn.bias")
-    def _temp_convert_self_to_deprecated_attention_blocks(self):
+    def _temp_convert_self_to_deprecated_attention_blocks(self) -> None:
         deprecated_attention_block_modules = []
         def recursive_find_attn_block(module):
@@ -1134,10 +1142,10 @@ class ModelMixin(torch.nn.Module, PushToHubMixin):
             del module.to_v
             del module.to_out
-    def _undo_temp_convert_self_to_deprecated_attention_blocks(self):
+    def _undo_temp_convert_self_to_deprecated_attention_blocks(self) -> None:
         deprecated_attention_block_modules = []
-        def recursive_find_attn_block(module):
+        def recursive_find_attn_block(module) -> None:
             if hasattr(module, "_from_deprecated_attn_block") and module._from_deprecated_attn_block:
                 deprecated_attention_block_modules.append(module)

diffusers/models/normalization.py CHANGED Viewed

@@ -13,14 +13,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import numbers
 from typing import Dict, Optional, Tuple
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from ..utils import is_torch_version
 from .activations import get_activation
-from .embeddings import CombinedTimestepLabelEmbeddings, CombinedTimestepSizeEmbeddings
+from .embeddings import CombinedTimestepLabelEmbeddings, PixArtAlphaCombinedTimestepSizeEmbeddings
 class AdaLayerNorm(nn.Module):
@@ -91,7 +93,7 @@ class AdaLayerNormSingle(nn.Module):
     def __init__(self, embedding_dim: int, use_additional_conditions: bool = False):
         super().__init__()
-        self.emb = CombinedTimestepSizeEmbeddings(
+        self.emb = PixArtAlphaCombinedTimestepSizeEmbeddings(
             embedding_dim, size_emb_dim=embedding_dim // 3, use_additional_conditions=use_additional_conditions
         )
@@ -101,8 +103,8 @@ class AdaLayerNormSingle(nn.Module):
     def forward(
         self,
         timestep: torch.Tensor,
-        added_cond_kwargs: Dict[str, torch.Tensor] = None,
-        batch_size: int = None,
+        added_cond_kwargs: Optional[Dict[str, torch.Tensor]] = None,
+        batch_size: Optional[int] = None,
         hidden_dtype: Optional[torch.dtype] = None,
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         # No modulation happening here.
@@ -146,3 +148,107 @@ class AdaGroupNorm(nn.Module):
         x = F.group_norm(x, self.num_groups, eps=self.eps)
         x = x * (1 + scale) + shift
         return x
+class AdaLayerNormContinuous(nn.Module):
+    def __init__(
+        self,
+        embedding_dim: int,
+        conditioning_embedding_dim: int,
+        # NOTE: It is a bit weird that the norm layer can be configured to have scale and shift parameters
+        # because the output is immediately scaled and shifted by the projected conditioning embeddings.
+        # Note that AdaLayerNorm does not let the norm layer have scale and shift parameters.
+        # However, this is how it was implemented in the original code, and it's rather likely you should
+        # set `elementwise_affine` to False.
+        elementwise_affine=True,
+        eps=1e-5,
+        bias=True,
+        norm_type="layer_norm",
+    ):
+        super().__init__()
+        self.silu = nn.SiLU()
+        self.linear = nn.Linear(conditioning_embedding_dim, embedding_dim * 2, bias=bias)
+        if norm_type == "layer_norm":
+            self.norm = LayerNorm(embedding_dim, eps, elementwise_affine, bias)
+        elif norm_type == "rms_norm":
+            self.norm = RMSNorm(embedding_dim, eps, elementwise_affine)
+        else:
+            raise ValueError(f"unknown norm_type {norm_type}")
+    def forward(self, x: torch.Tensor, conditioning_embedding: torch.Tensor) -> torch.Tensor:
+        emb = self.linear(self.silu(conditioning_embedding))
+        scale, shift = torch.chunk(emb, 2, dim=1)
+        x = self.norm(x) * (1 + scale)[:, None, :] + shift[:, None, :]
+        return x
+if is_torch_version(">=", "2.1.0"):
+    LayerNorm = nn.LayerNorm
+else:
+    # Has optional bias parameter compared to torch layer norm
+    # TODO: replace with torch layernorm once min required torch version >= 2.1
+    class LayerNorm(nn.Module):
+        def __init__(self, dim, eps: float = 1e-5, elementwise_affine: bool = True, bias: bool = True):
+            super().__init__()
+            self.eps = eps
+            if isinstance(dim, numbers.Integral):
+                dim = (dim,)
+            self.dim = torch.Size(dim)
+            if elementwise_affine:
+                self.weight = nn.Parameter(torch.ones(dim))
+                self.bias = nn.Parameter(torch.zeros(dim)) if bias else None
+            else:
+                self.weight = None
+                self.bias = None
+        def forward(self, input):
+            return F.layer_norm(input, self.dim, self.weight, self.bias, self.eps)
+class RMSNorm(nn.Module):
+    def __init__(self, dim, eps: float, elementwise_affine: bool = True):
+        super().__init__()
+        self.eps = eps
+        if isinstance(dim, numbers.Integral):
+            dim = (dim,)
+        self.dim = torch.Size(dim)
+        if elementwise_affine:
+            self.weight = nn.Parameter(torch.ones(dim))
+        else:
+            self.weight = None
+    def forward(self, hidden_states):
+        input_dtype = hidden_states.dtype
+        variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)
+        hidden_states = hidden_states * torch.rsqrt(variance + self.eps)
+        if self.weight is not None:
+            # convert into half-precision if necessary
+            if self.weight.dtype in [torch.float16, torch.bfloat16]:
+                hidden_states = hidden_states.to(self.weight.dtype)
+            hidden_states = hidden_states * self.weight
+        else:
+            hidden_states = hidden_states.to(input_dtype)
+        return hidden_states
+class GlobalResponseNorm(nn.Module):
+    # Taken from https://github.com/facebookresearch/ConvNeXt-V2/blob/3608f67cc1dae164790c5d0aead7bf2d73d9719b/models/utils.py#L105
+    def __init__(self, dim):
+        super().__init__()
+        self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim))
+        self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim))
+    def forward(self, x):
+        gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
+        nx = gx / (gx.mean(dim=-1, keepdim=True) + 1e-6)
+        return self.gamma * (x * nx) + self.beta + x

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl