diffusers 0.17.1__py3-none-any.whl → 0.18.2__py3-none-any.whl
- diffusers/__init__.py +26 -1
- diffusers/configuration_utils.py +34 -29
- diffusers/dependency_versions_table.py +4 -0
- diffusers/image_processor.py +125 -12
- diffusers/loaders.py +169 -203
- diffusers/models/attention.py +24 -1
- diffusers/models/attention_flax.py +10 -5
- diffusers/models/attention_processor.py +3 -0
- diffusers/models/autoencoder_kl.py +114 -33
- diffusers/models/controlnet.py +131 -14
- diffusers/models/controlnet_flax.py +37 -26
- diffusers/models/cross_attention.py +17 -17
- diffusers/models/embeddings.py +67 -0
- diffusers/models/modeling_flax_utils.py +64 -56
- diffusers/models/modeling_utils.py +193 -104
- diffusers/models/prior_transformer.py +207 -37
- diffusers/models/resnet.py +26 -26
- diffusers/models/transformer_2d.py +36 -41
- diffusers/models/transformer_temporal.py +24 -21
- diffusers/models/unet_1d.py +31 -25
- diffusers/models/unet_2d.py +43 -30
- diffusers/models/unet_2d_blocks.py +210 -89
- diffusers/models/unet_2d_blocks_flax.py +12 -12
- diffusers/models/unet_2d_condition.py +172 -64
- diffusers/models/unet_2d_condition_flax.py +38 -24
- diffusers/models/unet_3d_blocks.py +34 -31
- diffusers/models/unet_3d_condition.py +101 -34
- diffusers/models/vae.py +5 -5
- diffusers/models/vae_flax.py +37 -34
- diffusers/models/vq_model.py +23 -14
- diffusers/pipelines/__init__.py +24 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py +1 -1
- diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -3
- diffusers/pipelines/consistency_models/__init__.py +1 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +337 -0
- diffusers/pipelines/controlnet/multicontrolnet.py +120 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +59 -17
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +60 -15
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +60 -17
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/kandinsky/__init__.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +4 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +1 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +1 -0
- diffusers/pipelines/kandinsky2_2/__init__.py +7 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +317 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +372 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +434 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +398 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +531 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +541 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +605 -0
- diffusers/pipelines/pipeline_flax_utils.py +2 -2
- diffusers/pipelines/pipeline_utils.py +124 -146
- diffusers/pipelines/shap_e/__init__.py +27 -0
- diffusers/pipelines/shap_e/camera.py +147 -0
- diffusers/pipelines/shap_e/pipeline_shap_e.py +390 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +349 -0
- diffusers/pipelines/shap_e/renderer.py +709 -0
- diffusers/pipelines/stable_diffusion/__init__.py +2 -0
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +261 -66
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +4 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_ldm3d.py +719 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py +832 -0
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +17 -7
- diffusers/pipelines/stable_diffusion_xl/__init__.py +26 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +823 -0
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +896 -0
- diffusers/pipelines/stable_diffusion_xl/watermark.py +31 -0
- diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +771 -0
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +92 -6
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
- diffusers/pipelines/versatile_diffusion/modeling_text_unet.py +209 -91
- diffusers/schedulers/__init__.py +3 -0
- diffusers/schedulers/scheduling_consistency_models.py +380 -0
- diffusers/schedulers/scheduling_ddim.py +28 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +19 -4
- diffusers/schedulers/scheduling_ddim_parallel.py +642 -0
- diffusers/schedulers/scheduling_ddpm.py +53 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +604 -0
- diffusers/schedulers/scheduling_deis_multistep.py +66 -11
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +55 -13
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +19 -4
- diffusers/schedulers/scheduling_dpmsolver_sde.py +73 -11
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +23 -7
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -9
- diffusers/schedulers/scheduling_euler_discrete.py +58 -8
- diffusers/schedulers/scheduling_heun_discrete.py +89 -14
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +73 -11
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +73 -11
- diffusers/schedulers/scheduling_lms_discrete.py +57 -8
- diffusers/schedulers/scheduling_pndm.py +46 -10
- diffusers/schedulers/scheduling_repaint.py +19 -4
- diffusers/schedulers/scheduling_sde_ve.py +5 -1
- diffusers/schedulers/scheduling_unclip.py +43 -4
- diffusers/schedulers/scheduling_unipc_multistep.py +48 -7
- diffusers/training_utils.py +1 -1
- diffusers/utils/__init__.py +2 -1
- diffusers/utils/dummy_pt_objects.py +60 -0
- diffusers/utils/dummy_torch_and_transformers_and_invisible_watermark_objects.py +32 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +180 -0
- diffusers/utils/hub_utils.py +1 -1
- diffusers/utils/import_utils.py +20 -3
- diffusers/utils/logging.py +15 -18
- diffusers/utils/outputs.py +3 -3
- diffusers/utils/testing_utils.py +15 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/METADATA +4 -2
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/RECORD +120 -94
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/WHEEL +1 -1
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/LICENSE +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/entry_points.txt +0 -0
- {diffusers-0.17.1.dist-info → diffusers-0.18.2.dist-info}/top_level.txt +0 -0
diffusers/models/modeling_utils.py
@@ -22,7 +22,7 @@ from functools import partial
 from typing import Any, Callable, List, Optional, Tuple, Union
 
 import torch
-from torch import Tensor, device
+from torch import Tensor, device, nn
 
 from .. import __version__
 from ..utils import (
@@ -154,11 +154,10 @@ class ModelMixin(torch.nn.Module):
     r"""
     Base class for all models.
 
-    [`ModelMixin`] takes care of storing the configuration
-
+    [`ModelMixin`] takes care of storing the model configuration and provides methods for loading, downloading and
+    saving models.
 
-        - **config_name** ([`str`]) --
-          [`~models.ModelMixin.save_pretrained`].
+        - **config_name** ([`str`]) -- Filename to save a model to when calling [`~models.ModelMixin.save_pretrained`].
     """
     config_name = CONFIG_NAME
     _automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
@@ -190,18 +189,13 @@ class ModelMixin(torch.nn.Module):
     def is_gradient_checkpointing(self) -> bool:
         """
         Whether gradient checkpointing is activated for this model or not.
-
-        Note that in other frameworks this feature can be referred to as "activation checkpointing" or "checkpoint
-        activations".
         """
         return any(hasattr(m, "gradient_checkpointing") and m.gradient_checkpointing for m in self.modules())
 
     def enable_gradient_checkpointing(self):
         """
-        Activates gradient checkpointing for the current model
-
-        Note that in other frameworks this feature can be referred to as "activation checkpointing" or "checkpoint
-        activations".
+        Activates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or
+        *checkpoint activations* in other frameworks).
         """
         if not self._supports_gradient_checkpointing:
             raise ValueError(f"{self.__class__.__name__} does not support gradient checkpointing.")
@@ -209,10 +203,8 @@
 
     def disable_gradient_checkpointing(self):
         """
-        Deactivates gradient checkpointing for the current model
-
-        Note that in other frameworks this feature can be referred to as "activation checkpointing" or "checkpoint
-        activations".
+        Deactivates gradient checkpointing for the current model (may be referred to as *activation checkpointing* or
+        *checkpoint activations* in other frameworks).
         """
         if self._supports_gradient_checkpointing:
             self.apply(partial(self._set_gradient_checkpointing, value=False))
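The gradient checkpointing toggles in the two hunks above trade compute for memory: activations are recomputed during the backward pass instead of stored. A minimal usage sketch (the checkpoint id is illustrative):

```py
from diffusers import UNet2DConditionModel

# Trade compute for memory during training by recomputing activations
# in the backward pass.
unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")
unet.enable_gradient_checkpointing()
assert unet.is_gradient_checkpointing

# Switch back to storing activations for faster backward passes.
unet.disable_gradient_checkpointing()
assert not unet.is_gradient_checkpointing
```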
@@ -236,13 +228,17 @@
 
     def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
         r"""
-        Enable memory efficient attention
+        Enable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/).
+
+        When this option is enabled, you should observe lower GPU memory usage and a potential speed up during
+        inference. Speed up during training is not guaranteed.
+
+        <Tip warning={true}>
 
-        When
-
+        ⚠️ When memory efficient attention and sliced attention are both enabled, memory efficient attention takes
+        precedent.
 
-
-        is used.
+        </Tip>
 
         Parameters:
             attention_op (`Callable`, *optional*):
@@ -268,7 +264,7 @@
 
     def disable_xformers_memory_efficient_attention(self):
         r"""
-        Disable memory efficient attention
+        Disable memory efficient attention from [xFormers](https://facebookresearch.github.io/xformers/).
         """
         self.set_use_memory_efficient_attention_xformers(False)
 
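Both xFormers methods are thin wrappers around `set_use_memory_efficient_attention_xformers`, as the hunk above shows. A short sketch of their use, assuming `xformers` is installed:

```py
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

# Route attention through xFormers' memory-efficient kernels; this should
# lower GPU memory usage and may speed up inference.
unet.enable_xformers_memory_efficient_attention()

# Revert to the default attention implementation.
unet.disable_xformers_memory_efficient_attention()
```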
@@ -281,24 +277,24 @@
         variant: Optional[str] = None,
     ):
         """
-        Save a model and its configuration file to a directory
-
+        Save a model and its configuration file to a directory so that it can be reloaded using the
+        [`~models.ModelMixin.from_pretrained`] class method.
 
         Arguments:
             save_directory (`str` or `os.PathLike`):
-                Directory to
+                Directory to save a model and its configuration file to. Will be created if it doesn't exist.
             is_main_process (`bool`, *optional*, defaults to `True`):
-                Whether the process calling this is the main process or not. Useful
-
-
+                Whether the process calling this is the main process or not. Useful during distributed training and you
+                need to call this function on all processes. In this case, set `is_main_process=True` only on the main
+                process to avoid race conditions.
             save_function (`Callable`):
-                The function to use to save the state dictionary. Useful
-
+                The function to use to save the state dictionary. Useful during distributed training when you need to
+                replace `torch.save` with another method. Can be configured with the environment variable
                 `DIFFUSERS_SAVE_MODE`.
             safe_serialization (`bool`, *optional*, defaults to `False`):
-                Whether to save the model using `safetensors` or the traditional PyTorch way
+                Whether to save the model using `safetensors` or the traditional PyTorch way with `pickle`.
             variant (`str`, *optional*):
-                If specified, weights are saved in the format pytorch_model.<variant>.bin
+                If specified, weights are saved in the format `pytorch_model.<variant>.bin`.
         """
         if safe_serialization and not is_safetensors_available():
             raise ImportError("`safe_serialization` requires the `safetensors library: `pip install safetensors`.")
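A sketch of the `save_pretrained` options spelled out in the docstring above; the paths are illustrative, and `safe_serialization=True` assumes `safetensors` is installed:

```py
import torch
from diffusers import UNet2DConditionModel

unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")

# Default: pickle-based PyTorch serialization.
unet.save_pretrained("./my_unet")

# Serialize with safetensors instead of pickle (requires `pip install safetensors`).
unet.save_pretrained("./my_unet", safe_serialization=True)

# Save half-precision weights; the variant is inserted into the weight filename.
unet.to(torch.float16).save_pretrained("./my_unet", variant="fp16")

# Reload later with the matching variant.
unet_fp16 = UNet2DConditionModel.from_pretrained("./my_unet", variant="fp16", torch_dtype=torch.float16)
```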
@@ -335,107 +331,108 @@
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], **kwargs):
         r"""
-        Instantiate a pretrained
-
-        The model is set in evaluation mode by default using `model.eval()` (Dropout modules are deactivated). To train
-        the model, you should first set it back in training mode with `model.train()`.
-
-        The warning *Weights from XXX not initialized from pretrained model* means that the weights of XXX do not come
-        pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
-        task.
+        Instantiate a pretrained PyTorch model from a pretrained model configuration.
 
-        The
-
+        The model is set in evaluation mode - `model.eval()` - by default, and dropout modules are deactivated. To
+        train the model, set it back in training mode with `model.train()`.
 
         Parameters:
             pretrained_model_name_or_path (`str` or `os.PathLike`, *optional*):
                 Can be either:
 
-                    - A string, the *model id* of a pretrained model hosted
-
-                    - A path to a *directory* containing model weights saved
-
+                    - A string, the *model id* (for example `google/ddpm-celebahq-256`) of a pretrained model hosted on
+                      the Hub.
+                    - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
+                      with [`~ModelMixin.save_pretrained`].
 
             cache_dir (`Union[str, os.PathLike]`, *optional*):
-                Path to a directory
-
+                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
+                is not used.
             torch_dtype (`str` or `torch.dtype`, *optional*):
-                Override the default `torch.dtype` and load the model
-
+                Override the default `torch.dtype` and load the model with another dtype. If `"auto"` is passed, the
+                dtype is automatically derived from the model's weights.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
             resume_download (`bool`, *optional*, defaults to `False`):
-                Whether or not to
-
+                Whether or not to resume downloading the model weights and configuration files. If set to `False`, any
+                incompletely downloaded files are deleted.
             proxies (`Dict[str, str]`, *optional*):
-                A dictionary of proxy servers to use by protocol or endpoint,
+                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                 'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
-            output_loading_info(`bool`, *optional*, defaults to `False`):
+            output_loading_info (`bool`, *optional*, defaults to `False`):
                 Whether or not to also return a dictionary containing missing keys, unexpected keys and error messages.
             local_files_only(`bool`, *optional*, defaults to `False`):
-                Whether
+                Whether to only load local model weights and configuration files or not. If set to `True`, the model
+                won't be downloaded from the Hub.
             use_auth_token (`str` or *bool*, *optional*):
-                The token to use as HTTP bearer authorization for remote files. If `True`,
-
+                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
+                `diffusers-cli login` (stored in `~/.huggingface`) is used.
             revision (`str`, *optional*, defaults to `"main"`):
-                The specific model version to use. It can be a branch name, a tag name,
-
-                identifier allowed by git.
+                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
+                allowed by Git.
             from_flax (`bool`, *optional*, defaults to `False`):
                 Load the model weights from a Flax checkpoint save file.
             subfolder (`str`, *optional*, defaults to `""`):
-
-                huggingface.co or downloaded locally), you can specify the folder name here.
-
+                The subfolder location of a model file within a larger model repository on the Hub or locally.
             mirror (`str`, *optional*):
-                Mirror source to
-
-
+                Mirror source to resolve accessibility issues if you're downloading a model in China. We do not
+                guarantee the timeliness or safety of the source, and you should refer to the mirror site for more
+                information.
             device_map (`str` or `Dict[str, Union[int, str, torch.device]]`, *optional*):
-                A map that specifies where each submodule should go. It doesn't need to be
-                parameter/buffer name
+                A map that specifies where each submodule should go. It doesn't need to be defined for each
+                parameter/buffer name; once a given module name is inside, every submodule of it will be sent to the
                 same device.
 
-
+                Set `device_map="auto"` to have 🤗 Accelerate automatically compute the most optimized `device_map`. For
                 more information about each option see [designing a device
                 map](https://hf.co/docs/accelerate/main/en/usage_guides/big_modeling#designing-a-device-map).
             max_memory (`Dict`, *optional*):
-                A dictionary device identifier
-                GPU and the available CPU RAM if unset.
+                A dictionary device identifier for the maximum memory. Will default to the maximum memory available for
+                each GPU and the available CPU RAM if unset.
             offload_folder (`str` or `os.PathLike`, *optional*):
-
+                The path to offload weights if `device_map` contains the value `"disk"`.
             offload_state_dict (`bool`, *optional*):
-                If `True`,
-
-
+                If `True`, temporarily offloads the CPU state dict to the hard drive to avoid running out of CPU RAM if
+                the weight of the CPU state dict + the biggest shard of the checkpoint does not fit. Defaults to `True`
+                when there is some disk offload.
             low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-                Speed up model loading
-
-
-
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
             variant (`str`, *optional*):
-
-
+                Load weights from a specified `variant` filename such as `"fp16"` or `"ema"`. This is ignored when
+                loading `from_flax`.
             use_safetensors (`bool`, *optional*, defaults to `None`):
-                If set to `None`, the `safetensors` weights
-                `safetensors` library is installed. If set to `True`, the model
-
+                If set to `None`, the `safetensors` weights are downloaded if they're available **and** if the
+                `safetensors` library is installed. If set to `True`, the model is forcibly loaded from `safetensors`
+                weights. If set to `False`, `safetensors` weights are not loaded.
 
         <Tip>
 
-
-
+        To use private or [gated models](https://huggingface.co/docs/hub/models-gated#gated-models), log-in with
+        `huggingface-cli login`. You can also activate the special
+        ["offline-mode"](https://huggingface.co/diffusers/installation.html#offline-mode) to use this method in a
+        firewalled environment.
 
         </Tip>
 
-
+        Example:
 
-
-
+        ```py
+        from diffusers import UNet2DConditionModel
 
-
+        unet = UNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5", subfolder="unet")
+        ```
 
+        If you get the error message below, you need to finetune the weights for your downstream task:
+
+        ```bash
+        Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
+        - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
+        You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
+        ```
         """
         cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
         ignore_mismatched_sizes = kwargs.pop("ignore_mismatched_sizes", False)
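The loading options documented above compose. A sketch combining several of them, assuming `accelerate` is installed for `device_map="auto"` and that the repository ships `fp16` variant weights:

```py
import torch
from diffusers import UNet2DConditionModel

# Load fp16 variant weights in half precision and let Accelerate
# place submodules across the available devices.
unet = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    subfolder="unet",
    variant="fp16",
    torch_dtype=torch.float16,
    device_map="auto",
)

# Request loading diagnostics alongside the model.
unet, loading_info = UNet2DConditionModel.from_pretrained(
    "runwayml/stable-diffusion-v1-5", subfolder="unet", output_loading_info=True
)
print(loading_info["missing_keys"])
```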
@@ -459,7 +456,7 @@
 
         if use_safetensors and not is_safetensors_available():
             raise ValueError(
-                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install
+                "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
             )
 
         allow_pickle = False
@@ -646,15 +643,47 @@
             else:  # else let accelerate handle loading and dispatching.
                 # Load weights and dispatch according to the device_map
                 # by default the device_map is None and the weights are loaded on the CPU
-                accelerate.load_checkpoint_and_dispatch(
-                    model,
-                    model_file,
-                    device_map,
-                    max_memory=max_memory,
-                    offload_folder=offload_folder,
-                    offload_state_dict=offload_state_dict,
-                    dtype=torch_dtype,
-                )
+                try:
+                    accelerate.load_checkpoint_and_dispatch(
+                        model,
+                        model_file,
+                        device_map,
+                        max_memory=max_memory,
+                        offload_folder=offload_folder,
+                        offload_state_dict=offload_state_dict,
+                        dtype=torch_dtype,
+                    )
+                except AttributeError as e:
+                    # When using accelerate loading, we do not have the ability to load the state
+                    # dict and rename the weight names manually. Additionally, accelerate skips
+                    # torch loading conventions and directly writes into `module.{_buffers, _parameters}`
+                    # (which look like they should be private variables?), so we can't use the standard hooks
+                    # to rename parameters on load. We need to mimic the original weight names so the correct
+                    # attributes are available. After we have loaded the weights, we convert the deprecated
+                    # names to the new non-deprecated names. Then we _greatly encourage_ the user to convert
+                    # the weights so we don't have to do this again.
+
+                    if "'Attention' object has no attribute" in str(e):
+                        logger.warn(
+                            f"Taking `{str(e)}` while using `accelerate.load_checkpoint_and_dispatch` to mean {pretrained_model_name_or_path}"
+                            " was saved with deprecated attention block weight names. We will load it with the deprecated attention block"
+                            " names and convert them on the fly to the new attention block format. Please re-save the model after this conversion,"
+                            " so we don't have to do the on the fly renaming in the future. If the model is from a hub checkpoint,"
+                            " please also re-upload it or open a PR on the original repository."
+                        )
+                        model._temp_convert_self_to_deprecated_attention_blocks()
+                        accelerate.load_checkpoint_and_dispatch(
+                            model,
+                            model_file,
+                            device_map,
+                            max_memory=max_memory,
+                            offload_folder=offload_folder,
+                            offload_state_dict=offload_state_dict,
+                            dtype=torch_dtype,
+                        )
+                        model._undo_temp_convert_self_to_deprecated_attention_blocks()
+                    else:
+                        raise e
 
             loading_info = {
                 "missing_keys": [],
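The new `try`/`except` fallback only renames attributes in memory for the duration of the load; as the warning text asks, re-saving the checkpoint persists the new attention block names so the fallback is skipped on future loads. A hypothetical round trip, where `./old_vae` stands in for a checkpoint saved with the deprecated `query`/`key`/`value`/`proj_attn` names:

```py
from diffusers import AutoencoderKL

# Loading with accelerate dispatch triggers the on-the-fly rename if the
# checkpoint still uses the deprecated attention weight names.
vae = AutoencoderKL.from_pretrained("./old_vae", device_map="auto")

# Re-save once so future loads find to_q/to_k/to_v/to_out.0 directly.
vae.save_pretrained("./old_vae")
```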
@@ -820,17 +849,27 @@
 
     def num_parameters(self, only_trainable: bool = False, exclude_embeddings: bool = False) -> int:
         """
-        Get number of (
+        Get number of (trainable or non-embedding) parameters in the module.
 
         Args:
             only_trainable (`bool`, *optional*, defaults to `False`):
-                Whether or not to return only the number of trainable parameters
-
+                Whether or not to return only the number of trainable parameters.
             exclude_embeddings (`bool`, *optional*, defaults to `False`):
-                Whether or not to return only the number of non-
+                Whether or not to return only the number of non-embedding parameters.
 
         Returns:
             `int`: The number of parameters.
+
+        Example:
+
+        ```py
+        from diffusers import UNet2DConditionModel
+
+        model_id = "runwayml/stable-diffusion-v1-5"
+        unet = UNet2DConditionModel.from_pretrained(model_id, subfolder="unet")
+        unet.num_parameters(only_trainable=True)
+        859520964
+        ```
         """
 
         if exclude_embeddings:
@@ -889,3 +928,53 @@
             state_dict[f"{path}.to_out.0.weight"] = state_dict.pop(f"{path}.proj_attn.weight")
         if f"{path}.proj_attn.bias" in state_dict:
             state_dict[f"{path}.to_out.0.bias"] = state_dict.pop(f"{path}.proj_attn.bias")
+
+    def _temp_convert_self_to_deprecated_attention_blocks(self):
+        deprecated_attention_block_modules = []
+
+        def recursive_find_attn_block(module):
+            if hasattr(module, "_from_deprecated_attn_block") and module._from_deprecated_attn_block:
+                deprecated_attention_block_modules.append(module)
+
+            for sub_module in module.children():
+                recursive_find_attn_block(sub_module)
+
+        recursive_find_attn_block(self)
+
+        for module in deprecated_attention_block_modules:
+            module.query = module.to_q
+            module.key = module.to_k
+            module.value = module.to_v
+            module.proj_attn = module.to_out[0]
+
+            # We don't _have_ to delete the old attributes, but it's helpful to ensure
+            # that _all_ the weights are loaded into the new attributes and we're not
+            # making an incorrect assumption that this model should be converted when
+            # it really shouldn't be.
+            del module.to_q
+            del module.to_k
+            del module.to_v
+            del module.to_out
+
+    def _undo_temp_convert_self_to_deprecated_attention_blocks(self):
+        deprecated_attention_block_modules = []
+
+        def recursive_find_attn_block(module):
+            if hasattr(module, "_from_deprecated_attn_block") and module._from_deprecated_attn_block:
+                deprecated_attention_block_modules.append(module)
+
+            for sub_module in module.children():
+                recursive_find_attn_block(sub_module)
+
+        recursive_find_attn_block(self)
+
+        for module in deprecated_attention_block_modules:
+            module.to_q = module.query
+            module.to_k = module.key
+            module.to_v = module.value
+            module.to_out = nn.ModuleList([module.proj_attn, nn.Dropout(module.dropout)])
+
+            del module.query
+            del module.key
+            del module.value
+            del module.proj_attn