diffusers 0.18.0__py3-none-any.whl → 0.18.2__py3-none-any.whl

diffusers/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.18.0"
+ __version__ = "0.18.2"
 
  from .configuration_utils import ConfigMixin
  from .utils import (
diffusers/configuration_utils.py CHANGED
@@ -607,7 +607,7 @@ def register_to_config(init):
 
  # Take note of the parameters that were not present in the loaded config
  if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
- new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+ new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
  new_kwargs = {**config_init_kwargs, **new_kwargs}
  getattr(self, "register_to_config")(**new_kwargs)
@@ -655,7 +655,7 @@ def flax_register_to_config(cls):
 
  # Take note of the parameters that were not present in the loaded config
  if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
- new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+ new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))
 
  getattr(self, "register_to_config")(**new_kwargs)
  original_init(self, *args, **kwargs)
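Context for the change above: the registered config is later serialized to JSON (for example when `save_config` / `save_pretrained` writes `config.json`), and a Python `set` is not JSON-serializable while a `list` is. A minimal illustration with the standard `json` module (not the diffusers config machinery):

    import json

    defaulted = {"sample_size", "in_channels"} - {"sample_size"}

    # json.dumps({"_use_default_values": defaulted})              # TypeError: Object of type set is not JSON serializable
    print(json.dumps({"_use_default_values": list(defaulted)}))   # {"_use_default_values": ["in_channels"]}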
diffusers/loaders.py CHANGED
@@ -177,7 +177,7 @@ class UNet2DConditionLoadersMixin:
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
@@ -589,7 +589,7 @@ class TextualInversionLoaderMixin:
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
@@ -806,7 +806,7 @@ class LoraLoaderMixin:
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
@@ -1054,7 +1054,7 @@ class LoraLoaderMixin:
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
@@ -1394,7 +1394,7 @@ class FromSingleFileMixin:
  use_auth_token = kwargs.pop("use_auth_token", None)
  revision = kwargs.pop("revision", None)
  extract_ema = kwargs.pop("extract_ema", False)
- image_size = kwargs.pop("image_size", 512)
+ image_size = kwargs.pop("image_size", None)
  scheduler_type = kwargs.pop("scheduler_type", "pndm")
  num_in_channels = kwargs.pop("num_in_channels", None)
  upcast_attention = kwargs.pop("upcast_attention", None)
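With `image_size` now defaulting to `None`, `from_single_file` leaves the resolution to be inferred by the conversion logic (the conversion changes further down pick 1024 for SDXL checkpoints, for instance) instead of hard-coding 512. A usage sketch; the local checkpoint path is hypothetical:

    from diffusers import StableDiffusionPipeline

    # Hypothetical local file; pass image_size explicitly only to override the inferred value.
    pipe = StableDiffusionPipeline.from_single_file("./v1-5-pruned-emaonly.safetensors")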
diffusers/models/attention_flax.py CHANGED
@@ -152,6 +152,7 @@ class FlaxAttention(nn.Module):
  self.value = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_v")
 
  self.proj_attn = nn.Dense(self.query_dim, dtype=self.dtype, name="to_out_0")
+ self.dropout_layer = nn.Dropout(rate=self.dropout)
 
  def reshape_heads_to_batch_dim(self, tensor):
  batch_size, seq_len, dim = tensor.shape
@@ -214,7 +215,7 @@ class FlaxAttention(nn.Module):
 
  hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
  hidden_states = self.proj_attn(hidden_states)
- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
  class FlaxBasicTransformerBlock(nn.Module):
@@ -260,6 +261,7 @@ class FlaxBasicTransformerBlock(nn.Module):
  self.norm1 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
  self.norm2 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
  self.norm3 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
+ self.dropout_layer = nn.Dropout(rate=self.dropout)
 
  def __call__(self, hidden_states, context, deterministic=True):
  # self attention
@@ -280,7 +282,7 @@ class FlaxBasicTransformerBlock(nn.Module):
  hidden_states = self.ff(self.norm3(hidden_states), deterministic=deterministic)
  hidden_states = hidden_states + residual
 
- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
  class FlaxTransformer2DModel(nn.Module):
@@ -356,6 +358,8 @@ class FlaxTransformer2DModel(nn.Module):
  dtype=self.dtype,
  )
 
+ self.dropout_layer = nn.Dropout(rate=self.dropout)
+
  def __call__(self, hidden_states, context, deterministic=True):
  batch, height, width, channels = hidden_states.shape
  residual = hidden_states
@@ -378,7 +382,7 @@ class FlaxTransformer2DModel(nn.Module):
  hidden_states = self.proj_out(hidden_states)
 
  hidden_states = hidden_states + residual
- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)
 
 
  class FlaxFeedForward(nn.Module):
@@ -409,7 +413,7 @@ class FlaxFeedForward(nn.Module):
  self.net_2 = nn.Dense(self.dim, dtype=self.dtype)
 
  def __call__(self, hidden_states, deterministic=True):
- hidden_states = self.net_0(hidden_states)
+ hidden_states = self.net_0(hidden_states, deterministic=deterministic)
  hidden_states = self.net_2(hidden_states)
  return hidden_states
 
@@ -434,8 +438,9 @@ class FlaxGEGLU(nn.Module):
  def setup(self):
  inner_dim = self.dim * 4
  self.proj = nn.Dense(inner_dim * 2, dtype=self.dtype)
+ self.dropout_layer = nn.Dropout(rate=self.dropout)
 
  def __call__(self, hidden_states, deterministic=True):
  hidden_states = self.proj(hidden_states)
  hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
- return hidden_linear * nn.gelu(hidden_gelu)
+ return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
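All of the Flax blocks above now route their outputs through `nn.Dropout`, gated by the existing `deterministic` flag. A minimal sketch of that pattern in plain `flax.linen` (a stand-in module, not the diffusers classes): dropout is a no-op when `deterministic=True` and needs a "dropout" PRNG key otherwise.

    import jax
    import flax.linen as nn

    class Block(nn.Module):
        rate: float = 0.1

        @nn.compact
        def __call__(self, x, deterministic=True):
            return nn.Dropout(rate=self.rate)(x, deterministic=deterministic)

    x = jax.numpy.ones((1, 4))
    block = Block()
    variables = block.init(jax.random.PRNGKey(0), x)  # deterministic by default, no dropout rng needed
    y = block.apply(variables, x, deterministic=False, rngs={"dropout": jax.random.PRNGKey(1)})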
diffusers/models/modeling_utils.py CHANGED
@@ -456,7 +456,7 @@ class ModelMixin(torch.nn.Module):
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
diffusers/pipelines/pipeline_utils.py CHANGED
@@ -204,7 +204,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
  transformers_index_format = r"\d{5}-of-\d{5}"
 
  if variant is not None:
- # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetenstors`
+ # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetensors`
  variant_file_re = re.compile(
  rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
  )
@@ -213,7 +213,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
  rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$"
  )
 
- # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetenstors`
+ # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors`
  non_variant_file_re = re.compile(
  rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$"
  )
@@ -1168,7 +1168,7 @@ class DiffusionPipeline(ConfigMixin):
 
  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )
 
  allow_pickle = False
@@ -1213,6 +1213,15 @@ class DiffusionPipeline(ConfigMixin):
  filenames = {sibling.rfilename for sibling in info.siblings}
  model_filenames, variant_filenames = variant_compatible_siblings(filenames, variant=variant)
 
+ if len(variant_filenames) == 0 and variant is not None:
+ deprecation_message = (
+ f"You are trying to load the model files of the `variant={variant}`, but no such modeling files are available."
+ f"The default model files: {model_filenames} will be loaded instead. Make sure to not load from `variant={variant}`"
+ "if such variant modeling files are not available. Doing so will lead to an error in v0.22.0 as defaulting to non-variant"
+ "modeling files is deprecated."
+ )
+ deprecate("no variant default", "0.22.0", deprecation_message, standard_warn=False)
+
  # remove ignored filenames
  model_filenames = set(model_filenames) - set(ignore_filenames)
  variant_filenames = set(variant_filenames) - set(ignore_filenames)
@@ -1302,7 +1311,7 @@ class DiffusionPipeline(ConfigMixin):
  snapshot_folder = Path(config_file).parent
  pipeline_is_cached = all((snapshot_folder / f).is_file() for f in expected_files)
 
- if pipeline_is_cached:
+ if pipeline_is_cached and not force_download:
  # if the pipeline is cached, we can directly return it
  # else call snapshot_download
  return snapshot_folder
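The `and not force_download` guard above means an explicit `force_download=True` now re-fetches a pipeline even when a complete snapshot is already cached. A usage sketch (the repo id is only an example):

    from diffusers import DiffusionPipeline

    # Re-downloads the files instead of returning the cached snapshot folder.
    pipe = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", force_download=True)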
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py CHANGED
@@ -24,6 +24,7 @@ from transformers import (
  AutoFeatureExtractor,
  BertTokenizerFast,
  CLIPImageProcessor,
+ CLIPTextConfig,
  CLIPTextModel,
  CLIPTextModelWithProjection,
  CLIPTokenizer,
@@ -48,7 +49,7 @@ from ...schedulers import (
  PNDMScheduler,
  UnCLIPScheduler,
  )
- from ...utils import is_omegaconf_available, is_safetensors_available, logging
+ from ...utils import is_accelerate_available, is_omegaconf_available, is_safetensors_available, logging
  from ...utils.import_utils import BACKENDS_MAPPING
  from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
  from ..paint_by_example import PaintByExampleImageEncoder
@@ -57,6 +58,10 @@ from .safety_checker import StableDiffusionSafetyChecker
  from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
 
 
+ if is_accelerate_available():
+ from accelerate import init_empty_weights
+ from accelerate.utils import set_module_tensor_to_device
+
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
 
 
@@ -391,8 +396,8 @@ def convert_ldm_unet_checkpoint(
 
  # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
  if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
- print(f"Checkpoint {path} has both EMA and non-EMA weights.")
- print(
+ logger.warning(f"Checkpoint {path} has both EMA and non-EMA weights.")
+ logger.warning(
  "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
  " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
  )
@@ -402,7 +407,7 @@ def convert_ldm_unet_checkpoint(
  unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
  else:
  if sum(k.startswith("model_ema") for k in keys) > 100:
- print(
+ logger.warning(
  "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
  " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
  )
@@ -770,11 +775,12 @@ def convert_ldm_bert_checkpoint(checkpoint, config):
 
 
  def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
- text_model = (
- CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
- if text_encoder is None
- else text_encoder
- )
+ if text_encoder is None:
+ config_name = "openai/clip-vit-large-patch14"
+ config = CLIPTextConfig.from_pretrained(config_name)
+
+ with init_empty_weights():
+ text_model = CLIPTextModel(config)
 
  keys = list(checkpoint.keys())
 
@@ -787,7 +793,8 @@ def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder
  if key.startswith(prefix):
  text_model_dict[key[len(prefix + ".") :]] = checkpoint[key]
 
- text_model.load_state_dict(text_model_dict)
+ for param_name, param in text_model_dict.items():
+ set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
  return text_model
 
@@ -884,14 +891,26 @@ def convert_paint_by_example_checkpoint(checkpoint):
  return model
 
 
- def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
+ def convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, **config_kwargs
+ ):
  # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
- text_model = CLIPTextModelWithProjection.from_pretrained(
- "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
- )
+ # text_model = CLIPTextModelWithProjection.from_pretrained(
+ # "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
+ # )
+ config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+
+ with init_empty_weights():
+ text_model = CLIPTextModelWithProjection(config) if has_projection else CLIPTextModel(config)
 
  keys = list(checkpoint.keys())
 
+ keys_to_ignore = []
+ if config_name == "stabilityai/stable-diffusion-2" and config.num_hidden_layers == 23:
+ # make sure to remove all keys > 22
+ keys_to_ignore += [k for k in keys if k.startswith("cond_stage_model.model.transformer.resblocks.23")]
+ keys_to_ignore += ["cond_stage_model.model.text_projection"]
+
  text_model_dict = {}
 
  if prefix + "text_projection" in checkpoint:
@@ -902,8 +921,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
  text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")
 
  for key in keys:
- # if "resblocks.23" in key: # Diffusers drops the final layer and only uses the penultimate layer
- # continue
+ if key in keys_to_ignore:
+ continue
  if key[len(prefix) :] in textenc_conversion_map:
  if key.endswith("text_projection"):
  value = checkpoint[key].T
@@ -931,7 +950,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
 
  text_model_dict[new_key] = checkpoint[key]
 
- text_model.load_state_dict(text_model_dict)
+ for param_name, param in text_model_dict.items():
+ set_module_tensor_to_device(text_model, param_name, "cpu", value=param)
 
  return text_model
 
@@ -1061,7 +1081,7 @@ def convert_controlnet_checkpoint(
  def download_from_original_stable_diffusion_ckpt(
  checkpoint_path: str,
  original_config_file: str = None,
- image_size: int = 512,
+ image_size: Optional[int] = None,
  prediction_type: str = None,
  model_type: str = None,
  extract_ema: bool = False,
@@ -1144,6 +1164,7 @@ def download_from_original_stable_diffusion_ckpt(
  LDMTextToImagePipeline,
  PaintByExamplePipeline,
  StableDiffusionControlNetPipeline,
+ StableDiffusionInpaintPipeline,
  StableDiffusionPipeline,
  StableDiffusionXLImg2ImgPipeline,
  StableDiffusionXLPipeline,
@@ -1166,12 +1187,9 @@ def download_from_original_stable_diffusion_ckpt(
  if not is_safetensors_available():
  raise ValueError(BACKENDS_MAPPING["safetensors"][1])
 
- from safetensors import safe_open
+ from safetensors.torch import load_file as safe_load
 
- checkpoint = {}
- with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
- for key in f.keys():
- checkpoint[key] = f.get_tensor(key)
+ checkpoint = safe_load(checkpoint_path, device="cpu")
  else:
  if device is None:
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -1183,7 +1201,7 @@ def download_from_original_stable_diffusion_ckpt(
  if "global_step" in checkpoint:
  global_step = checkpoint["global_step"]
  else:
- print("global_step key not found in model")
+ logger.debug("global_step key not found in model")
  global_step = None
 
  # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
@@ -1230,8 +1248,15 @@ def download_from_original_stable_diffusion_ckpt(
  model_type = "SDXL"
  else:
  model_type = "SDXL-Refiner"
+ if image_size is None:
+ image_size = 1024
 
- if num_in_channels is not None:
+ if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline:
+ num_in_channels = 9
+ elif num_in_channels is None:
+ num_in_channels = 4
+
+ if "unet_config" in original_config.model.params:
  original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels
 
  if (
@@ -1263,7 +1288,6 @@ def download_from_original_stable_diffusion_ckpt(
  num_train_timesteps = getattr(original_config.model.params, "timesteps", None) or 1000
 
  if model_type in ["SDXL", "SDXL-Refiner"]:
- image_size = 1024
  scheduler_dict = {
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
@@ -1279,7 +1303,6 @@ def download_from_original_stable_diffusion_ckpt(
  }
  scheduler = EulerDiscreteScheduler.from_config(scheduler_dict)
  scheduler_type = "euler"
- vae_path = "stabilityai/sdxl-vae"
  else:
  beta_start = getattr(original_config.model.params, "linear_start", None) or 0.02
  beta_end = getattr(original_config.model.params, "linear_end", None) or 0.085
@@ -1318,25 +1341,45 @@ def download_from_original_stable_diffusion_ckpt(
  # Convert the UNet2DConditionModel model.
  unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
  unet_config["upcast_attention"] = upcast_attention
- unet = UNet2DConditionModel(**unet_config)
+ with init_empty_weights():
+ unet = UNet2DConditionModel(**unet_config)
 
  converted_unet_checkpoint = convert_ldm_unet_checkpoint(
  checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
  )
- unet.load_state_dict(converted_unet_checkpoint)
+
+ for param_name, param in converted_unet_checkpoint.items():
+ set_module_tensor_to_device(unet, param_name, "cpu", value=param)
 
  # Convert the VAE model.
  if vae_path is None:
  vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
  converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)
 
- vae = AutoencoderKL(**vae_config)
- vae.load_state_dict(converted_vae_checkpoint)
+ if (
+ "model" in original_config
+ and "params" in original_config.model
+ and "scale_factor" in original_config.model.params
+ ):
+ vae_scaling_factor = original_config.model.params.scale_factor
+ else:
+ vae_scaling_factor = 0.18215 # default SD scaling factor
+
+ vae_config["scaling_factor"] = vae_scaling_factor
+
+ with init_empty_weights():
+ vae = AutoencoderKL(**vae_config)
+
+ for param_name, param in converted_vae_checkpoint.items():
+ set_module_tensor_to_device(vae, param_name, "cpu", value=param)
  else:
  vae = AutoencoderKL.from_pretrained(vae_path)
 
  if model_type == "FrozenOpenCLIPEmbedder":
- text_model = convert_open_clip_checkpoint(checkpoint)
+ config_name = "stabilityai/stable-diffusion-2"
+ config_kwargs = {"subfolder": "text_encoder"}
+
+ text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
  tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")
 
  if stable_unclip is None:
@@ -1469,7 +1512,12 @@ def download_from_original_stable_diffusion_ckpt(
  tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
  text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
  tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
- text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.1.model.")
+
+ config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+ config_kwargs = {"projection_dim": 1280}
+ text_encoder_2 = convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
+ )
 
  pipe = StableDiffusionXLPipeline(
  vae=vae,
@@ -1485,7 +1533,12 @@ def download_from_original_stable_diffusion_ckpt(
  tokenizer = None
  text_encoder = None
  tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
- text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.0.model.")
+
+ config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+ config_kwargs = {"projection_dim": 1280}
+ text_encoder_2 = convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="conditioner.embedders.0.model.", has_projection=True, **config_kwargs
+ )
 
  pipe = StableDiffusionXLImg2ImgPipeline(
  vae=vae,
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py CHANGED
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
  from ...configuration_utils import FrozenDict
  from ...image_processor import VaeImageProcessor
- from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+ from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
  from ...models import AutoencoderKL, UNet2DConditionModel
  from ...schedulers import KarrasDiffusionSchedulers
  from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
@@ -153,7 +153,9 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool
  return mask, masked_image
 
 
- class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+ class StableDiffusionInpaintPipeline(
+ DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ ):
  r"""
  Pipeline for text-guided image inpainting using Stable Diffusion.
 
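With `FromSingleFileMixin` added to the base classes (and the `num_in_channels = 9` default in the conversion code above), an inpainting checkpoint can be loaded straight from a single original-format file. A sketch; the file path is hypothetical:

    from diffusers import StableDiffusionInpaintPipeline

    # Hypothetical local checkpoint; 9 UNet input channels are assumed for inpainting models.
    pipe = StableDiffusionInpaintPipeline.from_single_file("./sd-v1-5-inpainting.ckpt")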
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py CHANGED
@@ -748,15 +748,19 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)
 
- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
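The `isinstance` rewrite above is also a behavior fix: the old code asked whether a processor *instance* was contained in a list of *classes*, which is always `False`, so combined with the `not` the re-cast to the latents dtype ran unconditionally. A minimal illustration of the difference (stand-in class, not the real processor):

    class AttnProcessor2_0:  # stand-in for the diffusers attention processor class
        pass

    proc = AttnProcessor2_0()

    print(proc in [AttnProcessor2_0])             # False: compares the instance against the class object
    print(isinstance(proc, (AttnProcessor2_0,)))  # True: what the patched code checks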
diffusers/pipelines/stable_diffusion_xl/__init__.py CHANGED
@@ -8,7 +8,6 @@ from ...utils import BaseOutput, is_invisible_watermark_available, is_torch_avai
 
 
  @dataclass
- # Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with StableDiffusion->StableDiffusionXL
  class StableDiffusionXLPipelineOutput(BaseOutput):
  """
  Output class for Stable Diffusion pipelines.
@@ -17,13 +16,9 @@ class StableDiffusionXLPipelineOutput(BaseOutput):
  images (`List[PIL.Image.Image]` or `np.ndarray`)
  List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
  num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
- nsfw_content_detected (`List[bool]`)
- List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
- (nsfw) content, or `None` if safety checking could not be performed.
  """
 
  images: Union[List[PIL.Image.Image], np.ndarray]
- nsfw_content_detected: Optional[List[bool]]
 
 
  if is_transformers_available() and is_torch_available() and is_invisible_watermark_available():
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py CHANGED
@@ -129,9 +129,11 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
  self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size
 
  self.watermark = StableDiffusionXLWatermarker()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
  def enable_vae_slicing(self):
  r"""
  Enable sliced VAE decoding.
@@ -141,6 +143,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_slicing()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
  def disable_vae_slicing(self):
  r"""
  Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -148,6 +151,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_slicing()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
  def enable_vae_tiling(self):
  r"""
  Enable tiled VAE decoding.
@@ -157,6 +161,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_tiling()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
  def disable_vae_tiling(self):
  r"""
  Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -183,7 +188,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.to("cpu", silence_dtype_warnings=True)
  torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
 
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+ for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
  cpu_offload(cpu_offloaded_model, device)
 
  def enable_model_cpu_offload(self, gpu_id=0):
@@ -217,6 +222,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.final_offload_hook = hook
 
  @property
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
  def _execution_device(self):
  r"""
  Returns the device on which the pipeline's models will be executed. After calling
@@ -237,12 +243,14 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  def encode_prompt(
  self,
  prompt,
- device,
- num_images_per_prompt,
- do_classifier_free_guidance,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
  negative_prompt=None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  lora_scale: Optional[float] = None,
  ):
  r"""
@@ -268,9 +276,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  lora_scale (`float`, *optional*):
  A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
  """
+ device = device or self._execution_device
+
  # set lora scale so that monkey patched LoRA
  # function of text encoder can correctly access it
  if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -399,6 +416,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
 
+ bs_embed = pooled_prompt_embeds.shape[0]
  pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
  bs_embed * num_images_per_prompt, -1
  )
@@ -408,20 +426,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
- def run_safety_checker(self, image, device, dtype):
- if self.safety_checker is None:
- has_nsfw_concept = None
- else:
- if torch.is_tensor(image):
- feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
- else:
- feature_extractor_input = self.image_processor.numpy_to_pil(image)
- safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
- image, has_nsfw_concept = self.safety_checker(
- images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
- )
- return image, has_nsfw_concept
-
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -448,6 +453,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt=None,
  prompt_embeds=None,
  negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
  ):
  if height % 8 != 0 or width % 8 != 0:
  raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -486,6 +493,17 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  f" {negative_prompt_embeds.shape}."
  )
 
+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
  shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
  if isinstance(generator, list) and len(generator) != batch_size:
@@ -535,6 +553,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents: Optional[torch.FloatTensor] = None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -588,6 +608,13 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  output_type (`str`, *optional*, defaults to `"pil"`):
  The output format of the generate image. Choose between
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -626,15 +653,23 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  "not-safe-for-work" (nsfw) content, according to the `safety_checker`.
  """
  # 0. Default height and width to unet
- height = height or self.unet.config.sample_size * self.vae_scale_factor
- width = width or self.unet.config.sample_size * self.vae_scale_factor
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor
 
  original_size = original_size or (height, width)
  target_size = target_size or (height, width)
 
  # 1. Check inputs. Raise error if not correct
  self.check_inputs(
- prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds
+ prompt,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
  )
 
  # 2. Define call parameters
@@ -669,6 +704,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt,
  prompt_embeds=prompt_embeds,
  negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
  lora_scale=text_encoder_lora_scale,
  )
 
@@ -749,15 +786,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)
 
- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
@@ -765,27 +805,19 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents = latents.float()
 
  if not output_type == "latent":
- # CHECK there is problem here (PVP)
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
- has_nsfw_concept = None
  else:
  image = latents
- has_nsfw_concept = None
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
- if has_nsfw_concept is None:
- do_denormalize = [True] * image.shape[0]
- else:
- do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+ return StableDiffusionXLPipelineOutput(images=image)
 
  image = self.watermark.apply_watermark(image)
- image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+ image = self.image_processor.postprocess(image, output_type=output_type)
 
  # Offload last model to CPU
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
  self.final_offload_hook.offload()
 
  if not return_dict:
- return (image, has_nsfw_concept)
+ return (image,)
 
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+ return StableDiffusionXLPipelineOutput(images=image)
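Because `encode_prompt` now also returns the pooled embeddings and `__call__` accepts them, prompt-embedding workflows pass all four tensors through. A hedged sketch of the call pattern (model id and prompt are illustrative):

    import torch
    from diffusers import StableDiffusionXLPipeline

    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
    ).to("cuda")

    # encode_prompt returns (prompt_embeds, negative_prompt_embeds,
    # pooled_prompt_embeds, negative_pooled_prompt_embeds)
    prompt_embeds, neg_embeds, pooled, neg_pooled = pipe.encode_prompt("an astronaut riding a horse")

    image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=neg_embeds,
        pooled_prompt_embeds=pooled,              # required whenever prompt_embeds is passed
        negative_pooled_prompt_embeds=neg_pooled,
    ).images[0]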
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py CHANGED
@@ -140,6 +140,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  self.watermark = StableDiffusionXLWatermarker()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
  def enable_vae_slicing(self):
  r"""
  Enable sliced VAE decoding.
@@ -149,6 +150,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_slicing()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
  def disable_vae_slicing(self):
  r"""
  Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -156,6 +158,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_slicing()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
  def enable_vae_tiling(self):
  r"""
  Enable tiled VAE decoding.
@@ -165,6 +168,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_tiling()
 
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
  def disable_vae_tiling(self):
  r"""
  Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -172,6 +176,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_tiling()
 
+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
  def enable_sequential_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
@@ -191,9 +196,10 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.to("cpu", silence_dtype_warnings=True)
  torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)
 
- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+ for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
  cpu_offload(cpu_offloaded_model, device)
 
+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -225,6 +231,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.final_offload_hook = hook
 
  @property
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
  def _execution_device(self):
  r"""
  Returns the device on which the pipeline's models will be executed. After calling
@@ -242,15 +249,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  return torch.device(module._hf_hook.execution_device)
  return self.device
 
+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
  def encode_prompt(
  self,
  prompt,
- device,
- num_images_per_prompt,
- do_classifier_free_guidance,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
  negative_prompt=None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  lora_scale: Optional[float] = None,
  ):
  r"""
@@ -276,9 +286,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  lora_scale (`float`, *optional*):
  A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
  """
+ device = device or self._execution_device
+
  # set lora scale so that monkey patched LoRA
  # function of text encoder can correctly access it
  if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -327,13 +346,11 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  text_input_ids.to(device),
  output_hidden_states=True,
  )
+
  # We are only ALWAYS interested in the pooled output of the final text encoder
  pooled_prompt_embeds = prompt_embeds[0]
-
  prompt_embeds = prompt_embeds.hidden_states[-2]
 
- prompt_embeds = prompt_embeds
-
  bs_embed, seq_len, _ = prompt_embeds.shape
  # duplicate text embeddings for each generation per prompt, using mps friendly method
  prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
@@ -349,10 +366,9 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt_embeds = torch.zeros_like(prompt_embeds)
  negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
  elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
  uncond_tokens: List[str]
- if negative_prompt is None:
- uncond_tokens = [""] * batch_size
- elif prompt is not None and type(prompt) is not type(negative_prompt):
+ if prompt is not None and type(prompt) is not type(negative_prompt):
  raise TypeError(
  f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
  f" {type(prompt)}."
@@ -389,7 +405,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  )
  # We are only ALWAYS interested in the pooled output of the final text encoder
  negative_pooled_prompt_embeds = negative_prompt_embeds[0]
-
  negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
  if do_classifier_free_guidance:
@@ -411,6 +426,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)
 
+ bs_embed = pooled_prompt_embeds.shape[0]
  pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
  bs_embed * num_images_per_prompt, -1
  )
@@ -420,20 +436,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
- def run_safety_checker(self, image, device, dtype):
- if self.safety_checker is None:
- has_nsfw_concept = None
- else:
- if torch.is_tensor(image):
- feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
- else:
- feature_extractor_input = self.image_processor.numpy_to_pil(image)
- safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
- image, has_nsfw_concept = self.safety_checker(
- images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
- )
- return image, has_nsfw_concept
-
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -624,6 +627,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents: Optional[torch.FloatTensor] = None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -683,6 +688,13 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  output_type (`str`, *optional*, defaults to `"pil"`):
  The output format of the generate image. Choose between
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -759,6 +771,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt,
  prompt_embeds=prompt_embeds,
  negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
  lora_scale=text_encoder_lora_scale,
  )
 
@@ -845,15 +859,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)
 
- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
@@ -862,24 +879,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
 
  if not output_type == "latent":
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
- has_nsfw_concept = None
  else:
  image = latents
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
- if has_nsfw_concept is None:
- do_denormalize = [True] * image.shape[0]
- else:
- do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+ return StableDiffusionXLPipelineOutput(images=image)
 
  image = self.watermark.apply_watermark(image)
- image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+ image = self.image_processor.postprocess(image, output_type=output_type)
 
  # Offload last model to CPU
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
  self.final_offload_hook.offload()
 
  if not return_dict:
- return (image, has_nsfw_concept)
+ return (image,)
 
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+ return StableDiffusionXLPipelineOutput(images=image)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: diffusers
- Version: 0.18.0
+ Version: 0.18.2
  Summary: Diffusers
  Home-page: https://github.com/huggingface/diffusers
  Author: The HuggingFace team
@@ -1,9 +1,9 @@
- diffusers/__init__.py,sha256=Wcm-mUVXAJwULWpuWcyFbdceTZdVb6gqW0NG8YGD30A,9329
- diffusers/configuration_utils.py,sha256=xBuxUFnruv-0Y9obZvbvM3-0l9MRel1J--8V46WTB98,30357
+ diffusers/__init__.py,sha256=Vtoe0ie8nREHRwBNNwzbyQ2rwqLTcB4399y6DBFTOok,9329
+ diffusers/configuration_utils.py,sha256=--Nwf_FViQXq71M8PcgUUjT_YoLV1WYqV49Fnk-amkk,30369
  diffusers/dependency_versions_check.py,sha256=T2AQMFfOGMCULAqRAE8zf1VE5j7GFxxs7SfEuhTY4lA,1756
  diffusers/dependency_versions_table.py,sha256=TnzJqBXnJYMXeMw61Lgq_QlTkjWydwOKDIKbV0RXG6Q,1446
  diffusers/image_processor.py,sha256=VqdToqZY-Xdb0sqibwVn1A9gdGOU3OvgQpr67mnMWGg,13700
- diffusers/loaders.py,sha256=sbotPO3y1mRXF4byG9DMwmqWeD_wWNYBIycY1qgUCuI,75164
+ diffusers/loaders.py,sha256=9trJ4QdgKOmfqguAKHq73fu5VDjw13krtgyJq7AnpQw,75161
  diffusers/optimization.py,sha256=KZpFO98pzgt1l-etti_7k5c-EK9WEY3-XossN6VEGrs,14546
  diffusers/pipeline_utils.py,sha256=dJVuXQ_ZBHkW64dwPbIPM51QnqQKIp9-WSIhRQYlJg4,1147
  diffusers/training_utils.py,sha256=TEuw7ro2RT35ujfMW2DKzb1KZpF4-HfuKSZ1NNnIIvI,13195
@@ -16,7 +16,7 @@ diffusers/experimental/rl/value_guided_sampling.py,sha256=iIhf1gc2QP7Jx4HrsoOyRC
  diffusers/models/__init__.py,sha256=MDG83d8C1YGGSnGNwi9sG6c33_FEaMGS3BVGnaqWJqQ,1446
  diffusers/models/activations.py,sha256=cWe7qw4wR626ADw-abcV3lI1v5Vim_R_eNMc5jPlaLo,297
  diffusers/models/attention.py,sha256=Nfmze9IvGR5a6ir9o0Z4DbAQ8repJxBo2_t4fDsnvHw,15197
- diffusers/models/attention_flax.py,sha256=IHc1OfaIfmlJ2xYHdZ2UGixO9m08ThBN-6C7fv1XEb0,17680
+ diffusers/models/attention_flax.py,sha256=6IOINRK5flDgnzsLiSLIfhBnDtdY9LyhcDIUXVS_Gag,18142
  diffusers/models/attention_processor.py,sha256=04g9405fWhb-C0xO9cnn-LfAMcSwxZ9fOzYrX98aa6A,70119
  diffusers/models/autoencoder_kl.py,sha256=qM2oRqJROHvA3PSwMDmNISQzK3oFmgJiRRzvHZw9dHQ,17913
  diffusers/models/controlnet.py,sha256=OzCVtpmlJXTfIze3Bmc6p7lGFirxvlI-MroHL7HQ5mQ,33086
@@ -28,7 +28,7 @@ diffusers/models/embeddings_flax.py,sha256=87ysODCdTERpYfH-EDhElOUyCAu8z6-xIQCqL
  diffusers/models/modeling_flax_pytorch_utils.py,sha256=yFQHU86DdvrzFLfkTbyZZ0_PWKrjnp08s46dD-wf_tw,4601
  diffusers/models/modeling_flax_utils.py,sha256=0ailGzoCLU5-81rn048e2UJEr0S1lHGBQGqpOJzWfWQ,26071
  diffusers/models/modeling_pytorch_flax_utils.py,sha256=5dt6mC956MYrIMp8Owvx8QQv8xsfik6vu0frgb_c6HE,6974
- diffusers/models/modeling_utils.py,sha256=_tRAf4PGPdH1gqHoYlPEiNoTuSpj9RejTxpue8BBvIA,46589
+ diffusers/models/modeling_utils.py,sha256=vHf-AWIwuTvyjtOCbTryupWmQLxiujNhBfVL0hmop_k,46588
  diffusers/models/prior_transformer.py,sha256=5A8Tgq4VXkjH0ib05kPHXPObekLYdrRwuCgnGvoMVN4,16574
  diffusers/models/resnet.py,sha256=y9FIuXYUTHYA3AFUeDBwiHJVu0crM0fMRnzEJ3ZtVf4,35294
  diffusers/models/resnet_flax.py,sha256=VKF-ti1jlH_GnlWRy9dY6ETc-W9ZitfQoNjmrFAQxuU,4021
@@ -50,7 +50,7 @@ diffusers/models/vq_model.py,sha256=_98GsNUGg3HxcC97zQSgxEPVuDNvn1DcJP6TCTpGLVE,
  diffusers/pipelines/__init__.py,sha256=pjJh4SXSHjSBtzzAsiuQp64YQ03xPMdgTzK-0-iV9Ew,7009
  diffusers/pipelines/onnx_utils.py,sha256=M-6GBVRFji_ik5x1CMxrz9r5oEBr9TTblqLsI1HfiS4,8282
  diffusers/pipelines/pipeline_flax_utils.py,sha256=CLjAhcwfBJ1xTbdRbyWHGdcd5uRJDoXDdxruuK2t2iM,25924
- diffusers/pipelines/pipeline_utils.py,sha256=NTjp1RgH4aSFNEgSslMQGDvE5Ij-XCy_eImcwiMBT-w,71753
+ diffusers/pipelines/pipeline_utils.py,sha256=2P6oTVvZcs33-LoWUQosYkdsl1bEKE3MfnQdhkjubRw,72464
  diffusers/pipelines/alt_diffusion/__init__.py,sha256=rCOBtGQ7xi3DahUXY8r5ICt_t6S0ogp4uDJL9q4avso,1346
  diffusers/pipelines/alt_diffusion/modeling_roberta_series.py,sha256=_UC4IxHAg2QAFtw4yCvo2eLIDBRmg2JvvtOr6k5PFC8,5580
  diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py,sha256=YgUvsnah_cIXNwzJgxE87fftWox7leXOY8lzZeph7c8,40641
@@ -127,7 +127,7 @@ diffusers/pipelines/spectrogram_diffusion/midi_utils.py,sha256=HmOSMSaKZlloW8J6m
  diffusers/pipelines/spectrogram_diffusion/notes_encoder.py,sha256=Yq3W0lkAMGhx5pGklTvomBHjqR1nAVALBcYlzZBSQ90,2921
  diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py,sha256=GAHovdMWMhmGgS02kFOaS7_Lq9AJmTxrBZC0VElHwBQ,8657
  diffusers/pipelines/stable_diffusion/__init__.py,sha256=nBYUiO6TbCsqNfImNCPi1aE-Q35Lc5r9B7qWb9TDjcM,6164
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=Y4J394whXeBduWXJlcp04E71rBzD9PcCDPYd3ZETlrw,67209
+ diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=wYHT2MGLa6LFcmlvtxgDCQ5tqZCqejur2hN-0YL0GsA,69501
  diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py,sha256=sA76ZiTUVOTiCMDss7z3nouqg8czJwBmhX7OPuYheWk,43554
  diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py,sha256=Pbprq5sXlbS6JPP44eOzzm0FrwsccrHoaXuFWY_Kx38,20922
  diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py,sha256=rjxVeyAmMTQiTiQF7Q9y3BYU45jCImKZGx37ir8zpM8,1257
@@ -145,7 +145,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py,sha2
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py,sha256=-k3M22p4KXDkUJAnxj4xU9VM_QBBqs_pV0XHlfYzsKk,80921
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py,sha256=xOvGZvBBvB1Ee_9B46bKpRK5C9SuxbNZhrQ44nvIYsQ,23137
147
147
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py,sha256=XPNN7lINZoezjS2ciifKfIvRXotUbB_dyByaAPOE3Vs,42149
148
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=oWUXc2LgfjNFBroXqhu1JaK6FlXI84kKaxg2OtXdi7U,55092
148
+ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=0Q9m9yJ5rED3U1vyjmrHVft9jIqB35KkfDerc_VYJNg,55140
149
149
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py,sha256=gzklmzg25O3y48wNubrl_jzKwzZlgiperjyV6OqdvxU,42148
150
150
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py,sha256=g8yEk161s1CO8NDVm6ZTMdONtilXDIKQWI6dMnRk6Bs,40919
151
151
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py,sha256=UM-T14SgwlIk9is3QDQx7IsTWU-_ZsD1iA4rCO9-3Rk,31679
@@ -156,7 +156,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py,sha25
156
156
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py,sha256=cnwmjjTQA5lMd-pK-MhnwLF8bK-pwZu0S3xiHLO2WFk,43302
157
157
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py,sha256=_EtteuRKzWruht97EpQO7zrqoyhGyZUUgDFuw04mW5M,62705
158
158
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py,sha256=xiBT7l8nha5HLg2MEeOYozUwmNIUZzbW1mjgt6b7JU8,40491
159
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=4__1bReiM-LDL1JUEZ4wUb2Pk02W9r1ZPkvj3FQ-A6U,38548
159
+ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=TOJge0fcAt10pg-hJdjhQdMi53pBzBrcx7x6vIGMZVc,38610
160
160
  diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py,sha256=ef5W_IynHqocHCWJcX32MUxRPvShH3spsv0RA_lBlTk,46256
161
161
  diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py,sha256=YSwcDOwL1sE6ItdYm1ZuYb3uZVAf-DzuhtndV_Auqzw,40305
162
162
  diffusers/pipelines/stable_diffusion/safety_checker.py,sha256=zLs3meGi6JiRYlHntPiBEaU9_JjYcZnzrPa5picFiG4,5734
@@ -165,9 +165,9 @@ diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py,sha256=RE
165
165
  diffusers/pipelines/stable_diffusion_safe/__init__.py,sha256=FAuvPLSYCLDzJ1d2GntTwQXpxgABEaoLrj5LdQOtxpA,2502
166
166
  diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py,sha256=yZMsvIdDAhF7maJOpc9UWSIUbWpIghOypzgo4-vAI0A,37886
167
167
  diffusers/pipelines/stable_diffusion_safe/safety_checker.py,sha256=lEXvS-_WCcVpje14hoajJG2Z4jlWs0UsID3IqWTnOys,5049
168
- diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=ORE7mQPSp_8k1V6Lzc85dJOQCJ5BOTFwzxOMvf7XRS8,1372
169
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=Xnr-X09UTvzcz6d6kTxJZ3pyrCVEnH78b6x6H70JOh8,38907
170
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=IGHWSxR6QrjG702DPFegBT0ZjjOTC5qh53vSPSHyTso,44219
168
+ diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=-RFjtUQxnCEPSF0Foq90HWIjyHblHOH0eHoNg1dqj68,953
169
+ diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=MTG8Ym65rS3vvALVHWkHHP5cEKMcjpSnYHZuh_lwKcU,41570
170
+ diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=USKMxXkFUeDJQr77Ecw1QlJieiet37KdQB53wWqpCc4,46168
171
171
  diffusers/pipelines/stable_diffusion_xl/watermark.py,sha256=22Pg7TXApd4oRBvyJDh5B5L6--Zj7hKaYj8dHSTsGzQ,1142
172
172
  diffusers/pipelines/stochastic_karras_ve/__init__.py,sha256=StxEhuNuCeEY3qv3ZIcBfXsaxDH3JmWeuHx1xCHnYRI,60
173
173
  diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py,sha256=zZn4jZ8iHJSsoMvStG3l4WvQ6wAtKjK0LDjLRQA3PLU,5669
@@ -254,9 +254,9 @@ diffusers/utils/outputs.py,sha256=l5RdKO6SRnnz7fsXsmmnkOyCf_0z35kwfkDbnhCFeAc,36
254
254
  diffusers/utils/pil_utils.py,sha256=F7M3QWYQyRcLNsS8876wgKqOnhzg8hNTPHQy6Q-jYj0,1423
255
255
  diffusers/utils/testing_utils.py,sha256=TiKwlhR4SvEIIkAOrF11qYNg27p_tVp0ifJgEW2mNAk,21197
256
256
  diffusers/utils/torch_utils.py,sha256=4gRMtlH81IrbYh_pfR0ZkDNbuxmVX03fmR6xrDTZIP0,3378
257
- diffusers-0.18.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
258
- diffusers-0.18.0.dist-info/METADATA,sha256=R-HlsAiR3JVN5Q8fP_WphGIxMb69nAaiYq7jycXXAvA,17540
259
- diffusers-0.18.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
260
- diffusers-0.18.0.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
261
- diffusers-0.18.0.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
262
- diffusers-0.18.0.dist-info/RECORD,,
257
+ diffusers-0.18.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
258
+ diffusers-0.18.2.dist-info/METADATA,sha256=iO5QVnb_Ri2SVl5YaXfVn7P6JsyQJ2PiTcQ5aNNubxY,17540
259
+ diffusers-0.18.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
260
+ diffusers-0.18.2.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
261
+ diffusers-0.18.2.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
262
+ diffusers-0.18.2.dist-info/RECORD,,
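The RECORD entries above pair each file path with a `sha256=<digest>` token and a byte size. Per the wheel spec (PEP 376/427), the digest is the urlsafe-base64 SHA-256 of the file contents with trailing `=` padding stripped; a small sketch for recomputing it, using an illustrative local path:

```python
import base64
import hashlib
from pathlib import Path


def record_hash(path):
    # RECORD digests: urlsafe-base64 SHA-256 of the file bytes, "=" padding removed.
    digest = hashlib.sha256(Path(path).read_bytes()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")


# Illustrative path inside an unpacked 0.18.2 wheel; the value should match
# the entry recorded for diffusers/__init__.py above.
print(record_hash("diffusers/__init__.py"))
```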
{diffusers-0.18.0.dist-info → diffusers-0.18.2.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.38.4)
2
+ Generator: bdist_wheel (0.40.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5