diffusers 0.18.0-py3-none-any.whl → 0.18.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffusers/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.18.0"
+ __version__ = "0.18.2"

  from .configuration_utils import ConfigMixin
  from .utils import (
diffusers/configuration_utils.py CHANGED
@@ -607,7 +607,7 @@ def register_to_config(init):

  # Take note of the parameters that were not present in the loaded config
  if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
- new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+ new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))

  new_kwargs = {**config_init_kwargs, **new_kwargs}
  getattr(self, "register_to_config")(**new_kwargs)
@@ -655,7 +655,7 @@ def flax_register_to_config(cls):

  # Take note of the parameters that were not present in the loaded config
  if len(set(new_kwargs.keys()) - set(init_kwargs)) > 0:
- new_kwargs["_use_default_values"] = set(new_kwargs.keys()) - set(init_kwargs)
+ new_kwargs["_use_default_values"] = list(set(new_kwargs.keys()) - set(init_kwargs))

  getattr(self, "register_to_config")(**new_kwargs)
  original_init(self, *args, **kwargs)
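Note: the only change in these two hunks is wrapping the computed set in `list(...)`. Python sets are not JSON serializable, so keeping a set under `_use_default_values` breaks writing the config out as JSON, while a list round-trips cleanly. A minimal sketch; the key names are made up for illustration:

    import json

    defaults = {"beta_start", "beta_end"}  # hypothetical keys that fell back to their defaults

    try:
        json.dumps({"_use_default_values": defaults})
    except TypeError as err:
        print(err)  # Object of type set is not JSON serializable

    # Casting to a list, as 0.18.1+ does, makes the config JSON-serializable again.
    print(json.dumps({"_use_default_values": list(defaults)}))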
diffusers/loaders.py CHANGED
@@ -177,7 +177,7 @@ class UNet2DConditionLoadersMixin:

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
@@ -589,7 +589,7 @@ class TextualInversionLoaderMixin:

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
@@ -806,7 +806,7 @@ class LoraLoaderMixin:

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
@@ -1054,7 +1054,7 @@ class LoraLoaderMixin:

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
@@ -1394,7 +1394,7 @@ class FromSingleFileMixin:
  use_auth_token = kwargs.pop("use_auth_token", None)
  revision = kwargs.pop("revision", None)
  extract_ema = kwargs.pop("extract_ema", False)
- image_size = kwargs.pop("image_size", 512)
+ image_size = kwargs.pop("image_size", None)
  scheduler_type = kwargs.pop("scheduler_type", "pndm")
  num_in_channels = kwargs.pop("num_in_channels", None)
  upcast_attention = kwargs.pop("upcast_attention", None)
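Note: besides the `safetenstors` typo fixes, `FromSingleFileMixin` stops hard-coding `image_size=512`, so the converter can pick a sensible default per model type (1024 for SDXL checkpoints, see the convert_from_ckpt.py hunks below). A usage sketch; the checkpoint path is hypothetical:

    from diffusers import StableDiffusionXLPipeline

    # With image_size left unset, 0.18.2 infers the default (1024 for SDXL)
    # instead of silently assuming 512 as 0.18.0 did.
    pipe = StableDiffusionXLPipeline.from_single_file("./sd_xl_base_1.0.safetensors")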
diffusers/models/attention_flax.py CHANGED
@@ -152,6 +152,7 @@ class FlaxAttention(nn.Module):
  self.value = nn.Dense(inner_dim, use_bias=False, dtype=self.dtype, name="to_v")

  self.proj_attn = nn.Dense(self.query_dim, dtype=self.dtype, name="to_out_0")
+ self.dropout_layer = nn.Dropout(rate=self.dropout)

  def reshape_heads_to_batch_dim(self, tensor):
  batch_size, seq_len, dim = tensor.shape
@@ -214,7 +215,7 @@ class FlaxAttention(nn.Module):

  hidden_states = self.reshape_batch_dim_to_heads(hidden_states)
  hidden_states = self.proj_attn(hidden_states)
- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)


  class FlaxBasicTransformerBlock(nn.Module):
@@ -260,6 +261,7 @@ class FlaxBasicTransformerBlock(nn.Module):
  self.norm1 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
  self.norm2 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
  self.norm3 = nn.LayerNorm(epsilon=1e-5, dtype=self.dtype)
+ self.dropout_layer = nn.Dropout(rate=self.dropout)

  def __call__(self, hidden_states, context, deterministic=True):
  # self attention
@@ -280,7 +282,7 @@ class FlaxBasicTransformerBlock(nn.Module):
  hidden_states = self.ff(self.norm3(hidden_states), deterministic=deterministic)
  hidden_states = hidden_states + residual

- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)


  class FlaxTransformer2DModel(nn.Module):
@@ -356,6 +358,8 @@ class FlaxTransformer2DModel(nn.Module):
  dtype=self.dtype,
  )

+ self.dropout_layer = nn.Dropout(rate=self.dropout)
+
  def __call__(self, hidden_states, context, deterministic=True):
  batch, height, width, channels = hidden_states.shape
  residual = hidden_states
@@ -378,7 +382,7 @@ class FlaxTransformer2DModel(nn.Module):
  hidden_states = self.proj_out(hidden_states)

  hidden_states = hidden_states + residual
- return hidden_states
+ return self.dropout_layer(hidden_states, deterministic=deterministic)


  class FlaxFeedForward(nn.Module):
@@ -409,7 +413,7 @@ class FlaxFeedForward(nn.Module):
  self.net_2 = nn.Dense(self.dim, dtype=self.dtype)

  def __call__(self, hidden_states, deterministic=True):
- hidden_states = self.net_0(hidden_states)
+ hidden_states = self.net_0(hidden_states, deterministic=deterministic)
  hidden_states = self.net_2(hidden_states)
  return hidden_states

@@ -434,8 +438,9 @@ class FlaxGEGLU(nn.Module):
  def setup(self):
  inner_dim = self.dim * 4
  self.proj = nn.Dense(inner_dim * 2, dtype=self.dtype)
+ self.dropout_layer = nn.Dropout(rate=self.dropout)

  def __call__(self, hidden_states, deterministic=True):
  hidden_states = self.proj(hidden_states)
  hidden_linear, hidden_gelu = jnp.split(hidden_states, 2, axis=2)
- return hidden_linear * nn.gelu(hidden_gelu)
+ return self.dropout_layer(hidden_linear * nn.gelu(hidden_gelu), deterministic=deterministic)
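Note: these hunks wire the previously unused `dropout` attribute of the Flax attention blocks into actual `nn.Dropout` layers, gated by the existing `deterministic` flag. A minimal sketch of the same pattern in Flax; the module below is illustrative only and not part of diffusers:

    import jax
    import jax.numpy as jnp
    import flax.linen as nn

    class TinyBlock(nn.Module):
        rate: float = 0.1

        @nn.compact
        def __call__(self, x, deterministic=True):
            x = nn.Dense(16)(x)
            # Dropout is a no-op when deterministic=True (inference); during training
            # a "dropout" PRNG stream must be supplied via `rngs`.
            return nn.Dropout(rate=self.rate)(x, deterministic=deterministic)

    key = jax.random.PRNGKey(0)
    x = jnp.ones((2, 16))
    params = TinyBlock().init(key, x)
    y_train = TinyBlock().apply(params, x, deterministic=False, rngs={"dropout": key})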
diffusers/models/modeling_utils.py CHANGED
@@ -456,7 +456,7 @@ class ModelMixin(torch.nn.Module):

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
diffusers/pipelines/pipeline_utils.py CHANGED
@@ -204,7 +204,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
  transformers_index_format = r"\d{5}-of-\d{5}"

  if variant is not None:
- # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetenstors`
+ # `diffusion_pytorch_model.fp16.bin` as well as `model.fp16-00001-of-00002.safetensors`
  variant_file_re = re.compile(
  rf"({'|'.join(weight_prefixes)})\.({variant}|{variant}-{transformers_index_format})\.({'|'.join(weight_suffixs)})$"
  )
@@ -213,7 +213,7 @@ def variant_compatible_siblings(filenames, variant=None) -> Union[List[os.PathLi
  rf"({'|'.join(weight_prefixes)})\.({'|'.join(weight_suffixs)})\.index\.{variant}\.json$"
  )

- # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetenstors`
+ # `diffusion_pytorch_model.bin` as well as `model-00001-of-00002.safetensors`
  non_variant_file_re = re.compile(
  rf"({'|'.join(weight_prefixes)})(-{transformers_index_format})?\.({'|'.join(weight_suffixs)})$"
  )
@@ -1168,7 +1168,7 @@ class DiffusionPipeline(ConfigMixin):

  if use_safetensors and not is_safetensors_available():
  raise ValueError(
- "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetenstors"
+ "`use_safetensors`=True but safetensors is not installed. Please install safetensors with `pip install safetensors"
  )

  allow_pickle = False
@@ -1213,6 +1213,15 @@ class DiffusionPipeline(ConfigMixin):
  filenames = {sibling.rfilename for sibling in info.siblings}
  model_filenames, variant_filenames = variant_compatible_siblings(filenames, variant=variant)

+ if len(variant_filenames) == 0 and variant is not None:
+ deprecation_message = (
+ f"You are trying to load the model files of the `variant={variant}`, but no such modeling files are available."
+ f"The default model files: {model_filenames} will be loaded instead. Make sure to not load from `variant={variant}`"
+ "if such variant modeling files are not available. Doing so will lead to an error in v0.22.0 as defaulting to non-variant"
+ "modeling files is deprecated."
+ )
+ deprecate("no variant default", "0.22.0", deprecation_message, standard_warn=False)
+
  # remove ignored filenames
  model_filenames = set(model_filenames) - set(ignore_filenames)
  variant_filenames = set(variant_filenames) - set(ignore_filenames)
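Note: requesting a weight variant that a repository does not provide now emits a deprecation warning before falling back to the default files; per the message above, the fallback is scheduled to become an error in v0.22.0. A sketch; the repository id is hypothetical:

    from diffusers import DiffusionPipeline

    # If the repo ships no fp16-suffixed weights, 0.18.2 still loads the default
    # files but warns that this fallback is deprecated.
    pipe = DiffusionPipeline.from_pretrained("some-org/some-pipeline", variant="fp16")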
@@ -1302,7 +1311,7 @@ class DiffusionPipeline(ConfigMixin):
  snapshot_folder = Path(config_file).parent
  pipeline_is_cached = all((snapshot_folder / f).is_file() for f in expected_files)

- if pipeline_is_cached:
+ if pipeline_is_cached and not force_download:
  # if the pipeline is cached, we can directly return it
  # else call snapshot_download
  return snapshot_folder
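Note: the cache short-circuit now honours `force_download`; in 0.18.0 a fully cached pipeline was returned even when the caller explicitly asked for a fresh download. Sketch:

    from diffusers import DiffusionPipeline

    # With 0.18.2, force_download=True re-downloads the snapshot even if every
    # expected file already exists in the local cache.
    pipe = DiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", force_download=True
    )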
diffusers/pipelines/stable_diffusion/convert_from_ckpt.py CHANGED
@@ -24,6 +24,7 @@ from transformers import (
  AutoFeatureExtractor,
  BertTokenizerFast,
  CLIPImageProcessor,
+ CLIPTextConfig,
  CLIPTextModel,
  CLIPTextModelWithProjection,
  CLIPTokenizer,
@@ -48,7 +49,7 @@ from ...schedulers import (
  PNDMScheduler,
  UnCLIPScheduler,
  )
- from ...utils import is_omegaconf_available, is_safetensors_available, logging
+ from ...utils import is_accelerate_available, is_omegaconf_available, is_safetensors_available, logging
  from ...utils.import_utils import BACKENDS_MAPPING
  from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
  from ..paint_by_example import PaintByExampleImageEncoder
@@ -57,6 +58,10 @@ from .safety_checker import StableDiffusionSafetyChecker
  from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer


+ if is_accelerate_available():
+ from accelerate import init_empty_weights
+ from accelerate.utils import set_module_tensor_to_device
+
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name


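Note: the converter now builds models inside accelerate's `init_empty_weights()` context (parameters live on the meta device, so no memory is allocated) and then materializes each converted tensor with `set_module_tensor_to_device`, replacing the old pattern of instantiating the full module and calling `load_state_dict`. A minimal sketch of that pattern on a plain torch module:

    import torch
    from accelerate import init_empty_weights
    from accelerate.utils import set_module_tensor_to_device

    with init_empty_weights():
        model = torch.nn.Linear(4, 4)  # parameters are created on the "meta" device

    # Stand-in for a converted checkpoint; the real code iterates the converted UNet/VAE/CLIP dicts.
    state_dict = {"weight": torch.randn(4, 4), "bias": torch.zeros(4)}
    for name, tensor in state_dict.items():
        set_module_tensor_to_device(model, name, "cpu", value=tensor)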
@@ -391,8 +396,8 @@ def convert_ldm_unet_checkpoint(

  # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA
  if sum(k.startswith("model_ema") for k in keys) > 100 and extract_ema:
- print(f"Checkpoint {path} has both EMA and non-EMA weights.")
- print(
+ logger.warning(f"Checkpoint {path} has both EMA and non-EMA weights.")
+ logger.warning(
  "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA"
  " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag."
  )
@@ -402,7 +407,7 @@ def convert_ldm_unet_checkpoint(
  unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key)
  else:
  if sum(k.startswith("model_ema") for k in keys) > 100:
- print(
+ logger.warning(
  "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA"
  " weights (usually better for inference), please make sure to add the `--extract_ema` flag."
  )
@@ -770,11 +775,12 @@ def convert_ldm_bert_checkpoint(checkpoint, config):


  def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder=None):
- text_model = (
- CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14", local_files_only=local_files_only)
- if text_encoder is None
- else text_encoder
- )
+ if text_encoder is None:
+ config_name = "openai/clip-vit-large-patch14"
+ config = CLIPTextConfig.from_pretrained(config_name)
+
+ with init_empty_weights():
+ text_model = CLIPTextModel(config)

  keys = list(checkpoint.keys())

@@ -787,7 +793,8 @@ def convert_ldm_clip_checkpoint(checkpoint, local_files_only=False, text_encoder
  if key.startswith(prefix):
  text_model_dict[key[len(prefix + ".") :]] = checkpoint[key]

- text_model.load_state_dict(text_model_dict)
+ for param_name, param in text_model_dict.items():
+ set_module_tensor_to_device(text_model, param_name, "cpu", value=param)

  return text_model

@@ -884,14 +891,26 @@ def convert_paint_by_example_checkpoint(checkpoint):
  return model


- def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
+ def convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="cond_stage_model.model.", has_projection=False, **config_kwargs
+ ):
  # text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder")
- text_model = CLIPTextModelWithProjection.from_pretrained(
- "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
- )
+ # text_model = CLIPTextModelWithProjection.from_pretrained(
+ # "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", projection_dim=1280
+ # )
+ config = CLIPTextConfig.from_pretrained(config_name, **config_kwargs)
+
+ with init_empty_weights():
+ text_model = CLIPTextModelWithProjection(config) if has_projection else CLIPTextModel(config)

  keys = list(checkpoint.keys())

+ keys_to_ignore = []
+ if config_name == "stabilityai/stable-diffusion-2" and config.num_hidden_layers == 23:
+ # make sure to remove all keys > 22
+ keys_to_ignore += [k for k in keys if k.startswith("cond_stage_model.model.transformer.resblocks.23")]
+ keys_to_ignore += ["cond_stage_model.model.text_projection"]
+
  text_model_dict = {}

  if prefix + "text_projection" in checkpoint:
@@ -902,8 +921,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):
  text_model_dict["text_model.embeddings.position_ids"] = text_model.text_model.embeddings.get_buffer("position_ids")

  for key in keys:
- # if "resblocks.23" in key: # Diffusers drops the final layer and only uses the penultimate layer
- # continue
+ if key in keys_to_ignore:
+ continue
  if key[len(prefix) :] in textenc_conversion_map:
  if key.endswith("text_projection"):
  value = checkpoint[key].T
@@ -931,7 +950,8 @@ def convert_open_clip_checkpoint(checkpoint, prefix="cond_stage_model.model."):

  text_model_dict[new_key] = checkpoint[key]

- text_model.load_state_dict(text_model_dict)
+ for param_name, param in text_model_dict.items():
+ set_module_tensor_to_device(text_model, param_name, "cpu", value=param)

  return text_model

@@ -1061,7 +1081,7 @@ def convert_controlnet_checkpoint(
  def download_from_original_stable_diffusion_ckpt(
  checkpoint_path: str,
  original_config_file: str = None,
- image_size: int = 512,
+ image_size: Optional[int] = None,
  prediction_type: str = None,
  model_type: str = None,
  extract_ema: bool = False,
@@ -1144,6 +1164,7 @@ def download_from_original_stable_diffusion_ckpt(
  LDMTextToImagePipeline,
  PaintByExamplePipeline,
  StableDiffusionControlNetPipeline,
+ StableDiffusionInpaintPipeline,
  StableDiffusionPipeline,
  StableDiffusionXLImg2ImgPipeline,
  StableDiffusionXLPipeline,
@@ -1166,12 +1187,9 @@ def download_from_original_stable_diffusion_ckpt(
  if not is_safetensors_available():
  raise ValueError(BACKENDS_MAPPING["safetensors"][1])

- from safetensors import safe_open
+ from safetensors.torch import load_file as safe_load

- checkpoint = {}
- with safe_open(checkpoint_path, framework="pt", device="cpu") as f:
- for key in f.keys():
- checkpoint[key] = f.get_tensor(key)
+ checkpoint = safe_load(checkpoint_path, device="cpu")
  else:
  if device is None:
  device = "cuda" if torch.cuda.is_available() else "cpu"
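Note: loading a `.safetensors` checkpoint now goes through `safetensors.torch.load_file`, which returns the full `{name: tensor}` dict in one call instead of iterating over `safe_open` keys. Round-trip sketch:

    import torch
    from safetensors.torch import load_file, save_file

    save_file({"weight": torch.ones(2, 2)}, "tiny.safetensors")
    checkpoint = load_file("tiny.safetensors", device="cpu")
    print(checkpoint["weight"].shape)  # torch.Size([2, 2])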
@@ -1183,7 +1201,7 @@ def download_from_original_stable_diffusion_ckpt(
  if "global_step" in checkpoint:
  global_step = checkpoint["global_step"]
  else:
- print("global_step key not found in model")
+ logger.debug("global_step key not found in model")
  global_step = None

  # NOTE: this while loop isn't great but this controlnet checkpoint has one additional
@@ -1230,8 +1248,15 @@ def download_from_original_stable_diffusion_ckpt(
  model_type = "SDXL"
  else:
  model_type = "SDXL-Refiner"
+ if image_size is None:
+ image_size = 1024

- if num_in_channels is not None:
+ if num_in_channels is None and pipeline_class == StableDiffusionInpaintPipeline:
+ num_in_channels = 9
+ elif num_in_channels is None:
+ num_in_channels = 4
+
+ if "unet_config" in original_config.model.params:
  original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels

  if (
@@ -1263,7 +1288,6 @@ def download_from_original_stable_diffusion_ckpt(
  num_train_timesteps = getattr(original_config.model.params, "timesteps", None) or 1000

  if model_type in ["SDXL", "SDXL-Refiner"]:
- image_size = 1024
  scheduler_dict = {
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
@@ -1279,7 +1303,6 @@ def download_from_original_stable_diffusion_ckpt(
  }
  scheduler = EulerDiscreteScheduler.from_config(scheduler_dict)
  scheduler_type = "euler"
- vae_path = "stabilityai/sdxl-vae"
  else:
  beta_start = getattr(original_config.model.params, "linear_start", None) or 0.02
  beta_end = getattr(original_config.model.params, "linear_end", None) or 0.085
@@ -1318,25 +1341,45 @@ def download_from_original_stable_diffusion_ckpt(
  # Convert the UNet2DConditionModel model.
  unet_config = create_unet_diffusers_config(original_config, image_size=image_size)
  unet_config["upcast_attention"] = upcast_attention
- unet = UNet2DConditionModel(**unet_config)
+ with init_empty_weights():
+ unet = UNet2DConditionModel(**unet_config)

  converted_unet_checkpoint = convert_ldm_unet_checkpoint(
  checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema
  )
- unet.load_state_dict(converted_unet_checkpoint)
+
+ for param_name, param in converted_unet_checkpoint.items():
+ set_module_tensor_to_device(unet, param_name, "cpu", value=param)

  # Convert the VAE model.
  if vae_path is None:
  vae_config = create_vae_diffusers_config(original_config, image_size=image_size)
  converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config)

- vae = AutoencoderKL(**vae_config)
- vae.load_state_dict(converted_vae_checkpoint)
+ if (
+ "model" in original_config
+ and "params" in original_config.model
+ and "scale_factor" in original_config.model.params
+ ):
+ vae_scaling_factor = original_config.model.params.scale_factor
+ else:
+ vae_scaling_factor = 0.18215 # default SD scaling factor
+
+ vae_config["scaling_factor"] = vae_scaling_factor
+
+ with init_empty_weights():
+ vae = AutoencoderKL(**vae_config)
+
+ for param_name, param in converted_vae_checkpoint.items():
+ set_module_tensor_to_device(vae, param_name, "cpu", value=param)
  else:
  vae = AutoencoderKL.from_pretrained(vae_path)

  if model_type == "FrozenOpenCLIPEmbedder":
- text_model = convert_open_clip_checkpoint(checkpoint)
+ config_name = "stabilityai/stable-diffusion-2"
+ config_kwargs = {"subfolder": "text_encoder"}
+
+ text_model = convert_open_clip_checkpoint(checkpoint, config_name, **config_kwargs)
  tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer")

  if stable_unclip is None:
@@ -1469,7 +1512,12 @@ def download_from_original_stable_diffusion_ckpt(
  tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
  text_encoder = convert_ldm_clip_checkpoint(checkpoint, local_files_only=local_files_only)
  tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
- text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.1.model.")
+
+ config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+ config_kwargs = {"projection_dim": 1280}
+ text_encoder_2 = convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="conditioner.embedders.1.model.", has_projection=True, **config_kwargs
+ )

  pipe = StableDiffusionXLPipeline(
  vae=vae,
@@ -1485,7 +1533,12 @@ def download_from_original_stable_diffusion_ckpt(
  tokenizer = None
  text_encoder = None
  tokenizer_2 = CLIPTokenizer.from_pretrained("laion/CLIP-ViT-bigG-14-laion2B-39B-b160k", pad_token="!")
- text_encoder_2 = convert_open_clip_checkpoint(checkpoint, prefix="conditioner.embedders.0.model.")
+
+ config_name = "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k"
+ config_kwargs = {"projection_dim": 1280}
+ text_encoder_2 = convert_open_clip_checkpoint(
+ checkpoint, config_name, prefix="conditioner.embedders.0.model.", has_projection=True, **config_kwargs
+ )

  pipe = StableDiffusionXLImg2ImgPipeline(
  vae=vae,
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py CHANGED
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

  from ...configuration_utils import FrozenDict
  from ...image_processor import VaeImageProcessor
- from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+ from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
  from ...models import AutoencoderKL, UNet2DConditionModel
  from ...schedulers import KarrasDiffusionSchedulers
  from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
@@ -153,7 +153,9 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool
  return mask, masked_image


- class StableDiffusionInpaintPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+ class StableDiffusionInpaintPipeline(
+ DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
+ ):
  r"""
  Pipeline for text-guided image inpainting using Stable Diffusion.

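Note: with `FromSingleFileMixin` added, an original-format inpainting checkpoint can be loaded directly; combined with the converter change above, `num_in_channels` now defaults to 9 for this pipeline and no longer has to be passed explicitly. Sketch; the file name is hypothetical:

    from diffusers import StableDiffusionInpaintPipeline

    pipe = StableDiffusionInpaintPipeline.from_single_file("./sd-v1-5-inpainting.safetensors")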
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py CHANGED
@@ -748,15 +748,19 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)

- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
+
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
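Note: the old test compared a processor instance against a list of processor classes, so the membership check was always False and the fp16 shortcut never ran; `isinstance` checks the type as intended, and the now-redundant `not` is dropped. Minimal illustration with a stand-in class:

    class AttnProcessor2_0:  # stand-in for the real attention processor class
        pass

    processor = AttnProcessor2_0()

    print(processor in [AttnProcessor2_0])             # False: instance compared against a class
    print(isinstance(processor, (AttnProcessor2_0,)))  # True: what was actually intended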
diffusers/pipelines/stable_diffusion_xl/__init__.py CHANGED
@@ -8,7 +8,6 @@ from ...utils import BaseOutput, is_invisible_watermark_available, is_torch_avai


  @dataclass
- # Copied from diffusers.pipelines.stable_diffusion.__init__.StableDiffusionPipelineOutput with StableDiffusion->StableDiffusionXL
  class StableDiffusionXLPipelineOutput(BaseOutput):
  """
  Output class for Stable Diffusion pipelines.
@@ -17,13 +16,9 @@ class StableDiffusionXLPipelineOutput(BaseOutput):
  images (`List[PIL.Image.Image]` or `np.ndarray`)
  List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
  num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
- nsfw_content_detected (`List[bool]`)
- List of flags denoting whether the corresponding generated image likely represents "not-safe-for-work"
- (nsfw) content, or `None` if safety checking could not be performed.
  """

  images: Union[List[PIL.Image.Image], np.ndarray]
- nsfw_content_detected: Optional[List[bool]]


  if is_transformers_available() and is_torch_available() and is_invisible_watermark_available():
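Note: the SDXL pipelines ship no safety checker, so the output class drops `nsfw_content_detected` and the pipelines below stop returning it. Downstream code should read `.images` (or the one-element tuple when `return_dict=False`); a sketch assuming `pipe` is an already loaded StableDiffusionXLPipeline:

    out = pipe("an astronaut riding a horse")
    image = out.images[0]

    # return_dict=False now yields a one-element tuple instead of (image, has_nsfw_concept).
    (image,) = pipe("an astronaut riding a horse", return_dict=False)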
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py CHANGED
@@ -129,9 +129,11 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
  self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
+ self.default_sample_size = self.unet.config.sample_size

  self.watermark = StableDiffusionXLWatermarker()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
  def enable_vae_slicing(self):
  r"""
  Enable sliced VAE decoding.
@@ -141,6 +143,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_slicing()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
  def disable_vae_slicing(self):
  r"""
  Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -148,6 +151,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_slicing()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
  def enable_vae_tiling(self):
  r"""
  Enable tiled VAE decoding.
@@ -157,6 +161,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_tiling()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
  def disable_vae_tiling(self):
  r"""
  Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -183,7 +188,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.to("cpu", silence_dtype_warnings=True)
  torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)

- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+ for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
  cpu_offload(cpu_offloaded_model, device)

  def enable_model_cpu_offload(self, gpu_id=0):
@@ -217,6 +222,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.final_offload_hook = hook

  @property
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
  def _execution_device(self):
  r"""
  Returns the device on which the pipeline's models will be executed. After calling
@@ -237,12 +243,14 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  def encode_prompt(
  self,
  prompt,
- device,
- num_images_per_prompt,
- do_classifier_free_guidance,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
  negative_prompt=None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  lora_scale: Optional[float] = None,
  ):
  r"""
@@ -268,9 +276,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  lora_scale (`float`, *optional*):
  A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
  """
+ device = device or self._execution_device
+
  # set lora scale so that monkey patched LoRA
  # function of text encoder can correctly access it
  if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -399,6 +416,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):

  negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)

+ bs_embed = pooled_prompt_embeds.shape[0]
  pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
  bs_embed * num_images_per_prompt, -1
  )
@@ -408,20 +426,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):

  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

- def run_safety_checker(self, image, device, dtype):
- if self.safety_checker is None:
- has_nsfw_concept = None
- else:
- if torch.is_tensor(image):
- feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
- else:
- feature_extractor_input = self.image_processor.numpy_to_pil(image)
- safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
- image, has_nsfw_concept = self.safety_checker(
- images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
- )
- return image, has_nsfw_concept
-
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -448,6 +453,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt=None,
  prompt_embeds=None,
  negative_prompt_embeds=None,
+ pooled_prompt_embeds=None,
+ negative_pooled_prompt_embeds=None,
  ):
  if height % 8 != 0 or width % 8 != 0:
  raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -486,6 +493,17 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  f" {negative_prompt_embeds.shape}."
  )

+ if prompt_embeds is not None and pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
+ )
+
+ if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+ raise ValueError(
+ "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+ )
+
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents
  def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None):
  shape = (batch_size, num_channels_latents, height // self.vae_scale_factor, width // self.vae_scale_factor)
  if isinstance(generator, list) and len(generator) != batch_size:
@@ -535,6 +553,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents: Optional[torch.FloatTensor] = None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
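Note: `encode_prompt` and `__call__` now accept pre-computed pooled embeddings, and `check_inputs` (hunk above) requires them whenever `prompt_embeds`/`negative_prompt_embeds` are passed. A sketch assuming `pipe` is an already loaded StableDiffusionXLPipeline:

    (
        prompt_embeds,
        negative_prompt_embeds,
        pooled_prompt_embeds,
        negative_pooled_prompt_embeds,
    ) = pipe.encode_prompt("a watercolor fox")

    image = pipe(
        prompt_embeds=prompt_embeds,
        negative_prompt_embeds=negative_prompt_embeds,
        pooled_prompt_embeds=pooled_prompt_embeds,
        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
    ).images[0]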
@@ -588,6 +608,13 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  output_type (`str`, *optional*, defaults to `"pil"`):
  The output format of the generate image. Choose between
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -626,15 +653,23 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  "not-safe-for-work" (nsfw) content, according to the `safety_checker`.
  """
  # 0. Default height and width to unet
- height = height or self.unet.config.sample_size * self.vae_scale_factor
- width = width or self.unet.config.sample_size * self.vae_scale_factor
+ height = height or self.default_sample_size * self.vae_scale_factor
+ width = width or self.default_sample_size * self.vae_scale_factor

  original_size = original_size or (height, width)
  target_size = target_size or (height, width)

  # 1. Check inputs. Raise error if not correct
  self.check_inputs(
- prompt, height, width, callback_steps, negative_prompt, prompt_embeds, negative_prompt_embeds
+ prompt,
+ height,
+ width,
+ callback_steps,
+ negative_prompt,
+ prompt_embeds,
+ negative_prompt_embeds,
+ pooled_prompt_embeds,
+ negative_pooled_prompt_embeds,
  )

  # 2. Define call parameters
@@ -669,6 +704,8 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt,
  prompt_embeds=prompt_embeds,
  negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
  lora_scale=text_encoder_lora_scale,
  )

@@ -749,15 +786,18 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)

- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
@@ -765,27 +805,19 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents = latents.float()

  if not output_type == "latent":
- # CHECK there is problem here (PVP)
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
- has_nsfw_concept = None
  else:
  image = latents
- has_nsfw_concept = None
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
- if has_nsfw_concept is None:
- do_denormalize = [True] * image.shape[0]
- else:
- do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+ return StableDiffusionXLPipelineOutput(images=image)

  image = self.watermark.apply_watermark(image)
- image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+ image = self.image_processor.postprocess(image, output_type=output_type)

  # Offload last model to CPU
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
  self.final_offload_hook.offload()

  if not return_dict:
- return (image, has_nsfw_concept)
+ return (image,)

- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+ return StableDiffusionXLPipelineOutput(images=image)
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py CHANGED
@@ -140,6 +140,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):

  self.watermark = StableDiffusionXLWatermarker()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_slicing
  def enable_vae_slicing(self):
  r"""
  Enable sliced VAE decoding.
@@ -149,6 +150,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_slicing()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_slicing
  def disable_vae_slicing(self):
  r"""
  Disable sliced VAE decoding. If `enable_vae_slicing` was previously invoked, this method will go back to
@@ -156,6 +158,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_slicing()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_vae_tiling
  def enable_vae_tiling(self):
  r"""
  Enable tiled VAE decoding.
@@ -165,6 +168,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.enable_tiling()

+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.disable_vae_tiling
  def disable_vae_tiling(self):
  r"""
  Disable tiled VAE decoding. If `enable_vae_tiling` was previously invoked, this method will go back to
@@ -172,6 +176,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  """
  self.vae.disable_tiling()

+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
  def enable_sequential_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
@@ -191,9 +196,10 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.to("cpu", silence_dtype_warnings=True)
  torch.cuda.empty_cache() # otherwise we don't see the memory savings (but they probably exist)

- for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+ for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
  cpu_offload(cpu_offloaded_model, device)

+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
  def enable_model_cpu_offload(self, gpu_id=0):
  r"""
  Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
@@ -225,6 +231,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  self.final_offload_hook = hook

  @property
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
  def _execution_device(self):
  r"""
  Returns the device on which the pipeline's models will be executed. After calling
@@ -242,15 +249,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  return torch.device(module._hf_hook.execution_device)
  return self.device

+ # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
  def encode_prompt(
  self,
  prompt,
- device,
- num_images_per_prompt,
- do_classifier_free_guidance,
+ device: Optional[torch.device] = None,
+ num_images_per_prompt: int = 1,
+ do_classifier_free_guidance: bool = True,
  negative_prompt=None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  lora_scale: Optional[float] = None,
  ):
  r"""
@@ -276,9 +286,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  lora_scale (`float`, *optional*):
  A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
  """
+ device = device or self._execution_device
+
  # set lora scale so that monkey patched LoRA
  # function of text encoder can correctly access it
  if lora_scale is not None and isinstance(self, LoraLoaderMixin):
@@ -327,13 +346,11 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  text_input_ids.to(device),
  output_hidden_states=True,
  )
+
  # We are only ALWAYS interested in the pooled output of the final text encoder
  pooled_prompt_embeds = prompt_embeds[0]
-
  prompt_embeds = prompt_embeds.hidden_states[-2]

- prompt_embeds = prompt_embeds
-
  bs_embed, seq_len, _ = prompt_embeds.shape
  # duplicate text embeddings for each generation per prompt, using mps friendly method
  prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
@@ -349,10 +366,9 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt_embeds = torch.zeros_like(prompt_embeds)
  negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
  elif do_classifier_free_guidance and negative_prompt_embeds is None:
+ negative_prompt = negative_prompt or ""
  uncond_tokens: List[str]
- if negative_prompt is None:
- uncond_tokens = [""] * batch_size
- elif prompt is not None and type(prompt) is not type(negative_prompt):
+ if prompt is not None and type(prompt) is not type(negative_prompt):
  raise TypeError(
  f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
  f" {type(prompt)}."
@@ -389,7 +405,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  )
  # We are only ALWAYS interested in the pooled output of the final text encoder
  negative_pooled_prompt_embeds = negative_prompt_embeds[0]
-
  negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

  if do_classifier_free_guidance:
@@ -411,6 +426,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):

  negative_prompt_embeds = torch.concat(negative_prompt_embeds_list, dim=-1)

+ bs_embed = pooled_prompt_embeds.shape[0]
  pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, num_images_per_prompt).view(
  bs_embed * num_images_per_prompt, -1
  )
@@ -420,20 +436,7 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):

  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

- def run_safety_checker(self, image, device, dtype):
- if self.safety_checker is None:
- has_nsfw_concept = None
- else:
- if torch.is_tensor(image):
- feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
- else:
- feature_extractor_input = self.image_processor.numpy_to_pil(image)
- safety_checker_input = self.feature_extractor(feature_extractor_input, return_tensors="pt").to(device)
- image, has_nsfw_concept = self.safety_checker(
- images=image, clip_input=safety_checker_input.pixel_values.to(dtype)
- )
- return image, has_nsfw_concept
-
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
  def prepare_extra_step_kwargs(self, generator, eta):
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
@@ -624,6 +627,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  latents: Optional[torch.FloatTensor] = None,
  prompt_embeds: Optional[torch.FloatTensor] = None,
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+ pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+ negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
  output_type: Optional[str] = "pil",
  return_dict: bool = True,
  callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
@@ -683,6 +688,13 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
  weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
  argument.
+ pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+ If not provided, pooled text embeddings will be generated from `prompt` input argument.
+ negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+ Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
+ weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
+ input argument.
  output_type (`str`, *optional*, defaults to `"pil"`):
  The output format of the generate image. Choose between
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -759,6 +771,8 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  negative_prompt,
  prompt_embeds=prompt_embeds,
  negative_prompt_embeds=negative_prompt_embeds,
+ pooled_prompt_embeds=pooled_prompt_embeds,
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
  lora_scale=text_encoder_lora_scale,
  )

@@ -845,15 +859,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
  # make sure the VAE is in float32 mode, as it overflows in float16
  self.vae.to(dtype=torch.float32)

- use_torch_2_0_or_xformers = self.vae.decoder.mid_block.attentions[0].processor in [
- AttnProcessor2_0,
- XFormersAttnProcessor,
- LoRAXFormersAttnProcessor,
- LoRAAttnProcessor2_0,
- ]
+ use_torch_2_0_or_xformers = isinstance(
+ self.vae.decoder.mid_block.attentions[0].processor,
+ (
+ AttnProcessor2_0,
+ XFormersAttnProcessor,
+ LoRAXFormersAttnProcessor,
+ LoRAAttnProcessor2_0,
+ ),
+ )
  # if xformers or torch_2_0 is used attention block does not need
  # to be in float32 which can save lots of memory
- if not use_torch_2_0_or_xformers:
+ if use_torch_2_0_or_xformers:
  self.vae.post_quant_conv.to(latents.dtype)
  self.vae.decoder.conv_in.to(latents.dtype)
  self.vae.decoder.mid_block.to(latents.dtype)
@@ -862,24 +879,18 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):

  if not output_type == "latent":
  image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
- has_nsfw_concept = None
  else:
  image = latents
- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=None)
-
- if has_nsfw_concept is None:
- do_denormalize = [True] * image.shape[0]
- else:
- do_denormalize = [not has_nsfw for has_nsfw in has_nsfw_concept]
+ return StableDiffusionXLPipelineOutput(images=image)

  image = self.watermark.apply_watermark(image)
- image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
+ image = self.image_processor.postprocess(image, output_type=output_type)

  # Offload last model to CPU
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
  self.final_offload_hook.offload()

  if not return_dict:
- return (image, has_nsfw_concept)
+ return (image,)

- return StableDiffusionXLPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
+ return StableDiffusionXLPipelineOutput(images=image)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: diffusers
- Version: 0.18.0
+ Version: 0.18.2
  Summary: Diffusers
  Home-page: https://github.com/huggingface/diffusers
  Author: The HuggingFace team
@@ -1,9 +1,9 @@
- diffusers/__init__.py,sha256=Wcm-mUVXAJwULWpuWcyFbdceTZdVb6gqW0NG8YGD30A,9329
- diffusers/configuration_utils.py,sha256=xBuxUFnruv-0Y9obZvbvM3-0l9MRel1J--8V46WTB98,30357
+ diffusers/__init__.py,sha256=Vtoe0ie8nREHRwBNNwzbyQ2rwqLTcB4399y6DBFTOok,9329
+ diffusers/configuration_utils.py,sha256=--Nwf_FViQXq71M8PcgUUjT_YoLV1WYqV49Fnk-amkk,30369
  diffusers/dependency_versions_check.py,sha256=T2AQMFfOGMCULAqRAE8zf1VE5j7GFxxs7SfEuhTY4lA,1756
  diffusers/dependency_versions_table.py,sha256=TnzJqBXnJYMXeMw61Lgq_QlTkjWydwOKDIKbV0RXG6Q,1446
  diffusers/image_processor.py,sha256=VqdToqZY-Xdb0sqibwVn1A9gdGOU3OvgQpr67mnMWGg,13700
- diffusers/loaders.py,sha256=sbotPO3y1mRXF4byG9DMwmqWeD_wWNYBIycY1qgUCuI,75164
+ diffusers/loaders.py,sha256=9trJ4QdgKOmfqguAKHq73fu5VDjw13krtgyJq7AnpQw,75161
  diffusers/optimization.py,sha256=KZpFO98pzgt1l-etti_7k5c-EK9WEY3-XossN6VEGrs,14546
  diffusers/pipeline_utils.py,sha256=dJVuXQ_ZBHkW64dwPbIPM51QnqQKIp9-WSIhRQYlJg4,1147
  diffusers/training_utils.py,sha256=TEuw7ro2RT35ujfMW2DKzb1KZpF4-HfuKSZ1NNnIIvI,13195
@@ -16,7 +16,7 @@ diffusers/experimental/rl/value_guided_sampling.py,sha256=iIhf1gc2QP7Jx4HrsoOyRC
  diffusers/models/__init__.py,sha256=MDG83d8C1YGGSnGNwi9sG6c33_FEaMGS3BVGnaqWJqQ,1446
  diffusers/models/activations.py,sha256=cWe7qw4wR626ADw-abcV3lI1v5Vim_R_eNMc5jPlaLo,297
  diffusers/models/attention.py,sha256=Nfmze9IvGR5a6ir9o0Z4DbAQ8repJxBo2_t4fDsnvHw,15197
- diffusers/models/attention_flax.py,sha256=IHc1OfaIfmlJ2xYHdZ2UGixO9m08ThBN-6C7fv1XEb0,17680
+ diffusers/models/attention_flax.py,sha256=6IOINRK5flDgnzsLiSLIfhBnDtdY9LyhcDIUXVS_Gag,18142
  diffusers/models/attention_processor.py,sha256=04g9405fWhb-C0xO9cnn-LfAMcSwxZ9fOzYrX98aa6A,70119
  diffusers/models/autoencoder_kl.py,sha256=qM2oRqJROHvA3PSwMDmNISQzK3oFmgJiRRzvHZw9dHQ,17913
  diffusers/models/controlnet.py,sha256=OzCVtpmlJXTfIze3Bmc6p7lGFirxvlI-MroHL7HQ5mQ,33086
@@ -28,7 +28,7 @@ diffusers/models/embeddings_flax.py,sha256=87ysODCdTERpYfH-EDhElOUyCAu8z6-xIQCqL
  diffusers/models/modeling_flax_pytorch_utils.py,sha256=yFQHU86DdvrzFLfkTbyZZ0_PWKrjnp08s46dD-wf_tw,4601
  diffusers/models/modeling_flax_utils.py,sha256=0ailGzoCLU5-81rn048e2UJEr0S1lHGBQGqpOJzWfWQ,26071
  diffusers/models/modeling_pytorch_flax_utils.py,sha256=5dt6mC956MYrIMp8Owvx8QQv8xsfik6vu0frgb_c6HE,6974
- diffusers/models/modeling_utils.py,sha256=_tRAf4PGPdH1gqHoYlPEiNoTuSpj9RejTxpue8BBvIA,46589
+ diffusers/models/modeling_utils.py,sha256=vHf-AWIwuTvyjtOCbTryupWmQLxiujNhBfVL0hmop_k,46588
  diffusers/models/prior_transformer.py,sha256=5A8Tgq4VXkjH0ib05kPHXPObekLYdrRwuCgnGvoMVN4,16574
  diffusers/models/resnet.py,sha256=y9FIuXYUTHYA3AFUeDBwiHJVu0crM0fMRnzEJ3ZtVf4,35294
  diffusers/models/resnet_flax.py,sha256=VKF-ti1jlH_GnlWRy9dY6ETc-W9ZitfQoNjmrFAQxuU,4021
@@ -50,7 +50,7 @@ diffusers/models/vq_model.py,sha256=_98GsNUGg3HxcC97zQSgxEPVuDNvn1DcJP6TCTpGLVE,
  diffusers/pipelines/__init__.py,sha256=pjJh4SXSHjSBtzzAsiuQp64YQ03xPMdgTzK-0-iV9Ew,7009
  diffusers/pipelines/onnx_utils.py,sha256=M-6GBVRFji_ik5x1CMxrz9r5oEBr9TTblqLsI1HfiS4,8282
  diffusers/pipelines/pipeline_flax_utils.py,sha256=CLjAhcwfBJ1xTbdRbyWHGdcd5uRJDoXDdxruuK2t2iM,25924
- diffusers/pipelines/pipeline_utils.py,sha256=NTjp1RgH4aSFNEgSslMQGDvE5Ij-XCy_eImcwiMBT-w,71753
+ diffusers/pipelines/pipeline_utils.py,sha256=2P6oTVvZcs33-LoWUQosYkdsl1bEKE3MfnQdhkjubRw,72464
  diffusers/pipelines/alt_diffusion/__init__.py,sha256=rCOBtGQ7xi3DahUXY8r5ICt_t6S0ogp4uDJL9q4avso,1346
  diffusers/pipelines/alt_diffusion/modeling_roberta_series.py,sha256=_UC4IxHAg2QAFtw4yCvo2eLIDBRmg2JvvtOr6k5PFC8,5580
  diffusers/pipelines/alt_diffusion/pipeline_alt_diffusion.py,sha256=YgUvsnah_cIXNwzJgxE87fftWox7leXOY8lzZeph7c8,40641
@@ -127,7 +127,7 @@ diffusers/pipelines/spectrogram_diffusion/midi_utils.py,sha256=HmOSMSaKZlloW8J6m
  diffusers/pipelines/spectrogram_diffusion/notes_encoder.py,sha256=Yq3W0lkAMGhx5pGklTvomBHjqR1nAVALBcYlzZBSQ90,2921
  diffusers/pipelines/spectrogram_diffusion/pipeline_spectrogram_diffusion.py,sha256=GAHovdMWMhmGgS02kFOaS7_Lq9AJmTxrBZC0VElHwBQ,8657
  diffusers/pipelines/stable_diffusion/__init__.py,sha256=nBYUiO6TbCsqNfImNCPi1aE-Q35Lc5r9B7qWb9TDjcM,6164
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=Y4J394whXeBduWXJlcp04E71rBzD9PcCDPYd3ZETlrw,67209
+ diffusers/pipelines/stable_diffusion/convert_from_ckpt.py,sha256=wYHT2MGLa6LFcmlvtxgDCQ5tqZCqejur2hN-0YL0GsA,69501
  diffusers/pipelines/stable_diffusion/pipeline_cycle_diffusion.py,sha256=sA76ZiTUVOTiCMDss7z3nouqg8czJwBmhX7OPuYheWk,43554
  diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py,sha256=Pbprq5sXlbS6JPP44eOzzm0FrwsccrHoaXuFWY_Kx38,20922
  diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_controlnet.py,sha256=rjxVeyAmMTQiTiQF7Q9y3BYU45jCImKZGx37ir8zpM8,1257
@@ -145,7 +145,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py,sha2
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_diffedit.py,sha256=-k3M22p4KXDkUJAnxj4xU9VM_QBBqs_pV0XHlfYzsKk,80921
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py,sha256=xOvGZvBBvB1Ee_9B46bKpRK5C9SuxbNZhrQ44nvIYsQ,23137
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py,sha256=XPNN7lINZoezjS2ciifKfIvRXotUbB_dyByaAPOE3Vs,42149
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=oWUXc2LgfjNFBroXqhu1JaK6FlXI84kKaxg2OtXdi7U,55092
+ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py,sha256=0Q9m9yJ5rED3U1vyjmrHVft9jIqB35KkfDerc_VYJNg,55140
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint_legacy.py,sha256=gzklmzg25O3y48wNubrl_jzKwzZlgiperjyV6OqdvxU,42148
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py,sha256=g8yEk161s1CO8NDVm6ZTMdONtilXDIKQWI6dMnRk6Bs,40919
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_k_diffusion.py,sha256=UM-T14SgwlIk9is3QDQx7IsTWU-_ZsD1iA4rCO9-3Rk,31679
@@ -156,7 +156,7 @@ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_panorama.py,sha25
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_paradigms.py,sha256=cnwmjjTQA5lMd-pK-MhnwLF8bK-pwZu0S3xiHLO2WFk,43302
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_pix2pix_zero.py,sha256=_EtteuRKzWruht97EpQO7zrqoyhGyZUUgDFuw04mW5M,62705
  diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_sag.py,sha256=xiBT7l8nha5HLg2MEeOYozUwmNIUZzbW1mjgt6b7JU8,40491
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=4__1bReiM-LDL1JUEZ4wUb2Pk02W9r1ZPkvj3FQ-A6U,38548
+ diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py,sha256=TOJge0fcAt10pg-hJdjhQdMi53pBzBrcx7x6vIGMZVc,38610
  diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py,sha256=ef5W_IynHqocHCWJcX32MUxRPvShH3spsv0RA_lBlTk,46256
  diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py,sha256=YSwcDOwL1sE6ItdYm1ZuYb3uZVAf-DzuhtndV_Auqzw,40305
  diffusers/pipelines/stable_diffusion/safety_checker.py,sha256=zLs3meGi6JiRYlHntPiBEaU9_JjYcZnzrPa5picFiG4,5734
@@ -165,9 +165,9 @@ diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py,sha256=RE
  diffusers/pipelines/stable_diffusion_safe/__init__.py,sha256=FAuvPLSYCLDzJ1d2GntTwQXpxgABEaoLrj5LdQOtxpA,2502
  diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py,sha256=yZMsvIdDAhF7maJOpc9UWSIUbWpIghOypzgo4-vAI0A,37886
  diffusers/pipelines/stable_diffusion_safe/safety_checker.py,sha256=lEXvS-_WCcVpje14hoajJG2Z4jlWs0UsID3IqWTnOys,5049
- diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=ORE7mQPSp_8k1V6Lzc85dJOQCJ5BOTFwzxOMvf7XRS8,1372
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=Xnr-X09UTvzcz6d6kTxJZ3pyrCVEnH78b6x6H70JOh8,38907
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=IGHWSxR6QrjG702DPFegBT0ZjjOTC5qh53vSPSHyTso,44219
+ diffusers/pipelines/stable_diffusion_xl/__init__.py,sha256=-RFjtUQxnCEPSF0Foq90HWIjyHblHOH0eHoNg1dqj68,953
+ diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py,sha256=MTG8Ym65rS3vvALVHWkHHP5cEKMcjpSnYHZuh_lwKcU,41570
+ diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py,sha256=USKMxXkFUeDJQr77Ecw1QlJieiet37KdQB53wWqpCc4,46168
  diffusers/pipelines/stable_diffusion_xl/watermark.py,sha256=22Pg7TXApd4oRBvyJDh5B5L6--Zj7hKaYj8dHSTsGzQ,1142
  diffusers/pipelines/stochastic_karras_ve/__init__.py,sha256=StxEhuNuCeEY3qv3ZIcBfXsaxDH3JmWeuHx1xCHnYRI,60
  diffusers/pipelines/stochastic_karras_ve/pipeline_stochastic_karras_ve.py,sha256=zZn4jZ8iHJSsoMvStG3l4WvQ6wAtKjK0LDjLRQA3PLU,5669
@@ -254,9 +254,9 @@ diffusers/utils/outputs.py,sha256=l5RdKO6SRnnz7fsXsmmnkOyCf_0z35kwfkDbnhCFeAc,36
  diffusers/utils/pil_utils.py,sha256=F7M3QWYQyRcLNsS8876wgKqOnhzg8hNTPHQy6Q-jYj0,1423
  diffusers/utils/testing_utils.py,sha256=TiKwlhR4SvEIIkAOrF11qYNg27p_tVp0ifJgEW2mNAk,21197
  diffusers/utils/torch_utils.py,sha256=4gRMtlH81IrbYh_pfR0ZkDNbuxmVX03fmR6xrDTZIP0,3378
- diffusers-0.18.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- diffusers-0.18.0.dist-info/METADATA,sha256=R-HlsAiR3JVN5Q8fP_WphGIxMb69nAaiYq7jycXXAvA,17540
- diffusers-0.18.0.dist-info/WHEEL,sha256=2wepM1nk4DS4eFpYrW1TTqPcoGNfHhhO_i5m4cOimbo,92
- diffusers-0.18.0.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
- diffusers-0.18.0.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
- diffusers-0.18.0.dist-info/RECORD,,
+ diffusers-0.18.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ diffusers-0.18.2.dist-info/METADATA,sha256=iO5QVnb_Ri2SVl5YaXfVn7P6JsyQJ2PiTcQ5aNNubxY,17540
+ diffusers-0.18.2.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+ diffusers-0.18.2.dist-info/entry_points.txt,sha256=VULXr1th-UU5J0Ou_l0If6E4CY4HSSiMElweZ58u9H0,73
+ diffusers-0.18.2.dist-info/top_level.txt,sha256=axJl2884vMSvhzrFrSoht36QXA_6gZN9cKtg4xOO72o,10
+ diffusers-0.18.2.dist-info/RECORD,,
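The RECORD hunk above swaps in fresh hashes and sizes for every file touched by the patch release and renames the dist-info directory to match the new version. Each entry has the form path,sha256=<digest>,<size>, where the digest is an unpadded URL-safe base64 SHA-256 of the file, so an entry can be spot-checked against a downloaded wheel. A small sketch follows; the wheel path is a placeholder and the expected values are copied from the entries above.

import base64
import hashlib
import zipfile

def record_digest(data: bytes) -> str:
    # RECORD stores SHA-256 digests as unpadded URL-safe base64.
    return base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=").decode("ascii")

# Placeholder wheel path; point this at the actual downloaded file.
with zipfile.ZipFile("diffusers-0.18.2-py3-none-any.whl") as whl:
    data = whl.read("diffusers/configuration_utils.py")
    print(record_digest(data), len(data))
    # Expected from the RECORD entries above:
    # --Nwf_FViQXq71M8PcgUUjT_YoLV1WYqV49Fnk-amkk 30369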
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.38.4)
+ Generator: bdist_wheel (0.40.0)
  Root-Is-Purelib: true
  Tag: py3-none-any