diffusers-0.24.0-py3-none-any.whl → diffusers-0.25.0-py3-none-any.whl

Files changed (174)
  1. diffusers/__init__.py +11 -1
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +12 -8
  4. diffusers/dependency_versions_table.py +2 -1
  5. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  6. diffusers/image_processor.py +286 -46
  7. diffusers/loaders/ip_adapter.py +11 -9
  8. diffusers/loaders/lora.py +198 -60
  9. diffusers/loaders/single_file.py +24 -18
  10. diffusers/loaders/textual_inversion.py +10 -14
  11. diffusers/loaders/unet.py +130 -37
  12. diffusers/models/__init__.py +18 -12
  13. diffusers/models/activations.py +9 -6
  14. diffusers/models/attention.py +137 -16
  15. diffusers/models/attention_processor.py +133 -46
  16. diffusers/models/autoencoders/__init__.py +5 -0
  17. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +4 -4
  18. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +45 -6
  19. diffusers/models/{autoencoder_kl_temporal_decoder.py → autoencoders/autoencoder_kl_temporal_decoder.py} +8 -8
  20. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +4 -4
  21. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +14 -14
  22. diffusers/models/{vae.py → autoencoders/vae.py} +9 -5
  23. diffusers/models/downsampling.py +338 -0
  24. diffusers/models/embeddings.py +112 -29
  25. diffusers/models/modeling_flax_utils.py +12 -7
  26. diffusers/models/modeling_utils.py +10 -10
  27. diffusers/models/normalization.py +108 -2
  28. diffusers/models/resnet.py +15 -699
  29. diffusers/models/transformer_2d.py +2 -2
  30. diffusers/models/unet_2d_condition.py +37 -0
  31. diffusers/models/{unet_kandi3.py → unet_kandinsky3.py} +105 -159
  32. diffusers/models/upsampling.py +454 -0
  33. diffusers/models/uvit_2d.py +471 -0
  34. diffusers/models/vq_model.py +9 -2
  35. diffusers/pipelines/__init__.py +81 -73
  36. diffusers/pipelines/amused/__init__.py +62 -0
  37. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  38. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  39. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +38 -10
  41. diffusers/pipelines/auto_pipeline.py +17 -13
  42. diffusers/pipelines/controlnet/pipeline_controlnet.py +27 -10
  43. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +47 -5
  44. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +25 -8
  45. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +4 -6
  46. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +26 -10
  47. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +4 -3
  48. diffusers/pipelines/deprecated/__init__.py +153 -0
  49. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  50. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +91 -18
  51. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +91 -18
  52. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  53. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  54. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  55. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  56. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  57. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  58. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  59. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  60. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  61. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  62. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  63. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +4 -4
  64. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  65. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  66. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  67. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  68. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +7 -7
  69. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  70. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +16 -11
  71. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +6 -6
  72. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +11 -11
  73. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +16 -11
  74. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +10 -10
  75. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +13 -13
  76. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  77. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  78. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  79. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +54 -11
  80. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  81. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +6 -6
  82. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +6 -6
  83. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +6 -6
  84. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  85. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  86. diffusers/pipelines/kandinsky3/__init__.py +4 -4
  87. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  88. diffusers/pipelines/kandinsky3/{kandinsky3_pipeline.py → pipeline_kandinsky3.py} +172 -35
  89. diffusers/pipelines/kandinsky3/{kandinsky3img2img_pipeline.py → pipeline_kandinsky3_img2img.py} +228 -34
  90. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +46 -5
  91. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +47 -6
  92. diffusers/pipelines/onnx_utils.py +8 -5
  93. diffusers/pipelines/pipeline_flax_utils.py +7 -6
  94. diffusers/pipelines/pipeline_utils.py +30 -29
  95. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +51 -2
  96. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  97. diffusers/pipelines/stable_diffusion/__init__.py +1 -72
  98. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +67 -75
  99. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +92 -8
  100. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -8
  101. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +138 -10
  102. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +57 -7
  103. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -0
  104. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +6 -0
  105. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  106. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  107. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  108. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +5 -2
  109. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  110. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +2 -3
  111. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  112. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +2 -2
  113. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +3 -3
  114. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  115. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +6 -1
  116. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  117. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +50 -7
  118. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  119. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +56 -8
  120. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  121. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  122. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +67 -10
  123. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +97 -15
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -14
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +97 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +7 -5
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +12 -9
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +6 -0
  129. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -0
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +5 -0
  131. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +331 -9
  132. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +468 -9
  133. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  134. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  135. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  136. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +4 -0
  137. diffusers/schedulers/__init__.py +2 -0
  138. diffusers/schedulers/scheduling_amused.py +162 -0
  139. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  140. diffusers/schedulers/scheduling_ddim_inverse.py +1 -4
  141. diffusers/schedulers/scheduling_ddpm.py +46 -0
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +46 -0
  143. diffusers/schedulers/scheduling_deis_multistep.py +13 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +13 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +13 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -0
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -1
  148. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -0
  149. diffusers/schedulers/scheduling_euler_discrete.py +62 -3
  150. diffusers/schedulers/scheduling_heun_discrete.py +2 -0
  151. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -0
  152. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -0
  153. diffusers/schedulers/scheduling_lms_discrete.py +2 -0
  154. diffusers/schedulers/scheduling_unipc_multistep.py +13 -1
  155. diffusers/schedulers/scheduling_utils.py +3 -1
  156. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  157. diffusers/training_utils.py +1 -1
  158. diffusers/utils/__init__.py +0 -2
  159. diffusers/utils/constants.py +2 -5
  160. diffusers/utils/dummy_pt_objects.py +30 -0
  161. diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
  162. diffusers/utils/dynamic_modules_utils.py +14 -18
  163. diffusers/utils/hub_utils.py +24 -36
  164. diffusers/utils/logging.py +1 -1
  165. diffusers/utils/state_dict_utils.py +8 -0
  166. diffusers/utils/testing_utils.py +199 -1
  167. diffusers/utils/torch_utils.py +3 -3
  168. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/METADATA +54 -53
  169. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/RECORD +174 -155
  170. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  171. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  172. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  173. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  174. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py

@@ -33,9 +33,10 @@ from ...loaders import (
     StableDiffusionXLLoraLoaderMixin,
     TextualInversionLoaderMixin,
 )
-from ...models import AutoencoderKL, UNet2DConditionModel
+from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
+    FusedAttnProcessor2_0,
     LoRAAttnProcessor2_0,
     LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
@@ -321,12 +322,12 @@ class StableDiffusionXLInpaintPipeline(
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

-    In addition the pipeline inherits the following loading methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
-
-    as well as the following saving methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`]
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

     Args:
         vae ([`AutoencoderKL`]):
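The docstring now advertises the full set of loader mixins, including the new IP-Adapter loader. A minimal usage sketch of these loaders on the inpaint pipeline; the repository ids and weight filenames below are illustrative assumptions, not taken from this diff:

```python
# Illustrative only: repo ids and weight filenames are assumptions, not part of this diff.
import torch
from diffusers import StableDiffusionXLInpaintPipeline

pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")

# New in the 0.25 docstring: IP-Adapter loading sits alongside textual inversion,
# single-file checkpoints, and LoRA weights.
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
pipe.load_lora_weights("some-user/some-sdxl-lora")  # hypothetical LoRA repository
```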
@@ -363,7 +364,7 @@ class StableDiffusionXLInpaintPipeline(
             watermarker will be used.
     """

-    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"

     _optional_components = [
         "tokenizer",
@@ -462,18 +463,29 @@ class StableDiffusionXLInpaintPipeline(
         self.vae.disable_tiling()

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
-    def encode_image(self, image, device, num_images_per_prompt):
+    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
         dtype = next(self.image_encoder.parameters()).dtype

         if not isinstance(image, torch.Tensor):
             image = self.feature_extractor(image, return_tensors="pt").pixel_values

         image = image.to(device=device, dtype=dtype)
-        image_embeds = self.image_encoder(image).image_embeds
-        image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+        if output_hidden_states:
+            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
+            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_enc_hidden_states = self.image_encoder(
+                torch.zeros_like(image), output_hidden_states=True
+            ).hidden_states[-2]
+            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
+                num_images_per_prompt, dim=0
+            )
+            return image_enc_hidden_states, uncond_image_enc_hidden_states
+        else:
+            image_embeds = self.image_encoder(image).image_embeds
+            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_embeds = torch.zeros_like(image_embeds)

-        uncond_image_embeds = torch.zeros_like(image_embeds)
-        return image_embeds, uncond_image_embeds
+            return image_embeds, uncond_image_embeds

     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
@@ -1073,6 +1085,67 @@ class StableDiffusionXLInpaintPipeline(
         """Disables the FreeU mechanism if enabled."""
         self.unet.disable_freeu()

+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
+    def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+        key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+        """
+        self.fusing_unet = False
+        self.fusing_vae = False
+
+        if unet:
+            self.fusing_unet = True
+            self.unet.fuse_qkv_projections()
+            self.unet.set_attn_processor(FusedAttnProcessor2_0())
+
+        if vae:
+            if not isinstance(self.vae, AutoencoderKL):
+                raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
+
+            self.fusing_vae = True
+            self.vae.fuse_qkv_projections()
+            self.vae.set_attn_processor(FusedAttnProcessor2_0())
+
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
+    def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """Disable QKV projection fusion if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+
+        """
+        if unet:
+            if not self.fusing_unet:
+                logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.unet.unfuse_qkv_projections()
+                self.fusing_unet = False
+
+        if vae:
+            if not self.fusing_vae:
+                logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.vae.unfuse_qkv_projections()
+                self.fusing_vae = False
+
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
     def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
         """
@@ -1137,6 +1210,10 @@ class StableDiffusionXLInpaintPipeline(
     def num_timesteps(self):
         return self._num_timesteps

+    @property
+    def interrupt(self):
+        return self._interrupt
+
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1389,6 +1466,7 @@ class StableDiffusionXLInpaintPipeline(
         self._cross_attention_kwargs = cross_attention_kwargs
         self._denoising_end = denoising_end
         self._denoising_start = denoising_start
+        self._interrupt = False

         # 2. Define call parameters
         if prompt is not None and isinstance(prompt, str):
@@ -1568,7 +1646,10 @@ class StableDiffusionXLInpaintPipeline(
         add_time_ids = add_time_ids.to(device)

         if ip_adapter_image is not None:
-            image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
+            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+            image_embeds, negative_image_embeds = self.encode_image(
+                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+            )
             if self.do_classifier_free_guidance:
                 image_embeds = torch.cat([negative_image_embeds, image_embeds])
                 image_embeds = image_embeds.to(device)
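Whether pooled image embeddings or penultimate CLIP hidden states are fed to the IP-Adapter now depends on the type of the UNet's `encoder_hid_proj`: a plain `ImageProjection` (the standard IP-Adapter head) keeps the pooled-embedding path, while other projection modules (e.g. the resampler used by IP-Adapter Plus checkpoints) request hidden states. A sketch of the same decision made outside the pipeline; `pipe` and `ip_adapter_image` are assumed from earlier:

```python
# Sketch of the decision the pipeline now makes internally; assumes `pipe` has an
# IP-Adapter loaded and `ip_adapter_image` is a PIL conditioning image.
from diffusers.models import ImageProjection

output_hidden_state = not isinstance(pipe.unet.encoder_hid_proj, ImageProjection)
image_embeds, negative_image_embeds = pipe.encode_image(
    ip_adapter_image, device="cuda", num_images_per_prompt=1, output_hidden_states=output_hidden_state
)
```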
@@ -1608,6 +1689,8 @@ class StableDiffusionXLInpaintPipeline(
         self._num_timesteps = len(timesteps)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents

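Together with the `interrupt` property and the `_interrupt` flag reset in `__call__`, this turns the remaining denoising steps into no-ops once interruption is requested. A sketch of cancelling a run from a step-end callback; writing the private `_interrupt` flag is an assumption about intended usage, and `pipe`, `init_image`, `mask_image` are placeholders:

```python
# Sketch: stop the denoising loop early from a step-end callback.
# `pipe`, `init_image`, `mask_image` are placeholders; setting `_interrupt` directly
# is an assumption about intended usage, not spelled out in this diff.
stop_after = 25

def stop_early(pipeline, step, timestep, callback_kwargs):
    if step == stop_after:
        pipeline._interrupt = True  # read back as `pipeline.interrupt` inside the loop
    return callback_kwargs

result = pipe(
    prompt="a photo of a cat",
    image=init_image,
    mask_image=mask_image,
    num_inference_steps=50,
    callback_on_step_end=stop_early,
)
```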
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py

@@ -24,6 +24,7 @@ from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, Te
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
+    FusedAttnProcessor2_0,
     LoRAAttnProcessor2_0,
     LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
@@ -125,11 +126,11 @@ class StableDiffusionXLInstructPix2PixPipeline(
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

-    In addition the pipeline inherits the following loading methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
-
-    as well as the following saving methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`]
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights

     Args:
         vae ([`AutoencoderKL`]):
@@ -610,6 +611,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
                 XFormersAttnProcessor,
                 LoRAXFormersAttnProcessor,
                 LoRAAttnProcessor2_0,
+                FusedAttnProcessor2_0,
             ),
         )
         # if xformers or torch_2_0 is used attention block does not need
diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py

@@ -25,7 +25,7 @@ from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKLTemporalDecoder, UNetSpatioTemporalConditionModel
 from ...schedulers import EulerDiscreteScheduler
 from ...utils import BaseOutput, logging
-from ...utils.torch_utils import randn_tensor
+from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline


@@ -211,7 +211,8 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):

         latents = 1 / self.vae.config.scaling_factor * latents

-        accepts_num_frames = "num_frames" in set(inspect.signature(self.vae.forward).parameters.keys())
+        forward_vae_fn = self.vae._orig_mod.forward if is_compiled_module(self.vae) else self.vae.forward
+        accepts_num_frames = "num_frames" in set(inspect.signature(forward_vae_fn).parameters.keys())

         # decode decode_chunk_size frames at a time to avoid OOM
         frames = []
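A `torch.compile`-wrapped module hides the wrapped module's `forward` signature behind a generic wrapper, so the `num_frames` detection has to inspect `_orig_mod` when the VAE has been compiled. A minimal sketch of the same guard, assuming `pipe` is a `StableVideoDiffusionPipeline`:

```python
# Minimal sketch of the guard added above; assumes `pipe` is a StableVideoDiffusionPipeline.
import inspect
import torch
from diffusers.utils.torch_utils import is_compiled_module

pipe.vae = torch.compile(pipe.vae)  # wrapping obscures the original forward signature

forward_vae_fn = pipe.vae._orig_mod.forward if is_compiled_module(pipe.vae) else pipe.vae.forward
accepts_num_frames = "num_frames" in set(inspect.signature(forward_vae_fn).parameters.keys())
```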
@@ -290,7 +291,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
-        return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None
+        if isinstance(self.guidance_scale, (int, float)):
+            return self.guidance_scale
+        return self.guidance_scale.max() > 1

     @property
     def num_timesteps(self):
@@ -415,10 +418,10 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
-        do_classifier_free_guidance = max_guidance_scale > 1.0
+        self._guidance_scale = max_guidance_scale

         # 3. Encode input image
-        image_embeddings = self._encode_image(image, device, num_videos_per_prompt, do_classifier_free_guidance)
+        image_embeddings = self._encode_image(image, device, num_videos_per_prompt, self.do_classifier_free_guidance)

         # NOTE: Stable Diffusion Video was conditioned on fps - 1, which
         # is why it is reduced here.
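The guidance decision now lives on the pipeline: `__call__` stores `max_guidance_scale` in `self._guidance_scale`, and the `do_classifier_free_guidance` property derives the flag from it, covering both a plain scalar and a per-frame tensor schedule. An illustrative call where this is exercised; the image URL and guidance values are assumptions:

```python
# Illustrative call; the image URL and guidance values are assumptions.
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import load_image

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

image = load_image("https://example.com/conditioning_frame.png")  # placeholder URL

# max_guidance_scale > 1 enables classifier-free guidance; the denoising loop now reads
# the decision back through pipe.do_classifier_free_guidance instead of a local flag.
frames = pipe(image, min_guidance_scale=1.0, max_guidance_scale=3.0, num_frames=25).frames[0]
```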
@@ -434,7 +437,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         if needs_upcasting:
             self.vae.to(dtype=torch.float32)

-        image_latents = self._encode_vae_image(image, device, num_videos_per_prompt, do_classifier_free_guidance)
+        image_latents = self._encode_vae_image(image, device, num_videos_per_prompt, self.do_classifier_free_guidance)
         image_latents = image_latents.to(image_embeddings.dtype)

         # cast back to fp16 if needed
@@ -453,7 +456,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
             image_embeddings.dtype,
             batch_size,
             num_videos_per_prompt,
-            do_classifier_free_guidance,
+            self.do_classifier_free_guidance,
         )
         added_time_ids = added_time_ids.to(device)

@@ -489,7 +492,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
-                latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+                latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

                 # Concatenate image_latents over channels dimention
@@ -505,7 +508,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
                 )[0]

                 # perform guidance
-                if do_classifier_free_guidance:
+                if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)

diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py

@@ -178,6 +178,12 @@ class StableDiffusionXLAdapterPipeline(
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
     Args:
         adapter ([`T2IAdapter`] or [`MultiAdapter`] or `List[T2IAdapter]`):
             Provides additional conditioning to the unet during the denoising process. If you set multiple Adapter as a
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py

@@ -83,6 +83,11 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py

@@ -159,6 +159,11 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
     Args:
         vae ([`AutoencoderKL`]):
             Variational Auto-Encoder (VAE) Model to encode and decode videos to and from latent representations.
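Both text-to-video pipelines now document the same textual-inversion and LoRA loader mixins they already inherit. A short sketch of loading LoRA weights into `TextToVideoSDPipeline`; the LoRA repository id is a placeholder:

```python
# Sketch only: the LoRA repo id is a placeholder, not taken from this diff.
import torch
from diffusers import TextToVideoSDPipeline

pipe = TextToVideoSDPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16
).to("cuda")

pipe.load_lora_weights("some-user/some-text2video-lora")  # hypothetical LoRA repository
frames = pipe("a panda surfing on a wave", num_inference_steps=25).frames
```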