diffusers 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

Files changed (238)
  1. diffusers/__init__.py +26 -2
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +13 -8
  4. diffusers/dependency_versions_check.py +0 -1
  5. diffusers/dependency_versions_table.py +5 -5
  6. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  7. diffusers/image_processor.py +463 -51
  8. diffusers/loaders/__init__.py +82 -0
  9. diffusers/loaders/ip_adapter.py +159 -0
  10. diffusers/loaders/lora.py +1553 -0
  11. diffusers/loaders/lora_conversion_utils.py +284 -0
  12. diffusers/loaders/single_file.py +637 -0
  13. diffusers/loaders/textual_inversion.py +455 -0
  14. diffusers/loaders/unet.py +828 -0
  15. diffusers/loaders/utils.py +59 -0
  16. diffusers/models/__init__.py +26 -9
  17. diffusers/models/activations.py +9 -6
  18. diffusers/models/attention.py +301 -29
  19. diffusers/models/attention_flax.py +9 -1
  20. diffusers/models/attention_processor.py +378 -6
  21. diffusers/models/autoencoders/__init__.py +5 -0
  22. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +17 -12
  23. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +47 -23
  24. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +402 -0
  25. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +24 -28
  26. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +51 -44
  27. diffusers/models/{vae.py → autoencoders/vae.py} +71 -17
  28. diffusers/models/controlnet.py +59 -39
  29. diffusers/models/controlnet_flax.py +19 -18
  30. diffusers/models/downsampling.py +338 -0
  31. diffusers/models/embeddings.py +112 -29
  32. diffusers/models/embeddings_flax.py +2 -0
  33. diffusers/models/lora.py +131 -1
  34. diffusers/models/modeling_flax_utils.py +14 -8
  35. diffusers/models/modeling_outputs.py +17 -0
  36. diffusers/models/modeling_utils.py +37 -29
  37. diffusers/models/normalization.py +110 -4
  38. diffusers/models/resnet.py +299 -652
  39. diffusers/models/transformer_2d.py +22 -5
  40. diffusers/models/transformer_temporal.py +183 -1
  41. diffusers/models/unet_2d_blocks_flax.py +5 -0
  42. diffusers/models/unet_2d_condition.py +46 -0
  43. diffusers/models/unet_2d_condition_flax.py +13 -13
  44. diffusers/models/unet_3d_blocks.py +957 -173
  45. diffusers/models/unet_3d_condition.py +16 -8
  46. diffusers/models/unet_kandinsky3.py +535 -0
  47. diffusers/models/unet_motion_model.py +48 -33
  48. diffusers/models/unet_spatio_temporal_condition.py +489 -0
  49. diffusers/models/upsampling.py +454 -0
  50. diffusers/models/uvit_2d.py +471 -0
  51. diffusers/models/vae_flax.py +7 -0
  52. diffusers/models/vq_model.py +12 -3
  53. diffusers/optimization.py +16 -9
  54. diffusers/pipelines/__init__.py +137 -76
  55. diffusers/pipelines/amused/__init__.py +62 -0
  56. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  57. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  58. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  59. diffusers/pipelines/animatediff/pipeline_animatediff.py +66 -8
  60. diffusers/pipelines/audioldm/pipeline_audioldm.py +1 -0
  61. diffusers/pipelines/auto_pipeline.py +23 -13
  62. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -0
  63. diffusers/pipelines/controlnet/pipeline_controlnet.py +238 -35
  64. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +148 -37
  65. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +155 -41
  66. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +123 -43
  67. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +216 -39
  68. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +106 -34
  69. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +1 -0
  70. diffusers/pipelines/ddim/pipeline_ddim.py +1 -0
  71. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -0
  72. diffusers/pipelines/deepfloyd_if/pipeline_if.py +13 -1
  73. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +13 -1
  74. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +13 -1
  75. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +13 -1
  76. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +13 -1
  77. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +13 -1
  78. diffusers/pipelines/deprecated/__init__.py +153 -0
  79. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  80. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +177 -34
  81. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +182 -37
  82. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  83. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  84. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  85. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  86. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  87. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  88. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  89. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  90. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  91. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  92. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  93. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +5 -4
  94. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  95. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  96. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  97. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  98. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +8 -7
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  100. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +34 -13
  101. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +7 -6
  102. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +12 -11
  103. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +17 -11
  104. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +11 -10
  105. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +14 -13
  106. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  107. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  108. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  109. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +83 -51
  110. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  111. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +7 -6
  112. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +7 -6
  113. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +7 -6
  114. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  115. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  116. diffusers/pipelines/dit/pipeline_dit.py +1 -0
  117. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +1 -1
  118. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +3 -3
  119. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +1 -1
  120. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +1 -1
  121. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +1 -1
  122. diffusers/pipelines/kandinsky3/__init__.py +49 -0
  123. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  124. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +589 -0
  125. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +654 -0
  126. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +111 -11
  127. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +102 -9
  128. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -0
  129. diffusers/pipelines/musicldm/pipeline_musicldm.py +1 -1
  130. diffusers/pipelines/onnx_utils.py +8 -5
  131. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +7 -2
  132. diffusers/pipelines/pipeline_flax_utils.py +11 -8
  133. diffusers/pipelines/pipeline_utils.py +63 -42
  134. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +247 -38
  135. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  136. diffusers/pipelines/stable_diffusion/__init__.py +37 -65
  137. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +75 -78
  138. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +2 -2
  139. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +2 -4
  140. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +1 -0
  141. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +174 -11
  142. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +8 -3
  143. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +1 -0
  144. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +178 -11
  145. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +224 -13
  146. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +74 -20
  147. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -0
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +7 -0
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  151. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  152. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +6 -2
  153. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  154. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +3 -3
  155. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  156. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +3 -2
  157. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +4 -3
  158. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  159. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +7 -1
  160. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  161. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +51 -7
  162. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  163. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +57 -8
  164. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  165. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  166. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +68 -10
  167. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +194 -17
  168. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +205 -16
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +206 -17
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +23 -17
  171. diffusers/pipelines/stable_video_diffusion/__init__.py +58 -0
  172. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +652 -0
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +108 -12
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +115 -14
  175. diffusers/pipelines/text_to_video_synthesis/__init__.py +2 -0
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +6 -0
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +23 -3
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +334 -10
  179. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +1331 -0
  180. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  181. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  182. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  183. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +14 -4
  184. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +9 -5
  185. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +1 -1
  186. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +2 -2
  187. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -1
  188. diffusers/schedulers/__init__.py +4 -4
  189. diffusers/schedulers/deprecated/__init__.py +50 -0
  190. diffusers/schedulers/{scheduling_karras_ve.py → deprecated/scheduling_karras_ve.py} +4 -4
  191. diffusers/schedulers/{scheduling_sde_vp.py → deprecated/scheduling_sde_vp.py} +4 -6
  192. diffusers/schedulers/scheduling_amused.py +162 -0
  193. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  194. diffusers/schedulers/scheduling_ddim.py +1 -3
  195. diffusers/schedulers/scheduling_ddim_inverse.py +2 -7
  196. diffusers/schedulers/scheduling_ddim_parallel.py +1 -3
  197. diffusers/schedulers/scheduling_ddpm.py +47 -3
  198. diffusers/schedulers/scheduling_ddpm_parallel.py +47 -3
  199. diffusers/schedulers/scheduling_deis_multistep.py +28 -6
  200. diffusers/schedulers/scheduling_dpmsolver_multistep.py +28 -6
  201. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +28 -6
  202. diffusers/schedulers/scheduling_dpmsolver_sde.py +3 -3
  203. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +28 -6
  204. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +59 -3
  205. diffusers/schedulers/scheduling_euler_discrete.py +102 -16
  206. diffusers/schedulers/scheduling_heun_discrete.py +17 -5
  207. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +17 -5
  208. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +17 -5
  209. diffusers/schedulers/scheduling_lcm.py +123 -29
  210. diffusers/schedulers/scheduling_lms_discrete.py +3 -3
  211. diffusers/schedulers/scheduling_pndm.py +1 -3
  212. diffusers/schedulers/scheduling_repaint.py +1 -3
  213. diffusers/schedulers/scheduling_unipc_multistep.py +28 -6
  214. diffusers/schedulers/scheduling_utils.py +3 -1
  215. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  216. diffusers/training_utils.py +1 -1
  217. diffusers/utils/__init__.py +1 -2
  218. diffusers/utils/constants.py +10 -12
  219. diffusers/utils/dummy_pt_objects.py +75 -0
  220. diffusers/utils/dummy_torch_and_transformers_objects.py +105 -0
  221. diffusers/utils/dynamic_modules_utils.py +18 -22
  222. diffusers/utils/export_utils.py +8 -3
  223. diffusers/utils/hub_utils.py +24 -36
  224. diffusers/utils/logging.py +11 -11
  225. diffusers/utils/outputs.py +5 -5
  226. diffusers/utils/peft_utils.py +88 -44
  227. diffusers/utils/state_dict_utils.py +8 -0
  228. diffusers/utils/testing_utils.py +199 -1
  229. diffusers/utils/torch_utils.py +4 -4
  230. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/METADATA +86 -69
  231. diffusers-0.25.0.dist-info/RECORD +360 -0
  232. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  233. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  234. diffusers/loaders.py +0 -3336
  235. diffusers-0.23.1.dist-info/RECORD +0 -323
  236. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  237. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  238. {diffusers-0.23.1.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
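The headline structural change sits at the bottom of this list: the monolithic diffusers/loaders.py (item 234, deleted, 3336 lines) is split into the new diffusers/loaders/ package (items 8-15), which also introduces IP-Adapter and single-file loading. The public mixins are re-exported from the package, so package-level imports should keep resolving; a minimal sketch (the assertion is illustrative):

# The loader mixins formerly defined in diffusers/loaders.py now live in the
# diffusers/loaders/ package but stay importable from the same dotted path.
from diffusers.loaders import (
    FromSingleFileMixin,               # now defined in loaders/single_file.py
    IPAdapterMixin,                    # new in 0.25.0, loaders/ip_adapter.py
    StableDiffusionXLLoraLoaderMixin,  # now defined in loaders/lora.py
    TextualInversionLoaderMixin,       # now defined in loaders/textual_inversion.py
)

# IPAdapterMixin is the base class that gives pipelines `load_ip_adapter`.
assert hasattr(IPAdapterMixin, "load_ip_adapter")

The hunks below cover three of the changed files: the SDXL inpaint pipeline, the SDXL InstructPix2Pix pipeline, and the new Stable Video Diffusion package init.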
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py
@@ -18,13 +18,25 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 import PIL.Image
 import torch
-from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
+from transformers import (
+    CLIPImageProcessor,
+    CLIPTextModel,
+    CLIPTextModelWithProjection,
+    CLIPTokenizer,
+    CLIPVisionModelWithProjection,
+)

 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
+from ...loaders import (
+    FromSingleFileMixin,
+    IPAdapterMixin,
+    StableDiffusionXLLoraLoaderMixin,
+    TextualInversionLoaderMixin,
+)
+from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
+    FusedAttnProcessor2_0,
     LoRAAttnProcessor2_0,
     LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
@@ -239,17 +251,70 @@ def prepare_mask_and_masked_image(image, mask, height, width, return_image: bool


 # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
-def retrieve_latents(encoder_output, generator):
-    if hasattr(encoder_output, "latent_dist"):
+def retrieve_latents(
+    encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
+):
+    if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
         return encoder_output.latent_dist.sample(generator)
+    elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
+        return encoder_output.latent_dist.mode()
     elif hasattr(encoder_output, "latents"):
         return encoder_output.latents
     else:
         raise AttributeError("Could not access latents of provided encoder_output")


+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps
+def retrieve_timesteps(
+    scheduler,
+    num_inference_steps: Optional[int] = None,
+    device: Optional[Union[str, torch.device]] = None,
+    timesteps: Optional[List[int]] = None,
+    **kwargs,
+):
+    """
+    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
+    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.
+
+    Args:
+        scheduler (`SchedulerMixin`):
+            The scheduler to get timesteps from.
+        num_inference_steps (`int`):
+            The number of diffusion steps used when generating samples with a pre-trained model. If used,
+            `timesteps` must be `None`.
+        device (`str` or `torch.device`, *optional*):
+            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
+        timesteps (`List[int]`, *optional*):
+            Custom timesteps used to support arbitrary spacing between timesteps. If `None`, then the default
+            timestep spacing strategy of the scheduler is used. If `timesteps` is passed, `num_inference_steps`
+            must be `None`.
+
+    Returns:
+        `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
+        second element is the number of inference steps.
+    """
+    if timesteps is not None:
+        accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys())
+        if not accepts_timesteps:
+            raise ValueError(
+                f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom"
+                f" timestep schedules. Please check whether you are using the correct scheduler."
+            )
+        scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+        num_inference_steps = len(timesteps)
+    else:
+        scheduler.set_timesteps(num_inference_steps, device=device, **kwargs)
+        timesteps = scheduler.timesteps
+    return timesteps, num_inference_steps
+
+
 class StableDiffusionXLInpaintPipeline(
-    DiffusionPipeline, TextualInversionLoaderMixin, StableDiffusionXLLoraLoaderMixin, FromSingleFileMixin
+    DiffusionPipeline,
+    TextualInversionLoaderMixin,
+    StableDiffusionXLLoraLoaderMixin,
+    FromSingleFileMixin,
+    IPAdapterMixin,
 ):
     r"""
     Pipeline for text-to-image generation using Stable Diffusion XL.
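The new module-level retrieve_timesteps helper backs the `timesteps` argument added to `__call__` further down in this file. A minimal sketch of passing a custom schedule through the pipeline; the model id, image URLs, and step values are illustrative, and the scheduler's `set_timesteps` must accept a `timesteps` kwarg or the helper raises `ValueError`:

import torch
from diffusers import StableDiffusionXLInpaintPipeline
from diffusers.utils import load_image

pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

init_image = load_image("https://example.com/photo.png")  # illustrative URL
mask_image = load_image("https://example.com/mask.png")   # illustrative URL

# Custom timesteps must be in descending order; retrieve_timesteps forwards
# them to scheduler.set_timesteps and recomputes num_inference_steps.
image = pipe(
    prompt="a photo of a castle",
    image=init_image,
    mask_image=mask_image,
    timesteps=[999, 749, 499, 249],  # illustrative values
).images[0]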
@@ -257,12 +322,12 @@ class StableDiffusionXLInpaintPipeline(
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

-    In addition the pipeline inherits the following loading methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
-        - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
-
-    as well as the following saving methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`]
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

     Args:
         vae ([`AutoencoderKL`]):
@@ -298,9 +363,17 @@ class StableDiffusionXLInpaintPipeline(
         watermark output images. If not defined, it will default to True if the package is installed, otherwise no
         watermarker will be used.
     """
-    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"

-    _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"
+
+    _optional_components = [
+        "tokenizer",
+        "tokenizer_2",
+        "text_encoder",
+        "text_encoder_2",
+        "image_encoder",
+        "feature_extractor",
+    ]
     _callback_tensor_inputs = [
         "latents",
         "prompt_embeds",
@@ -322,6 +395,8 @@ class StableDiffusionXLInpaintPipeline(
         tokenizer_2: CLIPTokenizer,
         unet: UNet2DConditionModel,
         scheduler: KarrasDiffusionSchedulers,
+        image_encoder: CLIPVisionModelWithProjection = None,
+        feature_extractor: CLIPImageProcessor = None,
         requires_aesthetics_score: bool = False,
         force_zeros_for_empty_prompt: bool = True,
         add_watermarker: Optional[bool] = None,
@@ -335,6 +410,8 @@ class StableDiffusionXLInpaintPipeline(
             tokenizer=tokenizer,
             tokenizer_2=tokenizer_2,
             unet=unet,
+            image_encoder=image_encoder,
+            feature_extractor=feature_extractor,
             scheduler=scheduler,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
@@ -385,6 +462,31 @@ class StableDiffusionXLInpaintPipeline(
         """
         self.vae.disable_tiling()

+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
+    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
+        dtype = next(self.image_encoder.parameters()).dtype
+
+        if not isinstance(image, torch.Tensor):
+            image = self.feature_extractor(image, return_tensors="pt").pixel_values
+
+        image = image.to(device=device, dtype=dtype)
+        if output_hidden_states:
+            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
+            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_enc_hidden_states = self.image_encoder(
+                torch.zeros_like(image), output_hidden_states=True
+            ).hidden_states[-2]
+            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
+                num_images_per_prompt, dim=0
+            )
+            return image_enc_hidden_states, uncond_image_enc_hidden_states
+        else:
+            image_embeds = self.image_encoder(image).image_embeds
+            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_embeds = torch.zeros_like(image_embeds)
+
+            return image_embeds, uncond_image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
@@ -741,10 +843,11 @@ class StableDiffusionXLInpaintPipeline(

         if image.shape[1] == 4:
             image_latents = image.to(device=device, dtype=dtype)
+            image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1)
         elif return_image_latents or (latents is None and not is_strength_max):
             image = image.to(device=device, dtype=dtype)
             image_latents = self._encode_vae_image(image=image, generator=generator)
-        image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1)
+            image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1)

         if latents is None and add_noise:
             noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
@@ -982,6 +1085,67 @@ class StableDiffusionXLInpaintPipeline(
         """Disables the FreeU mechanism if enabled."""
         self.unet.disable_freeu()

+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
+    def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query,
+        key, value) are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+        """
+        self.fusing_unet = False
+        self.fusing_vae = False
+
+        if unet:
+            self.fusing_unet = True
+            self.unet.fuse_qkv_projections()
+            self.unet.set_attn_processor(FusedAttnProcessor2_0())
+
+        if vae:
+            if not isinstance(self.vae, AutoencoderKL):
+                raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
+
+            self.fusing_vae = True
+            self.vae.fuse_qkv_projections()
+            self.vae.set_attn_processor(FusedAttnProcessor2_0())
+
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
+    def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """Disable QKV projection fusion if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+
+        """
+        if unet:
+            if not self.fusing_unet:
+                logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.unet.unfuse_qkv_projections()
+                self.fusing_unet = False
+
+        if vae:
+            if not self.fusing_vae:
+                logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.vae.unfuse_qkv_projections()
+                self.fusing_vae = False
+
     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
     def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
         """
@@ -1046,6 +1210,10 @@ class StableDiffusionXLInpaintPipeline(
     def num_timesteps(self):
         return self._num_timesteps

+    @property
+    def interrupt(self):
+        return self._interrupt
+
     @torch.no_grad()
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
@@ -1059,6 +1227,7 @@ class StableDiffusionXLInpaintPipeline(
         width: Optional[int] = None,
         strength: float = 0.9999,
         num_inference_steps: int = 50,
+        timesteps: List[int] = None,
         denoising_start: Optional[float] = None,
         denoising_end: Optional[float] = None,
         guidance_scale: float = 7.5,
@@ -1072,6 +1241,7 @@ class StableDiffusionXLInpaintPipeline(
         negative_prompt_embeds: Optional[torch.FloatTensor] = None,
         pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        ip_adapter_image: Optional[PipelineImageInput] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -1128,6 +1298,10 @@ class StableDiffusionXLInpaintPipeline(
             num_inference_steps (`int`, *optional*, defaults to 50):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
+            timesteps (`List[int]`, *optional*):
+                Custom timesteps to use for the denoising process with schedulers which support a `timesteps` argument
+                in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
+                passed will be used. Must be in descending order.
             denoising_start (`float`, *optional*):
                 When specified, indicates the fraction (between 0.0 and 1.0) of the total denoising process to be
                 bypassed before it is initiated. Consequently, the initial part of the denoising process is skipped and
@@ -1170,6 +1344,7 @@ class StableDiffusionXLInpaintPipeline(
                 Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
                 input argument.
+            ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
@@ -1240,7 +1415,7 @@ class StableDiffusionXLInpaintPipeline(
             callback_on_step_end_tensor_inputs (`List`, *optional*):
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
-                `._callback_tensor_inputs` attribute of your pipeine class.
+                `._callback_tensor_inputs` attribute of your pipeline class.

         Examples:

@@ -1291,6 +1466,7 @@ class StableDiffusionXLInpaintPipeline(
         self._cross_attention_kwargs = cross_attention_kwargs
         self._denoising_end = denoising_end
         self._denoising_start = denoising_start
+        self._interrupt = False

         # 2. Define call parameters
         if prompt is not None and isinstance(prompt, str):
@@ -1332,7 +1508,7 @@ class StableDiffusionXLInpaintPipeline(
         def denoising_value_valid(dnv):
             return isinstance(self.denoising_end, float) and 0 < dnv < 1

-        self.scheduler.set_timesteps(num_inference_steps, device=device)
+        timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)
         timesteps, num_inference_steps = self.get_timesteps(
             num_inference_steps,
             strength,
@@ -1469,6 +1645,15 @@ class StableDiffusionXLInpaintPipeline(
         add_text_embeds = add_text_embeds.to(device)
         add_time_ids = add_time_ids.to(device)

+        if ip_adapter_image is not None:
+            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+            image_embeds, negative_image_embeds = self.encode_image(
+                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+            )
+            if self.do_classifier_free_guidance:
+                image_embeds = torch.cat([negative_image_embeds, image_embeds])
+                image_embeds = image_embeds.to(device)
+
         # 11. Denoising loop
         num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0)

@@ -1504,6 +1689,8 @@ class StableDiffusionXLInpaintPipeline(
         self._num_timesteps = len(timesteps)
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents

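The `interrupt` property pairs with the `_interrupt` flag initialized in `__call__`: once the flag is raised, every remaining loop iteration is skipped via the `continue` above. A minimal sketch of stopping generation early from a step-end callback, reusing `pipe` and the images from the earlier sketches (the cutoff is illustrative):

def stop_early(pipeline, step, timestep, callback_kwargs):
    # Raise the interrupt flag after 10 steps; later iterations hit
    # `if self.interrupt: continue` and do no work.
    if step >= 10:
        pipeline._interrupt = True
    return callback_kwargs

result = pipe(
    prompt="a photo of a castle",
    image=init_image,
    mask_image=mask_image,
    callback_on_step_end=stop_early,
)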
@@ -1515,6 +1702,8 @@ class StableDiffusionXLInpaintPipeline(

                 # predict the noise residual
                 added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids}
+                if ip_adapter_image is not None:
+                    added_cond_kwargs["image_embeds"] = image_embeds
                 noise_pred = self.unet(
                     latent_model_input,
                     t,
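Taken together, the IPAdapterMixin base class, the optional image_encoder/feature_extractor components, encode_image, and the ip_adapter_image argument wire IP-Adapter conditioning into the inpaint pipeline. A minimal end-to-end sketch; the repository and weight names follow the published h94/IP-Adapter layout, and the URLs are illustrative:

import torch
from diffusers import StableDiffusionXLInpaintPipeline
from diffusers.utils import load_image

pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16"
).to("cuda")

# load_ip_adapter comes from the new IPAdapterMixin; it also pulls in the
# matching CLIP image encoder when one is not already present.
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

style_image = load_image("https://example.com/style.png")  # illustrative URL
image = pipe(
    prompt="a photo of a castle",
    image=load_image("https://example.com/photo.png"),      # illustrative URL
    mask_image=load_image("https://example.com/mask.png"),  # illustrative URL
    ip_adapter_image=style_image,
).images[0]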
diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py
@@ -24,6 +24,7 @@ from ...loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, Te
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.attention_processor import (
     AttnProcessor2_0,
+    FusedAttnProcessor2_0,
     LoRAAttnProcessor2_0,
     LoRAXFormersAttnProcessor,
     XFormersAttnProcessor,
@@ -88,6 +89,20 @@ EXAMPLE_DOC_STRING = """
 """


+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
+def retrieve_latents(
+    encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
+):
+    if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
+        return encoder_output.latent_dist.sample(generator)
+    elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
+        return encoder_output.latent_dist.mode()
+    elif hasattr(encoder_output, "latents"):
+        return encoder_output.latents
+    else:
+        raise AttributeError("Could not access latents of provided encoder_output")
+
+
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
     """
     Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
@@ -111,11 +126,11 @@ class StableDiffusionXLInstructPix2PixPipeline(
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
     library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

-    In addition the pipeline inherits the following loading methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
-
-    as well as the following saving methods:
-        - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`]
+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights

     Args:
         vae ([`AutoencoderKL`]):
@@ -151,6 +166,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
         watermark output images. If not defined, it will default to True if the package is installed, otherwise no
         watermarker will be used.
     """
+
     model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
     _optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]

@@ -532,17 +548,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
             self.upcast_vae()
             image = image.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)

-        if isinstance(generator, list) and len(generator) != batch_size:
-            raise ValueError(
-                f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
-                f" size of {batch_size}. Make sure the batch size matches the length of the generators."
-            )
-
-        if isinstance(generator, list):
-            image_latents = [self.vae.encode(image[i : i + 1]).latent_dist.mode() for i in range(batch_size)]
-            image_latents = torch.cat(image_latents, dim=0)
-        else:
-            image_latents = self.vae.encode(image).latent_dist.mode()
+        image_latents = retrieve_latents(self.vae.encode(image), sample_mode="argmax")

         # cast back to fp16 if needed
         if needs_upcasting:
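The deleted generator-list branching always called `.mode()`, so the single retrieve_latents(..., sample_mode="argmax") call preserves the deterministic behavior while making the `generator` argument unnecessary (it is dropped from the call site in the hunk below). A small sketch of the two modes, assuming `vae` is an AutoencoderKL and `image` a preprocessed [B, 3, H, W] tensor:

import torch

posterior = vae.encode(image)  # AutoencoderKLOutput carrying a .latent_dist

# "sample" (the default) draws from the latent distribution ...
stochastic = retrieve_latents(posterior, generator=torch.Generator().manual_seed(0))
# ... while "argmax" returns its mode, with no randomness involved.
deterministic = retrieve_latents(posterior, sample_mode="argmax")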
@@ -605,6 +611,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
                 XFormersAttnProcessor,
                 LoRAXFormersAttnProcessor,
                 LoRAAttnProcessor2_0,
+                FusedAttnProcessor2_0,
             ),
         )
         # if xformers or torch_2_0 is used attention block does not need
@@ -865,7 +872,6 @@ class StableDiffusionXLInstructPix2PixPipeline(
             prompt_embeds.dtype,
             device,
             do_classifier_free_guidance,
-            generator,
         )

         # 7. Prepare latent variables
diffusers/pipelines/stable_video_diffusion/__init__.py (new file)
@@ -0,0 +1,58 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    BaseOutput,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure.update(
+        {
+            "pipeline_stable_video_diffusion": [
+                "StableVideoDiffusionPipeline",
+                "StableVideoDiffusionPipelineOutput",
+            ],
+        }
+    )
+
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_stable_video_diffusion import (
+            StableVideoDiffusionPipeline,
+            StableVideoDiffusionPipelineOutput,
+        )
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
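The file follows the library's standard lazy-import pattern: _LazyModule defers the torch/transformers-heavy submodule import until an attribute is first accessed, and dummy objects stand in when the optional dependencies are missing. A minimal usage sketch of the pipeline it exposes, following the published Stable Video Diffusion example (URL and resolution are illustrative):

import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video, load_image

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16, variant="fp16"
)
pipe.enable_model_cpu_offload()

image = load_image("https://example.com/input.png")  # illustrative URL
image = image.resize((1024, 576))  # SVD's native conditioning resolution

frames = pipe(image, decode_chunk_size=8).frames[0]
export_to_video(frames, "generated.mp4", fps=7)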