diffusers 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (174)
  1. diffusers/__init__.py +11 -1
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +12 -8
  4. diffusers/dependency_versions_table.py +2 -1
  5. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  6. diffusers/image_processor.py +286 -46
  7. diffusers/loaders/ip_adapter.py +11 -9
  8. diffusers/loaders/lora.py +198 -60
  9. diffusers/loaders/single_file.py +24 -18
  10. diffusers/loaders/textual_inversion.py +10 -14
  11. diffusers/loaders/unet.py +130 -37
  12. diffusers/models/__init__.py +18 -12
  13. diffusers/models/activations.py +9 -6
  14. diffusers/models/attention.py +137 -16
  15. diffusers/models/attention_processor.py +133 -46
  16. diffusers/models/autoencoders/__init__.py +5 -0
  17. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +4 -4
  18. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +45 -6
  19. diffusers/models/{autoencoder_kl_temporal_decoder.py → autoencoders/autoencoder_kl_temporal_decoder.py} +8 -8
  20. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +4 -4
  21. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +14 -14
  22. diffusers/models/{vae.py → autoencoders/vae.py} +9 -5
  23. diffusers/models/downsampling.py +338 -0
  24. diffusers/models/embeddings.py +112 -29
  25. diffusers/models/modeling_flax_utils.py +12 -7
  26. diffusers/models/modeling_utils.py +10 -10
  27. diffusers/models/normalization.py +108 -2
  28. diffusers/models/resnet.py +15 -699
  29. diffusers/models/transformer_2d.py +2 -2
  30. diffusers/models/unet_2d_condition.py +37 -0
  31. diffusers/models/{unet_kandi3.py → unet_kandinsky3.py} +105 -159
  32. diffusers/models/upsampling.py +454 -0
  33. diffusers/models/uvit_2d.py +471 -0
  34. diffusers/models/vq_model.py +9 -2
  35. diffusers/pipelines/__init__.py +81 -73
  36. diffusers/pipelines/amused/__init__.py +62 -0
  37. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  38. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  39. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +38 -10
  41. diffusers/pipelines/auto_pipeline.py +17 -13
  42. diffusers/pipelines/controlnet/pipeline_controlnet.py +27 -10
  43. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +47 -5
  44. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +25 -8
  45. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +4 -6
  46. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +26 -10
  47. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +4 -3
  48. diffusers/pipelines/deprecated/__init__.py +153 -0
  49. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  50. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +91 -18
  51. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +91 -18
  52. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  53. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  54. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  55. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  56. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  57. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  58. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  59. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  60. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  61. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  62. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  63. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +4 -4
  64. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  65. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  66. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  67. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  68. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +7 -7
  69. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  70. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +16 -11
  71. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +6 -6
  72. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +11 -11
  73. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +16 -11
  74. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +10 -10
  75. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +13 -13
  76. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  77. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  78. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  79. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +54 -11
  80. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  81. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +6 -6
  82. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +6 -6
  83. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +6 -6
  84. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  85. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  86. diffusers/pipelines/kandinsky3/__init__.py +4 -4
  87. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  88. diffusers/pipelines/kandinsky3/{kandinsky3_pipeline.py → pipeline_kandinsky3.py} +172 -35
  89. diffusers/pipelines/kandinsky3/{kandinsky3img2img_pipeline.py → pipeline_kandinsky3_img2img.py} +228 -34
  90. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +46 -5
  91. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +47 -6
  92. diffusers/pipelines/onnx_utils.py +8 -5
  93. diffusers/pipelines/pipeline_flax_utils.py +7 -6
  94. diffusers/pipelines/pipeline_utils.py +30 -29
  95. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +51 -2
  96. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  97. diffusers/pipelines/stable_diffusion/__init__.py +1 -72
  98. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +67 -75
  99. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +92 -8
  100. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -8
  101. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +138 -10
  102. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +57 -7
  103. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -0
  104. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +6 -0
  105. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  106. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  107. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  108. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +5 -2
  109. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  110. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +2 -3
  111. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  112. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +2 -2
  113. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +3 -3
  114. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  115. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +6 -1
  116. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  117. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +50 -7
  118. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  119. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +56 -8
  120. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  121. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  122. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +67 -10
  123. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +97 -15
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -14
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +97 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +7 -5
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +12 -9
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +6 -0
  129. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -0
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +5 -0
  131. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +331 -9
  132. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +468 -9
  133. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  134. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  135. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  136. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +4 -0
  137. diffusers/schedulers/__init__.py +2 -0
  138. diffusers/schedulers/scheduling_amused.py +162 -0
  139. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  140. diffusers/schedulers/scheduling_ddim_inverse.py +1 -4
  141. diffusers/schedulers/scheduling_ddpm.py +46 -0
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +46 -0
  143. diffusers/schedulers/scheduling_deis_multistep.py +13 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +13 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +13 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -0
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -1
  148. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -0
  149. diffusers/schedulers/scheduling_euler_discrete.py +62 -3
  150. diffusers/schedulers/scheduling_heun_discrete.py +2 -0
  151. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -0
  152. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -0
  153. diffusers/schedulers/scheduling_lms_discrete.py +2 -0
  154. diffusers/schedulers/scheduling_unipc_multistep.py +13 -1
  155. diffusers/schedulers/scheduling_utils.py +3 -1
  156. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  157. diffusers/training_utils.py +1 -1
  158. diffusers/utils/__init__.py +0 -2
  159. diffusers/utils/constants.py +2 -5
  160. diffusers/utils/dummy_pt_objects.py +30 -0
  161. diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
  162. diffusers/utils/dynamic_modules_utils.py +14 -18
  163. diffusers/utils/hub_utils.py +24 -36
  164. diffusers/utils/logging.py +1 -1
  165. diffusers/utils/state_dict_utils.py +8 -0
  166. diffusers/utils/testing_utils.py +199 -1
  167. diffusers/utils/torch_utils.py +3 -3
  168. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/METADATA +54 -53
  169. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/RECORD +174 -155
  170. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/WHEEL +1 -1
  171. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/entry_points.txt +0 -1
  172. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  173. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/LICENSE +0 -0
  174. {diffusers-0.24.0.dist-info → diffusers-0.25.0.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
24
24
 
25
25
  from ...image_processor import PipelineImageInput, VaeImageProcessor
26
26
  from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
27
- from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
27
+ from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
28
28
  from ...models.lora import adjust_lora_scale_text_encoder
29
29
  from ...schedulers import KarrasDiffusionSchedulers
30
30
  from ...utils import (
@@ -147,6 +147,9 @@ class StableDiffusionControlNetPipeline(
147
147
 
148
148
  The pipeline also inherits the following loading methods:
149
149
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
150
+ - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
151
+ - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
152
+ - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
150
153
  - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
151
154
 
152
155
  Args:
@@ -173,7 +176,7 @@ class StableDiffusionControlNetPipeline(
173
176
  A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
174
177
  """
175
178
 
176
- model_cpu_offload_seq = "text_encoder->unet->vae"
179
+ model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
177
180
  _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
178
181
  _exclude_from_cpu_offload = ["safety_checker"]
179
182
  _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
@@ -479,18 +482,29 @@ class StableDiffusionControlNetPipeline(
479
482
  return prompt_embeds, negative_prompt_embeds
480
483
 
481
484
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
482
- def encode_image(self, image, device, num_images_per_prompt):
485
+ def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
483
486
  dtype = next(self.image_encoder.parameters()).dtype
484
487
 
485
488
  if not isinstance(image, torch.Tensor):
486
489
  image = self.feature_extractor(image, return_tensors="pt").pixel_values
487
490
 
488
491
  image = image.to(device=device, dtype=dtype)
489
- image_embeds = self.image_encoder(image).image_embeds
490
- image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
492
+ if output_hidden_states:
493
+ image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
494
+ image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
495
+ uncond_image_enc_hidden_states = self.image_encoder(
496
+ torch.zeros_like(image), output_hidden_states=True
497
+ ).hidden_states[-2]
498
+ uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
499
+ num_images_per_prompt, dim=0
500
+ )
501
+ return image_enc_hidden_states, uncond_image_enc_hidden_states
502
+ else:
503
+ image_embeds = self.image_encoder(image).image_embeds
504
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
505
+ uncond_image_embeds = torch.zeros_like(image_embeds)
491
506
 
492
- uncond_image_embeds = torch.zeros_like(image_embeds)
493
- return image_embeds, uncond_image_embeds
507
+ return image_embeds, uncond_image_embeds
494
508
 
495
509
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
496
510
  def run_safety_checker(self, image, device, dtype):
@@ -619,7 +633,7 @@ class StableDiffusionControlNetPipeline(
619
633
  # When `image` is a nested list:
620
634
  # (e.g. [[canny_image_1, pose_image_1], [canny_image_2, pose_image_2]])
621
635
  elif any(isinstance(i, list) for i in image):
622
- raise ValueError("A single batch of multiple conditionings are supported at the moment.")
636
+ raise ValueError("A single batch of multiple conditionings is not supported at the moment.")
623
637
  elif len(image) != len(self.controlnet.nets):
624
638
  raise ValueError(
625
639
  f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
@@ -645,7 +659,7 @@ class StableDiffusionControlNetPipeline(
645
659
  ):
646
660
  if isinstance(controlnet_conditioning_scale, list):
647
661
  if any(isinstance(i, list) for i in controlnet_conditioning_scale):
648
- raise ValueError("A single batch of multiple conditionings are supported at the moment.")
662
+ raise ValueError("A single batch of multiple conditionings is not supported at the moment.")
649
663
  elif isinstance(controlnet_conditioning_scale, list) and len(controlnet_conditioning_scale) != len(
650
664
  self.controlnet.nets
651
665
  ):
@@ -1067,7 +1081,10 @@ class StableDiffusionControlNetPipeline(
1067
1081
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
1068
1082
 
1069
1083
  if ip_adapter_image is not None:
1070
- image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
1084
+ output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
1085
+ image_embeds, negative_image_embeds = self.encode_image(
1086
+ ip_adapter_image, device, num_images_per_prompt, output_hidden_state
1087
+ )
1071
1088
  if self.do_classifier_free_guidance:
1072
1089
  image_embeds = torch.cat([negative_image_embeds, image_embeds])
1073
1090
 
@@ -19,10 +19,10 @@ import numpy as np
19
19
  import PIL.Image
20
20
  import torch
21
21
  import torch.nn.functional as F
22
- from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
22
+ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
23
23
 
24
24
  from ...image_processor import PipelineImageInput, VaeImageProcessor
25
- from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoaderMixin
25
+ from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
26
26
  from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
27
27
  from ...models.lora import adjust_lora_scale_text_encoder
28
28
  from ...schedulers import KarrasDiffusionSchedulers
@@ -130,7 +130,7 @@ def prepare_image(image):
130
130
 
131
131
 
132
132
  class StableDiffusionControlNetImg2ImgPipeline(
133
- DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, FromSingleFileMixin
133
+ DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin, FromSingleFileMixin
134
134
  ):
135
135
  r"""
136
136
  Pipeline for image-to-image generation using Stable Diffusion with ControlNet guidance.
@@ -140,6 +140,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
140
140
 
141
141
  The pipeline also inherits the following loading methods:
142
142
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
143
+ - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
144
+ - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
145
+ - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
146
+ - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
143
147
 
144
148
  Args:
145
149
  vae ([`AutoencoderKL`]):
@@ -166,7 +170,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
166
170
  """
167
171
 
168
172
  model_cpu_offload_seq = "text_encoder->unet->vae"
169
- _optional_components = ["safety_checker", "feature_extractor"]
173
+ _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
170
174
  _exclude_from_cpu_offload = ["safety_checker"]
171
175
  _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
172
176
 
@@ -180,6 +184,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
180
184
  scheduler: KarrasDiffusionSchedulers,
181
185
  safety_checker: StableDiffusionSafetyChecker,
182
186
  feature_extractor: CLIPImageProcessor,
187
+ image_encoder: CLIPVisionModelWithProjection = None,
183
188
  requires_safety_checker: bool = True,
184
189
  ):
185
190
  super().__init__()
@@ -212,6 +217,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
212
217
  scheduler=scheduler,
213
218
  safety_checker=safety_checker,
214
219
  feature_extractor=feature_extractor,
220
+ image_encoder=image_encoder,
215
221
  )
216
222
  self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
217
223
  self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
@@ -468,6 +474,31 @@ class StableDiffusionControlNetImg2ImgPipeline(
468
474
 
469
475
  return prompt_embeds, negative_prompt_embeds
470
476
 
477
+ # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
478
+ def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
479
+ dtype = next(self.image_encoder.parameters()).dtype
480
+
481
+ if not isinstance(image, torch.Tensor):
482
+ image = self.feature_extractor(image, return_tensors="pt").pixel_values
483
+
484
+ image = image.to(device=device, dtype=dtype)
485
+ if output_hidden_states:
486
+ image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
487
+ image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
488
+ uncond_image_enc_hidden_states = self.image_encoder(
489
+ torch.zeros_like(image), output_hidden_states=True
490
+ ).hidden_states[-2]
491
+ uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
492
+ num_images_per_prompt, dim=0
493
+ )
494
+ return image_enc_hidden_states, uncond_image_enc_hidden_states
495
+ else:
496
+ image_embeds = self.image_encoder(image).image_embeds
497
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
498
+ uncond_image_embeds = torch.zeros_like(image_embeds)
499
+
500
+ return image_embeds, uncond_image_embeds
501
+
471
502
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
472
503
  def run_safety_checker(self, image, device, dtype):
473
504
  if self.safety_checker is None:
@@ -861,6 +892,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
861
892
  latents: Optional[torch.FloatTensor] = None,
862
893
  prompt_embeds: Optional[torch.FloatTensor] = None,
863
894
  negative_prompt_embeds: Optional[torch.FloatTensor] = None,
895
+ ip_adapter_image: Optional[PipelineImageInput] = None,
864
896
  output_type: Optional[str] = "pil",
865
897
  return_dict: bool = True,
866
898
  cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -922,6 +954,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
922
954
  negative_prompt_embeds (`torch.FloatTensor`, *optional*):
923
955
  Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
924
956
  not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
957
+ ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
925
958
  output_type (`str`, *optional*, defaults to `"pil"`):
926
959
  The output format of the generated image. Choose between `PIL.Image` or `np.array`.
927
960
  return_dict (`bool`, *optional*, defaults to `True`):
@@ -1053,6 +1086,11 @@ class StableDiffusionControlNetImg2ImgPipeline(
1053
1086
  if self.do_classifier_free_guidance:
1054
1087
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
1055
1088
 
1089
+ if ip_adapter_image is not None:
1090
+ image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
1091
+ if self.do_classifier_free_guidance:
1092
+ image_embeds = torch.cat([negative_image_embeds, image_embeds])
1093
+
1056
1094
  # 4. Prepare image
1057
1095
  image = self.image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
1058
1096
 
@@ -1111,7 +1149,10 @@ class StableDiffusionControlNetImg2ImgPipeline(
1111
1149
  # 7. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
1112
1150
  extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
1113
1151
 
1114
- # 7.1 Create tensor stating which controlnets to keep
1152
+ # 7.1 Add image embeds for IP-Adapter
1153
+ added_cond_kwargs = {"image_embeds": image_embeds} if ip_adapter_image is not None else None
1154
+
1155
+ # 7.2 Create tensor stating which controlnets to keep
1115
1156
  controlnet_keep = []
1116
1157
  for i in range(len(timesteps)):
1117
1158
  keeps = [
@@ -1171,6 +1212,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
1171
1212
  cross_attention_kwargs=self.cross_attention_kwargs,
1172
1213
  down_block_additional_residuals=down_block_res_samples,
1173
1214
  mid_block_additional_residual=mid_block_res_sample,
1215
+ added_cond_kwargs=added_cond_kwargs,
1174
1216
  return_dict=False,
1175
1217
  )[0]
1176
1218
 
@@ -25,7 +25,7 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
25
25
 
26
26
  from ...image_processor import PipelineImageInput, VaeImageProcessor
27
27
  from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
28
- from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
28
+ from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
29
29
  from ...models.lora import adjust_lora_scale_text_encoder
30
30
  from ...schedulers import KarrasDiffusionSchedulers
31
31
  from ...utils import (
@@ -251,6 +251,9 @@ class StableDiffusionControlNetInpaintPipeline(
251
251
 
252
252
  The pipeline also inherits the following loading methods:
253
253
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
254
+ - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
255
+ - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
256
+ - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
254
257
  - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
255
258
 
256
259
  <Tip>
@@ -288,7 +291,7 @@ class StableDiffusionControlNetInpaintPipeline(
288
291
  A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
289
292
  """
290
293
 
291
- model_cpu_offload_seq = "text_encoder->unet->vae"
294
+ model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
292
295
  _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
293
296
  _exclude_from_cpu_offload = ["safety_checker"]
294
297
  _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
@@ -597,18 +600,29 @@ class StableDiffusionControlNetInpaintPipeline(
597
600
  return prompt_embeds, negative_prompt_embeds
598
601
 
599
602
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
600
- def encode_image(self, image, device, num_images_per_prompt):
603
+ def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
601
604
  dtype = next(self.image_encoder.parameters()).dtype
602
605
 
603
606
  if not isinstance(image, torch.Tensor):
604
607
  image = self.feature_extractor(image, return_tensors="pt").pixel_values
605
608
 
606
609
  image = image.to(device=device, dtype=dtype)
607
- image_embeds = self.image_encoder(image).image_embeds
608
- image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
610
+ if output_hidden_states:
611
+ image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
612
+ image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
613
+ uncond_image_enc_hidden_states = self.image_encoder(
614
+ torch.zeros_like(image), output_hidden_states=True
615
+ ).hidden_states[-2]
616
+ uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
617
+ num_images_per_prompt, dim=0
618
+ )
619
+ return image_enc_hidden_states, uncond_image_enc_hidden_states
620
+ else:
621
+ image_embeds = self.image_encoder(image).image_embeds
622
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
623
+ uncond_image_embeds = torch.zeros_like(image_embeds)
609
624
 
610
- uncond_image_embeds = torch.zeros_like(image_embeds)
611
- return image_embeds, uncond_image_embeds
625
+ return image_embeds, uncond_image_embeds
612
626
 
613
627
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
614
628
  def run_safety_checker(self, image, device, dtype):
@@ -1284,7 +1298,10 @@ class StableDiffusionControlNetInpaintPipeline(
1284
1298
  prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
1285
1299
 
1286
1300
  if ip_adapter_image is not None:
1287
- image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
1301
+ output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
1302
+ image_embeds, negative_image_embeds = self.encode_image(
1303
+ ip_adapter_image, device, num_images_per_prompt, output_hidden_state
1304
+ )
1288
1305
  if self.do_classifier_free_guidance:
1289
1306
  image_embeds = torch.cat([negative_image_embeds, image_embeds])
1290
1307
 
@@ -148,12 +148,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
148
148
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
149
149
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
150
150
 
151
- In addition the pipeline inherits the following loading methods:
152
- - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
153
- - *Ckpt*: [`loaders.FromSingleFileMixin.from_single_file`]
154
-
155
- as well as the following saving methods:
156
- - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`]
151
+ The pipeline also inherits the following loading methods:
152
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
153
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
154
+ - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
157
155
 
158
156
  Args:
159
157
  vae ([`AutoencoderKL`]):
@@ -37,7 +37,7 @@ from ...loaders import (
37
37
  StableDiffusionXLLoraLoaderMixin,
38
38
  TextualInversionLoaderMixin,
39
39
  )
40
- from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
40
+ from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
41
41
  from ...models.attention_processor import (
42
42
  AttnProcessor2_0,
43
43
  LoRAAttnProcessor2_0,
@@ -129,8 +129,10 @@ class StableDiffusionXLControlNetPipeline(
129
129
 
130
130
  The pipeline also inherits the following loading methods:
131
131
  - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
132
- - [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
133
- - [`loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
132
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
133
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
134
+ - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
135
+ - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
134
136
 
135
137
  Args:
136
138
  vae ([`AutoencoderKL`]):
@@ -163,7 +165,7 @@ class StableDiffusionXLControlNetPipeline(
163
165
  """
164
166
 
165
167
  # leave controlnet out on purpose because it iterates with unet
166
- model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
168
+ model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->unet->vae"
167
169
  _optional_components = [
168
170
  "tokenizer",
169
171
  "tokenizer_2",
@@ -489,18 +491,29 @@ class StableDiffusionXLControlNetPipeline(
489
491
  return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
490
492
 
491
493
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
492
- def encode_image(self, image, device, num_images_per_prompt):
494
+ def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
493
495
  dtype = next(self.image_encoder.parameters()).dtype
494
496
 
495
497
  if not isinstance(image, torch.Tensor):
496
498
  image = self.feature_extractor(image, return_tensors="pt").pixel_values
497
499
 
498
500
  image = image.to(device=device, dtype=dtype)
499
- image_embeds = self.image_encoder(image).image_embeds
500
- image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
501
+ if output_hidden_states:
502
+ image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
503
+ image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
504
+ uncond_image_enc_hidden_states = self.image_encoder(
505
+ torch.zeros_like(image), output_hidden_states=True
506
+ ).hidden_states[-2]
507
+ uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
508
+ num_images_per_prompt, dim=0
509
+ )
510
+ return image_enc_hidden_states, uncond_image_enc_hidden_states
511
+ else:
512
+ image_embeds = self.image_encoder(image).image_embeds
513
+ image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
514
+ uncond_image_embeds = torch.zeros_like(image_embeds)
501
515
 
502
- uncond_image_embeds = torch.zeros_like(image_embeds)
503
- return image_embeds, uncond_image_embeds
516
+ return image_embeds, uncond_image_embeds
504
517
 
505
518
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
506
519
  def prepare_extra_step_kwargs(self, generator, eta):
@@ -1169,7 +1182,10 @@ class StableDiffusionXLControlNetPipeline(
1169
1182
 
1170
1183
  # 3.2 Encode ip_adapter_image
1171
1184
  if ip_adapter_image is not None:
1172
- image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
1185
+ output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
1186
+ image_embeds, negative_image_embeds = self.encode_image(
1187
+ ip_adapter_image, device, num_images_per_prompt, output_hidden_state
1188
+ )
1173
1189
  if self.do_classifier_free_guidance:
1174
1190
  image_embeds = torch.cat([negative_image_embeds, image_embeds])
1175
1191
 
@@ -155,9 +155,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
155
155
  This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
156
156
  library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
157
157
 
158
- In addition the pipeline inherits the following loading methods:
159
- - *Textual-Inversion*: [`loaders.TextualInversionLoaderMixin.load_textual_inversion`]
160
- - *LoRA*: [`loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`]
158
+ The pipeline also inherits the following loading methods:
159
+ - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
160
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.load_lora_weights`] for loading LoRA weights
161
+ - [`~loaders.StableDiffusionXLLoraLoaderMixin.save_lora_weights`] for saving LoRA weights
161
162
 
162
163
  Args:
163
164
  vae ([`AutoencoderKL`]):
@@ -0,0 +1,153 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_librosa_available,
9
+ is_note_seq_available,
10
+ is_torch_available,
11
+ is_transformers_available,
12
+ )
13
+
14
+
15
+ _dummy_objects = {}
16
+ _import_structure = {}
17
+
18
+ try:
19
+ if not is_torch_available():
20
+ raise OptionalDependencyNotAvailable()
21
+ except OptionalDependencyNotAvailable:
22
+ from ...utils import dummy_pt_objects
23
+
24
+ _dummy_objects.update(get_objects_from_module(dummy_pt_objects))
25
+ else:
26
+ _import_structure["latent_diffusion_uncond"] = ["LDMPipeline"]
27
+ _import_structure["pndm"] = ["PNDMPipeline"]
28
+ _import_structure["repaint"] = ["RePaintPipeline"]
29
+ _import_structure["score_sde_ve"] = ["ScoreSdeVePipeline"]
30
+ _import_structure["stochastic_karras_ve"] = ["KarrasVePipeline"]
31
+
32
+ try:
33
+ if not (is_transformers_available() and is_torch_available()):
34
+ raise OptionalDependencyNotAvailable()
35
+ except OptionalDependencyNotAvailable:
36
+ from ...utils import dummy_torch_and_transformers_objects
37
+
38
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
39
+ else:
40
+ _import_structure["alt_diffusion"] = [
41
+ "AltDiffusionImg2ImgPipeline",
42
+ "AltDiffusionPipeline",
43
+ "AltDiffusionPipelineOutput",
44
+ ]
45
+ _import_structure["versatile_diffusion"] = [
46
+ "VersatileDiffusionDualGuidedPipeline",
47
+ "VersatileDiffusionImageVariationPipeline",
48
+ "VersatileDiffusionPipeline",
49
+ "VersatileDiffusionTextToImagePipeline",
50
+ ]
51
+ _import_structure["vq_diffusion"] = ["VQDiffusionPipeline"]
52
+ _import_structure["stable_diffusion_variants"] = [
53
+ "CycleDiffusionPipeline",
54
+ "StableDiffusionInpaintPipelineLegacy",
55
+ "StableDiffusionPix2PixZeroPipeline",
56
+ "StableDiffusionParadigmsPipeline",
57
+ "StableDiffusionModelEditingPipeline",
58
+ ]
59
+
60
+ try:
61
+ if not (is_torch_available() and is_librosa_available()):
62
+ raise OptionalDependencyNotAvailable()
63
+ except OptionalDependencyNotAvailable:
64
+ from ...utils import dummy_torch_and_librosa_objects # noqa F403
65
+
66
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_librosa_objects))
67
+
68
+ else:
69
+ _import_structure["audio_diffusion"] = ["AudioDiffusionPipeline", "Mel"]
70
+
71
+ try:
72
+ if not (is_transformers_available() and is_torch_available() and is_note_seq_available()):
73
+ raise OptionalDependencyNotAvailable()
74
+ except OptionalDependencyNotAvailable:
75
+ from ...utils import dummy_transformers_and_torch_and_note_seq_objects # noqa F403
76
+
77
+ _dummy_objects.update(get_objects_from_module(dummy_transformers_and_torch_and_note_seq_objects))
78
+
79
+ else:
80
+ _import_structure["spectrogram_diffusion"] = ["MidiProcessor", "SpectrogramDiffusionPipeline"]
81
+
82
+
83
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
84
+ try:
85
+ if not is_torch_available():
86
+ raise OptionalDependencyNotAvailable()
87
+ except OptionalDependencyNotAvailable:
88
+ from ...utils.dummy_pt_objects import *
89
+
90
+ else:
91
+ from .latent_diffusion_uncond import LDMPipeline
92
+ from .pndm import PNDMPipeline
93
+ from .repaint import RePaintPipeline
94
+ from .score_sde_ve import ScoreSdeVePipeline
95
+ from .stochastic_karras_ve import KarrasVePipeline
96
+
97
+ try:
98
+ if not (is_transformers_available() and is_torch_available()):
99
+ raise OptionalDependencyNotAvailable()
100
+ except OptionalDependencyNotAvailable:
101
+ from ...utils.dummy_torch_and_transformers_objects import *
102
+
103
+ else:
104
+ from .alt_diffusion import AltDiffusionImg2ImgPipeline, AltDiffusionPipeline, AltDiffusionPipelineOutput
105
+ from .audio_diffusion import AudioDiffusionPipeline, Mel
106
+ from .spectrogram_diffusion import SpectrogramDiffusionPipeline
107
+ from .stable_diffusion_variants import (
108
+ CycleDiffusionPipeline,
109
+ StableDiffusionInpaintPipelineLegacy,
110
+ StableDiffusionModelEditingPipeline,
111
+ StableDiffusionParadigmsPipeline,
112
+ StableDiffusionPix2PixZeroPipeline,
113
+ )
114
+ from .stochastic_karras_ve import KarrasVePipeline
115
+ from .versatile_diffusion import (
116
+ VersatileDiffusionDualGuidedPipeline,
117
+ VersatileDiffusionImageVariationPipeline,
118
+ VersatileDiffusionPipeline,
119
+ VersatileDiffusionTextToImagePipeline,
120
+ )
121
+ from .vq_diffusion import VQDiffusionPipeline
122
+
123
+ try:
124
+ if not (is_torch_available() and is_librosa_available()):
125
+ raise OptionalDependencyNotAvailable()
126
+ except OptionalDependencyNotAvailable:
127
+ from ...utils.dummy_torch_and_librosa_objects import *
128
+ else:
129
+ from .audio_diffusion import AudioDiffusionPipeline, Mel
130
+
131
+ try:
132
+ if not (is_transformers_available() and is_torch_available() and is_note_seq_available()):
133
+ raise OptionalDependencyNotAvailable()
134
+ except OptionalDependencyNotAvailable:
135
+ from ...utils.dummy_transformers_and_torch_and_note_seq_objects import * # noqa F403
136
+ else:
137
+ from .spectrogram_diffusion import (
138
+ MidiProcessor,
139
+ SpectrogramDiffusionPipeline,
140
+ )
141
+
142
+
143
+ else:
144
+ import sys
145
+
146
+ sys.modules[__name__] = _LazyModule(
147
+ __name__,
148
+ globals()["__file__"],
149
+ _import_structure,
150
+ module_spec=__spec__,
151
+ )
152
+ for name, value in _dummy_objects.items():
153
+ setattr(sys.modules[__name__], name, value)
@@ -1,6 +1,6 @@
1
1
  from typing import TYPE_CHECKING
2
2
 
3
- from ...utils import (
3
+ from ....utils import (
4
4
  DIFFUSERS_SLOW_IMPORT,
5
5
  OptionalDependencyNotAvailable,
6
6
  _LazyModule,
@@ -17,7 +17,7 @@ try:
17
17
  if not (is_transformers_available() and is_torch_available()):
18
18
  raise OptionalDependencyNotAvailable()
19
19
  except OptionalDependencyNotAvailable:
20
- from ...utils import dummy_torch_and_transformers_objects
20
+ from ....utils import dummy_torch_and_transformers_objects
21
21
 
22
22
  _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
23
23
  else:
@@ -32,7 +32,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
32
32
  if not (is_transformers_available() and is_torch_available()):
33
33
  raise OptionalDependencyNotAvailable()
34
34
  except OptionalDependencyNotAvailable:
35
- from ...utils.dummy_torch_and_transformers_objects import *
35
+ from ....utils.dummy_torch_and_transformers_objects import *
36
36
 
37
37
  else:
38
38
  from .modeling_roberta_series import RobertaSeriesModelWithTransformation