diffusers-0.24.0-py3-none-any.whl → diffusers-0.25.1-py3-none-any.whl

Files changed (174)
  1. diffusers/__init__.py +11 -1
  2. diffusers/commands/fp16_safetensors.py +10 -11
  3. diffusers/configuration_utils.py +12 -8
  4. diffusers/dependency_versions_table.py +3 -2
  5. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  6. diffusers/image_processor.py +286 -46
  7. diffusers/loaders/ip_adapter.py +11 -9
  8. diffusers/loaders/lora.py +198 -60
  9. diffusers/loaders/single_file.py +24 -18
  10. diffusers/loaders/textual_inversion.py +10 -14
  11. diffusers/loaders/unet.py +130 -37
  12. diffusers/models/__init__.py +18 -12
  13. diffusers/models/activations.py +9 -6
  14. diffusers/models/attention.py +137 -16
  15. diffusers/models/attention_processor.py +133 -46
  16. diffusers/models/autoencoders/__init__.py +5 -0
  17. diffusers/models/{autoencoder_asym_kl.py → autoencoders/autoencoder_asym_kl.py} +4 -4
  18. diffusers/models/{autoencoder_kl.py → autoencoders/autoencoder_kl.py} +45 -6
  19. diffusers/models/{autoencoder_kl_temporal_decoder.py → autoencoders/autoencoder_kl_temporal_decoder.py} +8 -8
  20. diffusers/models/{autoencoder_tiny.py → autoencoders/autoencoder_tiny.py} +4 -4
  21. diffusers/models/{consistency_decoder_vae.py → autoencoders/consistency_decoder_vae.py} +14 -14
  22. diffusers/models/{vae.py → autoencoders/vae.py} +9 -5
  23. diffusers/models/downsampling.py +338 -0
  24. diffusers/models/embeddings.py +112 -29
  25. diffusers/models/modeling_flax_utils.py +12 -7
  26. diffusers/models/modeling_utils.py +10 -10
  27. diffusers/models/normalization.py +108 -2
  28. diffusers/models/resnet.py +15 -699
  29. diffusers/models/transformer_2d.py +2 -2
  30. diffusers/models/unet_2d_condition.py +37 -0
  31. diffusers/models/{unet_kandi3.py → unet_kandinsky3.py} +105 -159
  32. diffusers/models/upsampling.py +454 -0
  33. diffusers/models/uvit_2d.py +471 -0
  34. diffusers/models/vq_model.py +9 -2
  35. diffusers/pipelines/__init__.py +81 -73
  36. diffusers/pipelines/amused/__init__.py +62 -0
  37. diffusers/pipelines/amused/pipeline_amused.py +328 -0
  38. diffusers/pipelines/amused/pipeline_amused_img2img.py +347 -0
  39. diffusers/pipelines/amused/pipeline_amused_inpaint.py +378 -0
  40. diffusers/pipelines/animatediff/pipeline_animatediff.py +38 -10
  41. diffusers/pipelines/auto_pipeline.py +17 -13
  42. diffusers/pipelines/controlnet/pipeline_controlnet.py +27 -10
  43. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +47 -5
  44. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +25 -8
  45. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +4 -6
  46. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +26 -10
  47. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +4 -3
  48. diffusers/pipelines/deprecated/__init__.py +153 -0
  49. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/__init__.py +3 -3
  50. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion.py +91 -18
  51. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_alt_diffusion_img2img.py +91 -18
  52. diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/pipeline_output.py +1 -1
  53. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/__init__.py +1 -1
  54. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/mel.py +2 -2
  55. diffusers/pipelines/{audio_diffusion → deprecated/audio_diffusion}/pipeline_audio_diffusion.py +4 -4
  56. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/__init__.py +1 -1
  57. diffusers/pipelines/{latent_diffusion_uncond → deprecated/latent_diffusion_uncond}/pipeline_latent_diffusion_uncond.py +4 -4
  58. diffusers/pipelines/{pndm → deprecated/pndm}/__init__.py +1 -1
  59. diffusers/pipelines/{pndm → deprecated/pndm}/pipeline_pndm.py +4 -4
  60. diffusers/pipelines/{repaint → deprecated/repaint}/__init__.py +1 -1
  61. diffusers/pipelines/{repaint → deprecated/repaint}/pipeline_repaint.py +5 -5
  62. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/__init__.py +1 -1
  63. diffusers/pipelines/{score_sde_ve → deprecated/score_sde_ve}/pipeline_score_sde_ve.py +4 -4
  64. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/__init__.py +6 -6
  65. diffusers/pipelines/{spectrogram_diffusion/continous_encoder.py → deprecated/spectrogram_diffusion/continuous_encoder.py} +2 -2
  66. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/midi_utils.py +1 -1
  67. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/notes_encoder.py +2 -2
  68. diffusers/pipelines/{spectrogram_diffusion → deprecated/spectrogram_diffusion}/pipeline_spectrogram_diffusion.py +7 -7
  69. diffusers/pipelines/deprecated/stable_diffusion_variants/__init__.py +55 -0
  70. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_cycle_diffusion.py +16 -11
  71. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_onnx_stable_diffusion_inpaint_legacy.py +6 -6
  72. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_inpaint_legacy.py +11 -11
  73. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_model_editing.py +16 -11
  74. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_paradigms.py +10 -10
  75. diffusers/pipelines/{stable_diffusion → deprecated/stable_diffusion_variants}/pipeline_stable_diffusion_pix2pix_zero.py +13 -13
  76. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/__init__.py +1 -1
  77. diffusers/pipelines/{stochastic_karras_ve → deprecated/stochastic_karras_ve}/pipeline_stochastic_karras_ve.py +4 -4
  78. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/__init__.py +3 -3
  79. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/modeling_text_unet.py +54 -11
  80. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion.py +4 -4
  81. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_dual_guided.py +6 -6
  82. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_image_variation.py +6 -6
  83. diffusers/pipelines/{versatile_diffusion → deprecated/versatile_diffusion}/pipeline_versatile_diffusion_text_to_image.py +6 -6
  84. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/__init__.py +3 -3
  85. diffusers/pipelines/{vq_diffusion → deprecated/vq_diffusion}/pipeline_vq_diffusion.py +5 -5
  86. diffusers/pipelines/kandinsky3/__init__.py +4 -4
  87. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +98 -0
  88. diffusers/pipelines/kandinsky3/{kandinsky3_pipeline.py → pipeline_kandinsky3.py} +172 -35
  89. diffusers/pipelines/kandinsky3/{kandinsky3img2img_pipeline.py → pipeline_kandinsky3_img2img.py} +228 -34
  90. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +46 -5
  91. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +47 -6
  92. diffusers/pipelines/onnx_utils.py +8 -5
  93. diffusers/pipelines/pipeline_flax_utils.py +7 -6
  94. diffusers/pipelines/pipeline_utils.py +32 -31
  95. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +51 -2
  96. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +3 -3
  97. diffusers/pipelines/stable_diffusion/__init__.py +1 -72
  98. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +67 -75
  99. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +92 -8
  100. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +92 -8
  101. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +138 -10
  102. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +57 -7
  103. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +3 -0
  104. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +6 -0
  105. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +5 -0
  106. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -0
  107. diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py +48 -0
  108. diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py +5 -2
  109. diffusers/pipelines/stable_diffusion_diffedit/__init__.py +48 -0
  110. diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py +2 -3
  111. diffusers/pipelines/stable_diffusion_gligen/__init__.py +50 -0
  112. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen.py +2 -2
  113. diffusers/pipelines/{stable_diffusion → stable_diffusion_gligen}/pipeline_stable_diffusion_gligen_text_image.py +3 -3
  114. diffusers/pipelines/stable_diffusion_k_diffusion/__init__.py +60 -0
  115. diffusers/pipelines/{stable_diffusion → stable_diffusion_k_diffusion}/pipeline_stable_diffusion_k_diffusion.py +6 -1
  116. diffusers/pipelines/stable_diffusion_ldm3d/__init__.py +48 -0
  117. diffusers/pipelines/{stable_diffusion → stable_diffusion_ldm3d}/pipeline_stable_diffusion_ldm3d.py +50 -7
  118. diffusers/pipelines/stable_diffusion_panorama/__init__.py +48 -0
  119. diffusers/pipelines/{stable_diffusion → stable_diffusion_panorama}/pipeline_stable_diffusion_panorama.py +56 -8
  120. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +58 -6
  121. diffusers/pipelines/stable_diffusion_sag/__init__.py +48 -0
  122. diffusers/pipelines/{stable_diffusion → stable_diffusion_sag}/pipeline_stable_diffusion_sag.py +67 -10
  123. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +97 -15
  124. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +98 -14
  125. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +97 -14
  126. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +7 -5
  127. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +12 -9
  128. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +6 -0
  129. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +5 -0
  130. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +5 -0
  131. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +331 -9
  132. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +468 -9
  133. diffusers/pipelines/unclip/pipeline_unclip.py +2 -1
  134. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +1 -0
  135. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  136. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +4 -0
  137. diffusers/schedulers/__init__.py +2 -0
  138. diffusers/schedulers/scheduling_amused.py +162 -0
  139. diffusers/schedulers/scheduling_consistency_models.py +2 -0
  140. diffusers/schedulers/scheduling_ddim_inverse.py +1 -4
  141. diffusers/schedulers/scheduling_ddpm.py +46 -0
  142. diffusers/schedulers/scheduling_ddpm_parallel.py +46 -0
  143. diffusers/schedulers/scheduling_deis_multistep.py +13 -1
  144. diffusers/schedulers/scheduling_dpmsolver_multistep.py +13 -1
  145. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +13 -1
  146. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -0
  147. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -1
  148. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +58 -0
  149. diffusers/schedulers/scheduling_euler_discrete.py +62 -3
  150. diffusers/schedulers/scheduling_heun_discrete.py +2 -0
  151. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -0
  152. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -0
  153. diffusers/schedulers/scheduling_lms_discrete.py +2 -0
  154. diffusers/schedulers/scheduling_unipc_multistep.py +13 -1
  155. diffusers/schedulers/scheduling_utils.py +3 -1
  156. diffusers/schedulers/scheduling_utils_flax.py +3 -1
  157. diffusers/training_utils.py +1 -1
  158. diffusers/utils/__init__.py +0 -2
  159. diffusers/utils/constants.py +2 -5
  160. diffusers/utils/dummy_pt_objects.py +30 -0
  161. diffusers/utils/dummy_torch_and_transformers_objects.py +45 -0
  162. diffusers/utils/dynamic_modules_utils.py +14 -18
  163. diffusers/utils/hub_utils.py +24 -36
  164. diffusers/utils/logging.py +1 -1
  165. diffusers/utils/state_dict_utils.py +8 -0
  166. diffusers/utils/testing_utils.py +199 -1
  167. diffusers/utils/torch_utils.py +3 -3
  168. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/METADATA +55 -53
  169. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/RECORD +174 -155
  170. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/WHEEL +1 -1
  171. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/entry_points.txt +0 -1
  172. /diffusers/pipelines/{alt_diffusion → deprecated/alt_diffusion}/modeling_roberta_series.py +0 -0
  173. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/LICENSE +0 -0
  174. {diffusers-0.24.0.dist-info → diffusers-0.25.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py

@@ -24,7 +24,8 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV
 from ...configuration_utils import FrozenDict
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin, IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AsymmetricAutoencoderKL, AutoencoderKL, UNet2DConditionModel
+from ...models import AsymmetricAutoencoderKL, AutoencoderKL, ImageProjection, UNet2DConditionModel
+from ...models.attention_processor import FusedAttnProcessor2_0
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
@@ -232,6 +233,7 @@ class StableDiffusionInpaintPipeline(
         - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
         - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
         - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files

     Args:
         vae ([`AutoencoderKL`, `AsymmetricAutoencoderKL`]):
@@ -253,7 +255,7 @@ class StableDiffusionInpaintPipeline(
            A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
    """

-    model_cpu_offload_seq = "text_encoder->unet->vae"
+    model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
    _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
    _exclude_from_cpu_offload = ["safety_checker"]
    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "mask", "masked_image_latents"]
@@ -574,18 +576,29 @@ class StableDiffusionInpaintPipeline(
        return prompt_embeds, negative_prompt_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
-    def encode_image(self, image, device, num_images_per_prompt):
+    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
        dtype = next(self.image_encoder.parameters()).dtype

        if not isinstance(image, torch.Tensor):
            image = self.feature_extractor(image, return_tensors="pt").pixel_values

        image = image.to(device=device, dtype=dtype)
-        image_embeds = self.image_encoder(image).image_embeds
-        image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+        if output_hidden_states:
+            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
+            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_enc_hidden_states = self.image_encoder(
+                torch.zeros_like(image), output_hidden_states=True
+            ).hidden_states[-2]
+            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
+                num_images_per_prompt, dim=0
+            )
+            return image_enc_hidden_states, uncond_image_enc_hidden_states
+        else:
+            image_embeds = self.image_encoder(image).image_embeds
+            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_embeds = torch.zeros_like(image_embeds)

-        uncond_image_embeds = torch.zeros_like(image_embeds)
-        return image_embeds, uncond_image_embeds
+            return image_embeds, uncond_image_embeds

    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
    def run_safety_checker(self, image, device, dtype):
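For context: base IP-Adapter checkpoints consume the pooled CLIP projection (`image_embeds`), while IP-Adapter Plus checkpoints consume the penultimate hidden states, which is why `encode_image` gained the `output_hidden_states` branch above. A minimal sketch of the two outputs, assuming a stock CLIP vision checkpoint (the model id and the dummy image are illustrative):

```python
import numpy as np
import torch
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection

encoder = CLIPVisionModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")
processor = CLIPImageProcessor.from_pretrained("openai/clip-vit-large-patch14")

dummy = Image.fromarray(np.zeros((224, 224, 3), dtype=np.uint8))  # placeholder image
pixels = processor(images=dummy, return_tensors="pt").pixel_values

with torch.no_grad():
    out = encoder(pixels, output_hidden_states=True)

pooled = out.image_embeds            # what the `else` branch repeats and returns
penultimate = out.hidden_states[-2]  # what the new `output_hidden_states` branch returns
```

The pipeline picks the branch automatically by inspecting the type of `unet.encoder_hid_proj` (see the `__call__` hunk further down).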
@@ -623,6 +636,8 @@ class StableDiffusionInpaintPipeline(
    def check_inputs(
        self,
        prompt,
+        image,
+        mask_image,
        height,
        width,
        strength,
@@ -631,6 +646,7 @@ class StableDiffusionInpaintPipeline(
        prompt_embeds=None,
        negative_prompt_embeds=None,
        callback_on_step_end_tensor_inputs=None,
+        padding_mask_crop=None,
    ):
        if strength < 0 or strength > 1:
            raise ValueError(f"The value of strength should be in [0.0, 1.0] but is {strength}")
@@ -676,6 +692,21 @@ class StableDiffusionInpaintPipeline(
                f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
                f" {negative_prompt_embeds.shape}."
            )
+        if padding_mask_crop is not None:
+            if self.unet.config.in_channels != 4:
+                raise ValueError(
+                    f"The UNet should have 4 input channels for inpainting mask crop, but has"
+                    f" {self.unet.config.in_channels} input channels."
+                )
+            if not isinstance(image, PIL.Image.Image):
+                raise ValueError(
+                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                )
+            if not isinstance(mask_image, PIL.Image.Image):
+                raise ValueError(
+                    f"The mask image should be a PIL image when inpainting mask crop, but is of type"
+                    f" {type(mask_image)}."
+                )

    def prepare_latents(
        self,
@@ -832,6 +863,67 @@ class StableDiffusionInpaintPipeline(
        """Disables the FreeU mechanism if enabled."""
        self.unet.disable_freeu()

+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.fuse_qkv_projections
+    def fuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """
+        Enables fused QKV projections. For self-attention modules, all projection matrices (i.e., query, key, value)
+        are fused. For cross-attention modules, key and value projection matrices are fused.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+        """
+        self.fusing_unet = False
+        self.fusing_vae = False
+
+        if unet:
+            self.fusing_unet = True
+            self.unet.fuse_qkv_projections()
+            self.unet.set_attn_processor(FusedAttnProcessor2_0())
+
+        if vae:
+            if not isinstance(self.vae, AutoencoderKL):
+                raise ValueError("`fuse_qkv_projections()` is only supported for the VAE of type `AutoencoderKL`.")
+
+            self.fusing_vae = True
+            self.vae.fuse_qkv_projections()
+            self.vae.set_attn_processor(FusedAttnProcessor2_0())
+
+    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.unfuse_qkv_projections
+    def unfuse_qkv_projections(self, unet: bool = True, vae: bool = True):
+        """Disable QKV projection fusion if enabled.
+
+        <Tip warning={true}>
+
+        This API is 🧪 experimental.
+
+        </Tip>
+
+        Args:
+            unet (`bool`, defaults to `True`): To apply fusion on the UNet.
+            vae (`bool`, defaults to `True`): To apply fusion on the VAE.
+
+        """
+        if unet:
+            if not self.fusing_unet:
+                logger.warning("The UNet was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.unet.unfuse_qkv_projections()
+                self.fusing_unet = False
+
+        if vae:
+            if not self.fusing_vae:
+                logger.warning("The VAE was not initially fused for QKV projections. Doing nothing.")
+            else:
+                self.vae.unfuse_qkv_projections()
+                self.fusing_vae = False
+
    # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
        """
@@ -884,6 +976,10 @@ class StableDiffusionInpaintPipeline(
    def num_timesteps(self):
        return self._num_timesteps

+    @property
+    def interrupt(self):
+        return self._interrupt
+
    @torch.no_grad()
    def __call__(
        self,
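The new `interrupt` property is read at the top of each denoising iteration (see the `@@ -1211` hunk below); together with the `self._interrupt = False` reset in `__call__`, it lets a step callback stop generation early. A hedged sketch, with an illustrative checkpoint and blank placeholder inputs:

```python
import torch
from PIL import Image
from diffusers import StableDiffusionInpaintPipeline

pipe = StableDiffusionInpaintPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float16
).to("cuda")
init_image = Image.new("RGB", (512, 512))   # placeholder input image
mask_image = Image.new("L", (512, 512), 255)  # placeholder mask

def stop_after_ten(pipeline, step, timestep, callback_kwargs):
    if step >= 10:
        pipeline._interrupt = True  # read back through the `interrupt` property each iteration
    return callback_kwargs

result = pipe(
    "a photo of a cat",
    image=init_image,
    mask_image=mask_image,
    callback_on_step_end=stop_after_ten,
)
```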
@@ -893,6 +989,7 @@ class StableDiffusionInpaintPipeline(
        masked_image_latents: torch.FloatTensor = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
+        padding_mask_crop: Optional[int] = None,
        strength: float = 1.0,
        num_inference_steps: int = 50,
        timesteps: List[int] = None,
@@ -937,6 +1034,12 @@ class StableDiffusionInpaintPipeline(
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The width in pixels of the generated image.
+            padding_mask_crop (`int`, *optional*, defaults to `None`):
+                The size of the margin in the crop applied to the image and mask. If `None`, no crop is applied to
+                the image and mask_image. If `padding_mask_crop` is not `None`, it first finds a rectangular region
+                with the same aspect ratio as the image that contains all of the masked area, then expands that
+                region by `padding_mask_crop`. The image and mask_image are cropped to the expanded region and
+                resized to the original image size for inpainting. This is useful when the masked area is small
+                while the image is large and contains information irrelevant to inpainting, such as background.
            strength (`float`, *optional*, defaults to 1.0):
                Indicates extent to transform the reference `image`. Must be between 0 and 1. `image` is used as a
                starting point and more noise is added the higher the `strength`. The number of denoising steps depends
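A hedged call sketch for the new argument, reusing the pipeline and placeholder inputs from the sketch above:

```python
result = pipe(
    prompt="a red brick wall",
    image=init_image,
    mask_image=mask_image,
    padding_mask_crop=32,  # inpaint only the masked region plus a 32px margin, then paste back
)
```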
@@ -1057,6 +1160,8 @@ class StableDiffusionInpaintPipeline(
        # 1. Check inputs
        self.check_inputs(
            prompt,
+            image,
+            mask_image,
            height,
            width,
            strength,
@@ -1065,11 +1170,13 @@ class StableDiffusionInpaintPipeline(
            prompt_embeds,
            negative_prompt_embeds,
            callback_on_step_end_tensor_inputs,
+            padding_mask_crop,
        )

        self._guidance_scale = guidance_scale
        self._clip_skip = clip_skip
        self._cross_attention_kwargs = cross_attention_kwargs
+        self._interrupt = False

        # 2. Define call parameters
        if prompt is not None and isinstance(prompt, str):
@@ -1103,7 +1210,10 @@ class StableDiffusionInpaintPipeline(
            prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])

        if ip_adapter_image is not None:
-            image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt)
+            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+            image_embeds, negative_image_embeds = self.encode_image(
+                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+            )
            if self.do_classifier_free_guidance:
                image_embeds = torch.cat([negative_image_embeds, image_embeds])

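End to end, this branch is driven by `IPAdapterMixin.load_ip_adapter`; whether pooled embeddings or hidden states are returned is decided by the type of `unet.encoder_hid_proj` that loading installs. A hedged sketch (the repository and weight names follow the commonly published IP-Adapter releases; the URL is a placeholder):

```python
from diffusers.utils import load_image

pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")
style = load_image("https://example.com/style.png")  # placeholder URL

result = pipe(
    prompt="best quality, high quality",
    image=init_image,
    mask_image=mask_image,
    ip_adapter_image=style,
)
```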
@@ -1125,7 +1235,17 @@ class StableDiffusionInpaintPipeline(

        # 5. Preprocess mask and image

-        init_image = self.image_processor.preprocess(image, height=height, width=width)
+        if padding_mask_crop is not None:
+            crops_coords = self.mask_processor.get_crop_region(mask_image, width, height, pad=padding_mask_crop)
+            resize_mode = "fill"
+        else:
+            crops_coords = None
+            resize_mode = "default"
+
+        original_image = image
+        init_image = self.image_processor.preprocess(
+            image, height=height, width=width, crops_coords=crops_coords, resize_mode=resize_mode
+        )
        init_image = init_image.to(dtype=torch.float32)

        # 6. Prepare latent variables
@@ -1155,7 +1275,9 @@ class StableDiffusionInpaintPipeline(
        latents, noise = latents_outputs

        # 7. Prepare mask latent variables
-        mask_condition = self.mask_processor.preprocess(mask_image, height=height, width=width)
+        mask_condition = self.mask_processor.preprocess(
+            mask_image, height=height, width=width, resize_mode=resize_mode, crops_coords=crops_coords
+        )

        if masked_image_latents is None:
            masked_image = init_image * (mask_condition < 0.5)
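`get_crop_region` is one of the new `VaeImageProcessor` helpers added in `image_processor.py` (+286 lines in the file list above). A minimal sketch of how a mask processor like this pipeline's `mask_processor` is configured and queried, under the assumption that the helper returns `(x1, y1, x2, y2)` crop coordinates:

```python
from PIL import Image, ImageDraw
from diffusers.image_processor import VaeImageProcessor

mask = Image.new("L", (1024, 1024), 0)
ImageDraw.Draw(mask).rectangle([400, 400, 600, 600], fill=255)  # small masked square

proc = VaeImageProcessor(do_normalize=False, do_binarize=True, do_convert_grayscale=True)
# Bounding box of the masked area, padded by 32px and matched to the image's aspect ratio:
crops_coords = proc.get_crop_region(mask, 1024, 1024, pad=32)
```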
@@ -1211,6 +1333,9 @@ class StableDiffusionInpaintPipeline(
        self._num_timesteps = len(timesteps)
        with self.progress_bar(total=num_inference_steps) as progress_bar:
            for i, t in enumerate(timesteps):
+                if self.interrupt:
+                    continue
+
                # expand the latents if we are doing classifier free guidance
                latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents

@@ -1295,6 +1420,9 @@ class StableDiffusionInpaintPipeline(

        image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

+        if padding_mask_crop is not None:
+            image = [self.image_processor.apply_overlay(mask_image, original_image, i, crops_coords) for i in image]
+
        # Offload all models
        self.maybe_free_model_hooks()

diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py

@@ -18,11 +18,11 @@ from typing import Callable, Dict, List, Optional, Union
 import numpy as np
 import PIL.Image
 import torch
-from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
+from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
-from ...models import AutoencoderKL, UNet2DConditionModel
+from ...loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import PIL_INTERPOLATION, deprecate, logging
 from ...utils.torch_utils import randn_tensor
@@ -72,7 +72,9 @@ def retrieve_latents(
        raise AttributeError("Could not access latents of provided encoder_output")


-class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin):
+class StableDiffusionInstructPix2PixPipeline(
+    DiffusionPipeline, TextualInversionLoaderMixin, LoraLoaderMixin, IPAdapterMixin
+):
    r"""
    Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).

@@ -83,6 +85,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.IPAdapterMixin.load_ip_adapter`] for loading IP Adapters

    Args:
        vae ([`AutoencoderKL`]):
@@ -105,7 +108,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
    """

    model_cpu_offload_seq = "text_encoder->unet->vae"
-    _optional_components = ["safety_checker", "feature_extractor"]
+    _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
    _exclude_from_cpu_offload = ["safety_checker"]
    _callback_tensor_inputs = ["latents", "prompt_embeds", "image_latents"]

@@ -118,6 +121,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        scheduler: KarrasDiffusionSchedulers,
        safety_checker: StableDiffusionSafetyChecker,
        feature_extractor: CLIPImageProcessor,
+        image_encoder: Optional[CLIPVisionModelWithProjection] = None,
        requires_safety_checker: bool = True,
    ):
        super().__init__()
@@ -146,6 +150,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
            scheduler=scheduler,
            safety_checker=safety_checker,
            feature_extractor=feature_extractor,
+            image_encoder=image_encoder,
        )
        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
@@ -166,6 +171,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        latents: Optional[torch.FloatTensor] = None,
        prompt_embeds: Optional[torch.FloatTensor] = None,
        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        ip_adapter_image: Optional[PipelineImageInput] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
@@ -213,6 +219,8 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
+            ip_adapter_image: (`PipelineImageInput`, *optional*):
+                Optional image input to work with IP Adapters.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
@@ -293,6 +301,16 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        self._guidance_scale = guidance_scale
        self._image_guidance_scale = image_guidance_scale

+        device = self._execution_device
+
+        if ip_adapter_image is not None:
+            output_hidden_state = False if isinstance(self.unet.encoder_hid_proj, ImageProjection) else True
+            image_embeds, negative_image_embeds = self.encode_image(
+                ip_adapter_image, device, num_images_per_prompt, output_hidden_state
+            )
+            if self.do_classifier_free_guidance:
+                image_embeds = torch.cat([image_embeds, negative_image_embeds, negative_image_embeds])
+
        if image is None:
            raise ValueError("`image` input cannot be undefined.")

@@ -367,6 +385,9 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        # 8. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

+        # 8.1 Add image embeds for IP-Adapter
+        added_cond_kwargs = {"image_embeds": image_embeds} if ip_adapter_image is not None else None
+
        # 9. Denoising loop
        num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
        self._num_timesteps = len(timesteps)
@@ -383,7 +404,11 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion

                # predict the noise residual
                noise_pred = self.unet(
-                    scaled_latent_model_input, t, encoder_hidden_states=prompt_embeds, return_dict=False
+                    scaled_latent_model_input,
+                    t,
+                    encoder_hidden_states=prompt_embeds,
+                    added_cond_kwargs=added_cond_kwargs,
+                    return_dict=False,
                )[0]

                # Hack:
@@ -598,11 +623,36 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
        # For classifier free guidance, we need to do two forward passes.
        # Here we concatenate the unconditional and text embeddings into a single batch
        # to avoid doing two forward passes
-        # pix2pix has two negative embeddings, and unlike in other pipelines latents are ordered [prompt_embeds, negative_prompt_embeds, negative_prompt_embeds]
+        # pix2pix has two negative embeddings, and unlike in other pipelines latents are ordered [prompt_embeds, negative_prompt_embeds, negative_prompt_embeds]
        prompt_embeds = torch.cat([prompt_embeds, negative_prompt_embeds, negative_prompt_embeds])

        return prompt_embeds

+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_image
+    def encode_image(self, image, device, num_images_per_prompt, output_hidden_states=None):
+        dtype = next(self.image_encoder.parameters()).dtype
+
+        if not isinstance(image, torch.Tensor):
+            image = self.feature_extractor(image, return_tensors="pt").pixel_values
+
+        image = image.to(device=device, dtype=dtype)
+        if output_hidden_states:
+            image_enc_hidden_states = self.image_encoder(image, output_hidden_states=True).hidden_states[-2]
+            image_enc_hidden_states = image_enc_hidden_states.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_enc_hidden_states = self.image_encoder(
+                torch.zeros_like(image), output_hidden_states=True
+            ).hidden_states[-2]
+            uncond_image_enc_hidden_states = uncond_image_enc_hidden_states.repeat_interleave(
+                num_images_per_prompt, dim=0
+            )
+            return image_enc_hidden_states, uncond_image_enc_hidden_states
+        else:
+            image_embeds = self.image_encoder(image).image_embeds
+            image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+            uncond_image_embeds = torch.zeros_like(image_embeds)
+
+            return image_embeds, uncond_image_embeds
+
    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.run_safety_checker
    def run_safety_checker(self, image, device, dtype):
        if self.safety_checker is None:
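Putting the InstructPix2Pix changes together: the pipeline now accepts an optional `image_encoder`, exposes `ip_adapter_image`, and orders the guidance batch `[image_embeds, negative, negative]` to match its three-way classifier-free guidance. A hedged usage sketch (the checkpoint and adapter names are the commonly published ones; inputs are blank placeholders):

```python
import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "timbrooks/instruct-pix2pix", torch_dtype=torch.float16
).to("cuda")
pipe.load_ip_adapter("h94/IP-Adapter", subfolder="models", weight_name="ip-adapter_sd15.bin")

input_image = Image.new("RGB", (512, 512))  # placeholder input
style_image = Image.new("RGB", (512, 512))  # placeholder style reference

edited = pipe("make it look like a painting", image=input_image, ip_adapter_image=style_image)
```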
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py

@@ -67,6 +67,9 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, FromSingleFileMixi
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+
    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.

diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py

@@ -76,6 +76,12 @@ class StableDiffusionUpscalePipeline(
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+        - [`~loaders.FromSingleFileMixin.from_single_file`] for loading `.ckpt` files
+
    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.

diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py

@@ -65,6 +65,11 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
    Args:
        prior_tokenizer ([`CLIPTokenizer`]):
            A [`CLIPTokenizer`].

diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py

@@ -76,6 +76,11 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+        - [`~loaders.LoraLoaderMixin.load_lora_weights`] for loading LoRA weights
+        - [`~loaders.LoraLoaderMixin.save_lora_weights`] for saving LoRA weights
+
    Args:
        feature_extractor ([`CLIPImageProcessor`]):
            Feature extractor for image pre-processing before being encoded.
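Since `from_single_file` is now advertised on the upscale pipelines, a hedged loading sketch (the checkpoint path is illustrative):

```python
from diffusers import StableDiffusionUpscalePipeline

pipe = StableDiffusionUpscalePipeline.from_single_file("path/to/x4-upscaler.ckpt")
```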
diffusers/pipelines/stable_diffusion_attend_and_excite/__init__.py (new file)

@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_stable_diffusion_attend_and_excite import StableDiffusionAttendAndExcitePipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
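All of the new per-pipeline `__init__.py` files in this release follow this same `_LazyModule` pattern: only `_import_structure` is registered at import time, and the heavy torch/transformers-backed module is imported on first attribute access (or eagerly under `DIFFUSERS_SLOW_IMPORT` and during type checking). A hedged illustration:

```python
import diffusers.pipelines.stable_diffusion_attend_and_excite as aae

# The import above is cheap: the module object is a _LazyModule proxy.
# Touching the attribute triggers the real (torch/transformers-dependent) import:
PipelineCls = aae.StableDiffusionAttendAndExcitePipeline
```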
diffusers/pipelines/{stable_diffusion → stable_diffusion_attend_and_excite}/pipeline_stable_diffusion_attend_and_excite.py

@@ -37,8 +37,8 @@ from ...utils import (
 )
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ..stable_diffusion import StableDiffusionPipelineOutput
+from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


 logger = logging.get_logger(__name__)
@@ -177,6 +177,9 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
    implemented for all pipelines (downloading, saving, running on a particular device, etc.).

+    The pipeline also inherits the following loading methods:
+        - [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`] for loading textual inversion embeddings
+
    Args:
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) model to encode and decode images to and from latent representations.
diffusers/pipelines/stable_diffusion_diffedit/__init__.py (new file)

@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_diffedit"] = ["StableDiffusionDiffEditPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_stable_diffusion_diffedit import StableDiffusionDiffEditPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
diffusers/pipelines/{stable_diffusion → stable_diffusion_diffedit}/pipeline_stable_diffusion_diffedit.py

@@ -40,8 +40,8 @@ from ...utils import (
 )
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
-from . import StableDiffusionPipelineOutput
-from .safety_checker import StableDiffusionSafetyChecker
+from ..stable_diffusion import StableDiffusionPipelineOutput
+from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker


 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -788,7 +788,6 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
        latents = latents * self.scheduler.init_noise_sigma
        return latents

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_pix2pix_zero.StableDiffusionPix2PixZeroPipeline.prepare_image_latents
    def prepare_image_latents(self, image, batch_size, dtype, device, generator=None):
        if not isinstance(image, (torch.Tensor, PIL.Image.Image, list)):
            raise ValueError(
diffusers/pipelines/stable_diffusion_gligen/__init__.py (new file)

@@ -0,0 +1,50 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_stable_diffusion_gligen"] = ["StableDiffusionGLIGENPipeline"]
+    _import_structure["pipeline_stable_diffusion_gligen_text_image"] = ["StableDiffusionGLIGENTextImagePipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_stable_diffusion_gligen import StableDiffusionGLIGENPipeline
+        from .pipeline_stable_diffusion_gligen_text_image import StableDiffusionGLIGENTextImagePipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)