diffusers 0.29.2__py3-none-any.whl → 0.30.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (220)
  1. diffusers/__init__.py +94 -3
  2. diffusers/commands/env.py +1 -5
  3. diffusers/configuration_utils.py +4 -9
  4. diffusers/dependency_versions_table.py +2 -2
  5. diffusers/image_processor.py +1 -2
  6. diffusers/loaders/__init__.py +17 -2
  7. diffusers/loaders/ip_adapter.py +10 -7
  8. diffusers/loaders/lora_base.py +752 -0
  9. diffusers/loaders/lora_pipeline.py +2222 -0
  10. diffusers/loaders/peft.py +213 -5
  11. diffusers/loaders/single_file.py +1 -12
  12. diffusers/loaders/single_file_model.py +31 -10
  13. diffusers/loaders/single_file_utils.py +262 -2
  14. diffusers/loaders/textual_inversion.py +1 -6
  15. diffusers/loaders/unet.py +23 -208
  16. diffusers/models/__init__.py +20 -0
  17. diffusers/models/activations.py +22 -0
  18. diffusers/models/attention.py +386 -7
  19. diffusers/models/attention_processor.py +1795 -629
  20. diffusers/models/autoencoders/__init__.py +2 -0
  21. diffusers/models/autoencoders/autoencoder_kl.py +14 -3
  22. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1035 -0
  23. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +1 -1
  24. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  25. diffusers/models/autoencoders/autoencoder_tiny.py +1 -0
  26. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  27. diffusers/models/autoencoders/vq_model.py +4 -4
  28. diffusers/models/controlnet.py +2 -3
  29. diffusers/models/controlnet_hunyuan.py +401 -0
  30. diffusers/models/controlnet_sd3.py +11 -11
  31. diffusers/models/controlnet_sparsectrl.py +789 -0
  32. diffusers/models/controlnet_xs.py +40 -10
  33. diffusers/models/downsampling.py +68 -0
  34. diffusers/models/embeddings.py +319 -36
  35. diffusers/models/model_loading_utils.py +1 -3
  36. diffusers/models/modeling_flax_utils.py +1 -6
  37. diffusers/models/modeling_utils.py +4 -16
  38. diffusers/models/normalization.py +203 -12
  39. diffusers/models/transformers/__init__.py +6 -0
  40. diffusers/models/transformers/auraflow_transformer_2d.py +527 -0
  41. diffusers/models/transformers/cogvideox_transformer_3d.py +345 -0
  42. diffusers/models/transformers/hunyuan_transformer_2d.py +19 -15
  43. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  44. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  45. diffusers/models/transformers/pixart_transformer_2d.py +102 -1
  46. diffusers/models/transformers/prior_transformer.py +1 -1
  47. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  48. diffusers/models/transformers/transformer_flux.py +455 -0
  49. diffusers/models/transformers/transformer_sd3.py +18 -4
  50. diffusers/models/unets/unet_1d_blocks.py +1 -1
  51. diffusers/models/unets/unet_2d_condition.py +8 -1
  52. diffusers/models/unets/unet_3d_blocks.py +51 -920
  53. diffusers/models/unets/unet_3d_condition.py +4 -1
  54. diffusers/models/unets/unet_i2vgen_xl.py +4 -1
  55. diffusers/models/unets/unet_kandinsky3.py +1 -1
  56. diffusers/models/unets/unet_motion_model.py +1330 -84
  57. diffusers/models/unets/unet_spatio_temporal_condition.py +1 -1
  58. diffusers/models/unets/unet_stable_cascade.py +1 -3
  59. diffusers/models/unets/uvit_2d.py +1 -1
  60. diffusers/models/upsampling.py +64 -0
  61. diffusers/models/vq_model.py +8 -4
  62. diffusers/optimization.py +1 -1
  63. diffusers/pipelines/__init__.py +100 -3
  64. diffusers/pipelines/animatediff/__init__.py +4 -0
  65. diffusers/pipelines/animatediff/pipeline_animatediff.py +50 -40
  66. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1076 -0
  67. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +17 -27
  68. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1008 -0
  69. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +51 -38
  70. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  71. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +1 -0
  72. diffusers/pipelines/aura_flow/__init__.py +48 -0
  73. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +591 -0
  74. diffusers/pipelines/auto_pipeline.py +97 -19
  75. diffusers/pipelines/cogvideo/__init__.py +48 -0
  76. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +687 -0
  77. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  78. diffusers/pipelines/controlnet/pipeline_controlnet.py +24 -30
  79. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +31 -30
  80. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +24 -153
  81. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +19 -28
  82. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +18 -28
  83. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +29 -32
  84. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  85. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  86. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1042 -0
  87. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +35 -0
  88. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +10 -6
  89. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +0 -4
  90. diffusers/pipelines/deepfloyd_if/pipeline_if.py +2 -2
  91. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +2 -2
  92. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +2 -2
  93. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +2 -2
  94. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +2 -2
  95. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +2 -2
  96. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -6
  97. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -6
  98. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +6 -6
  99. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +6 -6
  100. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -10
  101. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +10 -6
  102. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +3 -3
  103. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +1 -1
  104. diffusers/pipelines/flux/__init__.py +47 -0
  105. diffusers/pipelines/flux/pipeline_flux.py +749 -0
  106. diffusers/pipelines/flux/pipeline_output.py +21 -0
  107. diffusers/pipelines/free_init_utils.py +2 -0
  108. diffusers/pipelines/free_noise_utils.py +236 -0
  109. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +2 -2
  110. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +2 -2
  111. diffusers/pipelines/kolors/__init__.py +54 -0
  112. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  113. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1247 -0
  114. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  115. diffusers/pipelines/kolors/text_encoder.py +889 -0
  116. diffusers/pipelines/kolors/tokenizer.py +334 -0
  117. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +30 -29
  118. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +23 -29
  119. diffusers/pipelines/latte/__init__.py +48 -0
  120. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  121. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +4 -4
  122. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +0 -4
  123. diffusers/pipelines/lumina/__init__.py +48 -0
  124. diffusers/pipelines/lumina/pipeline_lumina.py +897 -0
  125. diffusers/pipelines/pag/__init__.py +67 -0
  126. diffusers/pipelines/pag/pag_utils.py +237 -0
  127. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1329 -0
  128. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1612 -0
  129. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +953 -0
  130. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  131. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +872 -0
  132. diffusers/pipelines/pag/pipeline_pag_sd.py +1050 -0
  133. diffusers/pipelines/pag/pipeline_pag_sd_3.py +985 -0
  134. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +862 -0
  135. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1333 -0
  136. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1529 -0
  137. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1753 -0
  138. diffusers/pipelines/pia/pipeline_pia.py +30 -37
  139. diffusers/pipelines/pipeline_flax_utils.py +4 -9
  140. diffusers/pipelines/pipeline_loading_utils.py +0 -3
  141. diffusers/pipelines/pipeline_utils.py +2 -14
  142. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +0 -1
  143. diffusers/pipelines/stable_audio/__init__.py +50 -0
  144. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  145. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +745 -0
  146. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +2 -0
  147. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  148. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +23 -29
  149. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +15 -8
  150. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +30 -29
  151. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +23 -152
  152. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +8 -4
  153. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +11 -11
  154. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +8 -6
  155. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +6 -6
  156. diffusers/pipelines/stable_diffusion_3/__init__.py +2 -0
  157. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +34 -3
  158. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +33 -7
  159. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1201 -0
  160. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +3 -3
  161. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +6 -6
  162. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +5 -5
  163. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +5 -5
  164. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +6 -6
  165. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +0 -4
  166. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +23 -29
  167. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +27 -29
  168. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +3 -3
  169. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +17 -27
  170. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +26 -29
  171. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +17 -145
  172. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +0 -4
  173. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +6 -6
  174. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -28
  175. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +8 -6
  176. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +8 -6
  177. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +6 -4
  178. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +0 -4
  179. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +3 -3
  180. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  181. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +5 -4
  182. diffusers/schedulers/__init__.py +8 -0
  183. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  184. diffusers/schedulers/scheduling_ddim.py +1 -1
  185. diffusers/schedulers/scheduling_ddim_cogvideox.py +449 -0
  186. diffusers/schedulers/scheduling_ddpm.py +1 -1
  187. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -1
  188. diffusers/schedulers/scheduling_deis_multistep.py +2 -2
  189. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  190. diffusers/schedulers/scheduling_dpmsolver_multistep.py +1 -1
  191. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +1 -1
  192. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +64 -19
  193. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +2 -2
  194. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +63 -39
  195. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +321 -0
  196. diffusers/schedulers/scheduling_ipndm.py +1 -1
  197. diffusers/schedulers/scheduling_unipc_multistep.py +1 -1
  198. diffusers/schedulers/scheduling_utils.py +1 -3
  199. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  200. diffusers/training_utils.py +99 -14
  201. diffusers/utils/__init__.py +2 -2
  202. diffusers/utils/dummy_pt_objects.py +210 -0
  203. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  204. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  205. diffusers/utils/dummy_torch_and_transformers_objects.py +315 -0
  206. diffusers/utils/dynamic_modules_utils.py +1 -11
  207. diffusers/utils/export_utils.py +1 -4
  208. diffusers/utils/hub_utils.py +45 -42
  209. diffusers/utils/import_utils.py +19 -16
  210. diffusers/utils/loading_utils.py +76 -3
  211. diffusers/utils/testing_utils.py +11 -8
  212. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/METADATA +73 -83
  213. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/RECORD +217 -164
  214. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/WHEEL +1 -1
  215. diffusers/loaders/autoencoder.py +0 -146
  216. diffusers/loaders/controlnet.py +0 -136
  217. diffusers/loaders/lora.py +0 -1728
  218. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/LICENSE +0 -0
  219. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/entry_points.txt +0 -0
  220. {diffusers-0.29.2.dist-info → diffusers-0.30.0.dist-info}/top_level.txt +0 -0
@@ -41,8 +41,6 @@ from ...loaders import (
41
41
  from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
42
42
  from ...models.attention_processor import (
43
43
  AttnProcessor2_0,
44
- LoRAAttnProcessor2_0,
45
- LoRAXFormersAttnProcessor,
46
44
  XFormersAttnProcessor,
47
45
  )
48
46
  from ...models.lora import adjust_lora_scale_text_encoder
@@ -556,6 +554,9 @@ class StableDiffusionXLControlNetPipeline(
556
554
  def prepare_ip_adapter_image_embeds(
557
555
  self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
558
556
  ):
557
+ image_embeds = []
558
+ if do_classifier_free_guidance:
559
+ negative_image_embeds = []
559
560
  if ip_adapter_image_embeds is None:
560
561
  if not isinstance(ip_adapter_image, list):
561
562
  ip_adapter_image = [ip_adapter_image]
@@ -565,7 +566,6 @@ class StableDiffusionXLControlNetPipeline(
565
566
  f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
566
567
  )
567
568
 
568
- image_embeds = []
569
569
  for single_ip_adapter_image, image_proj_layer in zip(
570
570
  ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
571
571
  ):
@@ -573,36 +573,28 @@ class StableDiffusionXLControlNetPipeline(
573
573
  single_image_embeds, single_negative_image_embeds = self.encode_image(
574
574
  single_ip_adapter_image, device, 1, output_hidden_state
575
575
  )
576
- single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
577
- single_negative_image_embeds = torch.stack(
578
- [single_negative_image_embeds] * num_images_per_prompt, dim=0
579
- )
580
576
 
577
+ image_embeds.append(single_image_embeds[None, :])
581
578
  if do_classifier_free_guidance:
582
- single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
583
- single_image_embeds = single_image_embeds.to(device)
584
-
585
- image_embeds.append(single_image_embeds)
579
+ negative_image_embeds.append(single_negative_image_embeds[None, :])
586
580
  else:
587
- repeat_dims = [1]
588
- image_embeds = []
589
581
  for single_image_embeds in ip_adapter_image_embeds:
590
582
  if do_classifier_free_guidance:
591
583
  single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
592
- single_image_embeds = single_image_embeds.repeat(
593
- num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
594
- )
595
- single_negative_image_embeds = single_negative_image_embeds.repeat(
596
- num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
597
- )
598
- single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
599
- else:
600
- single_image_embeds = single_image_embeds.repeat(
601
- num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
602
- )
584
+ negative_image_embeds.append(single_negative_image_embeds)
603
585
  image_embeds.append(single_image_embeds)
604
586
 
605
- return image_embeds
587
+ ip_adapter_image_embeds = []
588
+ for i, single_image_embeds in enumerate(image_embeds):
589
+ single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
590
+ if do_classifier_free_guidance:
591
+ single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
592
+ single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
593
+
594
+ single_image_embeds = single_image_embeds.to(device=device)
595
+ ip_adapter_image_embeds.append(single_image_embeds)
596
+
597
+ return ip_adapter_image_embeds
606
598
 
607
599
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
608
600
  def prepare_extra_step_kwargs(self, generator, eta):
@@ -931,8 +923,6 @@ class StableDiffusionXLControlNetPipeline(
931
923
  (
932
924
  AttnProcessor2_0,
933
925
  XFormersAttnProcessor,
934
- LoRAXFormersAttnProcessor,
935
- LoRAAttnProcessor2_0,
936
926
  ),
937
927
  )
938
928
  # if xformers or torch_2_0 is used attention block does not need
@@ -1497,7 +1487,7 @@ class StableDiffusionXLControlNetPipeline(
1497
1487
  )
1498
1488
 
1499
1489
  if guess_mode and self.do_classifier_free_guidance:
1500
- # Infered ControlNet only for the conditional batch.
1490
+ # Inferred ControlNet only for the conditional batch.
1501
1491
  # To apply the output of ControlNet to both the unconditional and conditional batches,
1502
1492
  # add 0 to the unconditional batch to keep it unchanged.
1503
1493
  down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
@@ -41,8 +41,6 @@ from ...loaders import (
41
41
  from ...models import AutoencoderKL, ControlNetModel, ImageProjection, UNet2DConditionModel
42
42
  from ...models.attention_processor import (
43
43
  AttnProcessor2_0,
44
- LoRAAttnProcessor2_0,
45
- LoRAXFormersAttnProcessor,
46
44
  XFormersAttnProcessor,
47
45
  )
48
46
  from ...models.lora import adjust_lora_scale_text_encoder
@@ -78,13 +76,13 @@ EXAMPLE_DOC_STRING = """
78
76
  >>> import numpy as np
79
77
  >>> from PIL import Image
80
78
 
81
- >>> from transformers import DPTFeatureExtractor, DPTForDepthEstimation
79
+ >>> from transformers import DPTImageProcessor, DPTForDepthEstimation
82
80
  >>> from diffusers import ControlNetModel, StableDiffusionXLControlNetImg2ImgPipeline, AutoencoderKL
83
81
  >>> from diffusers.utils import load_image
84
82
 
85
83
 
86
84
  >>> depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to("cuda")
87
- >>> feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")
85
+ >>> feature_extractor = DPTImageProcessor.from_pretrained("Intel/dpt-hybrid-midas")
88
86
  >>> controlnet = ControlNetModel.from_pretrained(
89
87
  ... "diffusers/controlnet-depth-sdxl-1.0-small",
90
88
  ... variant="fp16",
@@ -550,6 +548,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
550
548
  def prepare_ip_adapter_image_embeds(
551
549
  self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
552
550
  ):
551
+ image_embeds = []
552
+ if do_classifier_free_guidance:
553
+ negative_image_embeds = []
553
554
  if ip_adapter_image_embeds is None:
554
555
  if not isinstance(ip_adapter_image, list):
555
556
  ip_adapter_image = [ip_adapter_image]
@@ -559,7 +560,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
559
560
  f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
560
561
  )
561
562
 
562
- image_embeds = []
563
563
  for single_ip_adapter_image, image_proj_layer in zip(
564
564
  ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
565
565
  ):
@@ -567,36 +567,28 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
567
567
  single_image_embeds, single_negative_image_embeds = self.encode_image(
568
568
  single_ip_adapter_image, device, 1, output_hidden_state
569
569
  )
570
- single_image_embeds = torch.stack([single_image_embeds] * num_images_per_prompt, dim=0)
571
- single_negative_image_embeds = torch.stack(
572
- [single_negative_image_embeds] * num_images_per_prompt, dim=0
573
- )
574
570
 
571
+ image_embeds.append(single_image_embeds[None, :])
575
572
  if do_classifier_free_guidance:
576
- single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
577
- single_image_embeds = single_image_embeds.to(device)
578
-
579
- image_embeds.append(single_image_embeds)
573
+ negative_image_embeds.append(single_negative_image_embeds[None, :])
580
574
  else:
581
- repeat_dims = [1]
582
- image_embeds = []
583
575
  for single_image_embeds in ip_adapter_image_embeds:
584
576
  if do_classifier_free_guidance:
585
577
  single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
586
- single_image_embeds = single_image_embeds.repeat(
587
- num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
588
- )
589
- single_negative_image_embeds = single_negative_image_embeds.repeat(
590
- num_images_per_prompt, *(repeat_dims * len(single_negative_image_embeds.shape[1:]))
591
- )
592
- single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds])
593
- else:
594
- single_image_embeds = single_image_embeds.repeat(
595
- num_images_per_prompt, *(repeat_dims * len(single_image_embeds.shape[1:]))
596
- )
578
+ negative_image_embeds.append(single_negative_image_embeds)
597
579
  image_embeds.append(single_image_embeds)
598
580
 
599
- return image_embeds
581
+ ip_adapter_image_embeds = []
582
+ for i, single_image_embeds in enumerate(image_embeds):
583
+ single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
584
+ if do_classifier_free_guidance:
585
+ single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
586
+ single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
587
+
588
+ single_image_embeds = single_image_embeds.to(device=device)
589
+ ip_adapter_image_embeds.append(single_image_embeds)
590
+
591
+ return ip_adapter_image_embeds
600
592
 
601
593
  # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
602
594
  def prepare_extra_step_kwargs(self, generator, eta):
@@ -938,6 +930,13 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
938
930
  )
939
931
 
940
932
  elif isinstance(generator, list):
933
+ if image.shape[0] < batch_size and batch_size % image.shape[0] == 0:
934
+ image = torch.cat([image] * (batch_size // image.shape[0]), dim=0)
935
+ elif image.shape[0] < batch_size and batch_size % image.shape[0] != 0:
936
+ raise ValueError(
937
+ f"Cannot duplicate `image` of batch size {image.shape[0]} to effective batch_size {batch_size} "
938
+ )
939
+
941
940
  init_latents = [
942
941
  retrieve_latents(self.vae.encode(image[i : i + 1]), generator=generator[i])
943
942
  for i in range(batch_size)
@@ -951,8 +950,8 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
951
950
 
952
951
  init_latents = init_latents.to(dtype)
953
952
  if latents_mean is not None and latents_std is not None:
954
- latents_mean = latents_mean.to(device=self.device, dtype=dtype)
955
- latents_std = latents_std.to(device=self.device, dtype=dtype)
953
+ latents_mean = latents_mean.to(device=device, dtype=dtype)
954
+ latents_std = latents_std.to(device=device, dtype=dtype)
956
955
  init_latents = (init_latents - latents_mean) * self.vae.config.scaling_factor / latents_std
957
956
  else:
958
957
  init_latents = self.vae.config.scaling_factor * init_latents
@@ -1039,8 +1038,6 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1039
1038
  (
1040
1039
  AttnProcessor2_0,
1041
1040
  XFormersAttnProcessor,
1042
- LoRAXFormersAttnProcessor,
1043
- LoRAAttnProcessor2_0,
1044
1041
  ),
1045
1042
  )
1046
1043
  # if xformers or torch_2_0 is used attention block does not need
@@ -1554,7 +1551,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
1554
1551
  )
1555
1552
 
1556
1553
  if guess_mode and self.do_classifier_free_guidance:
1557
- # Infered ControlNet only for the conditional batch.
1554
+ # Inferred ControlNet only for the conditional batch.
1558
1555
  # To apply the output of ControlNet to both the unconditional and conditional batches,
1559
1556
  # add 0 to the unconditional batch to keep it unchanged.
1560
1557
  down_block_res_samples = [torch.cat([torch.zeros_like(d), d]) for d in down_block_res_samples]
@@ -23,7 +23,7 @@ from flax.core.frozen_dict import FrozenDict
23
23
  from flax.jax_utils import unreplicate
24
24
  from flax.training.common_utils import shard
25
25
  from PIL import Image
26
- from transformers import CLIPFeatureExtractor, CLIPTokenizer, FlaxCLIPTextModel
26
+ from transformers import CLIPImageProcessor, CLIPTokenizer, FlaxCLIPTextModel
27
27
 
28
28
  from ...models import FlaxAutoencoderKL, FlaxControlNetModel, FlaxUNet2DConditionModel
29
29
  from ...schedulers import (
@@ -149,7 +149,7 @@ class FlaxStableDiffusionControlNetPipeline(FlaxDiffusionPipeline):
149
149
  FlaxDDIMScheduler, FlaxPNDMScheduler, FlaxLMSDiscreteScheduler, FlaxDPMSolverMultistepScheduler
150
150
  ],
151
151
  safety_checker: FlaxStableDiffusionSafetyChecker,
152
- feature_extractor: CLIPFeatureExtractor,
152
+ feature_extractor: CLIPImageProcessor,
153
153
  dtype: jnp.dtype = jnp.float32,
154
154
  ):
155
155
  super().__init__()
@@ -0,0 +1,48 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_torch_available,
9
+ is_transformers_available,
10
+ )
11
+
12
+
13
+ _dummy_objects = {}
14
+ _import_structure = {}
15
+
16
+
17
+ try:
18
+ if not (is_transformers_available() and is_torch_available()):
19
+ raise OptionalDependencyNotAvailable()
20
+ except OptionalDependencyNotAvailable:
21
+ from ...utils import dummy_torch_and_transformers_objects # noqa F403
22
+
23
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
24
+ else:
25
+ _import_structure["pipeline_hunyuandit_controlnet"] = ["HunyuanDiTControlNetPipeline"]
26
+
27
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
28
+ try:
29
+ if not (is_transformers_available() and is_torch_available()):
30
+ raise OptionalDependencyNotAvailable()
31
+
32
+ except OptionalDependencyNotAvailable:
33
+ from ...utils.dummy_torch_and_transformers_objects import *
34
+ else:
35
+ from .pipeline_hunyuandit_controlnet import HunyuanDiTControlNetPipeline
36
+
37
+ else:
38
+ import sys
39
+
40
+ sys.modules[__name__] = _LazyModule(
41
+ __name__,
42
+ globals()["__file__"],
43
+ _import_structure,
44
+ module_spec=__spec__,
45
+ )
46
+
47
+ for name, value in _dummy_objects.items():
48
+ setattr(sys.modules[__name__], name, value)