diffusers 0.30.3__py3-none-any.whl → 0.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (268)
  1. diffusers/__init__.py +97 -4
  2. diffusers/callbacks.py +56 -3
  3. diffusers/configuration_utils.py +13 -1
  4. diffusers/image_processor.py +282 -71
  5. diffusers/loaders/__init__.py +24 -3
  6. diffusers/loaders/ip_adapter.py +543 -16
  7. diffusers/loaders/lora_base.py +138 -125
  8. diffusers/loaders/lora_conversion_utils.py +647 -0
  9. diffusers/loaders/lora_pipeline.py +2216 -230
  10. diffusers/loaders/peft.py +380 -0
  11. diffusers/loaders/single_file_model.py +71 -4
  12. diffusers/loaders/single_file_utils.py +597 -10
  13. diffusers/loaders/textual_inversion.py +5 -3
  14. diffusers/loaders/transformer_flux.py +181 -0
  15. diffusers/loaders/transformer_sd3.py +89 -0
  16. diffusers/loaders/unet.py +56 -12
  17. diffusers/models/__init__.py +49 -12
  18. diffusers/models/activations.py +22 -9
  19. diffusers/models/adapter.py +53 -53
  20. diffusers/models/attention.py +98 -13
  21. diffusers/models/attention_flax.py +1 -1
  22. diffusers/models/attention_processor.py +2160 -346
  23. diffusers/models/autoencoders/__init__.py +5 -0
  24. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  25. diffusers/models/autoencoders/autoencoder_kl.py +73 -12
  26. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  27. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +213 -105
  28. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  29. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  30. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  31. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +3 -10
  32. diffusers/models/autoencoders/autoencoder_tiny.py +4 -2
  33. diffusers/models/autoencoders/vae.py +18 -5
  34. diffusers/models/controlnet.py +47 -802
  35. diffusers/models/controlnet_flux.py +70 -0
  36. diffusers/models/controlnet_sd3.py +26 -376
  37. diffusers/models/controlnet_sparsectrl.py +46 -719
  38. diffusers/models/controlnets/__init__.py +23 -0
  39. diffusers/models/controlnets/controlnet.py +872 -0
  40. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +5 -5
  41. diffusers/models/controlnets/controlnet_flux.py +536 -0
  42. diffusers/models/{controlnet_hunyuan.py → controlnets/controlnet_hunyuan.py} +7 -7
  43. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  44. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  45. diffusers/models/controlnets/controlnet_union.py +832 -0
  46. diffusers/models/{controlnet_xs.py → controlnets/controlnet_xs.py} +14 -13
  47. diffusers/models/controlnets/multicontrolnet.py +183 -0
  48. diffusers/models/embeddings.py +996 -92
  49. diffusers/models/embeddings_flax.py +23 -9
  50. diffusers/models/model_loading_utils.py +264 -14
  51. diffusers/models/modeling_flax_utils.py +1 -1
  52. diffusers/models/modeling_utils.py +334 -51
  53. diffusers/models/normalization.py +157 -13
  54. diffusers/models/transformers/__init__.py +6 -0
  55. diffusers/models/transformers/auraflow_transformer_2d.py +3 -2
  56. diffusers/models/transformers/cogvideox_transformer_3d.py +69 -13
  57. diffusers/models/transformers/dit_transformer_2d.py +1 -1
  58. diffusers/models/transformers/latte_transformer_3d.py +4 -4
  59. diffusers/models/transformers/pixart_transformer_2d.py +10 -2
  60. diffusers/models/transformers/sana_transformer.py +488 -0
  61. diffusers/models/transformers/stable_audio_transformer.py +1 -1
  62. diffusers/models/transformers/transformer_2d.py +1 -1
  63. diffusers/models/transformers/transformer_allegro.py +422 -0
  64. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  65. diffusers/models/transformers/transformer_flux.py +189 -51
  66. diffusers/models/transformers/transformer_hunyuan_video.py +789 -0
  67. diffusers/models/transformers/transformer_ltx.py +469 -0
  68. diffusers/models/transformers/transformer_mochi.py +499 -0
  69. diffusers/models/transformers/transformer_sd3.py +112 -18
  70. diffusers/models/transformers/transformer_temporal.py +1 -1
  71. diffusers/models/unets/unet_1d_blocks.py +1 -1
  72. diffusers/models/unets/unet_2d.py +8 -1
  73. diffusers/models/unets/unet_2d_blocks.py +88 -21
  74. diffusers/models/unets/unet_2d_condition.py +9 -9
  75. diffusers/models/unets/unet_3d_blocks.py +9 -7
  76. diffusers/models/unets/unet_motion_model.py +46 -68
  77. diffusers/models/unets/unet_spatio_temporal_condition.py +23 -0
  78. diffusers/models/unets/unet_stable_cascade.py +2 -2
  79. diffusers/models/unets/uvit_2d.py +1 -1
  80. diffusers/models/upsampling.py +14 -6
  81. diffusers/pipelines/__init__.py +69 -6
  82. diffusers/pipelines/allegro/__init__.py +48 -0
  83. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  84. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  85. diffusers/pipelines/animatediff/__init__.py +2 -0
  86. diffusers/pipelines/animatediff/pipeline_animatediff.py +45 -21
  87. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +52 -22
  88. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +18 -4
  89. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +3 -1
  90. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +104 -72
  91. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  92. diffusers/pipelines/audioldm2/modeling_audioldm2.py +3 -3
  93. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +2 -9
  94. diffusers/pipelines/auto_pipeline.py +88 -10
  95. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  96. diffusers/pipelines/cogvideo/__init__.py +2 -0
  97. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +80 -39
  98. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  99. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +108 -50
  100. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +89 -50
  101. diffusers/pipelines/cogview3/__init__.py +47 -0
  102. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  103. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  104. diffusers/pipelines/controlnet/__init__.py +86 -80
  105. diffusers/pipelines/controlnet/multicontrolnet.py +7 -178
  106. diffusers/pipelines/controlnet/pipeline_controlnet.py +20 -3
  107. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +9 -2
  108. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +9 -2
  109. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +37 -15
  110. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +12 -4
  111. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +9 -4
  112. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  113. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  114. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  115. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +22 -4
  116. diffusers/pipelines/controlnet_sd3/__init__.py +4 -0
  117. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +56 -20
  118. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  119. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  120. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  121. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +16 -4
  122. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +1 -1
  123. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +32 -9
  124. diffusers/pipelines/flux/__init__.py +23 -1
  125. diffusers/pipelines/flux/modeling_flux.py +47 -0
  126. diffusers/pipelines/flux/pipeline_flux.py +256 -48
  127. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  128. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  129. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  130. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  131. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  132. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  133. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  134. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  135. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  136. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  137. diffusers/pipelines/flux/pipeline_output.py +16 -0
  138. diffusers/pipelines/free_noise_utils.py +365 -5
  139. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  140. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  141. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  142. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +20 -4
  143. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +9 -9
  144. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +2 -2
  145. diffusers/pipelines/kolors/pipeline_kolors.py +1 -1
  146. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +14 -11
  147. diffusers/pipelines/kolors/text_encoder.py +2 -2
  148. diffusers/pipelines/kolors/tokenizer.py +4 -0
  149. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +1 -1
  150. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +1 -1
  151. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  152. diffusers/pipelines/latte/pipeline_latte.py +2 -2
  153. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +15 -3
  154. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +15 -3
  155. diffusers/pipelines/ltx/__init__.py +50 -0
  156. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  157. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  158. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  159. diffusers/pipelines/lumina/pipeline_lumina.py +3 -10
  160. diffusers/pipelines/mochi/__init__.py +48 -0
  161. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  162. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  163. diffusers/pipelines/pag/__init__.py +13 -0
  164. diffusers/pipelines/pag/pag_utils.py +8 -2
  165. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +2 -3
  166. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  167. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +3 -5
  168. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  169. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +22 -6
  170. diffusers/pipelines/pag/pipeline_pag_kolors.py +1 -1
  171. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +7 -14
  172. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  173. diffusers/pipelines/pag/pipeline_pag_sd.py +18 -6
  174. diffusers/pipelines/pag/pipeline_pag_sd_3.py +18 -9
  175. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  176. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +5 -1
  177. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  178. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  179. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +18 -6
  180. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +31 -16
  181. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +42 -19
  182. diffusers/pipelines/pia/pipeline_pia.py +2 -0
  183. diffusers/pipelines/pipeline_flax_utils.py +1 -1
  184. diffusers/pipelines/pipeline_loading_utils.py +250 -31
  185. diffusers/pipelines/pipeline_utils.py +158 -186
  186. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +7 -14
  187. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +7 -14
  188. diffusers/pipelines/sana/__init__.py +47 -0
  189. diffusers/pipelines/sana/pipeline_output.py +21 -0
  190. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  191. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +12 -1
  192. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +35 -3
  193. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +2 -2
  194. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +46 -9
  195. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +1 -1
  196. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +1 -1
  197. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +241 -81
  198. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +228 -23
  199. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +82 -13
  200. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +60 -11
  201. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -1
  202. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +1 -1
  203. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +16 -4
  204. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +16 -4
  205. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -12
  206. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +29 -22
  207. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +29 -22
  208. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +1 -1
  209. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +1 -1
  210. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +16 -4
  211. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +15 -3
  212. diffusers/pipelines/unidiffuser/modeling_uvit.py +2 -2
  213. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  214. diffusers/quantizers/__init__.py +16 -0
  215. diffusers/quantizers/auto.py +139 -0
  216. diffusers/quantizers/base.py +233 -0
  217. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  218. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  219. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  220. diffusers/quantizers/gguf/__init__.py +1 -0
  221. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  222. diffusers/quantizers/gguf/utils.py +456 -0
  223. diffusers/quantizers/quantization_config.py +669 -0
  224. diffusers/quantizers/torchao/__init__.py +15 -0
  225. diffusers/quantizers/torchao/torchao_quantizer.py +285 -0
  226. diffusers/schedulers/scheduling_ddim.py +4 -1
  227. diffusers/schedulers/scheduling_ddim_cogvideox.py +4 -1
  228. diffusers/schedulers/scheduling_ddim_parallel.py +4 -1
  229. diffusers/schedulers/scheduling_ddpm.py +6 -7
  230. diffusers/schedulers/scheduling_ddpm_parallel.py +6 -7
  231. diffusers/schedulers/scheduling_deis_multistep.py +102 -6
  232. diffusers/schedulers/scheduling_dpmsolver_multistep.py +113 -6
  233. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +111 -5
  234. diffusers/schedulers/scheduling_dpmsolver_sde.py +125 -10
  235. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +126 -7
  236. diffusers/schedulers/scheduling_edm_euler.py +8 -6
  237. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +4 -1
  238. diffusers/schedulers/scheduling_euler_discrete.py +92 -7
  239. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +153 -6
  240. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +4 -5
  241. diffusers/schedulers/scheduling_heun_discrete.py +114 -8
  242. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +116 -11
  243. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +110 -8
  244. diffusers/schedulers/scheduling_lcm.py +2 -6
  245. diffusers/schedulers/scheduling_lms_discrete.py +76 -1
  246. diffusers/schedulers/scheduling_repaint.py +1 -1
  247. diffusers/schedulers/scheduling_sasolver.py +102 -6
  248. diffusers/schedulers/scheduling_tcd.py +2 -6
  249. diffusers/schedulers/scheduling_unclip.py +4 -1
  250. diffusers/schedulers/scheduling_unipc_multistep.py +127 -5
  251. diffusers/training_utils.py +63 -19
  252. diffusers/utils/__init__.py +7 -1
  253. diffusers/utils/constants.py +1 -0
  254. diffusers/utils/dummy_pt_objects.py +240 -0
  255. diffusers/utils/dummy_torch_and_transformers_objects.py +435 -0
  256. diffusers/utils/dynamic_modules_utils.py +3 -3
  257. diffusers/utils/hub_utils.py +44 -40
  258. diffusers/utils/import_utils.py +98 -8
  259. diffusers/utils/loading_utils.py +28 -4
  260. diffusers/utils/peft_utils.py +6 -3
  261. diffusers/utils/testing_utils.py +115 -1
  262. diffusers/utils/torch_utils.py +3 -0
  263. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/METADATA +73 -72
  264. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/RECORD +268 -193
  265. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/WHEEL +1 -1
  266. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/LICENSE +0 -0
  267. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/entry_points.txt +0 -0
  268. {diffusers-0.30.3.dist-info → diffusers-0.32.0.dist-info}/top_level.txt +0 -0
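The largest structural addition in this range is the new `diffusers/quantizers/` package (entries 214–225 above), which puts bitsandbytes, GGUF, and torchao backends behind a common `quantization_config` argument on `from_pretrained`. A minimal sketch of the resulting API, assuming a CUDA machine with the `bitsandbytes` extra installed (the checkpoint id is illustrative, not taken from this diff):

```python
import torch
from diffusers import BitsAndBytesConfig, FluxTransformer2DModel

# 4-bit NF4 weights with bf16 compute, handled by the new bnb_quantizer.py backend.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

transformer = FluxTransformer2DModel.from_pretrained(
    "black-forest-labs/FLUX.1-dev",  # illustrative checkpoint
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
```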
@@ -116,7 +116,7 @@ class AnimateDiffTransformer3D(nn.Module):
 
         self.in_channels = in_channels
 
-        self.norm = torch.nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True)
+        self.norm = nn.GroupNorm(num_groups=norm_num_groups, num_channels=in_channels, eps=1e-6, affine=True)
         self.proj_in = nn.Linear(in_channels, inner_dim)
 
         # 3. Define transformers blocks
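The `GroupNorm` change above is purely an aliasing cleanup; both spellings resolve to the same class, as this quick check confirms:

```python
import torch
import torch.nn as nn

# `nn` is the canonical alias for `torch.nn`, so the two constructors are identical.
assert nn.GroupNorm is torch.nn.GroupNorm
```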
@@ -187,12 +187,12 @@ class AnimateDiffTransformer3D(nn.Module):
         hidden_states = self.norm(hidden_states)
         hidden_states = hidden_states.permute(0, 3, 4, 2, 1).reshape(batch_size * height * width, num_frames, channel)
 
-        hidden_states = self.proj_in(hidden_states)
+        hidden_states = self.proj_in(input=hidden_states)
 
         # 2. Blocks
         for block in self.transformer_blocks:
             hidden_states = block(
-                hidden_states,
+                hidden_states=hidden_states,
                 encoder_hidden_states=encoder_hidden_states,
                 timestep=timestep,
                 cross_attention_kwargs=cross_attention_kwargs,
@@ -200,7 +200,7 @@ class AnimateDiffTransformer3D(nn.Module):
             )
 
         # 3. Output
-        hidden_states = self.proj_out(hidden_states)
+        hidden_states = self.proj_out(input=hidden_states)
         hidden_states = (
             hidden_states[None, None, :]
             .reshape(batch_size, height, width, num_frames, channel)
@@ -323,7 +323,7 @@ class DownBlockMotion(nn.Module):
 
         blocks = zip(self.resnets, self.motion_modules)
         for resnet, motion_module in blocks:
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
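The recurring `self.training and self.gradient_checkpointing` → `torch.is_grad_enabled() and self.gradient_checkpointing` edit (repeated across the motion, Stable Cascade, and UVit blocks below) changes when activation checkpointing kicks in: it now follows the autograd state rather than train/eval mode. A minimal sketch of the difference, assuming a frozen module kept in eval mode during training:

```python
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 8), nn.GELU()).eval()  # eval mode, e.g. a frozen UNet
x = torch.randn(2, 8, requires_grad=True)

def forward(module, x, gradient_checkpointing=True):
    # New gate: checkpoint exactly when a backward pass is possible.
    if torch.is_grad_enabled() and gradient_checkpointing:
        return torch.utils.checkpoint.checkpoint(module, x, use_reentrant=False)
    return module(x)

# The old gate (`net.training`) would be False here and skip checkpointing,
# even though autograd is recording and the memory savings would apply.
forward(net, x).sum().backward()

with torch.no_grad():  # pure inference: the new gate is False, no checkpoint overhead
    _ = forward(net, x)
```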
@@ -344,7 +344,7 @@ class DownBlockMotion(nn.Module):
                 )
 
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
             hidden_states = motion_module(hidden_states, num_frames=num_frames)
 
@@ -352,7 +352,7 @@ class DownBlockMotion(nn.Module):
 
         if self.downsamplers is not None:
             for downsampler in self.downsamplers:
-                hidden_states = downsampler(hidden_states)
+                hidden_states = downsampler(hidden_states=hidden_states)
 
             output_states = output_states + (hidden_states,)
 
@@ -513,7 +513,7 @@ class CrossAttnDownBlockMotion(nn.Module):
 
         blocks = list(zip(self.resnets, self.attentions, self.motion_modules))
         for i, (resnet, attn, motion_module) in enumerate(blocks):
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -531,25 +531,18 @@ class CrossAttnDownBlockMotion(nn.Module):
                     temb,
                     **ckpt_kwargs,
                 )
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
+
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
 
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             hidden_states = motion_module(
                 hidden_states,
                 num_frames=num_frames,
@@ -563,7 +556,7 @@ class CrossAttnDownBlockMotion(nn.Module):
 
         if self.downsamplers is not None:
             for downsampler in self.downsamplers:
-                hidden_states = downsampler(hidden_states)
+                hidden_states = downsampler(hidden_states=hidden_states)
 
             output_states = output_states + (hidden_states,)
 
@@ -739,7 +732,7 @@ class CrossAttnUpBlockMotion(nn.Module):
 
             hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
 
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -757,25 +750,18 @@ class CrossAttnUpBlockMotion(nn.Module):
                     temb,
                     **ckpt_kwargs,
                 )
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
+
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
 
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
             hidden_states = motion_module(
                 hidden_states,
                 num_frames=num_frames,
@@ -783,7 +769,7 @@ class CrossAttnUpBlockMotion(nn.Module):
 
         if self.upsamplers is not None:
             for upsampler in self.upsamplers:
-                hidden_states = upsampler(hidden_states, upsample_size)
+                hidden_states = upsampler(hidden_states=hidden_states, output_size=upsample_size)
 
         return hidden_states
 
@@ -909,7 +895,7 @@ class UpBlockMotion(nn.Module):
 
             hidden_states = torch.cat([hidden_states, res_hidden_states], dim=1)
 
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module):
                     def custom_forward(*inputs):
@@ -929,13 +915,13 @@ class UpBlockMotion(nn.Module):
                     create_custom_forward(resnet), hidden_states, temb
                 )
             else:
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
             hidden_states = motion_module(hidden_states, num_frames=num_frames)
 
         if self.upsamplers is not None:
             for upsampler in self.upsamplers:
-                hidden_states = upsampler(hidden_states, upsample_size)
+                hidden_states = upsampler(hidden_states=hidden_states, output_size=upsample_size)
 
         return hidden_states
 
@@ -1080,11 +1066,20 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
         if cross_attention_kwargs.get("scale", None) is not None:
             logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
 
-        hidden_states = self.resnets[0](hidden_states, temb)
+        hidden_states = self.resnets[0](input_tensor=hidden_states, temb=temb)
 
         blocks = zip(self.attentions, self.resnets[1:], self.motion_modules)
         for attn, resnet, motion_module in blocks:
-            if self.training and self.gradient_checkpointing:
+            hidden_states = attn(
+                hidden_states=hidden_states,
+                encoder_hidden_states=encoder_hidden_states,
+                cross_attention_kwargs=cross_attention_kwargs,
+                attention_mask=attention_mask,
+                encoder_attention_mask=encoder_attention_mask,
+                return_dict=False,
+            )[0]
+
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                 def create_custom_forward(module, return_dict=None):
                     def custom_forward(*inputs):
@@ -1096,14 +1091,6 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
                     return custom_forward
 
                 ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
                 hidden_states = torch.utils.checkpoint.checkpoint(
                     create_custom_forward(motion_module),
                     hidden_states,
@@ -1117,19 +1104,11 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
                     **ckpt_kwargs,
                 )
             else:
-                hidden_states = attn(
-                    hidden_states,
-                    encoder_hidden_states=encoder_hidden_states,
-                    cross_attention_kwargs=cross_attention_kwargs,
-                    attention_mask=attention_mask,
-                    encoder_attention_mask=encoder_attention_mask,
-                    return_dict=False,
-                )[0]
                 hidden_states = motion_module(
                     hidden_states,
                     num_frames=num_frames,
                 )
-                hidden_states = resnet(hidden_states, temb)
+                hidden_states = resnet(input_tensor=hidden_states, temb=temb)
 
         return hidden_states
 
@@ -2178,7 +2157,6 @@ class UNetMotionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, Peft
 
         emb = emb if aug_emb is None else emb + aug_emb
         emb = emb.repeat_interleave(repeats=num_frames, dim=0)
-        encoder_hidden_states = encoder_hidden_states.repeat_interleave(repeats=num_frames, dim=0)
 
         if self.encoder_hid_proj is not None and self.config.encoder_hid_dim_type == "ip_image_proj":
             if "image_embeds" not in added_cond_kwargs:
@@ -382,6 +382,20 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
                 If `return_dict` is True, an [`~models.unet_slatio_temporal.UNetSpatioTemporalConditionOutput`] is
                 returned, otherwise a `tuple` is returned where the first element is the sample tensor.
         """
+        # By default samples have to be AT least a multiple of the overall upsampling factor.
+        # The overall upsampling factor is equal to 2 ** (# num of upsampling layears).
+        # However, the upsampling interpolation output size can be forced to fit any upsampling size
+        # on the fly if necessary.
+        default_overall_up_factor = 2**self.num_upsamplers
+
+        # upsample size should be forwarded when sample is not a multiple of `default_overall_up_factor`
+        forward_upsample_size = False
+        upsample_size = None
+
+        if any(s % default_overall_up_factor != 0 for s in sample.shape[-2:]):
+            logger.info("Forward upsample size to force interpolation output size.")
+            forward_upsample_size = True
+
         # 1. time
         timesteps = timestep
         if not torch.is_tensor(timesteps):
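This mirrors the long-standing guard in the 2D condition UNet: spatial dims must divide evenly by the overall upsampling factor, or explicit output sizes are forwarded to the up blocks. A worked example of the check, assuming a UNet with three upsampling stages:

```python
# With num_upsamplers == 3 the sample must be divisible by 2**3 == 8 per spatial dim.
default_overall_up_factor = 2**3

sample_hw = (577, 576)  # hypothetical latent height/width
forward_upsample_size = any(s % default_overall_up_factor != 0 for s in sample_hw)
print(forward_upsample_size)  # True: 577 % 8 != 0, so upsample sizes get forwarded
```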
@@ -457,15 +471,23 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
 
         # 5. up
         for i, upsample_block in enumerate(self.up_blocks):
+            is_final_block = i == len(self.up_blocks) - 1
+
             res_samples = down_block_res_samples[-len(upsample_block.resnets) :]
             down_block_res_samples = down_block_res_samples[: -len(upsample_block.resnets)]
 
+            # if we have not reached the final block and need to forward the
+            # upsample size, we do it here
+            if not is_final_block and forward_upsample_size:
+                upsample_size = down_block_res_samples[-1].shape[2:]
+
             if hasattr(upsample_block, "has_cross_attention") and upsample_block.has_cross_attention:
                 sample = upsample_block(
                     hidden_states=sample,
                     temb=emb,
                     res_hidden_states_tuple=res_samples,
                     encoder_hidden_states=encoder_hidden_states,
+                    upsample_size=upsample_size,
                     image_only_indicator=image_only_indicator,
                 )
             else:
@@ -473,6 +495,7 @@ class UNetSpatioTemporalConditionModel(ModelMixin, ConfigMixin, UNet2DConditionL
                     hidden_states=sample,
                     temb=emb,
                     res_hidden_states_tuple=res_samples,
+                    upsample_size=upsample_size,
                     image_only_indicator=image_only_indicator,
                 )
 
@@ -455,7 +455,7 @@ class StableCascadeUNet(ModelMixin, ConfigMixin, FromOriginalModelMixin):
        level_outputs = []
        block_group = zip(self.down_blocks, self.down_downscalers, self.down_repeat_mappers)
 
-        if self.training and self.gradient_checkpointing:
+        if torch.is_grad_enabled() and self.gradient_checkpointing:
 
            def create_custom_forward(module):
                def custom_forward(*inputs):
@@ -504,7 +504,7 @@ class StableCascadeUNet(ModelMixin, ConfigMixin, FromOriginalModelMixin):
        x = level_outputs[0]
        block_group = zip(self.up_blocks, self.up_upscalers, self.up_repeat_mappers)
 
-        if self.training and self.gradient_checkpointing:
+        if torch.is_grad_enabled() and self.gradient_checkpointing:
 
            def create_custom_forward(module):
                def custom_forward(*inputs):
@@ -181,7 +181,7 @@ class UVit2DModel(ModelMixin, ConfigMixin, PeftAdapterMixin):
        hidden_states = self.project_to_hidden(hidden_states)
 
        for layer in self.transformer_layers:
-            if self.training and self.gradient_checkpointing:
+            if torch.is_grad_enabled() and self.gradient_checkpointing:
 
                def layer_(*args):
                    return checkpoint(layer, *args)
@@ -19,6 +19,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 
 from ..utils import deprecate
+from ..utils.import_utils import is_torch_version
 from .normalization import RMSNorm
 
 
@@ -151,11 +152,10 @@ class Upsample2D(nn.Module):
         if self.use_conv_transpose:
             return self.conv(hidden_states)
 
-        # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16
-        # TODO(Suraj): Remove this cast once the issue is fixed in PyTorch
-        # https://github.com/pytorch/pytorch/issues/86679
+        # Cast to float32 to as 'upsample_nearest2d_out_frame' op does not support bfloat16 until PyTorch 2.1
+        # https://github.com/pytorch/pytorch/issues/86679#issuecomment-1783978767
         dtype = hidden_states.dtype
-        if dtype == torch.bfloat16:
+        if dtype == torch.bfloat16 and is_torch_version("<", "2.1"):
             hidden_states = hidden_states.to(torch.float32)
 
         # upsample_nearest_nhwc fails with large batch sizes. see https://github.com/huggingface/diffusers/issues/984
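`is_torch_version` compares against the installed torch, so the float32 workaround above now self-disables on modern installs. A quick probe of the new gate:

```python
import torch
from diffusers.utils.import_utils import is_torch_version

# The cast is applied only when the upstream op actually lacks bfloat16 support.
dtype = torch.bfloat16
needs_cast = dtype == torch.bfloat16 and is_torch_version("<", "2.1")
print(torch.__version__, needs_cast)  # False on torch >= 2.1
```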
@@ -165,13 +165,21 @@ class Upsample2D(nn.Module):
         # if `output_size` is passed we force the interpolation output
         # size and do not make use of `scale_factor=2`
         if self.interpolate:
+            # upsample_nearest_nhwc also fails when the number of output elements is large
+            # https://github.com/pytorch/pytorch/issues/141831
+            scale_factor = (
+                2 if output_size is None else max([f / s for f, s in zip(output_size, hidden_states.shape[-2:])])
+            )
+            if hidden_states.numel() * scale_factor > pow(2, 31):
+                hidden_states = hidden_states.contiguous()
+
             if output_size is None:
                 hidden_states = F.interpolate(hidden_states, scale_factor=2.0, mode="nearest")
             else:
                 hidden_states = F.interpolate(hidden_states, size=output_size, mode="nearest")
 
-        # If the input is bfloat16, we cast back to bfloat16
-        if dtype == torch.bfloat16:
+        # Cast back to original dtype
+        if dtype == torch.bfloat16 and is_torch_version("<", "2.1"):
             hidden_states = hidden_states.to(dtype)
 
         # TODO(Suraj, Patrick) - clean up after weight dicts are correctly renamed
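The new `numel` guard targets the int32 indexing limit in the channels-last nearest-neighbour kernel. A worked example of when it fires (the meta device avoids actually allocating the ~1.2 billion elements):

```python
import torch

# Hypothetical video-scale activation; device="meta" allocates no memory.
hidden_states = torch.empty(1, 512, 1536, 1536, device="meta")
scale_factor = 2  # the default scale_factor=2 path

# 1_207_959_552 * 2 > 2**31, so .contiguous() is forced before interpolate.
print(hidden_states.numel() * scale_factor > pow(2, 31))  # True
```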
@@ -116,6 +116,7 @@ else:
             "VersatileDiffusionTextToImagePipeline",
         ]
     )
+    _import_structure["allegro"] = ["AllegroPipeline"]
     _import_structure["amused"] = ["AmusedImg2ImgPipeline", "AmusedInpaintPipeline", "AmusedPipeline"]
     _import_structure["animatediff"] = [
         "AnimateDiffPipeline",
@@ -123,8 +124,22 @@ else:
         "AnimateDiffSDXLPipeline",
         "AnimateDiffSparseControlNetPipeline",
         "AnimateDiffVideoToVideoPipeline",
+        "AnimateDiffVideoToVideoControlNetPipeline",
+    ]
+    _import_structure["flux"] = [
+        "FluxControlPipeline",
+        "FluxControlInpaintPipeline",
+        "FluxControlImg2ImgPipeline",
+        "FluxControlNetPipeline",
+        "FluxControlNetImg2ImgPipeline",
+        "FluxControlNetInpaintPipeline",
+        "FluxImg2ImgPipeline",
+        "FluxInpaintPipeline",
+        "FluxPipeline",
+        "FluxFillPipeline",
+        "FluxPriorReduxPipeline",
+        "ReduxImageEncoder",
     ]
-    _import_structure["flux"] = ["FluxPipeline"]
     _import_structure["audioldm"] = ["AudioLDMPipeline"]
     _import_structure["audioldm2"] = [
         "AudioLDM2Pipeline",
@@ -136,7 +151,9 @@ else:
         "CogVideoXPipeline",
         "CogVideoXImageToVideoPipeline",
         "CogVideoXVideoToVideoPipeline",
+        "CogVideoXFunControlPipeline",
     ]
+    _import_structure["cogview3"] = ["CogView3PlusPipeline"]
     _import_structure["controlnet"].extend(
         [
             "BlipDiffusionControlNetPipeline",
@@ -146,21 +163,30 @@ else:
             "StableDiffusionXLControlNetImg2ImgPipeline",
             "StableDiffusionXLControlNetInpaintPipeline",
             "StableDiffusionXLControlNetPipeline",
+            "StableDiffusionXLControlNetUnionPipeline",
+            "StableDiffusionXLControlNetUnionInpaintPipeline",
+            "StableDiffusionXLControlNetUnionImg2ImgPipeline",
         ]
     )
     _import_structure["pag"].extend(
         [
+            "StableDiffusionControlNetPAGInpaintPipeline",
             "AnimateDiffPAGPipeline",
             "KolorsPAGPipeline",
             "HunyuanDiTPAGPipeline",
             "StableDiffusion3PAGPipeline",
+            "StableDiffusion3PAGImg2ImgPipeline",
             "StableDiffusionPAGPipeline",
+            "StableDiffusionPAGImg2ImgPipeline",
+            "StableDiffusionPAGInpaintPipeline",
             "StableDiffusionControlNetPAGPipeline",
             "StableDiffusionXLPAGPipeline",
             "StableDiffusionXLPAGInpaintPipeline",
+            "StableDiffusionXLControlNetPAGImg2ImgPipeline",
             "StableDiffusionXLControlNetPAGPipeline",
             "StableDiffusionXLPAGImg2ImgPipeline",
             "PixArtSigmaPAGPipeline",
+            "SanaPAGPipeline",
         ]
     )
     _import_structure["controlnet_xs"].extend(
@@ -177,6 +203,7 @@ else:
     _import_structure["controlnet_sd3"].extend(
         [
             "StableDiffusion3ControlNetPipeline",
+            "StableDiffusion3ControlNetInpaintingPipeline",
         ]
     )
     _import_structure["deepfloyd_if"] = [
@@ -188,6 +215,7 @@ else:
         "IFSuperResolutionPipeline",
     ]
     _import_structure["hunyuandit"] = ["HunyuanDiTPipeline"]
+    _import_structure["hunyuan_video"] = ["HunyuanVideoPipeline"]
     _import_structure["kandinsky"] = [
         "KandinskyCombinedPipeline",
         "KandinskyImg2ImgCombinedPipeline",
@@ -225,6 +253,7 @@ else:
         ]
     )
     _import_structure["latte"] = ["LattePipeline"]
+    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
     _import_structure["lumina"] = ["LuminaText2ImgPipeline"]
     _import_structure["marigold"].extend(
         [
@@ -232,10 +261,12 @@ else:
             "MarigoldNormalsPipeline",
         ]
     )
+    _import_structure["mochi"] = ["MochiPipeline"]
     _import_structure["musicldm"] = ["MusicLDMPipeline"]
     _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
     _import_structure["pia"] = ["PIAPipeline"]
     _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
+    _import_structure["sana"] = ["SanaPipeline"]
     _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
     _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_audio"] = [
@@ -440,12 +471,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ..utils.dummy_torch_and_transformers_objects import *
     else:
+        from .allegro import AllegroPipeline
         from .amused import AmusedImg2ImgPipeline, AmusedInpaintPipeline, AmusedPipeline
         from .animatediff import (
             AnimateDiffControlNetPipeline,
             AnimateDiffPipeline,
             AnimateDiffSDXLPipeline,
             AnimateDiffSparseControlNetPipeline,
+            AnimateDiffVideoToVideoControlNetPipeline,
             AnimateDiffVideoToVideoPipeline,
         )
         from .audioldm import AudioLDMPipeline
@@ -456,7 +489,13 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         )
         from .aura_flow import AuraFlowPipeline
         from .blip_diffusion import BlipDiffusionPipeline
-        from .cogvideo import CogVideoXImageToVideoPipeline, CogVideoXPipeline, CogVideoXVideoToVideoPipeline
+        from .cogvideo import (
+            CogVideoXFunControlPipeline,
+            CogVideoXImageToVideoPipeline,
+            CogVideoXPipeline,
+            CogVideoXVideoToVideoPipeline,
+        )
+        from .cogview3 import CogView3PlusPipeline
         from .controlnet import (
             BlipDiffusionControlNetPipeline,
             StableDiffusionControlNetImg2ImgPipeline,
@@ -465,13 +504,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             StableDiffusionXLControlNetImg2ImgPipeline,
             StableDiffusionXLControlNetInpaintPipeline,
             StableDiffusionXLControlNetPipeline,
+            StableDiffusionXLControlNetUnionImg2ImgPipeline,
+            StableDiffusionXLControlNetUnionInpaintPipeline,
+            StableDiffusionXLControlNetUnionPipeline,
         )
         from .controlnet_hunyuandit import (
             HunyuanDiTControlNetPipeline,
         )
-        from .controlnet_sd3 import (
-            StableDiffusion3ControlNetPipeline,
-        )
+        from .controlnet_sd3 import StableDiffusion3ControlNetInpaintingPipeline, StableDiffusion3ControlNetPipeline
         from .controlnet_xs import (
             StableDiffusionControlNetXSPipeline,
             StableDiffusionXLControlNetXSPipeline,
498
538
  VersatileDiffusionTextToImagePipeline,
499
539
  VQDiffusionPipeline,
500
540
  )
501
- from .flux import FluxPipeline
541
+ from .flux import (
542
+ FluxControlImg2ImgPipeline,
543
+ FluxControlInpaintPipeline,
544
+ FluxControlNetImg2ImgPipeline,
545
+ FluxControlNetInpaintPipeline,
546
+ FluxControlNetPipeline,
547
+ FluxControlPipeline,
548
+ FluxFillPipeline,
549
+ FluxImg2ImgPipeline,
550
+ FluxInpaintPipeline,
551
+ FluxPipeline,
552
+ FluxPriorReduxPipeline,
553
+ ReduxImageEncoder,
554
+ )
555
+ from .hunyuan_video import HunyuanVideoPipeline
502
556
  from .hunyuandit import HunyuanDiTPipeline
503
557
  from .i2vgen_xl import I2VGenXLPipeline
504
558
  from .kandinsky import (
@@ -538,20 +592,28 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
538
592
  LEditsPPPipelineStableDiffusion,
539
593
  LEditsPPPipelineStableDiffusionXL,
540
594
  )
595
+ from .ltx import LTXImageToVideoPipeline, LTXPipeline
541
596
  from .lumina import LuminaText2ImgPipeline
542
597
  from .marigold import (
543
598
  MarigoldDepthPipeline,
544
599
  MarigoldNormalsPipeline,
545
600
  )
601
+ from .mochi import MochiPipeline
546
602
  from .musicldm import MusicLDMPipeline
547
603
  from .pag import (
548
604
  AnimateDiffPAGPipeline,
549
605
  HunyuanDiTPAGPipeline,
550
606
  KolorsPAGPipeline,
551
607
  PixArtSigmaPAGPipeline,
608
+ SanaPAGPipeline,
609
+ StableDiffusion3PAGImg2ImgPipeline,
552
610
  StableDiffusion3PAGPipeline,
611
+ StableDiffusionControlNetPAGInpaintPipeline,
553
612
  StableDiffusionControlNetPAGPipeline,
613
+ StableDiffusionPAGImg2ImgPipeline,
614
+ StableDiffusionPAGInpaintPipeline,
554
615
  StableDiffusionPAGPipeline,
616
+ StableDiffusionXLControlNetPAGImg2ImgPipeline,
555
617
  StableDiffusionXLControlNetPAGPipeline,
556
618
  StableDiffusionXLPAGImg2ImgPipeline,
557
619
  StableDiffusionXLPAGInpaintPipeline,
@@ -560,6 +622,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
560
622
  from .paint_by_example import PaintByExamplePipeline
561
623
  from .pia import PIAPipeline
562
624
  from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
625
+ from .sana import SanaPipeline
563
626
  from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
564
627
  from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
565
628
  from .stable_audio import StableAudioPipeline, StableAudioProjectionModel
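Taken together, these registry edits make each new pipeline importable from the top-level namespace. A minimal smoke test, assuming torch and transformers are installed:

```python
# Each name below is resolved lazily on first attribute access.
from diffusers import LTXPipeline, MochiPipeline, SanaPipeline  # noqa: F401
```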
@@ -0,0 +1,48 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_allegro"] = ["AllegroPipeline"]
+
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_allegro import AllegroPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
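The new `allegro/__init__.py` follows the standard diffusers lazy-module template: at normal import time the package object is swapped for a `_LazyModule`, and `pipeline_allegro` is only loaded when the attribute is first touched. A small sketch of that behavior, assuming torch and transformers are available:

```python
import diffusers.pipelines.allegro as allegro  # binds the _LazyModule stand-in

# Attribute access triggers the real import of .pipeline_allegro via _import_structure.
print(allegro.AllegroPipeline.__name__)  # "AllegroPipeline"
```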