diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only; it reflects the changes between the two package versions exactly as they appear in their respective public registries.
Files changed (551)
  1. diffusers/__init__.py +145 -1
  2. diffusers/callbacks.py +35 -0
  3. diffusers/commands/__init__.py +1 -1
  4. diffusers/commands/custom_blocks.py +134 -0
  5. diffusers/commands/diffusers_cli.py +3 -1
  6. diffusers/commands/env.py +1 -1
  7. diffusers/commands/fp16_safetensors.py +2 -2
  8. diffusers/configuration_utils.py +11 -2
  9. diffusers/dependency_versions_check.py +1 -1
  10. diffusers/dependency_versions_table.py +3 -3
  11. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  12. diffusers/guiders/__init__.py +41 -0
  13. diffusers/guiders/adaptive_projected_guidance.py +188 -0
  14. diffusers/guiders/auto_guidance.py +190 -0
  15. diffusers/guiders/classifier_free_guidance.py +141 -0
  16. diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
  17. diffusers/guiders/frequency_decoupled_guidance.py +327 -0
  18. diffusers/guiders/guider_utils.py +309 -0
  19. diffusers/guiders/perturbed_attention_guidance.py +271 -0
  20. diffusers/guiders/skip_layer_guidance.py +262 -0
  21. diffusers/guiders/smoothed_energy_guidance.py +251 -0
  22. diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
  23. diffusers/hooks/__init__.py +17 -0
  24. diffusers/hooks/_common.py +56 -0
  25. diffusers/hooks/_helpers.py +293 -0
  26. diffusers/hooks/faster_cache.py +9 -8
  27. diffusers/hooks/first_block_cache.py +259 -0
  28. diffusers/hooks/group_offloading.py +332 -227
  29. diffusers/hooks/hooks.py +58 -3
  30. diffusers/hooks/layer_skip.py +263 -0
  31. diffusers/hooks/layerwise_casting.py +5 -10
  32. diffusers/hooks/pyramid_attention_broadcast.py +15 -12
  33. diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
  34. diffusers/hooks/utils.py +43 -0
  35. diffusers/image_processor.py +7 -2
  36. diffusers/loaders/__init__.py +10 -0
  37. diffusers/loaders/ip_adapter.py +260 -18
  38. diffusers/loaders/lora_base.py +261 -127
  39. diffusers/loaders/lora_conversion_utils.py +657 -35
  40. diffusers/loaders/lora_pipeline.py +2778 -1246
  41. diffusers/loaders/peft.py +78 -112
  42. diffusers/loaders/single_file.py +2 -2
  43. diffusers/loaders/single_file_model.py +64 -15
  44. diffusers/loaders/single_file_utils.py +395 -7
  45. diffusers/loaders/textual_inversion.py +3 -2
  46. diffusers/loaders/transformer_flux.py +10 -11
  47. diffusers/loaders/transformer_sd3.py +8 -3
  48. diffusers/loaders/unet.py +24 -21
  49. diffusers/loaders/unet_loader_utils.py +6 -3
  50. diffusers/loaders/utils.py +1 -1
  51. diffusers/models/__init__.py +23 -1
  52. diffusers/models/activations.py +5 -5
  53. diffusers/models/adapter.py +2 -3
  54. diffusers/models/attention.py +488 -7
  55. diffusers/models/attention_dispatch.py +1218 -0
  56. diffusers/models/attention_flax.py +10 -10
  57. diffusers/models/attention_processor.py +113 -667
  58. diffusers/models/auto_model.py +49 -12
  59. diffusers/models/autoencoders/__init__.py +2 -0
  60. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  61. diffusers/models/autoencoders/autoencoder_dc.py +17 -4
  62. diffusers/models/autoencoders/autoencoder_kl.py +5 -5
  63. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  64. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  65. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
  66. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  67. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  68. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  69. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  70. diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
  71. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  72. diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
  73. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  74. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  75. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  76. diffusers/models/autoencoders/vae.py +13 -2
  77. diffusers/models/autoencoders/vq_model.py +2 -2
  78. diffusers/models/cache_utils.py +32 -10
  79. diffusers/models/controlnet.py +1 -1
  80. diffusers/models/controlnet_flux.py +1 -1
  81. diffusers/models/controlnet_sd3.py +1 -1
  82. diffusers/models/controlnet_sparsectrl.py +1 -1
  83. diffusers/models/controlnets/__init__.py +1 -0
  84. diffusers/models/controlnets/controlnet.py +3 -3
  85. diffusers/models/controlnets/controlnet_flax.py +1 -1
  86. diffusers/models/controlnets/controlnet_flux.py +21 -20
  87. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  88. diffusers/models/controlnets/controlnet_sana.py +290 -0
  89. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  90. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  91. diffusers/models/controlnets/controlnet_union.py +5 -5
  92. diffusers/models/controlnets/controlnet_xs.py +7 -7
  93. diffusers/models/controlnets/multicontrolnet.py +4 -5
  94. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  95. diffusers/models/downsampling.py +2 -2
  96. diffusers/models/embeddings.py +36 -46
  97. diffusers/models/embeddings_flax.py +2 -2
  98. diffusers/models/lora.py +3 -3
  99. diffusers/models/model_loading_utils.py +233 -1
  100. diffusers/models/modeling_flax_utils.py +1 -2
  101. diffusers/models/modeling_utils.py +203 -108
  102. diffusers/models/normalization.py +4 -4
  103. diffusers/models/resnet.py +2 -2
  104. diffusers/models/resnet_flax.py +1 -1
  105. diffusers/models/transformers/__init__.py +7 -0
  106. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  107. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  108. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  109. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  110. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  111. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  112. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  113. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  114. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  115. diffusers/models/transformers/prior_transformer.py +1 -1
  116. diffusers/models/transformers/sana_transformer.py +8 -3
  117. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  118. diffusers/models/transformers/t5_film_transformer.py +3 -3
  119. diffusers/models/transformers/transformer_2d.py +1 -1
  120. diffusers/models/transformers/transformer_allegro.py +1 -1
  121. diffusers/models/transformers/transformer_chroma.py +641 -0
  122. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  123. diffusers/models/transformers/transformer_cogview4.py +353 -27
  124. diffusers/models/transformers/transformer_cosmos.py +586 -0
  125. diffusers/models/transformers/transformer_flux.py +376 -138
  126. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  127. diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
  128. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  129. diffusers/models/transformers/transformer_ltx.py +105 -24
  130. diffusers/models/transformers/transformer_lumina2.py +1 -1
  131. diffusers/models/transformers/transformer_mochi.py +1 -1
  132. diffusers/models/transformers/transformer_omnigen.py +2 -2
  133. diffusers/models/transformers/transformer_qwenimage.py +645 -0
  134. diffusers/models/transformers/transformer_sd3.py +7 -7
  135. diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
  136. diffusers/models/transformers/transformer_temporal.py +1 -1
  137. diffusers/models/transformers/transformer_wan.py +316 -87
  138. diffusers/models/transformers/transformer_wan_vace.py +387 -0
  139. diffusers/models/unets/unet_1d.py +1 -1
  140. diffusers/models/unets/unet_1d_blocks.py +1 -1
  141. diffusers/models/unets/unet_2d.py +1 -1
  142. diffusers/models/unets/unet_2d_blocks.py +1 -1
  143. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  144. diffusers/models/unets/unet_2d_condition.py +4 -3
  145. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  146. diffusers/models/unets/unet_3d_blocks.py +1 -1
  147. diffusers/models/unets/unet_3d_condition.py +3 -3
  148. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  149. diffusers/models/unets/unet_kandinsky3.py +1 -1
  150. diffusers/models/unets/unet_motion_model.py +2 -2
  151. diffusers/models/unets/unet_stable_cascade.py +1 -1
  152. diffusers/models/upsampling.py +2 -2
  153. diffusers/models/vae_flax.py +2 -2
  154. diffusers/models/vq_model.py +1 -1
  155. diffusers/modular_pipelines/__init__.py +83 -0
  156. diffusers/modular_pipelines/components_manager.py +1068 -0
  157. diffusers/modular_pipelines/flux/__init__.py +66 -0
  158. diffusers/modular_pipelines/flux/before_denoise.py +689 -0
  159. diffusers/modular_pipelines/flux/decoders.py +109 -0
  160. diffusers/modular_pipelines/flux/denoise.py +227 -0
  161. diffusers/modular_pipelines/flux/encoders.py +412 -0
  162. diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
  163. diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
  164. diffusers/modular_pipelines/modular_pipeline.py +2446 -0
  165. diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
  166. diffusers/modular_pipelines/node_utils.py +665 -0
  167. diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
  168. diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
  169. diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
  170. diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
  171. diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
  172. diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
  173. diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
  174. diffusers/modular_pipelines/wan/__init__.py +66 -0
  175. diffusers/modular_pipelines/wan/before_denoise.py +365 -0
  176. diffusers/modular_pipelines/wan/decoders.py +105 -0
  177. diffusers/modular_pipelines/wan/denoise.py +261 -0
  178. diffusers/modular_pipelines/wan/encoders.py +242 -0
  179. diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
  180. diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
  181. diffusers/pipelines/__init__.py +68 -6
  182. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  183. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  184. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  185. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  186. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  187. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  188. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  189. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  190. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  191. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  192. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  193. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  194. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
  195. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  196. diffusers/pipelines/auto_pipeline.py +23 -20
  197. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  198. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  199. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  200. diffusers/pipelines/chroma/__init__.py +49 -0
  201. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  202. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  203. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  204. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
  205. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
  206. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
  207. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
  208. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  209. diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
  210. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  211. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  212. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  213. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  214. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  215. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
  216. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  217. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  218. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  219. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  220. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  221. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
  222. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
  223. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
  224. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  225. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  226. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  227. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  228. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  229. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  230. diffusers/pipelines/cosmos/__init__.py +54 -0
  231. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  232. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  233. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  234. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  235. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  236. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  237. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  238. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  239. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  240. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  241. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  242. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  243. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  244. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  245. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  246. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  247. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  248. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  249. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  250. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  251. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  252. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  253. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  254. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  255. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  256. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  257. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
  258. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  259. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  260. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  261. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  262. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  263. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  264. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  265. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  266. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  267. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  268. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  269. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  270. diffusers/pipelines/dit/pipeline_dit.py +4 -2
  271. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  272. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  273. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  274. diffusers/pipelines/flux/__init__.py +4 -0
  275. diffusers/pipelines/flux/modeling_flux.py +1 -1
  276. diffusers/pipelines/flux/pipeline_flux.py +37 -36
  277. diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
  278. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
  279. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
  280. diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
  281. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
  282. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
  283. diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
  284. diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
  285. diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
  286. diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
  287. diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
  288. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
  289. diffusers/pipelines/flux/pipeline_output.py +6 -4
  290. diffusers/pipelines/free_init_utils.py +2 -2
  291. diffusers/pipelines/free_noise_utils.py +3 -3
  292. diffusers/pipelines/hidream_image/__init__.py +47 -0
  293. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  294. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  295. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  296. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  297. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
  298. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  299. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  300. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  301. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  302. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  303. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  304. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  305. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  306. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  307. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  308. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  309. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  310. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  311. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  312. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  313. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  314. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  315. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  316. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  317. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  318. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  319. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  320. diffusers/pipelines/kolors/text_encoder.py +3 -3
  321. diffusers/pipelines/kolors/tokenizer.py +1 -1
  322. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  323. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  324. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  325. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  326. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  327. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  328. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  329. diffusers/pipelines/ltx/__init__.py +4 -0
  330. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  331. diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
  332. diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
  333. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
  334. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  335. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  336. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  337. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  338. diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
  339. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  340. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  341. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  342. diffusers/pipelines/onnx_utils.py +15 -2
  343. diffusers/pipelines/pag/pag_utils.py +2 -2
  344. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  345. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  346. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  347. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  348. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  349. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  350. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  351. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  352. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  353. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  354. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  355. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  356. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  357. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  358. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  359. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  360. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  361. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  362. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  363. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  364. diffusers/pipelines/pipeline_flax_utils.py +5 -6
  365. diffusers/pipelines/pipeline_loading_utils.py +113 -15
  366. diffusers/pipelines/pipeline_utils.py +127 -48
  367. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
  368. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
  369. diffusers/pipelines/qwenimage/__init__.py +55 -0
  370. diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
  371. diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
  372. diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
  373. diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
  374. diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
  375. diffusers/pipelines/sana/__init__.py +4 -0
  376. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  377. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  378. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  379. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  380. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  381. diffusers/pipelines/shap_e/camera.py +1 -1
  382. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  383. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  384. diffusers/pipelines/shap_e/renderer.py +3 -3
  385. diffusers/pipelines/skyreels_v2/__init__.py +59 -0
  386. diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
  387. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
  388. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
  389. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
  390. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
  391. diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
  392. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  393. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  394. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  395. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  396. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  397. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  398. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  399. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  400. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  401. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  402. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  403. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
  404. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  405. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
  406. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
  407. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
  408. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  409. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  410. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  411. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  412. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  413. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  414. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  415. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  416. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  417. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  418. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  419. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  420. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
  421. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  422. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  423. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  424. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  425. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  426. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  427. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  428. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  429. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  430. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  431. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  432. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  433. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  434. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  435. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  436. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  437. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  438. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  439. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  440. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  441. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  442. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  443. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  444. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  445. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  446. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  447. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  448. diffusers/pipelines/unclip/text_proj.py +2 -2
  449. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  450. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  451. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  452. diffusers/pipelines/visualcloze/__init__.py +52 -0
  453. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  454. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  455. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  456. diffusers/pipelines/wan/__init__.py +2 -0
  457. diffusers/pipelines/wan/pipeline_wan.py +91 -30
  458. diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
  459. diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
  460. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  461. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  462. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  463. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  464. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  465. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  466. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  467. diffusers/quantizers/__init__.py +3 -1
  468. diffusers/quantizers/base.py +17 -1
  469. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  470. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  471. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  472. diffusers/quantizers/gguf/utils.py +108 -16
  473. diffusers/quantizers/pipe_quant_config.py +202 -0
  474. diffusers/quantizers/quantization_config.py +18 -16
  475. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  476. diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
  477. diffusers/schedulers/__init__.py +3 -1
  478. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  479. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  480. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  481. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  482. diffusers/schedulers/scheduling_ddim.py +8 -8
  483. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  484. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  485. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  486. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  487. diffusers/schedulers/scheduling_ddpm.py +9 -9
  488. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  489. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  490. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  491. diffusers/schedulers/scheduling_deis_multistep.py +16 -9
  492. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  493. diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
  494. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  495. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  496. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  497. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
  498. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  499. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  500. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  501. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  502. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  503. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  504. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  505. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  506. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  507. diffusers/schedulers/scheduling_ipndm.py +2 -2
  508. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  509. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  510. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  511. diffusers/schedulers/scheduling_lcm.py +3 -3
  512. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  513. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  514. diffusers/schedulers/scheduling_pndm.py +4 -4
  515. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  516. diffusers/schedulers/scheduling_repaint.py +9 -9
  517. diffusers/schedulers/scheduling_sasolver.py +15 -15
  518. diffusers/schedulers/scheduling_scm.py +1 -2
  519. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  520. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  521. diffusers/schedulers/scheduling_tcd.py +3 -3
  522. diffusers/schedulers/scheduling_unclip.py +5 -5
  523. diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
  524. diffusers/schedulers/scheduling_utils.py +3 -3
  525. diffusers/schedulers/scheduling_utils_flax.py +2 -2
  526. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  527. diffusers/training_utils.py +91 -5
  528. diffusers/utils/__init__.py +15 -0
  529. diffusers/utils/accelerate_utils.py +1 -1
  530. diffusers/utils/constants.py +4 -0
  531. diffusers/utils/doc_utils.py +1 -1
  532. diffusers/utils/dummy_pt_objects.py +432 -0
  533. diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
  534. diffusers/utils/dynamic_modules_utils.py +85 -8
  535. diffusers/utils/export_utils.py +1 -1
  536. diffusers/utils/hub_utils.py +33 -17
  537. diffusers/utils/import_utils.py +151 -18
  538. diffusers/utils/logging.py +1 -1
  539. diffusers/utils/outputs.py +2 -1
  540. diffusers/utils/peft_utils.py +96 -10
  541. diffusers/utils/state_dict_utils.py +20 -3
  542. diffusers/utils/testing_utils.py +195 -17
  543. diffusers/utils/torch_utils.py +43 -5
  544. diffusers/video_processor.py +2 -2
  545. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
  546. diffusers-0.35.0.dist-info/RECORD +703 -0
  547. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
  548. diffusers-0.33.1.dist-info/RECORD +0 -608
  549. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
  550. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
  551. {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -19,7 +19,6 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
19
19
  import numpy as np
20
20
  import PIL.Image
21
21
  import torch
22
- import torch.nn.functional as F
23
22
  from transformers import (
24
23
  CLIPImageProcessor,
25
24
  CLIPTextModel,
@@ -38,7 +37,13 @@ from ...loaders import (
38
37
  StableDiffusionXLLoraLoaderMixin,
39
38
  TextualInversionLoaderMixin,
40
39
  )
41
- from ...models import AutoencoderKL, ControlNetModel, ControlNetUnionModel, ImageProjection, UNet2DConditionModel
40
+ from ...models import (
41
+ AutoencoderKL,
42
+ ControlNetUnionModel,
43
+ ImageProjection,
44
+ MultiControlNetUnionModel,
45
+ UNet2DConditionModel,
46
+ )
42
47
  from ...models.attention_processor import (
43
48
  AttnProcessor2_0,
44
49
  XFormersAttnProcessor,
@@ -53,7 +58,7 @@ from ...utils import (
53
58
  scale_lora_layers,
54
59
  unscale_lora_layers,
55
60
  )
56
- from ...utils.torch_utils import is_compiled_module, randn_tensor
61
+ from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
57
62
  from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
58
63
  from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
59
64
 
@@ -262,7 +267,9 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
262
267
  tokenizer: CLIPTokenizer,
263
268
  tokenizer_2: CLIPTokenizer,
264
269
  unet: UNet2DConditionModel,
265
- controlnet: ControlNetUnionModel,
270
+ controlnet: Union[
271
+ ControlNetUnionModel, List[ControlNetUnionModel], Tuple[ControlNetUnionModel], MultiControlNetUnionModel
272
+ ],
266
273
  scheduler: KarrasDiffusionSchedulers,
267
274
  requires_aesthetics_score: bool = False,
268
275
  force_zeros_for_empty_prompt: bool = True,
@@ -272,8 +279,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
272
279
  ):
273
280
  super().__init__()
274
281
 
275
- if not isinstance(controlnet, ControlNetUnionModel):
276
- raise ValueError("Expected `controlnet` to be of type `ControlNetUnionModel`.")
282
+ if isinstance(controlnet, (list, tuple)):
283
+ controlnet = MultiControlNetUnionModel(controlnet)
277
284
 
278
285
  self.register_modules(
279
286
  vae=vae,
@@ -616,7 +623,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
616
623
  def prepare_extra_step_kwargs(self, generator, eta):
617
624
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
618
625
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
619
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
626
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
620
627
  # and should be between [0, 1]
621
628
 
622
629
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -649,6 +656,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
649
656
  controlnet_conditioning_scale=1.0,
650
657
  control_guidance_start=0.0,
651
658
  control_guidance_end=1.0,
659
+ control_mode=None,
652
660
  callback_on_step_end_tensor_inputs=None,
653
661
  ):
654
662
  if strength < 0 or strength > 1:
@@ -722,28 +730,44 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
722
730
  "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
723
731
  )
724
732
 
733
+ # `prompt` needs more sophisticated handling when there are multiple
734
+ # conditionings.
735
+ if isinstance(self.controlnet, MultiControlNetUnionModel):
736
+ if isinstance(prompt, list):
737
+ logger.warning(
738
+ f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
739
+ " prompts. The conditionings will be fixed across the prompts."
740
+ )
741
+
725
742
  # Check `image`
726
- is_compiled = hasattr(F, "scaled_dot_product_attention") and isinstance(
727
- self.controlnet, torch._dynamo.eval_frame.OptimizedModule
728
- )
729
- if (
730
- isinstance(self.controlnet, ControlNetModel)
731
- or is_compiled
732
- and isinstance(self.controlnet._orig_mod, ControlNetModel)
733
- ):
734
- self.check_image(image, prompt, prompt_embeds)
735
- elif (
736
- isinstance(self.controlnet, ControlNetUnionModel)
737
- or is_compiled
738
- and isinstance(self.controlnet._orig_mod, ControlNetUnionModel)
739
- ):
740
- self.check_image(image, prompt, prompt_embeds)
741
- else:
742
- assert False
743
+ controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
744
+
745
+ if isinstance(controlnet, ControlNetUnionModel):
746
+ for image_ in image:
747
+ self.check_image(image_, prompt, prompt_embeds)
748
+ elif isinstance(controlnet, MultiControlNetUnionModel):
749
+ if not isinstance(image, list):
750
+ raise TypeError("For multiple controlnets: `image` must be type `list`")
751
+ elif not all(isinstance(i, list) for i in image):
752
+ raise ValueError("For multiple controlnets: elements of `image` must be list of conditionings.")
753
+ elif len(image) != len(self.controlnet.nets):
754
+ raise ValueError(
755
+ f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
756
+ )
757
+
758
+ for images_ in image:
759
+ for image_ in images_:
760
+ self.check_image(image_, prompt, prompt_embeds)
743
761
 
744
762
  if not isinstance(control_guidance_start, (tuple, list)):
745
763
  control_guidance_start = [control_guidance_start]
746
764
 
765
+ if isinstance(controlnet, MultiControlNetUnionModel):
766
+ if len(control_guidance_start) != len(self.controlnet.nets):
767
+ raise ValueError(
768
+ f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
769
+ )
770
+
747
771
  if not isinstance(control_guidance_end, (tuple, list)):
748
772
  control_guidance_end = [control_guidance_end]
749
773
 
@@ -762,6 +786,15 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
762
786
  if end > 1.0:
763
787
  raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
764
788
 
789
+ # Check `control_mode`
790
+ if isinstance(controlnet, ControlNetUnionModel):
791
+ if max(control_mode) >= controlnet.config.num_control_type:
792
+ raise ValueError(f"control_mode: must be lower than {controlnet.config.num_control_type}.")
793
+ elif isinstance(controlnet, MultiControlNetUnionModel):
794
+ for _control_mode, _controlnet in zip(control_mode, self.controlnet.nets):
795
+ if max(_control_mode) >= _controlnet.config.num_control_type:
796
+ raise ValueError(f"control_mode: must be lower than {_controlnet.config.num_control_type}.")
797
+
765
798
  if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
766
799
  raise ValueError(
767
800
  "Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
@@ -876,7 +909,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
876
909
  # Offload text encoder if `enable_model_cpu_offload` was enabled
877
910
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
878
911
  self.text_encoder_2.to("cpu")
879
- torch.cuda.empty_cache()
912
+ empty_device_cache()
880
913
 
881
914
  image = image.to(device=device, dtype=dtype)
882
915
 
@@ -1024,7 +1057,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1024
1057
  return self._clip_skip
1025
1058
 
1026
1059
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1027
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1060
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1028
1061
  # corresponds to doing no classifier free guidance.
1029
1062
  @property
1030
1063
  def do_classifier_free_guidance(self):
@@ -1049,7 +1082,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1049
1082
  prompt: Union[str, List[str]] = None,
1050
1083
  prompt_2: Optional[Union[str, List[str]]] = None,
1051
1084
  image: PipelineImageInput = None,
1052
- control_image: PipelineImageInput = None,
1085
+ control_image: Union[PipelineImageInput, List[PipelineImageInput]] = None,
1053
1086
  height: Optional[int] = None,
1054
1087
  width: Optional[int] = None,
1055
1088
  strength: float = 0.8,
@@ -1074,7 +1107,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1074
1107
  guess_mode: bool = False,
1075
1108
  control_guidance_start: Union[float, List[float]] = 0.0,
1076
1109
  control_guidance_end: Union[float, List[float]] = 1.0,
1077
- control_mode: Optional[Union[int, List[int]]] = None,
1110
+ control_mode: Optional[Union[int, List[int], List[List[int]]]] = None,
1078
1111
  original_size: Tuple[int, int] = None,
1079
1112
  crops_coords_top_left: Tuple[int, int] = (0, 0),
1080
1113
  target_size: Tuple[int, int] = None,
@@ -1104,13 +1137,13 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1104
1137
  `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
1105
1138
  The initial image will be used as the starting point for the image generation process. Can also accept
1106
1139
  image latents as `image`, if passing latents directly, it will not be encoded again.
1107
- control_image (`PipelineImageInput`):
1108
- The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
1109
- the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also
1110
- be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If height
1111
- and/or width are passed, `image` is resized according to them. If multiple ControlNets are specified in
1112
- init, images must be passed as a list such that each element of the list can be correctly batched for
1113
- input to a single controlnet.
1140
+ control_image (`PipelineImageInput` or `List[PipelineImageInput]`, *optional*):
1141
+ The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
1142
+ specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
1143
+ as an image. The dimensions of the output image defaults to `image`'s dimensions. If height and/or
1144
+ width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
1145
+ images must be passed as a list such that each element of the list can be correctly batched for input
1146
+ to a single ControlNet.
1114
1147
  height (`int`, *optional*, defaults to the size of control_image):
1115
1148
  The height in pixels of the generated image. Anything below 512 pixels won't work well for
1116
1149
  [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
@@ -1129,11 +1162,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1129
1162
  The number of denoising steps. More denoising steps usually lead to a higher quality image at the
1130
1163
  expense of slower inference.
1131
1164
  guidance_scale (`float`, *optional*, defaults to 7.5):
1132
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1133
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1134
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1135
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1136
- usually at the expense of lower image quality.
1165
+ Guidance scale as defined in [Classifier-Free Diffusion
1166
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1167
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1168
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1169
+ the text `prompt`, usually at the expense of lower image quality.
1137
1170
  negative_prompt (`str` or `List[str]`, *optional*):
1138
1171
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1139
1172
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1144,8 +1177,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1144
1177
  num_images_per_prompt (`int`, *optional*, defaults to 1):
1145
1178
  The number of images to generate per prompt.
1146
1179
  eta (`float`, *optional*, defaults to 0.0):
1147
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1148
- [`schedulers.DDIMScheduler`], will be ignored for others.
1180
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1181
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1149
1182
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1150
1183
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1151
1184
  to make generation deterministic.
@@ -1184,16 +1217,21 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1184
1217
  `self.processor` in
1185
1218
  [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
1186
1219
  controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
1187
- The outputs of the controlnet are multiplied by `controlnet_conditioning_scale` before they are added
1188
- to the residual in the original unet. If multiple ControlNets are specified in init, you can set the
1189
- corresponding scale as a list.
1220
+ The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
1221
+ to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
1222
+ the corresponding scale as a list.
1190
1223
  guess_mode (`bool`, *optional*, defaults to `False`):
1191
1224
  In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
1192
1225
  you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
1193
1226
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
1194
- The percentage of total steps at which the controlnet starts applying.
1227
+ The percentage of total steps at which the ControlNet starts applying.
1195
1228
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
1196
- The percentage of total steps at which the controlnet stops applying.
1229
+ The percentage of total steps at which the ControlNet stops applying.
1230
+ control_mode (`int` or `List[int]` or `List[List[int]], *optional*):
1231
+ The control condition types for the ControlNet. See the ControlNet's model card forinformation on the
1232
+ available control modes. If multiple ControlNets are specified in `init`, control_mode should be a list
1233
+ where each ControlNet should have its corresponding control mode list. Should reflect the order of
1234
+ conditions in control_image
1197
1235
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
1198
1236
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
1199
1237
  `original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
@@ -1273,12 +1311,6 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1273
1311
 
1274
1312
  controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
1275
1313
 
1276
- # align format for control guidance
1277
- if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
1278
- control_guidance_start = len(control_guidance_end) * [control_guidance_start]
1279
- elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
1280
- control_guidance_end = len(control_guidance_start) * [control_guidance_end]
1281
-
1282
1314
  if not isinstance(control_image, list):
1283
1315
  control_image = [control_image]
1284
1316
  else:
@@ -1287,37 +1319,56 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1287
1319
  if not isinstance(control_mode, list):
1288
1320
  control_mode = [control_mode]
1289
1321
 
1290
- if len(control_image) != len(control_mode):
1291
- raise ValueError("Expected len(control_image) == len(control_type)")
1322
+ if isinstance(controlnet, MultiControlNetUnionModel):
1323
+ control_image = [[item] for item in control_image]
1324
+ control_mode = [[item] for item in control_mode]
1292
1325
 
1293
- num_control_type = controlnet.config.num_control_type
1326
+ # align format for control guidance
1327
+ if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
1328
+ control_guidance_start = len(control_guidance_end) * [control_guidance_start]
1329
+ elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
1330
+ control_guidance_end = len(control_guidance_start) * [control_guidance_end]
1331
+ elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
1332
+ mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
1333
+ control_guidance_start, control_guidance_end = (
1334
+ mult * [control_guidance_start],
1335
+ mult * [control_guidance_end],
1336
+ )
1337
+
1338
+ if isinstance(controlnet_conditioning_scale, float):
1339
+ mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
1340
+ controlnet_conditioning_scale = [controlnet_conditioning_scale] * mult
1294
1341
 
1295
1342
  # 1. Check inputs
1296
- control_type = [0 for _ in range(num_control_type)]
1297
- for _image, control_idx in zip(control_image, control_mode):
1298
- control_type[control_idx] = 1
1299
- self.check_inputs(
1300
- prompt,
1301
- prompt_2,
1302
- _image,
1303
- strength,
1304
- num_inference_steps,
1305
- callback_steps,
1306
- negative_prompt,
1307
- negative_prompt_2,
1308
- prompt_embeds,
1309
- negative_prompt_embeds,
1310
- pooled_prompt_embeds,
1311
- negative_pooled_prompt_embeds,
1312
- ip_adapter_image,
1313
- ip_adapter_image_embeds,
1314
- controlnet_conditioning_scale,
1315
- control_guidance_start,
1316
- control_guidance_end,
1317
- callback_on_step_end_tensor_inputs,
1318
- )
1343
+ self.check_inputs(
1344
+ prompt,
1345
+ prompt_2,
1346
+ control_image,
1347
+ strength,
1348
+ num_inference_steps,
1349
+ callback_steps,
1350
+ negative_prompt,
1351
+ negative_prompt_2,
1352
+ prompt_embeds,
1353
+ negative_prompt_embeds,
1354
+ pooled_prompt_embeds,
1355
+ negative_pooled_prompt_embeds,
1356
+ ip_adapter_image,
1357
+ ip_adapter_image_embeds,
1358
+ controlnet_conditioning_scale,
1359
+ control_guidance_start,
1360
+ control_guidance_end,
1361
+ control_mode,
1362
+ callback_on_step_end_tensor_inputs,
1363
+ )
1319
1364
 
1320
- control_type = torch.Tensor(control_type)
1365
+ if isinstance(controlnet, ControlNetUnionModel):
1366
+ control_type = torch.zeros(controlnet.config.num_control_type).scatter_(0, torch.tensor(control_mode), 1)
1367
+ elif isinstance(controlnet, MultiControlNetUnionModel):
1368
+ control_type = [
1369
+ torch.zeros(controlnet_.config.num_control_type).scatter_(0, torch.tensor(control_mode_), 1)
1370
+ for control_mode_, controlnet_ in zip(control_mode, self.controlnet.nets)
1371
+ ]
1321
1372
 
1322
1373
  self._guidance_scale = guidance_scale
1323
1374
  self._clip_skip = clip_skip
@@ -1334,7 +1385,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1334
1385
 
1335
1386
  device = self._execution_device
1336
1387
 
1337
- global_pool_conditions = controlnet.config.global_pool_conditions
1388
+ global_pool_conditions = (
1389
+ controlnet.config.global_pool_conditions
1390
+ if isinstance(controlnet, ControlNetUnionModel)
1391
+ else controlnet.nets[0].config.global_pool_conditions
1392
+ )
1338
1393
  guess_mode = guess_mode or global_pool_conditions
1339
1394
 
1340
1395
  # 3.1. Encode input prompt
@@ -1372,22 +1427,55 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1372
1427
  self.do_classifier_free_guidance,
1373
1428
  )
1374
1429
 
1375
- # 4. Prepare image and controlnet_conditioning_image
1430
+ # 4.1 Prepare image
1376
1431
  image = self.image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
1377
1432
 
1378
- for idx, _ in enumerate(control_image):
1379
- control_image[idx] = self.prepare_control_image(
1380
- image=control_image[idx],
1381
- width=width,
1382
- height=height,
1383
- batch_size=batch_size * num_images_per_prompt,
1384
- num_images_per_prompt=num_images_per_prompt,
1385
- device=device,
1386
- dtype=controlnet.dtype,
1387
- do_classifier_free_guidance=self.do_classifier_free_guidance,
1388
- guess_mode=guess_mode,
1389
- )
1390
- height, width = control_image[idx].shape[-2:]
1433
+ # 4.2 Prepare control images
1434
+ if isinstance(controlnet, ControlNetUnionModel):
1435
+ control_images = []
1436
+
1437
+ for image_ in control_image:
1438
+ image_ = self.prepare_control_image(
1439
+ image=image_,
1440
+ width=width,
1441
+ height=height,
1442
+ batch_size=batch_size * num_images_per_prompt,
1443
+ num_images_per_prompt=num_images_per_prompt,
1444
+ device=device,
1445
+ dtype=controlnet.dtype,
1446
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1447
+ guess_mode=guess_mode,
1448
+ )
1449
+
1450
+ control_images.append(image_)
1451
+
1452
+ control_image = control_images
1453
+ height, width = control_image[0].shape[-2:]
1454
+
1455
+ elif isinstance(controlnet, MultiControlNetUnionModel):
1456
+ control_images = []
1457
+
1458
+ for control_image_ in control_image:
1459
+ images = []
1460
+
1461
+ for image_ in control_image_:
1462
+ image_ = self.prepare_control_image(
1463
+ image=image_,
1464
+ width=width,
1465
+ height=height,
1466
+ batch_size=batch_size * num_images_per_prompt,
1467
+ num_images_per_prompt=num_images_per_prompt,
1468
+ device=device,
1469
+ dtype=controlnet.dtype,
1470
+ do_classifier_free_guidance=self.do_classifier_free_guidance,
1471
+ guess_mode=guess_mode,
1472
+ )
1473
+
1474
+ images.append(image_)
1475
+ control_images.append(images)
1476
+
1477
+ control_image = control_images
1478
+ height, width = control_image[0][0].shape[-2:]
1391
1479
 
1392
1480
  # 5. Prepare timesteps
1393
1481
  self.scheduler.set_timesteps(num_inference_steps, device=device)
@@ -1414,10 +1502,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1414
1502
  # 7.1 Create tensor stating which controlnets to keep
1415
1503
  controlnet_keep = []
1416
1504
  for i in range(len(timesteps)):
1417
- controlnet_keep.append(
1418
- 1.0
1419
- - float(i / len(timesteps) < control_guidance_start or (i + 1) / len(timesteps) > control_guidance_end)
1420
- )
1505
+ keeps = [
1506
+ 1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
1507
+ for s, e in zip(control_guidance_start, control_guidance_end)
1508
+ ]
1509
+ controlnet_keep.append(keeps)
1421
1510
 
1422
1511
  # 7.2 Prepare added time ids & embeddings
1423
1512
  original_size = original_size or (height, width)
@@ -1460,12 +1549,25 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1460
1549
  prompt_embeds = prompt_embeds.to(device)
1461
1550
  add_text_embeds = add_text_embeds.to(device)
1462
1551
  add_time_ids = add_time_ids.to(device)
1463
- control_type = (
1464
- control_type.reshape(1, -1)
1465
- .to(device, dtype=prompt_embeds.dtype)
1466
- .repeat(batch_size * num_images_per_prompt * 2, 1)
1552
+
1553
+ control_type_repeat_factor = (
1554
+ batch_size * num_images_per_prompt * (2 if self.do_classifier_free_guidance else 1)
1467
1555
  )
1468
1556
 
1557
+ if isinstance(controlnet, ControlNetUnionModel):
1558
+ control_type = (
1559
+ control_type.reshape(1, -1)
1560
+ .to(self._execution_device, dtype=prompt_embeds.dtype)
1561
+ .repeat(control_type_repeat_factor, 1)
1562
+ )
1563
+ elif isinstance(controlnet, MultiControlNetUnionModel):
1564
+ control_type = [
1565
+ _control_type.reshape(1, -1)
1566
+ .to(self._execution_device, dtype=prompt_embeds.dtype)
1567
+ .repeat(control_type_repeat_factor, 1)
1568
+ for _control_type in control_type
1569
+ ]
1570
+
1469
1571
  # 8. Denoising loop
1470
1572
  num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
1471
1573
  with self.progress_bar(total=num_inference_steps) as progress_bar:
@@ -1574,7 +1676,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
1574
1676
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
1575
1677
  self.unet.to("cpu")
1576
1678
  self.controlnet.to("cpu")
1577
- torch.cuda.empty_cache()
1679
+ empty_device_cache()
1578
1680
 
1579
1681
  if not output_type == "latent":
1580
1682
  # make sure the VAE is in float32 mode, as it overflows in float16
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -144,7 +144,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
144
144
  r"""
145
145
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
146
146
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
147
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
147
+ Flawed](https://huggingface.co/papers/2305.08891).
148
148
 
149
149
  Args:
150
150
  noise_cfg (`torch.Tensor`):
@@ -463,7 +463,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
463
463
  def prepare_extra_step_kwargs(self, generator, eta):
464
464
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
465
465
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
466
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
466
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
467
467
  # and should be between [0, 1]
468
468
 
469
469
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -621,7 +621,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
621
621
  return self._guidance_rescale
622
622
 
623
623
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
624
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
624
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
625
625
  # corresponds to doing no classifier free guidance.
626
626
  @property
627
627
  def do_classifier_free_guidance(self):
@@ -709,8 +709,8 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
709
709
  num_images_per_prompt (`int`, *optional*, defaults to 1):
710
710
  The number of images to generate per prompt.
711
711
  eta (`float`, *optional*, defaults to 0.0):
712
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
713
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
712
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
713
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
714
714
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
715
715
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
716
716
  generation deterministic.
@@ -746,7 +746,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
746
746
  inputs will be passed.
747
747
  guidance_rescale (`float`, *optional*, defaults to 0.0):
748
748
  Rescale the noise_cfg according to `guidance_rescale`. Based on findings of [Common Diffusion Noise
749
- Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf). See Section 3.4
749
+ Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
750
750
  original_size (`Tuple[int, int]`, *optional*, defaults to `(1024, 1024)`):
751
751
  The original size of the image. Used to calculate the time ids.
752
752
  target_size (`Tuple[int, int]`, *optional*):
@@ -1009,7 +1009,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
1009
1009
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
1010
1010
 
1011
1011
  if self.do_classifier_free_guidance and guidance_rescale > 0.0:
1012
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
1012
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
1013
1013
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
1014
1014
 
1015
1015
  # compute the previous noisy sample x_t -> x_t-1
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved.
1
+ # Copyright 2025 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -719,7 +719,7 @@ class StableDiffusion3ControlNetPipeline(
719
719
  return self._clip_skip
720
720
 
721
721
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
722
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
722
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
723
723
  # corresponds to doing no classifier free guidance.
724
724
  @property
725
725
  def do_classifier_free_guidance(self):
@@ -877,11 +877,11 @@ class StableDiffusion3ControlNetPipeline(
877
877
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
878
878
  will be used.
879
879
  guidance_scale (`float`, *optional*, defaults to 5.0):
880
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
881
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
882
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
883
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
884
- usually at the expense of lower image quality.
880
+ Guidance scale as defined in [Classifier-Free Diffusion
881
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
882
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
883
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
884
+ the text `prompt`, usually at the expense of lower image quality.
885
885
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
886
886
  The percentage of total steps at which the ControlNet starts applying.
887
887
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stability AI, The HuggingFace Team and The AlimamaCreative Team. All rights reserved.
1
+ # Copyright 2025 Stability AI, The HuggingFace Team and The AlimamaCreative Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -769,7 +769,7 @@ class StableDiffusion3ControlNetInpaintingPipeline(
769
769
  return self._clip_skip
770
770
 
771
771
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
772
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
772
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
773
773
  # corresponds to doing no classifier free guidance.
774
774
  @property
775
775
  def do_classifier_free_guidance(self):
@@ -928,11 +928,11 @@ class StableDiffusion3ControlNetInpaintingPipeline(
928
928
  their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
929
929
  will be used.
930
930
  guidance_scale (`float`, *optional*, defaults to 5.0):
931
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
932
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
933
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
934
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
935
- usually at the expense of lower image quality.
931
+ Guidance scale as defined in [Classifier-Free Diffusion
932
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
933
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
934
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
935
+ the text `prompt`, usually at the expense of lower image quality.
936
936
  control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
937
937
  The percentage of total steps at which the ControlNet starts applying.
938
938
  control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):