diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
--- a/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
+++ b/diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py
@@ -43,6 +43,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -53,8 +54,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -248,7 +257,8 @@ class StableDiffusionXLAdapterPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please, refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for details.
+            Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            details.
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
@@ -292,9 +302,13 @@ class StableDiffusionXLAdapterPipeline(
             image_encoder=image_encoder,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )
 
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
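The guarded `vae_scale_factor` lets the pipeline be constructed without a VAE: `getattr(self, "vae", None)` falls back to the SDXL default of 8 instead of raising an `AttributeError`. A minimal sketch of the arithmetic, assuming the standard SDXL VAE config (the `block_out_channels` values below are illustrative of that config, not read from this diff):

# Minimal sketch, assuming the standard SDXL VAE config; both branches agree on 8.
block_out_channels = [128, 256, 512, 512]  # typical SDXL VAE config (assumption)

def scale_factor(vae_channels):
    return 2 ** (len(vae_channels) - 1) if vae_channels is not None else 8

assert scale_factor(block_out_channels) == 8  # with a VAE: 2 ** 3
assert scale_factor(None) == 8                # without a VAE: explicit fallback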
@@ -422,7 +436,9 @@ class StableDiffusionXLAdapterPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
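The new guard serves two purposes: a caller-supplied `pooled_prompt_embeds` is no longer overwritten, and the first encoder output is only treated as a pooled vector when it is actually 2-D (per-token hidden states are 3-D). A toy illustration of the shape check, with hypothetical tensor sizes:

# Toy illustration with hypothetical sizes: a pooled output is (batch, dim),
# a hidden-state sequence is (batch, seq_len, dim).
import torch

pooled = torch.zeros(1, 1280)        # ndim == 2 -> accepted as the pooled output
sequence = torch.zeros(1, 77, 1280)  # ndim == 3 -> skipped by the guard

for candidate in (pooled, sequence):
    pooled_prompt_embeds = None
    if pooled_prompt_embeds is None and candidate.ndim == 2:
        pooled_prompt_embeds = candidate
    print(tuple(candidate.shape), "->", "used" if pooled_prompt_embeds is not None else "ignored")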
@@ -481,8 +497,10 @@ class StableDiffusionXLAdapterPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1261,6 +1279,9 @@ class StableDiffusionXLAdapterPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
             needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
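The same three-part recipe recurs throughout this release (and in the text-to-video, UnCLIP, and UniDiffuser hunks below): probe for `torch_xla` once at import time, then call `xm.mark_step()` at the end of each denoising iteration so the lazily built XLA graph is materialized step by step on TPU. A standalone sketch of the pattern; the loop body is a placeholder, not the pipelines' actual code:

# Sketch of the XLA pattern these hunks apply; the denoising body is a placeholder.
from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False


def denoising_loop(scheduler, timesteps, latents):
    for t in timesteps:
        # ... predict noise and call scheduler.step(...) here ...
        if XLA_AVAILABLE:
            xm.mark_step()  # flush the pending XLA graph so each step executes
    return latents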
--- a/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
+++ b/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py
@@ -25,6 +25,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -36,8 +37,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -105,7 +114,7 @@ class TextToVideoSDPipeline(
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -627,6 +636,9 @@ class TextToVideoSDPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 8. Post processing
         if output_type == "latent":
             video = latents
--- a/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
+++ b/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py
@@ -26,6 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -37,8 +38,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -140,7 +149,7 @@ class VideoToVideoSDPipeline(
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -679,6 +688,9 @@ class VideoToVideoSDPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # manually for max memory savings
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
--- a/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
+++ b/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py
@@ -11,16 +11,30 @@ from torch.nn.functional import grid_sample
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer
 
 from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, BaseOutput, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    BaseOutput,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionSafetyChecker
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -282,7 +296,11 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
 
 
 class TextToVideoZeroPipeline(
-    DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
+    DiffusionPipeline,
+    StableDiffusionMixin,
+    TextualInversionLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for zero-shot text-to-video generation using Stable Diffusion.
@@ -304,8 +322,8 @@ class TextToVideoZeroPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/runwayml/stable-diffusion-v1-5) for more details
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`CLIPImageProcessor`]):
             A [`CLIPImageProcessor`] to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -340,7 +358,7 @@ class TextToVideoZeroPipeline(
                 " it only for use-cases that involve analyzing network behavior or auditing its results. For more"
                 " information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
             )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
     def forward_loop(self, x_t0, t0, t1, generator):
@@ -440,6 +458,10 @@ class TextToVideoZeroPipeline(
                 if callback is not None and i % callback_steps == 0:
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
+
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         return latents.clone().detach()
 
     # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
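With `FromSingleFileMixin` added to the base classes, `TextToVideoZeroPipeline` gains `from_single_file`. A hedged usage sketch; the checkpoint path is a placeholder for any Stable Diffusion v1.x single-file checkpoint:

# Usage sketch; the checkpoint path is a placeholder, not part of this diff.
import torch
from diffusers import TextToVideoZeroPipeline

pipe = TextToVideoZeroPipeline.from_single_file(
    "path/to/v1-5-pruned-emaonly.safetensors",
    torch_dtype=torch.float16,
).to("cuda")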
--- a/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
+++ b/diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py
@@ -42,6 +42,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
 
 
+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -409,10 +419,14 @@ class TextToVideoZeroSDXLPipeline(
             feature_extractor=feature_extractor,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )
 
         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()
 
@@ -705,7 +719,9 @@ class TextToVideoZeroSDXLPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -764,8 +780,10 @@ class TextToVideoZeroSDXLPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -922,6 +940,10 @@ class TextToVideoZeroSDXLPipeline(
                 progress_bar.update()
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
+
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         return latents.clone().detach()
 
     @torch.no_grad()
--- /dev/null
+++ b/diffusers/pipelines/transformers_loading_utils.py
@@ -0,0 +1,121 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+import os
+import tempfile
+from typing import TYPE_CHECKING, Dict
+
+from huggingface_hub import DDUFEntry
+from tqdm import tqdm
+
+from ..utils import is_safetensors_available, is_transformers_available, is_transformers_version
+
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+if is_transformers_available():
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+if is_safetensors_available():
+    import safetensors.torch
+
+
+def _load_tokenizer_from_dduf(
+    cls: "PreTrainedTokenizer", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
+) -> "PreTrainedTokenizer":
+    """
+    Load a tokenizer from a DDUF archive.
+
+    In practice, `transformers` do not provide a way to load a tokenizer from a DDUF archive. This function is a
+    workaround by extracting the tokenizer files from the DDUF archive and loading the tokenizer from the extracted
+    files. There is an extra cost of extracting the files, but of limited impact as the tokenizer files are usually
+    small-ish.
+    """
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        for entry_name, entry in dduf_entries.items():
+            if entry_name.startswith(name + "/"):
+                tmp_entry_path = os.path.join(tmp_dir, *entry_name.split("/"))
+                # need to create intermediary directory if they don't exist
+                os.makedirs(os.path.dirname(tmp_entry_path), exist_ok=True)
+                with open(tmp_entry_path, "wb") as f:
+                    with entry.as_mmap() as mm:
+                        f.write(mm)
+        return cls.from_pretrained(os.path.dirname(tmp_entry_path), **kwargs)
+
+
+def _load_transformers_model_from_dduf(
+    cls: "PreTrainedModel", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
+) -> "PreTrainedModel":
+    """
+    Load a transformers model from a DDUF archive.
+
+    In practice, `transformers` do not provide a way to load a model from a DDUF archive. This function is a workaround
+    by instantiating a model from the config file and loading the weights from the DDUF archive directly.
+    """
+    config_file = dduf_entries.get(f"{name}/config.json")
+    if config_file is None:
+        raise EnvironmentError(
+            f"Could not find a config.json file for component {name} in DDUF file (contains {dduf_entries.keys()})."
+        )
+    generation_config = dduf_entries.get(f"{name}/generation_config.json", None)
+
+    weight_files = [
+        entry
+        for entry_name, entry in dduf_entries.items()
+        if entry_name.startswith(f"{name}/") and entry_name.endswith(".safetensors")
+    ]
+    if not weight_files:
+        raise EnvironmentError(
+            f"Could not find any weight file for component {name} in DDUF file (contains {dduf_entries.keys()})."
+        )
+    if not is_safetensors_available():
+        raise EnvironmentError(
+            "Safetensors is not available, cannot load model from DDUF. Please `pip install safetensors`."
+        )
+    if is_transformers_version("<", "4.47.0"):
+        raise ImportError(
+            "You need to install `transformers>4.47.0` in order to load a transformers model from a DDUF file. "
+            "You can install it with: `pip install --upgrade transformers`"
+        )
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        from transformers import AutoConfig, GenerationConfig
+
+        tmp_config_file = os.path.join(tmp_dir, "config.json")
+        with open(tmp_config_file, "w") as f:
+            f.write(config_file.read_text())
+        config = AutoConfig.from_pretrained(tmp_config_file)
+        if generation_config is not None:
+            tmp_generation_config_file = os.path.join(tmp_dir, "generation_config.json")
+            with open(tmp_generation_config_file, "w") as f:
+                f.write(generation_config.read_text())
+            generation_config = GenerationConfig.from_pretrained(tmp_generation_config_file)
+        state_dict = {}
+        with contextlib.ExitStack() as stack:
+            for entry in tqdm(weight_files, desc="Loading state_dict"):  # Loop over safetensors files
+                # Memory-map the safetensors file
+                mmap = stack.enter_context(entry.as_mmap())
+                # Load tensors from the memory-mapped file
+                tensors = safetensors.torch.load(mmap)
+                # Update the state dictionary with tensors
+                state_dict.update(tensors)
+        return cls.from_pretrained(
+            pretrained_model_name_or_path=None,
+            config=config,
+            generation_config=generation_config,
+            state_dict=state_dict,
+            **kwargs,
+        )
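These two helpers are private plumbing for DDUF, a single-file archive format for entire pipelines: `DiffusionPipeline.from_pretrained` builds the `Dict[str, DDUFEntry]` mapping from the archive and hands each `transformers` component to the appropriate loader above. A sketch of the public entry point, assuming a Hub repo that hosts a `.dduf` archive (the repo id and archive name are illustrative):

# Sketch of the public DDUF entry point that ends up in these helpers;
# the repo id and archive name are illustrative, not part of this diff.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "DDUF/FLUX.1-dev-DDUF",       # illustrative repo containing a DDUF archive
    dduf_file="FLUX.1-dev.dduf",  # archive entry to load the whole pipeline from
    torch_dtype=torch.bfloat16,
)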
--- a/diffusers/pipelines/unclip/pipeline_unclip.py
+++ b/diffusers/pipelines/unclip/pipeline_unclip.py
@@ -22,12 +22,19 @@ from transformers.models.clip.modeling_clip import CLIPTextModelOutput
 
 from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
 from ...schedulers import UnCLIPScheduler
-from ...utils import logging
+from ...utils import is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -474,6 +481,9 @@ class UnCLIPPipeline(DiffusionPipeline):
                 noise_pred, t, super_res_latents, prev_timestep=prev_timestep, generator=generator
             ).prev_sample
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = super_res_latents
         # done super res
 
--- a/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py
+++ b/diffusers/pipelines/unclip/pipeline_unclip_image_variation.py
@@ -27,12 +27,19 @@ from transformers import (
 
 from ...models import UNet2DConditionModel, UNet2DModel
 from ...schedulers import UnCLIPScheduler
-from ...utils import logging
+from ...utils import is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -400,6 +407,9 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
                 noise_pred, t, super_res_latents, prev_timestep=prev_timestep, generator=generator
             ).prev_sample
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = super_res_latents
 
         # done super res
--- a/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
+++ b/diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py
@@ -18,7 +18,14 @@ from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMix
 from ...models import AutoencoderKL
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, deprecate, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.outputs import BaseOutput
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -26,6 +33,13 @@ from .modeling_text_decoder import UniDiffuserTextDecoder
 from .modeling_uvit import UniDiffuserModel
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -117,7 +131,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
             scheduler=scheduler,
         )
 
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
         self.num_channels_latents = vae.config.latent_channels
@@ -1378,6 +1392,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 9. Post-processing
         image = None
         text = None
--- /dev/null
+++ b/diffusers/pipelines/wan/__init__.py
@@ -0,0 +1,51 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_wan"] = ["WanPipeline"]
+    _import_structure["pipeline_wan_i2v"] = ["WanImageToVideoPipeline"]
+    _import_structure["pipeline_wan_video2video"] = ["WanVideoToVideoPipeline"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_wan import WanPipeline
+        from .pipeline_wan_i2v import WanImageToVideoPipeline
+        from .pipeline_wan_video2video import WanVideoToVideoPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
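This is the lazy-import boilerplate used across `diffusers` subpackages: at normal import time the module replaces itself in `sys.modules` with a `_LazyModule`, so `WanPipeline` and friends are only resolved on first attribute access, while the `TYPE_CHECKING`/`DIFFUSERS_SLOW_IMPORT` branch keeps eager imports available for static type checkers. Either import style below goes through the lazy lookup:

# Nothing heavy is imported until the attribute is first touched.
from diffusers.pipelines.wan import WanPipeline  # resolved lazily via _LazyModule

import diffusers.pipelines.wan as wan
pipeline_cls = wan.WanImageToVideoPipeline       # also resolved on attribute access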
--- /dev/null
+++ b/diffusers/pipelines/wan/pipeline_output.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+
+import torch
+
+from diffusers.utils import BaseOutput
+
+
+@dataclass
+class WanPipelineOutput(BaseOutput):
+    r"""
+    Output class for Wan pipelines.
+
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
+            denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`.
+    """
+
+    frames: torch.Tensor
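For context, a hedged end-to-end sketch of the new Wan text-to-video pipeline whose output class is defined above; the model id, prompt, and generation settings are illustrative rather than taken from this diff:

# End-to-end sketch; model id, prompt, and settings are illustrative.
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video

pipe = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers", torch_dtype=torch.bfloat16
).to("cuda")

output = pipe(prompt="A cat walks on the grass, realistic style", num_frames=81)
export_to_video(output.frames[0], "wan_t2v.mp4", fps=15)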