diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +595 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
  384. diffusers-0.33.1.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
--- a/diffusers/pipelines/auto_pipeline.py
+++ b/diffusers/pipelines/auto_pipeline.py
@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2024 The HuggingFace Inc. team.
+# Copyright 2025 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from ..models.controlnets import ControlNetUnionModel
 from ..utils import is_sentencepiece_available
 from .aura_flow import AuraFlowPipeline
 from .cogview3 import CogView3PlusPipeline
+from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
 from .controlnet import (
     StableDiffusionControlNetImg2ImgPipeline,
     StableDiffusionControlNetInpaintPipeline,
@@ -33,6 +34,10 @@ from .controlnet import (
     StableDiffusionXLControlNetUnionInpaintPipeline,
     StableDiffusionXLControlNetUnionPipeline,
 )
+from .controlnet_sd3 import (
+    StableDiffusion3ControlNetInpaintingPipeline,
+    StableDiffusion3ControlNetPipeline,
+)
 from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline
 from .flux import (
     FluxControlImg2ImgPipeline,
@@ -64,10 +69,12 @@ from .kandinsky2_2 import (
 )
 from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
 from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
-from .lumina import LuminaText2ImgPipeline
+from .lumina import LuminaPipeline
+from .lumina2 import Lumina2Pipeline
 from .pag import (
     HunyuanDiTPAGPipeline,
     PixArtSigmaPAGPipeline,
+    SanaPAGPipeline,
     StableDiffusion3PAGImg2ImgPipeline,
     StableDiffusion3PAGPipeline,
     StableDiffusionControlNetPAGInpaintPipeline,
@@ -82,6 +89,7 @@ from .pag import (
     StableDiffusionXLPAGPipeline,
 )
 from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
+from .sana import SanaPipeline
 from .stable_cascade import StableCascadeCombinedPipeline, StableCascadeDecoderPipeline
 from .stable_diffusion import (
     StableDiffusionImg2ImgPipeline,
@@ -116,11 +124,14 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("stable-diffusion-controlnet", StableDiffusionControlNetPipeline),
         ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetPipeline),
         ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionPipeline),
+        ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetPipeline),
         ("wuerstchen", WuerstchenCombinedPipeline),
         ("cascade", StableCascadeCombinedPipeline),
         ("lcm", LatentConsistencyModelPipeline),
         ("pixart-alpha", PixArtAlphaPipeline),
         ("pixart-sigma", PixArtSigmaPipeline),
+        ("sana", SanaPipeline),
+        ("sana-pag", SanaPAGPipeline),
         ("stable-diffusion-pag", StableDiffusionPAGPipeline),
         ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGPipeline),
         ("stable-diffusion-xl-pag", StableDiffusionXLPAGPipeline),
@@ -130,8 +141,11 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
-        ("lumina", LuminaText2ImgPipeline),
+        ("lumina", LuminaPipeline),
+        ("lumina2", Lumina2Pipeline),
         ("cogview3", CogView3PlusPipeline),
+        ("cogview4", CogView4Pipeline),
+        ("cogview4-control", CogView4ControlPipeline),
     ]
 )

@@ -170,6 +184,7 @@ AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict(
         ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGInpaintPipeline),
         ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetInpaintPipeline),
         ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionInpaintPipeline),
+        ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetInpaintingPipeline),
         ("stable-diffusion-xl-pag", StableDiffusionXLPAGInpaintPipeline),
         ("flux", FluxInpaintPipeline),
         ("flux-controlnet", FluxControlNetInpaintPipeline),
@@ -293,7 +308,7 @@ class AutoPipelineForText2Image(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:

     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
     - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -385,7 +400,7 @@ class AutoPipelineForText2Image(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForText2Image

-        >>> pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         >>> image = pipeline(prompt).images[0]
         ```
         """
@@ -448,7 +463,7 @@ class AutoPipelineForText2Image(ConfigMixin):
         >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image

         >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False
         ... )

         >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i)
@@ -528,7 +543,9 @@ class AutoPipelineForText2Image(ConfigMixin):
             if k not in text_2_image_kwargs
         }

-        missing_modules = set(expected_modules) - set(pipeline._optional_components) - set(text_2_image_kwargs.keys())
+        missing_modules = (
+            set(expected_modules) - set(text_2_image_cls._optional_components) - set(text_2_image_kwargs.keys())
+        )

         if len(missing_modules) > 0:
             raise ValueError(
@@ -587,7 +604,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:

     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
     - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -679,7 +696,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForImage2Image

-        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> image = pipeline(prompt, image).images[0]
         ```
         """
@@ -754,7 +771,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
         >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image

         >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
-        ...     "runwayml/stable-diffusion-v1-5", requires_safety_checker=False
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False
         ... )

         >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)
@@ -838,7 +855,9 @@ class AutoPipelineForImage2Image(ConfigMixin):
             if k not in image_2_image_kwargs
         }

-        missing_modules = set(expected_modules) - set(pipeline._optional_components) - set(image_2_image_kwargs.keys())
+        missing_modules = (
+            set(expected_modules) - set(image_2_image_cls._optional_components) - set(image_2_image_kwargs.keys())
+        )

         if len(missing_modules) > 0:
             raise ValueError(
@@ -896,7 +915,7 @@ class AutoPipelineForInpainting(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:

     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at runwayml/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
     - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -988,7 +1007,7 @@ class AutoPipelineForInpainting(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForInpainting

-        >>> pipeline = AutoPipelineForInpainting.from_pretrained("runwayml/stable-diffusion-v1-5")
+        >>> pipeline = AutoPipelineForInpainting.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
@@ -1141,7 +1160,9 @@ class AutoPipelineForInpainting(ConfigMixin):
             if k not in inpainting_kwargs
         }

-        missing_modules = set(expected_modules) - set(pipeline._optional_components) - set(inpainting_kwargs.keys())
+        missing_modules = (
+            set(expected_modules) - set(inpainting_cls._optional_components) - set(inpainting_kwargs.keys())
+        )

         if len(missing_modules) > 0:
             raise ValueError(
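The auto_pipeline.py hunks above do three things: register the new Sana, Sana-PAG, Lumina2, CogView4, and Stable Diffusion 3 ControlNet entries in the AUTO_*_PIPELINES_MAPPING tables, rename LuminaText2ImgPipeline to LuminaPipeline, and fix from_pipe() to read _optional_components from the target pipeline class rather than the source pipeline instance. A minimal sketch of the round trip that fix affects, using the repo id the updated docstrings use:

import torch
from diffusers import AutoPipelineForImage2Image, AutoPipelineForText2Image

# "sana", "lumina2", "cogview4", ... now resolve through the text2image mapping;
# SD 1.5 resolves as before.
pipe_t2i = AutoPipelineForText2Image.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# from_pipe reuses the already-loaded components. The missing_modules check now
# consults the *target* class's _optional_components, so optional modules the
# source pipeline never had no longer raise a spurious ValueError.
pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)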
--- a/diffusers/pipelines/blip_diffusion/modeling_blip2.py
+++ b/diffusers/pipelines/blip_diffusion/modeling_blip2.py
@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -174,19 +174,16 @@ class Blip2QFormerEncoder(nn.Module):
                     )
                 use_cache = False

-                def create_custom_forward(module):
-                    def custom_forward(*inputs):
-                        return module(*inputs, past_key_value, output_attentions, query_length)
-
-                    return custom_forward
-
-                layer_outputs = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(layer_module),
+                layer_outputs = self._gradient_checkpointing_func(
+                    layer_module,
                     hidden_states,
                     attention_mask,
                     layer_head_mask,
                     encoder_hidden_states,
                     encoder_attention_mask,
+                    past_key_value,
+                    output_attentions,
+                    query_length,
                 )
             else:
                 layer_outputs = layer_module(
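This hunk swaps the hand-rolled create_custom_forward closure for the _gradient_checkpointing_func helper that diffusers 0.33 wires into its models, passing past_key_value, output_attentions, and query_length positionally instead of capturing them. A toy sketch of the underlying idea using stock torch.utils.checkpoint; the helper itself is internal to diffusers' model base class and is assumed to behave the same way here:

import torch
import torch.utils.checkpoint


class ToyLayer(torch.nn.Module):
    def forward(self, hidden_states, scale):
        return hidden_states * scale


layer = ToyLayer()
x = torch.randn(2, 4, requires_grad=True)
# Every input is passed positionally to the checkpointed call; no closure is
# needed to smuggle extra arguments through.
out = torch.utils.checkpoint.checkpoint(layer, x, 2.0, use_reentrant=False)
out.sum().backward()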
--- a/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py
+++ b/diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py
@@ -20,6 +20,7 @@ from transformers import CLIPTokenizer
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import PNDMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -30,8 +31,16 @@ from .modeling_blip2 import Blip2QFormerModel
 from .modeling_ctx_clip import ContextCLIPTextModel


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
 Examples:
     ```py
@@ -336,6 +345,9 @@ class BlipDiffusionPipeline(DiffusionPipeline):
                 latents,
             )["prev_sample"]

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         image = self.image_processor.postprocess(image, output_type=output_type)

--- a/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
+++ b/diffusers/pipelines/cogvideo/pipeline_cogvideox.py
@@ -26,12 +26,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -183,14 +190,12 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7

         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)

@@ -489,6 +494,10 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
     def attention_kwargs(self):
         return self._attention_kwargs

+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -622,6 +631,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False

         # 2. Default call parameters
@@ -700,6 +710,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if self.interrupt:
                     continue

+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

@@ -755,6 +766,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             # Discard any padding frames that were added for CogVideoX 1.5
             latents = latents[:, additional_frames:]
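The guarded torch_xla import plus xm.mark_step() after each scheduler step is the recurring pattern of this release; the BlipDiffusion hunk above and the CogVideoX/CogView3 hunks below repeat it verbatim. mark_step() cuts the lazily traced graph once per denoising step, so XLA compiles and executes step-sized graphs instead of accumulating the whole loop. A reduced sketch; run_loop and denoise_step are hypothetical stand-ins for the pipeline loop and model call:

from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False


def run_loop(scheduler, denoise_step, latents, timesteps):
    for t in timesteps:
        noise_pred = denoise_step(latents, t)
        latents = scheduler.step(noise_pred, t, latents)["prev_sample"]
        if XLA_AVAILABLE:
            xm.mark_step()  # flush the traced graph to the XLA device
    return latents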
--- a/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
+++ b/diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py
@@ -27,12 +27,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -190,14 +197,12 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7

         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)

@@ -535,6 +540,10 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
     def attention_kwargs(self):
         return self._attention_kwargs

+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -675,6 +684,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False

         # 2. Default call parameters
@@ -761,6 +771,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if self.interrupt:
                     continue

+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

@@ -810,6 +821,11 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             video = self.decode_latents(latents)
             video = self.video_processor.postprocess_video(video=video, output_type=output_type)
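Beyond the XLA hook, these CogVideoX hunks add _current_timestep bookkeeping: initialized to None, set to t inside the denoising loop, reset to None afterwards, and exposed through the read-only current_timestep property. A hedged sketch of observing it from a step-end callback; CogVideoXPipeline already accepts callback_on_step_end, and the checkpoint id is an assumption, not taken from this diff:

import torch
from diffusers import CogVideoXPipeline

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)
pipe.to("cuda")


def log_timestep(pipeline, step, timestep, callback_kwargs):
    # current_timestep mirrors the loop variable t; it is None outside the loop.
    print(f"step {step}: current_timestep={pipeline.current_timestep}")
    return callback_kwargs


video = pipe(
    "a panda playing a guitar",
    num_inference_steps=50,
    callback_on_step_end=log_timestep,
).frames[0]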
--- a/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
+++ b/diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py
@@ -29,6 +29,7 @@ from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -37,6 +38,13 @@ from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -203,14 +211,12 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
             scheduler=scheduler,
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7

         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)

@@ -585,6 +591,10 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
     def attention_kwargs(self):
         return self._attention_kwargs

+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -722,6 +732,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
             negative_prompt_embeds=negative_prompt_embeds,
         )
         self._guidance_scale = guidance_scale
+        self._current_timestep = None
         self._attention_kwargs = attention_kwargs
         self._interrupt = False

@@ -809,6 +820,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if self.interrupt:
                     continue

+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

@@ -868,6 +880,11 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             # Discard any padding frames that were added for CogVideoX 1.5
             latents = latents[:, additional_frames:]
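The other recurring rewrite in these constructors shortens hasattr(self, "vae") and self.vae is not None to getattr(self, "vae", None). The two guards agree here because nn.Module does not override __bool__, so a loaded component is always truthy, while a missing or None component is falsy either way. A quick self-contained check of that equivalence:

import torch.nn as nn


class Holder:
    pass


h = Holder()
# Attribute absent: both guards are falsy.
assert (hasattr(h, "vae") and h.vae is not None) == bool(getattr(h, "vae", None))

for vae in (None, nn.Identity()):
    h.vae = vae
    old_guard = hasattr(h, "vae") and h.vae is not None
    new_guard = bool(getattr(h, "vae", None))
    assert old_guard == new_guard  # equivalent for module-or-None components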
--- a/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
+++ b/diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py
@@ -27,12 +27,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -206,14 +213,12 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
         )

         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7

         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)

@@ -559,6 +564,10 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
     def attention_kwargs(self):
         return self._attention_kwargs

+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -695,6 +704,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False

         # 2. Default call parameters
@@ -781,6 +791,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if self.interrupt:
                     continue

+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

@@ -836,6 +847,11 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             video = self.decode_latents(latents)
             video = self.video_processor.postprocess_video(video=video, output_type=output_type)
--- a/diffusers/pipelines/cogview3/pipeline_cogview3plus.py
+++ b/diffusers/pipelines/cogview3/pipeline_cogview3plus.py
@@ -24,11 +24,18 @@ from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, CogView3PlusTransformer2DModel
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from .pipeline_output import CogView3PipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -153,9 +160,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
         self.register_modules(
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
-        self.vae_scale_factor = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
-        )
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8

         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

@@ -656,6 +661,9 @@ class CogView3PlusPipeline(DiffusionPipeline):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
                 0
--- /dev/null
+++ b/diffusers/pipelines/cogview4/__init__.py
@@ -0,0 +1,49 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["CogView4PlusPipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_cogview4"] = ["CogView4Pipeline"]
+    _import_structure["pipeline_cogview4_control"] = ["CogView4ControlPipeline"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_cogview4 import CogView4Pipeline
+        from .pipeline_cogview4_control import CogView4ControlPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)
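The new cogview4/__init__.py follows the library's standard lazy-import layout: _import_structure maps submodules to their exports, _LazyModule defers the actual imports until an attribute is first accessed, and dummy objects stand in when torch or transformers is unavailable. A hedged usage sketch of the pipeline this module exposes; the checkpoint id and dtype are assumptions for illustration, not taken from this diff:

import torch
from diffusers import CogView4Pipeline  # resolved lazily through _LazyModule

pipe = CogView4Pipeline.from_pretrained("THUDM/CogView4-6B", torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = pipe("a red panda reading a newspaper").images[0]
image.save("cogview4.png")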