diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (389)
  1. diffusers/__init__.py +186 -3
  2. diffusers/configuration_utils.py +40 -12
  3. diffusers/dependency_versions_table.py +9 -2
  4. diffusers/hooks/__init__.py +9 -0
  5. diffusers/hooks/faster_cache.py +653 -0
  6. diffusers/hooks/group_offloading.py +793 -0
  7. diffusers/hooks/hooks.py +236 -0
  8. diffusers/hooks/layerwise_casting.py +245 -0
  9. diffusers/hooks/pyramid_attention_broadcast.py +311 -0
  10. diffusers/loaders/__init__.py +6 -0
  11. diffusers/loaders/ip_adapter.py +38 -30
  12. diffusers/loaders/lora_base.py +121 -86
  13. diffusers/loaders/lora_conversion_utils.py +504 -44
  14. diffusers/loaders/lora_pipeline.py +1769 -181
  15. diffusers/loaders/peft.py +167 -57
  16. diffusers/loaders/single_file.py +17 -2
  17. diffusers/loaders/single_file_model.py +53 -5
  18. diffusers/loaders/single_file_utils.py +646 -72
  19. diffusers/loaders/textual_inversion.py +9 -9
  20. diffusers/loaders/transformer_flux.py +8 -9
  21. diffusers/loaders/transformer_sd3.py +120 -39
  22. diffusers/loaders/unet.py +20 -7
  23. diffusers/models/__init__.py +22 -0
  24. diffusers/models/activations.py +9 -9
  25. diffusers/models/attention.py +0 -1
  26. diffusers/models/attention_processor.py +163 -25
  27. diffusers/models/auto_model.py +169 -0
  28. diffusers/models/autoencoders/__init__.py +2 -0
  29. diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
  30. diffusers/models/autoencoders/autoencoder_dc.py +106 -4
  31. diffusers/models/autoencoders/autoencoder_kl.py +0 -4
  32. diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
  33. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
  34. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
  35. diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
  36. diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
  37. diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
  38. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
  39. diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
  40. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
  41. diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
  42. diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
  43. diffusers/models/autoencoders/vae.py +31 -141
  44. diffusers/models/autoencoders/vq_model.py +3 -0
  45. diffusers/models/cache_utils.py +108 -0
  46. diffusers/models/controlnets/__init__.py +1 -0
  47. diffusers/models/controlnets/controlnet.py +3 -8
  48. diffusers/models/controlnets/controlnet_flux.py +14 -42
  49. diffusers/models/controlnets/controlnet_sd3.py +58 -34
  50. diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
  51. diffusers/models/controlnets/controlnet_union.py +27 -18
  52. diffusers/models/controlnets/controlnet_xs.py +7 -46
  53. diffusers/models/controlnets/multicontrolnet_union.py +196 -0
  54. diffusers/models/embeddings.py +18 -7
  55. diffusers/models/model_loading_utils.py +122 -80
  56. diffusers/models/modeling_flax_pytorch_utils.py +1 -1
  57. diffusers/models/modeling_flax_utils.py +1 -1
  58. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  59. diffusers/models/modeling_utils.py +617 -272
  60. diffusers/models/normalization.py +67 -14
  61. diffusers/models/resnet.py +1 -1
  62. diffusers/models/transformers/__init__.py +6 -0
  63. diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
  64. diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
  65. diffusers/models/transformers/consisid_transformer_3d.py +789 -0
  66. diffusers/models/transformers/dit_transformer_2d.py +5 -19
  67. diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
  68. diffusers/models/transformers/latte_transformer_3d.py +20 -15
  69. diffusers/models/transformers/lumina_nextdit2d.py +3 -1
  70. diffusers/models/transformers/pixart_transformer_2d.py +4 -19
  71. diffusers/models/transformers/prior_transformer.py +5 -1
  72. diffusers/models/transformers/sana_transformer.py +144 -40
  73. diffusers/models/transformers/stable_audio_transformer.py +5 -20
  74. diffusers/models/transformers/transformer_2d.py +7 -22
  75. diffusers/models/transformers/transformer_allegro.py +9 -17
  76. diffusers/models/transformers/transformer_cogview3plus.py +6 -17
  77. diffusers/models/transformers/transformer_cogview4.py +462 -0
  78. diffusers/models/transformers/transformer_easyanimate.py +527 -0
  79. diffusers/models/transformers/transformer_flux.py +68 -110
  80. diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
  81. diffusers/models/transformers/transformer_ltx.py +53 -35
  82. diffusers/models/transformers/transformer_lumina2.py +548 -0
  83. diffusers/models/transformers/transformer_mochi.py +6 -17
  84. diffusers/models/transformers/transformer_omnigen.py +469 -0
  85. diffusers/models/transformers/transformer_sd3.py +56 -86
  86. diffusers/models/transformers/transformer_temporal.py +5 -11
  87. diffusers/models/transformers/transformer_wan.py +469 -0
  88. diffusers/models/unets/unet_1d.py +3 -1
  89. diffusers/models/unets/unet_2d.py +21 -20
  90. diffusers/models/unets/unet_2d_blocks.py +19 -243
  91. diffusers/models/unets/unet_2d_condition.py +4 -6
  92. diffusers/models/unets/unet_3d_blocks.py +14 -127
  93. diffusers/models/unets/unet_3d_condition.py +8 -12
  94. diffusers/models/unets/unet_i2vgen_xl.py +5 -13
  95. diffusers/models/unets/unet_kandinsky3.py +0 -4
  96. diffusers/models/unets/unet_motion_model.py +20 -114
  97. diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
  98. diffusers/models/unets/unet_stable_cascade.py +8 -35
  99. diffusers/models/unets/uvit_2d.py +1 -4
  100. diffusers/optimization.py +2 -2
  101. diffusers/pipelines/__init__.py +57 -8
  102. diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
  103. diffusers/pipelines/amused/pipeline_amused.py +15 -2
  104. diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
  105. diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
  106. diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
  107. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
  108. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
  109. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
  110. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
  111. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
  112. diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
  113. diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
  114. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
  115. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
  116. diffusers/pipelines/auto_pipeline.py +35 -14
  117. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  118. diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
  119. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
  120. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
  121. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
  122. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
  123. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
  124. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
  125. diffusers/pipelines/cogview4/__init__.py +49 -0
  126. diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
  127. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
  128. diffusers/pipelines/cogview4/pipeline_output.py +21 -0
  129. diffusers/pipelines/consisid/__init__.py +49 -0
  130. diffusers/pipelines/consisid/consisid_utils.py +357 -0
  131. diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
  132. diffusers/pipelines/consisid/pipeline_output.py +20 -0
  133. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
  134. diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
  135. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
  136. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
  137. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
  138. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
  139. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
  140. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
  141. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
  142. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
  143. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
  144. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
  145. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
  146. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
  147. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
  148. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
  149. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
  150. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
  151. diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
  152. diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
  153. diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
  154. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
  155. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
  156. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
  157. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
  158. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
  159. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
  160. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
  161. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
  162. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
  163. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
  164. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
  165. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
  166. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
  167. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
  168. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
  169. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
  170. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
  171. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
  172. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
  173. diffusers/pipelines/dit/pipeline_dit.py +15 -2
  174. diffusers/pipelines/easyanimate/__init__.py +52 -0
  175. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
  176. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
  177. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
  178. diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
  179. diffusers/pipelines/flux/pipeline_flux.py +53 -21
  180. diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
  181. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
  182. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
  183. diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
  184. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
  185. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
  186. diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
  187. diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
  188. diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
  189. diffusers/pipelines/free_noise_utils.py +3 -3
  190. diffusers/pipelines/hunyuan_video/__init__.py +4 -0
  191. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
  192. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
  193. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
  194. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
  195. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
  196. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
  197. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
  198. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
  199. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
  200. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
  201. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
  202. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
  203. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
  204. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
  205. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
  206. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
  207. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
  208. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
  209. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
  210. diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
  211. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
  212. diffusers/pipelines/kolors/text_encoder.py +7 -34
  213. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
  214. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
  215. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
  216. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
  217. diffusers/pipelines/latte/pipeline_latte.py +36 -7
  218. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
  219. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
  220. diffusers/pipelines/ltx/__init__.py +2 -0
  221. diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
  222. diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
  223. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
  224. diffusers/pipelines/lumina/__init__.py +2 -2
  225. diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
  226. diffusers/pipelines/lumina2/__init__.py +48 -0
  227. diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
  228. diffusers/pipelines/marigold/__init__.py +2 -0
  229. diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
  230. diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
  231. diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
  232. diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
  233. diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
  234. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
  235. diffusers/pipelines/omnigen/__init__.py +50 -0
  236. diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
  237. diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
  238. diffusers/pipelines/onnx_utils.py +5 -3
  239. diffusers/pipelines/pag/pag_utils.py +1 -1
  240. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
  241. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
  242. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
  243. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
  244. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
  245. diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
  246. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
  247. diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
  248. diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
  249. diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
  250. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
  251. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
  252. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
  253. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
  254. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
  255. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
  256. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
  257. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
  258. diffusers/pipelines/pia/pipeline_pia.py +13 -1
  259. diffusers/pipelines/pipeline_flax_utils.py +7 -7
  260. diffusers/pipelines/pipeline_loading_utils.py +193 -83
  261. diffusers/pipelines/pipeline_utils.py +221 -106
  262. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
  263. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
  264. diffusers/pipelines/sana/__init__.py +2 -0
  265. diffusers/pipelines/sana/pipeline_sana.py +183 -58
  266. diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
  267. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
  268. diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
  269. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
  270. diffusers/pipelines/shap_e/renderer.py +6 -6
  271. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
  272. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
  273. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
  274. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
  275. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
  276. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
  277. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
  278. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
  279. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  280. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
  281. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
  282. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
  283. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
  284. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
  285. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
  286. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
  287. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
  288. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
  289. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
  290. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
  291. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
  292. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
  293. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
  294. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
  295. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
  296. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
  297. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
  298. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
  299. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
  300. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
  301. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
  302. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
  303. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
  304. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
  305. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
  306. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  307. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
  308. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
  309. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
  310. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
  311. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
  312. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
  313. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
  314. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
  315. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
  316. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
  317. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
  318. diffusers/pipelines/transformers_loading_utils.py +121 -0
  319. diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
  320. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
  321. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
  322. diffusers/pipelines/wan/__init__.py +51 -0
  323. diffusers/pipelines/wan/pipeline_output.py +20 -0
  324. diffusers/pipelines/wan/pipeline_wan.py +593 -0
  325. diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
  326. diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
  327. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
  328. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
  329. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
  330. diffusers/quantizers/auto.py +5 -1
  331. diffusers/quantizers/base.py +5 -9
  332. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
  333. diffusers/quantizers/bitsandbytes/utils.py +30 -20
  334. diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
  335. diffusers/quantizers/gguf/utils.py +4 -2
  336. diffusers/quantizers/quantization_config.py +59 -4
  337. diffusers/quantizers/quanto/__init__.py +1 -0
  338. diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
  339. diffusers/quantizers/quanto/utils.py +60 -0
  340. diffusers/quantizers/torchao/__init__.py +1 -1
  341. diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
  342. diffusers/schedulers/__init__.py +2 -1
  343. diffusers/schedulers/scheduling_consistency_models.py +1 -2
  344. diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
  345. diffusers/schedulers/scheduling_ddpm.py +2 -3
  346. diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
  347. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
  348. diffusers/schedulers/scheduling_edm_euler.py +45 -10
  349. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
  350. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
  351. diffusers/schedulers/scheduling_heun_discrete.py +1 -1
  352. diffusers/schedulers/scheduling_lcm.py +1 -2
  353. diffusers/schedulers/scheduling_lms_discrete.py +1 -1
  354. diffusers/schedulers/scheduling_repaint.py +5 -1
  355. diffusers/schedulers/scheduling_scm.py +265 -0
  356. diffusers/schedulers/scheduling_tcd.py +1 -2
  357. diffusers/schedulers/scheduling_utils.py +2 -1
  358. diffusers/training_utils.py +14 -7
  359. diffusers/utils/__init__.py +9 -1
  360. diffusers/utils/constants.py +13 -1
  361. diffusers/utils/deprecation_utils.py +1 -1
  362. diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
  363. diffusers/utils/dummy_gguf_objects.py +17 -0
  364. diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
  365. diffusers/utils/dummy_pt_objects.py +233 -0
  366. diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
  367. diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
  368. diffusers/utils/dummy_torchao_objects.py +17 -0
  369. diffusers/utils/dynamic_modules_utils.py +1 -1
  370. diffusers/utils/export_utils.py +28 -3
  371. diffusers/utils/hub_utils.py +52 -102
  372. diffusers/utils/import_utils.py +121 -221
  373. diffusers/utils/loading_utils.py +2 -1
  374. diffusers/utils/logging.py +1 -2
  375. diffusers/utils/peft_utils.py +6 -14
  376. diffusers/utils/remote_utils.py +425 -0
  377. diffusers/utils/source_code_parsing_utils.py +52 -0
  378. diffusers/utils/state_dict_utils.py +15 -1
  379. diffusers/utils/testing_utils.py +243 -13
  380. diffusers/utils/torch_utils.py +10 -0
  381. diffusers/utils/typing_utils.py +91 -0
  382. diffusers/video_processor.py +1 -1
  383. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
  384. diffusers-0.33.0.dist-info/RECORD +608 -0
  385. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
  386. diffusers-0.32.2.dist-info/RECORD +0 -550
  387. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
  388. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
  389. {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/__init__.py

@@ -10,6 +10,7 @@ from ..utils import (
     is_librosa_available,
     is_note_seq_available,
     is_onnx_available,
+    is_opencv_available,
     is_sentencepiece_available,
     is_torch_available,
     is_torch_npu_available,
@@ -154,6 +155,7 @@ else:
         "CogVideoXFunControlPipeline",
     ]
     _import_structure["cogview3"] = ["CogView3PlusPipeline"]
+    _import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
     _import_structure["controlnet"].extend(
         [
             "BlipDiffusionControlNetPipeline",
@@ -214,8 +216,17 @@ else:
         "IFPipeline",
         "IFSuperResolutionPipeline",
     ]
+    _import_structure["easyanimate"] = [
+        "EasyAnimatePipeline",
+        "EasyAnimateInpaintPipeline",
+        "EasyAnimateControlPipeline",
+    ]
     _import_structure["hunyuandit"] = ["HunyuanDiTPipeline"]
-    _import_structure["hunyuan_video"] = ["HunyuanVideoPipeline"]
+    _import_structure["hunyuan_video"] = [
+        "HunyuanVideoPipeline",
+        "HunyuanSkyreelsImageToVideoPipeline",
+        "HunyuanVideoImageToVideoPipeline",
+    ]
     _import_structure["kandinsky"] = [
         "KandinskyCombinedPipeline",
         "KandinskyImg2ImgCombinedPipeline",
@@ -253,20 +264,23 @@ else:
         ]
     )
     _import_structure["latte"] = ["LattePipeline"]
-    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
-    _import_structure["lumina"] = ["LuminaText2ImgPipeline"]
+    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline", "LTXConditionPipeline"]
+    _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
+    _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
     _import_structure["marigold"].extend(
         [
             "MarigoldDepthPipeline",
+            "MarigoldIntrinsicsPipeline",
             "MarigoldNormalsPipeline",
         ]
     )
     _import_structure["mochi"] = ["MochiPipeline"]
     _import_structure["musicldm"] = ["MusicLDMPipeline"]
+    _import_structure["omnigen"] = ["OmniGenPipeline"]
     _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
     _import_structure["pia"] = ["PIAPipeline"]
     _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
-    _import_structure["sana"] = ["SanaPipeline"]
+    _import_structure["sana"] = ["SanaPipeline", "SanaSprintPipeline"]
     _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
     _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_audio"] = [
@@ -342,6 +356,7 @@ else:
         "WuerstchenDecoderPipeline",
         "WuerstchenPriorPipeline",
     ]
+    _import_structure["wan"] = ["WanPipeline", "WanImageToVideoPipeline", "WanVideoToVideoPipeline"]
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
@@ -399,6 +414,18 @@ else:
         "KolorsImg2ImgPipeline",
     ]
 
+try:
+    if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ..utils import (
+        dummy_torch_and_transformers_and_opencv_objects,
+    )
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects))
+else:
+    _import_structure["consisid"] = ["ConsisIDPipeline"]
+
 try:
     if not is_flax_available():
         raise OptionalDependencyNotAvailable()
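
The block above is diffusers' standard soft-dependency guard: probe the optional packages, raise `OptionalDependencyNotAvailable` when one is missing, and register dummy objects that raise a descriptive error only on use. A minimal standalone sketch of the same pattern, assuming only that the new `is_opencv_available` helper behaves like the existing `is_*_available` probes:

```python
# Sketch of the soft-dependency guard that gates the new ConsisID pipeline.
from diffusers.utils import OptionalDependencyNotAvailable, is_opencv_available

try:
    if not is_opencv_available():
        raise OptionalDependencyNotAvailable()
except OptionalDependencyNotAvailable:
    # diffusers registers a dummy object here instead; it raises a helpful
    # ImportError only when the pipeline is actually instantiated.
    ConsisIDPipeline = None
else:
    from diffusers import ConsisIDPipeline  # needs torch, transformers, and opencv
```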
@@ -496,6 +523,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         CogVideoXVideoToVideoPipeline,
     )
     from .cogview3 import CogView3PlusPipeline
+    from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
     from .controlnet import (
         BlipDiffusionControlNetPipeline,
         StableDiffusionControlNetImg2ImgPipeline,
@@ -538,6 +566,11 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         VersatileDiffusionTextToImagePipeline,
         VQDiffusionPipeline,
     )
+    from .easyanimate import (
+        EasyAnimateControlPipeline,
+        EasyAnimateInpaintPipeline,
+        EasyAnimatePipeline,
+    )
     from .flux import (
         FluxControlImg2ImgPipeline,
         FluxControlInpaintPipeline,
@@ -552,7 +585,11 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         FluxPriorReduxPipeline,
         ReduxImageEncoder,
     )
-    from .hunyuan_video import HunyuanVideoPipeline
+    from .hunyuan_video import (
+        HunyuanSkyreelsImageToVideoPipeline,
+        HunyuanVideoImageToVideoPipeline,
+        HunyuanVideoPipeline,
+    )
     from .hunyuandit import HunyuanDiTPipeline
     from .i2vgen_xl import I2VGenXLPipeline
     from .kandinsky import (
@@ -592,14 +629,17 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         LEditsPPPipelineStableDiffusion,
         LEditsPPPipelineStableDiffusionXL,
     )
-    from .ltx import LTXImageToVideoPipeline, LTXPipeline
-    from .lumina import LuminaText2ImgPipeline
+    from .ltx import LTXConditionPipeline, LTXImageToVideoPipeline, LTXPipeline
+    from .lumina import LuminaPipeline, LuminaText2ImgPipeline
+    from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
     from .marigold import (
         MarigoldDepthPipeline,
+        MarigoldIntrinsicsPipeline,
         MarigoldNormalsPipeline,
     )
     from .mochi import MochiPipeline
     from .musicldm import MusicLDMPipeline
+    from .omnigen import OmniGenPipeline
     from .pag import (
         AnimateDiffPAGPipeline,
         HunyuanDiTPAGPipeline,
@@ -622,7 +662,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     from .paint_by_example import PaintByExamplePipeline
     from .pia import PIAPipeline
     from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
-    from .sana import SanaPipeline
+    from .sana import SanaPipeline, SanaSprintPipeline
     from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
     from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
     from .stable_audio import StableAudioPipeline, StableAudioProjectionModel
@@ -680,6 +720,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         UniDiffuserPipeline,
         UniDiffuserTextDecoder,
     )
+    from .wan import WanImageToVideoPipeline, WanPipeline, WanVideoToVideoPipeline
     from .wuerstchen import (
         WuerstchenCombinedPipeline,
         WuerstchenDecoderPipeline,
@@ -731,6 +772,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         KolorsPipeline,
     )
 
+    try:
+        if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ..utils.dummy_torch_and_transformers_and_opencv_objects import *
+    else:
+        from .consisid import ConsisIDPipeline
+
     try:
         if not is_flax_available():
             raise OptionalDependencyNotAvailable()
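
The net effect of the `_import_structure` and `TYPE_CHECKING` changes above is that every pipeline added in 0.33.0 resolves lazily from the package root. A quick smoke test, assuming `diffusers>=0.33.0` with torch and transformers installed:

```python
# Importing from the package root goes through the lazy-module machinery,
# which loads each pipeline submodule only when its name is first resolved.
from diffusers import (
    CogView4Pipeline,
    LTXConditionPipeline,
    Lumina2Pipeline,
    OmniGenPipeline,
    SanaSprintPipeline,
    WanPipeline,
)

print(WanPipeline.__name__)
```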
diffusers/pipelines/allegro/pipeline_allegro.py

@@ -33,6 +33,7 @@ from ...utils import (
     deprecate,
     is_bs4_available,
     is_ftfy_available,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -41,6 +42,14 @@ from ...video_processor import VideoProcessor
 from .pipeline_output import AllegroPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 logger = logging.get_logger(__name__)
 
 if is_bs4_available():
@@ -194,10 +203,10 @@ class AllegroPipeline(DiffusionPipeline):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
        )
 
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
@@ -674,6 +683,10 @@ class AllegroPipeline(DiffusionPipeline):
     def num_timesteps(self):
         return self._num_timesteps
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
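
The new read-only `current_timestep` property mirrors `num_timesteps`: the denoising loop now records the timestep it is working on (see the next hunks), so callbacks and hooks can observe progress. A hedged usage sketch; the `callback_on_step_end` signature shown is the common diffusers convention, assumed here rather than taken from this diff:

```python
# Hypothetical step-end callback that reads the pipeline's current timestep.
def log_timestep(pipe, step_index, timestep, callback_kwargs):
    # `pipe.current_timestep` is set to `t` at the top of each denoising step
    # and reset to None once the loop finishes.
    print(f"step {step_index}: current_timestep={pipe.current_timestep}")
    return callback_kwargs
```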
@@ -806,6 +819,7 @@ class AllegroPipeline(DiffusionPipeline):
             negative_prompt_attention_mask,
         )
         self._guidance_scale = guidance_scale
+        self._current_timestep = None
         self._interrupt = False
 
         # 2. Default height and width to transformer
@@ -883,6 +897,7 @@ class AllegroPipeline(DiffusionPipeline):
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
@@ -921,6 +936,11 @@ class AllegroPipeline(DiffusionPipeline):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             latents = latents.to(self.vae.dtype)
             video = self.decode_latents(latents)
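
This `is_torch_xla_available()` guard plus a per-step `xm.mark_step()` is the recurring change in this release; the same block reappears in every pipeline below. A self-contained sketch of the pattern, with the loop body stubbed out:

```python
# The XLA pattern added across pipelines in 0.33.0, shown in isolation.
from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm  # only importable on torch_xla builds

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False


def denoise(step_fn, latents, timesteps):
    for t in timesteps:
        latents = step_fn(latents, t)  # scheduler + model work elided
        if XLA_AVAILABLE:
            # Cut the lazily traced XLA graph at each step so the device
            # executes per iteration instead of accumulating the whole loop.
            xm.mark_step()
    return latents
```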
diffusers/pipelines/amused/pipeline_amused.py

@@ -20,10 +20,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -66,7 +74,9 @@ class AmusedPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)
 
     @torch.no_grad()
@@ -297,6 +307,9 @@ class AmusedPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
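
The `getattr(self, "vqvae", None)` rewrite (and the `vae` variant used elsewhere) collapses the older `hasattr(...) and ... is not None` check into one expression and adds a fallback scale factor of 8 for pipelines constructed without their autoencoder. A condensed illustration of why one `getattr` suffices:

```python
# getattr(...) is falsy both when the attribute is absent and when it was
# registered as None, so a pipeline without its VQ-VAE falls back to 8.
class _PipelineLike:
    def __init__(self, vqvae=None):
        self.vqvae = vqvae
        self.vae_scale_factor = (
            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
        )


assert _PipelineLike().vae_scale_factor == 8
```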
diffusers/pipelines/amused/pipeline_amused_img2img.py

@@ -20,10 +20,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -81,7 +89,9 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)
 
     @torch.no_grad()
@@ -323,6 +333,9 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
diffusers/pipelines/amused/pipeline_amused_inpaint.py

@@ -21,10 +21,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -89,7 +97,9 @@ class AmusedInpaintPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor,
@@ -354,6 +364,9 @@ class AmusedInpaintPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
diffusers/pipelines/animatediff/pipeline_animatediff.py

@@ -19,7 +19,7 @@ import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...image_processor import PipelineImageInput
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...models.unets.unet_motion_model import MotionAdapter
@@ -34,6 +34,7 @@ from ...schedulers import (
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -47,8 +48,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -74,6 +83,7 @@ class AnimateDiffPipeline(
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
     AnimateDiffFreeNoiseMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-video generation.
@@ -139,7 +149,7 @@ class AnimateDiffPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
 
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
@@ -844,6 +854,9 @@ class AnimateDiffPipeline(
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 9. Post processing
         if output_type == "latent":
             video = latents
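
Adding `FromSingleFileMixin` gives the AnimateDiff pipelines the standard `from_single_file` constructor for monolithic `.safetensors`/`.ckpt` checkpoints. A hedged sketch; the checkpoint path is illustrative, and components such as a `MotionAdapter` may still need to be passed the usual way:

```python
# Hypothetical single-file load enabled by the FromSingleFileMixin change.
from diffusers import AnimateDiffPipeline

pipe = AnimateDiffPipeline.from_single_file(
    "./checkpoints/animatediff_sd15.safetensors"  # illustrative local path
)
```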
diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py

@@ -20,7 +20,7 @@ import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...image_processor import PipelineImageInput
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import (
     AutoencoderKL,
     ControlNetModel,
@@ -32,7 +32,7 @@ from ...models import (
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...models.unets.unet_motion_model import MotionAdapter
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, is_torch_xla_available, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
@@ -41,8 +41,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -117,6 +125,7 @@ class AnimateDiffControlNetPipeline(
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
     AnimateDiffFreeNoiseMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-video generation with ControlNet guidance.
@@ -180,7 +189,7 @@ class AnimateDiffControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)
         self.control_video_processor = VideoProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1090,6 +1099,9 @@ class AnimateDiffControlNetPipeline(
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 9. Post processing
         if output_type == "latent":
             video = latents
diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py

@@ -48,6 +48,7 @@ from ...schedulers import (
 )
 from ...utils import (
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -60,8 +61,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -307,10 +316,14 @@ class AnimateDiffSDXLPipeline(
             feature_extractor=feature_extractor,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)
 
-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )
 
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt with num_images_per_prompt->num_videos_per_prompt
     def encode_prompt(
@@ -438,7 +451,9 @@ class AnimateDiffSDXLPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -497,8 +512,10 @@ class AnimateDiffSDXLPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
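
The two pooled-embedding guards above keep the SDXL prompt encoder from overwriting caller-supplied pooled embeddings, and only treat the encoder's first output as the pooled projection when it actually has the 2-D `(batch, dim)` shape. A condensed sketch of the condition, with tensor shapes assumed:

```python
import torch


def resolve_pooled(encoder_outputs, pooled_prompt_embeds=None):
    # Keep a caller-provided pooled embedding; otherwise accept the encoder's
    # first output only if it is a rank-2 (batch, dim) pooled projection.
    if pooled_prompt_embeds is None and encoder_outputs[0].ndim == 2:
        pooled_prompt_embeds = encoder_outputs[0]
    return pooled_prompt_embeds


assert resolve_pooled((torch.randn(1, 1280),)).shape == (1, 1280)
```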
@@ -1261,6 +1278,9 @@ class AnimateDiffSDXLPipeline(
 
                 progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # make sure the VAE is in float32 mode, as it overflows in float16
         needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
 
diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py

@@ -22,7 +22,7 @@ import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.controlnets.controlnet_sparsectrl import SparseControlNetModel
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -30,6 +30,7 @@ from ...models.unets.unet_motion_model import MotionAdapter
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -42,8 +43,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```python
@@ -127,6 +136,7 @@ class AnimateDiffSparseControlNetPipeline(
     IPAdapterMixin,
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for controlled text-to-video generation using the method described in [SparseCtrl: Adding Sparse Controls
@@ -188,7 +198,7 @@ class AnimateDiffSparseControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -994,6 +1004,9 @@ class AnimateDiffSparseControlNetPipeline(
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 11. Post processing
         if output_type == "latent":
             video = latents