diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff shows the changes between two publicly released package versions, as they appear in their respective public registries. The information is provided for informational purposes only.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +13 -10
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
  475. diffusers-0.33.1.dist-info/RECORD +0 -608
  476. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  477. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@ from ...utils import (
33
33
  )
34
34
  from ...utils.torch_utils import randn_tensor
35
35
  from ...video_processor import VideoProcessor
36
- from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
36
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
37
37
  from . import TextToVideoSDPipelineOutput
38
38
 
39
39
 
@@ -68,8 +68,13 @@ EXAMPLE_DOC_STRING = """
68
68
 
69
69
 
70
70
  class TextToVideoSDPipeline(
71
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
71
+ DeprecatedPipelineMixin,
72
+ DiffusionPipeline,
73
+ StableDiffusionMixin,
74
+ TextualInversionLoaderMixin,
75
+ StableDiffusionLoraLoaderMixin,
72
76
  ):
77
+ _last_supported_version = "0.33.1"
73
78
  r"""
74
79
  Pipeline for text-to-video generation.
75
80
 
@@ -349,7 +354,7 @@ class TextToVideoSDPipeline(
349
354
  def prepare_extra_step_kwargs(self, generator, eta):
350
355
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
351
356
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
352
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
357
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
353
358
  # and should be between [0, 1]
354
359
 
355
360
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -489,8 +494,8 @@ class TextToVideoSDPipeline(
489
494
  num_images_per_prompt (`int`, *optional*, defaults to 1):
490
495
  The number of images to generate per prompt.
491
496
  eta (`float`, *optional*, defaults to 0.0):
492
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
493
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
497
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
498
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
494
499
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
495
500
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
496
501
  generation deterministic.
@@ -550,7 +555,7 @@ class TextToVideoSDPipeline(
550
555
 
551
556
  device = self._execution_device
552
557
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
553
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
558
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
554
559
  # corresponds to doing no classifier free guidance.
555
560
  do_classifier_free_guidance = guidance_scale > 1.0
556
561
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -34,7 +34,7 @@ from ...utils import (
34
34
  )
35
35
  from ...utils.torch_utils import randn_tensor
36
36
  from ...video_processor import VideoProcessor
37
- from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
37
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
38
38
  from . import TextToVideoSDPipelineOutput
39
39
 
40
40
 
@@ -103,8 +103,13 @@ def retrieve_latents(
103
103
 
104
104
 
105
105
  class VideoToVideoSDPipeline(
106
- DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, StableDiffusionLoraLoaderMixin
106
+ DeprecatedPipelineMixin,
107
+ DiffusionPipeline,
108
+ StableDiffusionMixin,
109
+ TextualInversionLoaderMixin,
110
+ StableDiffusionLoraLoaderMixin,
107
111
  ):
112
+ _last_supported_version = "0.33.1"
108
113
  r"""
109
114
  Pipeline for text-guided video-to-video generation.
110
115
 
@@ -385,7 +390,7 @@ class VideoToVideoSDPipeline(
385
390
  def prepare_extra_step_kwargs(self, generator, eta):
386
391
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
387
392
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
388
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
393
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
389
394
  # and should be between [0, 1]
390
395
 
391
396
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -553,8 +558,8 @@ class VideoToVideoSDPipeline(
553
558
  The prompt or prompts to guide what to not include in video generation. If not defined, you need to
554
559
  pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
555
560
  eta (`float`, *optional*, defaults to 0.0):
556
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
557
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
561
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
562
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
558
563
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
559
564
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
560
565
  generation deterministic.
@@ -609,7 +614,7 @@ class VideoToVideoSDPipeline(
609
614
 
610
615
  device = self._execution_device
611
616
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
612
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
617
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
613
618
  # corresponds to doing no classifier free guidance.
614
619
  do_classifier_free_guidance = guidance_scale > 1.0
615
620
 
@@ -23,8 +23,8 @@ from ...utils import (
23
23
  scale_lora_layers,
24
24
  unscale_lora_layers,
25
25
  )
26
- from ...utils.torch_utils import randn_tensor
27
- from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
26
+ from ...utils.torch_utils import empty_device_cache, randn_tensor
27
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
28
28
  from ..stable_diffusion import StableDiffusionSafetyChecker
29
29
 
30
30
 
@@ -296,12 +296,14 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s
296
296
 
297
297
 
298
298
  class TextToVideoZeroPipeline(
299
+ DeprecatedPipelineMixin,
299
300
  DiffusionPipeline,
300
301
  StableDiffusionMixin,
301
302
  TextualInversionLoaderMixin,
302
303
  StableDiffusionLoraLoaderMixin,
303
304
  FromSingleFileMixin,
304
305
  ):
306
+ _last_supported_version = "0.33.1"
305
307
  r"""
306
308
  Pipeline for zero-shot text-to-video generation using Stable Diffusion.
307
309
 
@@ -588,8 +590,8 @@ class TextToVideoZeroPipeline(
588
590
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
589
591
  The number of videos to generate per prompt.
590
592
  eta (`float`, *optional*, defaults to 0.0):
591
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
592
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
593
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
594
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
593
595
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
594
596
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
595
597
  generation deterministic.
@@ -610,17 +612,17 @@ class TextToVideoZeroPipeline(
610
612
  The frequency at which the `callback` function is called. If not specified, the callback is called at
611
613
  every step.
612
614
  motion_field_strength_x (`float`, *optional*, defaults to 12):
613
- Strength of motion in generated video along x-axis. See the [paper](https://arxiv.org/abs/2303.13439),
614
- Sect. 3.3.1.
615
+ Strength of motion in generated video along x-axis. See the
616
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
615
617
  motion_field_strength_y (`float`, *optional*, defaults to 12):
616
- Strength of motion in generated video along y-axis. See the [paper](https://arxiv.org/abs/2303.13439),
617
- Sect. 3.3.1.
618
+ Strength of motion in generated video along y-axis. See the
619
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
618
620
  t0 (`int`, *optional*, defaults to 44):
619
621
  Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
620
- [paper](https://arxiv.org/abs/2303.13439), Sect. 3.3.1.
622
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
621
623
  t1 (`int`, *optional*, defaults to 47):
622
624
  Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
623
- [paper](https://arxiv.org/abs/2303.13439), Sect. 3.3.1.
625
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
624
626
  frame_ids (`List[int]`, *optional*):
625
627
  Indexes of the frames that are being generated. This is used when generating longer videos
626
628
  chunk-by-chunk.
@@ -663,7 +665,7 @@ class TextToVideoZeroPipeline(
663
665
  batch_size = 1 if isinstance(prompt, str) else len(prompt)
664
666
  device = self._execution_device
665
667
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
666
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
668
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
667
669
  # corresponds to doing no classifier free guidance.
668
670
  do_classifier_free_guidance = guidance_scale > 1.0
669
671
 
@@ -758,7 +760,7 @@ class TextToVideoZeroPipeline(
758
760
  # manually for max memory savings
759
761
  if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
760
762
  self.unet.to("cpu")
761
- torch.cuda.empty_cache()
763
+ empty_device_cache()
762
764
 
763
765
  if output_type == "latent":
764
766
  image = latents
@@ -797,7 +799,7 @@ class TextToVideoZeroPipeline(
797
799
  def prepare_extra_step_kwargs(self, generator, eta):
798
800
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
799
801
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
800
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
802
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
801
803
  # and should be between [0, 1]
802
804
 
803
805
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -35,7 +35,7 @@ from ...utils import (
35
35
  unscale_lora_layers,
36
36
  )
37
37
  from ...utils.torch_utils import randn_tensor
38
- from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
38
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
39
39
 
40
40
 
41
41
  if is_invisible_watermark_available():
@@ -323,7 +323,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
323
323
  r"""
324
324
  Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
325
325
  Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
326
- Flawed](https://arxiv.org/pdf/2305.08891.pdf).
326
+ Flawed](https://huggingface.co/papers/2305.08891).
327
327
 
328
328
  Args:
329
329
  noise_cfg (`torch.Tensor`):
@@ -346,11 +346,13 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
346
346
 
347
347
 
348
348
  class TextToVideoZeroSDXLPipeline(
349
+ DeprecatedPipelineMixin,
349
350
  DiffusionPipeline,
350
351
  StableDiffusionMixin,
351
352
  StableDiffusionXLLoraLoaderMixin,
352
353
  TextualInversionLoaderMixin,
353
354
  ):
355
+ _last_supported_version = "0.33.1"
354
356
  r"""
355
357
  Pipeline for zero-shot text-to-video generation using Stable Diffusion XL.
356
358
 
@@ -439,7 +441,7 @@ class TextToVideoZeroSDXLPipeline(
439
441
  def prepare_extra_step_kwargs(self, generator, eta):
440
442
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
441
443
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
442
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
444
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
443
445
  # and should be between [0, 1]
444
446
 
445
447
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -929,7 +931,7 @@ class TextToVideoZeroSDXLPipeline(
929
931
  noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
930
932
 
931
933
  if do_classifier_free_guidance and guidance_rescale > 0.0:
932
- # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
934
+ # Based on 3.4. in https://huggingface.co/papers/2305.08891
933
935
  noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
934
936
 
935
937
  # compute the previous noisy sample x_t -> x_t-1
@@ -1009,11 +1011,11 @@ class TextToVideoZeroSDXLPipeline(
1009
1011
  "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
1010
1012
  Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
1011
1013
  guidance_scale (`float`, *optional*, defaults to 7.5):
1012
- Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
1013
- `guidance_scale` is defined as `w` of equation 2. of [Imagen
1014
- Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
1015
- 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
1016
- usually at the expense of lower image quality.
1014
+ Guidance scale as defined in [Classifier-Free Diffusion
1015
+ Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
1016
+ of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
1017
+ `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
1018
+ the text `prompt`, usually at the expense of lower image quality.
1017
1019
  negative_prompt (`str` or `List[str]`, *optional*):
1018
1020
  The prompt or prompts not to guide the image generation. If not defined, one has to pass
1019
1021
  `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1024,8 +1026,8 @@ class TextToVideoZeroSDXLPipeline(
1024
1026
  num_videos_per_prompt (`int`, *optional*, defaults to 1):
1025
1027
  The number of videos to generate per prompt.
1026
1028
  eta (`float`, *optional*, defaults to 0.0):
1027
- Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
1028
- [`schedulers.DDIMScheduler`], will be ignored for others.
1029
+ Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
1030
+ applies to [`schedulers.DDIMScheduler`], will be ignored for others.
1029
1031
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1030
1032
  One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
1031
1033
  to make generation deterministic.
@@ -1051,11 +1053,11 @@ class TextToVideoZeroSDXLPipeline(
1051
1053
  generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
1052
1054
  tensor will ge generated by sampling using the supplied random `generator`.
1053
1055
  motion_field_strength_x (`float`, *optional*, defaults to 12):
1054
- Strength of motion in generated video along x-axis. See the [paper](https://arxiv.org/abs/2303.13439),
1055
- Sect. 3.3.1.
1056
+ Strength of motion in generated video along x-axis. See the
1057
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
1056
1058
  motion_field_strength_y (`float`, *optional*, defaults to 12):
1057
- Strength of motion in generated video along y-axis. See the [paper](https://arxiv.org/abs/2303.13439),
1058
- Sect. 3.3.1.
1059
+ Strength of motion in generated video along y-axis. See the
1060
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
1059
1061
  output_type (`str`, *optional*, defaults to `"pil"`):
1060
1062
  The output format of the generate image. Choose between
1061
1063
  [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
@@ -1074,9 +1076,10 @@ class TextToVideoZeroSDXLPipeline(
1074
1076
  [diffusers.cross_attention](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/cross_attention.py).
1075
1077
  guidance_rescale (`float`, *optional*, defaults to 0.7):
1076
1078
  Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
1077
- Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
1078
- [Common Diffusion Noise Schedules and Sample Steps are Flawed](https://arxiv.org/pdf/2305.08891.pdf).
1079
- Guidance rescale factor should fix overexposure when using zero terminal SNR.
1079
+ Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
1080
+ [Common Diffusion Noise Schedules and Sample Steps are
1081
+ Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
1082
+ using zero terminal SNR.
1080
1083
  original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
1081
1084
  If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
1082
1085
  `original_size` defaults to `(width, height)` if not specified. Part of SDXL's micro-conditioning as
@@ -1093,10 +1096,10 @@ class TextToVideoZeroSDXLPipeline(
1093
1096
  section 2.2 of [https://huggingface.co/papers/2307.01952](https://huggingface.co/papers/2307.01952).
1094
1097
  t0 (`int`, *optional*, defaults to 44):
1095
1098
  Timestep t0. Should be in the range [0, num_inference_steps - 1]. See the
1096
- [paper](https://arxiv.org/abs/2303.13439), Sect. 3.3.1.
1099
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
1097
1100
  t1 (`int`, *optional*, defaults to 47):
1098
1101
  Timestep t0. Should be in the range [t0 + 1, num_inference_steps - 1]. See the
1099
- [paper](https://arxiv.org/abs/2303.13439), Sect. 3.3.1.
1102
+ [paper](https://huggingface.co/papers/2303.13439), Sect. 3.3.1.
1100
1103
 
1101
1104
  Returns:
1102
1105
  [`~pipelines.text_to_video_synthesis.pipeline_text_to_video_zero.TextToVideoSDXLPipelineOutput`] or
@@ -1153,7 +1156,7 @@ class TextToVideoZeroSDXLPipeline(
1153
1156
  )
1154
1157
  device = self._execution_device
1155
1158
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1156
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1159
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1157
1160
  # corresponds to doing no classifier free guidance.
1158
1161
  do_classifier_free_guidance = guidance_scale > 1.0
1159
1162
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Kakao Brain and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
24
24
  from ...schedulers import UnCLIPScheduler
25
25
  from ...utils import is_torch_xla_available, logging
26
26
  from ...utils.torch_utils import randn_tensor
27
- from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
27
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
28
28
  from .text_proj import UnCLIPTextProjModel
29
29
 
30
30
 
@@ -38,7 +38,7 @@ else:
38
38
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
39
39
 
40
40
 
41
- class UnCLIPPipeline(DiffusionPipeline):
41
+ class UnCLIPPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
42
42
  """
43
43
  Pipeline for text-to-image generation using unCLIP.
44
44
 
@@ -69,6 +69,7 @@ class UnCLIPPipeline(DiffusionPipeline):
69
69
 
70
70
  """
71
71
 
72
+ _last_supported_version = "0.33.1"
72
73
  _exclude_from_cpu_offload = ["prior"]
73
74
 
74
75
  prior: PriorTransformer
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Kakao Brain and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ from ...models import UNet2DConditionModel, UNet2DModel
29
29
  from ...schedulers import UnCLIPScheduler
30
30
  from ...utils import is_torch_xla_available, logging
31
31
  from ...utils.torch_utils import randn_tensor
32
- from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
32
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
33
33
  from .text_proj import UnCLIPTextProjModel
34
34
 
35
35
 
@@ -43,7 +43,7 @@ else:
43
43
  logger = logging.get_logger(__name__) # pylint: disable=invalid-name
44
44
 
45
45
 
46
- class UnCLIPImageVariationPipeline(DiffusionPipeline):
46
+ class UnCLIPImageVariationPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
47
47
  """
48
48
  Pipeline to generate image variations from an input image using UnCLIP.
49
49
 
@@ -73,6 +73,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
73
73
  Scheduler used in the super resolution denoising process (a modified [`DDPMScheduler`]).
74
74
  """
75
75
 
76
+ _last_supported_version = "0.33.1"
76
77
  decoder: UNet2DConditionModel
77
78
  text_proj: UnCLIPTextProjModel
78
79
  text_encoder: CLIPTextModelWithProjection
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Kakao Brain and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Kakao Brain and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ class UnCLIPTextProjModel(ModelMixin, ConfigMixin):
24
24
  Utility class for CLIP embeddings. Used to combine the image and text embeddings into a format usable by the
25
25
  decoder.
26
26
 
27
- For more details, see the original paper: https://arxiv.org/abs/2204.06125 section 2.1
27
+ For more details, see the original paper: https://huggingface.co/papers/2204.06125 section 2.1
28
28
  """
29
29
 
30
30
  @register_to_config
@@ -13,7 +13,7 @@ from ...models import ModelMixin
13
13
  # Modified from ClipCaptionModel in https://github.com/thu-ml/unidiffuser/blob/main/libs/caption_decoder.py
14
14
  class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
15
15
  """
16
- Text decoder model for a image-text [UniDiffuser](https://arxiv.org/pdf/2303.06555.pdf) model. This is used to
16
+ Text decoder model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is used to
17
17
  generate text from the UniDiffuser image-text embedding.
18
18
 
19
19
  Parameters:
@@ -140,7 +140,7 @@ class UniDiffuserTextDecoder(ModelMixin, ConfigMixin, ModuleUtilsMixin):
140
140
  input_ids (`torch.Tensor` of shape `(N, max_seq_len)`):
141
141
  Text tokens to use for inference.
142
142
  prefix_embeds (`torch.Tensor` of shape `(N, prefix_length, 768)`):
143
- Prefix embedding to preprend to the embedded tokens.
143
+ Prefix embedding to prepend to the embedded tokens.
144
144
  attention_mask (`torch.Tensor` of shape `(N, prefix_length + max_seq_len, 768)`, *optional*):
145
145
  Attention mask for the prefix embedding.
146
146
  labels (`torch.Tensor`, *optional*):
@@ -832,7 +832,7 @@ class UTransformer2DModel(ModelMixin, ConfigMixin):
832
832
 
833
833
  class UniDiffuserModel(ModelMixin, ConfigMixin):
834
834
  """
835
- Transformer model for a image-text [UniDiffuser](https://arxiv.org/pdf/2303.06555.pdf) model. This is a
835
+ Transformer model for a image-text [UniDiffuser](https://huggingface.co/papers/2303.06555) model. This is a
836
836
  modification of [`UTransformer2DModel`] with input and output heads for the VAE-embedded latent image, the
837
837
  CLIP-embedded image, and the CLIP-embedded prompt (see paper for more details).
838
838
 
@@ -28,7 +28,7 @@ from ...utils import (
28
28
  )
29
29
  from ...utils.outputs import BaseOutput
30
30
  from ...utils.torch_utils import randn_tensor
31
- from ..pipeline_utils import DiffusionPipeline
31
+ from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline
32
32
  from .modeling_text_decoder import UniDiffuserTextDecoder
33
33
  from .modeling_uvit import UniDiffuserModel
34
34
 
@@ -62,7 +62,7 @@ class ImageTextPipelineOutput(BaseOutput):
62
62
  text: Optional[Union[List[str], List[List[str]]]]
63
63
 
64
64
 
65
- class UniDiffuserPipeline(DiffusionPipeline):
65
+ class UniDiffuserPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
66
66
  r"""
67
67
  Pipeline for a bimodal image-text model which supports unconditional text and image generation, text-conditioned
68
68
  image generation, image-conditioned text generation, and joint image-text generation.
@@ -96,6 +96,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
96
96
  original UniDiffuser paper uses the [`DPMSolverMultistepScheduler`] scheduler.
97
97
  """
98
98
 
99
+ _last_supported_version = "0.33.1"
99
100
  # TODO: support for moving submodules for components with enable_model_cpu_offload
100
101
  model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae->text_decoder"
101
102
 
@@ -153,7 +154,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
153
154
  def prepare_extra_step_kwargs(self, generator, eta):
154
155
  # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
155
156
  # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
156
- # eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
157
+ # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
157
158
  # and should be between [0, 1]
158
159
 
159
160
  accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -803,7 +804,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
803
804
 
804
805
  def _combine(self, img_vae, img_clip):
805
806
  r"""
806
- Combines a latent iamge img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1,
807
+ Combines a latent image img_vae of shape (B, C, H, W) and a CLIP-embedded image img_clip of shape (B, 1,
807
808
  clip_img_dim) into a single tensor of shape (B, C * H * W + clip_img_dim).
808
809
  """
809
810
  img_vae = torch.reshape(img_vae, (img_vae.shape[0], -1))
@@ -1154,8 +1155,8 @@ class UniDiffuserPipeline(DiffusionPipeline):
1154
1155
  `text` mode. If the mode is joint and both `num_images_per_prompt` and `num_prompts_per_image` are
1155
1156
  supplied, `min(num_images_per_prompt, num_prompts_per_image)` samples are generated.
1156
1157
  eta (`float`, *optional*, defaults to 0.0):
1157
- Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
1158
- to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1158
+ Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
1159
+ applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
1159
1160
  generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
1160
1161
  A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
1161
1162
  generation deterministic.
@@ -1243,7 +1244,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
1243
1244
  reduce_text_emb_dim = self.text_intermediate_dim < self.text_encoder_hidden_size or self.mode != "text2img"
1244
1245
 
1245
1246
  # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
1246
- # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
1247
+ # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
1247
1248
  # corresponds to doing no classifier free guidance.
1248
1249
  # Note that this differs from the formulation in the unidiffusers paper!
1249
1250
  do_classifier_free_guidance = guidance_scale > 1.0
@@ -0,0 +1,52 @@
1
+ from typing import TYPE_CHECKING
2
+
3
+ from ...utils import (
4
+ DIFFUSERS_SLOW_IMPORT,
5
+ OptionalDependencyNotAvailable,
6
+ _LazyModule,
7
+ get_objects_from_module,
8
+ is_torch_available,
9
+ is_transformers_available,
10
+ )
11
+
12
+
13
+ _dummy_objects = {}
14
+ _import_structure = {}
15
+
16
+
17
+ try:
18
+ if not (is_transformers_available() and is_torch_available()):
19
+ raise OptionalDependencyNotAvailable()
20
+ except OptionalDependencyNotAvailable:
21
+ from ...utils import dummy_torch_and_transformers_objects # noqa F403
22
+
23
+ _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
24
+ else:
25
+ _import_structure["pipeline_visualcloze_combined"] = ["VisualClozePipeline"]
26
+ _import_structure["pipeline_visualcloze_generation"] = ["VisualClozeGenerationPipeline"]
27
+
28
+
29
+ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
30
+ try:
31
+ if not (is_transformers_available() and is_torch_available()):
32
+ raise OptionalDependencyNotAvailable()
33
+
34
+ except OptionalDependencyNotAvailable:
35
+ from ...utils.dummy_torch_and_transformers_objects import *
36
+ else:
37
+ from .pipeline_visualcloze_combined import VisualClozePipeline
38
+ from .pipeline_visualcloze_generation import VisualClozeGenerationPipeline
39
+
40
+
41
+ else:
42
+ import sys
43
+
44
+ sys.modules[__name__] = _LazyModule(
45
+ __name__,
46
+ globals()["__file__"],
47
+ _import_structure,
48
+ module_spec=__spec__,
49
+ )
50
+
51
+ for name, value in _dummy_objects.items():
52
+ setattr(sys.modules[__name__], name, value)