diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478)
  1. diffusers/__init__.py +48 -1
  2. diffusers/commands/__init__.py +1 -1
  3. diffusers/commands/diffusers_cli.py +1 -1
  4. diffusers/commands/env.py +1 -1
  5. diffusers/commands/fp16_safetensors.py +1 -1
  6. diffusers/dependency_versions_check.py +1 -1
  7. diffusers/dependency_versions_table.py +1 -1
  8. diffusers/experimental/rl/value_guided_sampling.py +1 -1
  9. diffusers/hooks/faster_cache.py +2 -2
  10. diffusers/hooks/group_offloading.py +128 -29
  11. diffusers/hooks/hooks.py +2 -2
  12. diffusers/hooks/layerwise_casting.py +3 -3
  13. diffusers/hooks/pyramid_attention_broadcast.py +1 -1
  14. diffusers/image_processor.py +7 -2
  15. diffusers/loaders/__init__.py +4 -0
  16. diffusers/loaders/ip_adapter.py +5 -14
  17. diffusers/loaders/lora_base.py +212 -111
  18. diffusers/loaders/lora_conversion_utils.py +275 -34
  19. diffusers/loaders/lora_pipeline.py +1554 -819
  20. diffusers/loaders/peft.py +52 -109
  21. diffusers/loaders/single_file.py +2 -2
  22. diffusers/loaders/single_file_model.py +20 -4
  23. diffusers/loaders/single_file_utils.py +225 -5
  24. diffusers/loaders/textual_inversion.py +3 -2
  25. diffusers/loaders/transformer_flux.py +1 -1
  26. diffusers/loaders/transformer_sd3.py +2 -2
  27. diffusers/loaders/unet.py +2 -16
  28. diffusers/loaders/unet_loader_utils.py +1 -1
  29. diffusers/loaders/utils.py +1 -1
  30. diffusers/models/__init__.py +15 -1
  31. diffusers/models/activations.py +5 -5
  32. diffusers/models/adapter.py +2 -3
  33. diffusers/models/attention.py +4 -4
  34. diffusers/models/attention_flax.py +10 -10
  35. diffusers/models/attention_processor.py +14 -10
  36. diffusers/models/auto_model.py +47 -10
  37. diffusers/models/autoencoders/__init__.py +1 -0
  38. diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
  39. diffusers/models/autoencoders/autoencoder_dc.py +3 -3
  40. diffusers/models/autoencoders/autoencoder_kl.py +4 -4
  41. diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
  42. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
  43. diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
  44. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
  45. diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
  46. diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
  47. diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
  48. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
  49. diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
  50. diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
  51. diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
  52. diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
  53. diffusers/models/autoencoders/vae.py +13 -2
  54. diffusers/models/autoencoders/vq_model.py +2 -2
  55. diffusers/models/cache_utils.py +1 -1
  56. diffusers/models/controlnet.py +1 -1
  57. diffusers/models/controlnet_flux.py +1 -1
  58. diffusers/models/controlnet_sd3.py +1 -1
  59. diffusers/models/controlnet_sparsectrl.py +1 -1
  60. diffusers/models/controlnets/__init__.py +1 -0
  61. diffusers/models/controlnets/controlnet.py +3 -3
  62. diffusers/models/controlnets/controlnet_flax.py +1 -1
  63. diffusers/models/controlnets/controlnet_flux.py +16 -15
  64. diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
  65. diffusers/models/controlnets/controlnet_sana.py +290 -0
  66. diffusers/models/controlnets/controlnet_sd3.py +1 -1
  67. diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
  68. diffusers/models/controlnets/controlnet_union.py +1 -1
  69. diffusers/models/controlnets/controlnet_xs.py +7 -7
  70. diffusers/models/controlnets/multicontrolnet.py +4 -5
  71. diffusers/models/controlnets/multicontrolnet_union.py +5 -6
  72. diffusers/models/downsampling.py +2 -2
  73. diffusers/models/embeddings.py +10 -12
  74. diffusers/models/embeddings_flax.py +2 -2
  75. diffusers/models/lora.py +3 -3
  76. diffusers/models/modeling_utils.py +44 -14
  77. diffusers/models/normalization.py +4 -4
  78. diffusers/models/resnet.py +2 -2
  79. diffusers/models/resnet_flax.py +1 -1
  80. diffusers/models/transformers/__init__.py +5 -0
  81. diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
  82. diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
  83. diffusers/models/transformers/consisid_transformer_3d.py +1 -1
  84. diffusers/models/transformers/dit_transformer_2d.py +2 -2
  85. diffusers/models/transformers/dual_transformer_2d.py +1 -1
  86. diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
  87. diffusers/models/transformers/latte_transformer_3d.py +4 -5
  88. diffusers/models/transformers/lumina_nextdit2d.py +2 -2
  89. diffusers/models/transformers/pixart_transformer_2d.py +3 -3
  90. diffusers/models/transformers/prior_transformer.py +1 -1
  91. diffusers/models/transformers/sana_transformer.py +8 -3
  92. diffusers/models/transformers/stable_audio_transformer.py +5 -9
  93. diffusers/models/transformers/t5_film_transformer.py +3 -3
  94. diffusers/models/transformers/transformer_2d.py +1 -1
  95. diffusers/models/transformers/transformer_allegro.py +1 -1
  96. diffusers/models/transformers/transformer_chroma.py +742 -0
  97. diffusers/models/transformers/transformer_cogview3plus.py +5 -10
  98. diffusers/models/transformers/transformer_cogview4.py +317 -25
  99. diffusers/models/transformers/transformer_cosmos.py +579 -0
  100. diffusers/models/transformers/transformer_flux.py +9 -11
  101. diffusers/models/transformers/transformer_hidream_image.py +942 -0
  102. diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
  103. diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
  104. diffusers/models/transformers/transformer_ltx.py +2 -2
  105. diffusers/models/transformers/transformer_lumina2.py +1 -1
  106. diffusers/models/transformers/transformer_mochi.py +1 -1
  107. diffusers/models/transformers/transformer_omnigen.py +2 -2
  108. diffusers/models/transformers/transformer_sd3.py +7 -7
  109. diffusers/models/transformers/transformer_temporal.py +1 -1
  110. diffusers/models/transformers/transformer_wan.py +24 -8
  111. diffusers/models/transformers/transformer_wan_vace.py +393 -0
  112. diffusers/models/unets/unet_1d.py +1 -1
  113. diffusers/models/unets/unet_1d_blocks.py +1 -1
  114. diffusers/models/unets/unet_2d.py +1 -1
  115. diffusers/models/unets/unet_2d_blocks.py +1 -1
  116. diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
  117. diffusers/models/unets/unet_2d_condition.py +2 -2
  118. diffusers/models/unets/unet_2d_condition_flax.py +2 -2
  119. diffusers/models/unets/unet_3d_blocks.py +1 -1
  120. diffusers/models/unets/unet_3d_condition.py +3 -3
  121. diffusers/models/unets/unet_i2vgen_xl.py +3 -3
  122. diffusers/models/unets/unet_kandinsky3.py +1 -1
  123. diffusers/models/unets/unet_motion_model.py +2 -2
  124. diffusers/models/unets/unet_stable_cascade.py +1 -1
  125. diffusers/models/upsampling.py +2 -2
  126. diffusers/models/vae_flax.py +2 -2
  127. diffusers/models/vq_model.py +1 -1
  128. diffusers/pipelines/__init__.py +37 -6
  129. diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
  130. diffusers/pipelines/amused/pipeline_amused.py +7 -6
  131. diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
  132. diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
  133. diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
  134. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
  135. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
  136. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
  137. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
  138. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
  139. diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
  140. diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
  141. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
  142. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
  143. diffusers/pipelines/auto_pipeline.py +6 -7
  144. diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
  145. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
  146. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
  147. diffusers/pipelines/chroma/__init__.py +49 -0
  148. diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
  149. diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
  150. diffusers/pipelines/chroma/pipeline_output.py +21 -0
  151. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
  152. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
  153. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
  154. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
  155. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
  156. diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
  157. diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
  158. diffusers/pipelines/consisid/consisid_utils.py +2 -2
  159. diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
  160. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
  161. diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
  162. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
  163. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
  164. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
  165. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
  166. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
  167. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
  168. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
  169. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
  170. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
  171. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
  172. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
  173. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
  174. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
  175. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
  176. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
  177. diffusers/pipelines/cosmos/__init__.py +54 -0
  178. diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
  179. diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
  180. diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
  181. diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
  182. diffusers/pipelines/cosmos/pipeline_output.py +40 -0
  183. diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
  184. diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
  185. diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
  186. diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
  187. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
  188. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
  189. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
  190. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
  191. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
  192. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
  193. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
  194. diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
  195. diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
  196. diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
  197. diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
  198. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
  199. diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
  200. diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
  201. diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
  202. diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
  203. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
  204. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
  205. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
  206. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
  207. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
  208. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
  209. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
  210. diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
  211. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
  212. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
  213. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
  214. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
  215. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
  216. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
  217. diffusers/pipelines/dit/pipeline_dit.py +1 -1
  218. diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
  219. diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
  220. diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
  221. diffusers/pipelines/flux/modeling_flux.py +1 -1
  222. diffusers/pipelines/flux/pipeline_flux.py +10 -17
  223. diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
  224. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
  225. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
  226. diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
  227. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
  228. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
  229. diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
  230. diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
  231. diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
  232. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
  233. diffusers/pipelines/free_init_utils.py +2 -2
  234. diffusers/pipelines/free_noise_utils.py +3 -3
  235. diffusers/pipelines/hidream_image/__init__.py +47 -0
  236. diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
  237. diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
  238. diffusers/pipelines/hunyuan_video/__init__.py +2 -0
  239. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
  240. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
  241. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
  242. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
  243. diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
  244. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
  245. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
  246. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
  247. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
  248. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
  249. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
  250. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
  251. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  252. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
  253. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
  254. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
  255. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
  256. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
  257. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
  258. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
  259. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
  260. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
  261. diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
  262. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
  263. diffusers/pipelines/kolors/text_encoder.py +3 -3
  264. diffusers/pipelines/kolors/tokenizer.py +1 -1
  265. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
  266. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
  267. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
  268. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
  269. diffusers/pipelines/latte/pipeline_latte.py +12 -12
  270. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
  271. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
  272. diffusers/pipelines/ltx/__init__.py +4 -0
  273. diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
  274. diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
  275. diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
  276. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
  277. diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
  278. diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
  279. diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
  280. diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
  281. diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
  282. diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
  283. diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
  284. diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
  285. diffusers/pipelines/onnx_utils.py +15 -2
  286. diffusers/pipelines/pag/pag_utils.py +2 -2
  287. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
  288. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
  289. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
  290. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
  291. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
  292. diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
  293. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
  294. diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
  295. diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
  296. diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
  297. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
  298. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
  299. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
  300. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
  301. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
  302. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
  303. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
  304. diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
  305. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
  306. diffusers/pipelines/pia/pipeline_pia.py +8 -6
  307. diffusers/pipelines/pipeline_flax_utils.py +3 -4
  308. diffusers/pipelines/pipeline_loading_utils.py +89 -13
  309. diffusers/pipelines/pipeline_utils.py +105 -33
  310. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
  311. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
  312. diffusers/pipelines/sana/__init__.py +4 -0
  313. diffusers/pipelines/sana/pipeline_sana.py +23 -21
  314. diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
  315. diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
  316. diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
  317. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
  318. diffusers/pipelines/shap_e/camera.py +1 -1
  319. diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
  320. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
  321. diffusers/pipelines/shap_e/renderer.py +3 -3
  322. diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
  323. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
  324. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
  325. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
  326. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
  327. diffusers/pipelines/stable_diffusion/__init__.py +0 -7
  328. diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
  329. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
  330. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  331. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
  332. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
  333. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
  334. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
  335. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
  336. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
  337. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
  338. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
  339. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
  340. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
  341. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
  342. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
  343. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
  344. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
  345. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
  346. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
  347. diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
  348. diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
  349. diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
  350. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
  351. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
  352. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
  353. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
  354. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
  355. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
  356. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
  357. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
  358. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
  359. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
  360. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
  361. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
  362. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  363. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
  364. diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
  365. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
  366. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
  367. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
  368. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
  369. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
  370. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
  371. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
  372. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
  373. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
  374. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
  375. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
  376. diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
  377. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
  378. diffusers/pipelines/unclip/text_proj.py +2 -2
  379. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
  380. diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
  381. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
  382. diffusers/pipelines/visualcloze/__init__.py +52 -0
  383. diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
  384. diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
  385. diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
  386. diffusers/pipelines/wan/__init__.py +2 -0
  387. diffusers/pipelines/wan/pipeline_wan.py +17 -12
  388. diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
  389. diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
  390. diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
  391. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
  392. diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
  393. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
  394. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
  395. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
  396. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
  397. diffusers/quantizers/__init__.py +179 -1
  398. diffusers/quantizers/base.py +6 -1
  399. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
  400. diffusers/quantizers/bitsandbytes/utils.py +10 -7
  401. diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
  402. diffusers/quantizers/gguf/utils.py +16 -13
  403. diffusers/quantizers/quantization_config.py +18 -16
  404. diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
  405. diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
  406. diffusers/schedulers/__init__.py +3 -1
  407. diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
  408. diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
  409. diffusers/schedulers/scheduling_consistency_models.py +1 -1
  410. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
  411. diffusers/schedulers/scheduling_ddim.py +8 -8
  412. diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
  413. diffusers/schedulers/scheduling_ddim_flax.py +6 -6
  414. diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
  415. diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
  416. diffusers/schedulers/scheduling_ddpm.py +9 -9
  417. diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
  418. diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
  419. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
  420. diffusers/schedulers/scheduling_deis_multistep.py +8 -8
  421. diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
  422. diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
  423. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
  424. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
  425. diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
  426. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
  427. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
  428. diffusers/schedulers/scheduling_edm_euler.py +20 -11
  429. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
  430. diffusers/schedulers/scheduling_euler_discrete.py +3 -3
  431. diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
  432. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
  433. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
  434. diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
  435. diffusers/schedulers/scheduling_heun_discrete.py +2 -2
  436. diffusers/schedulers/scheduling_ipndm.py +2 -2
  437. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
  438. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
  439. diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
  440. diffusers/schedulers/scheduling_lcm.py +3 -3
  441. diffusers/schedulers/scheduling_lms_discrete.py +2 -2
  442. diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
  443. diffusers/schedulers/scheduling_pndm.py +4 -4
  444. diffusers/schedulers/scheduling_pndm_flax.py +4 -4
  445. diffusers/schedulers/scheduling_repaint.py +9 -9
  446. diffusers/schedulers/scheduling_sasolver.py +15 -15
  447. diffusers/schedulers/scheduling_scm.py +1 -1
  448. diffusers/schedulers/scheduling_sde_ve.py +1 -1
  449. diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
  450. diffusers/schedulers/scheduling_tcd.py +3 -3
  451. diffusers/schedulers/scheduling_unclip.py +5 -5
  452. diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
  453. diffusers/schedulers/scheduling_utils.py +1 -1
  454. diffusers/schedulers/scheduling_utils_flax.py +1 -1
  455. diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
  456. diffusers/training_utils.py +13 -5
  457. diffusers/utils/__init__.py +5 -0
  458. diffusers/utils/accelerate_utils.py +1 -1
  459. diffusers/utils/doc_utils.py +1 -1
  460. diffusers/utils/dummy_pt_objects.py +120 -0
  461. diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
  462. diffusers/utils/dynamic_modules_utils.py +21 -3
  463. diffusers/utils/export_utils.py +1 -1
  464. diffusers/utils/import_utils.py +81 -18
  465. diffusers/utils/logging.py +1 -1
  466. diffusers/utils/outputs.py +2 -1
  467. diffusers/utils/peft_utils.py +91 -8
  468. diffusers/utils/state_dict_utils.py +20 -3
  469. diffusers/utils/testing_utils.py +59 -7
  470. diffusers/utils/torch_utils.py +25 -5
  471. diffusers/video_processor.py +2 -2
  472. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
  473. diffusers-0.34.0.dist-info/RECORD +639 -0
  474. diffusers-0.33.0.dist-info/RECORD +0 -608
  475. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
  476. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
  477. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
  478. {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -75,7 +75,7 @@ class QuantizationConfigMixin:
75
75
  Args:
76
76
  config_dict (`Dict[str, Any]`):
77
77
  Dictionary that will be used to instantiate the configuration object.
78
- return_unused_kwargs (`bool`,*optional*, defaults to `False`):
78
+ return_unused_kwargs (`bool`, *optional*, defaults to `False`):
79
79
  Whether or not to return a list of unused keyword arguments. Used for `from_pretrained` method in
80
80
  `PreTrainedModel`.
81
81
  kwargs (`Dict[str, Any]`):
@@ -179,7 +179,7 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
179
179
  This is a wrapper class about all possible attributes and features that you can play with a model that has been
180
180
  loaded using `bitsandbytes`.
181
181
 
182
- This replaces `load_in_8bit` or `load_in_4bit`therefore both options are mutually exclusive.
182
+ This replaces `load_in_8bit` or `load_in_4bit` therefore both options are mutually exclusive.
183
183
 
184
184
  Currently only supports `LLM.int8()`, `FP4`, and `NF4` quantization. If more methods are added to `bitsandbytes`,
185
185
  then more arguments will be added to this class.
@@ -192,10 +192,10 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
192
192
  `bitsandbytes`.
193
193
  llm_int8_threshold (`float`, *optional*, defaults to 6.0):
194
194
  This corresponds to the outlier threshold for outlier detection as described in `LLM.int8() : 8-bit Matrix
195
- Multiplication for Transformers at Scale` paper: https://arxiv.org/abs/2208.07339 Any hidden states value
196
- that is above this threshold will be considered an outlier and the operation on those values will be done
197
- in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5], but
198
- there are some exceptional systematic outliers that are very differently distributed for large models.
195
+ Multiplication for Transformers at Scale` paper: https://huggingface.co/papers/2208.07339 Any hidden states
196
+ value that is above this threshold will be considered an outlier and the operation on those values will be
197
+ done in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5],
198
+ but there are some exceptional systematic outliers that are very differently distributed for large models.
199
199
  These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of
200
200
  magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6,
201
201
  but a lower threshold might be needed for more unstable models (small models, fine-tuning).
@@ -493,7 +493,7 @@ class TorchAoConfig(QuantizationConfigMixin):
493
493
  TORCHAO_QUANT_TYPE_METHODS = self._get_torchao_quant_type_to_method()
494
494
  if self.quant_type not in TORCHAO_QUANT_TYPE_METHODS.keys():
495
495
  is_floating_quant_type = self.quant_type.startswith("float") or self.quant_type.startswith("fp")
496
- if is_floating_quant_type and not self._is_cuda_capability_atleast_8_9():
496
+ if is_floating_quant_type and not self._is_xpu_or_cuda_capability_atleast_8_9():
497
497
  raise ValueError(
498
498
  f"Requested quantization type: {self.quant_type} is not supported on GPUs with CUDA capability <= 8.9. You "
499
499
  f"can check the CUDA capability of your GPU using `torch.cuda.get_device_capability()`."
@@ -645,7 +645,7 @@ class TorchAoConfig(QuantizationConfigMixin):
645
645
  QUANTIZATION_TYPES.update(INT8_QUANTIZATION_TYPES)
646
646
  QUANTIZATION_TYPES.update(UINTX_QUANTIZATION_DTYPES)
647
647
 
648
- if cls._is_cuda_capability_atleast_8_9():
648
+ if cls._is_xpu_or_cuda_capability_atleast_8_9():
649
649
  QUANTIZATION_TYPES.update(FLOATX_QUANTIZATION_TYPES)
650
650
 
651
651
  return QUANTIZATION_TYPES
@@ -655,14 +655,16 @@ class TorchAoConfig(QuantizationConfigMixin):
655
655
  )
656
656
 
657
657
  @staticmethod
658
- def _is_cuda_capability_atleast_8_9() -> bool:
659
- if not torch.cuda.is_available():
660
- raise RuntimeError("TorchAO requires a CUDA compatible GPU and installation of PyTorch.")
661
-
662
- major, minor = torch.cuda.get_device_capability()
663
- if major == 8:
664
- return minor >= 9
665
- return major >= 9
658
+ def _is_xpu_or_cuda_capability_atleast_8_9() -> bool:
659
+ if torch.cuda.is_available():
660
+ major, minor = torch.cuda.get_device_capability()
661
+ if major == 8:
662
+ return minor >= 9
663
+ return major >= 9
664
+ elif torch.xpu.is_available():
665
+ return True
666
+ else:
667
+ raise RuntimeError("TorchAO requires a CUDA compatible GPU or Intel XPU and installation of PyTorch.")
666
668
 
667
669
  def get_apply_tensor_subclass(self):
668
670
  TORCHAO_QUANT_TYPE_METHODS = self._get_torchao_quant_type_to_method()
@@ -175,3 +175,7 @@ class QuantoQuantizer(DiffusersQuantizer):
175
175
  @property
176
176
  def is_serializable(self):
177
177
  return True
178
+
179
+ @property
180
+ def is_compileable(self) -> bool:
181
+ return True
@@ -262,7 +262,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
262
262
  **kwargs,
263
263
  ):
264
264
  r"""
265
- Each nn.Linear layer that needs to be quantized is processsed here. First, we set the value the weight tensor,
265
+ Each nn.Linear layer that needs to be quantized is processed here. First, we set the value the weight tensor,
266
266
  then we move it to the target device. Finally, we quantize the module.
267
267
  """
268
268
  module, tensor_name = get_module_from_name(model, param_name)
@@ -335,3 +335,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
335
335
  @property
336
336
  def is_trainable(self):
337
337
  return self.quantization_config.quant_type.startswith("int8")
338
+
339
+ @property
340
+ def is_compileable(self) -> bool:
341
+ return True
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -60,6 +60,7 @@ else:
60
60
  _import_structure["scheduling_euler_discrete"] = ["EulerDiscreteScheduler"]
61
61
  _import_structure["scheduling_flow_match_euler_discrete"] = ["FlowMatchEulerDiscreteScheduler"]
62
62
  _import_structure["scheduling_flow_match_heun_discrete"] = ["FlowMatchHeunDiscreteScheduler"]
63
+ _import_structure["scheduling_flow_match_lcm"] = ["FlowMatchLCMScheduler"]
63
64
  _import_structure["scheduling_heun_discrete"] = ["HeunDiscreteScheduler"]
64
65
  _import_structure["scheduling_ipndm"] = ["IPNDMScheduler"]
65
66
  _import_structure["scheduling_k_dpm_2_ancestral_discrete"] = ["KDPM2AncestralDiscreteScheduler"]
@@ -161,6 +162,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
161
162
  from .scheduling_euler_discrete import EulerDiscreteScheduler
162
163
  from .scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
163
164
  from .scheduling_flow_match_heun_discrete import FlowMatchHeunDiscreteScheduler
165
+ from .scheduling_flow_match_lcm import FlowMatchLCMScheduler
164
166
  from .scheduling_heun_discrete import HeunDiscreteScheduler
165
167
  from .scheduling_ipndm import IPNDMScheduler
166
168
  from .scheduling_k_dpm_2_ancestral_discrete import KDPM2AncestralDiscreteScheduler
@@ -1,4 +1,4 @@
1
- # Copyright 2024 NVIDIA and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 NVIDIA and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -55,8 +55,9 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
55
55
 
56
56
  <Tip>
57
57
 
58
- For more details on the parameters, see [Appendix E](https://arxiv.org/abs/2206.00364). The grid search values used
59
- to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of the paper.
58
+ For more details on the parameters, see [Appendix E](https://huggingface.co/papers/2206.00364). The grid search
59
+ values used to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of
60
+ the paper.
60
61
 
61
62
  </Tip>
62
63
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Google Brain and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Google Brain and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 TSAIL Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -30,7 +30,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
30
30
  Implements a variant of `DPMSolverMultistepScheduler` with cosine schedule, proposed by Nichol and Dhariwal (2021).
31
31
  This scheduler was used in Stable Audio Open [1].
32
32
 
33
- [1] Evans, Parker, et al. "Stable Audio Open" https://arxiv.org/abs/2407.14358
33
+ [1] Evans, Parker, et al. "Stable Audio Open" https://huggingface.co/papers/2407.14358
34
34
 
35
35
  This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
36
36
  methods the library implements for all schedulers such as loading and saving.
@@ -44,8 +44,8 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
44
44
  The standard deviation of the data distribution. This is set to 1.0 in Stable Audio Open [1].
45
45
  sigma_schedule (`str`, *optional*, defaults to `exponential`):
46
46
  Sigma schedule to compute the `sigmas`. By default, we the schedule introduced in the EDM paper
47
- (https://arxiv.org/abs/2206.00364). Other acceptable value is "exponential". The exponential schedule was
48
- incorporated in this model: https://huggingface.co/stabilityai/cosxl.
47
+ (https://huggingface.co/papers/2206.00364). Other acceptable value is "exponential". The exponential
48
+ schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl.
49
49
  num_train_timesteps (`int`, defaults to 1000):
50
50
  The number of diffusion steps to train the model.
51
51
  solver_order (`int`, defaults to 2):
@@ -144,7 +144,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
144
144
 
145
145
  # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_inputs
146
146
  def precondition_inputs(self, sample, sigma):
147
- c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5)
147
+ c_in = self._get_conditioning_c_in(sigma)
148
148
  scaled_sample = sample * c_in
149
149
  return scaled_sample
150
150
 
@@ -568,5 +568,10 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
568
568
  noisy_samples = original_samples + noise * sigma
569
569
  return noisy_samples
570
570
 
571
+ # Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._get_conditioning_c_in
572
+ def _get_conditioning_c_in(self, sigma):
573
+ c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5)
574
+ return c_in
575
+
571
576
  def __len__(self):
572
577
  return self.config.num_train_timesteps
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
94
94
 
95
95
  def rescale_zero_terminal_snr(betas):
96
96
  """
97
- Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
97
+ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
98
98
 
99
99
 
100
100
  Args:
@@ -269,7 +269,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
269
269
  pixels from saturation at each step. We find that dynamic thresholding results in significantly better
270
270
  photorealism as well as better image-text alignment, especially when using very large guidance weights."
271
271
 
272
- https://arxiv.org/abs/2205.11487
272
+ https://huggingface.co/papers/2205.11487
273
273
  """
274
274
  dtype = sample.dtype
275
275
  batch_size, channels, *remaining_dims = sample.shape
@@ -312,7 +312,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
312
312
 
313
313
  self.num_inference_steps = num_inference_steps
314
314
 
315
- # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
315
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
316
316
  if self.config.timestep_spacing == "linspace":
317
317
  timesteps = (
318
318
  np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -387,7 +387,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
387
387
  "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
388
388
  )
389
389
 
390
- # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
390
+ # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
391
391
  # Ideally, read DDIM paper in-detail understanding
392
392
 
393
393
  # Notation (<variable name> -> <name in paper>
@@ -408,7 +408,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
408
408
  beta_prod_t = 1 - alpha_prod_t
409
409
 
410
410
  # 3. compute predicted original sample from predicted noise also called
411
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
411
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
412
412
  if self.config.prediction_type == "epsilon":
413
413
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
414
414
  pred_epsilon = model_output
@@ -441,10 +441,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
441
441
  # the pred_epsilon is always re-derived from the clipped x_0 in Glide
442
442
  pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
443
443
 
444
- # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
444
+ # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
445
445
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
446
446
 
447
- # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
447
+ # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
448
448
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
449
449
 
450
450
  if eta > 0:
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
1
+ # Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
2
2
  # All rights reserved.
3
3
  #
4
4
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
94
94
 
95
95
  def rescale_zero_terminal_snr(alphas_cumprod):
96
96
  """
97
- Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
97
+ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
98
98
 
99
99
 
100
100
  Args:
@@ -275,7 +275,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
275
275
 
276
276
  self.num_inference_steps = num_inference_steps
277
277
 
278
- # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
278
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
279
279
  if self.config.timestep_spacing == "linspace":
280
280
  timesteps = (
281
281
  np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -350,7 +350,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
350
350
  "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
351
351
  )
352
352
 
353
- # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
353
+ # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
354
354
  # Ideally, read DDIM paper in-detail understanding
355
355
 
356
356
  # Notation (<variable name> -> <name in paper>
@@ -371,7 +371,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
371
371
  beta_prod_t = 1 - alpha_prod_t
372
372
 
373
373
  # 3. compute predicted original sample from predicted noise also called
374
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
374
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
375
375
  # To make style tests pass, commented out `pred_epsilon` as it is an unused variable
376
376
  if self.config.prediction_type == "epsilon":
377
377
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
@@ -1,4 +1,4 @@
1
- # Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -73,7 +73,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
73
73
  [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
74
74
  [`~SchedulerMixin.from_pretrained`] functions.
75
75
 
76
- For more details, see the original paper: https://arxiv.org/abs/2010.02502
76
+ For more details, see the original paper: https://huggingface.co/papers/2010.02502
77
77
 
78
78
  Args:
79
79
  num_train_timesteps (`int`): number of diffusion steps used to train the model.
@@ -230,7 +230,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
230
230
  "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
231
231
  )
232
232
 
233
- # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
233
+ # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
234
234
  # Ideally, read DDIM paper in-detail understanding
235
235
 
236
236
  # Notation (<variable name> -> <name in paper>
@@ -254,7 +254,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
254
254
  beta_prod_t = 1 - alpha_prod_t
255
255
 
256
256
  # 3. compute predicted original sample from predicted noise also called
257
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
257
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
258
258
  if self.config.prediction_type == "epsilon":
259
259
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
260
260
  pred_epsilon = model_output
@@ -281,10 +281,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
281
281
  variance = self._get_variance(state, timestep, prev_timestep)
282
282
  std_dev_t = eta * variance ** (0.5)
283
283
 
284
- # 5. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
284
+ # 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
285
285
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
286
286
 
287
- # 6. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
287
+ # 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
288
288
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
289
289
 
290
290
  if not return_dict:
@@ -1,4 +1,4 @@
1
- # Copyright 2024 The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -93,7 +93,7 @@ def betas_for_alpha_bar(
93
93
  # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
94
94
  def rescale_zero_terminal_snr(betas):
95
95
  """
96
- Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
96
+ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
97
97
 
98
98
 
99
99
  Args:
@@ -266,7 +266,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
266
266
 
267
267
  self.num_inference_steps = num_inference_steps
268
268
 
269
- # "leading" and "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
269
+ # "leading" and "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
270
270
  if self.config.timestep_spacing == "leading":
271
271
  step_ratio = self.config.num_train_timesteps // self.num_inference_steps
272
272
  # creates integer timesteps by multiplying by ratio
@@ -338,7 +338,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
338
338
  beta_prod_t = 1 - alpha_prod_t
339
339
 
340
340
  # 3. compute predicted original sample from predicted noise also called
341
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
341
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
342
342
  if self.config.prediction_type == "epsilon":
343
343
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
344
344
  pred_epsilon = model_output
@@ -360,10 +360,10 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
360
360
  -self.config.clip_sample_range, self.config.clip_sample_range
361
361
  )
362
362
 
363
- # 5. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
363
+ # 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
364
364
  pred_sample_direction = (1 - alpha_prod_t_prev) ** (0.5) * pred_epsilon
365
365
 
366
- # 6. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
366
+ # 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
367
367
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
368
368
 
369
369
  if not return_dict:
@@ -1,4 +1,4 @@
1
- # Copyright 2024 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -95,7 +95,7 @@ def betas_for_alpha_bar(
95
95
  # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
96
96
  def rescale_zero_terminal_snr(betas):
97
97
  """
98
- Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
98
+ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
99
99
 
100
100
 
101
101
  Args:
@@ -139,7 +139,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
139
139
  [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
140
140
  [`~SchedulerMixin.from_pretrained`] functions.
141
141
 
142
- For more details, see the original paper: https://arxiv.org/abs/2010.02502
142
+ For more details, see the original paper: https://huggingface.co/papers/2010.02502
143
143
 
144
144
  Args:
145
145
  num_train_timesteps (`int`): number of diffusion steps used to train the model.
@@ -165,21 +165,21 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
165
165
  process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
166
166
  https://imagen.research.google/video/paper.pdf)
167
167
  thresholding (`bool`, default `False`):
168
- whether to use the "dynamic thresholding" method (introduced by Imagen, https://arxiv.org/abs/2205.11487).
169
- Note that the thresholding method is unsuitable for latent-space diffusion models (such as
170
- stable-diffusion).
168
+ whether to use the "dynamic thresholding" method (introduced by Imagen,
169
+ https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space
170
+ diffusion models (such as stable-diffusion).
171
171
  dynamic_thresholding_ratio (`float`, default `0.995`):
172
172
  the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
173
- (https://arxiv.org/abs/2205.11487). Valid only when `thresholding=True`.
173
+ (https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`.
174
174
  sample_max_value (`float`, default `1.0`):
175
175
  the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
176
176
  timestep_spacing (`str`, default `"leading"`):
177
177
  The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
178
- Steps are Flawed](https://arxiv.org/abs/2305.08891) for more information.
178
+ Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
179
179
  rescale_betas_zero_snr (`bool`, default `False`):
180
- whether to rescale the betas to have zero terminal SNR (proposed by https://arxiv.org/pdf/2305.08891.pdf).
181
- This can enable the model to generate very bright and dark samples instead of limiting it to samples with
182
- medium brightness. Loosely related to
180
+ whether to rescale the betas to have zero terminal SNR (proposed by
181
+ https://huggingface.co/papers/2305.08891). This can enable the model to generate very bright and dark
182
+ samples instead of limiting it to samples with medium brightness. Loosely related to
183
183
  [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
184
184
  """
185
185
 
@@ -291,7 +291,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
291
291
  pixels from saturation at each step. We find that dynamic thresholding results in significantly better
292
292
  photorealism as well as better image-text alignment, especially when using very large guidance weights."
293
293
 
294
- https://arxiv.org/abs/2205.11487
294
+ https://huggingface.co/papers/2205.11487
295
295
  """
296
296
  dtype = sample.dtype
297
297
  batch_size, channels, *remaining_dims = sample.shape
@@ -335,7 +335,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
335
335
 
336
336
  self.num_inference_steps = num_inference_steps
337
337
 
338
- # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
338
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
339
339
  if self.config.timestep_spacing == "linspace":
340
340
  timesteps = (
341
341
  np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -390,7 +390,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
390
390
  generator: random number generator.
391
391
  variance_noise (`torch.Tensor`): instead of generating noise for the variance using `generator`, we
392
392
  can directly provide the noise for the variance itself. This is useful for methods such as
393
- CycleDiffusion. (https://arxiv.org/abs/2210.05559)
393
+ CycleDiffusion. (https://huggingface.co/papers/2210.05559)
394
394
  return_dict (`bool`): option for returning tuple rather than DDIMParallelSchedulerOutput class
395
395
 
396
396
  Returns:
@@ -404,7 +404,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
404
404
  "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
405
405
  )
406
406
 
407
- # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
407
+ # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
408
408
  # Ideally, read DDIM paper in-detail understanding
409
409
 
410
410
  # Notation (<variable name> -> <name in paper>
@@ -425,7 +425,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
425
425
  beta_prod_t = 1 - alpha_prod_t
426
426
 
427
427
  # 3. compute predicted original sample from predicted noise also called
428
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
428
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
429
429
  if self.config.prediction_type == "epsilon":
430
430
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
431
431
  pred_epsilon = model_output
@@ -458,10 +458,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
458
458
  # the pred_epsilon is always re-derived from the clipped x_0 in Glide
459
459
  pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
460
460
 
461
- # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
461
+ # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
462
462
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
463
463
 
464
- # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
464
+ # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
465
465
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
466
466
 
467
467
  if eta > 0:
@@ -526,7 +526,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
526
526
 
527
527
  assert eta == 0.0
528
528
 
529
- # See formulas (12) and (16) of DDIM paper https://arxiv.org/pdf/2010.02502.pdf
529
+ # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
530
530
  # Ideally, read DDIM paper in-detail understanding
531
531
 
532
532
  # Notation (<variable name> -> <name in paper>
@@ -554,7 +554,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
554
554
  beta_prod_t = 1 - alpha_prod_t
555
555
 
556
556
  # 3. compute predicted original sample from predicted noise also called
557
- # "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
557
+ # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
558
558
  if self.config.prediction_type == "epsilon":
559
559
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
560
560
  pred_epsilon = model_output
@@ -587,10 +587,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
587
587
  # the pred_epsilon is always re-derived from the clipped x_0 in Glide
588
588
  pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
589
589
 
590
- # 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
590
+ # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
591
591
  pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
592
592
 
593
- # 7. compute x_t without "random noise" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
593
+ # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
594
594
  prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
595
595
 
596
596
  return prev_sample
@@ -1,4 +1,4 @@
1
- # Copyright 2024 UC Berkeley Team and The HuggingFace Team. All rights reserved.
1
+ # Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
2
2
  #
3
3
  # Licensed under the Apache License, Version 2.0 (the "License");
4
4
  # you may not use this file except in compliance with the License.
@@ -92,7 +92,7 @@ def betas_for_alpha_bar(
92
92
  # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
93
93
  def rescale_zero_terminal_snr(betas):
94
94
  """
95
- Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
95
+ Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
96
96
 
97
97
 
98
98
  Args:
@@ -295,7 +295,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
295
295
  self.num_inference_steps = num_inference_steps
296
296
  self.custom_timesteps = False
297
297
 
298
- # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://arxiv.org/abs/2305.08891
298
+ # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
299
299
  if self.config.timestep_spacing == "linspace":
300
300
  timesteps = (
301
301
  np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -329,7 +329,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
329
329
  alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
330
330
  current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
331
331
 
332
- # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf)
332
+ # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
333
333
  # and sample from it to get previous sample
334
334
  # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
335
335
  variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
@@ -343,7 +343,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
343
343
  # hacks - were probably added for training stability
344
344
  if variance_type == "fixed_small":
345
345
  variance = variance
346
- # for rl-diffuser https://arxiv.org/abs/2205.09991
346
+ # for rl-diffuser https://huggingface.co/papers/2205.09991
347
347
  elif variance_type == "fixed_small_log":
348
348
  variance = torch.log(variance)
349
349
  variance = torch.exp(0.5 * variance)
@@ -370,7 +370,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
370
370
  pixels from saturation at each step. We find that dynamic thresholding results in significantly better
371
371
  photorealism as well as better image-text alignment, especially when using very large guidance weights."
372
372
 
373
- https://arxiv.org/abs/2205.11487
373
+ https://huggingface.co/papers/2205.11487
374
374
  """
375
375
  dtype = sample.dtype
376
376
  batch_size, channels, *remaining_dims = sample.shape
@@ -443,7 +443,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
443
443
  current_beta_t = 1 - current_alpha_t
444
444
 
445
445
  # 2. compute predicted original sample from predicted noise also called
446
- # "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
446
+ # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
447
447
  if self.config.prediction_type == "epsilon":
448
448
  pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
449
449
  elif self.config.prediction_type == "sample":
@@ -465,12 +465,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
465
465
  )
466
466
 
467
467
  # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
468
- # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
468
+ # See formula (7) from https://huggingface.co/papers/2006.11239
469
469
  pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
470
470
  current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
471
471
 
472
472
  # 5. Compute predicted previous sample µ_t
473
- # See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
473
+ # See formula (7) from https://huggingface.co/papers/2006.11239
474
474
  pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
475
475
 
476
476
  # 6. Add noise