diffusers 0.27.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

Files changed (445)
  1. diffusers/__init__.py +233 -6
  2. diffusers/callbacks.py +209 -0
  3. diffusers/commands/env.py +102 -6
  4. diffusers/configuration_utils.py +45 -16
  5. diffusers/dependency_versions_table.py +4 -3
  6. diffusers/image_processor.py +434 -110
  7. diffusers/loaders/__init__.py +42 -9
  8. diffusers/loaders/ip_adapter.py +626 -36
  9. diffusers/loaders/lora_base.py +900 -0
  10. diffusers/loaders/lora_conversion_utils.py +991 -125
  11. diffusers/loaders/lora_pipeline.py +3812 -0
  12. diffusers/loaders/peft.py +571 -7
  13. diffusers/loaders/single_file.py +405 -173
  14. diffusers/loaders/single_file_model.py +385 -0
  15. diffusers/loaders/single_file_utils.py +1783 -713
  16. diffusers/loaders/textual_inversion.py +41 -23
  17. diffusers/loaders/transformer_flux.py +181 -0
  18. diffusers/loaders/transformer_sd3.py +89 -0
  19. diffusers/loaders/unet.py +464 -540
  20. diffusers/loaders/unet_loader_utils.py +163 -0
  21. diffusers/models/__init__.py +76 -7
  22. diffusers/models/activations.py +65 -10
  23. diffusers/models/adapter.py +53 -53
  24. diffusers/models/attention.py +605 -18
  25. diffusers/models/attention_flax.py +1 -1
  26. diffusers/models/attention_processor.py +4304 -687
  27. diffusers/models/autoencoders/__init__.py +8 -0
  28. diffusers/models/autoencoders/autoencoder_asym_kl.py +15 -17
  29. diffusers/models/autoencoders/autoencoder_dc.py +620 -0
  30. diffusers/models/autoencoders/autoencoder_kl.py +110 -28
  31. diffusers/models/autoencoders/autoencoder_kl_allegro.py +1149 -0
  32. diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +1482 -0
  33. diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +1176 -0
  34. diffusers/models/autoencoders/autoencoder_kl_ltx.py +1338 -0
  35. diffusers/models/autoencoders/autoencoder_kl_mochi.py +1166 -0
  36. diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +19 -24
  37. diffusers/models/autoencoders/autoencoder_oobleck.py +464 -0
  38. diffusers/models/autoencoders/autoencoder_tiny.py +21 -18
  39. diffusers/models/autoencoders/consistency_decoder_vae.py +45 -20
  40. diffusers/models/autoencoders/vae.py +41 -29
  41. diffusers/models/autoencoders/vq_model.py +182 -0
  42. diffusers/models/controlnet.py +47 -800
  43. diffusers/models/controlnet_flux.py +70 -0
  44. diffusers/models/controlnet_sd3.py +68 -0
  45. diffusers/models/controlnet_sparsectrl.py +116 -0
  46. diffusers/models/controlnets/__init__.py +23 -0
  47. diffusers/models/controlnets/controlnet.py +872 -0
  48. diffusers/models/{controlnet_flax.py → controlnets/controlnet_flax.py} +9 -9
  49. diffusers/models/controlnets/controlnet_flux.py +536 -0
  50. diffusers/models/controlnets/controlnet_hunyuan.py +401 -0
  51. diffusers/models/controlnets/controlnet_sd3.py +489 -0
  52. diffusers/models/controlnets/controlnet_sparsectrl.py +788 -0
  53. diffusers/models/controlnets/controlnet_union.py +832 -0
  54. diffusers/models/controlnets/controlnet_xs.py +1946 -0
  55. diffusers/models/controlnets/multicontrolnet.py +183 -0
  56. diffusers/models/downsampling.py +85 -18
  57. diffusers/models/embeddings.py +1856 -158
  58. diffusers/models/embeddings_flax.py +23 -9
  59. diffusers/models/model_loading_utils.py +480 -0
  60. diffusers/models/modeling_flax_pytorch_utils.py +2 -1
  61. diffusers/models/modeling_flax_utils.py +2 -7
  62. diffusers/models/modeling_outputs.py +14 -0
  63. diffusers/models/modeling_pytorch_flax_utils.py +1 -1
  64. diffusers/models/modeling_utils.py +611 -146
  65. diffusers/models/normalization.py +361 -20
  66. diffusers/models/resnet.py +18 -23
  67. diffusers/models/transformers/__init__.py +16 -0
  68. diffusers/models/transformers/auraflow_transformer_2d.py +544 -0
  69. diffusers/models/transformers/cogvideox_transformer_3d.py +542 -0
  70. diffusers/models/transformers/dit_transformer_2d.py +240 -0
  71. diffusers/models/transformers/dual_transformer_2d.py +9 -8
  72. diffusers/models/transformers/hunyuan_transformer_2d.py +578 -0
  73. diffusers/models/transformers/latte_transformer_3d.py +327 -0
  74. diffusers/models/transformers/lumina_nextdit2d.py +340 -0
  75. diffusers/models/transformers/pixart_transformer_2d.py +445 -0
  76. diffusers/models/transformers/prior_transformer.py +13 -13
  77. diffusers/models/transformers/sana_transformer.py +488 -0
  78. diffusers/models/transformers/stable_audio_transformer.py +458 -0
  79. diffusers/models/transformers/t5_film_transformer.py +17 -19
  80. diffusers/models/transformers/transformer_2d.py +297 -187
  81. diffusers/models/transformers/transformer_allegro.py +422 -0
  82. diffusers/models/transformers/transformer_cogview3plus.py +386 -0
  83. diffusers/models/transformers/transformer_flux.py +593 -0
  84. diffusers/models/transformers/transformer_hunyuan_video.py +791 -0
  85. diffusers/models/transformers/transformer_ltx.py +469 -0
  86. diffusers/models/transformers/transformer_mochi.py +499 -0
  87. diffusers/models/transformers/transformer_sd3.py +461 -0
  88. diffusers/models/transformers/transformer_temporal.py +21 -19
  89. diffusers/models/unets/unet_1d.py +8 -8
  90. diffusers/models/unets/unet_1d_blocks.py +31 -31
  91. diffusers/models/unets/unet_2d.py +17 -10
  92. diffusers/models/unets/unet_2d_blocks.py +225 -149
  93. diffusers/models/unets/unet_2d_condition.py +41 -40
  94. diffusers/models/unets/unet_2d_condition_flax.py +6 -5
  95. diffusers/models/unets/unet_3d_blocks.py +192 -1057
  96. diffusers/models/unets/unet_3d_condition.py +22 -27
  97. diffusers/models/unets/unet_i2vgen_xl.py +22 -18
  98. diffusers/models/unets/unet_kandinsky3.py +2 -2
  99. diffusers/models/unets/unet_motion_model.py +1413 -89
  100. diffusers/models/unets/unet_spatio_temporal_condition.py +40 -16
  101. diffusers/models/unets/unet_stable_cascade.py +19 -18
  102. diffusers/models/unets/uvit_2d.py +2 -2
  103. diffusers/models/upsampling.py +95 -26
  104. diffusers/models/vq_model.py +12 -164
  105. diffusers/optimization.py +1 -1
  106. diffusers/pipelines/__init__.py +202 -3
  107. diffusers/pipelines/allegro/__init__.py +48 -0
  108. diffusers/pipelines/allegro/pipeline_allegro.py +938 -0
  109. diffusers/pipelines/allegro/pipeline_output.py +23 -0
  110. diffusers/pipelines/amused/pipeline_amused.py +12 -12
  111. diffusers/pipelines/amused/pipeline_amused_img2img.py +14 -12
  112. diffusers/pipelines/amused/pipeline_amused_inpaint.py +13 -11
  113. diffusers/pipelines/animatediff/__init__.py +8 -0
  114. diffusers/pipelines/animatediff/pipeline_animatediff.py +122 -109
  115. diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +1106 -0
  116. diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +1288 -0
  117. diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +1010 -0
  118. diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +236 -180
  119. diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +1341 -0
  120. diffusers/pipelines/animatediff/pipeline_output.py +3 -2
  121. diffusers/pipelines/audioldm/pipeline_audioldm.py +14 -14
  122. diffusers/pipelines/audioldm2/modeling_audioldm2.py +58 -39
  123. diffusers/pipelines/audioldm2/pipeline_audioldm2.py +121 -36
  124. diffusers/pipelines/aura_flow/__init__.py +48 -0
  125. diffusers/pipelines/aura_flow/pipeline_aura_flow.py +584 -0
  126. diffusers/pipelines/auto_pipeline.py +196 -28
  127. diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
  128. diffusers/pipelines/blip_diffusion/modeling_blip2.py +6 -6
  129. diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +1 -1
  130. diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +2 -2
  131. diffusers/pipelines/cogvideo/__init__.py +54 -0
  132. diffusers/pipelines/cogvideo/pipeline_cogvideox.py +772 -0
  133. diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +825 -0
  134. diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +885 -0
  135. diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +851 -0
  136. diffusers/pipelines/cogvideo/pipeline_output.py +20 -0
  137. diffusers/pipelines/cogview3/__init__.py +47 -0
  138. diffusers/pipelines/cogview3/pipeline_cogview3plus.py +674 -0
  139. diffusers/pipelines/cogview3/pipeline_output.py +21 -0
  140. diffusers/pipelines/consistency_models/pipeline_consistency_models.py +6 -6
  141. diffusers/pipelines/controlnet/__init__.py +86 -80
  142. diffusers/pipelines/controlnet/multicontrolnet.py +7 -182
  143. diffusers/pipelines/controlnet/pipeline_controlnet.py +134 -87
  144. diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +2 -2
  145. diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +93 -77
  146. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +88 -197
  147. diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +136 -90
  148. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +176 -80
  149. diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +125 -89
  150. diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +1790 -0
  151. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +1501 -0
  152. diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +1627 -0
  153. diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +2 -2
  154. diffusers/pipelines/controlnet_hunyuandit/__init__.py +48 -0
  155. diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +1060 -0
  156. diffusers/pipelines/controlnet_sd3/__init__.py +57 -0
  157. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +1133 -0
  158. diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +1153 -0
  159. diffusers/pipelines/controlnet_xs/__init__.py +68 -0
  160. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +916 -0
  161. diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +1111 -0
  162. diffusers/pipelines/ddpm/pipeline_ddpm.py +2 -2
  163. diffusers/pipelines/deepfloyd_if/pipeline_if.py +16 -30
  164. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +20 -35
  165. diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +23 -41
  166. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +22 -38
  167. diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +25 -41
  168. diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +19 -34
  169. diffusers/pipelines/deepfloyd_if/pipeline_output.py +6 -5
  170. diffusers/pipelines/deepfloyd_if/watermark.py +1 -1
  171. diffusers/pipelines/deprecated/alt_diffusion/modeling_roberta_series.py +11 -11
  172. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +70 -30
  173. diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +48 -25
  174. diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +2 -2
  175. diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +7 -7
  176. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +21 -20
  177. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +27 -29
  178. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +33 -27
  179. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +33 -23
  180. diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +36 -30
  181. diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +102 -69
  182. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +13 -13
  183. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +10 -5
  184. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +11 -6
  185. diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +10 -5
  186. diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +5 -5
  187. diffusers/pipelines/dit/pipeline_dit.py +7 -4
  188. diffusers/pipelines/flux/__init__.py +69 -0
  189. diffusers/pipelines/flux/modeling_flux.py +47 -0
  190. diffusers/pipelines/flux/pipeline_flux.py +957 -0
  191. diffusers/pipelines/flux/pipeline_flux_control.py +889 -0
  192. diffusers/pipelines/flux/pipeline_flux_control_img2img.py +945 -0
  193. diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +1141 -0
  194. diffusers/pipelines/flux/pipeline_flux_controlnet.py +1006 -0
  195. diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +998 -0
  196. diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +1204 -0
  197. diffusers/pipelines/flux/pipeline_flux_fill.py +969 -0
  198. diffusers/pipelines/flux/pipeline_flux_img2img.py +856 -0
  199. diffusers/pipelines/flux/pipeline_flux_inpaint.py +1022 -0
  200. diffusers/pipelines/flux/pipeline_flux_prior_redux.py +492 -0
  201. diffusers/pipelines/flux/pipeline_output.py +37 -0
  202. diffusers/pipelines/free_init_utils.py +41 -38
  203. diffusers/pipelines/free_noise_utils.py +596 -0
  204. diffusers/pipelines/hunyuan_video/__init__.py +48 -0
  205. diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +687 -0
  206. diffusers/pipelines/hunyuan_video/pipeline_output.py +20 -0
  207. diffusers/pipelines/hunyuandit/__init__.py +48 -0
  208. diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +916 -0
  209. diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +33 -48
  210. diffusers/pipelines/kandinsky/pipeline_kandinsky.py +8 -8
  211. diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +32 -29
  212. diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +11 -11
  213. diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +12 -12
  214. diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +10 -10
  215. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
  216. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +34 -31
  217. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +10 -10
  218. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +10 -10
  219. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +6 -6
  220. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +8 -8
  221. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +7 -7
  222. diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +6 -6
  223. diffusers/pipelines/kandinsky3/convert_kandinsky3_unet.py +3 -3
  224. diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +22 -35
  225. diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +26 -37
  226. diffusers/pipelines/kolors/__init__.py +54 -0
  227. diffusers/pipelines/kolors/pipeline_kolors.py +1070 -0
  228. diffusers/pipelines/kolors/pipeline_kolors_img2img.py +1250 -0
  229. diffusers/pipelines/kolors/pipeline_output.py +21 -0
  230. diffusers/pipelines/kolors/text_encoder.py +889 -0
  231. diffusers/pipelines/kolors/tokenizer.py +338 -0
  232. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +82 -62
  233. diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +77 -60
  234. diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +12 -12
  235. diffusers/pipelines/latte/__init__.py +48 -0
  236. diffusers/pipelines/latte/pipeline_latte.py +881 -0
  237. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +80 -74
  238. diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +85 -76
  239. diffusers/pipelines/ledits_pp/pipeline_output.py +2 -2
  240. diffusers/pipelines/ltx/__init__.py +50 -0
  241. diffusers/pipelines/ltx/pipeline_ltx.py +789 -0
  242. diffusers/pipelines/ltx/pipeline_ltx_image2video.py +885 -0
  243. diffusers/pipelines/ltx/pipeline_output.py +20 -0
  244. diffusers/pipelines/lumina/__init__.py +48 -0
  245. diffusers/pipelines/lumina/pipeline_lumina.py +890 -0
  246. diffusers/pipelines/marigold/__init__.py +50 -0
  247. diffusers/pipelines/marigold/marigold_image_processing.py +576 -0
  248. diffusers/pipelines/marigold/pipeline_marigold_depth.py +813 -0
  249. diffusers/pipelines/marigold/pipeline_marigold_normals.py +690 -0
  250. diffusers/pipelines/mochi/__init__.py +48 -0
  251. diffusers/pipelines/mochi/pipeline_mochi.py +748 -0
  252. diffusers/pipelines/mochi/pipeline_output.py +20 -0
  253. diffusers/pipelines/musicldm/pipeline_musicldm.py +14 -14
  254. diffusers/pipelines/pag/__init__.py +80 -0
  255. diffusers/pipelines/pag/pag_utils.py +243 -0
  256. diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +1328 -0
  257. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +1543 -0
  258. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +1610 -0
  259. diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +1683 -0
  260. diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +969 -0
  261. diffusers/pipelines/pag/pipeline_pag_kolors.py +1136 -0
  262. diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +865 -0
  263. diffusers/pipelines/pag/pipeline_pag_sana.py +886 -0
  264. diffusers/pipelines/pag/pipeline_pag_sd.py +1062 -0
  265. diffusers/pipelines/pag/pipeline_pag_sd_3.py +994 -0
  266. diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +1058 -0
  267. diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +866 -0
  268. diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +1094 -0
  269. diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +1356 -0
  270. diffusers/pipelines/pag/pipeline_pag_sd_xl.py +1345 -0
  271. diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +1544 -0
  272. diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +1776 -0
  273. diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +17 -12
  274. diffusers/pipelines/pia/pipeline_pia.py +74 -164
  275. diffusers/pipelines/pipeline_flax_utils.py +5 -10
  276. diffusers/pipelines/pipeline_loading_utils.py +515 -53
  277. diffusers/pipelines/pipeline_utils.py +411 -222
  278. diffusers/pipelines/pixart_alpha/__init__.py +8 -1
  279. diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +76 -93
  280. diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +873 -0
  281. diffusers/pipelines/sana/__init__.py +47 -0
  282. diffusers/pipelines/sana/pipeline_output.py +21 -0
  283. diffusers/pipelines/sana/pipeline_sana.py +884 -0
  284. diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +27 -23
  285. diffusers/pipelines/shap_e/pipeline_shap_e.py +3 -3
  286. diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +14 -14
  287. diffusers/pipelines/shap_e/renderer.py +1 -1
  288. diffusers/pipelines/stable_audio/__init__.py +50 -0
  289. diffusers/pipelines/stable_audio/modeling_stable_audio.py +158 -0
  290. diffusers/pipelines/stable_audio/pipeline_stable_audio.py +756 -0
  291. diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +71 -25
  292. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +23 -19
  293. diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +35 -34
  294. diffusers/pipelines/stable_diffusion/__init__.py +0 -1
  295. diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +20 -11
  296. diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
  297. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
  298. diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +6 -6
  299. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +145 -79
  300. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +43 -28
  301. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +13 -8
  302. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +100 -68
  303. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +109 -201
  304. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +131 -32
  305. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +247 -87
  306. diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +30 -29
  307. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +35 -27
  308. diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +49 -42
  309. diffusers/pipelines/stable_diffusion/safety_checker.py +2 -1
  310. diffusers/pipelines/stable_diffusion_3/__init__.py +54 -0
  311. diffusers/pipelines/stable_diffusion_3/pipeline_output.py +21 -0
  312. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +1140 -0
  313. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +1036 -0
  314. diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +1250 -0
  315. diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +29 -20
  316. diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +59 -58
  317. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +31 -25
  318. diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +38 -22
  319. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -24
  320. diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +24 -23
  321. diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +107 -67
  322. diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +316 -69
  323. diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +10 -5
  324. diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
  325. diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +98 -30
  326. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +121 -83
  327. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +161 -105
  328. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +142 -218
  329. diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +45 -29
  330. diffusers/pipelines/stable_diffusion_xl/watermark.py +9 -3
  331. diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +110 -57
  332. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +69 -39
  333. diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +105 -74
  334. diffusers/pipelines/text_to_video_synthesis/pipeline_output.py +3 -2
  335. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +29 -49
  336. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +32 -93
  337. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +37 -25
  338. diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +54 -40
  339. diffusers/pipelines/unclip/pipeline_unclip.py +6 -6
  340. diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +6 -6
  341. diffusers/pipelines/unidiffuser/modeling_text_decoder.py +1 -1
  342. diffusers/pipelines/unidiffuser/modeling_uvit.py +12 -12
  343. diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +29 -28
  344. diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +5 -5
  345. diffusers/pipelines/wuerstchen/modeling_wuerstchen_common.py +5 -10
  346. diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +6 -8
  347. diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +4 -4
  348. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +12 -12
  349. diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +15 -14
  350. diffusers/{models/dual_transformer_2d.py → quantizers/__init__.py} +2 -6
  351. diffusers/quantizers/auto.py +139 -0
  352. diffusers/quantizers/base.py +233 -0
  353. diffusers/quantizers/bitsandbytes/__init__.py +2 -0
  354. diffusers/quantizers/bitsandbytes/bnb_quantizer.py +561 -0
  355. diffusers/quantizers/bitsandbytes/utils.py +306 -0
  356. diffusers/quantizers/gguf/__init__.py +1 -0
  357. diffusers/quantizers/gguf/gguf_quantizer.py +159 -0
  358. diffusers/quantizers/gguf/utils.py +456 -0
  359. diffusers/quantizers/quantization_config.py +669 -0
  360. diffusers/quantizers/torchao/__init__.py +15 -0
  361. diffusers/quantizers/torchao/torchao_quantizer.py +292 -0
  362. diffusers/schedulers/__init__.py +12 -2
  363. diffusers/schedulers/deprecated/__init__.py +1 -1
  364. diffusers/schedulers/deprecated/scheduling_karras_ve.py +25 -25
  365. diffusers/schedulers/scheduling_amused.py +5 -5
  366. diffusers/schedulers/scheduling_consistency_decoder.py +11 -11
  367. diffusers/schedulers/scheduling_consistency_models.py +23 -25
  368. diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +572 -0
  369. diffusers/schedulers/scheduling_ddim.py +27 -26
  370. diffusers/schedulers/scheduling_ddim_cogvideox.py +452 -0
  371. diffusers/schedulers/scheduling_ddim_flax.py +2 -1
  372. diffusers/schedulers/scheduling_ddim_inverse.py +16 -16
  373. diffusers/schedulers/scheduling_ddim_parallel.py +32 -31
  374. diffusers/schedulers/scheduling_ddpm.py +27 -30
  375. diffusers/schedulers/scheduling_ddpm_flax.py +7 -3
  376. diffusers/schedulers/scheduling_ddpm_parallel.py +33 -36
  377. diffusers/schedulers/scheduling_ddpm_wuerstchen.py +14 -14
  378. diffusers/schedulers/scheduling_deis_multistep.py +150 -50
  379. diffusers/schedulers/scheduling_dpm_cogvideox.py +489 -0
  380. diffusers/schedulers/scheduling_dpmsolver_multistep.py +221 -84
  381. diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +2 -2
  382. diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +158 -52
  383. diffusers/schedulers/scheduling_dpmsolver_sde.py +153 -34
  384. diffusers/schedulers/scheduling_dpmsolver_singlestep.py +275 -86
  385. diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +81 -57
  386. diffusers/schedulers/scheduling_edm_euler.py +62 -39
  387. diffusers/schedulers/scheduling_euler_ancestral_discrete.py +30 -29
  388. diffusers/schedulers/scheduling_euler_discrete.py +255 -74
  389. diffusers/schedulers/scheduling_flow_match_euler_discrete.py +458 -0
  390. diffusers/schedulers/scheduling_flow_match_heun_discrete.py +320 -0
  391. diffusers/schedulers/scheduling_heun_discrete.py +174 -46
  392. diffusers/schedulers/scheduling_ipndm.py +9 -9
  393. diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +138 -29
  394. diffusers/schedulers/scheduling_k_dpm_2_discrete.py +132 -26
  395. diffusers/schedulers/scheduling_karras_ve_flax.py +6 -6
  396. diffusers/schedulers/scheduling_lcm.py +23 -29
  397. diffusers/schedulers/scheduling_lms_discrete.py +105 -28
  398. diffusers/schedulers/scheduling_pndm.py +20 -20
  399. diffusers/schedulers/scheduling_repaint.py +21 -21
  400. diffusers/schedulers/scheduling_sasolver.py +157 -60
  401. diffusers/schedulers/scheduling_sde_ve.py +19 -19
  402. diffusers/schedulers/scheduling_tcd.py +41 -36
  403. diffusers/schedulers/scheduling_unclip.py +19 -16
  404. diffusers/schedulers/scheduling_unipc_multistep.py +243 -47
  405. diffusers/schedulers/scheduling_utils.py +12 -5
  406. diffusers/schedulers/scheduling_utils_flax.py +1 -3
  407. diffusers/schedulers/scheduling_vq_diffusion.py +10 -10
  408. diffusers/training_utils.py +214 -30
  409. diffusers/utils/__init__.py +17 -1
  410. diffusers/utils/constants.py +3 -0
  411. diffusers/utils/doc_utils.py +1 -0
  412. diffusers/utils/dummy_pt_objects.py +592 -7
  413. diffusers/utils/dummy_torch_and_torchsde_objects.py +15 -0
  414. diffusers/utils/dummy_torch_and_transformers_and_sentencepiece_objects.py +47 -0
  415. diffusers/utils/dummy_torch_and_transformers_objects.py +1001 -71
  416. diffusers/utils/dynamic_modules_utils.py +34 -29
  417. diffusers/utils/export_utils.py +50 -6
  418. diffusers/utils/hub_utils.py +131 -17
  419. diffusers/utils/import_utils.py +210 -8
  420. diffusers/utils/loading_utils.py +118 -5
  421. diffusers/utils/logging.py +4 -2
  422. diffusers/utils/peft_utils.py +37 -7
  423. diffusers/utils/state_dict_utils.py +13 -2
  424. diffusers/utils/testing_utils.py +193 -11
  425. diffusers/utils/torch_utils.py +4 -0
  426. diffusers/video_processor.py +113 -0
  427. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/METADATA +82 -91
  428. diffusers-0.32.2.dist-info/RECORD +550 -0
  429. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/WHEEL +1 -1
  430. diffusers/loaders/autoencoder.py +0 -146
  431. diffusers/loaders/controlnet.py +0 -136
  432. diffusers/loaders/lora.py +0 -1349
  433. diffusers/models/prior_transformer.py +0 -12
  434. diffusers/models/t5_film_transformer.py +0 -70
  435. diffusers/models/transformer_2d.py +0 -25
  436. diffusers/models/transformer_temporal.py +0 -34
  437. diffusers/models/unet_1d.py +0 -26
  438. diffusers/models/unet_1d_blocks.py +0 -203
  439. diffusers/models/unet_2d.py +0 -27
  440. diffusers/models/unet_2d_blocks.py +0 -375
  441. diffusers/models/unet_2d_condition.py +0 -25
  442. diffusers-0.27.1.dist-info/RECORD +0 -399
  443. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/LICENSE +0 -0
  444. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/entry_points.txt +0 -0
  445. {diffusers-0.27.1.dist-info → diffusers-0.32.2.dist-info}/top_level.txt +0 -0
diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py

@@ -13,14 +13,39 @@
 # limitations under the License.

 import math
+from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union

 import numpy as np
 import torch

 from ..configuration_utils import ConfigMixin, register_to_config
+from ..utils import BaseOutput, is_scipy_available
 from ..utils.torch_utils import randn_tensor
-from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput
+from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+if is_scipy_available():
+    import scipy.stats
+
+
+@dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->KDPM2AncestralDiscrete
+class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+
+    Args:
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+            `pred_original_sample` can be used to preview progress or for guidance.
+    """
+
+    prev_sample: torch.Tensor
+    pred_original_sample: Optional[torch.Tensor] = None


 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -58,7 +83,7 @@ def betas_for_alpha_bar(
             return math.exp(t * -12.0)

     else:
-        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")

     betas = []
     for i in range(num_diffusion_timesteps):
@@ -91,6 +116,11 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
             Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
             the sigmas are determined according to a sequence of noise levels {σi}.
+        use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+            Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+        use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+            Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+            Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
         prediction_type (`str`, defaults to `epsilon`, *optional*):
             Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
             `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -114,10 +144,18 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         beta_schedule: str = "linear",
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         use_karras_sigmas: Optional[bool] = False,
+        use_exponential_sigmas: Optional[bool] = False,
+        use_beta_sigmas: Optional[bool] = False,
         prediction_type: str = "epsilon",
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
     ):
+        if self.config.use_beta_sigmas and not is_scipy_available():
+            raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
+        if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+            raise ValueError(
+                "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+            )
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
         elif beta_schedule == "linear":
@@ -129,7 +167,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
         else:
-            raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+            raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")

         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
@@ -151,7 +189,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
     @property
     def step_index(self):
         """
-        The index counter for current timestep. It will increae 1 after each scheduler step.
+        The index counter for current timestep. It will increase 1 after each scheduler step.
         """
         return self._step_index

@@ -175,21 +213,21 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):

     def scale_model_input(
         self,
-        sample: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-    ) -> torch.FloatTensor:
+        sample: torch.Tensor,
+        timestep: Union[float, torch.Tensor],
+    ) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.

         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.

         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         if self.step_index is None:
@@ -250,6 +288,12 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
+        elif self.config.use_exponential_sigmas:
+            sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
+            timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+        elif self.config.use_beta_sigmas:
+            sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
+            timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

         self.log_sigmas = torch.from_numpy(log_sigmas).to(device)
         sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -321,7 +365,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         return t

     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
-    def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
+    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
         """Constructs the noise schedule of Karras et al. (2022)."""

         # Hack to make sure that other schedulers which copy this function don't break
@@ -346,6 +390,60 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
         return sigmas

+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+    def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+        """Constructs an exponential noise schedule."""
+
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+        sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps))
+        return sigmas
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+    def _convert_to_beta(
+        self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+    ) -> torch.Tensor:
+        """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+        sigmas = np.array(
+            [
+                sigma_min + (ppf * (sigma_max - sigma_min))
+                for ppf in [
+                    scipy.stats.beta.ppf(timestep, alpha, beta)
+                    for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                ]
+            ]
+        )
+        return sigmas
+
     @property
     def state_in_first_order(self):
         return self.sample is None
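Note (added for context, not part of the diff): the two helpers introduced above differ only in how they space sigmas between sigma_max and sigma_min — log-linearly for the exponential variant, and via the percent-point function of a Beta(0.6, 0.6) distribution for the beta variant. A minimal standalone sketch of the same arithmetic follows; the function names are illustrative only and do not exist in diffusers.

import math

import numpy as np
import scipy.stats


def exponential_sigmas(sigma_min, sigma_max, n):
    # Log-linear spacing from sigma_max down to sigma_min, mirroring _convert_to_exponential.
    return np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), n))


def beta_sigmas(sigma_min, sigma_max, n, alpha=0.6, beta=0.6):
    # The Beta(alpha, beta) percent-point function spaces the sigmas, mirroring _convert_to_beta.
    ppfs = [scipy.stats.beta.ppf(t, alpha, beta) for t in 1 - np.linspace(0, 1, n)]
    return np.array([sigma_min + p * (sigma_max - sigma_min) for p in ppfs])


print(exponential_sigmas(0.03, 14.6, 5))
print(beta_sigmas(0.03, 14.6, 5))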
@@ -376,32 +474,34 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):

     def step(
         self,
-        model_output: Union[torch.FloatTensor, np.ndarray],
-        timestep: Union[float, torch.FloatTensor],
-        sample: Union[torch.FloatTensor, np.ndarray],
+        model_output: Union[torch.Tensor, np.ndarray],
+        timestep: Union[float, torch.Tensor],
+        sample: Union[torch.Tensor, np.ndarray],
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
-    ) -> Union[SchedulerOutput, Tuple]:
+    ) -> Union[KDPM2AncestralDiscreteSchedulerOutput, Tuple]:
         """
         Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).

         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             timestep (`float`):
                 The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
             return_dict (`bool`):
-                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
+                Whether or not to return a
+                [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] or tuple.

         Returns:
-            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
-                If return_dict is `True`, [`~schedulers.scheduling_ddim.SchedulerOutput`] is returned, otherwise a
-                tuple is returned where the first element is the sample tensor.
+            [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`,
+                [`~schedulers.scheduling_k_dpm_2_ancestral_discrete.KDPM2AncestralDiscreteSchedulerOutput`] is
+                returned, otherwise a tuple is returned where the first element is the sample tensor.
         """
         if self.step_index is None:
             self._init_step_index(timestep)
@@ -424,9 +524,6 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         gamma = 0
         sigma_hat = sigma * (gamma + 1)  # Note: sigma_hat == sigma for now

-        device = model_output.device
-        noise = randn_tensor(model_output.shape, dtype=model_output.dtype, device=device, generator=generator)
-
         # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise
         if self.config.prediction_type == "epsilon":
             sigma_input = sigma_hat if self.state_in_first_order else sigma_interpol
@@ -464,23 +561,31 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
             self.sample = None

         prev_sample = sample + derivative * dt
+        noise = randn_tensor(
+            model_output.shape, dtype=model_output.dtype, device=model_output.device, generator=generator
+        )
         prev_sample = prev_sample + noise * sigma_up

         # upon completion increase step index by one
         self._step_index += 1

         if not return_dict:
-            return (prev_sample,)
+            return (
+                prev_sample,
+                pred_original_sample,
+            )

-        return SchedulerOutput(prev_sample=prev_sample)
+        return KDPM2AncestralDiscreteSchedulerOutput(
+            prev_sample=prev_sample, pred_original_sample=pred_original_sample
+        )

     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
-        timesteps: torch.FloatTensor,
-    ) -> torch.FloatTensor:
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
+        timesteps: torch.Tensor,
+    ) -> torch.Tensor:
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
         if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
@@ -494,7 +599,11 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
         # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
         if self.begin_index is None:
             step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
+        elif self.step_index is not None:
+            # add_noise is called after first denoising step (for inpainting)
+            step_indices = [self.step_index] * timesteps.shape[0]
         else:
+            # add noise is called before first denoising step to create initial latent(img2img)
             step_indices = [self.begin_index] * timesteps.shape[0]

         sigma = sigmas[step_indices].flatten()
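Taken together, the changes to scheduling_k_dpm_2_ancestral_discrete.py expose the new sigma spacings through the scheduler config. A minimal usage sketch is given below; the checkpoint id is only an example, and `use_beta_sigmas=True` additionally requires scipy.

import torch
from diffusers import KDPM2AncestralDiscreteScheduler, StableDiffusionPipeline

# Swap in the KDPM2 ancestral scheduler and opt into the new beta-sigma spacing.
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16)
pipe.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipe.scheduler.config, use_beta_sigmas=True)
pipe = pipe.to("cuda")

image = pipe("a photo of an astronaut riding a horse", num_inference_steps=30).images[0]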
diffusers/schedulers/scheduling_k_dpm_2_discrete.py

@@ -13,13 +13,38 @@
 # limitations under the License.

 import math
+from dataclasses import dataclass
 from typing import List, Optional, Tuple, Union

 import numpy as np
 import torch

 from ..configuration_utils import ConfigMixin, register_to_config
-from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, SchedulerOutput
+from ..utils import BaseOutput, is_scipy_available
+from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin
+
+
+if is_scipy_available():
+    import scipy.stats
+
+
+@dataclass
+# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->KDPM2Discrete
+class KDPM2DiscreteSchedulerOutput(BaseOutput):
+    """
+    Output class for the scheduler's `step` function output.
+
+    Args:
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
+            denoising loop.
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
+            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
+            `pred_original_sample` can be used to preview progress or for guidance.
+    """
+
+    prev_sample: torch.Tensor
+    pred_original_sample: Optional[torch.Tensor] = None


 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -57,7 +82,7 @@ def betas_for_alpha_bar(
             return math.exp(t * -12.0)

     else:
-        raise ValueError(f"Unsupported alpha_tranform_type: {alpha_transform_type}")
+        raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")

     betas = []
     for i in range(num_diffusion_timesteps):
@@ -90,6 +115,11 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         use_karras_sigmas (`bool`, *optional*, defaults to `False`):
             Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
             the sigmas are determined according to a sequence of noise levels {σi}.
+        use_exponential_sigmas (`bool`, *optional*, defaults to `False`):
+            Whether to use exponential sigmas for step sizes in the noise schedule during the sampling process.
+        use_beta_sigmas (`bool`, *optional*, defaults to `False`):
+            Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
+            Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
         prediction_type (`str`, defaults to `epsilon`, *optional*):
             Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
             `sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
@@ -113,10 +143,18 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         beta_schedule: str = "linear",
         trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
         use_karras_sigmas: Optional[bool] = False,
+        use_exponential_sigmas: Optional[bool] = False,
+        use_beta_sigmas: Optional[bool] = False,
         prediction_type: str = "epsilon",
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
     ):
+        if self.config.use_beta_sigmas and not is_scipy_available():
+            raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
+        if sum([self.config.use_beta_sigmas, self.config.use_exponential_sigmas, self.config.use_karras_sigmas]) > 1:
+            raise ValueError(
+                "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
+            )
         if trained_betas is not None:
             self.betas = torch.tensor(trained_betas, dtype=torch.float32)
         elif beta_schedule == "linear":
@@ -128,7 +166,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
             # Glide cosine schedule
             self.betas = betas_for_alpha_bar(num_train_timesteps)
         else:
-            raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}")
+            raise NotImplementedError(f"{beta_schedule} is not implemented for {self.__class__}")

         self.alphas = 1.0 - self.betas
         self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)
@@ -151,7 +189,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
     @property
     def step_index(self):
         """
-        The index counter for current timestep. It will increae 1 after each scheduler step.
+        The index counter for current timestep. It will increase 1 after each scheduler step.
         """
         return self._step_index

@@ -175,21 +213,21 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):

     def scale_model_input(
         self,
-        sample: torch.FloatTensor,
-        timestep: Union[float, torch.FloatTensor],
-    ) -> torch.FloatTensor:
+        sample: torch.Tensor,
+        timestep: Union[float, torch.Tensor],
+    ) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.

         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.

         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         if self.step_index is None:
@@ -249,6 +287,12 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         if self.config.use_karras_sigmas:
             sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
             timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas]).round()
+        elif self.config.use_exponential_sigmas:
+            sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
+            timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])
+        elif self.config.use_beta_sigmas:
+            sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
+            timesteps = np.array([self._sigma_to_t(sigma, log_sigmas) for sigma in sigmas])

         self.log_sigmas = torch.from_numpy(log_sigmas).to(device=device)
         sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32)
@@ -334,7 +378,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         return t

     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
-    def _convert_to_karras(self, in_sigmas: torch.FloatTensor, num_inference_steps) -> torch.FloatTensor:
+    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
         """Constructs the noise schedule of Karras et al. (2022)."""

         # Hack to make sure that other schedulers which copy this function don't break
@@ -359,31 +403,86 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
         return sigmas

+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
+    def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
+        """Constructs an exponential noise schedule."""
+
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+        sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps))
+        return sigmas
+
+    # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
+    def _convert_to_beta(
+        self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
+    ) -> torch.Tensor:
+        """From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
+
+        # Hack to make sure that other schedulers which copy this function don't break
+        # TODO: Add this logic to the other schedulers
+        if hasattr(self.config, "sigma_min"):
+            sigma_min = self.config.sigma_min
+        else:
+            sigma_min = None
+
+        if hasattr(self.config, "sigma_max"):
+            sigma_max = self.config.sigma_max
+        else:
+            sigma_max = None
+
+        sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
+        sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
+
+        sigmas = np.array(
+            [
+                sigma_min + (ppf * (sigma_max - sigma_min))
+                for ppf in [
+                    scipy.stats.beta.ppf(timestep, alpha, beta)
+                    for timestep in 1 - np.linspace(0, 1, num_inference_steps)
+                ]
+            ]
+        )
+        return sigmas
+
     def step(
         self,
-        model_output: Union[torch.FloatTensor, np.ndarray],
-        timestep: Union[float, torch.FloatTensor],
-        sample: Union[torch.FloatTensor, np.ndarray],
+        model_output: Union[torch.Tensor, np.ndarray],
+        timestep: Union[float, torch.Tensor],
+        sample: Union[torch.Tensor, np.ndarray],
         return_dict: bool = True,
-    ) -> Union[SchedulerOutput, Tuple]:
+    ) -> Union[KDPM2DiscreteSchedulerOutput, Tuple]:
         """
         Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
         process from the learned model outputs (most often the predicted noise).

         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             timestep (`float`):
                 The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             return_dict (`bool`):
-                Whether or not to return a [`~schedulers.scheduling_utils.SchedulerOutput`] or tuple.
+                Whether or not to return a [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or
+                tuple.

         Returns:
-            [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`:
-                If return_dict is `True`, [`~schedulers.scheduling_utils.SchedulerOutput`] is returned, otherwise a
-                tuple is returned where the first element is the sample tensor.
+            [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] or `tuple`:
+                If return_dict is `True`, [`~schedulers.scheduling_k_dpm_2_discrete.KDPM2DiscreteSchedulerOutput`] is
+                returned, otherwise a tuple is returned where the first element is the sample tensor.
         """
         if self.step_index is None:
             self._init_step_index(timestep)
@@ -445,17 +544,20 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         prev_sample = sample + derivative * dt

         if not return_dict:
-            return (prev_sample,)
+            return (
+                prev_sample,
+                pred_original_sample,
+            )

-        return SchedulerOutput(prev_sample=prev_sample)
+        return KDPM2DiscreteSchedulerOutput(prev_sample=prev_sample, pred_original_sample=pred_original_sample)

     # Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
-        timesteps: torch.FloatTensor,
-    ) -> torch.FloatTensor:
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
+        timesteps: torch.Tensor,
+    ) -> torch.Tensor:
         # Make sure sigmas and timesteps have the same device and dtype as original_samples
         sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
         if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
@@ -469,7 +571,11 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
         # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
         if self.begin_index is None:
             step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timesteps]
+        elif self.step_index is not None:
+            # add_noise is called after first denoising step (for inpainting)
+            step_indices = [self.step_index] * timesteps.shape[0]
         else:
+            # add noise is called before first denoising step to create initial latent(img2img)
             step_indices = [self.begin_index] * timesteps.shape[0]

         sigma = sigmas[step_indices].flatten()
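As with the ancestral variant above, `step()` called with `return_dict=False` now returns a two-element tuple `(prev_sample, pred_original_sample)` rather than a one-element tuple. A self-contained sketch against diffusers 0.32.x follows; random tensors stand in for real latents and UNet output.

import torch
from diffusers import KDPM2DiscreteScheduler

scheduler = KDPM2DiscreteScheduler()
scheduler.set_timesteps(num_inference_steps=10)

sample = torch.randn(1, 4, 64, 64)         # stand-in latents
model_output = torch.randn(1, 4, 64, 64)   # stand-in for a UNet noise prediction
t = scheduler.timesteps[0]

sample = scheduler.scale_model_input(sample, t)
prev_sample, pred_original_sample = scheduler.step(model_output, t, sample, return_dict=False)
print(prev_sample.shape, pred_original_sample.shape)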
diffusers/schedulers/scheduling_karras_ve_flax.py

@@ -176,10 +176,10 @@ class FlaxKarrasVeScheduler(FlaxSchedulerMixin, ConfigMixin):

         Args:
             state (`KarrasVeSchedulerState`): the `FlaxKarrasVeScheduler` state data class.
-            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            model_output (`torch.Tensor` or `np.ndarray`): direct output from learned diffusion model.
             sigma_hat (`float`): TODO
             sigma_prev (`float`): TODO
-            sample_hat (`torch.FloatTensor` or `np.ndarray`): TODO
+            sample_hat (`torch.Tensor` or `np.ndarray`): TODO
             return_dict (`bool`): option for returning tuple rather than FlaxKarrasVeOutput class

         Returns:
@@ -213,12 +213,12 @@ class FlaxKarrasVeScheduler(FlaxSchedulerMixin, ConfigMixin):

         Args:
             state (`KarrasVeSchedulerState`): the `FlaxKarrasVeScheduler` state data class.
-            model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model.
+            model_output (`torch.Tensor` or `np.ndarray`): direct output from learned diffusion model.
             sigma_hat (`float`): TODO
             sigma_prev (`float`): TODO
-            sample_hat (`torch.FloatTensor` or `np.ndarray`): TODO
-            sample_prev (`torch.FloatTensor` or `np.ndarray`): TODO
-            derivative (`torch.FloatTensor` or `np.ndarray`): TODO
+            sample_hat (`torch.Tensor` or `np.ndarray`): TODO
+            sample_prev (`torch.Tensor` or `np.ndarray`): TODO
+            derivative (`torch.Tensor` or `np.ndarray`): TODO
             return_dict (`bool`): option for returning tuple rather than FlaxKarrasVeOutput class

         Returns: