diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -75,7 +75,7 @@ class QuantizationConfigMixin:
|
|
75
75
|
Args:
|
76
76
|
config_dict (`Dict[str, Any]`):
|
77
77
|
Dictionary that will be used to instantiate the configuration object.
|
78
|
-
return_unused_kwargs (`bool`,*optional*, defaults to `False`):
|
78
|
+
return_unused_kwargs (`bool`, *optional*, defaults to `False`):
|
79
79
|
Whether or not to return a list of unused keyword arguments. Used for `from_pretrained` method in
|
80
80
|
`PreTrainedModel`.
|
81
81
|
kwargs (`Dict[str, Any]`):
|
@@ -179,7 +179,7 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
|
|
179
179
|
This is a wrapper class about all possible attributes and features that you can play with a model that has been
|
180
180
|
loaded using `bitsandbytes`.
|
181
181
|
|
182
|
-
This replaces `load_in_8bit` or `load_in_4bit`therefore both options are mutually exclusive.
|
182
|
+
This replaces `load_in_8bit` or `load_in_4bit` therefore both options are mutually exclusive.
|
183
183
|
|
184
184
|
Currently only supports `LLM.int8()`, `FP4`, and `NF4` quantization. If more methods are added to `bitsandbytes`,
|
185
185
|
then more arguments will be added to this class.
|
@@ -192,10 +192,10 @@ class BitsAndBytesConfig(QuantizationConfigMixin):
|
|
192
192
|
`bitsandbytes`.
|
193
193
|
llm_int8_threshold (`float`, *optional*, defaults to 6.0):
|
194
194
|
This corresponds to the outlier threshold for outlier detection as described in `LLM.int8() : 8-bit Matrix
|
195
|
-
Multiplication for Transformers at Scale` paper: https://
|
196
|
-
that is above this threshold will be considered an outlier and the operation on those values will be
|
197
|
-
in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5],
|
198
|
-
there are some exceptional systematic outliers that are very differently distributed for large models.
|
195
|
+
Multiplication for Transformers at Scale` paper: https://huggingface.co/papers/2208.07339 Any hidden states
|
196
|
+
value that is above this threshold will be considered an outlier and the operation on those values will be
|
197
|
+
done in fp16. Values are usually normally distributed, that is, most values are in the range [-3.5, 3.5],
|
198
|
+
but there are some exceptional systematic outliers that are very differently distributed for large models.
|
199
199
|
These outliers are often in the interval [-60, -6] or [6, 60]. Int8 quantization works well for values of
|
200
200
|
magnitude ~5, but beyond that, there is a significant performance penalty. A good default threshold is 6,
|
201
201
|
but a lower threshold might be needed for more unstable models (small models, fine-tuning).
|
@@ -493,7 +493,7 @@ class TorchAoConfig(QuantizationConfigMixin):
|
|
493
493
|
TORCHAO_QUANT_TYPE_METHODS = self._get_torchao_quant_type_to_method()
|
494
494
|
if self.quant_type not in TORCHAO_QUANT_TYPE_METHODS.keys():
|
495
495
|
is_floating_quant_type = self.quant_type.startswith("float") or self.quant_type.startswith("fp")
|
496
|
-
if is_floating_quant_type and not self.
|
496
|
+
if is_floating_quant_type and not self._is_xpu_or_cuda_capability_atleast_8_9():
|
497
497
|
raise ValueError(
|
498
498
|
f"Requested quantization type: {self.quant_type} is not supported on GPUs with CUDA capability <= 8.9. You "
|
499
499
|
f"can check the CUDA capability of your GPU using `torch.cuda.get_device_capability()`."
|
@@ -645,7 +645,7 @@ class TorchAoConfig(QuantizationConfigMixin):
|
|
645
645
|
QUANTIZATION_TYPES.update(INT8_QUANTIZATION_TYPES)
|
646
646
|
QUANTIZATION_TYPES.update(UINTX_QUANTIZATION_DTYPES)
|
647
647
|
|
648
|
-
if cls.
|
648
|
+
if cls._is_xpu_or_cuda_capability_atleast_8_9():
|
649
649
|
QUANTIZATION_TYPES.update(FLOATX_QUANTIZATION_TYPES)
|
650
650
|
|
651
651
|
return QUANTIZATION_TYPES
|
@@ -655,14 +655,16 @@ class TorchAoConfig(QuantizationConfigMixin):
|
|
655
655
|
)
|
656
656
|
|
657
657
|
@staticmethod
|
658
|
-
def
|
659
|
-
if
|
660
|
-
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
|
658
|
+
def _is_xpu_or_cuda_capability_atleast_8_9() -> bool:
|
659
|
+
if torch.cuda.is_available():
|
660
|
+
major, minor = torch.cuda.get_device_capability()
|
661
|
+
if major == 8:
|
662
|
+
return minor >= 9
|
663
|
+
return major >= 9
|
664
|
+
elif torch.xpu.is_available():
|
665
|
+
return True
|
666
|
+
else:
|
667
|
+
raise RuntimeError("TorchAO requires a CUDA compatible GPU or Intel XPU and installation of PyTorch.")
|
666
668
|
|
667
669
|
def get_apply_tensor_subclass(self):
|
668
670
|
TORCHAO_QUANT_TYPE_METHODS = self._get_torchao_quant_type_to_method()
|
@@ -262,7 +262,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
|
|
262
262
|
**kwargs,
|
263
263
|
):
|
264
264
|
r"""
|
265
|
-
Each nn.Linear layer that needs to be quantized is
|
265
|
+
Each nn.Linear layer that needs to be quantized is processed here. First, we set the value of the weight tensor,
|
266
266
|
then we move it to the target device. Finally, we quantize the module.
|
267
267
|
"""
|
268
268
|
module, tensor_name = get_module_from_name(model, param_name)
|
@@ -335,3 +335,7 @@ class TorchAoHfQuantizer(DiffusersQuantizer):
|
|
335
335
|
@property
|
336
336
|
def is_trainable(self):
|
337
337
|
return self.quantization_config.quant_type.startswith("int8")
|
338
|
+
|
339
|
+
@property
|
340
|
+
def is_compileable(self) -> bool:
|
341
|
+
return True
|
diffusers/schedulers/__init__.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -60,6 +60,7 @@ else:
|
|
60
60
|
_import_structure["scheduling_euler_discrete"] = ["EulerDiscreteScheduler"]
|
61
61
|
_import_structure["scheduling_flow_match_euler_discrete"] = ["FlowMatchEulerDiscreteScheduler"]
|
62
62
|
_import_structure["scheduling_flow_match_heun_discrete"] = ["FlowMatchHeunDiscreteScheduler"]
|
63
|
+
_import_structure["scheduling_flow_match_lcm"] = ["FlowMatchLCMScheduler"]
|
63
64
|
_import_structure["scheduling_heun_discrete"] = ["HeunDiscreteScheduler"]
|
64
65
|
_import_structure["scheduling_ipndm"] = ["IPNDMScheduler"]
|
65
66
|
_import_structure["scheduling_k_dpm_2_ancestral_discrete"] = ["KDPM2AncestralDiscreteScheduler"]
|
@@ -161,6 +162,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
161
162
|
from .scheduling_euler_discrete import EulerDiscreteScheduler
|
162
163
|
from .scheduling_flow_match_euler_discrete import FlowMatchEulerDiscreteScheduler
|
163
164
|
from .scheduling_flow_match_heun_discrete import FlowMatchHeunDiscreteScheduler
|
165
|
+
from .scheduling_flow_match_lcm import FlowMatchLCMScheduler
|
164
166
|
from .scheduling_heun_discrete import HeunDiscreteScheduler
|
165
167
|
from .scheduling_ipndm import IPNDMScheduler
|
166
168
|
from .scheduling_k_dpm_2_ancestral_discrete import KDPM2AncestralDiscreteScheduler
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 NVIDIA and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -55,8 +55,9 @@ class KarrasVeScheduler(SchedulerMixin, ConfigMixin):
|
|
55
55
|
|
56
56
|
<Tip>
|
57
57
|
|
58
|
-
For more details on the parameters, see [Appendix E](https://
|
59
|
-
to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of
|
58
|
+
For more details on the parameters, see [Appendix E](https://huggingface.co/papers/2206.00364). The grid search
|
59
|
+
values used to find the optimal `{s_noise, s_churn, s_min, s_max}` for a specific model are described in Table 5 of
|
60
|
+
the paper.
|
60
61
|
|
61
62
|
</Tip>
|
62
63
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Google Brain and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -30,7 +30,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
30
30
|
Implements a variant of `DPMSolverMultistepScheduler` with cosine schedule, proposed by Nichol and Dhariwal (2021).
|
31
31
|
This scheduler was used in Stable Audio Open [1].
|
32
32
|
|
33
|
-
[1] Evans, Parker, et al. "Stable Audio Open" https://
|
33
|
+
[1] Evans, Parker, et al. "Stable Audio Open" https://huggingface.co/papers/2407.14358
|
34
34
|
|
35
35
|
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
36
36
|
methods the library implements for all schedulers such as loading and saving.
|
@@ -44,8 +44,8 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
44
44
|
The standard deviation of the data distribution. This is set to 1.0 in Stable Audio Open [1].
|
45
45
|
sigma_schedule (`str`, *optional*, defaults to `exponential`):
|
46
46
|
Sigma schedule to compute the `sigmas`. By default, we use the schedule introduced in the EDM paper
|
47
|
-
(https://
|
48
|
-
incorporated in this model: https://huggingface.co/stabilityai/cosxl.
|
47
|
+
(https://huggingface.co/papers/2206.00364). Other acceptable value is "exponential". The exponential
|
48
|
+
schedule was incorporated in this model: https://huggingface.co/stabilityai/cosxl.
|
49
49
|
num_train_timesteps (`int`, defaults to 1000):
|
50
50
|
The number of diffusion steps to train the model.
|
51
51
|
solver_order (`int`, defaults to 2):
|
@@ -144,7 +144,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
144
144
|
|
145
145
|
# Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler.precondition_inputs
|
146
146
|
def precondition_inputs(self, sample, sigma):
|
147
|
-
c_in =
|
147
|
+
c_in = self._get_conditioning_c_in(sigma)
|
148
148
|
scaled_sample = sample * c_in
|
149
149
|
return scaled_sample
|
150
150
|
|
@@ -568,5 +568,10 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
568
568
|
noisy_samples = original_samples + noise * sigma
|
569
569
|
return noisy_samples
|
570
570
|
|
571
|
+
# Copied from diffusers.schedulers.scheduling_edm_euler.EDMEulerScheduler._get_conditioning_c_in
|
572
|
+
def _get_conditioning_c_in(self, sigma):
|
573
|
+
c_in = 1 / ((sigma**2 + self.config.sigma_data**2) ** 0.5)
|
574
|
+
return c_in
|
575
|
+
|
571
576
|
def __len__(self):
|
572
577
|
return self.config.num_train_timesteps
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
|
|
94
94
|
|
95
95
|
def rescale_zero_terminal_snr(betas):
|
96
96
|
"""
|
97
|
-
Rescales betas to have zero terminal SNR Based on https://
|
97
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
98
98
|
|
99
99
|
|
100
100
|
Args:
|
@@ -269,7 +269,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
269
269
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
270
270
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
271
271
|
|
272
|
-
https://
|
272
|
+
https://huggingface.co/papers/2205.11487
|
273
273
|
"""
|
274
274
|
dtype = sample.dtype
|
275
275
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -312,7 +312,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
312
312
|
|
313
313
|
self.num_inference_steps = num_inference_steps
|
314
314
|
|
315
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
315
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
316
316
|
if self.config.timestep_spacing == "linspace":
|
317
317
|
timesteps = (
|
318
318
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -387,7 +387,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
387
387
|
"Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
|
388
388
|
)
|
389
389
|
|
390
|
-
# See formulas (12) and (16) of DDIM paper https://
|
390
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
391
391
|
# Ideally, read the DDIM paper for a detailed understanding
|
392
392
|
|
393
393
|
# Notation (<variable name> -> <name in paper>
|
@@ -408,7 +408,7 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
408
408
|
beta_prod_t = 1 - alpha_prod_t
|
409
409
|
|
410
410
|
# 3. compute predicted original sample from predicted noise also called
|
411
|
-
# "predicted x_0" of formula (12) from https://
|
411
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
412
412
|
if self.config.prediction_type == "epsilon":
|
413
413
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
414
414
|
pred_epsilon = model_output
|
@@ -441,10 +441,10 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
441
441
|
# the pred_epsilon is always re-derived from the clipped x_0 in Glide
|
442
442
|
pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
|
443
443
|
|
444
|
-
# 6. compute "direction pointing to x_t" of formula (12) from https://
|
444
|
+
# 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
445
445
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
|
446
446
|
|
447
|
-
# 7. compute x_t without "random noise" of formula (12) from https://
|
447
|
+
# 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
|
448
448
|
prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
449
449
|
|
450
450
|
if eta > 0:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
|
|
94
94
|
|
95
95
|
def rescale_zero_terminal_snr(alphas_cumprod):
|
96
96
|
"""
|
97
|
-
Rescales betas to have zero terminal SNR Based on https://
|
97
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
98
98
|
|
99
99
|
|
100
100
|
Args:
|
@@ -275,7 +275,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
275
275
|
|
276
276
|
self.num_inference_steps = num_inference_steps
|
277
277
|
|
278
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
278
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
279
279
|
if self.config.timestep_spacing == "linspace":
|
280
280
|
timesteps = (
|
281
281
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -350,7 +350,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
350
350
|
"Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
|
351
351
|
)
|
352
352
|
|
353
|
-
# See formulas (12) and (16) of DDIM paper https://
|
353
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
354
354
|
# Ideally, read the DDIM paper for a detailed understanding
|
355
355
|
|
356
356
|
# Notation (<variable name> -> <name in paper>
|
@@ -371,7 +371,7 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
|
|
371
371
|
beta_prod_t = 1 - alpha_prod_t
|
372
372
|
|
373
373
|
# 3. compute predicted original sample from predicted noise also called
|
374
|
-
# "predicted x_0" of formula (12) from https://
|
374
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
375
375
|
# To make style tests pass, commented out `pred_epsilon` as it is an unused variable
|
376
376
|
if self.config.prediction_type == "epsilon":
|
377
377
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -73,7 +73,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
73
73
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
74
74
|
[`~SchedulerMixin.from_pretrained`] functions.
|
75
75
|
|
76
|
-
For more details, see the original paper: https://
|
76
|
+
For more details, see the original paper: https://huggingface.co/papers/2010.02502
|
77
77
|
|
78
78
|
Args:
|
79
79
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -230,7 +230,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
230
230
|
"Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
|
231
231
|
)
|
232
232
|
|
233
|
-
# See formulas (12) and (16) of DDIM paper https://
|
233
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
234
234
|
# Ideally, read the DDIM paper for a detailed understanding
|
235
235
|
|
236
236
|
# Notation (<variable name> -> <name in paper>
|
@@ -254,7 +254,7 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
254
254
|
beta_prod_t = 1 - alpha_prod_t
|
255
255
|
|
256
256
|
# 3. compute predicted original sample from predicted noise also called
|
257
|
-
# "predicted x_0" of formula (12) from https://
|
257
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
258
258
|
if self.config.prediction_type == "epsilon":
|
259
259
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
260
260
|
pred_epsilon = model_output
|
@@ -281,10 +281,10 @@ class FlaxDDIMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
281
281
|
variance = self._get_variance(state, timestep, prev_timestep)
|
282
282
|
std_dev_t = eta * variance ** (0.5)
|
283
283
|
|
284
|
-
# 5. compute "direction pointing to x_t" of formula (12) from https://
|
284
|
+
# 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
285
285
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
|
286
286
|
|
287
|
-
# 6. compute x_t without "random noise" of formula (12) from https://
|
287
|
+
# 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
|
288
288
|
prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
289
289
|
|
290
290
|
if not return_dict:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -93,7 +93,7 @@ def betas_for_alpha_bar(
|
|
93
93
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
94
94
|
def rescale_zero_terminal_snr(betas):
|
95
95
|
"""
|
96
|
-
Rescales betas to have zero terminal SNR Based on https://
|
96
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
97
97
|
|
98
98
|
|
99
99
|
Args:
|
@@ -266,7 +266,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
266
266
|
|
267
267
|
self.num_inference_steps = num_inference_steps
|
268
268
|
|
269
|
-
# "leading" and "trailing" corresponds to annotation of Table 2. of https://
|
269
|
+
# "leading" and "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
270
270
|
if self.config.timestep_spacing == "leading":
|
271
271
|
step_ratio = self.config.num_train_timesteps // self.num_inference_steps
|
272
272
|
# creates integer timesteps by multiplying by ratio
|
@@ -338,7 +338,7 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
338
338
|
beta_prod_t = 1 - alpha_prod_t
|
339
339
|
|
340
340
|
# 3. compute predicted original sample from predicted noise also called
|
341
|
-
# "predicted x_0" of formula (12) from https://
|
341
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
342
342
|
if self.config.prediction_type == "epsilon":
|
343
343
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
344
344
|
pred_epsilon = model_output
|
@@ -360,10 +360,10 @@ class DDIMInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
360
360
|
-self.config.clip_sample_range, self.config.clip_sample_range
|
361
361
|
)
|
362
362
|
|
363
|
-
# 5. compute "direction pointing to x_t" of formula (12) from https://
|
363
|
+
# 5. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
364
364
|
pred_sample_direction = (1 - alpha_prod_t_prev) ** (0.5) * pred_epsilon
|
365
365
|
|
366
|
-
# 6. compute x_t without "random noise" of formula (12) from https://
|
366
|
+
# 6. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
|
367
367
|
prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
368
368
|
|
369
369
|
if not return_dict:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -95,7 +95,7 @@ def betas_for_alpha_bar(
|
|
95
95
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
96
96
|
def rescale_zero_terminal_snr(betas):
|
97
97
|
"""
|
98
|
-
Rescales betas to have zero terminal SNR Based on https://
|
98
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
99
99
|
|
100
100
|
|
101
101
|
Args:
|
@@ -139,7 +139,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
139
139
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
140
140
|
[`~SchedulerMixin.from_pretrained`] functions.
|
141
141
|
|
142
|
-
For more details, see the original paper: https://
|
142
|
+
For more details, see the original paper: https://huggingface.co/papers/2010.02502
|
143
143
|
|
144
144
|
Args:
|
145
145
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -165,21 +165,21 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
165
165
|
process), `sample` (directly predicting the noisy sample) or `v_prediction` (see section 2.4
|
166
166
|
https://imagen.research.google/video/paper.pdf)
|
167
167
|
thresholding (`bool`, default `False`):
|
168
|
-
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
169
|
-
Note that the thresholding method is unsuitable for latent-space
|
170
|
-
stable-diffusion).
|
168
|
+
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
169
|
+
https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space
|
170
|
+
diffusion models (such as stable-diffusion).
|
171
171
|
dynamic_thresholding_ratio (`float`, default `0.995`):
|
172
172
|
the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
|
173
|
-
(https://
|
173
|
+
(https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`.
|
174
174
|
sample_max_value (`float`, default `1.0`):
|
175
175
|
the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
176
176
|
timestep_spacing (`str`, default `"leading"`):
|
177
177
|
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
178
|
-
Steps are Flawed](https://
|
178
|
+
Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
179
179
|
rescale_betas_zero_snr (`bool`, default `False`):
|
180
|
-
whether to rescale the betas to have zero terminal SNR (proposed by
|
181
|
-
This can enable the model to generate very bright and dark
|
182
|
-
medium brightness. Loosely related to
|
180
|
+
whether to rescale the betas to have zero terminal SNR (proposed by
|
181
|
+
https://huggingface.co/papers/2305.08891). This can enable the model to generate very bright and dark
|
182
|
+
samples instead of limiting it to samples with medium brightness. Loosely related to
|
183
183
|
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
|
184
184
|
"""
|
185
185
|
|
@@ -291,7 +291,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
291
291
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
292
292
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
293
293
|
|
294
|
-
https://
|
294
|
+
https://huggingface.co/papers/2205.11487
|
295
295
|
"""
|
296
296
|
dtype = sample.dtype
|
297
297
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -335,7 +335,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
335
335
|
|
336
336
|
self.num_inference_steps = num_inference_steps
|
337
337
|
|
338
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
338
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
339
339
|
if self.config.timestep_spacing == "linspace":
|
340
340
|
timesteps = (
|
341
341
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -390,7 +390,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
390
390
|
generator: random number generator.
|
391
391
|
variance_noise (`torch.Tensor`): instead of generating noise for the variance using `generator`, we
|
392
392
|
can directly provide the noise for the variance itself. This is useful for methods such as
|
393
|
-
CycleDiffusion. (https://
|
393
|
+
CycleDiffusion. (https://huggingface.co/papers/2210.05559)
|
394
394
|
return_dict (`bool`): option for returning tuple rather than DDIMParallelSchedulerOutput class
|
395
395
|
|
396
396
|
Returns:
|
@@ -404,7 +404,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
404
404
|
"Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
|
405
405
|
)
|
406
406
|
|
407
|
-
# See formulas (12) and (16) of DDIM paper https://
|
407
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
408
408
|
# Ideally, read the DDIM paper for a detailed understanding
|
409
409
|
|
410
410
|
# Notation (<variable name> -> <name in paper>
|
@@ -425,7 +425,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
425
425
|
beta_prod_t = 1 - alpha_prod_t
|
426
426
|
|
427
427
|
# 3. compute predicted original sample from predicted noise also called
|
428
|
-
# "predicted x_0" of formula (12) from https://
|
428
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
429
429
|
if self.config.prediction_type == "epsilon":
|
430
430
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
431
431
|
pred_epsilon = model_output
|
@@ -458,10 +458,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
458
458
|
# the pred_epsilon is always re-derived from the clipped x_0 in Glide
|
459
459
|
pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
|
460
460
|
|
461
|
-
# 6. compute "direction pointing to x_t" of formula (12) from https://
|
461
|
+
# 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
462
462
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
|
463
463
|
|
464
|
-
# 7. compute x_t without "random noise" of formula (12) from https://
|
464
|
+
# 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
|
465
465
|
prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
466
466
|
|
467
467
|
if eta > 0:
|
@@ -526,7 +526,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
526
526
|
|
527
527
|
assert eta == 0.0
|
528
528
|
|
529
|
-
# See formulas (12) and (16) of DDIM paper https://
|
529
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
530
530
|
# Ideally, read the DDIM paper for a detailed understanding
|
531
531
|
|
532
532
|
# Notation (<variable name> -> <name in paper>
|
@@ -554,7 +554,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
554
554
|
beta_prod_t = 1 - alpha_prod_t
|
555
555
|
|
556
556
|
# 3. compute predicted original sample from predicted noise also called
|
557
|
-
# "predicted x_0" of formula (12) from https://
|
557
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
558
558
|
if self.config.prediction_type == "epsilon":
|
559
559
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
560
560
|
pred_epsilon = model_output
|
@@ -587,10 +587,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
587
587
|
# the pred_epsilon is always re-derived from the clipped x_0 in Glide
|
588
588
|
pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)
|
589
589
|
|
590
|
-
# 6. compute "direction pointing to x_t" of formula (12) from https://
|
590
|
+
# 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
591
591
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon
|
592
592
|
|
593
|
-
# 7. compute x_t without "random noise" of formula (12) from https://
|
593
|
+
# 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
|
594
594
|
prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
595
595
|
|
596
596
|
return prev_sample
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -92,7 +92,7 @@ def betas_for_alpha_bar(
|
|
92
92
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
93
93
|
def rescale_zero_terminal_snr(betas):
|
94
94
|
"""
|
95
|
-
Rescales betas to have zero terminal SNR Based on https://
|
95
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
96
96
|
|
97
97
|
|
98
98
|
Args:
|
@@ -295,7 +295,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
295
295
|
self.num_inference_steps = num_inference_steps
|
296
296
|
self.custom_timesteps = False
|
297
297
|
|
298
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
298
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
299
299
|
if self.config.timestep_spacing == "linspace":
|
300
300
|
timesteps = (
|
301
301
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -329,7 +329,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
329
329
|
alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
|
330
330
|
current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
|
331
331
|
|
332
|
-
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://
|
332
|
+
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
|
333
333
|
# and sample from it to get previous sample
|
334
334
|
# x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
|
335
335
|
variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
|
@@ -343,7 +343,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
343
343
|
# hacks - were probably added for training stability
|
344
344
|
if variance_type == "fixed_small":
|
345
345
|
variance = variance
|
346
|
-
# for rl-diffuser https://arxiv.org/abs/2205.09991
|
346
|
+
# for rl-diffuser https://huggingface.co/papers/2205.09991
|
347
347
|
elif variance_type == "fixed_small_log":
|
348
348
|
variance = torch.log(variance)
|
349
349
|
variance = torch.exp(0.5 * variance)
|
@@ -370,7 +370,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
370
370
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
371
371
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
372
372
|
|
373
|
-
https://arxiv.org/abs/2205.11487
|
373
|
+
https://huggingface.co/papers/2205.11487
|
374
374
|
"""
|
375
375
|
dtype = sample.dtype
|
376
376
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -443,7 +443,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
443
443
|
current_beta_t = 1 - current_alpha_t
|
444
444
|
|
445
445
|
# 2. compute predicted original sample from predicted noise also called
|
446
|
-
# "predicted x_0" of formula (15) from https://arxiv.org/pdf/2006.11239.pdf
|
446
|
+
# "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
|
447
447
|
if self.config.prediction_type == "epsilon":
|
448
448
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
449
449
|
elif self.config.prediction_type == "sample":
|
@@ -465,12 +465,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
465
465
|
)
|
466
466
|
|
467
467
|
# 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
|
468
|
-
# See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
|
468
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
469
469
|
pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
|
470
470
|
current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
|
471
471
|
|
472
472
|
# 5. Compute predicted previous sample µ_t
|
473
|
-
# See formula (7) from https://arxiv.org/pdf/2006.11239.pdf
|
473
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
474
474
|
pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
|
475
475
|
|
476
476
|
# 6. Add noise
|