diffusers-0.33.1-py3-none-any.whl → diffusers-0.34.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 UC Berkeley Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -61,7 +61,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
61
61
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
62
62
|
[`~SchedulerMixin.from_pretrained`] functions.
|
63
63
|
|
64
|
-
For more details, see the original paper: https://arxiv.org/abs/2006.11239
|
64
|
+
For more details, see the original paper: https://huggingface.co/papers/2006.11239
|
65
65
|
|
66
66
|
Args:
|
67
67
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -163,7 +163,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
163
163
|
alpha_prod_t = state.common.alphas_cumprod[t]
|
164
164
|
alpha_prod_t_prev = jnp.where(t > 0, state.common.alphas_cumprod[t - 1], jnp.array(1.0, dtype=self.dtype))
|
165
165
|
|
166
|
-
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://arxiv.org/pdf/2006.11239.pdf)
|
166
|
+
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
|
167
167
|
# and sample from it to get previous sample
|
168
168
|
# x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
|
169
169
|
variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * state.common.betas[t]
|
@@ -174,7 +174,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
174
174
|
# hacks - were probably added for training stability
|
175
175
|
if variance_type == "fixed_small":
|
176
176
|
variance = jnp.clip(variance, a_min=1e-20)
|
177
|
-
# for rl-diffuser https://
|
177
|
+
# for rl-diffuser https://huggingface.co/papers/2205.09991
|
178
178
|
elif variance_type == "fixed_small_log":
|
179
179
|
variance = jnp.log(jnp.clip(variance, a_min=1e-20))
|
180
180
|
elif variance_type == "fixed_large":
|
@@ -240,7 +240,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
240
240
|
beta_prod_t_prev = 1 - alpha_prod_t_prev
|
241
241
|
|
242
242
|
# 2. compute predicted original sample from predicted noise also called
|
243
|
-
# "predicted x_0" of formula (15) from https://
|
243
|
+
# "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
|
244
244
|
if self.config.prediction_type == "epsilon":
|
245
245
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
246
246
|
elif self.config.prediction_type == "sample":
|
@@ -258,12 +258,12 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
258
258
|
pred_original_sample = jnp.clip(pred_original_sample, -1, 1)
|
259
259
|
|
260
260
|
# 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
|
261
|
-
# See formula (7) from https://
|
261
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
262
262
|
pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * state.common.betas[t]) / beta_prod_t
|
263
263
|
current_sample_coeff = state.common.alphas[t] ** (0.5) * beta_prod_t_prev / beta_prod_t
|
264
264
|
|
265
265
|
# 5. Compute predicted previous sample µ_t
|
266
|
-
# See formula (7) from https://
|
266
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
267
267
|
pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
|
268
268
|
|
269
269
|
# 6. Add noise
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
|
|
94
94
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
95
95
|
def rescale_zero_terminal_snr(betas):
|
96
96
|
"""
|
97
|
-
Rescales betas to have zero terminal SNR Based on https://
|
97
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
98
98
|
|
99
99
|
|
100
100
|
Args:
|
@@ -138,7 +138,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
138
138
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
139
139
|
[`~SchedulerMixin.from_pretrained`] functions.
|
140
140
|
|
141
|
-
For more details, see the original paper: https://
|
141
|
+
For more details, see the original paper: https://huggingface.co/papers/2006.11239
|
142
142
|
|
143
143
|
Args:
|
144
144
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -161,17 +161,17 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
161
161
|
process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
|
162
162
|
https://imagen.research.google/video/paper.pdf)
|
163
163
|
thresholding (`bool`, default `False`):
|
164
|
-
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
165
|
-
Note that the thresholding method is unsuitable for latent-space
|
166
|
-
stable-diffusion).
|
164
|
+
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
165
|
+
https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space
|
166
|
+
diffusion models (such as stable-diffusion).
|
167
167
|
dynamic_thresholding_ratio (`float`, default `0.995`):
|
168
168
|
the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
|
169
|
-
(https://
|
169
|
+
(https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`.
|
170
170
|
sample_max_value (`float`, default `1.0`):
|
171
171
|
the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
172
172
|
timestep_spacing (`str`, default `"leading"`):
|
173
173
|
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
174
|
-
Steps are Flawed](https://
|
174
|
+
Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
175
175
|
steps_offset (`int`, default `0`):
|
176
176
|
An offset added to the inference steps, as required by some model families.
|
177
177
|
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
@@ -305,7 +305,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
305
305
|
self.num_inference_steps = num_inference_steps
|
306
306
|
self.custom_timesteps = False
|
307
307
|
|
308
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
308
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
309
309
|
if self.config.timestep_spacing == "linspace":
|
310
310
|
timesteps = (
|
311
311
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -340,7 +340,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
340
340
|
alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
|
341
341
|
current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
|
342
342
|
|
343
|
-
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://
|
343
|
+
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
|
344
344
|
# and sample from it to get previous sample
|
345
345
|
# x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
|
346
346
|
variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
|
@@ -354,7 +354,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
354
354
|
# hacks - were probably added for training stability
|
355
355
|
if variance_type == "fixed_small":
|
356
356
|
variance = variance
|
357
|
-
# for rl-diffuser https://
|
357
|
+
# for rl-diffuser https://huggingface.co/papers/2205.09991
|
358
358
|
elif variance_type == "fixed_small_log":
|
359
359
|
variance = torch.log(variance)
|
360
360
|
variance = torch.exp(0.5 * variance)
|
@@ -382,7 +382,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
382
382
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
383
383
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
384
384
|
|
385
|
-
https://
|
385
|
+
https://huggingface.co/papers/2205.11487
|
386
386
|
"""
|
387
387
|
dtype = sample.dtype
|
388
388
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -451,7 +451,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
451
451
|
current_beta_t = 1 - current_alpha_t
|
452
452
|
|
453
453
|
# 2. compute predicted original sample from predicted noise also called
|
454
|
-
# "predicted x_0" of formula (15) from https://
|
454
|
+
# "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
|
455
455
|
if self.config.prediction_type == "epsilon":
|
456
456
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
457
457
|
elif self.config.prediction_type == "sample":
|
@@ -473,12 +473,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
473
473
|
)
|
474
474
|
|
475
475
|
# 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
|
476
|
-
# See formula (7) from https://
|
476
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
477
477
|
pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
|
478
478
|
current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
|
479
479
|
|
480
480
|
# 5. Compute predicted previous sample µ_t
|
481
|
-
# See formula (7) from https://
|
481
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
482
482
|
pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
|
483
483
|
|
484
484
|
# 6. Add noise
|
@@ -554,7 +554,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
554
554
|
current_beta_t = 1 - current_alpha_t
|
555
555
|
|
556
556
|
# 2. compute predicted original sample from predicted noise also called
|
557
|
-
# "predicted x_0" of formula (15) from https://
|
557
|
+
# "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
|
558
558
|
if self.config.prediction_type == "epsilon":
|
559
559
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
560
560
|
elif self.config.prediction_type == "sample":
|
@@ -576,12 +576,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
576
576
|
)
|
577
577
|
|
578
578
|
# 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
|
579
|
-
# See formula (7) from https://
|
579
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
580
580
|
pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
|
581
581
|
current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
|
582
582
|
|
583
583
|
# 5. Compute predicted previous sample µ_t
|
584
|
-
# See formula (7) from https://
|
584
|
+
# See formula (7) from https://huggingface.co/papers/2006.11239
|
585
585
|
pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
|
586
586
|
|
587
587
|
return pred_prev_sample
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# Copyright (c) 2022 Pablo Pernías MIT License
|
2
|
-
# Copyright
|
2
|
+
# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
5
|
# you may not use this file except in compliance with the License.
|
@@ -95,7 +95,7 @@ class DDPMWuerstchenScheduler(SchedulerMixin, ConfigMixin):
|
|
95
95
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
96
96
|
[`~SchedulerMixin.from_pretrained`] functions.
|
97
97
|
|
98
|
-
For more details, see the original paper: https://
|
98
|
+
For more details, see the original paper: https://huggingface.co/papers/2006.11239
|
99
99
|
|
100
100
|
Args:
|
101
101
|
scaler (`float`): ....
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 FLAIR Lab and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -12,7 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
-
# DISCLAIMER: check https://
|
15
|
+
# DISCLAIMER: check https://huggingface.co/papers/2204.13902 and https://github.com/qsh-zh/deis for more info
|
16
16
|
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
|
17
17
|
|
18
18
|
import math
|
@@ -242,7 +242,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
242
242
|
device (`str` or `torch.device`, *optional*):
|
243
243
|
The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
|
244
244
|
"""
|
245
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
245
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
246
246
|
if self.config.timestep_spacing == "linspace":
|
247
247
|
timesteps = (
|
248
248
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
|
@@ -319,7 +319,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
319
319
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
320
320
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
321
321
|
|
322
|
-
https://
|
322
|
+
https://huggingface.co/papers/2205.11487
|
323
323
|
"""
|
324
324
|
dtype = sample.dtype
|
325
325
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -486,7 +486,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
486
486
|
if len(args) > 1:
|
487
487
|
sample = args[1]
|
488
488
|
else:
|
489
|
-
raise ValueError("missing `sample` as a required
|
489
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
490
490
|
if timestep is not None:
|
491
491
|
deprecate(
|
492
492
|
"timesteps",
|
@@ -549,7 +549,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
549
549
|
if len(args) > 2:
|
550
550
|
sample = args[2]
|
551
551
|
else:
|
552
|
-
raise ValueError("
|
552
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
553
553
|
if timestep is not None:
|
554
554
|
deprecate(
|
555
555
|
"timesteps",
|
@@ -603,7 +603,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
603
603
|
if len(args) > 2:
|
604
604
|
sample = args[2]
|
605
605
|
else:
|
606
|
-
raise ValueError("
|
606
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
607
607
|
if timestep_list is not None:
|
608
608
|
deprecate(
|
609
609
|
"timestep_list",
|
@@ -673,7 +673,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
673
673
|
if len(args) > 2:
|
674
674
|
sample = args[2]
|
675
675
|
else:
|
676
|
-
raise ValueError("
|
676
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
677
677
|
if timestep_list is not None:
|
678
678
|
deprecate(
|
679
679
|
"timestep_list",
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -95,7 +95,7 @@ def betas_for_alpha_bar(
|
|
95
95
|
|
96
96
|
def rescale_zero_terminal_snr(alphas_cumprod):
|
97
97
|
"""
|
98
|
-
Rescales betas to have zero terminal SNR Based on https://
|
98
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
99
99
|
|
100
100
|
|
101
101
|
Args:
|
@@ -276,7 +276,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
276
276
|
|
277
277
|
self.num_inference_steps = num_inference_steps
|
278
278
|
|
279
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
279
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
280
280
|
if self.config.timestep_spacing == "linspace":
|
281
281
|
timesteps = (
|
282
282
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -377,7 +377,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
377
377
|
"Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
|
378
378
|
)
|
379
379
|
|
380
|
-
# See formulas (12) and (16) of DDIM paper https://
|
380
|
+
# See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
|
381
381
|
# Ideally, read DDIM paper in-detail understanding
|
382
382
|
|
383
383
|
# Notation (<variable name> -> <name in paper>
|
@@ -399,7 +399,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
|
|
399
399
|
beta_prod_t = 1 - alpha_prod_t
|
400
400
|
|
401
401
|
# 3. compute predicted original sample from predicted noise also called
|
402
|
-
# "predicted x_0" of formula (12) from https://
|
402
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
403
403
|
# To make style tests pass, commented out `pred_epsilon` as it is an unused variable
|
404
404
|
if self.config.prediction_type == "epsilon":
|
405
405
|
pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -78,7 +78,7 @@ def betas_for_alpha_bar(
|
|
78
78
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
79
79
|
def rescale_zero_terminal_snr(betas):
|
80
80
|
"""
|
81
|
-
Rescales betas to have zero terminal SNR Based on https://
|
81
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
82
82
|
|
83
83
|
|
84
84
|
Args:
|
@@ -366,7 +366,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
366
366
|
clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped)
|
367
367
|
last_timestep = ((self.config.num_train_timesteps - clipped_idx).numpy()).item()
|
368
368
|
|
369
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
369
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
370
370
|
if self.config.timestep_spacing == "linspace":
|
371
371
|
timesteps = (
|
372
372
|
np.linspace(0, last_timestep - 1, num_inference_steps + 1)
|
@@ -460,7 +460,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
460
460
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
461
461
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
462
462
|
|
463
|
-
https://
|
463
|
+
https://huggingface.co/papers/2205.11487
|
464
464
|
"""
|
465
465
|
dtype = sample.dtype
|
466
466
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -646,7 +646,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
646
646
|
if len(args) > 1:
|
647
647
|
sample = args[1]
|
648
648
|
else:
|
649
|
-
raise ValueError("missing `sample` as a required
|
649
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
650
650
|
if timestep is not None:
|
651
651
|
deprecate(
|
652
652
|
"timesteps",
|
@@ -741,7 +741,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
741
741
|
if len(args) > 2:
|
742
742
|
sample = args[2]
|
743
743
|
else:
|
744
|
-
raise ValueError("
|
744
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
745
745
|
if timestep is not None:
|
746
746
|
deprecate(
|
747
747
|
"timesteps",
|
@@ -810,7 +810,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
810
810
|
if len(args) > 2:
|
811
811
|
sample = args[2]
|
812
812
|
else:
|
813
|
-
raise ValueError("
|
813
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
814
814
|
if timestep_list is not None:
|
815
815
|
deprecate(
|
816
816
|
"timestep_list",
|
@@ -845,7 +845,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
845
845
|
r0 = h_0 / h
|
846
846
|
D0, D1 = m0, (1.0 / r0) * (m0 - m1)
|
847
847
|
if self.config.algorithm_type == "dpmsolver++":
|
848
|
-
# See https://
|
848
|
+
# See https://huggingface.co/papers/2211.01095 for detailed derivations
|
849
849
|
if self.config.solver_type == "midpoint":
|
850
850
|
x_t = (
|
851
851
|
(sigma_t / sigma_s0) * sample
|
@@ -859,7 +859,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
859
859
|
+ (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1
|
860
860
|
)
|
861
861
|
elif self.config.algorithm_type == "dpmsolver":
|
862
|
-
# See https://
|
862
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
863
863
|
if self.config.solver_type == "midpoint":
|
864
864
|
x_t = (
|
865
865
|
(alpha_t / alpha_s0) * sample
|
@@ -934,7 +934,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
934
934
|
if len(args) > 2:
|
935
935
|
sample = args[2]
|
936
936
|
else:
|
937
|
-
raise ValueError("
|
937
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
938
938
|
if timestep_list is not None:
|
939
939
|
deprecate(
|
940
940
|
"timestep_list",
|
@@ -975,7 +975,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
975
975
|
D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1)
|
976
976
|
D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1)
|
977
977
|
if self.config.algorithm_type == "dpmsolver++":
|
978
|
-
# See https://
|
978
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
979
979
|
x_t = (
|
980
980
|
(sigma_t / sigma_s0) * sample
|
981
981
|
- (alpha_t * (torch.exp(-h) - 1.0)) * D0
|
@@ -983,7 +983,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
|
|
983
983
|
- (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2
|
984
984
|
)
|
985
985
|
elif self.config.algorithm_type == "dpmsolver":
|
986
|
-
# See https://
|
986
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
987
987
|
x_t = (
|
988
988
|
(alpha_t / alpha_s0) * sample
|
989
989
|
- (sigma_t * (torch.exp(h) - 1.0)) * D0
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -80,14 +80,15 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
80
80
|
the convergence order guarantee. Empirically, sampling by DPM-Solver with only 20 steps can generate high-quality
|
81
81
|
samples, and it can generate quite good samples even in only 10 steps.
|
82
82
|
|
83
|
-
For more details, see the original paper: https://
|
83
|
+
For more details, see the original paper: https://huggingface.co/papers/2206.00927 and
|
84
|
+
https://huggingface.co/papers/2211.01095
|
84
85
|
|
85
86
|
Currently, we support the multistep DPM-Solver for both noise prediction models and data prediction models. We
|
86
87
|
recommend to use `solver_order=2` for guided sampling, and `solver_order=3` for unconditional sampling.
|
87
88
|
|
88
|
-
We also support the "dynamic thresholding" method in Imagen (https://
|
89
|
-
diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the
|
90
|
-
thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as
|
89
|
+
We also support the "dynamic thresholding" method in Imagen (https://huggingface.co/papers/2205.11487). For
|
90
|
+
pixel-space diffusion models, you can set both `algorithm_type="dpmsolver++"` and `thresholding=True` to use the
|
91
|
+
dynamic thresholding. Note that the thresholding method is unsuitable for latent-space diffusion models (such as
|
91
92
|
stable-diffusion).
|
92
93
|
|
93
94
|
[`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
|
@@ -95,7 +96,8 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
95
96
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
96
97
|
[`~SchedulerMixin.from_pretrained`] functions.
|
97
98
|
|
98
|
-
For more details, see the original paper: https://
|
99
|
+
For more details, see the original paper: https://huggingface.co/papers/2206.00927 and
|
100
|
+
https://huggingface.co/papers/2211.01095
|
99
101
|
|
100
102
|
Args:
|
101
103
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -113,21 +115,21 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
113
115
|
indicates whether the model predicts the noise (epsilon), or the data / `x0`. One of `epsilon`, `sample`,
|
114
116
|
or `v-prediction`.
|
115
117
|
thresholding (`bool`, default `False`):
|
116
|
-
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
117
|
-
For pixel-space diffusion models, you can set both
|
118
|
-
use the dynamic thresholding. Note that the
|
119
|
-
models (such as stable-diffusion).
|
118
|
+
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
119
|
+
https://huggingface.co/papers/2205.11487). For pixel-space diffusion models, you can set both
|
120
|
+
`algorithm_type=dpmsolver++` and `thresholding=True` to use the dynamic thresholding. Note that the
|
121
|
+
thresholding method is unsuitable for latent-space diffusion models (such as stable-diffusion).
|
120
122
|
dynamic_thresholding_ratio (`float`, default `0.995`):
|
121
123
|
the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
|
122
|
-
(https://
|
124
|
+
(https://huggingface.co/papers/2205.11487).
|
123
125
|
sample_max_value (`float`, default `1.0`):
|
124
126
|
the threshold value for dynamic thresholding. Valid only when `thresholding=True` and
|
125
127
|
`algorithm_type="dpmsolver++`.
|
126
128
|
algorithm_type (`str`, default `dpmsolver++`):
|
127
129
|
the algorithm type for the solver. Either `dpmsolver` or `dpmsolver++`. The `dpmsolver` type implements the
|
128
|
-
algorithms in https://
|
129
|
-
https://
|
130
|
-
sampling (e.g. stable-diffusion).
|
130
|
+
algorithms in https://huggingface.co/papers/2206.00927, and the `dpmsolver++` type implements the
|
131
|
+
algorithms in https://huggingface.co/papers/2211.01095. We recommend to use `dpmsolver++` with
|
132
|
+
`solver_order=2` for guided sampling (e.g. stable-diffusion).
|
131
133
|
solver_type (`str`, default `midpoint`):
|
132
134
|
the solver type for the second-order solver. Either `midpoint` or `heun`. The solver type slightly affects
|
133
135
|
the sample quality, especially for small number of steps. We empirically find that `midpoint` solvers are
|
@@ -297,7 +299,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
297
299
|
)
|
298
300
|
|
299
301
|
if self.config.thresholding:
|
300
|
-
# Dynamic thresholding in https://
|
302
|
+
# Dynamic thresholding in https://huggingface.co/papers/2205.11487
|
301
303
|
dynamic_max_val = jnp.percentile(
|
302
304
|
jnp.abs(x0_pred), self.config.dynamic_thresholding_ratio, axis=tuple(range(1, x0_pred.ndim))
|
303
305
|
)
|
@@ -335,7 +337,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
335
337
|
"""
|
336
338
|
One step for the first-order DPM-Solver (equivalent to DDIM).
|
337
339
|
|
338
|
-
See https://
|
340
|
+
See https://huggingface.co/papers/2206.00927 for the detailed derivation.
|
339
341
|
|
340
342
|
Args:
|
341
343
|
model_output (`jnp.ndarray`): direct output from learned diffusion model.
|
@@ -390,7 +392,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
390
392
|
r0 = h_0 / h
|
391
393
|
D0, D1 = m0, (1.0 / r0) * (m0 - m1)
|
392
394
|
if self.config.algorithm_type == "dpmsolver++":
|
393
|
-
# See https://
|
395
|
+
# See https://huggingface.co/papers/2211.01095 for detailed derivations
|
394
396
|
if self.config.solver_type == "midpoint":
|
395
397
|
x_t = (
|
396
398
|
(sigma_t / sigma_s0) * sample
|
@@ -404,7 +406,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
404
406
|
+ (alpha_t * ((jnp.exp(-h) - 1.0) / h + 1.0)) * D1
|
405
407
|
)
|
406
408
|
elif self.config.algorithm_type == "dpmsolver":
|
407
|
-
# See https://
|
409
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
408
410
|
if self.config.solver_type == "midpoint":
|
409
411
|
x_t = (
|
410
412
|
(alpha_t / alpha_s0) * sample
|
@@ -458,7 +460,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
458
460
|
D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1)
|
459
461
|
D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1)
|
460
462
|
if self.config.algorithm_type == "dpmsolver++":
|
461
|
-
# See https://
|
463
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
462
464
|
x_t = (
|
463
465
|
(sigma_t / sigma_s0) * sample
|
464
466
|
- (alpha_t * (jnp.exp(-h) - 1.0)) * D0
|
@@ -466,7 +468,7 @@ class FlaxDPMSolverMultistepScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
466
468
|
- (alpha_t * ((jnp.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2
|
467
469
|
)
|
468
470
|
elif self.config.algorithm_type == "dpmsolver":
|
469
|
-
# See https://
|
471
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
470
472
|
x_t = (
|
471
473
|
(alpha_t / alpha_s0) * sample
|
472
474
|
- (sigma_t * (jnp.exp(h) - 1.0)) * D0
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TSAIL Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -257,7 +257,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
257
257
|
clipped_idx = torch.searchsorted(torch.flip(self.lambda_t, [0]), self.config.lambda_min_clipped).item()
|
258
258
|
self.noisiest_timestep = self.config.num_train_timesteps - 1 - clipped_idx
|
259
259
|
|
260
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
260
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
261
261
|
if self.config.timestep_spacing == "linspace":
|
262
262
|
timesteps = (
|
263
263
|
np.linspace(0, self.noisiest_timestep, num_inference_steps + 1).round()[:-1].copy().astype(np.int64)
|
@@ -338,7 +338,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
338
338
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
339
339
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
340
340
|
|
341
|
-
https://
|
341
|
+
https://huggingface.co/papers/2205.11487
|
342
342
|
"""
|
343
343
|
dtype = sample.dtype
|
344
344
|
batch_size, channels, *remaining_dims = sample.shape
|
@@ -513,7 +513,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
513
513
|
if len(args) > 1:
|
514
514
|
sample = args[1]
|
515
515
|
else:
|
516
|
-
raise ValueError("missing `sample` as a required
|
516
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
517
517
|
if timestep is not None:
|
518
518
|
deprecate(
|
519
519
|
"timesteps",
|
@@ -609,7 +609,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
609
609
|
if len(args) > 2:
|
610
610
|
sample = args[2]
|
611
611
|
else:
|
612
|
-
raise ValueError("
|
612
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
613
613
|
if timestep is not None:
|
614
614
|
deprecate(
|
615
615
|
"timesteps",
|
@@ -679,7 +679,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
679
679
|
if len(args) > 2:
|
680
680
|
sample = args[2]
|
681
681
|
else:
|
682
|
-
raise ValueError("
|
682
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
683
683
|
if timestep_list is not None:
|
684
684
|
deprecate(
|
685
685
|
"timestep_list",
|
@@ -714,7 +714,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
714
714
|
r0 = h_0 / h
|
715
715
|
D0, D1 = m0, (1.0 / r0) * (m0 - m1)
|
716
716
|
if self.config.algorithm_type == "dpmsolver++":
|
717
|
-
# See https://
|
717
|
+
# See https://huggingface.co/papers/2211.01095 for detailed derivations
|
718
718
|
if self.config.solver_type == "midpoint":
|
719
719
|
x_t = (
|
720
720
|
(sigma_t / sigma_s0) * sample
|
@@ -728,7 +728,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
728
728
|
+ (alpha_t * ((torch.exp(-h) - 1.0) / h + 1.0)) * D1
|
729
729
|
)
|
730
730
|
elif self.config.algorithm_type == "dpmsolver":
|
731
|
-
# See https://
|
731
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
732
732
|
if self.config.solver_type == "midpoint":
|
733
733
|
x_t = (
|
734
734
|
(alpha_t / alpha_s0) * sample
|
@@ -804,7 +804,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
804
804
|
if len(args) > 2:
|
805
805
|
sample = args[2]
|
806
806
|
else:
|
807
|
-
raise ValueError("
|
807
|
+
raise ValueError("missing `sample` as a required keyword argument")
|
808
808
|
if timestep_list is not None:
|
809
809
|
deprecate(
|
810
810
|
"timestep_list",
|
@@ -845,7 +845,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
845
845
|
D1 = D1_0 + (r0 / (r0 + r1)) * (D1_0 - D1_1)
|
846
846
|
D2 = (1.0 / (r0 + r1)) * (D1_0 - D1_1)
|
847
847
|
if self.config.algorithm_type == "dpmsolver++":
|
848
|
-
# See https://
|
848
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
849
849
|
x_t = (
|
850
850
|
(sigma_t / sigma_s0) * sample
|
851
851
|
- (alpha_t * (torch.exp(-h) - 1.0)) * D0
|
@@ -853,7 +853,7 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
|
|
853
853
|
- (alpha_t * ((torch.exp(-h) - 1.0 + h) / h**2 - 0.5)) * D2
|
854
854
|
)
|
855
855
|
elif self.config.algorithm_type == "dpmsolver":
|
856
|
-
# See https://
|
856
|
+
# See https://huggingface.co/papers/2206.00927 for detailed derivations
|
857
857
|
x_t = (
|
858
858
|
(alpha_t / alpha_s0) * sample
|
859
859
|
- (sigma_t * (torch.exp(h) - 1.0)) * D0
|