diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
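The biggest additions in 0.35.0 are whole new pipeline and model families (Chroma, Cosmos, HiDream, QwenImage, SkyReels-V2, Wan VACE, Flux Kontext) plus the new `guiders` and `modular_pipelines` packages. As a rough, hedged sketch of how such an addition is typically consumed, the generic `DiffusionPipeline` loader resolves the concrete pipeline class from the checkpoint's `model_index.json`; the repository id below is a placeholder, not something taken from this diff:

```python
# Hedged sketch: loading one of the newly added pipelines through the generic
# DiffusionPipeline loader. The repository id is a placeholder, not from this diff.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "some-org/some-new-0.35-pipeline",  # placeholder checkpoint id
    torch_dtype=torch.bfloat16,         # reduced-precision weights, if the checkpoint provides them
)
pipe = pipe.to("cuda")

# Text-to-image style call; the exact arguments depend on the resolved pipeline class.
image = pipe(prompt="a watercolor fox in a snowy forest").images[0]
image.save("fox.png")
```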
diffusers/schedulers/scheduling_ddim_parallel.py

```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -95,7 +95,7 @@ def betas_for_alpha_bar(
 # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
 def rescale_zero_terminal_snr(betas):
     """
-    Rescales betas to have zero terminal SNR Based on https://
+    Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)


     Args:
@@ -139,7 +139,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
     [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
     [`~SchedulerMixin.from_pretrained`] functions.

-    For more details, see the original paper: https://
+    For more details, see the original paper: https://huggingface.co/papers/2010.02502

     Args:
         num_train_timesteps (`int`): number of diffusion steps used to train the model.
@@ -165,21 +165,21 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
             process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
             https://imagen.research.google/video/paper.pdf)
         thresholding (`bool`, default `False`):
-            whether to use the "dynamic thresholding" method (introduced by Imagen,
-            Note that the thresholding method is unsuitable for latent-space
-            stable-diffusion).
+            whether to use the "dynamic thresholding" method (introduced by Imagen,
+            https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space
+            diffusion models (such as stable-diffusion).
         dynamic_thresholding_ratio (`float`, default `0.995`):
             the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
-            (https://
+            (https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`.
         sample_max_value (`float`, default `1.0`):
             the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
         timestep_spacing (`str`, default `"leading"`):
             The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
-            Steps are Flawed](https://
+            Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
         rescale_betas_zero_snr (`bool`, default `False`):
-            whether to rescale the betas to have zero terminal SNR (proposed by
-            This can enable the model to generate very bright and dark
-            medium brightness. Loosely related to
+            whether to rescale the betas to have zero terminal SNR (proposed by
+            https://huggingface.co/papers/2305.08891). This can enable the model to generate very bright and dark
+            samples instead of limiting it to samples with medium brightness. Loosely related to
             [`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
     """

@@ -291,7 +291,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
         photorealism as well as better image-text alignment, especially when using very large guidance weights."

-        https://
+        https://huggingface.co/papers/2205.11487
         """
         dtype = sample.dtype
         batch_size, channels, *remaining_dims = sample.shape
@@ -335,7 +335,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):

         self.num_inference_steps = num_inference_steps

-        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
         if self.config.timestep_spacing == "linspace":
             timesteps = (
                 np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -390,7 +390,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
             generator: random number generator.
             variance_noise (`torch.Tensor`): instead of generating noise for the variance using `generator`, we
                 can directly provide the noise for the variance itself. This is useful for methods such as
-                CycleDiffusion. (https://
+                CycleDiffusion. (https://huggingface.co/papers/2210.05559)
             return_dict (`bool`): option for returning tuple rather than DDIMParallelSchedulerOutput class

         Returns:
@@ -404,7 +404,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
                 "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
             )

-        # See formulas (12) and (16) of DDIM paper https://
+        # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
         # Ideally, read DDIM paper in-detail understanding

         # Notation (<variable name> -> <name in paper>
@@ -425,7 +425,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
         beta_prod_t = 1 - alpha_prod_t

         # 3. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (12) from https://
+        # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
             pred_epsilon = model_output
@@ -458,10 +458,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
             # the pred_epsilon is always re-derived from the clipped x_0 in Glide
             pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)

-        # 6. compute "direction pointing to x_t" of formula (12) from https://
+        # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
         pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon

-        # 7. compute x_t without "random noise" of formula (12) from https://
+        # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
         prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction

         if eta > 0:
@@ -526,7 +526,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):

         assert eta == 0.0

-        # See formulas (12) and (16) of DDIM paper https://
+        # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
         # Ideally, read DDIM paper in-detail understanding

         # Notation (<variable name> -> <name in paper>
@@ -554,7 +554,7 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
         beta_prod_t = 1 - alpha_prod_t

         # 3. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (12) from https://
+        # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
             pred_epsilon = model_output
@@ -587,10 +587,10 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
             # the pred_epsilon is always re-derived from the clipped x_0 in Glide
             pred_epsilon = (sample - alpha_prod_t ** (0.5) * pred_original_sample) / beta_prod_t ** (0.5)

-        # 6. compute "direction pointing to x_t" of formula (12) from https://
+        # 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
         pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * pred_epsilon

-        # 7. compute x_t without "random noise" of formula (12) from https://
+        # 7. compute x_t without "random noise" of formula (12) from https://huggingface.co/papers/2010.02502
         prev_sample = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction

         return prev_sample
```
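Several of the hunks above only swap the URL in comments citing formula (12) of the DDIM paper. For readers following those comments, the step they annotate is, in the paper's notation (where \(\alpha_t\) is the cumulative product `alpha_prod_t` and \(\sigma_t\) is `std_dev_t`):

\[
\hat{x}_0 = \frac{x_t - \sqrt{1-\alpha_t}\,\epsilon_\theta(x_t)}{\sqrt{\alpha_t}},
\qquad
x_{t-1} = \sqrt{\alpha_{t-1}}\,\hat{x}_0
\;+\; \sqrt{1-\alpha_{t-1}-\sigma_t^2}\;\epsilon_\theta(x_t)
\;+\; \sigma_t\,\varepsilon_t,
\quad \varepsilon_t \sim \mathcal{N}(0, I).
\]

`pred_original_sample`, `pred_sample_direction`, and `prev_sample` in the surrounding code are exactly these three pieces; with `eta = 0` the \(\sigma_t\) term vanishes and the step is deterministic.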
diffusers/schedulers/scheduling_ddpm.py

```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -92,7 +92,7 @@ def betas_for_alpha_bar(
 # Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
 def rescale_zero_terminal_snr(betas):
     """
-    Rescales betas to have zero terminal SNR Based on https://
+    Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)


     Args:
@@ -295,7 +295,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         self.num_inference_steps = num_inference_steps
         self.custom_timesteps = False

-        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
         if self.config.timestep_spacing == "linspace":
             timesteps = (
                 np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
@@ -329,7 +329,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
         current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev

-        # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://
+        # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
         # and sample from it to get previous sample
         # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
         variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
@@ -343,7 +343,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         # hacks - were probably added for training stability
         if variance_type == "fixed_small":
             variance = variance
-        # for rl-diffuser https://
+        # for rl-diffuser https://huggingface.co/papers/2205.09991
         elif variance_type == "fixed_small_log":
             variance = torch.log(variance)
             variance = torch.exp(0.5 * variance)
@@ -370,7 +370,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
         photorealism as well as better image-text alignment, especially when using very large guidance weights."

-        https://
+        https://huggingface.co/papers/2205.11487
         """
         dtype = sample.dtype
         batch_size, channels, *remaining_dims = sample.shape
@@ -443,7 +443,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
         current_beta_t = 1 - current_alpha_t

         # 2. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (15) from https://
+        # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
         elif self.config.prediction_type == "sample":
@@ -465,12 +465,12 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
             )

         # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
         current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t

         # 5. Compute predicted previous sample µ_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample

         # 6. Add noise
```
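The comment-only changes above cite formulas (6), (7), and (15) of the DDPM paper. The quantities the surrounding code computes are the predicted \(x_0\), the posterior variance, and the posterior mean (with \(\bar\alpha_t\) = `alpha_prod_t`, \(\beta_t\) = `current_beta_t`):

\[
\hat{x}_0 = \frac{x_t - \sqrt{1-\bar{\alpha}_t}\,\epsilon_\theta(x_t)}{\sqrt{\bar{\alpha}_t}},
\qquad
\tilde{\beta}_t = \frac{1-\bar{\alpha}_{t-1}}{1-\bar{\alpha}_t}\,\beta_t,
\qquad
\tilde{\mu}_t(x_t, \hat{x}_0) = \frac{\sqrt{\bar{\alpha}_{t-1}}\,\beta_t}{1-\bar{\alpha}_t}\,\hat{x}_0
+ \frac{\sqrt{\alpha_t}\,(1-\bar{\alpha}_{t-1})}{1-\bar{\alpha}_t}\,x_t .
\]

`variance`, `pred_original_sample_coeff`, and `current_sample_coeff` in the hunks map one-to-one onto \(\tilde\beta_t\) and the two coefficients of \(\tilde\mu_t\).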
diffusers/schedulers/scheduling_ddpm_flax.py

```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
     [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
     [`~SchedulerMixin.from_pretrained`] functions.

-    For more details, see the original paper: https://
+    For more details, see the original paper: https://huggingface.co/papers/2006.11239

     Args:
         num_train_timesteps (`int`): number of diffusion steps used to train the model.
@@ -163,7 +163,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
         alpha_prod_t = state.common.alphas_cumprod[t]
         alpha_prod_t_prev = jnp.where(t > 0, state.common.alphas_cumprod[t - 1], jnp.array(1.0, dtype=self.dtype))

-        # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://
+        # For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
         # and sample from it to get previous sample
         # x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
         variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * state.common.betas[t]
@@ -174,7 +174,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
         # hacks - were probably added for training stability
         if variance_type == "fixed_small":
             variance = jnp.clip(variance, a_min=1e-20)
-        # for rl-diffuser https://
+        # for rl-diffuser https://huggingface.co/papers/2205.09991
         elif variance_type == "fixed_small_log":
             variance = jnp.log(jnp.clip(variance, a_min=1e-20))
         elif variance_type == "fixed_large":
@@ -240,7 +240,7 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
         beta_prod_t_prev = 1 - alpha_prod_t_prev

         # 2. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (15) from https://
+        # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
         elif self.config.prediction_type == "sample":
@@ -258,12 +258,12 @@ class FlaxDDPMScheduler(FlaxSchedulerMixin, ConfigMixin):
             pred_original_sample = jnp.clip(pred_original_sample, -1, 1)

         # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * state.common.betas[t]) / beta_prod_t
         current_sample_coeff = state.common.alphas[t] ** (0.5) * beta_prod_t_prev / beta_prod_t

         # 5. Compute predicted previous sample µ_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample

         # 6. Add noise
```
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 ParaDiGMS authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -94,7 +94,7 @@ def betas_for_alpha_bar(
|
|
94
94
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
95
95
|
def rescale_zero_terminal_snr(betas):
|
96
96
|
"""
|
97
|
-
Rescales betas to have zero terminal SNR Based on https://
|
97
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
98
98
|
|
99
99
|
|
100
100
|
Args:
|
@@ -138,7 +138,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
138
138
|
[`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
|
139
139
|
[`~SchedulerMixin.from_pretrained`] functions.
|
140
140
|
|
141
|
-
For more details, see the original paper: https://
|
141
|
+
For more details, see the original paper: https://huggingface.co/papers/2006.11239
|
142
142
|
|
143
143
|
Args:
|
144
144
|
num_train_timesteps (`int`): number of diffusion steps used to train the model.
|
@@ -161,17 +161,17 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
161
161
|
process), `sample` (directly predicting the noisy sample`) or `v_prediction` (see section 2.4
|
162
162
|
https://imagen.research.google/video/paper.pdf)
|
163
163
|
thresholding (`bool`, default `False`):
|
164
|
-
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
165
|
-
Note that the thresholding method is unsuitable for latent-space
|
166
|
-
stable-diffusion).
|
164
|
+
whether to use the "dynamic thresholding" method (introduced by Imagen,
|
165
|
+
https://huggingface.co/papers/2205.11487). Note that the thresholding method is unsuitable for latent-space
|
166
|
+
diffusion models (such as stable-diffusion).
|
167
167
|
dynamic_thresholding_ratio (`float`, default `0.995`):
|
168
168
|
the ratio for the dynamic thresholding method. Default is `0.995`, the same as Imagen
|
169
|
-
(https://
|
169
|
+
(https://huggingface.co/papers/2205.11487). Valid only when `thresholding=True`.
|
170
170
|
sample_max_value (`float`, default `1.0`):
|
171
171
|
the threshold value for dynamic thresholding. Valid only when `thresholding=True`.
|
172
172
|
timestep_spacing (`str`, default `"leading"`):
|
173
173
|
The way the timesteps should be scaled. Refer to Table 2. of [Common Diffusion Noise Schedules and Sample
|
174
|
-
Steps are Flawed](https://
|
174
|
+
Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
|
175
175
|
steps_offset (`int`, default `0`):
|
176
176
|
An offset added to the inference steps, as required by some model families.
|
177
177
|
rescale_betas_zero_snr (`bool`, defaults to `False`):
|
@@ -305,7 +305,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
305
305
|
self.num_inference_steps = num_inference_steps
|
306
306
|
self.custom_timesteps = False
|
307
307
|
|
308
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
308
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
309
309
|
if self.config.timestep_spacing == "linspace":
|
310
310
|
timesteps = (
|
311
311
|
np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)
|
@@ -340,7 +340,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
340
340
|
alpha_prod_t_prev = self.alphas_cumprod[prev_t] if prev_t >= 0 else self.one
|
341
341
|
current_beta_t = 1 - alpha_prod_t / alpha_prod_t_prev
|
342
342
|
|
343
|
-
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://
|
343
|
+
# For t > 0, compute predicted variance βt (see formula (6) and (7) from https://huggingface.co/papers/2006.11239)
|
344
344
|
# and sample from it to get previous sample
|
345
345
|
# x_{t-1} ~ N(pred_prev_sample, variance) == add variance to pred_sample
|
346
346
|
variance = (1 - alpha_prod_t_prev) / (1 - alpha_prod_t) * current_beta_t
|
@@ -354,7 +354,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
|
|
354
354
|
# hacks - were probably added for training stability
|
355
355
|
if variance_type == "fixed_small":
|
356
356
|
variance = variance
|
357
|
-
# for rl-diffuser https://
|
357
|
+
# for rl-diffuser https://huggingface.co/papers/2205.09991
|
358
358
|
elif variance_type == "fixed_small_log":
|
359
359
|
variance = torch.log(variance)
|
360
360
|
variance = torch.exp(0.5 * variance)
|
@@ -382,7 +382,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
         photorealism as well as better image-text alignment, especially when using very large guidance weights."
 
-        https://
+        https://huggingface.co/papers/2205.11487
         """
         dtype = sample.dtype
         batch_size, channels, *remaining_dims = sample.shape
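A condensed sketch of the Imagen-style dynamic thresholding described in the docstring above: clip each sample to its own `ratio`-quantile and rescale. The helper name and exact clamping details are illustrative, not the library's implementation:

```python
import torch

def dynamic_threshold(sample: torch.Tensor, ratio: float = 0.995, max_value: float = 1.0) -> torch.Tensor:
    # Per-sample quantile of absolute values, kept at least at `max_value`.
    batch_size = sample.shape[0]
    flat = sample.reshape(batch_size, -1)
    s = torch.quantile(flat.abs().float(), ratio, dim=1).clamp(min=max_value)
    s = s.view(batch_size, *([1] * (sample.ndim - 1)))
    # Clip to [-s, s], then rescale so values stay within [-max_value, max_value].
    return sample.clamp(-s, s) * (max_value / s)
```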
@@ -451,7 +451,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         current_beta_t = 1 - current_alpha_t
 
         # 2. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (15) from https://
+        # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
         elif self.config.prediction_type == "sample":

@@ -473,12 +473,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         )
 
         # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
         current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
 
         # 5. Compute predicted previous sample µ_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
 
         # 6. Add noise
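Spelling out the two references in the comments above (formulas (15) and (7) of https://huggingface.co/papers/2006.11239): the epsilon branch first recovers x_0, and the two coefficients then form the posterior mean used for `pred_prev_sample`:

```latex
\hat{x}_0 = \frac{x_t - \sqrt{1-\bar{\alpha}_t}\,\epsilon_\theta(x_t,t)}{\sqrt{\bar{\alpha}_t}},
\qquad
\tilde{\mu}_t
= \underbrace{\frac{\sqrt{\bar{\alpha}_{t-1}}\,\beta_t}{1-\bar{\alpha}_t}}_{\text{pred\_original\_sample\_coeff}} \hat{x}_0
\;+\; \underbrace{\frac{\sqrt{\alpha_t}\,(1-\bar{\alpha}_{t-1})}{1-\bar{\alpha}_t}}_{\text{current\_sample\_coeff}} x_t
```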
@@ -554,7 +554,7 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         current_beta_t = 1 - current_alpha_t
 
         # 2. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (15) from https://
+        # "predicted x_0" of formula (15) from https://huggingface.co/papers/2006.11239
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
         elif self.config.prediction_type == "sample":

@@ -576,12 +576,12 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
         )
 
         # 4. Compute coefficients for pred_original_sample x_0 and current sample x_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_original_sample_coeff = (alpha_prod_t_prev ** (0.5) * current_beta_t) / beta_prod_t
         current_sample_coeff = current_alpha_t ** (0.5) * beta_prod_t_prev / beta_prod_t
 
         # 5. Compute predicted previous sample µ_t
-        # See formula (7) from https://
+        # See formula (7) from https://huggingface.co/papers/2006.11239
         pred_prev_sample = pred_original_sample_coeff * pred_original_sample + current_sample_coeff * sample
 
         return pred_prev_sample
@@ -1,5 +1,5 @@
 # Copyright (c) 2022 Pablo Pernías MIT License
-# Copyright
+# Copyright 2025 UC Berkeley Team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -95,7 +95,7 @@ class DDPMWuerstchenScheduler(SchedulerMixin, ConfigMixin):
     [`SchedulerMixin`] provides general loading and saving functionality via the [`SchedulerMixin.save_pretrained`] and
     [`~SchedulerMixin.from_pretrained`] functions.
 
-    For more details, see the original paper: https://
+    For more details, see the original paper: https://huggingface.co/papers/2006.11239
 
     Args:
         scaler (`float`): ....
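Since the docstring points to `SchedulerMixin.save_pretrained` / `from_pretrained`, a minimal usage sketch (the repository id is a placeholder, not a real checkpoint):

```python
from diffusers import DDPMWuerstchenScheduler

# Placeholder repo id; any checkpoint that ships a compatible scheduler config works.
scheduler = DDPMWuerstchenScheduler.from_pretrained(
    "some-org/some-wuerstchen-checkpoint", subfolder="scheduler"
)
scheduler.save_pretrained("./my-scheduler")
```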
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 FLAIR Lab and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# DISCLAIMER: check https://
+# DISCLAIMER: check https://huggingface.co/papers/2204.13902 and https://github.com/qsh-zh/deis for more info
 # The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
 
 import math
@@ -153,6 +153,8 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         flow_shift: Optional[float] = 1.0,
         timestep_spacing: str = "linspace",
         steps_offset: int = 0,
+        use_dynamic_shifting: bool = False,
+        time_shift_type: str = "exponential",
     ):
         if self.config.use_beta_sigmas and not is_scipy_available():
             raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
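A sketch of constructing the scheduler with the two newly added config entries (values are illustrative; `use_dynamic_shifting` only matters once `mu` is passed to `set_timesteps`, as the next hunk shows):

```python
from diffusers import DEISMultistepScheduler

scheduler = DEISMultistepScheduler(
    flow_shift=1.0,
    use_dynamic_shifting=True,      # allow `mu` to override flow_shift at set_timesteps time
    time_shift_type="exponential",  # the only shift type the `mu` path accepts
)
```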
@@ -232,7 +234,9 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         """
         self._begin_index = begin_index
 
-    def set_timesteps(
+    def set_timesteps(
+        self, num_inference_steps: int, device: Union[str, torch.device] = None, mu: Optional[float] = None
+    ):
         """
         Sets the discrete timesteps used for the diffusion chain (to be run before inference).
 

@@ -242,7 +246,10 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
             device (`str` or `torch.device`, *optional*):
                 The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
         """
-
+        if mu is not None:
+            assert self.config.use_dynamic_shifting and self.config.time_shift_type == "exponential"
+            self.config.flow_shift = np.exp(mu)
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
         if self.config.timestep_spacing == "linspace":
             timesteps = (
                 np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps + 1)
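When `mu` is given, `set_timesteps` simply overrides `flow_shift` with `exp(mu)`. How that shift is later applied to the flow-matching sigmas is not part of this hunk; the sketch below assumes the common `shift * sigma / (1 + (shift - 1) * sigma)` form used elsewhere in the library:

```python
import numpy as np

mu = 0.7
shift = np.exp(mu)                     # what set_timesteps stores in config.flow_shift

sigmas = np.linspace(1.0, 1e-3, 10)
shifted = shift * sigmas / (1 + (shift - 1) * sigmas)  # assumed downstream use of flow_shift
```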
@@ -319,7 +326,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         pixels from saturation at each step. We find that dynamic thresholding results in significantly better
         photorealism as well as better image-text alignment, especially when using very large guidance weights."
 
-        https://
+        https://huggingface.co/papers/2205.11487
         """
         dtype = sample.dtype
         batch_size, channels, *remaining_dims = sample.shape

@@ -486,7 +493,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         if len(args) > 1:
             sample = args[1]
         else:
-            raise ValueError("missing `sample` as a required
+            raise ValueError("missing `sample` as a required keyword argument")
         if timestep is not None:
             deprecate(
                 "timesteps",

@@ -549,7 +556,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         if len(args) > 2:
             sample = args[2]
         else:
-            raise ValueError("
+            raise ValueError("missing `sample` as a required keyword argument")
         if timestep is not None:
             deprecate(
                 "timesteps",

@@ -603,7 +610,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         if len(args) > 2:
             sample = args[2]
         else:
-            raise ValueError("
+            raise ValueError("missing `sample` as a required keyword argument")
         if timestep_list is not None:
             deprecate(
                 "timestep_list",

@@ -673,7 +680,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
         if len(args) > 2:
             sample = args[2]
         else:
-            raise ValueError("
+            raise ValueError("missing `sample` as a required keyword argument")
         if timestep_list is not None:
             deprecate(
                 "timestep_list",
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The CogVideoX team, Tsinghua University & ZhipuAI and The HuggingFace Team.
 # All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");

@@ -95,7 +95,7 @@ def betas_for_alpha_bar(
 
 def rescale_zero_terminal_snr(alphas_cumprod):
     """
-    Rescales betas to have zero terminal SNR Based on https://
+    Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
 
 
     Args:
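The function above implements Algorithm 1 of https://huggingface.co/papers/2305.08891 on `alphas_cumprod`. A condensed sketch of that algorithm (shift so the last step has exactly zero SNR, rescale so the first step is unchanged; not guaranteed to match the library line for line):

```python
import torch

def rescale_zero_terminal_snr_sketch(alphas_cumprod: torch.Tensor) -> torch.Tensor:
    alphas_bar_sqrt = alphas_cumprod.sqrt()

    alphas_bar_sqrt_0 = alphas_bar_sqrt[0].clone()   # first value, to be preserved
    alphas_bar_sqrt_T = alphas_bar_sqrt[-1].clone()  # last value, to be driven to zero

    # Shift so the final sqrt(alpha_bar) is exactly 0 (zero terminal SNR) ...
    alphas_bar_sqrt = alphas_bar_sqrt - alphas_bar_sqrt_T
    # ... and rescale so the first sqrt(alpha_bar) keeps its original value.
    alphas_bar_sqrt = alphas_bar_sqrt * alphas_bar_sqrt_0 / (alphas_bar_sqrt_0 - alphas_bar_sqrt_T)

    return alphas_bar_sqrt**2
```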
@@ -276,7 +276,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
 
         self.num_inference_steps = num_inference_steps
 
-        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
+        # "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
         if self.config.timestep_spacing == "linspace":
             timesteps = (
                 np.linspace(0, self.config.num_train_timesteps - 1, num_inference_steps)

@@ -377,7 +377,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
                 "Number of inference steps is 'None', you need to run 'set_timesteps' after creating the scheduler"
             )
 
-        # See formulas (12) and (16) of DDIM paper https://
+        # See formulas (12) and (16) of DDIM paper https://huggingface.co/papers/2010.02502
         # Ideally, read DDIM paper in-detail understanding
 
         # Notation (<variable name> -> <name in paper>

@@ -399,7 +399,7 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
         beta_prod_t = 1 - alpha_prod_t
 
         # 3. compute predicted original sample from predicted noise also called
-        # "predicted x_0" of formula (12) from https://
+        # "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
         # To make style tests pass, commented out `pred_epsilon` as it is an unused variable
         if self.config.prediction_type == "epsilon":
             pred_original_sample = (sample - beta_prod_t ** (0.5) * model_output) / alpha_prod_t ** (0.5)
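The "predicted x_0" term referenced above is formula (12) of the DDIM paper (https://huggingface.co/papers/2010.02502); in the paper's notation α_t denotes the cumulative product (`alpha_prod_t` in the code), so the epsilon branch computes:

```latex
\hat{x}_0 \;=\; \frac{x_t - \sqrt{1-\alpha_t}\,\epsilon_\theta^{(t)}(x_t)}{\sqrt{\alpha_t}}
```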