diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,561 @@
|
|
1
|
+
# Copyright 2025 Stability AI, Katherine Crowson and The HuggingFace Team. All rights reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import math
|
16
|
+
from dataclasses import dataclass
|
17
|
+
from typing import List, Optional, Tuple, Union
|
18
|
+
|
19
|
+
import numpy as np
|
20
|
+
import torch
|
21
|
+
|
22
|
+
from ..configuration_utils import ConfigMixin, register_to_config
|
23
|
+
from ..utils import BaseOutput, is_scipy_available, logging
|
24
|
+
from ..utils.torch_utils import randn_tensor
|
25
|
+
from .scheduling_utils import SchedulerMixin
|
26
|
+
|
27
|
+
|
28
|
+
if is_scipy_available():
|
29
|
+
import scipy.stats
|
30
|
+
|
31
|
+
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
32
|
+
|
33
|
+
|
34
|
+
@dataclass
class FlowMatchLCMSchedulerOutput(BaseOutput):
    """
    Output class for the scheduler's `step` function output.

    Args:
        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop. When scale factors are configured on the scheduler, `step` resizes the prediction, so the
            spatial size of `prev_sample` may differ from that of the sample passed in.
    """

    # Sample produced by one call to `FlowMatchLCMScheduler.step`.
    prev_sample: torch.FloatTensor
|
46
|
+
|
47
|
+
|
48
|
+
class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin):
|
49
|
+
"""
|
50
|
+
LCM scheduler for Flow Matching.
|
51
|
+
|
52
|
+
This model inherits from [`SchedulerMixin`] and [`ConfigMixin`]. Check the superclass documentation for the generic
|
53
|
+
methods the library implements for all schedulers such as loading and saving.
|
54
|
+
|
55
|
+
Args:
|
56
|
+
num_train_timesteps (`int`, defaults to 1000):
|
57
|
+
The number of diffusion steps to train the model.
|
58
|
+
shift (`float`, defaults to 1.0):
|
59
|
+
The shift value for the timestep schedule.
|
60
|
+
use_dynamic_shifting (`bool`, defaults to False):
|
61
|
+
Whether to apply timestep shifting on-the-fly based on the image resolution.
|
62
|
+
base_shift (`float`, defaults to 0.5):
|
63
|
+
Value to stabilize image generation. Increasing `base_shift` reduces variation and image is more consistent
|
64
|
+
with desired output.
|
65
|
+
max_shift (`float`, defaults to 1.15):
|
66
|
+
Value change allowed to latent vectors. Increasing `max_shift` encourages more variation and image may be
|
67
|
+
more exaggerated or stylized.
|
68
|
+
base_image_seq_len (`int`, defaults to 256):
|
69
|
+
The base image sequence length.
|
70
|
+
max_image_seq_len (`int`, defaults to 4096):
|
71
|
+
The maximum image sequence length.
|
72
|
+
invert_sigmas (`bool`, defaults to False):
|
73
|
+
Whether to invert the sigmas.
|
74
|
+
shift_terminal (`float`, defaults to None):
|
75
|
+
The end value of the shifted timestep schedule.
|
76
|
+
use_karras_sigmas (`bool`, defaults to False):
|
77
|
+
Whether to use Karras sigmas for step sizes in the noise schedule during sampling.
|
78
|
+
use_exponential_sigmas (`bool`, defaults to False):
|
79
|
+
Whether to use exponential sigmas for step sizes in the noise schedule during sampling.
|
80
|
+
use_beta_sigmas (`bool`, defaults to False):
|
81
|
+
Whether to use beta sigmas for step sizes in the noise schedule during sampling.
|
82
|
+
time_shift_type (`str`, defaults to "exponential"):
|
83
|
+
The type of dynamic resolution-dependent timestep shifting to apply. Either "exponential" or "linear".
|
84
|
+
scale_factors (`List[float]`, defaults to `None`):
|
85
|
+
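Per-step multipliers applied to the initial spatial size of the model output; at each step the predicted clean sample is resized to that scaled size. Must contain one entry fewer than the number of timesteps.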
|
86
|
+
upscale_mode (`str`, defaults to `"bicubic"`):
|
87
|
+
Interpolation mode passed to `torch.nn.functional.interpolate` when scale-wise generation is enabled (i.e. when `scale_factors` is set).
|
88
|
+
"""
|
89
|
+
|
90
|
+
_compatibles = []
|
91
|
+
order = 1
|
92
|
+
|
93
|
+
@register_to_config
def __init__(
    self,
    num_train_timesteps: int = 1000,
    shift: float = 1.0,
    use_dynamic_shifting: bool = False,
    base_shift: Optional[float] = 0.5,
    max_shift: Optional[float] = 1.15,
    base_image_seq_len: Optional[int] = 256,
    max_image_seq_len: Optional[int] = 4096,
    invert_sigmas: bool = False,
    shift_terminal: Optional[float] = None,
    use_karras_sigmas: Optional[bool] = False,
    use_exponential_sigmas: Optional[bool] = False,
    use_beta_sigmas: Optional[bool] = False,
    time_shift_type: str = "exponential",
    scale_factors: Optional[List[float]] = None,
    upscale_mode: Optional[str] = "bicubic",
):
    """
    Validate the configuration and build the default (training-length) sigma/timestep schedule.

    Raises:
        ImportError: If beta sigmas are requested but scipy is not installed.
        ValueError: If more than one alternative sigma schedule is enabled, or `time_shift_type` is not
            `"exponential"` or `"linear"`.
    """
    # --- configuration validation -------------------------------------------------
    config = self.config
    if config.use_beta_sigmas and not is_scipy_available():
        raise ImportError("Make sure to install scipy if you want to use beta sigmas.")

    alternative_schedules = [config.use_beta_sigmas, config.use_exponential_sigmas, config.use_karras_sigmas]
    if sum(alternative_schedules) > 1:
        raise ValueError(
            "Only one of `config.use_beta_sigmas`, `config.use_exponential_sigmas`, `config.use_karras_sigmas` can be used."
        )

    if time_shift_type not in {"exponential", "linear"}:
        raise ValueError("`time_shift_type` must either be 'exponential' or 'linear'.")

    # --- default schedule: N, N-1, ..., 1 mapped to sigmas in (0, 1] --------------
    descending_steps = np.linspace(1, num_train_timesteps, num_train_timesteps, dtype=np.float32)[::-1].copy()
    schedule = torch.from_numpy(descending_steps).to(dtype=torch.float32) / num_train_timesteps

    if not use_dynamic_shifting:
        # when use_dynamic_shifting is True, the shift is applied later, on the fly, in `set_timesteps`
        schedule = shift * schedule / (1 + (shift - 1) * schedule)

    self.timesteps = schedule * num_train_timesteps
    self.sigmas = schedule.to("cpu")  # to avoid too much CPU/GPU communication
    self.sigma_min = self.sigmas[-1].item()
    self.sigma_max = self.sigmas[0].item()

    # --- mutable runtime state ----------------------------------------------------
    self._step_index = None
    self._begin_index = None
    self._shift = shift

    # scale-wise generation state; `_init_size` is filled in by the first `step` call
    self._init_size = None
    self._scale_factors = scale_factors
    self._upscale_mode = upscale_mode
|
143
|
+
|
144
|
+
@property
def shift(self):
    """
    The value used for shifting.

    It is initialised from the `shift` constructor argument, can be overridden with `set_shift`, and is read by
    `set_timesteps` when dynamic shifting is disabled.
    """
    return self._shift
|
150
|
+
|
151
|
+
@property
def step_index(self):
    """
    The index counter for current timestep. It will increase 1 after each scheduler step.

    It is `None` after construction and after every `set_timesteps` call, until the first `step` initialises it.
    """
    return self._step_index
|
157
|
+
|
158
|
+
@property
def begin_index(self):
    """
    The index for the first timestep. It should be set from pipeline with `set_begin_index` method.

    It is `None` after construction and is reset to `None` by `set_timesteps`.
    """
    return self._begin_index
|
164
|
+
|
165
|
+
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
|
166
|
+
def set_begin_index(self, begin_index: int = 0):
    """
    Sets the begin index for the scheduler. This function should be run from pipeline before the inference.

    Args:
        begin_index (`int`):
            The begin index for the scheduler.
    """
    # Stored value is used by `_init_step_index` as the starting step index and by `scale_noise` to pick sigma.
    self._begin_index = begin_index
|
175
|
+
|
176
|
+
def set_shift(self, shift: float):
    """
    Overrides the shift value exposed through the `shift` property.

    Args:
        shift (`float`):
            The new shift value. It takes effect on the next `set_timesteps` call; sigmas already computed are not
            modified here.
    """
    self._shift = shift
|
178
|
+
|
179
|
+
def set_scale_factors(self, scale_factors: list, upscale_mode):
    """
    Configures the scale-wise generation regime.

    Args:
        scale_factors (`list`):
            One scale factor per step.
        upscale_mode (`str`):
            The upscaling method to use.
    """
    self._scale_factors, self._upscale_mode = scale_factors, upscale_mode
|
191
|
+
|
192
|
+
def scale_noise(
    self,
    sample: torch.FloatTensor,
    timestep: Union[float, torch.FloatTensor],
    noise: Optional[torch.FloatTensor] = None,
) -> torch.FloatTensor:
    """
    Forward process in flow-matching: blends `sample` with `noise` as `sigma * noise + (1 - sigma) * sample`.

    Args:
        sample (`torch.FloatTensor`):
            The input sample.
        timestep (`float` or `torch.FloatTensor`):
            The current timestep(s) in the diffusion chain. A Python scalar or a 0-d tensor is treated as a
            one-element batch.
        noise (`torch.FloatTensor`, *optional*):
            The noise to mix in. If `None`, standard Gaussian noise with the shape, dtype and device of `sample` is
            drawn (without a generator, so the result is not reproducible unless the global seed is fixed).

    Returns:
        `torch.FloatTensor`:
            A scaled input sample.
    """
    # Honour the `Optional` default instead of failing later on `sigma * None`.
    if noise is None:
        noise = torch.randn_like(sample)

    # Make sure sigmas and timesteps have the same device and dtype as the sample
    sigmas = self.sigmas.to(device=sample.device, dtype=sample.dtype)

    # Accept Python scalars and 0-d tensors: the code below iterates over `timestep` and reads `shape[0]`.
    timestep = torch.as_tensor(timestep)
    if timestep.ndim == 0:
        timestep = timestep.unsqueeze(0)

    if sample.device.type == "mps" and torch.is_floating_point(timestep):
        # mps does not support float64
        schedule_timesteps = self.timesteps.to(sample.device, dtype=torch.float32)
        timestep = timestep.to(sample.device, dtype=torch.float32)
    else:
        schedule_timesteps = self.timesteps.to(sample.device)
        timestep = timestep.to(sample.device)

    # self.begin_index is None when scheduler is used for training, or pipeline does not implement set_begin_index
    if self.begin_index is None:
        step_indices = [self.index_for_timestep(t, schedule_timesteps) for t in timestep]
    elif self.step_index is not None:
        # scale_noise is called after the first denoising step (for inpainting)
        step_indices = [self.step_index] * timestep.shape[0]
    else:
        # scale_noise is called before the first denoising step to create the initial latent (img2img)
        step_indices = [self.begin_index] * timestep.shape[0]

    # Append trailing singleton dimensions so sigma broadcasts against `sample`.
    sigma = sigmas[step_indices].flatten()
    while len(sigma.shape) < len(sample.shape):
        sigma = sigma.unsqueeze(-1)

    sample = sigma * noise + (1.0 - sigma) * sample

    return sample
|
239
|
+
|
240
|
+
def _sigma_to_t(self, sigma):
    """Maps a sigma value to a timestep by multiplying it with `config.num_train_timesteps`."""
    return sigma * self.config.num_train_timesteps
|
242
|
+
|
243
|
+
def time_shift(self, mu: float, sigma: float, t: torch.Tensor):
    """
    Applies the shift selected by `config.time_shift_type` to `t`.

    Dispatches to `_time_shift_exponential` or `_time_shift_linear`. Returns `None` for any other configured type.
    """
    shift_kind = self.config.time_shift_type
    if shift_kind == "exponential":
        return self._time_shift_exponential(mu, sigma, t)
    if shift_kind == "linear":
        return self._time_shift_linear(mu, sigma, t)
    return None
|
248
|
+
|
249
|
+
def stretch_shift_to_terminal(self, t: torch.Tensor) -> torch.Tensor:
    r"""
    Rescales a timestep schedule so that its last entry equals `self.config.shift_terminal`.

    Reference:
    https://github.com/Lightricks/LTX-Video/blob/a01a171f8fe3d99dce2728d60a73fecf4d4238ae/ltx_video/schedulers/rf.py#L51

    Args:
        t (`torch.Tensor`):
            The timesteps to stretch and shift.

    Returns:
        `torch.Tensor`:
            The adjusted timesteps, whose final value equals `self.config.shift_terminal`.
    """
    complement = 1 - t
    # Ratio between the current distance-from-one of the last entry and the desired one.
    stretch = complement[-1] / (1 - self.config.shift_terminal)
    return 1 - (complement / stretch)
|
269
|
+
|
270
|
+
def set_timesteps(
    self,
    num_inference_steps: Optional[int] = None,
    device: Union[str, torch.device] = None,
    sigmas: Optional[List[float]] = None,
    mu: Optional[float] = None,
    timesteps: Optional[List[float]] = None,
):
    """
    Sets the discrete timesteps used for the diffusion chain (to be run before inference).

    At least one of `num_inference_steps`, `sigmas` or `timesteps` must be given. The call also resets the step and
    begin indices.

    Args:
        num_inference_steps (`int`, *optional*):
            The number of diffusion steps used when generating samples with a pre-trained model.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        sigmas (`List[float]`, *optional*):
            Custom values for sigmas to be used for each diffusion step. If `None`, the sigmas are computed
            automatically.
        mu (`float`, *optional*):
            Determines the amount of shifting applied to sigmas when performing resolution-dependent timestep
            shifting. Required when `config.use_dynamic_shifting` is `True`.
        timesteps (`List[float]`, *optional*):
            Custom values for timesteps to be used for each diffusion step. If `None`, the timesteps are computed
            automatically. Note that with `config.invert_sigmas` the timesteps are recomputed from the inverted
            sigmas.

    Raises:
        ValueError: If `mu` is missing while dynamic shifting is enabled, if the lengths of `sigmas`, `timesteps`
            and `num_inference_steps` disagree, or if none of the three is provided.
    """
    if self.config.use_dynamic_shifting and mu is None:
        raise ValueError("`mu` must be passed when `use_dynamic_shifting` is set to be `True`")

    if sigmas is not None and timesteps is not None:
        if len(sigmas) != len(timesteps):
            raise ValueError("`sigmas` and `timesteps` should have the same length")

    if num_inference_steps is not None:
        if (sigmas is not None and len(sigmas) != num_inference_steps) or (
            timesteps is not None and len(timesteps) != num_inference_steps
        ):
            raise ValueError(
                "`sigmas` and `timesteps` should have the same length as num_inference_steps, if `num_inference_steps` is provided"
            )
    else:
        # Without this guard the length lookup below would fail with an opaque `len(None)` TypeError.
        if sigmas is None and timesteps is None:
            raise ValueError("One of `num_inference_steps`, `sigmas` or `timesteps` must be provided.")
        num_inference_steps = len(sigmas) if sigmas is not None else len(timesteps)

    self.num_inference_steps = num_inference_steps

    # 1. Prepare default sigmas
    is_timesteps_provided = timesteps is not None

    if is_timesteps_provided:
        timesteps = np.array(timesteps).astype(np.float32)

    if sigmas is None:
        if timesteps is None:
            timesteps = np.linspace(
                self._sigma_to_t(self.sigma_max), self._sigma_to_t(self.sigma_min), num_inference_steps
            )
        sigmas = timesteps / self.config.num_train_timesteps
    else:
        sigmas = np.array(sigmas).astype(np.float32)
        num_inference_steps = len(sigmas)

    # 2. Perform timestep shifting. Either no shifting is applied, or resolution-dependent shifting of
    # "exponential" or "linear" type is applied
    if self.config.use_dynamic_shifting:
        sigmas = self.time_shift(mu, 1.0, sigmas)
    else:
        sigmas = self.shift * sigmas / (1 + (self.shift - 1) * sigmas)

    # 3. If required, stretch the sigmas schedule to terminate at the configured `shift_terminal` value
    if self.config.shift_terminal:
        sigmas = self.stretch_shift_to_terminal(sigmas)

    # 4. If required, convert sigmas to one of karras, exponential, or beta sigma schedules
    if self.config.use_karras_sigmas:
        sigmas = self._convert_to_karras(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
    elif self.config.use_exponential_sigmas:
        sigmas = self._convert_to_exponential(in_sigmas=sigmas, num_inference_steps=num_inference_steps)
    elif self.config.use_beta_sigmas:
        sigmas = self._convert_to_beta(in_sigmas=sigmas, num_inference_steps=num_inference_steps)

    # 5. Convert sigmas and timesteps to tensors and move to specified device
    sigmas = torch.from_numpy(sigmas).to(dtype=torch.float32, device=device)
    if not is_timesteps_provided:
        timesteps = sigmas * self.config.num_train_timesteps
    else:
        timesteps = torch.from_numpy(timesteps).to(dtype=torch.float32, device=device)

    # 6. Append the terminal sigma value.
    # If a model requires inverted sigma schedule for denoising but timesteps without inversion, the
    # `invert_sigmas` flag can be set to `True`. This case is only required in Mochi
    if self.config.invert_sigmas:
        sigmas = 1.0 - sigmas
        timesteps = sigmas * self.config.num_train_timesteps
        sigmas = torch.cat([sigmas, torch.ones(1, device=sigmas.device)])
    else:
        sigmas = torch.cat([sigmas, torch.zeros(1, device=sigmas.device)])

    self.timesteps = timesteps
    self.sigmas = sigmas
    # A new schedule invalidates any progress made on the previous one.
    self._step_index = None
    self._begin_index = None
|
371
|
+
|
372
|
+
def index_for_timestep(self, timestep, schedule_timesteps=None):
    """
    Returns the position of `timestep` inside `schedule_timesteps` (defaults to `self.timesteps`).

    When the timestep occurs more than once, the second occurrence is returned; otherwise the only one is.
    """
    candidates = self.timesteps if schedule_timesteps is None else schedule_timesteps
    matches = (candidates == timestep).nonzero()

    # The sigma index taken for the **very** first `step` is the second match when there are several.
    # This avoids accidentally skipping a sigma when starting in the middle of the denoising
    # schedule (e.g. for image-to-image).
    if len(matches) > 1:
        return matches[1].item()
    return matches[0].item()
|
385
|
+
|
386
|
+
def _init_step_index(self, timestep):
|
387
|
+
if self.begin_index is None:
|
388
|
+
if isinstance(timestep, torch.Tensor):
|
389
|
+
timestep = timestep.to(self.timesteps.device)
|
390
|
+
self._step_index = self.index_for_timestep(timestep)
|
391
|
+
else:
|
392
|
+
self._step_index = self._begin_index
|
393
|
+
|
394
|
+
def step(
    self,
    model_output: torch.FloatTensor,
    timestep: Union[float, torch.FloatTensor],
    sample: torch.FloatTensor,
    generator: Optional[torch.Generator] = None,
    return_dict: bool = True,
) -> Union[FlowMatchLCMSchedulerOutput, Tuple]:
    """
    Performs one LCM step: predicts the clean sample from `model_output`, optionally resizes it, and re-noises
    it to the next sigma level.

    Args:
        model_output (`torch.FloatTensor`):
            The direct output from learned diffusion model.
        timestep (`float`):
            The current discrete timestep in the diffusion chain.
        sample (`torch.FloatTensor`):
            A current instance of a sample created by the diffusion process.
        generator (`torch.Generator`, *optional*):
            A random number generator.
        return_dict (`bool`):
            Whether or not to return a [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] or
            tuple.

    Returns:
        [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] or `tuple`:
            If return_dict is `True`, [`~schedulers.scheduling_flow_match_lcm.FlowMatchLCMSchedulerOutput`] is
            returned, otherwise a tuple is returned where the first element is the sample tensor.
    """
    if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)):
        raise ValueError(
            (
                "Passing integer indices (e.g. from `enumerate(timesteps)`) as timesteps to"
                " `FlowMatchLCMScheduler.step()` is not supported. Make sure to pass"
                " one of the `scheduler.timesteps` as a timestep."
            ),
        )

    # Scale-wise generation is active only when both the factors and the mode are set.
    scale_wise = bool(self._scale_factors and self._upscale_mode)
    if scale_wise and len(self.timesteps) != len(self._scale_factors) + 1:
        raise ValueError(
            "`_scale_factors` should have the same length as `timesteps` - 1, if `_scale_factors` are set."
        )

    # On the first step of a schedule, remember the spatial size that the scale factors refer to.
    is_first_step = self.step_index is None
    if is_first_step or self._init_size is None:
        self._init_size = model_output.size()[2:]
    if is_first_step:
        self._init_step_index(timestep)

    # Upcast to avoid precision issues when computing prev_sample
    sample = sample.to(torch.float32)

    current = self.step_index
    sigma = self.sigmas[current]
    sigma_next = self.sigmas[current + 1]
    x0_pred = sample - sigma * model_output

    if scale_wise and self._step_index < len(self._scale_factors):
        factor = self._scale_factors[self._step_index]
        target_size = [round(factor * dim) for dim in self._init_size]
        x0_pred = torch.nn.functional.interpolate(x0_pred, size=target_size, mode=self._upscale_mode)

    # Fresh noise is drawn at the (possibly resized) shape of the clean-sample prediction.
    noise = randn_tensor(x0_pred.shape, generator=generator, device=x0_pred.device, dtype=x0_pred.dtype)
    prev_sample = (1 - sigma_next) * x0_pred + sigma_next * noise

    # upon completion increase step index by one
    self._step_index += 1
    # Cast sample back to model compatible dtype
    prev_sample = prev_sample.to(model_output.dtype)

    if return_dict:
        return FlowMatchLCMSchedulerOutput(prev_sample=prev_sample)
    return (prev_sample,)
|
473
|
+
|
474
|
+
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
|
475
|
+
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
|
476
|
+
"""Constructs the noise schedule of Karras et al. (2022)."""
|
477
|
+
|
478
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
479
|
+
# TODO: Add this logic to the other schedulers
|
480
|
+
if hasattr(self.config, "sigma_min"):
|
481
|
+
sigma_min = self.config.sigma_min
|
482
|
+
else:
|
483
|
+
sigma_min = None
|
484
|
+
|
485
|
+
if hasattr(self.config, "sigma_max"):
|
486
|
+
sigma_max = self.config.sigma_max
|
487
|
+
else:
|
488
|
+
sigma_max = None
|
489
|
+
|
490
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
491
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
492
|
+
|
493
|
+
rho = 7.0 # 7.0 is the value used in the paper
|
494
|
+
ramp = np.linspace(0, 1, num_inference_steps)
|
495
|
+
min_inv_rho = sigma_min ** (1 / rho)
|
496
|
+
max_inv_rho = sigma_max ** (1 / rho)
|
497
|
+
sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
|
498
|
+
return sigmas
|
499
|
+
|
500
|
+
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
|
501
|
+
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
|
502
|
+
"""Constructs an exponential noise schedule."""
|
503
|
+
|
504
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
505
|
+
# TODO: Add this logic to the other schedulers
|
506
|
+
if hasattr(self.config, "sigma_min"):
|
507
|
+
sigma_min = self.config.sigma_min
|
508
|
+
else:
|
509
|
+
sigma_min = None
|
510
|
+
|
511
|
+
if hasattr(self.config, "sigma_max"):
|
512
|
+
sigma_max = self.config.sigma_max
|
513
|
+
else:
|
514
|
+
sigma_max = None
|
515
|
+
|
516
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
517
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
518
|
+
|
519
|
+
sigmas = np.exp(np.linspace(math.log(sigma_max), math.log(sigma_min), num_inference_steps))
|
520
|
+
return sigmas
|
521
|
+
|
522
|
+
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_beta
|
523
|
+
def _convert_to_beta(
|
524
|
+
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
|
525
|
+
) -> torch.Tensor:
|
526
|
+
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
|
527
|
+
|
528
|
+
# Hack to make sure that other schedulers which copy this function don't break
|
529
|
+
# TODO: Add this logic to the other schedulers
|
530
|
+
if hasattr(self.config, "sigma_min"):
|
531
|
+
sigma_min = self.config.sigma_min
|
532
|
+
else:
|
533
|
+
sigma_min = None
|
534
|
+
|
535
|
+
if hasattr(self.config, "sigma_max"):
|
536
|
+
sigma_max = self.config.sigma_max
|
537
|
+
else:
|
538
|
+
sigma_max = None
|
539
|
+
|
540
|
+
sigma_min = sigma_min if sigma_min is not None else in_sigmas[-1].item()
|
541
|
+
sigma_max = sigma_max if sigma_max is not None else in_sigmas[0].item()
|
542
|
+
|
543
|
+
sigmas = np.array(
|
544
|
+
[
|
545
|
+
sigma_min + (ppf * (sigma_max - sigma_min))
|
546
|
+
for ppf in [
|
547
|
+
scipy.stats.beta.ppf(timestep, alpha, beta)
|
548
|
+
for timestep in 1 - np.linspace(0, 1, num_inference_steps)
|
549
|
+
]
|
550
|
+
]
|
551
|
+
)
|
552
|
+
return sigmas
|
553
|
+
|
554
|
+
def _time_shift_exponential(self, mu, sigma, t):
|
555
|
+
return math.exp(mu) / (math.exp(mu) + (1 / t - 1) ** sigma)
|
556
|
+
|
557
|
+
def _time_shift_linear(self, mu, sigma, t):
|
558
|
+
return mu / (mu + (1 / t - 1) ** sigma)
|
559
|
+
|
560
|
+
def __len__(self):
    """Returns `config.num_train_timesteps` (the training schedule length, not the number of inference steps)."""
    return self.config.num_train_timesteps
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -301,7 +301,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
301
301
|
if timesteps is not None:
|
302
302
|
timesteps = np.array(timesteps, dtype=np.float32)
|
303
303
|
else:
|
304
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
304
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
305
305
|
if self.config.timestep_spacing == "linspace":
|
306
306
|
timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy()
|
307
307
|
elif self.config.timestep_spacing == "leading":
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Zhejiang University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -49,7 +49,7 @@ class IPNDMScheduler(SchedulerMixin, ConfigMixin):
|
|
49
49
|
self.init_noise_sigma = 1.0
|
50
50
|
|
51
51
|
# For now we only support F-PNDM, i.e. the runge-kutta method
|
52
|
-
# For more information on the algorithm please take a look at the paper: https://
|
52
|
+
# For more information on the algorithm please take a look at the paper: https://huggingface.co/papers/2202.09778
|
53
53
|
# mainly at formula (9), (12), (13) and the Algorithm 2.
|
54
54
|
self.pndm_order = 4
|
55
55
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -260,7 +260,7 @@ class KDPM2AncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
260
260
|
|
261
261
|
num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
|
262
262
|
|
263
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
263
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
264
264
|
if self.config.timestep_spacing == "linspace":
|
265
265
|
timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy()
|
266
266
|
elif self.config.timestep_spacing == "leading":
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -260,7 +260,7 @@ class KDPM2DiscreteScheduler(SchedulerMixin, ConfigMixin):
|
|
260
260
|
|
261
261
|
num_train_timesteps = num_train_timesteps or self.config.num_train_timesteps
|
262
262
|
|
263
|
-
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://
|
263
|
+
# "linspace", "leading", "trailing" corresponds to annotation of Table 2. of https://huggingface.co/papers/2305.08891
|
264
264
|
if self.config.timestep_spacing == "linspace":
|
265
265
|
timesteps = np.linspace(0, num_train_timesteps - 1, num_inference_steps, dtype=np.float32)[::-1].copy()
|
266
266
|
elif self.config.timestep_spacing == "leading":
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 NVIDIA and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -63,8 +63,8 @@ class FlaxKarrasVeScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
63
63
|
the VE column of Table 1 from [1] for reference.
|
64
64
|
|
65
65
|
[1] Karras, Tero, et al. "Elucidating the Design Space of Diffusion-Based Generative Models."
|
66
|
-
https://
|
67
|
-
differential equations." https://
|
66
|
+
https://huggingface.co/papers/2206.00364 [2] Song, Yang, et al. "Score-based generative modeling through stochastic
|
67
|
+
differential equations." https://huggingface.co/papers/2011.13456
|
68
68
|
|
69
69
|
[`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__`
|
70
70
|
function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
|
@@ -72,8 +72,8 @@ class FlaxKarrasVeScheduler(FlaxSchedulerMixin, ConfigMixin):
|
|
72
72
|
[`~SchedulerMixin.from_pretrained`] functions.
|
73
73
|
|
74
74
|
For more details on the parameters, see the original paper's Appendix E.: "Elucidating the Design Space of
|
75
|
-
Diffusion-Based Generative Models." https://
|
76
|
-
optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper.
|
75
|
+
Diffusion-Based Generative Models." https://huggingface.co/papers/2206.00364. The grid search values used to find
|
76
|
+
the optimal {s_noise, s_churn, s_min, s_max} for a specific model are described in Table 5 of the paper.
|
77
77
|
|
78
78
|
Args:
|
79
79
|
sigma_min (`float`): minimum noise magnitude
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -97,7 +97,7 @@ def betas_for_alpha_bar(
|
|
97
97
|
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
|
98
98
|
def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
|
99
99
|
"""
|
100
|
-
Rescales betas to have zero terminal SNR Based on https://
|
100
|
+
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)
|
101
101
|
|
102
102
|
|
103
103
|
Args:
|
@@ -321,7 +321,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
|
|
321
321
|
pixels from saturation at each step. We find that dynamic thresholding results in significantly better
|
322
322
|
photorealism as well as better image-text alignment, especially when using very large guidance weights."
|
323
323
|
|
324
|
-
https://
|
324
|
+
https://huggingface.co/papers/2205.11487
|
325
325
|
"""
|
326
326
|
dtype = sample.dtype
|
327
327
|
batch_size, channels, *remaining_dims = sample.shape
|