diffusers-0.33.1-py3-none-any.whl → diffusers-0.34.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,12 @@
|
|
1
1
|
import inspect
|
2
2
|
from typing import Callable, Dict, List, Optional, Union
|
3
3
|
|
4
|
-
import numpy as np
|
5
4
|
import PIL
|
6
5
|
import PIL.Image
|
7
6
|
import torch
|
8
7
|
from transformers import T5EncoderModel, T5Tokenizer
|
9
8
|
|
9
|
+
from ...image_processor import VaeImageProcessor
|
10
10
|
from ...loaders import StableDiffusionLoraLoaderMixin
|
11
11
|
from ...models import Kandinsky3UNet, VQModel
|
12
12
|
from ...schedulers import DDPMScheduler
|
@@ -53,24 +53,6 @@ EXAMPLE_DOC_STRING = """
|
|
53
53
|
"""
|
54
54
|
|
55
55
|
|
56
|
-
def downscale_height_and_width(height, width, scale_factor=8):
|
57
|
-
new_height = height // scale_factor**2
|
58
|
-
if height % scale_factor**2 != 0:
|
59
|
-
new_height += 1
|
60
|
-
new_width = width // scale_factor**2
|
61
|
-
if width % scale_factor**2 != 0:
|
62
|
-
new_width += 1
|
63
|
-
return new_height * scale_factor, new_width * scale_factor
|
64
|
-
|
65
|
-
|
66
|
-
def prepare_image(pil_image):
|
67
|
-
arr = np.array(pil_image.convert("RGB"))
|
68
|
-
arr = arr.astype(np.float32) / 127.5 - 1
|
69
|
-
arr = np.transpose(arr, [2, 0, 1])
|
70
|
-
image = torch.from_numpy(arr).unsqueeze(0)
|
71
|
-
return image
|
72
|
-
|
73
|
-
|
74
56
|
class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixin):
|
75
57
|
model_cpu_offload_seq = "text_encoder->movq->unet->movq"
|
76
58
|
_callback_tensor_inputs = [
|
@@ -94,6 +76,14 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
|
|
94
76
|
self.register_modules(
|
95
77
|
tokenizer=tokenizer, text_encoder=text_encoder, unet=unet, scheduler=scheduler, movq=movq
|
96
78
|
)
|
79
|
+
movq_scale_factor = 2 ** (len(self.movq.config.block_out_channels) - 1) if getattr(self, "movq", None) else 8
|
80
|
+
movq_latent_channels = self.movq.config.latent_channels if getattr(self, "movq", None) else 4
|
81
|
+
self.image_processor = VaeImageProcessor(
|
82
|
+
vae_scale_factor=movq_scale_factor,
|
83
|
+
vae_latent_channels=movq_latent_channels,
|
84
|
+
resample="bicubic",
|
85
|
+
reducing_gap=1,
|
86
|
+
)
|
97
87
|
|
98
88
|
def get_timesteps(self, num_inference_steps, strength, device):
|
99
89
|
# get the original timestep using init_timestep
|
@@ -309,7 +299,7 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
|
|
309
299
|
def prepare_extra_step_kwargs(self, generator, eta):
|
310
300
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
311
301
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
312
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
302
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
313
303
|
# and should be between [0, 1]
|
314
304
|
|
315
305
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -449,11 +439,11 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
|
|
449
439
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
450
440
|
expense of slower inference.
|
451
441
|
guidance_scale (`float`, *optional*, defaults to 3.0):
|
452
|
-
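Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
453
|
-
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
454
|
-
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
455
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,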
|
456
|
-
usually at the expense of lower image quality.
|
442
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
443
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
444
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
445
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
446
|
+
the text `prompt`, usually at the expense of lower image quality.
|
457
447
|
negative_prompt (`str` or `List[str]`, *optional*):
|
458
448
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
459
449
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -566,7 +556,7 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
|
|
566
556
|
f"Input is in incorrect format: {[type(i) for i in image]}. Currently, we only support PIL image and pytorch tensor"
|
567
557
|
)
|
568
558
|
|
569
|
-
image = torch.cat([prepare_image(i) for i in image], dim=0)
|
559
|
+
image = torch.cat([self.image_processor.preprocess(i) for i in image], dim=0)
|
570
560
|
image = image.to(dtype=prompt_embeds.dtype, device=device)
|
571
561
|
# 4. Prepare timesteps
|
572
562
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
@@ -630,20 +620,9 @@ class Kandinsky3Img2ImgPipeline(DiffusionPipeline, StableDiffusionLoraLoaderMixi
|
|
630
620
|
xm.mark_step()
|
631
621
|
|
632
622
|
# post-processing
|
633
|
-
if output_type not in ["pt", "np", "pil", "latent"]:
|
634
|
-
raise ValueError(
|
635
|
-
f"Only the output types `pt`, `pil`, `np` and `latent` are supported not output_type={output_type}"
|
636
|
-
)
|
637
623
|
if not output_type == "latent":
|
638
624
|
image = self.movq.decode(latents, force_not_quantize=True)["sample"]
|
639
|
-
|
640
|
-
if output_type in ["np", "pil"]:
|
641
|
-
image = image * 0.5 + 0.5
|
642
|
-
image = image.clamp(0, 1)
|
643
|
-
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
644
|
-
|
645
|
-
if output_type == "pil":
|
646
|
-
image = self.numpy_to_pil(image)
|
625
|
+
image = self.image_processor.postprocess(image, output_type)
|
647
626
|
else:
|
648
627
|
image = latents
|
649
628
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -436,7 +436,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
|
|
436
436
|
def prepare_extra_step_kwargs(self, generator, eta):
|
437
437
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
438
438
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
439
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
439
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
440
440
|
# and should be between [0, 1]
|
441
441
|
|
442
442
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -633,7 +633,7 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
|
|
633
633
|
return self._guidance_scale
|
634
634
|
|
635
635
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
636
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
636
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
637
637
|
# corresponds to doing no classifier free guidance.
|
638
638
|
@property
|
639
639
|
def do_classifier_free_guidance(self):
|
@@ -729,11 +729,11 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
|
|
729
729
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
730
730
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
731
731
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
732
|
-
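Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
733
|
-
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
734
|
-
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
735
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,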
|
736
|
-
usually at the expense of lower image quality.
|
732
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
733
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
734
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
735
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
736
|
+
the text `prompt`, usually at the expense of lower image quality.
|
737
737
|
negative_prompt (`str` or `List[str]`, *optional*):
|
738
738
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
739
739
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -741,8 +741,8 @@ class KolorsPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffusionLor
|
|
741
741
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
742
742
|
The number of images to generate per prompt.
|
743
743
|
eta (`float`, *optional*, defaults to 0.0):
|
744
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
745
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
744
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
745
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
746
746
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
747
747
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
748
748
|
to make generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -25,7 +25,7 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
|
|
25
25
|
from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
|
26
26
|
from ...schedulers import KarrasDiffusionSchedulers
|
27
27
|
from ...utils import is_torch_xla_available, logging, replace_example_docstring
|
28
|
-
from ...utils.torch_utils import randn_tensor
|
28
|
+
from ...utils.torch_utils import empty_device_cache, randn_tensor
|
29
29
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
30
30
|
from .pipeline_output import KolorsPipelineOutput
|
31
31
|
from .text_encoder import ChatGLMModel
|
@@ -456,7 +456,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
456
456
|
def prepare_extra_step_kwargs(self, generator, eta):
|
457
457
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
458
458
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
459
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
459
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
460
460
|
# and should be between [0, 1]
|
461
461
|
|
462
462
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -618,7 +618,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
618
618
|
# Offload text encoder if `enable_model_cpu_offload` was enabled
|
619
619
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
620
620
|
self.text_encoder_2.to("cpu")
|
621
|
-
|
621
|
+
empty_device_cache()
|
622
622
|
|
623
623
|
image = image.to(device=device, dtype=dtype)
|
624
624
|
|
@@ -761,7 +761,7 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
761
761
|
return self._guidance_scale
|
762
762
|
|
763
763
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
764
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
764
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
765
765
|
# corresponds to doing no classifier free guidance.
|
766
766
|
@property
|
767
767
|
def do_classifier_free_guidance(self):
|
@@ -880,11 +880,11 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
880
880
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
881
881
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
882
882
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
883
|
-
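Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
884
|
-
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
885
|
-
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
886
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,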
|
887
|
-
usually at the expense of lower image quality.
|
883
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
884
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
885
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
886
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
887
|
+
the text `prompt`, usually at the expense of lower image quality.
|
888
888
|
negative_prompt (`str` or `List[str]`, *optional*):
|
889
889
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
890
890
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -892,8 +892,8 @@ class KolorsImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin, StableDiffu
|
|
892
892
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
893
893
|
The number of images to generate per prompt.
|
894
894
|
eta (`float`, *optional*, defaults to 0.0):
|
895
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
896
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
895
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
896
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
897
897
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
898
898
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
899
899
|
to make generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -434,7 +434,7 @@ class MLP(torch.nn.Module):
|
|
434
434
|
|
435
435
|
self.add_bias = config.add_bias_linear
|
436
436
|
|
437
|
-
# Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
|
437
|
+
# Project to 4h. If using swiglu double the output width, see https://huggingface.co/papers/2002.05202
|
438
438
|
self.dense_h_to_4h = nn.Linear(
|
439
439
|
config.hidden_size,
|
440
440
|
config.ffn_hidden_size * 2,
|
@@ -668,7 +668,7 @@ class Embedding(torch.nn.Module):
|
|
668
668
|
# Embeddings.
|
669
669
|
words_embeddings = self.word_embeddings(input_ids)
|
670
670
|
embeddings = words_embeddings
|
671
|
-
# Data format change to avoid explicit tranposes : [b s h] --> [s b h].
|
671
|
+
# Data format change to avoid explicit transposes : [b s h] --> [s b h].
|
672
672
|
embeddings = embeddings.transpose(0, 1).contiguous()
|
673
673
|
# If the input flag for fp32 residual connection is set, convert for float.
|
674
674
|
if self.fp32_residual_connection:
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 ChatGLM3-6B Model Team, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -607,7 +607,7 @@ class LatentConsistencyModelImg2ImgPipeline(
|
|
607
607
|
def prepare_extra_step_kwargs(self, generator, eta):
|
608
608
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
609
609
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
610
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
610
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
611
611
|
# and should be between [0, 1]
|
612
612
|
|
613
613
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 Stanford University Team and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 Stanford University Team and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -548,7 +548,7 @@ class LatentConsistencyModelPipeline(
|
|
548
548
|
def prepare_extra_step_kwargs(self, generator, eta):
|
549
549
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
550
550
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
551
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
551
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
552
552
|
# and should be between [0, 1]
|
553
553
|
|
554
554
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -95,8 +95,8 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
|
|
95
95
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
96
96
|
expense of slower inference.
|
97
97
|
eta (`float`, *optional*, defaults to 0.0):
|
98
|
-
Corresponds to parameter eta (η) from the [DDIM](https://arxiv.org/abs/2010.02502) paper. Only applies
|
99
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
98
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
99
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
100
100
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
101
101
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
102
102
|
generation deterministic.
|
@@ -166,7 +166,7 @@ class LDMSuperResolutionPipeline(DiffusionPipeline):
|
|
166
166
|
|
167
167
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature.
|
168
168
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
169
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
169
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
170
170
|
# and should be between [0, 1]
|
171
171
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
172
172
|
extra_kwargs = {}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 the Latte Team and The HuggingFace Team.
|
1
|
+
# Copyright 2025 the Latte Team and The HuggingFace Team.
|
2
2
|
# All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
@@ -356,7 +356,7 @@ class LattePipeline(DiffusionPipeline):
|
|
356
356
|
def prepare_extra_step_kwargs(self, generator, eta):
|
357
357
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
358
358
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
359
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
359
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
360
360
|
# and should be between [0, 1]
|
361
361
|
|
362
362
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -501,7 +501,7 @@ class LattePipeline(DiffusionPipeline):
|
|
501
501
|
# &
|
502
502
|
caption = re.sub(r"&", "", caption)
|
503
503
|
|
504
|
-
# ip adresses:
|
504
|
+
# ip addresses:
|
505
505
|
caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
|
506
506
|
|
507
507
|
# article ids:
|
@@ -592,7 +592,7 @@ class LattePipeline(DiffusionPipeline):
|
|
592
592
|
return self._guidance_scale
|
593
593
|
|
594
594
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
595
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
595
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
596
596
|
# corresponds to doing no classifier free guidance.
|
597
597
|
@property
|
598
598
|
def do_classifier_free_guidance(self):
|
@@ -657,11 +657,11 @@ class LattePipeline(DiffusionPipeline):
|
|
657
657
|
Custom timesteps to use for the denoising process. If not defined, equal spaced `num_inference_steps`
|
658
658
|
timesteps are used. Must be in descending order.
|
659
659
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
660
|
-
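Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
661
|
-
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
662
|
-
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
663
|
-
1`. Higher guidance scale encourages to generate videos that are closely linked to the text `prompt`,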
|
664
|
-
usually at the expense of lower video quality.
|
660
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
661
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
662
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
663
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate videos that are closely linked to
|
664
|
+
the text `prompt`, usually at the expense of lower video quality.
|
665
665
|
video_length (`int`, *optional*, defaults to 16):
|
666
666
|
The number of video frames that are generated. Defaults to 16 frames which at 8 frames per seconds
|
667
667
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
@@ -671,8 +671,8 @@ class LattePipeline(DiffusionPipeline):
|
|
671
671
|
width (`int`, *optional*, defaults to self.unet.config.sample_size):
|
672
672
|
The width in pixels of the generated video.
|
673
673
|
eta (`float`, *optional*, defaults to 0.0):
|
674
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
675
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
674
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
675
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
676
676
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
677
677
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
678
678
|
to make generation deterministic.
|
@@ -747,7 +747,7 @@ class LattePipeline(DiffusionPipeline):
|
|
747
747
|
device = self._execution_device
|
748
748
|
|
749
749
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
750
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
750
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
751
751
|
# corresponds to doing no classifier free guidance.
|
752
752
|
do_classifier_free_guidance = guidance_scale > 1.0
|
753
753
|
|
@@ -244,7 +244,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
244
244
|
r"""
|
245
245
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
246
246
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
247
|
-
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
247
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
248
248
|
|
249
249
|
Args:
|
250
250
|
noise_cfg (`torch.Tensor`):
|
@@ -439,7 +439,7 @@ class LEditsPPPipelineStableDiffusion(
|
|
439
439
|
def prepare_extra_step_kwargs(self, eta, generator=None):
|
440
440
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
441
441
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
442
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
442
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
443
443
|
# and should be between [0, 1]
|
444
444
|
|
445
445
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -808,7 +808,7 @@ class LEditsPPPipelineStableDiffusion(
|
|
808
808
|
edit_guidance_scale (`float` or `List[float]`, *optional*, defaults to 5):
|
809
809
|
Guidance scale for guiding the image generation. If provided as list values should correspond to
|
810
810
|
`editing_prompt`. `edit_guidance_scale` is defined as `s_e` of equation 12 of [LEDITS++
|
811
|
-
Paper](https://arxiv.org/abs/2301.12247).
|
811
|
+
Paper](https://huggingface.co/papers/2301.12247).
|
812
812
|
edit_warmup_steps (`float` or `List[float]`, *optional*, defaults to 10):
|
813
813
|
Number of diffusion steps (for each prompt) for which guidance will not be applied.
|
814
814
|
edit_cooldown_steps (`float` or `List[float]`, *optional*, defaults to `None`):
|
@@ -816,7 +816,7 @@ class LEditsPPPipelineStableDiffusion(
|
|
816
816
|
edit_threshold (`float` or `List[float]`, *optional*, defaults to 0.9):
|
817
817
|
Masking threshold of guidance. Threshold should be proportional to the image region that is modified.
|
818
818
|
'edit_threshold' is defined as 'λ' of equation 12 of [LEDITS++
|
819
|
-
Paper](https://arxiv.org/abs/2301.12247).
|
819
|
+
Paper](https://huggingface.co/papers/2301.12247).
|
820
820
|
user_mask (`torch.Tensor`, *optional*):
|
821
821
|
User-provided mask for even better control over the editing process. This is helpful when LEDITS++'s
|
822
822
|
implicit masks do not meet user preferences.
|
@@ -826,11 +826,11 @@ class LEditsPPPipelineStableDiffusion(
|
|
826
826
|
use_cross_attn_mask (`bool`, defaults to `False`):
|
827
827
|
Whether cross-attention masks are used. Cross-attention masks are always used when use_intersect_mask
|
828
828
|
is set to true. Cross-attention masks are defined as 'M^1' of equation 12 of [LEDITS++
|
829
|
-
paper](https://arxiv.org/pdf/2311.16711.pdf).
|
829
|
+
paper](https://huggingface.co/papers/2311.16711).
|
830
830
|
use_intersect_mask (`bool`, defaults to `True`):
|
831
831
|
Whether the masking term is calculated as intersection of cross-attention masks and masks derived from
|
832
832
|
the noise estimate. Cross-attention mask are defined as 'M^1' and masks derived from the noise estimate
|
833
|
-
are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://arxiv.org/pdf/2311.16711.pdf).
|
833
|
+
are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://huggingface.co/papers/2311.16711).
|
834
834
|
attn_store_steps (`List[int]`, *optional*):
|
835
835
|
Steps for which the attention maps are stored in the AttentionStore. Just for visualization purposes.
|
836
836
|
store_averaged_over_steps (`bool`, defaults to `True`):
|
@@ -841,7 +841,7 @@ class LEditsPPPipelineStableDiffusion(
|
|
841
841
|
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
842
842
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
843
843
|
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
844
|
-
Flawed](https://arxiv.org/pdf/2305.08891.pdf). Guidance rescale factor should fix overexposure when
|
844
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
845
845
|
using zero terminal SNR.
|
846
846
|
clip_skip (`int`, *optional*):
|
847
847
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
@@ -1191,7 +1191,7 @@ class LEditsPPPipelineStableDiffusion(
|
|
1191
1191
|
noise_pred = noise_pred_uncond + noise_guidance_edit
|
1192
1192
|
|
1193
1193
|
if enable_edit_guidance and self.guidance_rescale > 0.0:
|
1194
|
-
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
1194
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1195
1195
|
noise_pred = rescale_noise_cfg(
|
1196
1196
|
noise_pred,
|
1197
1197
|
noise_pred_edit_concepts.mean(dim=0, keepdim=False),
|
@@ -1268,8 +1268,8 @@ class LEditsPPPipelineStableDiffusion(
|
|
1268
1268
|
):
|
1269
1269
|
r"""
|
1270
1270
|
The function to the pipeline for image inversion as described by the [LEDITS++
|
1271
|
-
Paper](https://arxiv.org/abs/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
|
1272
|
-
inversion proposed by [edit-friendly DPDM](https://arxiv.org/abs/2304.06140) will be performed instead.
|
1271
|
+
Paper](https://huggingface.co/papers/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
|
1272
|
+
inversion proposed by [edit-friendly DPDM](https://huggingface.co/papers/2304.06140) will be performed instead.
|
1273
1273
|
|
1274
1274
|
Args:
|
1275
1275
|
image (`PipelineImageInput`):
|
@@ -1443,7 +1443,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
|
|
1443
1443
|
beta_prod_t = 1 - alpha_prod_t
|
1444
1444
|
|
1445
1445
|
# 3. compute predicted original sample from predicted noise also called
|
1446
|
-
# "predicted x_0" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
|
1446
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
1447
1447
|
pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5)
|
1448
1448
|
|
1449
1449
|
# 4. Clip "predicted x_0"
|
@@ -1455,10 +1455,10 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
|
|
1455
1455
|
variance = scheduler._get_variance(timestep, prev_timestep)
|
1456
1456
|
std_dev_t = eta * variance ** (0.5)
|
1457
1457
|
|
1458
|
-
# 6. compute "direction pointing to x_t" of formula (12) from https://arxiv.org/pdf/2010.02502.pdf
|
1458
|
+
# 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
1459
1459
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred
|
1460
1460
|
|
1461
|
-
# modifed so that updated xtm1 is returned as well (to avoid error accumulation)
|
1461
|
+
# modified so that updated xtm1 is returned as well (to avoid error accumulation)
|
1462
1462
|
mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
1463
1463
|
if variance > 0.0:
|
1464
1464
|
noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta)
|
@@ -622,7 +622,7 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
622
622
|
def prepare_extra_step_kwargs(self, eta, generator=None):
|
623
623
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
624
624
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
625
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
625
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
626
626
|
# and should be between [0, 1]
|
627
627
|
|
628
628
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -747,7 +747,7 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
747
747
|
return self._clip_skip
|
748
748
|
|
749
749
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
750
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
750
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
751
751
|
# corresponds to doing no classifier free guidance.
|
752
752
|
@property
|
753
753
|
def do_classifier_free_guidance(self):
|
@@ -901,9 +901,10 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
901
901
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
902
902
|
guidance_rescale (`float`, *optional*, defaults to 0.7):
|
903
903
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
904
|
-
Flawed](https://arxiv.org/pdf/2305.08891.pdf) `guidance_scale` is defined as `φ` in equation 16. of
|
905
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
906
|
-
Guidance rescale factor should fix overexposure when
|
904
|
+
Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
|
905
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
906
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
907
|
+
using zero terminal SNR.
|
907
908
|
crops_coords_top_left (`Tuple[int]`, *optional*, defaults to (0, 0)):
|
908
909
|
`crops_coords_top_left` can be used to generate an image that appears to be "cropped" from the position
|
909
910
|
`crops_coords_top_left` downwards. Favorable, well-centered images are usually achieved by setting
|
@@ -929,7 +930,7 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
929
930
|
edit_guidance_scale (`float` or `List[float]`, *optional*, defaults to 5):
|
930
931
|
Guidance scale for guiding the image generation. If provided as list values should correspond to
|
931
932
|
`editing_prompt`. `edit_guidance_scale` is defined as `s_e` of equation 12 of [LEDITS++
|
932
|
-
Paper](https://arxiv.org/abs/2301.12247).
|
933
|
+
Paper](https://huggingface.co/papers/2301.12247).
|
933
934
|
edit_warmup_steps (`float` or `List[float]`, *optional*, defaults to 10):
|
934
935
|
Number of diffusion steps (for each prompt) for which guidance is not applied.
|
935
936
|
edit_cooldown_steps (`float` or `List[float]`, *optional*, defaults to `None`):
|
@@ -937,18 +938,18 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
937
938
|
edit_threshold (`float` or `List[float]`, *optional*, defaults to 0.9):
|
938
939
|
Masking threshold of guidance. Threshold should be proportional to the image region that is modified.
|
939
940
|
'edit_threshold' is defined as 'λ' of equation 12 of [LEDITS++
|
940
|
-
Paper](https://
|
941
|
+
Paper](https://huggingface.co/papers/2301.12247).
|
941
942
|
sem_guidance (`List[torch.Tensor]`, *optional*):
|
942
943
|
List of pre-generated guidance vectors to be applied at generation. Length of the list has to
|
943
944
|
correspond to `num_inference_steps`.
|
944
945
|
use_cross_attn_mask:
|
945
946
|
Whether cross-attention masks are used. Cross-attention masks are always used when use_intersect_mask
|
946
947
|
is set to true. Cross-attention masks are defined as 'M^1' of equation 12 of [LEDITS++
|
947
|
-
paper](https://
|
948
|
+
paper](https://huggingface.co/papers/2311.16711).
|
948
949
|
use_intersect_mask:
|
949
950
|
Whether the masking term is calculated as intersection of cross-attention masks and masks derived from
|
950
951
|
the noise estimate. Cross-attention masks are defined as 'M^1' and masks derived from the noise estimate
|
951
|
-
are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://
|
952
|
+
are defined as 'M^2' of equation 12 of [LEDITS++ paper](https://huggingface.co/papers/2311.16711).
|
952
953
|
user_mask:
|
953
954
|
User-provided mask for even better control over the editing process. This is helpful when LEDITS++'s
|
954
955
|
implicit masks do not meet user preferences.
|
@@ -1350,7 +1351,7 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
1350
1351
|
|
1351
1352
|
# compute the previous noisy sample x_t -> x_t-1
|
1352
1353
|
if enable_edit_guidance and self.guidance_rescale > 0.0:
|
1353
|
-
# Based on 3.4. in https://
|
1354
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1354
1355
|
noise_pred = rescale_noise_cfg(
|
1355
1356
|
noise_pred,
|
1356
1357
|
noise_pred_edit_concepts.mean(dim=0, keepdim=False),
|
@@ -1478,8 +1479,8 @@ class LEditsPPPipelineStableDiffusionXL(
|
|
1478
1479
|
):
|
1479
1480
|
r"""
|
1480
1481
|
The function to call the pipeline for image inversion as described by the [LEDITS++
|
1481
|
-
Paper](https://
|
1482
|
-
inversion proposed by [edit-friendly DPDM](https://
|
1482
|
+
Paper](https://huggingface.co/papers/2301.12247). If the scheduler is set to [`~schedulers.DDIMScheduler`] the
|
1483
|
+
inversion proposed by [edit-friendly DDPM](https://huggingface.co/papers/2304.06140) will be performed instead.
|
1483
1484
|
|
1484
1485
|
Args:
|
1485
1486
|
image (`PipelineImageInput`):
|
@@ -1691,7 +1692,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
1691
1692
|
r"""
|
1692
1693
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
1693
1694
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
1694
|
-
Flawed](https://
|
1695
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
1695
1696
|
|
1696
1697
|
Args:
|
1697
1698
|
noise_cfg (`torch.Tensor`):
|
@@ -1727,7 +1728,7 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
|
|
1727
1728
|
beta_prod_t = 1 - alpha_prod_t
|
1728
1729
|
|
1729
1730
|
# 3. compute predicted original sample from predicted noise also called
|
1730
|
-
# "predicted x_0" of formula (12) from https://
|
1731
|
+
# "predicted x_0" of formula (12) from https://huggingface.co/papers/2010.02502
|
1731
1732
|
pred_original_sample = (latents - beta_prod_t ** (0.5) * noise_pred) / alpha_prod_t ** (0.5)
|
1732
1733
|
|
1733
1734
|
# 4. Clip "predicted x_0"
|
@@ -1739,10 +1740,10 @@ def compute_noise_ddim(scheduler, prev_latents, latents, timestep, noise_pred, e
|
|
1739
1740
|
variance = scheduler._get_variance(timestep, prev_timestep)
|
1740
1741
|
std_dev_t = eta * variance ** (0.5)
|
1741
1742
|
|
1742
|
-
# 6. compute "direction pointing to x_t" of formula (12) from https://
|
1743
|
+
# 6. compute "direction pointing to x_t" of formula (12) from https://huggingface.co/papers/2010.02502
|
1743
1744
|
pred_sample_direction = (1 - alpha_prod_t_prev - std_dev_t**2) ** (0.5) * noise_pred
|
1744
1745
|
|
1745
|
-
#
|
1746
|
+
# modified so that updated xtm1 is returned as well (to avoid error accumulation)
|
1746
1747
|
mu_xt = alpha_prod_t_prev ** (0.5) * pred_original_sample + pred_sample_direction
|
1747
1748
|
if variance > 0.0:
|
1748
1749
|
noise = (prev_latents - mu_xt) / (variance ** (0.5) * eta)
|