diffusers 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 Susung Hong and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 Susung Hong and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -34,7 +34,7 @@ from ...utils import (
|
|
34
34
|
unscale_lora_layers,
|
35
35
|
)
|
36
36
|
from ...utils.torch_utils import randn_tensor
|
37
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
37
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
38
38
|
from ..stable_diffusion import StableDiffusionPipelineOutput
|
39
39
|
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
40
40
|
|
@@ -106,8 +106,12 @@ class CrossAttnStoreProcessor:
|
|
106
106
|
return hidden_states
|
107
107
|
|
108
108
|
|
109
|
-
# Modified to get self-attention guidance scale in this paper (https://arxiv.org/pdf/2210.00939.pdf) as an input
|
110
|
-
class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin):
|
109
|
+
# Modified to get self-attention guidance scale in this paper (https://huggingface.co/papers/2210.00939) as an input
|
110
|
+
class StableDiffusionSAGPipeline(
|
111
|
+
DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, TextualInversionLoaderMixin, IPAdapterMixin
|
112
|
+
):
|
113
|
+
_last_supported_version = "0.33.1"
|
114
|
+
|
111
115
|
r"""
|
112
116
|
Pipeline for text-to-image generation using Stable Diffusion.
|
113
117
|
|
@@ -476,7 +480,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
|
476
480
|
def prepare_extra_step_kwargs(self, generator, eta):
|
477
481
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
478
482
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
479
|
-
# eta corresponds to η in DDIM paper: https://
|
483
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
480
484
|
# and should be between [0, 1]
|
481
485
|
|
482
486
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -616,8 +620,8 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
|
616
620
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
617
621
|
The number of images to generate per prompt.
|
618
622
|
eta (`float`, *optional*, defaults to 0.0):
|
619
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
620
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
623
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
624
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
621
625
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
622
626
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
623
627
|
generation deterministic.
|
@@ -681,11 +685,11 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
|
681
685
|
|
682
686
|
device = self._execution_device
|
683
687
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
684
|
-
# of the Imagen paper: https://
|
688
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
685
689
|
# corresponds to doing no classifier free guidance.
|
686
690
|
do_classifier_free_guidance = guidance_scale > 1.0
|
687
691
|
# and `sag_scale` is `s` of equation (16)
|
688
|
-
# of the self-attention guidance paper: https://
|
692
|
+
# of the self-attention guidance paper: https://huggingface.co/papers/2210.00939
|
689
693
|
# `sag_scale = 0` means no self-attention guidance
|
690
694
|
do_self_attention_guidance = sag_scale > 0.0
|
691
695
|
|
@@ -802,7 +806,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
|
802
806
|
if do_self_attention_guidance:
|
803
807
|
# classifier-free guidance produces two chunks of attention map
|
804
808
|
# and we only use unconditional one according to equation (25)
|
805
|
-
# in https://
|
809
|
+
# in https://huggingface.co/papers/2210.00939
|
806
810
|
if do_classifier_free_guidance:
|
807
811
|
# DDIM-like prediction of x0
|
808
812
|
pred_x0 = self.pred_x0(latents, noise_pred_uncond, t)
|
@@ -876,7 +880,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, StableDiffusionMixin, Textua
|
|
876
880
|
return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)
|
877
881
|
|
878
882
|
def sag_masking(self, original_latents, attn_map, map_size, t, eps):
|
879
|
-
# Same masking process as in SAG paper: https://
|
883
|
+
# Same masking process as in SAG paper: https://huggingface.co/papers/2210.00939
|
880
884
|
bh, hw1, hw2 = attn_map.shape
|
881
885
|
b, latent_channel, latent_h, latent_w = original_latents.shape
|
882
886
|
h = self.unet.config.attention_head_dim
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -90,7 +90,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
90
90
|
r"""
|
91
91
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
92
92
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
93
|
-
Flawed](https://
|
93
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
94
94
|
|
95
95
|
Args:
|
96
96
|
noise_cfg (`torch.Tensor`):
|
@@ -598,7 +598,7 @@ class StableDiffusionXLPipeline(
|
|
598
598
|
def prepare_extra_step_kwargs(self, generator, eta):
|
599
599
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
600
600
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
601
|
-
# eta corresponds to η in DDIM paper: https://
|
601
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
602
602
|
# and should be between [0, 1]
|
603
603
|
|
604
604
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -811,7 +811,7 @@ class StableDiffusionXLPipeline(
|
|
811
811
|
return self._clip_skip
|
812
812
|
|
813
813
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
814
|
-
# of the Imagen paper: https://
|
814
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
815
815
|
# corresponds to doing no classifier free guidance.
|
816
816
|
@property
|
817
817
|
def do_classifier_free_guidance(self):
|
@@ -914,11 +914,11 @@ class StableDiffusionXLPipeline(
|
|
914
914
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
915
915
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
916
916
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
917
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
918
|
-
`guidance_scale` is defined as `w` of equation 2.
|
919
|
-
Paper](https://
|
920
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
921
|
-
usually at the expense of lower image quality.
|
917
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
918
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
919
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
920
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
921
|
+
the text `prompt`, usually at the expense of lower image quality.
|
922
922
|
negative_prompt (`str` or `List[str]`, *optional*):
|
923
923
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
924
924
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -929,8 +929,8 @@ class StableDiffusionXLPipeline(
|
|
929
929
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
930
930
|
The number of images to generate per prompt.
|
931
931
|
eta (`float`, *optional*, defaults to 0.0):
|
932
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
933
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
932
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
933
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
934
934
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
935
935
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
936
936
|
to make generation deterministic.
|
@@ -970,9 +970,10 @@ class StableDiffusionXLPipeline(
|
|
970
970
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
971
971
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
972
972
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
973
|
-
Flawed](https://
|
974
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
975
|
-
Guidance rescale factor should fix overexposure when
|
973
|
+
Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16 of
|
974
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
975
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
976
|
+
using zero terminal SNR.
|
976
977
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
977
978
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
978
979
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1229,7 +1230,7 @@ class StableDiffusionXLPipeline(
|
|
1229
1230
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1230
1231
|
|
1231
1232
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1232
|
-
# Based on 3.4. in https://
|
1233
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1233
1234
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1234
1235
|
|
1235
1236
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -50,7 +50,7 @@ from ...utils import (
|
|
50
50
|
scale_lora_layers,
|
51
51
|
unscale_lora_layers,
|
52
52
|
)
|
53
|
-
from ...utils.torch_utils import randn_tensor
|
53
|
+
from ...utils.torch_utils import empty_device_cache, randn_tensor
|
54
54
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
55
55
|
from .pipeline_output import StableDiffusionXLPipelineOutput
|
56
56
|
|
@@ -93,7 +93,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
93
93
|
r"""
|
94
94
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
95
95
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
96
|
-
Flawed](https://
|
96
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
97
97
|
|
98
98
|
Args:
|
99
99
|
noise_cfg (`torch.Tensor`):
|
@@ -544,7 +544,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
544
544
|
def prepare_extra_step_kwargs(self, generator, eta):
|
545
545
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
546
546
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
547
|
-
# eta corresponds to η in DDIM paper: https://
|
547
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
548
548
|
# and should be between [0, 1]
|
549
549
|
|
550
550
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -704,7 +704,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
704
704
|
# Offload text encoder if `enable_model_cpu_offload` was enabled
|
705
705
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
706
706
|
self.text_encoder_2.to("cpu")
|
707
|
-
|
707
|
+
empty_device_cache()
|
708
708
|
|
709
709
|
image = image.to(device=device, dtype=dtype)
|
710
710
|
|
@@ -957,7 +957,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
957
957
|
return self._clip_skip
|
958
958
|
|
959
959
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
960
|
-
# of the Imagen paper: https://
|
960
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
961
961
|
# corresponds to doing no classifier free guidance.
|
962
962
|
@property
|
963
963
|
def do_classifier_free_guidance(self):
|
@@ -1074,11 +1074,11 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1074
1074
|
forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refine Image
|
1075
1075
|
Quality**](https://huggingface.co/docs/diffusers/using-diffusers/sdxl#refine-image-quality).
|
1076
1076
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1077
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
1078
|
-
`guidance_scale` is defined as `w` of equation 2.
|
1079
|
-
Paper](https://
|
1080
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
1081
|
-
usually at the expense of lower image quality.
|
1077
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1078
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
1079
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1080
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1081
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1082
1082
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1083
1083
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1084
1084
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1089,8 +1089,8 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1089
1089
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1090
1090
|
The number of images to generate per prompt.
|
1091
1091
|
eta (`float`, *optional*, defaults to 0.0):
|
1092
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
1093
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1092
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1093
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1094
1094
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1095
1095
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1096
1096
|
to make generation deterministic.
|
@@ -1130,9 +1130,10 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1130
1130
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
1131
1131
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
1132
1132
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
1133
|
-
Flawed](https://
|
1134
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
1135
|
-
Guidance rescale factor should fix overexposure when
|
1133
|
+
Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16 of
|
1134
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
1135
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
1136
|
+
using zero terminal SNR.
|
1136
1137
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
1137
1138
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
1138
1139
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1420,7 +1421,7 @@ class StableDiffusionXLImg2ImgPipeline(
|
|
1420
1421
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1421
1422
|
|
1422
1423
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1423
|
-
# Based on 3.4. in https://
|
1424
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1424
1425
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1425
1426
|
|
1426
1427
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -104,7 +104,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
104
104
|
r"""
|
105
105
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
106
106
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
107
|
-
Flawed](https://
|
107
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
108
108
|
|
109
109
|
Args:
|
110
110
|
noise_cfg (`torch.Tensor`):
|
@@ -648,7 +648,7 @@ class StableDiffusionXLInpaintPipeline(
|
|
648
648
|
def prepare_extra_step_kwargs(self, generator, eta):
|
649
649
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
650
650
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
651
|
-
# eta corresponds to η in DDIM paper: https://
|
651
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
652
652
|
# and should be between [0, 1]
|
653
653
|
|
654
654
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -1062,7 +1062,7 @@ class StableDiffusionXLInpaintPipeline(
|
|
1062
1062
|
return self._clip_skip
|
1063
1063
|
|
1064
1064
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
1065
|
-
# of the Imagen paper: https://
|
1065
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
1066
1066
|
# corresponds to doing no classifier free guidance.
|
1067
1067
|
@property
|
1068
1068
|
def do_classifier_free_guidance(self):
|
@@ -1208,11 +1208,11 @@ class StableDiffusionXLInpaintPipeline(
|
|
1208
1208
|
forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
1209
1209
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output).
|
1210
1210
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1211
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
1212
|
-
`guidance_scale` is defined as `w` of equation 2.
|
1213
|
-
Paper](https://
|
1214
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
1215
|
-
usually at the expense of lower image quality.
|
1211
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1212
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
1213
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1214
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1215
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1216
1216
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1217
1217
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1218
1218
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1243,8 +1243,8 @@ class StableDiffusionXLInpaintPipeline(
|
|
1243
1243
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1244
1244
|
The number of images to generate per prompt.
|
1245
1245
|
eta (`float`, *optional*, defaults to 0.0):
|
1246
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
1247
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1246
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1247
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1248
1248
|
generator (`torch.Generator`, *optional*):
|
1249
1249
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1250
1250
|
to make generation deterministic.
|
@@ -1638,7 +1638,7 @@ class StableDiffusionXLInpaintPipeline(
|
|
1638
1638
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1639
1639
|
|
1640
1640
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1641
|
-
# Based on 3.4. in https://
|
1641
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1642
1642
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1643
1643
|
|
1644
1644
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Harutatsu Akiyama and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -104,7 +104,7 @@ def retrieve_latents(
|
|
104
104
|
def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
105
105
|
"""
|
106
106
|
Rescale `noise_cfg` according to `guidance_rescale`. Based on findings of [Common Diffusion Noise Schedules and
|
107
|
-
Sample Steps are Flawed](https://
|
107
|
+
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
|
108
108
|
"""
|
109
109
|
std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
|
110
110
|
std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
|
@@ -427,7 +427,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
427
427
|
def prepare_extra_step_kwargs(self, generator, eta):
|
428
428
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
429
429
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
430
|
-
# eta corresponds to η in DDIM paper: https://
|
430
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
431
431
|
# and should be between [0, 1]
|
432
432
|
|
433
433
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -667,11 +667,11 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
667
667
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
668
668
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
669
669
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
670
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
671
|
-
`guidance_scale` is defined as `w` of equation 2.
|
672
|
-
Paper](https://
|
673
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
674
|
-
usually at the expense of lower image quality.
|
670
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
671
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
672
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
673
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
674
|
+
the text `prompt`, usually at the expense of lower image quality.
|
675
675
|
image_guidance_scale (`float`, *optional*, defaults to 1.5):
|
676
676
|
Image guidance scale is to push the generated image towards the initial image `image`. Image guidance
|
677
677
|
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to
|
@@ -687,8 +687,8 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
687
687
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
688
688
|
The number of images to generate per prompt.
|
689
689
|
eta (`float`, *optional*, defaults to 0.0):
|
690
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
691
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
690
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
691
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
692
692
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
693
693
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
694
694
|
to make generation deterministic.
|
@@ -728,9 +728,10 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
728
728
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
729
729
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
730
730
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
731
|
-
Flawed](https://
|
732
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
733
|
-
Guidance rescale factor should fix overexposure when
|
731
|
+
Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16 of
|
732
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
733
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
734
|
+
using zero terminal SNR.
|
734
735
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
735
736
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
736
737
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -785,7 +786,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
785
786
|
device = self._execution_device
|
786
787
|
|
787
788
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
788
|
-
# of the Imagen paper: https://
|
789
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
789
790
|
# corresponds to doing no classifier free guidance.
|
790
791
|
do_classifier_free_guidance = guidance_scale > 1.0 and image_guidance_scale >= 1.0
|
791
792
|
|
@@ -928,7 +929,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
|
|
928
929
|
)
|
929
930
|
|
930
931
|
if do_classifier_free_guidance and guidance_rescale > 0.0:
|
931
|
-
# Based on 3.4. in https://
|
932
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
932
933
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
933
934
|
|
934
935
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -369,7 +369,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
|
|
369
369
|
return self._guidance_scale
|
370
370
|
|
371
371
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
372
|
-
# of the Imagen paper: https://
|
372
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
373
373
|
# corresponds to doing no classifier free guidance.
|
374
374
|
@property
|
375
375
|
def do_classifier_free_guidance(self):
|
@@ -495,7 +495,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
|
|
495
495
|
batch_size = image.shape[0]
|
496
496
|
device = self._execution_device
|
497
497
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
498
|
-
# of the Imagen paper: https://
|
498
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
499
499
|
# corresponds to doing no classifier free guidance.
|
500
500
|
self._guidance_scale = max_guidance_scale
|
501
501
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TencentARC and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -123,7 +123,7 @@ def _preprocess_adapter_image(image, height, width):
|
|
123
123
|
image = torch.cat(image, dim=0)
|
124
124
|
else:
|
125
125
|
raise ValueError(
|
126
|
-
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but
|
126
|
+
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
|
127
127
|
)
|
128
128
|
return image
|
129
129
|
|
@@ -191,7 +191,7 @@ def retrieve_timesteps(
|
|
191
191
|
class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, FromSingleFileMixin):
|
192
192
|
r"""
|
193
193
|
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
|
194
|
-
https://
|
194
|
+
https://huggingface.co/papers/2302.08453
|
195
195
|
|
196
196
|
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
197
197
|
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
@@ -521,7 +521,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
|
|
521
521
|
def prepare_extra_step_kwargs(self, generator, eta):
|
522
522
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
523
523
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
524
|
-
# eta corresponds to η in DDIM paper: https://
|
524
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
525
525
|
# and should be between [0, 1]
|
526
526
|
|
527
527
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -680,7 +680,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
|
|
680
680
|
return self._guidance_scale
|
681
681
|
|
682
682
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
683
|
-
# of the Imagen paper: https://
|
683
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
684
684
|
# corresponds to doing no classifier free guidance.
|
685
685
|
@property
|
686
686
|
def do_classifier_free_guidance(self):
|
@@ -740,11 +740,11 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
|
|
740
740
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
741
741
|
will be used.
|
742
742
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
743
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
744
|
-
`guidance_scale` is defined as `w` of equation 2.
|
745
|
-
Paper](https://
|
746
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
747
|
-
usually at the expense of lower image quality.
|
743
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
744
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
745
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
746
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
747
|
+
the text `prompt`, usually at the expense of lower image quality.
|
748
748
|
negative_prompt (`str` or `List[str]`, *optional*):
|
749
749
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
750
750
|
`negative_prompt_embeds` instead.
|
@@ -752,8 +752,8 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline, StableDiffusionMixin, Fr
|
|
752
752
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
753
753
|
The number of images to generate per prompt.
|
754
754
|
eta (`float`, *optional*, defaults to 0.0):
|
755
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
756
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
755
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
756
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
757
757
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
758
758
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
759
759
|
to make generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 TencentARC and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -121,7 +121,7 @@ def _preprocess_adapter_image(image, height, width):
|
|
121
121
|
image = torch.cat(image, dim=0)
|
122
122
|
else:
|
123
123
|
raise ValueError(
|
124
|
-
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but
|
124
|
+
f"Invalid image tensor! Expecting image tensor with 3 or 4 dimension, but receive: {image[0].ndim}"
|
125
125
|
)
|
126
126
|
return image
|
127
127
|
|
@@ -131,7 +131,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
131
131
|
r"""
|
132
132
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
133
133
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
134
|
-
Flawed](https://
|
134
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
135
135
|
|
136
136
|
Args:
|
137
137
|
noise_cfg (`torch.Tensor`):
|
@@ -223,7 +223,7 @@ class StableDiffusionXLAdapterPipeline(
|
|
223
223
|
):
|
224
224
|
r"""
|
225
225
|
Pipeline for text-to-image generation using Stable Diffusion augmented with T2I-Adapter
|
226
|
-
https://
|
226
|
+
https://huggingface.co/papers/2302.08453
|
227
227
|
|
228
228
|
This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
|
229
229
|
library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)
|
@@ -624,7 +624,7 @@ class StableDiffusionXLAdapterPipeline(
|
|
624
624
|
def prepare_extra_step_kwargs(self, generator, eta):
|
625
625
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
626
626
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
627
|
-
# eta corresponds to η in DDIM paper: https://
|
627
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
628
628
|
# and should be between [0, 1]
|
629
629
|
|
630
630
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -859,7 +859,7 @@ class StableDiffusionXLAdapterPipeline(
|
|
859
859
|
return self._guidance_scale
|
860
860
|
|
861
861
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
862
|
-
# of the Imagen paper: https://
|
862
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
863
863
|
# corresponds to doing no classifier free guidance.
|
864
864
|
@property
|
865
865
|
def do_classifier_free_guidance(self):
|
@@ -948,11 +948,11 @@ class StableDiffusionXLAdapterPipeline(
|
|
948
948
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
949
949
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
950
950
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
951
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
952
|
-
`guidance_scale` is defined as `w` of equation 2.
|
953
|
-
Paper](https://
|
954
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
955
|
-
usually at the expense of lower image quality.
|
951
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
952
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
953
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
954
|
+
`guidance_scale > 1`. Higher guidance scale encourages the model to generate images that are closely linked to
|
955
|
+
the text `prompt`, usually at the expense of lower image quality.
|
956
956
|
negative_prompt (`str` or `List[str]`, *optional*):
|
957
957
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
958
958
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -963,8 +963,8 @@ class StableDiffusionXLAdapterPipeline(
|
|
963
963
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
964
964
|
The number of images to generate per prompt.
|
965
965
|
eta (`float`, *optional*, defaults to 0.0):
|
966
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
967
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
966
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
967
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
968
968
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
969
969
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
970
970
|
to make generation deterministic.
|
@@ -1010,9 +1010,10 @@ class StableDiffusionXLAdapterPipeline(
|
|
1010
1010
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
1011
1011
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
1012
1012
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
1013
|
-
Flawed](https://
|
1014
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
1015
|
-
Guidance rescale factor should fix overexposure when
|
1013
|
+
Flawed](https://huggingface.co/papers/2305.08891). `guidance_rescale` is defined as `φ` in equation 16 of
|
1014
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
1015
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
1016
|
+
using zero terminal SNR.
|
1016
1017
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
1017
1018
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
1018
1019
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1266,7 +1267,7 @@ class StableDiffusionXLAdapterPipeline(
|
|
1266
1267
|
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1267
1268
|
|
1268
1269
|
if self.do_classifier_free_guidance and guidance_rescale > 0.0:
|
1269
|
-
# Based on 3.4. in https://
|
1270
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1270
1271
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
1271
1272
|
|
1272
1273
|
# compute the previous noisy sample x_t -> x_t-1
|