diffusers-0.33.0-py3-none-any.whl → diffusers-0.34.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -35,7 +35,7 @@ from ...utils import (
|
|
35
35
|
logging,
|
36
36
|
replace_example_docstring,
|
37
37
|
)
|
38
|
-
from ...utils.torch_utils import randn_tensor
|
38
|
+
from ...utils.torch_utils import get_device, is_torch_version, randn_tensor
|
39
39
|
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
40
40
|
from ..pixart_alpha.pipeline_pixart_alpha import (
|
41
41
|
ASPECT_RATIO_512_BIN,
|
@@ -363,7 +363,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
|
363
363
|
def prepare_extra_step_kwargs(self, generator, eta):
|
364
364
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
365
365
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
366
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
366
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
367
367
|
# and should be between [0, 1]
|
368
368
|
|
369
369
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -524,7 +524,7 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
|
524
524
|
# &amp
|
525
525
|
caption = re.sub(r"&amp", "", caption)
|
526
526
|
|
527
|
-
# ip adresses:
|
527
|
+
# ip addresses:
|
528
528
|
caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
|
529
529
|
|
530
530
|
# article ids:
|
@@ -683,11 +683,11 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
|
683
683
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
684
684
|
will be used.
|
685
685
|
guidance_scale (`float`, *optional*, defaults to 4.5):
|
686
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
687
|
-
`guidance_scale` is defined as `w` of equation 2.
|
688
|
-
Paper](https://
|
689
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
690
|
-
usually at the expense of lower image quality.
|
686
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
687
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
688
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
689
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
690
|
+
the text `prompt`, usually at the expense of lower image quality.
|
691
691
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
692
692
|
The number of images to generate per prompt.
|
693
693
|
height (`int`, *optional*, defaults to self.unet.config.sample_size):
|
@@ -695,8 +695,8 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
|
695
695
|
width (`int`, *optional*, defaults to self.unet.config.sample_size):
|
696
696
|
The width in pixels of the generated image.
|
697
697
|
eta (`float`, *optional*, defaults to 0.0):
|
698
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
699
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
698
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
699
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
700
700
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
701
701
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
702
702
|
to make generation deterministic.
|
@@ -917,9 +917,15 @@ class SanaPAGPipeline(DiffusionPipeline, PAGMixin):
|
|
917
917
|
image = latents
|
918
918
|
else:
|
919
919
|
latents = latents.to(self.vae.dtype)
|
920
|
+
torch_accelerator_module = getattr(torch, get_device(), torch.cuda)
|
921
|
+
oom_error = (
|
922
|
+
torch.OutOfMemoryError
|
923
|
+
if is_torch_version(">=", "2.5.0")
|
924
|
+
else torch_accelerator_module.OutOfMemoryError
|
925
|
+
)
|
920
926
|
try:
|
921
927
|
image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
|
922
|
-
except
|
928
|
+
except oom_error as e:
|
923
929
|
warnings.warn(
|
924
930
|
f"{e}. \n"
|
925
931
|
f"Try to use VAE tiling for large images. For example: \n"
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -72,7 +72,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
72
72
|
r"""
|
73
73
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
74
74
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
75
|
-
Flawed](https://
|
75
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
76
76
|
|
77
77
|
Args:
|
78
78
|
noise_cfg (`torch.Tensor`):
|
@@ -573,7 +573,7 @@ class StableDiffusionPAGPipeline(
|
|
573
573
|
def prepare_extra_step_kwargs(self, generator, eta):
|
574
574
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
575
575
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
576
|
-
# eta corresponds to η in DDIM paper: https://
|
576
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
577
577
|
# and should be between [0, 1]
|
578
578
|
|
579
579
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -724,7 +724,7 @@ class StableDiffusionPAGPipeline(
|
|
724
724
|
return self._clip_skip
|
725
725
|
|
726
726
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
727
|
-
# of the Imagen paper: https://
|
727
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
728
728
|
# corresponds to doing no classifier free guidance.
|
729
729
|
@property
|
730
730
|
def do_classifier_free_guidance(self):
|
@@ -802,8 +802,8 @@ class StableDiffusionPAGPipeline(
|
|
802
802
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
803
803
|
The number of images to generate per prompt.
|
804
804
|
eta (`float`, *optional*, defaults to 0.0):
|
805
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
806
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
805
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
806
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
807
807
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
808
808
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
809
809
|
generation deterministic.
|
@@ -833,7 +833,7 @@ class StableDiffusionPAGPipeline(
|
|
833
833
|
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
834
834
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
835
835
|
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
836
|
-
Flawed](https://
|
836
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
837
837
|
using zero terminal SNR.
|
838
838
|
clip_skip (`int`, *optional*):
|
839
839
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
@@ -1027,7 +1027,7 @@ class StableDiffusionPAGPipeline(
|
|
1027
1027
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1028
1028
|
|
1029
1029
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1030
|
-
# Based on 3.4. in https://
|
1030
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1031
1031
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1032
1032
|
|
1033
1033
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -663,7 +663,7 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
|
|
663
663
|
return self._clip_skip
|
664
664
|
|
665
665
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
666
|
-
# of the Imagen paper: https://
|
666
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
667
667
|
# corresponds to doing no classifier free guidance.
|
668
668
|
@property
|
669
669
|
def do_classifier_free_guidance(self):
|
@@ -738,11 +738,11 @@ class StableDiffusion3PAGPipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSin
|
|
738
738
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
739
739
|
will be used.
|
740
740
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
741
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
742
|
-
`guidance_scale` is defined as `w` of equation 2.
|
743
|
-
Paper](https://
|
744
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
745
|
-
usually at the expense of lower image quality.
|
741
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
742
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
743
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
744
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
745
|
+
the text `prompt`, usually at the expense of lower image quality.
|
746
746
|
negative_prompt (`str` or `List[str]`, *optional*):
|
747
747
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
748
748
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -714,7 +714,7 @@ class StableDiffusion3PAGImg2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
714
714
|
return self._clip_skip
|
715
715
|
|
716
716
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
717
|
-
# of the Imagen paper: https://
|
717
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
718
718
|
# corresponds to doing no classifier free guidance.
|
719
719
|
@property
|
720
720
|
def do_classifier_free_guidance(self):
|
@@ -799,11 +799,11 @@ class StableDiffusion3PAGImg2ImgPipeline(DiffusionPipeline, SD3LoraLoaderMixin,
|
|
799
799
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
800
800
|
will be used.
|
801
801
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
802
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
803
|
-
`guidance_scale` is defined as `w` of equation 2.
|
804
|
-
Paper](https://
|
805
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
806
|
-
usually at the expense of lower image quality.
|
802
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
803
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
804
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
805
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
806
|
+
the text `prompt`, usually at the expense of lower image quality.
|
807
807
|
negative_prompt (`str` or `List[str]`, *optional*):
|
808
808
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
809
809
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -438,7 +438,7 @@ class AnimateDiffPAGPipeline(
|
|
438
438
|
def prepare_extra_step_kwargs(self, generator, eta):
|
439
439
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
440
440
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
441
|
-
# eta corresponds to η in DDIM paper: https://
|
441
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
442
442
|
# and should be between [0, 1]
|
443
443
|
|
444
444
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -520,7 +520,7 @@ class AnimateDiffPAGPipeline(
|
|
520
520
|
def prepare_latents(
|
521
521
|
self, batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents=None
|
522
522
|
):
|
523
|
-
# If FreeNoise is enabled, generate latents as described in Equation (7) of [FreeNoise](https://
|
523
|
+
# If FreeNoise is enabled, generate latents as described in Equation (7) of [FreeNoise](https://huggingface.co/papers/2310.15169)
|
524
524
|
if self.free_noise_enabled:
|
525
525
|
latents = self._prepare_latents_free_noise(
|
526
526
|
batch_size, num_channels_latents, num_frames, height, width, dtype, device, generator, latents
|
@@ -558,7 +558,7 @@ class AnimateDiffPAGPipeline(
|
|
558
558
|
return self._clip_skip
|
559
559
|
|
560
560
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
561
|
-
# of the Imagen paper: https://
|
561
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
562
562
|
# corresponds to doing no classifier free guidance.
|
563
563
|
@property
|
564
564
|
def do_classifier_free_guidance(self):
|
@@ -624,8 +624,8 @@ class AnimateDiffPAGPipeline(
|
|
624
624
|
The prompt or prompts to guide what to not include in image generation. If not defined, you need to
|
625
625
|
pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
|
626
626
|
eta (`float`, *optional*, defaults to 0.0):
|
627
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
628
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
627
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
628
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
629
629
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
630
630
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
631
631
|
generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -568,7 +568,7 @@ class StableDiffusionPAGImg2ImgPipeline(
|
|
568
568
|
def prepare_extra_step_kwargs(self, generator, eta):
|
569
569
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
570
570
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
571
|
-
# eta corresponds to η in DDIM paper: https://
|
571
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
572
572
|
# and should be between [0, 1]
|
573
573
|
|
574
574
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -761,7 +761,7 @@ class StableDiffusionPAGImg2ImgPipeline(
|
|
761
761
|
return self._clip_skip
|
762
762
|
|
763
763
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
764
|
-
# of the Imagen paper: https://
|
764
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
765
765
|
# corresponds to doing no classifier free guidance.
|
766
766
|
@property
|
767
767
|
def do_classifier_free_guidance(self):
|
@@ -847,8 +847,8 @@ class StableDiffusionPAGImg2ImgPipeline(
|
|
847
847
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
848
848
|
The number of images to generate per prompt.
|
849
849
|
eta (`float`, *optional*, defaults to 0.0):
|
850
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
851
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
850
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
851
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
852
852
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
853
853
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
854
854
|
generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -99,7 +99,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
99
99
|
r"""
|
100
100
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
101
101
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
102
|
-
Flawed](https://
|
102
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
103
103
|
|
104
104
|
Args:
|
105
105
|
noise_cfg (`torch.Tensor`):
|
@@ -603,7 +603,7 @@ class StableDiffusionPAGInpaintPipeline(
|
|
603
603
|
def prepare_extra_step_kwargs(self, generator, eta):
|
604
604
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
605
605
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
606
|
-
# eta corresponds to η in DDIM paper: https://
|
606
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
607
607
|
# and should be between [0, 1]
|
608
608
|
|
609
609
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -889,7 +889,7 @@ class StableDiffusionPAGInpaintPipeline(
|
|
889
889
|
return self._clip_skip
|
890
890
|
|
891
891
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
892
|
-
# of the Imagen paper: https://
|
892
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
893
893
|
# corresponds to doing no classifier free guidance.
|
894
894
|
@property
|
895
895
|
def do_classifier_free_guidance(self):
|
@@ -972,8 +972,8 @@ class StableDiffusionPAGInpaintPipeline(
|
|
972
972
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
973
973
|
The number of images to generate per prompt.
|
974
974
|
eta (`float`, *optional*, defaults to 0.0):
|
975
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
976
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
975
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
976
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
977
977
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
978
978
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
979
979
|
generation deterministic.
|
@@ -1003,7 +1003,7 @@ class StableDiffusionPAGInpaintPipeline(
|
|
1003
1003
|
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
1004
1004
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
1005
1005
|
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
1006
|
-
Flawed](https://
|
1006
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
1007
1007
|
using zero terminal SNR.
|
1008
1008
|
clip_skip (`int`, *optional*):
|
1009
1009
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
@@ -1294,7 +1294,7 @@ class StableDiffusionPAGInpaintPipeline(
|
|
1294
1294
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1295
1295
|
|
1296
1296
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1297
|
-
# Based on 3.4. in https://
|
1297
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1298
1298
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1299
1299
|
|
1300
1300
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -91,7 +91,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
91
91
|
r"""
|
92
92
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
93
93
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
94
|
-
Flawed](https://
|
94
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
95
95
|
|
96
96
|
Args:
|
97
97
|
noise_cfg (`torch.Tensor`):
|
@@ -607,7 +607,7 @@ class StableDiffusionXLPAGPipeline(
|
|
607
607
|
def prepare_extra_step_kwargs(self, generator, eta):
|
608
608
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
609
609
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
610
|
-
# eta corresponds to η in DDIM paper: https://
|
610
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
611
611
|
# and should be between [0, 1]
|
612
612
|
|
613
613
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -823,7 +823,7 @@ class StableDiffusionXLPAGPipeline(
|
|
823
823
|
return self._clip_skip
|
824
824
|
|
825
825
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
826
|
-
# of the Imagen paper: https://
|
826
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
827
827
|
# corresponds to doing no classifier free guidance.
|
828
828
|
@property
|
829
829
|
def do_classifier_free_guidance(self):
|
@@ -925,11 +925,11 @@ class StableDiffusionXLPAGPipeline(
|
|
925
925
|
"Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
926
926
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
|
927
927
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
928
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
929
|
-
`guidance_scale` is defined as `w` of equation 2.
|
930
|
-
Paper](https://
|
931
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
932
|
-
usually at the expense of lower image quality.
|
928
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
929
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
930
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
931
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
932
|
+
the text `prompt`, usually at the expense of lower image quality.
|
933
933
|
negative_prompt (`str` or `List[str]`, *optional*):
|
934
934
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
935
935
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -940,8 +940,8 @@ class StableDiffusionXLPAGPipeline(
|
|
940
940
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
941
941
|
The number of images to generate per prompt.
|
942
942
|
eta (`float`, *optional*, defaults to 0.0):
|
943
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
944
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
943
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
944
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
945
945
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
946
946
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
947
947
|
to make generation deterministic.
|
@@ -981,9 +981,10 @@ class StableDiffusionXLPAGPipeline(
|
|
981
981
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
982
982
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
983
983
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
984
|
-
Flawed](https://
|
985
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
986
|
-
Guidance rescale factor should fix overexposure when
|
984
|
+
Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
|
985
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
986
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
987
|
+
using zero terminal SNR.
|
987
988
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
988
989
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
989
990
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1266,7 +1267,7 @@ class StableDiffusionXLPAGPipeline(
|
|
1266
1267
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1267
1268
|
|
1268
1269
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1269
|
-
# Based on 3.4. in https://
|
1270
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1270
1271
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1271
1272
|
|
1272
1273
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -49,7 +49,7 @@ from ...utils import (
|
|
49
49
|
scale_lora_layers,
|
50
50
|
unscale_lora_layers,
|
51
51
|
)
|
52
|
-
from ...utils.torch_utils import randn_tensor
|
52
|
+
from ...utils.torch_utils import empty_device_cache, randn_tensor
|
53
53
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
54
54
|
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
55
55
|
from .pag_utils import PAGMixin
|
@@ -95,7 +95,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
95
95
|
r"""
|
96
96
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
97
97
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
98
|
-
Flawed](https://
|
98
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
99
99
|
|
100
100
|
Args:
|
101
101
|
noise_cfg (`torch.Tensor`):
|
@@ -553,7 +553,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
553
553
|
def prepare_extra_step_kwargs(self, generator, eta):
|
554
554
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
555
555
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
556
|
-
# eta corresponds to η in DDIM paper: https://
|
556
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
557
557
|
# and should be between [0, 1]
|
558
558
|
|
559
559
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -716,7 +716,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
716
716
|
# Offload text encoder if `enable_model_cpu_offload` was enabled
|
717
717
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
718
718
|
self.text_encoder_2.to("cpu")
|
719
|
-
|
719
|
+
empty_device_cache()
|
720
720
|
|
721
721
|
image = image.to(device=device, dtype=dtype)
|
722
722
|
|
@@ -970,7 +970,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
970
970
|
return self._clip_skip
|
971
971
|
|
972
972
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
973
|
-
# of the Imagen paper: https://
|
973
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
974
974
|
# corresponds to doing no classifier free guidance.
|
975
975
|
@property
|
976
976
|
def do_classifier_free_guidance(self):
|
@@ -1088,11 +1088,11 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
1088
1088
|
forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refine Image
|
1089
1089
|
Quality**](https://huggingface.co/docs/diffusers/using-diffusers/sdxl#refine-image-quality).
|
1090
1090
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1091
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
1092
|
-
`guidance_scale` is defined as `w` of equation 2.
|
1093
|
-
Paper](https://
|
1094
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
1095
|
-
usually at the expense of lower image quality.
|
1091
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1092
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
1093
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1094
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1095
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1096
1096
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1097
1097
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1098
1098
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1103,8 +1103,8 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
1103
1103
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1104
1104
|
The number of images to generate per prompt.
|
1105
1105
|
eta (`float`, *optional*, defaults to 0.0):
|
1106
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
1107
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1106
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1107
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1108
1108
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1109
1109
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1110
1110
|
to make generation deterministic.
|
@@ -1144,9 +1144,10 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
1144
1144
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
1145
1145
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
1146
1146
|
Guidance rescale factor proposed by [Common Diffusion Noise Schedules and Sample Steps are
|
1147
|
-
Flawed](https://
|
1148
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
1149
|
-
Guidance rescale factor should fix overexposure when
|
1147
|
+
Flawed](https://huggingface.co/papers/2305.08891) `guidance_scale` is defined as `φ` in equation 16. of
|
1148
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
1149
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
1150
|
+
using zero terminal SNR.
|
1150
1151
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
1151
1152
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
1152
1153
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1461,7 +1462,7 @@ class StableDiffusionXLPAGImg2ImgPipeline(
|
|
1461
1462
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1462
1463
|
|
1463
1464
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1464
|
-
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
1465
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1465
1466
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1466
1467
|
|
1467
1468
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -108,7 +108,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
108
108
|
r"""
|
109
109
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
110
110
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
111
|
-
Flawed](https://arxiv.org/pdf/2305.08891.pdf).
|
111
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
112
112
|
|
113
113
|
Args:
|
114
114
|
noise_cfg (`torch.Tensor`):
|
@@ -643,7 +643,7 @@ class StableDiffusionXLPAGInpaintPipeline(
|
|
643
643
|
def prepare_extra_step_kwargs(self, generator, eta):
|
644
644
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
645
645
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
646
|
-
# eta corresponds to η in DDIM paper: https://arxiv.org/abs/2010.02502
|
646
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
647
647
|
# and should be between [0, 1]
|
648
648
|
|
649
649
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -1061,7 +1061,7 @@ class StableDiffusionXLPAGInpaintPipeline(
|
|
1061
1061
|
return self._clip_skip
|
1062
1062
|
|
1063
1063
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
1064
|
-
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
|
1064
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
1065
1065
|
# corresponds to doing no classifier free guidance.
|
1066
1066
|
@property
|
1067
1067
|
def do_classifier_free_guidance(self):
|
@@ -1208,11 +1208,11 @@ class StableDiffusionXLPAGInpaintPipeline(
|
|
1208
1208
|
forms a part of a "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
|
1209
1209
|
Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output).
|
1210
1210
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1211
|
-
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
1212
|
-
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
1213
|
-
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
1214
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`,
|
1215
|
-
usually at the expense of lower image quality.
|
1211
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1212
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
1213
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1214
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1215
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1216
1216
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1217
1217
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1218
1218
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1243,8 +1243,8 @@ class StableDiffusionXLPAGInpaintPipeline(
|
|
1243
1243
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1244
1244
|
The number of images to generate per prompt.
|
1245
1245
|
eta (`float`, *optional*, defaults to 0.0):
|
1246
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://arxiv.org/abs/2010.02502. Only applies to
|
1247
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1246
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1247
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1248
1248
|
generator (`torch.Generator`, *optional*):
|
1249
1249
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1250
1250
|
to make generation deterministic.
|
@@ -1673,7 +1673,7 @@ class StableDiffusionXLPAGInpaintPipeline(
|
|
1673
1673
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1674
1674
|
|
1675
1675
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1676
|
-
# Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf
|
1676
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1677
1677
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1678
1678
|
|
1679
1679
|
# compute the previous noisy sample x_t -> x_t-1
|