diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff compares the contents of two publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that public registry.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0

diffusers/pipelines/omnigen/pipeline_omnigen.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 OmniGen team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -23,12 +23,14 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models.autoencoders import AutoencoderKL
 from ...models.transformers import OmniGenTransformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
-from ...utils import is_torch_xla_available, logging, replace_example_docstring
+from ...utils import is_torch_xla_available, is_torchvision_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
-from .processor_omnigen import OmniGenMultiModalProcessor
 
 
+if is_torchvision_available():
+    from .processor_omnigen import OmniGenMultiModalProcessor
+
 if is_torch_xla_available():
     XLA_AVAILABLE = True
 else:
@@ -120,7 +122,7 @@ class OmniGenPipeline(
     r"""
     The OmniGen pipeline for multimodal-to-image generation.
 
-    Reference: https://
+    Reference: https://huggingface.co/papers/2409.11340
 
     Args:
         transformer ([`OmniGenTransformer2DModel`]):
@@ -176,7 +178,7 @@ class OmniGenPipeline(
         get the continue embedding of input images by VAE
 
         Args:
-            input_pixel_values:
+            input_pixel_values: normalized pixel of input images
            device:
         Returns: torch.Tensor
         """
@@ -346,13 +348,13 @@ class OmniGenPipeline(
                 in their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is
                 passed will be used. Must be in descending order.
             guidance_scale (`float`, *optional*, defaults to 2.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             img_guidance_scale (`float`, *optional*, defaults to 1.6):
-                Defined as equation 3 in [Instrucpix2pix](https://
+                Defined as equation 3 in [Instrucpix2pix](https://huggingface.co/papers/2211.09800).
             use_input_image_size_as_output (bool, defaults to False):
                 whether to use the input image size as the output image size, which can be used for single-image input,
                 e.g., image editing task
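
The `guidance_scale` docstring rewritten in the hunk above describes `w` from equation (2) of the classifier-free guidance paper. As a quick, self-contained illustration (tensor shapes are made up, not taken from the diff), the guided noise prediction is the usual linear combination that also appears verbatim further down in this diff:

    import torch

    guidance_scale = 2.5                           # OmniGen's documented default
    noise_pred_uncond = torch.randn(1, 4, 64, 64)  # unconditional branch (illustrative shape)
    noise_pred_text = torch.randn(1, 4, 64, 64)    # text-conditioned branch
    # w = guidance_scale; w > 1 pulls the sample toward the text-conditioned prediction
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)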

diffusers/pipelines/omnigen/processor_omnigen.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 OmniGen team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -18,7 +18,12 @@ from typing import Dict, List
 import numpy as np
 import torch
 from PIL import Image
-from torchvision import transforms
+
+from ...utils import is_torchvision_available
+
+
+if is_torchvision_available():
+    from torchvision import transforms
 
 
 def crop_image(pil_image, max_image_size):
@@ -198,7 +203,7 @@ class OmniGenCollator:
     def create_mask(self, attention_mask, num_tokens_for_output_images):
         """
         OmniGen applies causal attention to each element in the sequence, but applies bidirectional attention within
-        each image sequence References: [OmniGen](https://
+        each image sequence References: [OmniGen](https://huggingface.co/papers/2409.11340)
         """
         extended_mask = []
         padding_images = []
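
The hunks above make `torchvision` an optional dependency for the OmniGen processor: the import now only happens when `is_torchvision_available()` reports the package. A minimal sketch of how such a guard can be implemented, assuming an importlib-based probe (the actual `diffusers.utils` helper may differ):

    import importlib.util

    def is_torchvision_available() -> bool:
        # hypothetical stand-in for the diffusers.utils helper referenced in the diff
        return importlib.util.find_spec("torchvision") is not None

    if is_torchvision_available():
        from torchvision import transforms  # only imported when the optional extra is installed
    else:
        transforms = None  # callers can raise a clear error at use time instead of at import time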

diffusers/pipelines/onnx_utils.py

@@ -75,6 +75,11 @@ class OnnxRuntimeModel:
             logger.info("No onnxruntime provider specified, using CPUExecutionProvider")
             provider = "CPUExecutionProvider"
 
+        if provider_options is None:
+            provider_options = []
+        elif not isinstance(provider_options, list):
+            provider_options = [provider_options]
+
         return ort.InferenceSession(
             path, providers=[provider], sess_options=sess_options, provider_options=provider_options
         )
@@ -174,7 +179,10 @@ class OnnxRuntimeModel:
         # load model from local directory
         if os.path.isdir(model_id):
             model = OnnxRuntimeModel.load_model(
-                Path(model_id, model_file_name).as_posix(),
+                Path(model_id, model_file_name).as_posix(),
+                provider=provider,
+                sess_options=sess_options,
+                provider_options=kwargs.pop("provider_options"),
             )
             kwargs["model_save_dir"] = Path(model_id)
         # load model from hub
@@ -190,7 +198,12 @@ class OnnxRuntimeModel:
             )
             kwargs["model_save_dir"] = Path(model_cache_path).parent
             kwargs["latest_model_name"] = Path(model_cache_path).name
-            model = OnnxRuntimeModel.load_model(
+            model = OnnxRuntimeModel.load_model(
+                model_cache_path,
+                provider=provider,
+                sess_options=sess_options,
+                provider_options=kwargs.pop("provider_options"),
+            )
         return cls(model=model, **kwargs)
 
     @classmethod
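
The changes above forward `provider`, `sess_options`, and `provider_options` to onnxruntime when loading a model; onnxruntime expects `provider_options` as a list of option dicts aligned with `providers`, which is what the added normalization guarantees. A small sketch of that call shape (the model path and option values are placeholders):

    import onnxruntime as ort

    provider = "CPUExecutionProvider"
    provider_options = {}  # a caller may pass a single dict of execution-provider options

    # normalization mirroring the diff: onnxruntime wants a list parallel to `providers`
    if provider_options is None:
        provider_options = []
    elif not isinstance(provider_options, list):
        provider_options = [provider_options]

    session = ort.InferenceSession(
        "model.onnx",  # placeholder path
        providers=[provider],
        provider_options=provider_options,
    )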

diffusers/pipelines/pag/pag_utils.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -31,7 +31,7 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
 
 
 class PAGMixin:
-    r"""Mixin class for [Pertubed Attention Guidance](https://
+    r"""Mixin class for [Pertubed Attention Guidance](https://huggingface.co/papers/2403.17377v1)."""
 
     def _set_pag_attn_processor(self, pag_applied_layers, do_classifier_free_guidance):
         r"""

diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import is_compiled_module, is_torch_version, randn_tensor
+from ...utils.torch_utils import empty_device_cache, is_compiled_module, is_torch_version, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -541,7 +541,7 @@ class StableDiffusionControlNetPAGPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -843,7 +843,7 @@ class StableDiffusionControlNetPAGPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -933,8 +933,8 @@ class StableDiffusionControlNetPAGPipeline(
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -1228,7 +1228,11 @@ class StableDiffusionControlNetPAGPipeline(
             for i, t in enumerate(timesteps):
                 # Relevant thread:
                 # https://dev-discuss.pytorch.org/t/cudagraphs-in-pytorch-2-0/1428
-                if (
+                if (
+                    torch.cuda.is_available()
+                    and (is_unet_compiled and is_controlnet_compiled)
+                    and is_torch_higher_equal_2_1
+                ):
                     torch._inductor.cudagraph_mark_step_begin()
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * (prompt_embeds.shape[0] // latents.shape[0]))
@@ -1309,7 +1313,7 @@ class StableDiffusionControlNetPAGPipeline(
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
             self.controlnet.to("cpu")
-            torch.cuda.empty_cache()
+            empty_device_cache()
 
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
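
From this file onward, the offload cleanup switches from a CUDA-specific cache flush to `empty_device_cache()` imported from `diffusers.utils.torch_utils`. A hedged sketch of what such a device-agnostic helper can look like; the real diffusers implementation may dispatch differently and cover more backends:

    from typing import Optional

    import torch

    def empty_device_cache(device_type: Optional[str] = None) -> None:
        # illustrative re-implementation, not the diffusers source
        if device_type is None:
            device_type = "cuda" if torch.cuda.is_available() else "cpu"
        if device_type == "cuda":
            torch.cuda.empty_cache()
        elif device_type == "mps":
            torch.mps.empty_cache()
        # xpu/npu and other accelerators would get analogous branches; CPU is a no-op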

diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -37,7 +37,7 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import is_compiled_module, randn_tensor
+from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -520,7 +520,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -955,7 +955,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -1064,8 +1064,8 @@ class StableDiffusionControlNetPAGInpaintPipeline(
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -1521,7 +1521,7 @@ class StableDiffusionControlNetPAGInpaintPipeline(
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
             self.controlnet.to("cpu")
-            torch.cuda.empty_cache()
+            empty_device_cache()
 
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
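
Several of the comments and docstrings fixed above concern DDIM's η parameter. For reference, η scales the per-step noise σ_t (equation 16 of the DDIM paper), so η = 0 gives the deterministic DDIM update and η = 1 recovers DDPM-like stochasticity. A tiny worked computation with made-up alphas:

    import math

    alpha_prod_t = 0.5       # cumulative alpha at step t (illustrative, not from any scheduler)
    alpha_prod_t_prev = 0.7  # cumulative alpha at the previous step
    eta = 0.0

    sigma_t = (
        eta
        * math.sqrt((1 - alpha_prod_t_prev) / (1 - alpha_prod_t))
        * math.sqrt(1 - alpha_prod_t / alpha_prod_t_prev)
    )
    print(sigma_t)  # 0.0 -> deterministic DDIM step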

diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -619,7 +619,7 @@ class StableDiffusionXLControlNetPAGPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -992,7 +992,7 @@ class StableDiffusionXLControlNetPAGPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -1111,8 +1111,8 @@ class StableDiffusionXLControlNetPAGPipeline(
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -1498,7 +1498,11 @@ class StableDiffusionXLControlNetPAGPipeline(
             for i, t in enumerate(timesteps):
                 # Relevant thread:
                 # https://dev-discuss.pytorch.org/t/cudagraphs-in-pytorch-2-0/1428
-                if (
+                if (
+                    torch.cuda.is_available()
+                    and (is_unet_compiled and is_controlnet_compiled)
+                    and is_torch_higher_equal_2_1
+                ):
                     torch._inductor.cudagraph_mark_step_begin()
                 # expand the latents if we are doing classifier free guidance
                 latent_model_input = torch.cat([latents] * (prompt_embeds.shape[0] // latents.shape[0]))
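
The extra `torch.cuda.is_available()` condition added above keeps the CUDA-graph step marker from running when compiled UNet/ControlNet modules execute on a non-CUDA device. A minimal sketch of the pattern; the `is_*` flags stand in for the locals computed earlier in the pipeline:

    import torch

    is_unet_compiled = True            # stand-ins for the pipeline's local flags
    is_controlnet_compiled = True
    is_torch_higher_equal_2_1 = True

    if (
        torch.cuda.is_available()
        and (is_unet_compiled and is_controlnet_compiled)
        and is_torch_higher_equal_2_1
    ):
        # CUDA-only API: marks the start of a new inductor cudagraph step
        torch._inductor.cudagraph_mark_step_begin()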

diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -52,7 +52,7 @@ from ...utils import (
     scale_lora_layers,
     unscale_lora_layers,
 )
-from ...utils.torch_utils import is_compiled_module, randn_tensor
+from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 from .pag_utils import PAGMixin
@@ -115,7 +115,7 @@ EXAMPLE_DOC_STRING = """
         ... with torch.no_grad(), torch.autocast("cuda"):
         ...     depth_map = depth_estimator(image).predicted_depth
 
-        ... depth_map = torch.nn.
+        ... depth_map = torch.nn.functional.interpolate(
         ...     depth_map.unsqueeze(1),
         ...     size=(1024, 1024),
         ...     mode="bicubic",
@@ -611,7 +611,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -926,7 +926,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
         # Offload text encoder if `enable_model_cpu_offload` was enabled
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.text_encoder_2.to("cpu")
-            torch.cuda.empty_cache()
+            empty_device_cache()
 
         image = image.to(device=device, dtype=dtype)
 
@@ -1074,7 +1074,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -1176,11 +1176,11 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*, defaults to 7.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1191,8 +1191,8 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
@@ -1648,7 +1648,7 @@ class StableDiffusionXLControlNetPAGImg2ImgPipeline(
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
             self.controlnet.to("cpu")
-            torch.cuda.empty_cache()
+            empty_device_cache()
 
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
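
The doc-example fix above completes the truncated `torch.nn.` call to `torch.nn.functional.interpolate`. A standalone version of that resize step with a dummy depth map (`align_corners=False` is an assumption; the pipeline's example may set it differently):

    import torch
    import torch.nn.functional as F

    depth_map = torch.randn(1, 512, 512)  # (batch, H, W), as a depth estimator might return
    depth_map = F.interpolate(
        depth_map.unsqueeze(1),           # -> (batch, 1, H, W); interpolate needs a channel dim
        size=(1024, 1024),
        mode="bicubic",
        align_corners=False,
    )
    print(depth_map.shape)                # torch.Size([1, 1, 1024, 1024])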

diffusers/pipelines/pag/pipeline_pag_hunyuandit.py

@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -131,7 +131,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
     r"""
     Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
     Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
-    Flawed](https://
+    Flawed](https://huggingface.co/papers/2305.08891).
 
     Args:
         noise_cfg (`torch.Tensor`):
@@ -443,7 +443,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -566,7 +566,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
         return self._guidance_rescale
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -638,8 +638,8 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -675,7 +675,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
                 inputs will be passed.
             guidance_rescale (`float`, *optional*, defaults to 0.0):
                 Rescale the noise_cfg according to `guidance_rescale`. Based on findings of [Common Diffusion Noise
-                Schedules and Sample Steps are Flawed](https://
+                Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
             original_size (`Tuple[int, int]`, *optional*, defaults to `(1024, 1024)`):
                 The original size of the image. Used to calculate the time ids.
             target_size (`Tuple[int, int]`, *optional*):
@@ -915,7 +915,7 @@ class HunyuanDiTPAGPipeline(DiffusionPipeline, PAGMixin):
                     noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
                 if self.do_classifier_free_guidance and guidance_rescale > 0.0:
-                    # Based on 3.4. in https://
+                    # Based on 3.4. in https://huggingface.co/papers/2305.08891
                     noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
 
                 # compute the previous noisy sample x_t -> x_t-1
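
The `guidance_rescale` pieces above implement the fix from Section 3.4 of the linked "Common Diffusion Noise Schedules and Sample Steps are Flawed" paper: rescale the guided prediction so its standard deviation matches the text-conditioned branch, then blend the two. A sketch of that computation (shapes are illustrative and the diffusers function may differ in minor details):

    import torch

    def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
        # match the std of the CFG output to the text-conditioned prediction ...
        std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
        std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
        noise_pred_rescaled = noise_cfg * (std_text / std_cfg)
        # ... then interpolate toward the original guided prediction to avoid over-flat images
        return guidance_rescale * noise_pred_rescaled + (1 - guidance_rescale) * noise_cfg

    noise_pred_text = torch.randn(1, 4, 64, 64)
    noise_pred_uncond = torch.randn(1, 4, 64, 64)
    noise_cfg = noise_pred_uncond + 7.5 * (noise_pred_text - noise_pred_uncond)
    noise_cfg = rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.7)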
```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Stability AI, Kwai-Kolors Team and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
```
```diff
@@ -453,7 +453,7 @@ class KolorsPAGPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
```
```diff
@@ -651,7 +651,7 @@ class KolorsPAGPipeline(
         return self._guidance_scale
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
```
```diff
@@ -749,11 +749,11 @@ class KolorsPAGPipeline(
                 "Mixture of Denoisers" multi-pipeline setup, as elaborated in [**Refining the Image
                 Output**](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/stable_diffusion_xl#refining-the-image-output)
             guidance_scale (`float`, *optional*, defaults to 5.0):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
```
```diff
@@ -761,8 +761,8 @@ class KolorsPAGPipeline(
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
```
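Taken together, the `guidance_scale`, `eta`, `num_images_per_prompt`, and `generator` arguments documented in these hunks surface in an ordinary pipeline call. A hedged usage sketch for a Kolors PAG pipeline; the checkpoint id, the `enable_pag`/`pag_applied_layers` loading path, and the layer names are assumptions drawn from the diffusers PAG documentation, not from this diff:

```python
import torch
from diffusers import AutoPipelineForText2Image

# Assumed checkpoint id and PAG layer names; adjust to your setup (requires a CUDA device).
pipe = AutoPipelineForText2Image.from_pretrained(
    "Kwai-Kolors/Kolors-diffusers",
    enable_pag=True,
    pag_applied_layers=["down.block_2.attentions_1", "up.block_0.attentions_1"],
    torch_dtype=torch.float16,
    variant="fp16",
).to("cuda")

generator = torch.Generator(device="cuda").manual_seed(0)
image = pipe(
    prompt="a photo of a red panda wearing a scarf",
    guidance_scale=5.0,  # `w` in the classifier-free guidance formulation
    eta=0.0,  # only used by DDIM-style schedulers
    num_images_per_prompt=1,
    pag_scale=3.0,
    generator=generator,
).images[0]
```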
```diff
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 PixArt-Sigma Authors and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
```
```diff
@@ -326,7 +326,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
        # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
```
```diff
@@ -488,7 +488,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
         # &
         caption = re.sub(r"&", "", caption)
 
-        # ip
+        # ip addresses:
         caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
 
         # article ids:
```
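The hunk above only completes a comment, but the caption-cleaning step it annotates is just a chain of `re.sub` calls. A standalone sketch of the two substitutions shown here (the function name is illustrative; the real method applies many more patterns and collapses whitespace afterwards):

```python
import re


def scrub_caption(caption: str) -> str:
    """Strip ampersands and replace IPv4-looking tokens with a space, as in the hunk above."""
    caption = re.sub(r"&", "", caption)
    # ip addresses:
    caption = re.sub(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", " ", caption)
    return caption


print(scrub_caption("server at 192.168.0.1 & friends"))  # IP and '&' removed; surplus whitespace left as-is here
```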
```diff
@@ -624,11 +624,11 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
                 their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                 will be used.
             guidance_scale (`float`, *optional*, defaults to 4.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             height (`int`, *optional*, defaults to self.unet.config.sample_size):
```
```diff
@@ -636,8 +636,8 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
             width (`int`, *optional*, defaults to self.unet.config.sample_size):
                 The width in pixels of the generated image.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
```
```diff
@@ -729,7 +729,7 @@ class PixArtSigmaPAGPipeline(DiffusionPipeline, PAGMixin):
         device = self._execution_device
 
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
```