diffusers 0.33.0-py3-none-any.whl → 0.34.0-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported public registry. It is provided for informational purposes only.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +17 -12
- diffusers/pipelines/wan/pipeline_wan_i2v.py +42 -20
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +18 -18
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/METADATA +3 -3
- diffusers-0.34.0.dist-info/RECORD +639 -0
- diffusers-0.33.0.dist-info/RECORD +0 -608
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/WHEEL +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.0.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
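The RECORD entries near the end of the listing (639 entries in 0.34.0 against 608 in 0.33.0) are what a file-level diff like this is derived from. A minimal sketch for reproducing the list of newly added modules locally, assuming both wheels have already been downloaded; the filenames below are the standard wheel names, not paths taken from this page:

```python
import zipfile

def record_paths(wheel_path: str) -> set[str]:
    # RECORD is a CSV of "path,hash,size" rows; the first field is the installed file path.
    with zipfile.ZipFile(wheel_path) as whl:
        record = next(name for name in whl.namelist() if name.endswith(".dist-info/RECORD"))
        return {line.split(",")[0] for line in whl.read(record).decode().splitlines() if line}

old = record_paths("diffusers-0.33.0-py3-none-any.whl")
new = record_paths("diffusers-0.34.0-py3-none-any.whl")

# New modules in 0.34.0, e.g. the chroma/, cosmos/, hidream_image/ and visualcloze/ pipelines above.
print("\n".join(sorted(new - old)))
```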
@@ -11,7 +11,7 @@ from ...pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyCh
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import SemanticStableDiffusionPipelineOutput
 
 
@@ -25,7 +25,8 @@ else:
 logger = logging.get_logger(__name__) # pylint: disable=invalid-name
 
 
-class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
+class SemanticStableDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
+_last_supported_version = "0.33.1"
 r"""
 Pipeline for text-to-image generation using Stable Diffusion with latent editing.
 
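The new `DeprecatedPipelineMixin` base and the `_last_supported_version` class attribute shown above give callers a programmatic deprecation signal. A minimal sketch of reading it, assuming the class remains importable from the top-level `diffusers` namespace in 0.34:

```python
from diffusers import SemanticStableDiffusionPipeline

# The mixin records the last release that fully supports this pipeline; surface it
# before committing to a download or a long-running job.
last_ok = getattr(SemanticStableDiffusionPipeline, "_last_supported_version", None)
if last_ok is not None:
    print(f"SemanticStableDiffusionPipeline is deprecated; last fully supported release: {last_ok}")
```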
@@ -129,7 +130,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
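The `prepare_extra_step_kwargs` comments above describe a signature-introspection pattern used across these pipelines: `eta` is only forwarded when the scheduler's `step()` accepts it. A standalone sketch of the same idea, written against an arbitrary scheduler object rather than any specific pipeline:

```python
import inspect

def build_extra_step_kwargs(scheduler, generator=None, eta: float = 0.0) -> dict:
    # Only pass `eta` (DDIM's η, see https://huggingface.co/papers/2010.02502, in [0, 1])
    # and `generator` when the scheduler's step() signature actually accepts them.
    step_params = set(inspect.signature(scheduler.step).parameters)
    extra = {}
    if "eta" in step_params:
        extra["eta"] = eta
    if "generator" in step_params:
        extra["generator"] = generator
    return extra
```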
@@ -270,8 +271,8 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -451,7 +452,7 @@ class SemanticStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 edit_concepts = edit_concepts.view(bs_embed_edit * num_images_per_prompt, seq_len_edit, -1)
 
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 # get unconditional embeddings for classifier free guidance
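The comment above defines `guidance_scale` as the guidance weight `w` of equation (2) in the Imagen paper, with `guidance_scale = 1` meaning no classifier-free guidance. A short sketch of how that weight is conventionally applied to the two noise predictions; this is the standard classifier-free guidance formula, not code taken from this diff:

```python
import torch

def apply_cfg(noise_pred_uncond: torch.Tensor, noise_pred_text: torch.Tensor, guidance_scale: float) -> torch.Tensor:
    # w = guidance_scale; w = 1 reduces to the text-conditioned prediction alone (no CFG effect).
    return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
```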
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Open AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Open AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Open AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Open AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -54,7 +54,7 @@ def posenc_nerf(x: torch.Tensor, min_deg: int = 0, max_deg: int = 15) -> torch.T
 """
 Concatenate x and its positional encodings, following NeRF.
 
-Reference: https://
+Reference: https://huggingface.co/papers/2210.04628
 """
 if min_deg == max_deg:
 return x
@@ -1038,7 +1038,7 @@ class ShapERenderer(ModelMixin, ConfigMixin):
 textures = _convert_srgb_to_linear(textures)
 textures = textures.float()
 
-# 3.3
+# 3.3 augment the mesh with texture data
 assert len(textures.shape) == 3 and textures.shape[-1] == len(texture_channels), (
 f"expected [meta_batch x inner_batch x texture_channels] field results, but got {textures.shape}"
 )
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -306,7 +306,7 @@ class StableAudioPipeline(DiffusionPipeline):
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -526,8 +526,8 @@ class StableAudioPipeline(DiffusionPipeline):
 num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
 The number of waveforms to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -616,7 +616,7 @@ class StableAudioPipeline(DiffusionPipeline):
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -332,11 +332,11 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference.
 guidance_scale (`float`, *optional*, defaults to 0.0):
-Guidance scale as defined in [Classifier-Free Diffusion
-`decoder_guidance_scale` is defined as `w` of
-Paper](https://
-`decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
-linked to the text `prompt`, usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+closely linked to the text `prompt`, usually at the expense of lower image quality.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
 if `decoder_guidance_scale` is less than `1`).
@@ -524,9 +524,9 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
 latents = self.vqgan.config.scale_factor * latents
 images = self.vqgan.decode(latents).sample.clamp(0, 1)
 if output_type == "np":
-images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy
+images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
 elif output_type == "pil":
-images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy
+images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
 images = self.numpy_to_pil(images)
 else:
 images = latents
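The comment change above spells out why `.float()` precedes `.numpy()`: NumPy has no bfloat16 dtype, so PyTorch refuses the direct conversion. A small self-contained check of that behaviour:

```python
import torch

x = torch.randn(2, 3, dtype=torch.bfloat16)
try:
    x.numpy()  # raises: NumPy has no bfloat16 dtype
except TypeError as err:
    print(f"direct conversion failed: {err}")

arr = x.float().numpy()  # upcast to float32 first, as the pipelines above do
print(arr.dtype)  # float32
```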
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -125,7 +125,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
 def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
 self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
 
-def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
+def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
 r"""
 Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
 to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
@@ -135,7 +135,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
 self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
 self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
 
-def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
+def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
 r"""
 Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
 Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
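Both offload helpers above now spell out an explicit `None` default for `device` and forward `gpu_id`/`device` to the prior and decoder sub-pipelines. A hedged usage sketch; the repo id is a placeholder and the device string is an assumption, neither is taken from this diff:

```python
import torch
from diffusers import StableCascadeCombinedPipeline

# "<repo-id>" is a placeholder, not a model id shown in this diff.
pipe = StableCascadeCombinedPipeline.from_pretrained("<repo-id>", torch_dtype=torch.bfloat16)

pipe.enable_model_cpu_offload()                        # let the pipeline pick the accelerator
# pipe.enable_model_cpu_offload(device="cuda:0")       # or name the device explicitly
# pipe.enable_sequential_cpu_offload(device="cuda:0")  # slower, but lower peak memory
```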
@@ -212,11 +212,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
 width (`int`, *optional*, defaults to 512):
 The width in pixels of the generated image.
 prior_guidance_scale (`float`, *optional*, defaults to 4.0):
-Guidance scale as defined in [Classifier-Free Diffusion
-`prior_guidance_scale` is defined as `w` of
-Paper](https://
-`prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
-to the text `prompt`, usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `prior_guidance_scale` is defined as `w` of
+equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+setting `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+closely linked to the text `prompt`, usually at the expense of lower image quality.
 prior_num_inference_steps (`Union[int, Dict[float, int]]`, *optional*, defaults to 60):
 The number of prior denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference. For more specific timestep spacing, you can pass customized
@@ -226,11 +226,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
 the expense of slower inference. For more specific timestep spacing, you can pass customized
 `timesteps`
 decoder_guidance_scale (`float`, *optional*, defaults to 0.0):
-Guidance scale as defined in [Classifier-Free Diffusion
-`guidance_scale` is defined as `w` of equation 2.
-Paper](https://
-1`. Higher guidance scale encourages to generate images that are closely linked to
-usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+the text `prompt`, usually at the expense of lower image quality.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
 to make generation deterministic.
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -409,11 +409,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference.
 guidance_scale (`float`, *optional*, defaults to 8.0):
-Guidance scale as defined in [Classifier-Free Diffusion
-`decoder_guidance_scale` is defined as `w` of
-Paper](https://
-`decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
-linked to the text `prompt`, usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
+equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
+setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
+closely linked to the text `prompt`, usually at the expense of lower image quality.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
 if `decoder_guidance_scale` is less than `1`).
@@ -626,11 +626,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
 self.maybe_free_model_hooks()
 
 if output_type == "np":
-latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy
-prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy
+latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
+prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
 negative_prompt_embeds = (
 negative_prompt_embeds.cpu().float().numpy() if negative_prompt_embeds is not None else None
-) # float() as bfloat16-> numpy
+) # float() as bfloat16-> numpy doesn't work
 
 if not return_dict:
 return (
@@ -30,18 +30,11 @@ except OptionalDependencyNotAvailable:
 _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
 _import_structure["clip_image_project_model"] = ["CLIPImageProjection"]
-_import_structure["pipeline_cycle_diffusion"] = ["CycleDiffusionPipeline"]
 _import_structure["pipeline_stable_diffusion"] = ["StableDiffusionPipeline"]
-_import_structure["pipeline_stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
-_import_structure["pipeline_stable_diffusion_gligen"] = ["StableDiffusionGLIGENPipeline"]
-_import_structure["pipeline_stable_diffusion_gligen_text_image"] = ["StableDiffusionGLIGENTextImagePipeline"]
 _import_structure["pipeline_stable_diffusion_img2img"] = ["StableDiffusionImg2ImgPipeline"]
 _import_structure["pipeline_stable_diffusion_inpaint"] = ["StableDiffusionInpaintPipeline"]
-_import_structure["pipeline_stable_diffusion_inpaint_legacy"] = ["StableDiffusionInpaintPipelineLegacy"]
 _import_structure["pipeline_stable_diffusion_instruct_pix2pix"] = ["StableDiffusionInstructPix2PixPipeline"]
 _import_structure["pipeline_stable_diffusion_latent_upscale"] = ["StableDiffusionLatentUpscalePipeline"]
-_import_structure["pipeline_stable_diffusion_model_editing"] = ["StableDiffusionModelEditingPipeline"]
-_import_structure["pipeline_stable_diffusion_paradigms"] = ["StableDiffusionParadigmsPipeline"]
 _import_structure["pipeline_stable_diffusion_upscale"] = ["StableDiffusionUpscalePipeline"]
 _import_structure["pipeline_stable_unclip"] = ["StableUnCLIPPipeline"]
 _import_structure["pipeline_stable_unclip_img2img"] = ["StableUnCLIPImg2ImgPipeline"]
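The entries deleted above drop these deprecated variants from the lazily built `_import_structure` of `diffusers.pipelines.stable_diffusion`. Whether each class remains reachable through the top-level namespace in 0.34 is not shown by this hunk, so a defensive import is one way for older scripts to cope:

```python
try:
    # Previously also re-exported via diffusers.pipelines.stable_diffusion; see the hunk above.
    from diffusers import StableDiffusionAttendAndExcitePipeline
except ImportError:
    StableDiffusionAttendAndExcitePipeline = None  # e.g. pin diffusers==0.33.* to keep using it
```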
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The GLIGEN Authors and HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -53,6 +53,7 @@ from ...schedulers import (
 )
 from ...utils import is_accelerate_available, logging
 from ...utils.constants import DIFFUSERS_REQUEST_TIMEOUT
+from ...utils.torch_utils import get_device
 from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
 from ..paint_by_example import PaintByExampleImageEncoder
 from ..pipeline_utils import DiffusionPipeline
@@ -350,8 +351,14 @@ def create_vae_diffusers_config(original_config, image_size: int):
 _ = original_config["model"]["params"]["first_stage_config"]["params"]["embed_dim"]
 
 block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
-down_block_types = [
-
+down_block_types = [
+"DownEncoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnDownEncoderBlock2D"
+for i, _ in enumerate(block_out_channels)
+]
+up_block_types = [
+"UpDecoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnUpDecoderBlock2D"
+for i, _ in enumerate(block_out_channels)
+][::-1]
 
 config = {
 "sample_size": image_size,
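The new comprehensions above pick an attention block whenever the feature-map resolution at a given depth appears in the VAE config's `attn_resolutions`. A worked example with hypothetical LDM-style parameters; the values below are illustrative, not taken from this diff:

```python
image_size = 256
vae_params = {"ch": 128, "ch_mult": (1, 2, 4, 4), "attn_resolutions": [32]}
block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]

down_block_types = [
    "DownEncoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnDownEncoderBlock2D"
    for i, _ in enumerate(block_out_channels)
]
up_block_types = [
    "UpDecoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnUpDecoderBlock2D"
    for i, _ in enumerate(block_out_channels)
][::-1]

print(down_block_types)  # ['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'AttnDownEncoderBlock2D']
print(up_block_types)    # ['AttnUpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D']
```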
@@ -1266,7 +1273,7 @@ def download_from_original_stable_diffusion_ckpt(
 checkpoint = safe_load(checkpoint_path_or_dict, device="cpu")
 else:
 if device is None:
-device =
+device = get_device()
 checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
 else:
 checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
@@ -1836,7 +1843,7 @@ def download_controlnet_from_original_ckpt(
 checkpoint[key] = f.get_tensor(key)
 else:
 if device is None:
-device =
+device = get_device()
 checkpoint = torch.load(checkpoint_path, map_location=device)
 else:
 checkpoint = torch.load(checkpoint_path, map_location=device)
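Both hunks above replace a hard-coded default with `get_device()`, imported from `diffusers.utils.torch_utils`, so the checkpoint load targets whatever accelerator is present. A minimal sketch of what such a backend-agnostic picker does; the priority order here is an assumption for illustration, not read from this diff:

```python
import torch

def pick_device() -> str:
    # Assumed priority order: CUDA, then Intel XPU, then Apple MPS, else CPU.
    if torch.cuda.is_available():
        return "cuda"
    if hasattr(torch, "xpu") and torch.xpu.is_available():
        return "xpu"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

print(pick_device())  # the checkpoint loads above pass a value like this as map_location
```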
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -294,11 +294,11 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference.
 guidance_scale (`float`, *optional*, defaults to 7.5):
-Guidance scale as defined in [Classifier-Free Diffusion
-`guidance_scale` is defined as `w` of equation 2.
-Paper](https://
-1`. Higher guidance scale encourages to generate images that are closely linked to
-usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+the text `prompt`, usually at the expense of lower image quality.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale`
@@ -306,8 +306,8 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) in the DDIM paper: https://
-[`schedulers.DDIMScheduler`], will be ignored for others.
+Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+applies to [`schedulers.DDIMScheduler`], will be ignored for others.
 generator (`np.random.RandomState`, *optional*):
 One or a list of [numpy generator(s)](TODO) to make generation deterministic.
 latents (`np.ndarray`, *optional*):
@@ -359,7 +359,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
 generator = np.random
 
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -387,7 +387,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
 
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
 extra_step_kwargs = {}
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -348,19 +348,19 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference. This parameter will be modulated by `strength`.
             guidance_scale (`float`, *optional*, defaults to 7.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                 if `guidance_scale` is less than `1`).
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`np.random.RandomState`, *optional*):
                 A np.random.RandomState to make generation deterministic.
             prompt_embeds (`np.ndarray`, *optional*):
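The `generator` documented above is a `np.random.RandomState` rather than a `torch.Generator`, since these ONNX pipelines sample their latent noise with NumPy. A minimal sketch of why seeding it makes generation deterministic (the `make_latents` helper and latent shape here are illustrative, not the pipeline's own code):

```python
import numpy as np

def make_latents(shape, seed: int) -> np.ndarray:
    # A seeded RandomState yields the same latent noise on every run,
    # which is what makes generation reproducible for fixed inputs.
    generator = np.random.RandomState(seed)
    return generator.randn(*shape).astype(np.float32)

a = make_latents((1, 4, 64, 64), seed=0)
b = make_latents((1, 4, 64, 64), seed=0)
assert np.array_equal(a, b)
```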
@@ -414,7 +414,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
         image = preprocess(image).cpu().numpy()
 
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -470,7 +470,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
 
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
         extra_step_kwargs = {}
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -360,19 +360,19 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*, defaults to 7.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
                 if `guidance_scale` is less than `1`).
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`np.random.RandomState`, *optional*):
                 A np.random.RandomState to make generation deterministic.
             latents (`np.ndarray`, *optional*):
@@ -427,7 +427,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
         self.scheduler.set_timesteps(num_inference_steps)
 
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -487,7 +487,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
 
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
         extra_step_kwargs = {}
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -378,11 +378,11 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference. This parameter will be modulated by `strength`.
             guidance_scale (`float`, *optional*, defaults to 7.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                `guidance_scale` is defined as `w` of equation 2.
-                Paper](https://
-                1`. Higher guidance scale encourages to generate images that are closely linked to
-                usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             noise_level (`float`, defaults to 0.2):
                 Deteremines the amount of noise to add to the initial image before performing upscaling.
             negative_prompt (`str` or `List[str]`, *optional*):
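Per the docstring above, `noise_level` controls how strongly the low-resolution input is perturbed before the upscaling denoise loop. The sketch below is only a conceptual illustration of that idea under the assumption of simple additive Gaussian noise; the actual pipeline applies noise through its scheduler's noise schedule, not this direct mixing:

```python
import numpy as np

def perturb_low_res_image(
    image: np.ndarray, noise_level: float, rng: np.random.RandomState
) -> np.ndarray:
    # Conceptual only: a higher noise_level replaces more of the input with noise,
    # giving the upscaler more freedom to synthesize detail.
    noise = rng.standard_normal(image.shape).astype(image.dtype)
    return image + noise_level * noise
```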
@@ -391,8 +391,8 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) in the DDIM paper: https://
-                [`schedulers.DDIMScheduler`], will be ignored for others.
+                Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+                applies to [`schedulers.DDIMScheduler`], will be ignored for others.
             generator (`np.random.RandomState`, *optional*):
                 A np.random.RandomState to make generation deterministic.
             latents (`torch.Tensor`, *optional*):
@@ -450,7 +450,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
             generator = np.random
 
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 