diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -306,7 +306,7 @@ class StableAudioPipeline(DiffusionPipeline):
|
|
306
306
|
def prepare_extra_step_kwargs(self, generator, eta):
|
307
307
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
308
308
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
309
|
-
# eta corresponds to η in DDIM paper: https://
|
309
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
310
310
|
# and should be between [0, 1]
|
311
311
|
|
312
312
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -526,8 +526,8 @@ class StableAudioPipeline(DiffusionPipeline):
|
|
526
526
|
num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
|
527
527
|
The number of waveforms to generate per prompt.
|
528
528
|
eta (`float`, *optional*, defaults to 0.0):
|
529
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
530
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
529
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
530
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
531
531
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
532
532
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
533
533
|
generation deterministic.
|
@@ -616,7 +616,7 @@ class StableAudioPipeline(DiffusionPipeline):
|
|
616
616
|
|
617
617
|
device = self._execution_device
|
618
618
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
619
|
-
# of the Imagen paper: https://
|
619
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
620
620
|
# corresponds to doing no classifier free guidance.
|
621
621
|
do_classifier_free_guidance = guidance_scale > 1.0
|
622
622
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -332,11 +332,11 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
332
332
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
333
333
|
expense of slower inference.
|
334
334
|
guidance_scale (`float`, *optional*, defaults to 0.0):
|
335
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
336
|
-
`decoder_guidance_scale` is defined as `w` of
|
337
|
-
Paper](https://
|
338
|
-
`decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
339
|
-
linked to the text `prompt`, usually at the expense of lower image quality.
|
335
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
336
|
+
Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
|
337
|
+
equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
|
338
|
+
setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
339
|
+
closely linked to the text `prompt`, usually at the expense of lower image quality.
|
340
340
|
negative_prompt (`str` or `List[str]`, *optional*):
|
341
341
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
342
342
|
if `decoder_guidance_scale` is less than `1`).
|
@@ -524,9 +524,9 @@ class StableCascadeDecoderPipeline(DiffusionPipeline):
|
|
524
524
|
latents = self.vqgan.config.scale_factor * latents
|
525
525
|
images = self.vqgan.decode(latents).sample.clamp(0, 1)
|
526
526
|
if output_type == "np":
|
527
|
-
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy
|
527
|
+
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
|
528
528
|
elif output_type == "pil":
|
529
|
-
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy
|
529
|
+
images = images.permute(0, 2, 3, 1).cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
|
530
530
|
images = self.numpy_to_pil(images)
|
531
531
|
else:
|
532
532
|
images = latents
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -125,7 +125,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
125
125
|
def enable_xformers_memory_efficient_attention(self, attention_op: Optional[Callable] = None):
|
126
126
|
self.decoder_pipe.enable_xformers_memory_efficient_attention(attention_op)
|
127
127
|
|
128
|
-
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
|
128
|
+
def enable_model_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
129
129
|
r"""
|
130
130
|
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
|
131
131
|
to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
|
@@ -135,7 +135,7 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
135
135
|
self.prior_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
136
136
|
self.decoder_pipe.enable_model_cpu_offload(gpu_id=gpu_id, device=device)
|
137
137
|
|
138
|
-
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] =
|
138
|
+
def enable_sequential_cpu_offload(self, gpu_id: Optional[int] = None, device: Union[torch.device, str] = None):
|
139
139
|
r"""
|
140
140
|
Offloads all models (`unet`, `text_encoder`, `vae`, and `safety checker` state dicts) to CPU using 🤗
|
141
141
|
Accelerate, significantly reducing memory usage. Models are moved to a `torch.device('meta')` and loaded on a
|
@@ -212,11 +212,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
212
212
|
width (`int`, *optional*, defaults to 512):
|
213
213
|
The width in pixels of the generated image.
|
214
214
|
prior_guidance_scale (`float`, *optional*, defaults to 4.0):
|
215
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
216
|
-
`prior_guidance_scale` is defined as `w` of
|
217
|
-
Paper](https://
|
218
|
-
`prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
219
|
-
to the text `prompt`, usually at the expense of lower image quality.
|
215
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
216
|
+
Guidance](https://huggingface.co/papers/2207.12598). `prior_guidance_scale` is defined as `w` of
|
217
|
+
equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
|
218
|
+
setting `prior_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
219
|
+
closely linked to the text `prompt`, usually at the expense of lower image quality.
|
220
220
|
prior_num_inference_steps (`Union[int, Dict[float, int]]`, *optional*, defaults to 60):
|
221
221
|
The number of prior denoising steps. More denoising steps usually lead to a higher quality image at the
|
222
222
|
expense of slower inference. For more specific timestep spacing, you can pass customized
|
@@ -226,11 +226,11 @@ class StableCascadeCombinedPipeline(DiffusionPipeline):
|
|
226
226
|
the expense of slower inference. For more specific timestep spacing, you can pass customized
|
227
227
|
`timesteps`
|
228
228
|
decoder_guidance_scale (`float`, *optional*, defaults to 0.0):
|
229
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
230
|
-
`guidance_scale` is defined as `w` of equation 2.
|
231
|
-
Paper](https://
|
232
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
233
|
-
usually at the expense of lower image quality.
|
229
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
230
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
231
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
232
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
233
|
+
the text `prompt`, usually at the expense of lower image quality.
|
234
234
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
235
235
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
236
236
|
to make generation deterministic.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -409,11 +409,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
409
409
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
410
410
|
expense of slower inference.
|
411
411
|
guidance_scale (`float`, *optional*, defaults to 8.0):
|
412
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
413
|
-
`decoder_guidance_scale` is defined as `w` of
|
414
|
-
Paper](https://
|
415
|
-
`decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
416
|
-
linked to the text `prompt`, usually at the expense of lower image quality.
|
412
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
413
|
+
Guidance](https://huggingface.co/papers/2207.12598). `decoder_guidance_scale` is defined as `w` of
|
414
|
+
equation 2. of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by
|
415
|
+
setting `decoder_guidance_scale > 1`. Higher guidance scale encourages to generate images that are
|
416
|
+
closely linked to the text `prompt`, usually at the expense of lower image quality.
|
417
417
|
negative_prompt (`str` or `List[str]`, *optional*):
|
418
418
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
419
419
|
if `decoder_guidance_scale` is less than `1`).
|
@@ -626,11 +626,11 @@ class StableCascadePriorPipeline(DiffusionPipeline):
|
|
626
626
|
self.maybe_free_model_hooks()
|
627
627
|
|
628
628
|
if output_type == "np":
|
629
|
-
latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy
|
630
|
-
prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy
|
629
|
+
latents = latents.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
|
630
|
+
prompt_embeds = prompt_embeds.cpu().float().numpy() # float() as bfloat16-> numpy doesn't work
|
631
631
|
negative_prompt_embeds = (
|
632
632
|
negative_prompt_embeds.cpu().float().numpy() if negative_prompt_embeds is not None else None
|
633
|
-
) # float() as bfloat16-> numpy
|
633
|
+
) # float() as bfloat16-> numpy doesn't work
|
634
634
|
|
635
635
|
if not return_dict:
|
636
636
|
return (
|
@@ -30,18 +30,11 @@ except OptionalDependencyNotAvailable:
|
|
30
30
|
_dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
|
31
31
|
else:
|
32
32
|
_import_structure["clip_image_project_model"] = ["CLIPImageProjection"]
|
33
|
-
_import_structure["pipeline_cycle_diffusion"] = ["CycleDiffusionPipeline"]
|
34
33
|
_import_structure["pipeline_stable_diffusion"] = ["StableDiffusionPipeline"]
|
35
|
-
_import_structure["pipeline_stable_diffusion_attend_and_excite"] = ["StableDiffusionAttendAndExcitePipeline"]
|
36
|
-
_import_structure["pipeline_stable_diffusion_gligen"] = ["StableDiffusionGLIGENPipeline"]
|
37
|
-
_import_structure["pipeline_stable_diffusion_gligen_text_image"] = ["StableDiffusionGLIGENTextImagePipeline"]
|
38
34
|
_import_structure["pipeline_stable_diffusion_img2img"] = ["StableDiffusionImg2ImgPipeline"]
|
39
35
|
_import_structure["pipeline_stable_diffusion_inpaint"] = ["StableDiffusionInpaintPipeline"]
|
40
|
-
_import_structure["pipeline_stable_diffusion_inpaint_legacy"] = ["StableDiffusionInpaintPipelineLegacy"]
|
41
36
|
_import_structure["pipeline_stable_diffusion_instruct_pix2pix"] = ["StableDiffusionInstructPix2PixPipeline"]
|
42
37
|
_import_structure["pipeline_stable_diffusion_latent_upscale"] = ["StableDiffusionLatentUpscalePipeline"]
|
43
|
-
_import_structure["pipeline_stable_diffusion_model_editing"] = ["StableDiffusionModelEditingPipeline"]
|
44
|
-
_import_structure["pipeline_stable_diffusion_paradigms"] = ["StableDiffusionParadigmsPipeline"]
|
45
38
|
_import_structure["pipeline_stable_diffusion_upscale"] = ["StableDiffusionUpscalePipeline"]
|
46
39
|
_import_structure["pipeline_stable_unclip"] = ["StableUnCLIPPipeline"]
|
47
40
|
_import_structure["pipeline_stable_unclip_img2img"] = ["StableUnCLIPImg2ImgPipeline"]
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The GLIGEN Authors and HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -53,6 +53,7 @@ from ...schedulers import (
|
|
53
53
|
)
|
54
54
|
from ...utils import is_accelerate_available, logging
|
55
55
|
from ...utils.constants import DIFFUSERS_REQUEST_TIMEOUT
|
56
|
+
from ...utils.torch_utils import get_device
|
56
57
|
from ..latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel
|
57
58
|
from ..paint_by_example import PaintByExampleImageEncoder
|
58
59
|
from ..pipeline_utils import DiffusionPipeline
|
@@ -350,8 +351,14 @@ def create_vae_diffusers_config(original_config, image_size: int):
|
|
350
351
|
_ = original_config["model"]["params"]["first_stage_config"]["params"]["embed_dim"]
|
351
352
|
|
352
353
|
block_out_channels = [vae_params["ch"] * mult for mult in vae_params["ch_mult"]]
|
353
|
-
down_block_types = [
|
354
|
-
|
354
|
+
down_block_types = [
|
355
|
+
"DownEncoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnDownEncoderBlock2D"
|
356
|
+
for i, _ in enumerate(block_out_channels)
|
357
|
+
]
|
358
|
+
up_block_types = [
|
359
|
+
"UpDecoderBlock2D" if image_size // 2**i not in vae_params["attn_resolutions"] else "AttnUpDecoderBlock2D"
|
360
|
+
for i, _ in enumerate(block_out_channels)
|
361
|
+
][::-1]
|
355
362
|
|
356
363
|
config = {
|
357
364
|
"sample_size": image_size,
|
@@ -1266,7 +1273,7 @@ def download_from_original_stable_diffusion_ckpt(
|
|
1266
1273
|
checkpoint = safe_load(checkpoint_path_or_dict, device="cpu")
|
1267
1274
|
else:
|
1268
1275
|
if device is None:
|
1269
|
-
device =
|
1276
|
+
device = get_device()
|
1270
1277
|
checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
|
1271
1278
|
else:
|
1272
1279
|
checkpoint = torch.load(checkpoint_path_or_dict, map_location=device)
|
@@ -1836,7 +1843,7 @@ def download_controlnet_from_original_ckpt(
|
|
1836
1843
|
checkpoint[key] = f.get_tensor(key)
|
1837
1844
|
else:
|
1838
1845
|
if device is None:
|
1839
|
-
device =
|
1846
|
+
device = get_device()
|
1840
1847
|
checkpoint = torch.load(checkpoint_path, map_location=device)
|
1841
1848
|
else:
|
1842
1849
|
checkpoint = torch.load(checkpoint_path, map_location=device)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -294,11 +294,11 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
294
294
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
295
295
|
expense of slower inference.
|
296
296
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
297
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
298
|
-
`guidance_scale` is defined as `w` of equation 2.
|
299
|
-
Paper](https://
|
300
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
301
|
-
usually at the expense of lower image quality.
|
297
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
298
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
299
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
300
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
301
|
+
the text `prompt`, usually at the expense of lower image quality.
|
302
302
|
negative_prompt (`str` or `List[str]`, *optional*):
|
303
303
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
304
304
|
`negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale`
|
@@ -306,8 +306,8 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
306
306
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
307
307
|
The number of images to generate per prompt.
|
308
308
|
eta (`float`, *optional*, defaults to 0.0):
|
309
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
310
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
309
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
310
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
311
311
|
generator (`np.random.RandomState`, *optional*):
|
312
312
|
One or a list of [numpy generator(s)](TODO) to make generation deterministic.
|
313
313
|
latents (`np.ndarray`, *optional*):
|
@@ -359,7 +359,7 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
359
359
|
generator = np.random
|
360
360
|
|
361
361
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
362
|
-
# of the Imagen paper: https://
|
362
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
363
363
|
# corresponds to doing no classifier free guidance.
|
364
364
|
do_classifier_free_guidance = guidance_scale > 1.0
|
365
365
|
|
@@ -383,11 +383,12 @@ class OnnxStableDiffusionPipeline(DiffusionPipeline):
|
|
383
383
|
# set timesteps
|
384
384
|
self.scheduler.set_timesteps(num_inference_steps)
|
385
385
|
|
386
|
-
|
386
|
+
# scale the initial noise by the standard deviation required by the scheduler
|
387
|
+
latents = latents * self.scheduler.init_noise_sigma
|
387
388
|
|
388
389
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
389
390
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
390
|
-
# eta corresponds to η in DDIM paper: https://
|
391
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
391
392
|
# and should be between [0, 1]
|
392
393
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
393
394
|
extra_step_kwargs = {}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -348,19 +348,19 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
|
348
348
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
349
349
|
expense of slower inference. This parameter will be modulated by `strength`.
|
350
350
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
351
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
352
|
-
`guidance_scale` is defined as `w` of equation 2.
|
353
|
-
Paper](https://
|
354
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
355
|
-
usually at the expense of lower image quality.
|
351
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
352
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
353
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
354
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
355
|
+
the text `prompt`, usually at the expense of lower image quality.
|
356
356
|
negative_prompt (`str` or `List[str]`, *optional*):
|
357
357
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
358
358
|
if `guidance_scale` is less than `1`).
|
359
359
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
360
360
|
The number of images to generate per prompt.
|
361
361
|
eta (`float`, *optional*, defaults to 0.0):
|
362
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
363
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
362
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
363
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
364
364
|
generator (`np.random.RandomState`, *optional*):
|
365
365
|
A np.random.RandomState to make generation deterministic.
|
366
366
|
prompt_embeds (`np.ndarray`, *optional*):
|
@@ -414,7 +414,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
|
414
414
|
image = preprocess(image).cpu().numpy()
|
415
415
|
|
416
416
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
417
|
-
# of the Imagen paper: https://
|
417
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
418
418
|
# corresponds to doing no classifier free guidance.
|
419
419
|
do_classifier_free_guidance = guidance_scale > 1.0
|
420
420
|
|
@@ -470,7 +470,7 @@ class OnnxStableDiffusionImg2ImgPipeline(DiffusionPipeline):
|
|
470
470
|
|
471
471
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
472
472
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
473
|
-
# eta corresponds to η in DDIM paper: https://
|
473
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
474
474
|
# and should be between [0, 1]
|
475
475
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
476
476
|
extra_step_kwargs = {}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -360,19 +360,19 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
|
|
360
360
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
361
361
|
expense of slower inference.
|
362
362
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
363
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
364
|
-
`guidance_scale` is defined as `w` of equation 2.
|
365
|
-
Paper](https://
|
366
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
367
|
-
usually at the expense of lower image quality.
|
363
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
364
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
365
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
366
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
367
|
+
the text `prompt`, usually at the expense of lower image quality.
|
368
368
|
negative_prompt (`str` or `List[str]`, *optional*):
|
369
369
|
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
|
370
370
|
if `guidance_scale` is less than `1`).
|
371
371
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
372
372
|
The number of images to generate per prompt.
|
373
373
|
eta (`float`, *optional*, defaults to 0.0):
|
374
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
375
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
374
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
375
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
376
376
|
generator (`np.random.RandomState`, *optional*):
|
377
377
|
A np.random.RandomState to make generation deterministic.
|
378
378
|
latents (`np.ndarray`, *optional*):
|
@@ -427,7 +427,7 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
|
|
427
427
|
self.scheduler.set_timesteps(num_inference_steps)
|
428
428
|
|
429
429
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
430
|
-
# of the Imagen paper: https://
|
430
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
431
431
|
# corresponds to doing no classifier free guidance.
|
432
432
|
do_classifier_free_guidance = guidance_scale > 1.0
|
433
433
|
|
@@ -483,11 +483,11 @@ class OnnxStableDiffusionInpaintPipeline(DiffusionPipeline):
|
|
483
483
|
self.scheduler.set_timesteps(num_inference_steps)
|
484
484
|
|
485
485
|
# scale the initial noise by the standard deviation required by the scheduler
|
486
|
-
latents = latents *
|
486
|
+
latents = latents * self.scheduler.init_noise_sigma
|
487
487
|
|
488
488
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
489
489
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
490
|
-
# eta corresponds to η in DDIM paper: https://
|
490
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
491
491
|
# and should be between [0, 1]
|
492
492
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
493
493
|
extra_step_kwargs = {}
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -378,11 +378,11 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
378
378
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
379
379
|
expense of slower inference. This parameter will be modulated by `strength`.
|
380
380
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
381
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
382
|
-
`guidance_scale` is defined as `w` of equation 2.
|
383
|
-
Paper](https://
|
384
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
385
|
-
usually at the expense of lower image quality.
|
381
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
382
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
383
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
384
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
385
|
+
the text `prompt`, usually at the expense of lower image quality.
|
386
386
|
noise_level (`float`, defaults to 0.2):
|
387
387
|
Deteremines the amount of noise to add to the initial image before performing upscaling.
|
388
388
|
negative_prompt (`str` or `List[str]`, *optional*):
|
@@ -391,8 +391,8 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
391
391
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
392
392
|
The number of images to generate per prompt.
|
393
393
|
eta (`float`, *optional*, defaults to 0.0):
|
394
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
395
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
394
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
395
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
396
396
|
generator (`np.random.RandomState`, *optional*):
|
397
397
|
A np.random.RandomState to make generation deterministic.
|
398
398
|
latents (`torch.Tensor`, *optional*):
|
@@ -450,7 +450,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
450
450
|
generator = np.random
|
451
451
|
|
452
452
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
453
|
-
# of the Imagen paper: https://
|
453
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
454
454
|
# corresponds to doing no classifier free guidance.
|
455
455
|
do_classifier_free_guidance = guidance_scale > 1.0
|
456
456
|
|
@@ -481,7 +481,7 @@ class OnnxStableDiffusionUpscalePipeline(DiffusionPipeline):
|
|
481
481
|
timesteps = self.scheduler.timesteps
|
482
482
|
|
483
483
|
# Scale the initial noise by the standard deviation required by the scheduler
|
484
|
-
latents = latents *
|
484
|
+
latents = latents * self.scheduler.init_noise_sigma
|
485
485
|
|
486
486
|
# 5. Add noise to image
|
487
487
|
noise_level = np.array([noise_level]).astype(np.int64)
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -70,7 +70,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
70
70
|
r"""
|
71
71
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
72
72
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
73
|
-
Flawed](https://
|
73
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
74
74
|
|
75
75
|
Args:
|
76
76
|
noise_cfg (`torch.Tensor`):
|
@@ -608,7 +608,7 @@ class StableDiffusionPipeline(
|
|
608
608
|
def prepare_extra_step_kwargs(self, generator, eta):
|
609
609
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
610
610
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
611
|
-
# eta corresponds to η in DDIM paper: https://
|
611
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
612
612
|
# and should be between [0, 1]
|
613
613
|
|
614
614
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -757,7 +757,7 @@ class StableDiffusionPipeline(
|
|
757
757
|
return self._clip_skip
|
758
758
|
|
759
759
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
760
|
-
# of the Imagen paper: https://
|
760
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
761
761
|
# corresponds to doing no classifier free guidance.
|
762
762
|
@property
|
763
763
|
def do_classifier_free_guidance(self):
|
@@ -836,8 +836,8 @@ class StableDiffusionPipeline(
|
|
836
836
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
837
837
|
The number of images to generate per prompt.
|
838
838
|
eta (`float`, *optional*, defaults to 0.0):
|
839
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
840
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
839
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
840
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
841
841
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
842
842
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
843
843
|
generation deterministic.
|
@@ -867,7 +867,7 @@ class StableDiffusionPipeline(
|
|
867
867
|
[`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
868
868
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
869
869
|
Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
|
870
|
-
Flawed](https://
|
870
|
+
Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
|
871
871
|
using zero terminal SNR.
|
872
872
|
clip_skip (`int`, *optional*):
|
873
873
|
Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
|
@@ -1034,7 +1034,8 @@ class StableDiffusionPipeline(
|
|
1034
1034
|
|
1035
1035
|
# expand the latents if we are doing classifier free guidance
|
1036
1036
|
latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
|
1037
|
-
|
1037
|
+
if hasattr(self.scheduler, "scale_model_input"):
|
1038
|
+
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
1038
1039
|
|
1039
1040
|
# predict the noise residual
|
1040
1041
|
noise_pred = self.unet(
|
@@ -1053,7 +1054,7 @@ class StableDiffusionPipeline(
|
|
1053
1054
|
noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1054
1055
|
|
1055
1056
|
if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
|
1056
|
-
# Based on 3.4. in https://
|
1057
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1057
1058
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)
|
1058
1059
|
|
1059
1060
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -414,7 +414,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
414
414
|
def prepare_extra_step_kwargs(self, generator, eta):
|
415
415
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
416
416
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
417
|
-
# eta corresponds to η in DDIM paper: https://
|
417
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
418
418
|
# and should be between [0, 1]
|
419
419
|
|
420
420
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -617,7 +617,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
617
617
|
return self._clip_skip
|
618
618
|
|
619
619
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
620
|
-
# of the Imagen paper: https://
|
620
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
621
621
|
# corresponds to doing no classifier free guidance.
|
622
622
|
@property
|
623
623
|
def do_classifier_free_guidance(self):
|
@@ -684,8 +684,8 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
|
|
684
684
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
685
685
|
The number of images to generate per prompt.
|
686
686
|
eta (`float`, *optional*, defaults to 0.0):
|
687
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
688
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
687
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
688
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
689
689
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
690
690
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
691
691
|
generation deterministic.
|