diffusers 0.33.1__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +48 -1
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/diffusers_cli.py +1 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +1 -1
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +1 -1
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/hooks/faster_cache.py +2 -2
- diffusers/hooks/group_offloading.py +128 -29
- diffusers/hooks/hooks.py +2 -2
- diffusers/hooks/layerwise_casting.py +3 -3
- diffusers/hooks/pyramid_attention_broadcast.py +1 -1
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +4 -0
- diffusers/loaders/ip_adapter.py +5 -14
- diffusers/loaders/lora_base.py +212 -111
- diffusers/loaders/lora_conversion_utils.py +275 -34
- diffusers/loaders/lora_pipeline.py +1554 -819
- diffusers/loaders/peft.py +52 -109
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +20 -4
- diffusers/loaders/single_file_utils.py +225 -5
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +1 -1
- diffusers/loaders/transformer_sd3.py +2 -2
- diffusers/loaders/unet.py +2 -16
- diffusers/loaders/unet_loader_utils.py +1 -1
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +15 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +4 -4
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +14 -10
- diffusers/models/auto_model.py +47 -10
- diffusers/models/autoencoders/__init__.py +1 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1108 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +256 -22
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +1 -1
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +16 -15
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +1 -1
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +10 -12
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/modeling_utils.py +44 -14
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +5 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +742 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +317 -25
- diffusers/models/transformers/transformer_cosmos.py +579 -0
- diffusers/models/transformers/transformer_flux.py +9 -11
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +6 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +2 -2
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +24 -8
- diffusers/models/transformers/transformer_wan_vace.py +393 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +2 -2
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/pipelines/__init__.py +37 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +23 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +6 -7
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +8 -8
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +8 -8
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +7 -7
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +8 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +5 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +7 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +1 -1
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +10 -17
- diffusers/pipelines/flux/pipeline_flux_control.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +30 -22
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +2 -1
- diffusers/pipelines/flux/pipeline_flux_fill.py +6 -6
- diffusers/pipelines/flux/pipeline_flux_img2img.py +39 -6
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +11 -6
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +1 -1
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +51 -6
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +107 -29
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +50 -6
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +6 -6
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +3 -4
- diffusers/pipelines/pipeline_loading_utils.py +89 -13
- diffusers/pipelines/pipeline_utils.py +105 -33
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +11 -11
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +11 -11
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +9 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +8 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +13 -10
- diffusers/pipelines/wan/pipeline_wan_i2v.py +38 -18
- diffusers/pipelines/wan/pipeline_wan_vace.py +976 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +179 -1
- diffusers/quantizers/base.py +6 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +16 -13
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +5 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +8 -8
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +13 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -1
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +11 -11
- diffusers/schedulers/scheduling_utils.py +1 -1
- diffusers/schedulers/scheduling_utils_flax.py +1 -1
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +13 -5
- diffusers/utils/__init__.py +5 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +120 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +225 -0
- diffusers/utils/dynamic_modules_utils.py +21 -3
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/import_utils.py +81 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +91 -8
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +59 -7
- diffusers/utils/torch_utils.py +25 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/METADATA +70 -55
- diffusers-0.34.0.dist-info/RECORD +639 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.34.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 DiffEdit Authors and Pix2Pix Zero Authors and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -40,7 +40,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
@@ -242,7 +242,11 @@ def preprocess_mask(mask, batch_size: int = 1):
 
 
 class StableDiffusionDiffEditPipeline(
-
+DeprecatedPipelineMixin,
+DiffusionPipeline,
+StableDiffusionMixin,
+TextualInversionLoaderMixin,
+StableDiffusionLoraLoaderMixin,
 ):
 r"""
 <Tip warning={true}>
@@ -282,6 +286,8 @@ class StableDiffusionDiffEditPipeline(
 A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
 """
 
+_last_supported_version = "0.33.1"
+
 model_cpu_offload_seq = "text_encoder->unet->vae"
 _optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]
 _exclude_from_cpu_offload = ["safety_checker"]
@@ -618,7 +624,7 @@ class StableDiffusionDiffEditPipeline(
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -969,7 +975,7 @@ class StableDiffusionDiffEditPipeline(
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -1176,7 +1182,7 @@ class StableDiffusionDiffEditPipeline(
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
@@ -1349,8 +1355,8 @@ class StableDiffusionDiffEditPipeline(
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -1422,7 +1428,7 @@ class StableDiffusionDiffEditPipeline(
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
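Across this release, several older Stable Diffusion variants (DiffEdit above, plus the GLIGEN, k-diffusion, LDM3D, and panorama pipelines below) gain `DeprecatedPipelineMixin` as their first base class together with a `_last_supported_version = "0.33.1"` class attribute. A minimal sketch of what such a mixin could look like follows; the warning text and exact behavior are illustrative assumptions, not the actual implementation in `diffusers/pipelines/pipeline_utils.py`.

    # Illustrative sketch only; the real DeprecatedPipelineMixin may differ in detail.
    import warnings


    class DeprecatedPipelineMixin:
        # Subclasses pin the last diffusers release that fully supported them.
        _last_supported_version = None

        def __init__(self, *args, **kwargs):
            version = self._last_supported_version or "an earlier release"
            warnings.warn(
                f"{self.__class__.__name__} is deprecated and was last fully supported "
                f"in diffusers {version}; it will receive no further feature updates.",
                FutureWarning,
            )
            super().__init__(*args, **kwargs)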
diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The GLIGEN Authors and HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -36,7 +36,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
@@ -108,7 +108,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
+class StableDiffusionGLIGENPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
 r"""
 Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
 
@@ -135,6 +135,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
 """
 
+_last_supported_version = "0.33.1"
+
 _optional_components = ["safety_checker", "feature_extractor"]
 model_cpu_offload_seq = "text_encoder->unet->vae"
 _exclude_from_cpu_offload = ["safety_checker"]
@@ -415,7 +417,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -589,7 +591,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 `gligen_phrases`. Otherwise, it is treated as a generation task on a blank input image.
 gligen_scheduled_sampling_beta (`float`, defaults to 0.3):
 Scheduled Sampling factor from [GLIGEN: Open-Set Grounded Text-to-Image
-Generation](https://
+Generation](https://huggingface.co/papers/2301.07093). Scheduled Sampling factor is only varied for
 scheduled sampling during inference for improved quality and controllability.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts to guide what to not include in image generation. If not defined, you need to
@@ -597,8 +599,8 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -628,7 +630,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
 guidance_rescale (`float`, *optional*, defaults to 0.0):
 Guidance rescale factor from [Common Diffusion Noise Schedules and Sample Steps are
-Flawed](https://
+Flawed](https://huggingface.co/papers/2305.08891). Guidance rescale factor should fix overexposure when
 using zero terminal SNR.
 clip_skip (`int`, *optional*):
 Number of layers to be skipped from CLIP while computing the prompt embeddings. A value of 1 means that
@@ -669,7 +671,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline, StableDiffusionMixin):
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
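The `prepare_extra_step_kwargs` hunks in these pipelines only update the DDIM paper link inside a comment; the helper itself builds scheduler kwargs by inspecting the `step()` signature, as in this condensed, standalone sketch (in the library it is a method on the pipeline class):

    import inspect

    def prepare_extra_step_kwargs(scheduler, generator, eta):
        # Only pass eta/generator to schedulers whose step() accepts them;
        # eta (η) is DDIM-specific and should lie in [0, 1].
        step_params = set(inspect.signature(scheduler.step).parameters.keys())
        extra_step_kwargs = {}
        if "eta" in step_params:
            extra_step_kwargs["eta"] = eta
        if "generator" in step_params:
            extra_step_kwargs["generator"] = generator
        return extra_step_kwargs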
diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The GLIGEN Authors and HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -41,7 +41,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.clip_image_project_model import CLIPImageProjection
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
@@ -160,7 +160,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionMixin):
+class StableDiffusionGLIGENTextImagePipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
 r"""
 Pipeline for text-to-image generation using Stable Diffusion with Grounded-Language-to-Image Generation (GLIGEN).
 
@@ -175,7 +175,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 tokenizer ([`~transformers.CLIPTokenizer`]):
 A `CLIPTokenizer` to tokenize text.
 processor ([`~transformers.CLIPProcessor`]):
-A `CLIPProcessor` to
+A `CLIPProcessor` to process reference image.
 image_encoder ([`~transformers.CLIPVisionModelWithProjection`]):
 Frozen image-encoder ([clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14)).
 image_project ([`CLIPImageProjection`]):
@@ -193,6 +193,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
 """
 
+_last_supported_version = "0.33.1"
+
 model_cpu_offload_seq = "text_encoder->unet->vae"
 _optional_components = ["safety_checker", "feature_extractor"]
 _exclude_from_cpu_offload = ["safety_checker"]
@@ -447,7 +449,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]
 
 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -775,7 +777,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 `gligen_phrases`. Otherwise, it is treated as a generation task on a blank input image.
 gligen_scheduled_sampling_beta (`float`, defaults to 0.3):
 Scheduled Sampling factor from [GLIGEN: Open-Set Grounded Text-to-Image
-Generation](https://
+Generation](https://huggingface.co/papers/2301.07093). Scheduled Sampling factor is only varied for
 scheduled sampling during inference for improved quality and controllability.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts to guide what to not include in image generation. If not defined, you need to
@@ -783,8 +785,8 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -854,7 +856,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline, StableDiffusionM
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
 
diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -42,7 +42,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput, StableDiffusionSafetyChecker
 
 
@@ -64,7 +64,11 @@ class ModelWrapper:
 
 
 class StableDiffusionKDiffusionPipeline(
-
+DeprecatedPipelineMixin,
+DiffusionPipeline,
+StableDiffusionMixin,
+TextualInversionLoaderMixin,
+StableDiffusionLoraLoaderMixin,
 ):
 r"""
 Pipeline for text-to-image generation using Stable Diffusion.
@@ -105,6 +109,8 @@ class StableDiffusionKDiffusionPipeline(
 Model that extracts features from generated images to be used as inputs for the `safety_checker`.
 """
 
+_last_supported_version = "0.33.1"
+
 model_cpu_offload_seq = "text_encoder->unet->vae"
 _optional_components = ["safety_checker", "feature_extractor"]
 _exclude_from_cpu_offload = ["safety_checker"]
@@ -123,7 +129,7 @@ class StableDiffusionKDiffusionPipeline(
 super().__init__()
 
 logger.info(
-f"{self.__class__} is an
+f"{self.__class__} is an experimental pipeline and is likely to change in the future. We recommend to use"
 " this pipeline for fast experimentation / iteration if needed, but advice to rely on existing pipelines"
 " as defined in https://huggingface.co/docs/diffusers/api/schedulers#implemented-schedulers for"
 " production settings."
@@ -513,11 +519,11 @@ class StableDiffusionKDiffusionPipeline(
 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference.
 guidance_scale (`float`, *optional*, defaults to 7.5):
-Guidance scale as defined in [Classifier-Free Diffusion
-`guidance_scale` is defined as `w` of equation 2.
-Paper](https://
-1`. Higher guidance scale encourages to generate images that are closely linked to
-usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+the text `prompt`, usually at the expense of lower image quality.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds`. instead. Ignored when not using guidance (i.e., ignored if `guidance_scale`
@@ -525,8 +531,8 @@ class StableDiffusionKDiffusionPipeline(
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) in the DDIM paper: https://
-[`schedulers.DDIMScheduler`], will be ignored for others.
+Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
+applies to [`schedulers.DDIMScheduler`], will be ignored for others.
 generator (`torch.Generator`, *optional*):
 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
 to make generation deterministic.
@@ -588,7 +594,7 @@ class StableDiffusionKDiffusionPipeline(
 
 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = True
 if guidance_scale <= 1.0:
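The rewritten `guidance_scale` docstring above refers to the guidance weight `w` from Eq. 2 of the Imagen paper; in the denoising loop that weight enters as the usual classifier-free-guidance combination (the same line appears verbatim in the LDM3D hunks further below). A small self-contained sketch of that combination:

    import torch

    def classifier_free_guidance(noise_pred_uncond: torch.Tensor,
                                 noise_pred_text: torch.Tensor,
                                 guidance_scale: float) -> torch.Tensor:
        # w = guidance_scale: w = 1 returns the plain text-conditioned prediction
        # (no extra guidance); w > 1 pushes samples toward the prompt, usually at
        # the expense of some image quality.
        return noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)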
diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py
CHANGED
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
 
 
@@ -88,6 +88,7 @@ class ModelWrapper:
 
 
 class StableDiffusionXLKDiffusionPipeline(
+DeprecatedPipelineMixin,
 DiffusionPipeline,
 StableDiffusionMixin,
 FromSingleFileMixin,
@@ -95,6 +96,8 @@ class StableDiffusionXLKDiffusionPipeline(
 TextualInversionLoaderMixin,
 IPAdapterMixin,
 ):
+_last_supported_version = "0.33.1"
+
 r"""
 Pipeline for text-to-image generation using Stable Diffusion XL and k-diffusion.
 
@@ -568,7 +571,7 @@ class StableDiffusionXLKDiffusionPipeline(
 return self._clip_skip
 
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 @property
 def do_classifier_free_guidance(self):
@@ -629,11 +632,11 @@ class StableDiffusionXLKDiffusionPipeline(
 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
 expense of slower inference.
 guidance_scale (`float`, *optional*, defaults to 5.0):
-Guidance scale as defined in [Classifier-Free Diffusion
-`guidance_scale` is defined as `w` of equation 2.
-Paper](https://
-1`. Higher guidance scale encourages to generate images that are closely linked to
-usually at the expense of lower image quality.
+Guidance scale as defined in [Classifier-Free Diffusion
+Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+the text `prompt`, usually at the expense of lower image quality.
 negative_prompt (`str` or `List[str]`, *optional*):
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The Intel Labs Team Authors and the HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -37,7 +37,7 @@ from ...utils import (
|
|
37
37
|
unscale_lora_layers,
|
38
38
|
)
|
39
39
|
from ...utils.torch_utils import randn_tensor
|
40
|
-
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
40
|
+
from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
|
41
41
|
from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
|
42
42
|
|
43
43
|
|
@@ -73,7 +73,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
73
73
|
r"""
|
74
74
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
75
75
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
76
|
-
Flawed](https://
|
76
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
77
77
|
|
78
78
|
Args:
|
79
79
|
noise_cfg (`torch.Tensor`):
|
@@ -178,6 +178,7 @@ class LDM3DPipelineOutput(BaseOutput):
|
|
178
178
|
|
179
179
|
|
180
180
|
class StableDiffusionLDM3DPipeline(
|
181
|
+
DeprecatedPipelineMixin,
|
181
182
|
DiffusionPipeline,
|
182
183
|
StableDiffusionMixin,
|
183
184
|
TextualInversionLoaderMixin,
|
@@ -185,6 +186,8 @@ class StableDiffusionLDM3DPipeline(
|
|
185
186
|
StableDiffusionLoraLoaderMixin,
|
186
187
|
FromSingleFileMixin,
|
187
188
|
):
|
189
|
+
_last_supported_version = "0.33.1"
|
190
|
+
|
188
191
|
r"""
|
189
192
|
Pipeline for text-to-image and 3D generation using LDM3D.
|
190
193
|
|
@@ -573,7 +576,7 @@ class StableDiffusionLDM3DPipeline(
|
|
573
576
|
def prepare_extra_step_kwargs(self, generator, eta):
|
574
577
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
575
578
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
576
|
-
# eta corresponds to η in DDIM paper: https://
|
579
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
577
580
|
# and should be between [0, 1]
|
578
581
|
|
579
582
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -723,7 +726,7 @@ class StableDiffusionLDM3DPipeline(
|
|
723
726
|
return self._clip_skip
|
724
727
|
|
725
728
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
726
|
-
# of the Imagen paper: https://
|
729
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
727
730
|
# corresponds to doing no classifier free guidance.
|
728
731
|
@property
|
729
732
|
def do_classifier_free_guidance(self):
|
@@ -800,8 +803,8 @@ class StableDiffusionLDM3DPipeline(
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -988,7 +991,7 @@ class StableDiffusionLDM3DPipeline(
 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

 if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
-# Based on 3.4. in https://
+# Based on 3.4. in https://huggingface.co/papers/2305.08891
 noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale)

 # compute the previous noisy sample x_t -> x_t-1
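For reference, the two steps in this hunk, classifier-free guidance followed by the Section 3.4 rescaling, combine as below. This is a self-contained sketch of the math the pipeline delegates to `rescale_noise_cfg`; the function name and tensor shapes here are illustrative.

```python
import torch


def guide_and_rescale(noise_pred_uncond, noise_pred_text, guidance_scale, guidance_rescale=0.0):
    # classifier-free guidance, Eq. (2) of the Imagen paper
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
    if guidance_rescale > 0.0:
        # Sec. 3.4 of https://huggingface.co/papers/2305.08891: match the std of the
        # guided prediction to the text prediction, then blend back by guidance_rescale
        dims = list(range(1, noise_pred_text.ndim))
        std_text = noise_pred_text.std(dim=dims, keepdim=True)
        std_cfg = noise_pred.std(dim=dims, keepdim=True)
        rescaled = noise_pred * (std_text / std_cfg)
        noise_pred = guidance_rescale * rescaled + (1.0 - guidance_rescale) * noise_pred
    return noise_pred


# illustrative call with random latents of shape (batch, channels, height, width)
uncond, text = torch.randn(2, 1, 4, 64, 64)
out = guide_and_rescale(uncond, text, guidance_scale=7.5, guidance_rescale=0.7)
```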
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 MultiDiffusion Authors and The HuggingFace Team. All rights reserved."
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -33,7 +33,7 @@ from ...utils import (
 unscale_lora_layers,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker

@@ -73,7 +73,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
 r"""
 Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
 Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
-Flawed](https://
+Flawed](https://huggingface.co/papers/2305.08891).

 Args:
 noise_cfg (`torch.Tensor`):
@@ -156,12 +156,15 @@ def retrieve_timesteps(


 class StableDiffusionPanoramaPipeline(
+DeprecatedPipelineMixin,
 DiffusionPipeline,
 StableDiffusionMixin,
 TextualInversionLoaderMixin,
 StableDiffusionLoraLoaderMixin,
 IPAdapterMixin,
 ):
+_last_supported_version = "0.33.1"
+
 r"""
 Pipeline for text-to-image generation using MultiDiffusion.

@@ -587,7 +590,7 @@ class StableDiffusionPanoramaPipeline(
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]

 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -735,8 +738,8 @@ class StableDiffusionPanoramaPipeline(
 ) -> List[Tuple[int, int, int, int]]:
 """
 Generates a list of views based on the given parameters. Here, we define the mappings F_i (see Eq. 7 in the
-MultiDiffusion paper https://
-height/width should return 1.
+MultiDiffusion paper https://huggingface.co/papers/2302.08113). If panorama's height/width < window_size,
+num_blocks of height/width should return 1.

 Args:
 panorama_height (int): The height of the panorama.
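The docstring above describes the sliding-window grid that MultiDiffusion denoises: `num_blocks` follows from the panorama size, window size, and stride, and clamps to 1 when the panorama is smaller than the window. A sketch of that view computation, assuming the usual 1/8 latent-space downscaling; the function name and the `window_size`/`stride` defaults are illustrative, not the pipeline's actual values.

```python
from typing import List, Tuple


def get_views_sketch(
    panorama_height: int, panorama_width: int, window_size: int = 64, stride: int = 8
) -> List[Tuple[int, int, int, int]]:
    # heights/widths come in pixel space; the views index latent space (1/8 scale)
    panorama_height //= 8
    panorama_width //= 8
    num_blocks_height = (panorama_height - window_size) // stride + 1 if panorama_height > window_size else 1
    num_blocks_width = (panorama_width - window_size) // stride + 1 if panorama_width > window_size else 1
    views = []
    for i in range(num_blocks_height * num_blocks_width):
        h_start = (i // num_blocks_width) * stride
        w_start = (i % num_blocks_width) * stride
        views.append((h_start, h_start + window_size, w_start, w_start + window_size))
    return views


# a 512x2048 panorama yields a single row of overlapping 64x64 latent windows
print(len(get_views_sketch(512, 2048)))  # 25 views: 1 block high, 25 blocks wide
```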
@@ -854,8 +857,8 @@ class StableDiffusionPanoramaPipeline(
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -962,7 +965,7 @@ class StableDiffusionPanoramaPipeline(

 device = self._execution_device
 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0

@@ -1054,7 +1057,7 @@ class StableDiffusionPanoramaPipeline(
 # Here, we iterate through different spatial crops of the latents and denoise them. These
 # denoised (latent) crops are then averaged to produce the final latent
 # for the current timestep via MultiDiffusion. Please see Sec. 4.1 in the
-# MultiDiffusion paper for more details: https://
+# MultiDiffusion paper for more details: https://huggingface.co/papers/2302.08113
 # Batch views denoise
 for j, batch_view in enumerate(views_batch):
 vb_size = len(batch_view)
@@ -1113,7 +1116,7 @@ class StableDiffusionPanoramaPipeline(
 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

 if self.do_classifier_free_guidance and self.guidance_rescale > 0.0:
-# Based on 3.4. in https://
+# Based on 3.4. in https://huggingface.co/papers/2305.08891
 noise_pred = rescale_noise_cfg(
 noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale
 )
@@ -1144,7 +1147,7 @@ class StableDiffusionPanoramaPipeline(
 value[:, :, h_start:h_end, w_start:w_end] += latents_view_denoised
 count[:, :, h_start:h_end, w_start:w_end] += 1

-# take the MultiDiffusion step. Eq. 5 in MultiDiffusion paper: https://
+# take the MultiDiffusion step. Eq. 5 in MultiDiffusion paper: https://huggingface.co/papers/2302.08113
 latents = torch.where(count > 0, value / count, value)

 if callback_on_step_end is not None:
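The hunk above is the fusion step itself: every window's denoised latent is accumulated into `value`, its coverage into `count`, and overlapping predictions are averaged (Eq. 5 of the MultiDiffusion paper). A standalone sketch of that step with hypothetical inputs; the helper name and example shapes are not from the pipeline.

```python
from typing import List, Tuple

import torch


def multidiffusion_fuse(
    views: List[Tuple[int, int, int, int]],
    denoised_views: List[torch.Tensor],
    latents_shape: Tuple[int, ...],
) -> torch.Tensor:
    value = torch.zeros(latents_shape)
    count = torch.zeros(latents_shape)
    for (h_start, h_end, w_start, w_end), latents_view_denoised in zip(views, denoised_views):
        value[:, :, h_start:h_end, w_start:w_end] += latents_view_denoised
        count[:, :, h_start:h_end, w_start:w_end] += 1
    # Eq. 5: average wherever windows overlap; positions never covered stay zero
    return torch.where(count > 0, value / count, value)


# two overlapping 64x64 windows over a 64x72 latent canvas
views = [(0, 64, 0, 64), (0, 64, 8, 72)]
denoised = [torch.randn(1, 4, 64, 64) for _ in views]
fused = multidiffusion_fuse(views, denoised, (1, 4, 64, 72))
```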
@@ -14,7 +14,7 @@ from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionSafePipelineOutput
 from .safety_checker import SafeStableDiffusionSafetyChecker

@@ -29,7 +29,9 @@ else:
 logger = logging.get_logger(__name__) # pylint: disable=invalid-name


-class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin):
+class StableDiffusionPipelineSafe(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin, IPAdapterMixin):
+_last_supported_version = "0.33.1"
+
 r"""
 Pipeline based on the [`StableDiffusionPipeline`] for text-to-image generation using Safe Latent Diffusion.

@@ -358,7 +360,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
 def prepare_extra_step_kwargs(self, generator, eta):
 # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
 # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-# eta corresponds to η in DDIM paper: https://
+# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
 # and should be between [0, 1]

 accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -561,8 +563,8 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
 num_images_per_prompt (`int`, *optional*, defaults to 1):
 The number of images to generate per prompt.
 eta (`float`, *optional*, defaults to 0.0):
-Corresponds to parameter eta (η) from the [DDIM](https://
-to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
 generation deterministic.
@@ -632,7 +634,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline, StableDiffusionMixin, IPAda
 device = self._execution_device

 # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-# of the Imagen paper: https://
+# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
 # corresponds to doing no classifier free guidance.
 do_classifier_free_guidance = guidance_scale > 1.0
