diffusers-0.33.1-py3-none-any.whl → diffusers-0.35.0-py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -19,7 +19,6 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
19
19
|
import numpy as np
|
20
20
|
import PIL.Image
|
21
21
|
import torch
|
22
|
-
import torch.nn.functional as F
|
23
22
|
from transformers import (
|
24
23
|
CLIPImageProcessor,
|
25
24
|
CLIPTextModel,
|
@@ -38,7 +37,13 @@ from ...loaders import (
|
|
38
37
|
StableDiffusionXLLoraLoaderMixin,
|
39
38
|
TextualInversionLoaderMixin,
|
40
39
|
)
|
41
|
-
from ...models import
|
40
|
+
from ...models import (
|
41
|
+
AutoencoderKL,
|
42
|
+
ControlNetUnionModel,
|
43
|
+
ImageProjection,
|
44
|
+
MultiControlNetUnionModel,
|
45
|
+
UNet2DConditionModel,
|
46
|
+
)
|
42
47
|
from ...models.attention_processor import (
|
43
48
|
AttnProcessor2_0,
|
44
49
|
XFormersAttnProcessor,
|
@@ -53,7 +58,7 @@ from ...utils import (
|
|
53
58
|
scale_lora_layers,
|
54
59
|
unscale_lora_layers,
|
55
60
|
)
|
56
|
-
from ...utils.torch_utils import is_compiled_module, randn_tensor
|
61
|
+
from ...utils.torch_utils import empty_device_cache, is_compiled_module, randn_tensor
|
57
62
|
from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
|
58
63
|
from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput
|
59
64
|
|
@@ -262,7 +267,9 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
262
267
|
tokenizer: CLIPTokenizer,
|
263
268
|
tokenizer_2: CLIPTokenizer,
|
264
269
|
unet: UNet2DConditionModel,
|
265
|
-
controlnet:
|
270
|
+
controlnet: Union[
|
271
|
+
ControlNetUnionModel, List[ControlNetUnionModel], Tuple[ControlNetUnionModel], MultiControlNetUnionModel
|
272
|
+
],
|
266
273
|
scheduler: KarrasDiffusionSchedulers,
|
267
274
|
requires_aesthetics_score: bool = False,
|
268
275
|
force_zeros_for_empty_prompt: bool = True,
|
@@ -272,8 +279,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
272
279
|
):
|
273
280
|
super().__init__()
|
274
281
|
|
275
|
-
if
|
276
|
-
|
282
|
+
if isinstance(controlnet, (list, tuple)):
|
283
|
+
controlnet = MultiControlNetUnionModel(controlnet)
|
277
284
|
|
278
285
|
self.register_modules(
|
279
286
|
vae=vae,
|
@@ -616,7 +623,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
616
623
|
def prepare_extra_step_kwargs(self, generator, eta):
|
617
624
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
618
625
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
619
|
-
# eta corresponds to η in DDIM paper: https://
|
626
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
620
627
|
# and should be between [0, 1]
|
621
628
|
|
622
629
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -649,6 +656,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
649
656
|
controlnet_conditioning_scale=1.0,
|
650
657
|
control_guidance_start=0.0,
|
651
658
|
control_guidance_end=1.0,
|
659
|
+
control_mode=None,
|
652
660
|
callback_on_step_end_tensor_inputs=None,
|
653
661
|
):
|
654
662
|
if strength < 0 or strength > 1:
|
@@ -722,28 +730,44 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
722
730
|
"If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
|
723
731
|
)
|
724
732
|
|
733
|
+
# `prompt` needs more sophisticated handling when there are multiple
|
734
|
+
# conditionings.
|
735
|
+
if isinstance(self.controlnet, MultiControlNetUnionModel):
|
736
|
+
if isinstance(prompt, list):
|
737
|
+
logger.warning(
|
738
|
+
f"You have {len(self.controlnet.nets)} ControlNets and you have passed {len(prompt)}"
|
739
|
+
" prompts. The conditionings will be fixed across the prompts."
|
740
|
+
)
|
741
|
+
|
725
742
|
# Check `image`
|
726
|
-
|
727
|
-
|
728
|
-
)
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
+
controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
|
744
|
+
|
745
|
+
if isinstance(controlnet, ControlNetUnionModel):
|
746
|
+
for image_ in image:
|
747
|
+
self.check_image(image_, prompt, prompt_embeds)
|
748
|
+
elif isinstance(controlnet, MultiControlNetUnionModel):
|
749
|
+
if not isinstance(image, list):
|
750
|
+
raise TypeError("For multiple controlnets: `image` must be type `list`")
|
751
|
+
elif not all(isinstance(i, list) for i in image):
|
752
|
+
raise ValueError("For multiple controlnets: elements of `image` must be list of conditionings.")
|
753
|
+
elif len(image) != len(self.controlnet.nets):
|
754
|
+
raise ValueError(
|
755
|
+
f"For multiple controlnets: `image` must have the same length as the number of controlnets, but got {len(image)} images and {len(self.controlnet.nets)} ControlNets."
|
756
|
+
)
|
757
|
+
|
758
|
+
for images_ in image:
|
759
|
+
for image_ in images_:
|
760
|
+
self.check_image(image_, prompt, prompt_embeds)
|
743
761
|
|
744
762
|
if not isinstance(control_guidance_start, (tuple, list)):
|
745
763
|
control_guidance_start = [control_guidance_start]
|
746
764
|
|
765
|
+
if isinstance(controlnet, MultiControlNetUnionModel):
|
766
|
+
if len(control_guidance_start) != len(self.controlnet.nets):
|
767
|
+
raise ValueError(
|
768
|
+
f"`control_guidance_start`: {control_guidance_start} has {len(control_guidance_start)} elements but there are {len(self.controlnet.nets)} controlnets available. Make sure to provide {len(self.controlnet.nets)}."
|
769
|
+
)
|
770
|
+
|
747
771
|
if not isinstance(control_guidance_end, (tuple, list)):
|
748
772
|
control_guidance_end = [control_guidance_end]
|
749
773
|
|
@@ -762,6 +786,15 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
762
786
|
if end > 1.0:
|
763
787
|
raise ValueError(f"control guidance end: {end} can't be larger than 1.0.")
|
764
788
|
|
789
|
+
# Check `control_mode`
|
790
|
+
if isinstance(controlnet, ControlNetUnionModel):
|
791
|
+
if max(control_mode) >= controlnet.config.num_control_type:
|
792
|
+
raise ValueError(f"control_mode: must be lower than {controlnet.config.num_control_type}.")
|
793
|
+
elif isinstance(controlnet, MultiControlNetUnionModel):
|
794
|
+
for _control_mode, _controlnet in zip(control_mode, self.controlnet.nets):
|
795
|
+
if max(_control_mode) >= _controlnet.config.num_control_type:
|
796
|
+
raise ValueError(f"control_mode: must be lower than {_controlnet.config.num_control_type}.")
|
797
|
+
|
765
798
|
if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
|
766
799
|
raise ValueError(
|
767
800
|
"Provide either `ip_adapter_image` or `ip_adapter_image_embeds`. Cannot leave both `ip_adapter_image` and `ip_adapter_image_embeds` defined."
|
@@ -876,7 +909,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
876
909
|
# Offload text encoder if `enable_model_cpu_offload` was enabled
|
877
910
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
878
911
|
self.text_encoder_2.to("cpu")
|
879
|
-
|
912
|
+
empty_device_cache()
|
880
913
|
|
881
914
|
image = image.to(device=device, dtype=dtype)
|
882
915
|
|
@@ -1024,7 +1057,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1024
1057
|
return self._clip_skip
|
1025
1058
|
|
1026
1059
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
1027
|
-
# of the Imagen paper: https://
|
1060
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
1028
1061
|
# corresponds to doing no classifier free guidance.
|
1029
1062
|
@property
|
1030
1063
|
def do_classifier_free_guidance(self):
|
@@ -1049,7 +1082,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1049
1082
|
prompt: Union[str, List[str]] = None,
|
1050
1083
|
prompt_2: Optional[Union[str, List[str]]] = None,
|
1051
1084
|
image: PipelineImageInput = None,
|
1052
|
-
control_image: PipelineImageInput = None,
|
1085
|
+
control_image: Union[PipelineImageInput, List[PipelineImageInput]] = None,
|
1053
1086
|
height: Optional[int] = None,
|
1054
1087
|
width: Optional[int] = None,
|
1055
1088
|
strength: float = 0.8,
|
@@ -1074,7 +1107,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1074
1107
|
guess_mode: bool = False,
|
1075
1108
|
control_guidance_start: Union[float, List[float]] = 0.0,
|
1076
1109
|
control_guidance_end: Union[float, List[float]] = 1.0,
|
1077
|
-
control_mode: Optional[Union[int, List[int]]] = None,
|
1110
|
+
control_mode: Optional[Union[int, List[int], List[List[int]]]] = None,
|
1078
1111
|
original_size: Tuple[int, int] = None,
|
1079
1112
|
crops_coords_top_left: Tuple[int, int] = (0, 0),
|
1080
1113
|
target_size: Tuple[int, int] = None,
|
@@ -1104,13 +1137,13 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1104
1137
|
`List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
|
1105
1138
|
The initial image will be used as the starting point for the image generation process. Can also accept
|
1106
1139
|
image latents as `image`, if passing latents directly, it will not be encoded again.
|
1107
|
-
control_image (`PipelineImageInput`):
|
1108
|
-
The ControlNet input condition
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1140
|
+
control_image (`PipelineImageInput` or `List[PipelineImageInput]`, *optional*):
|
1141
|
+
The ControlNet input condition to provide guidance to the `unet` for generation. If the type is
|
1142
|
+
specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can also be accepted
|
1143
|
+
as an image. The dimensions of the output image default to `image`'s dimensions. If height and/or
|
1144
|
+
width are passed, `image` is resized accordingly. If multiple ControlNets are specified in `init`,
|
1145
|
+
images must be passed as a list such that each element of the list can be correctly batched for input
|
1146
|
+
to a single ControlNet.
|
1114
1147
|
height (`int`, *optional*, defaults to the size of control_image):
|
1115
1148
|
The height in pixels of the generated image. Anything below 512 pixels won't work well for
|
1116
1149
|
[stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
|
@@ -1129,11 +1162,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1129
1162
|
The number of denoising steps. More denoising steps usually lead to a higher quality image at the
|
1130
1163
|
expense of slower inference.
|
1131
1164
|
guidance_scale (`float`, *optional*, defaults to 7.5):
|
1132
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
1133
|
-
`guidance_scale` is defined as `w` of equation 2.
|
1134
|
-
Paper](https://
|
1135
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
1136
|
-
usually at the expense of lower image quality.
|
1165
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
1166
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
1167
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
1168
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
1169
|
+
the text `prompt`, usually at the expense of lower image quality.
|
1137
1170
|
negative_prompt (`str` or `List[str]`, *optional*):
|
1138
1171
|
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
1139
1172
|
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
|
@@ -1144,8 +1177,8 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1144
1177
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
1145
1178
|
The number of images to generate per prompt.
|
1146
1179
|
eta (`float`, *optional*, defaults to 0.0):
|
1147
|
-
Corresponds to parameter eta (η) in the DDIM paper: https://
|
1148
|
-
[`schedulers.DDIMScheduler`], will be ignored for others.
|
1180
|
+
Corresponds to parameter eta (η) in the DDIM paper: https://huggingface.co/papers/2010.02502. Only
|
1181
|
+
applies to [`schedulers.DDIMScheduler`], will be ignored for others.
|
1149
1182
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
1150
1183
|
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
|
1151
1184
|
to make generation deterministic.
|
@@ -1184,16 +1217,21 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1184
1217
|
`self.processor` in
|
1185
1218
|
[diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
|
1186
1219
|
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
1187
|
-
The outputs of the
|
1188
|
-
to the residual in the original unet
|
1189
|
-
corresponding scale as a list.
|
1220
|
+
The outputs of the ControlNet are multiplied by `controlnet_conditioning_scale` before they are added
|
1221
|
+
to the residual in the original `unet`. If multiple ControlNets are specified in `init`, you can set
|
1222
|
+
the corresponding scale as a list.
|
1190
1223
|
guess_mode (`bool`, *optional*, defaults to `False`):
|
1191
1224
|
In this mode, the ControlNet encoder will try best to recognize the content of the input image even if
|
1192
1225
|
you remove all prompts. The `guidance_scale` between 3.0 and 5.0 is recommended.
|
1193
1226
|
control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
|
1194
|
-
The percentage of total steps at which the
|
1227
|
+
The percentage of total steps at which the ControlNet starts applying.
|
1195
1228
|
control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
|
1196
|
-
The percentage of total steps at which the
|
1229
|
+
The percentage of total steps at which the ControlNet stops applying.
|
1230
|
+
control_mode (`int` or `List[int]` or `List[List[int]]`, *optional*):
|
1231
|
+
The control condition types for the ControlNet. See the ControlNet's model card for information on the
|
1232
|
+
available control modes. If multiple ControlNets are specified in `init`, control_mode should be a list
|
1233
|
+
where each ControlNet should have its corresponding control mode list. Should reflect the order of
|
1234
|
+
conditions in `control_image`.
|
1197
1235
|
original_size (`Tuple[int]`, *optional*, defaults to (1024, 1024)):
|
1198
1236
|
If `original_size` is not the same as `target_size` the image will appear to be down- or upsampled.
|
1199
1237
|
`original_size` defaults to `(height, width)` if not specified. Part of SDXL's micro-conditioning as
|
@@ -1273,12 +1311,6 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1273
1311
|
|
1274
1312
|
controlnet = self.controlnet._orig_mod if is_compiled_module(self.controlnet) else self.controlnet
|
1275
1313
|
|
1276
|
-
# align format for control guidance
|
1277
|
-
if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
|
1278
|
-
control_guidance_start = len(control_guidance_end) * [control_guidance_start]
|
1279
|
-
elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
|
1280
|
-
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
1281
|
-
|
1282
1314
|
if not isinstance(control_image, list):
|
1283
1315
|
control_image = [control_image]
|
1284
1316
|
else:
|
@@ -1287,37 +1319,56 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1287
1319
|
if not isinstance(control_mode, list):
|
1288
1320
|
control_mode = [control_mode]
|
1289
1321
|
|
1290
|
-
if
|
1291
|
-
|
1322
|
+
if isinstance(controlnet, MultiControlNetUnionModel):
|
1323
|
+
control_image = [[item] for item in control_image]
|
1324
|
+
control_mode = [[item] for item in control_mode]
|
1292
1325
|
|
1293
|
-
|
1326
|
+
# align format for control guidance
|
1327
|
+
if not isinstance(control_guidance_start, list) and isinstance(control_guidance_end, list):
|
1328
|
+
control_guidance_start = len(control_guidance_end) * [control_guidance_start]
|
1329
|
+
elif not isinstance(control_guidance_end, list) and isinstance(control_guidance_start, list):
|
1330
|
+
control_guidance_end = len(control_guidance_start) * [control_guidance_end]
|
1331
|
+
elif not isinstance(control_guidance_start, list) and not isinstance(control_guidance_end, list):
|
1332
|
+
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
|
1333
|
+
control_guidance_start, control_guidance_end = (
|
1334
|
+
mult * [control_guidance_start],
|
1335
|
+
mult * [control_guidance_end],
|
1336
|
+
)
|
1337
|
+
|
1338
|
+
if isinstance(controlnet_conditioning_scale, float):
|
1339
|
+
mult = len(controlnet.nets) if isinstance(controlnet, MultiControlNetUnionModel) else len(control_mode)
|
1340
|
+
controlnet_conditioning_scale = [controlnet_conditioning_scale] * mult
|
1294
1341
|
|
1295
1342
|
# 1. Check inputs
|
1296
|
-
|
1297
|
-
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
callback_on_step_end_tensor_inputs,
|
1318
|
-
)
|
1343
|
+
self.check_inputs(
|
1344
|
+
prompt,
|
1345
|
+
prompt_2,
|
1346
|
+
control_image,
|
1347
|
+
strength,
|
1348
|
+
num_inference_steps,
|
1349
|
+
callback_steps,
|
1350
|
+
negative_prompt,
|
1351
|
+
negative_prompt_2,
|
1352
|
+
prompt_embeds,
|
1353
|
+
negative_prompt_embeds,
|
1354
|
+
pooled_prompt_embeds,
|
1355
|
+
negative_pooled_prompt_embeds,
|
1356
|
+
ip_adapter_image,
|
1357
|
+
ip_adapter_image_embeds,
|
1358
|
+
controlnet_conditioning_scale,
|
1359
|
+
control_guidance_start,
|
1360
|
+
control_guidance_end,
|
1361
|
+
control_mode,
|
1362
|
+
callback_on_step_end_tensor_inputs,
|
1363
|
+
)
|
1319
1364
|
|
1320
|
-
|
1365
|
+
if isinstance(controlnet, ControlNetUnionModel):
|
1366
|
+
control_type = torch.zeros(controlnet.config.num_control_type).scatter_(0, torch.tensor(control_mode), 1)
|
1367
|
+
elif isinstance(controlnet, MultiControlNetUnionModel):
|
1368
|
+
control_type = [
|
1369
|
+
torch.zeros(controlnet_.config.num_control_type).scatter_(0, torch.tensor(control_mode_), 1)
|
1370
|
+
for control_mode_, controlnet_ in zip(control_mode, self.controlnet.nets)
|
1371
|
+
]
|
1321
1372
|
|
1322
1373
|
self._guidance_scale = guidance_scale
|
1323
1374
|
self._clip_skip = clip_skip
|
@@ -1334,7 +1385,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1334
1385
|
|
1335
1386
|
device = self._execution_device
|
1336
1387
|
|
1337
|
-
global_pool_conditions =
|
1388
|
+
global_pool_conditions = (
|
1389
|
+
controlnet.config.global_pool_conditions
|
1390
|
+
if isinstance(controlnet, ControlNetUnionModel)
|
1391
|
+
else controlnet.nets[0].config.global_pool_conditions
|
1392
|
+
)
|
1338
1393
|
guess_mode = guess_mode or global_pool_conditions
|
1339
1394
|
|
1340
1395
|
# 3.1. Encode input prompt
|
@@ -1372,22 +1427,55 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1372
1427
|
self.do_classifier_free_guidance,
|
1373
1428
|
)
|
1374
1429
|
|
1375
|
-
# 4. Prepare image
|
1430
|
+
# 4.1 Prepare image
|
1376
1431
|
image = self.image_processor.preprocess(image, height=height, width=width).to(dtype=torch.float32)
|
1377
1432
|
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1433
|
+
# 4.2 Prepare control images
|
1434
|
+
if isinstance(controlnet, ControlNetUnionModel):
|
1435
|
+
control_images = []
|
1436
|
+
|
1437
|
+
for image_ in control_image:
|
1438
|
+
image_ = self.prepare_control_image(
|
1439
|
+
image=image_,
|
1440
|
+
width=width,
|
1441
|
+
height=height,
|
1442
|
+
batch_size=batch_size * num_images_per_prompt,
|
1443
|
+
num_images_per_prompt=num_images_per_prompt,
|
1444
|
+
device=device,
|
1445
|
+
dtype=controlnet.dtype,
|
1446
|
+
do_classifier_free_guidance=self.do_classifier_free_guidance,
|
1447
|
+
guess_mode=guess_mode,
|
1448
|
+
)
|
1449
|
+
|
1450
|
+
control_images.append(image_)
|
1451
|
+
|
1452
|
+
control_image = control_images
|
1453
|
+
height, width = control_image[0].shape[-2:]
|
1454
|
+
|
1455
|
+
elif isinstance(controlnet, MultiControlNetUnionModel):
|
1456
|
+
control_images = []
|
1457
|
+
|
1458
|
+
for control_image_ in control_image:
|
1459
|
+
images = []
|
1460
|
+
|
1461
|
+
for image_ in control_image_:
|
1462
|
+
image_ = self.prepare_control_image(
|
1463
|
+
image=image_,
|
1464
|
+
width=width,
|
1465
|
+
height=height,
|
1466
|
+
batch_size=batch_size * num_images_per_prompt,
|
1467
|
+
num_images_per_prompt=num_images_per_prompt,
|
1468
|
+
device=device,
|
1469
|
+
dtype=controlnet.dtype,
|
1470
|
+
do_classifier_free_guidance=self.do_classifier_free_guidance,
|
1471
|
+
guess_mode=guess_mode,
|
1472
|
+
)
|
1473
|
+
|
1474
|
+
images.append(image_)
|
1475
|
+
control_images.append(images)
|
1476
|
+
|
1477
|
+
control_image = control_images
|
1478
|
+
height, width = control_image[0][0].shape[-2:]
|
1391
1479
|
|
1392
1480
|
# 5. Prepare timesteps
|
1393
1481
|
self.scheduler.set_timesteps(num_inference_steps, device=device)
|
@@ -1414,10 +1502,11 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1414
1502
|
# 7.1 Create tensor stating which controlnets to keep
|
1415
1503
|
controlnet_keep = []
|
1416
1504
|
for i in range(len(timesteps)):
|
1417
|
-
|
1418
|
-
1.0
|
1419
|
-
|
1420
|
-
|
1505
|
+
keeps = [
|
1506
|
+
1.0 - float(i / len(timesteps) < s or (i + 1) / len(timesteps) > e)
|
1507
|
+
for s, e in zip(control_guidance_start, control_guidance_end)
|
1508
|
+
]
|
1509
|
+
controlnet_keep.append(keeps)
|
1421
1510
|
|
1422
1511
|
# 7.2 Prepare added time ids & embeddings
|
1423
1512
|
original_size = original_size or (height, width)
|
@@ -1460,12 +1549,25 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1460
1549
|
prompt_embeds = prompt_embeds.to(device)
|
1461
1550
|
add_text_embeds = add_text_embeds.to(device)
|
1462
1551
|
add_time_ids = add_time_ids.to(device)
|
1463
|
-
|
1464
|
-
|
1465
|
-
|
1466
|
-
.repeat(batch_size * num_images_per_prompt * 2, 1)
|
1552
|
+
|
1553
|
+
control_type_repeat_factor = (
|
1554
|
+
batch_size * num_images_per_prompt * (2 if self.do_classifier_free_guidance else 1)
|
1467
1555
|
)
|
1468
1556
|
|
1557
|
+
if isinstance(controlnet, ControlNetUnionModel):
|
1558
|
+
control_type = (
|
1559
|
+
control_type.reshape(1, -1)
|
1560
|
+
.to(self._execution_device, dtype=prompt_embeds.dtype)
|
1561
|
+
.repeat(control_type_repeat_factor, 1)
|
1562
|
+
)
|
1563
|
+
elif isinstance(controlnet, MultiControlNetUnionModel):
|
1564
|
+
control_type = [
|
1565
|
+
_control_type.reshape(1, -1)
|
1566
|
+
.to(self._execution_device, dtype=prompt_embeds.dtype)
|
1567
|
+
.repeat(control_type_repeat_factor, 1)
|
1568
|
+
for _control_type in control_type
|
1569
|
+
]
|
1570
|
+
|
1469
1571
|
# 8. Denoising loop
|
1470
1572
|
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
|
1471
1573
|
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
@@ -1574,7 +1676,7 @@ class StableDiffusionXLControlNetUnionImg2ImgPipeline(
|
|
1574
1676
|
if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
|
1575
1677
|
self.unet.to("cpu")
|
1576
1678
|
self.controlnet.to("cpu")
|
1577
|
-
|
1679
|
+
empty_device_cache()
|
1578
1680
|
|
1579
1681
|
if not output_type == "latent":
|
1580
1682
|
# make sure the VAE is in float32 mode, as it overflows in float16
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 HunyuanDiT Authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -144,7 +144,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
144
144
|
r"""
|
145
145
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
146
146
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
147
|
-
Flawed](https://
|
147
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
148
148
|
|
149
149
|
Args:
|
150
150
|
noise_cfg (`torch.Tensor`):
|
@@ -463,7 +463,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
463
463
|
def prepare_extra_step_kwargs(self, generator, eta):
|
464
464
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
465
465
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
466
|
-
# eta corresponds to η in DDIM paper: https://
|
466
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
467
467
|
# and should be between [0, 1]
|
468
468
|
|
469
469
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -621,7 +621,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
621
621
|
return self._guidance_rescale
|
622
622
|
|
623
623
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
624
|
-
# of the Imagen paper: https://
|
624
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
625
625
|
# corresponds to doing no classifier free guidance.
|
626
626
|
@property
|
627
627
|
def do_classifier_free_guidance(self):
|
@@ -709,8 +709,8 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
709
709
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
710
710
|
The number of images to generate per prompt.
|
711
711
|
eta (`float`, *optional*, defaults to 0.0):
|
712
|
-
Corresponds to parameter eta (η) from the [DDIM](https://
|
713
|
-
to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
712
|
+
Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
|
713
|
+
applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
|
714
714
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
715
715
|
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
|
716
716
|
generation deterministic.
|
@@ -746,7 +746,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
746
746
|
inputs will be passed.
|
747
747
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
748
748
|
Rescale the noise_cfg according to `guidance_rescale`. Based on findings of [Common Diffusion Noise
|
749
|
-
Schedules and Sample Steps are Flawed](https://
|
749
|
+
Schedules and Sample Steps are Flawed](https://huggingface.co/papers/2305.08891). See Section 3.4
|
750
750
|
original_size (`Tuple[int, int]`, *optional*, defaults to `(1024, 1024)`):
|
751
751
|
The original size of the image. Used to calculate the time ids.
|
752
752
|
target_size (`Tuple[int, int]`, *optional*):
|
@@ -1009,7 +1009,7 @@ class HunyuanDiTControlNetPipeline(DiffusionPipeline):
|
|
1009
1009
|
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1010
1010
|
|
1011
1011
|
if self.do_classifier_free_guidance and guidance_rescale > 0.0:
|
1012
|
-
# Based on 3.4. in https://
|
1012
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1013
1013
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
1014
1014
|
|
1015
1015
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI, The HuggingFace Team and The InstantX Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -719,7 +719,7 @@ class StableDiffusion3ControlNetPipeline(
|
|
719
719
|
return self._clip_skip
|
720
720
|
|
721
721
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
722
|
-
# of the Imagen paper: https://
|
722
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
723
723
|
# corresponds to doing no classifier free guidance.
|
724
724
|
@property
|
725
725
|
def do_classifier_free_guidance(self):
|
@@ -877,11 +877,11 @@ class StableDiffusion3ControlNetPipeline(
|
|
877
877
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
878
878
|
will be used.
|
879
879
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
880
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
881
|
-
`guidance_scale` is defined as `w` of equation 2.
|
882
|
-
Paper](https://
|
883
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
884
|
-
usually at the expense of lower image quality.
|
880
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
881
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
882
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
883
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
884
|
+
the text `prompt`, usually at the expense of lower image quality.
|
885
885
|
control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
|
886
886
|
The percentage of total steps at which the ControlNet starts applying.
|
887
887
|
control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Stability AI, The HuggingFace Team and The AlimamaCreative Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -769,7 +769,7 @@ class StableDiffusion3ControlNetInpaintingPipeline(
|
|
769
769
|
return self._clip_skip
|
770
770
|
|
771
771
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
772
|
-
# of the Imagen paper: https://
|
772
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
773
773
|
# corresponds to doing no classifier free guidance.
|
774
774
|
@property
|
775
775
|
def do_classifier_free_guidance(self):
|
@@ -928,11 +928,11 @@ class StableDiffusion3ControlNetInpaintingPipeline(
|
|
928
928
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
929
929
|
will be used.
|
930
930
|
guidance_scale (`float`, *optional*, defaults to 5.0):
|
931
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
932
|
-
`guidance_scale` is defined as `w` of equation 2.
|
933
|
-
Paper](https://
|
934
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
935
|
-
usually at the expense of lower image quality.
|
931
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
932
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
933
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
934
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
935
|
+
the text `prompt`, usually at the expense of lower image quality.
|
936
936
|
control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
|
937
937
|
The percentage of total steps at which the ControlNet starts applying.
|
938
938
|
control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
|