diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +198 -28
- diffusers/loaders/lora_conversion_utils.py +679 -44
- diffusers/loaders/lora_pipeline.py +1963 -801
- diffusers/loaders/peft.py +169 -84
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +653 -75
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +22 -32
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +10 -2
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +14 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.1.dist-info/RECORD +0 -550
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -15,7 +15,6 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
|
15
15
|
|
16
16
|
import torch
|
17
17
|
import torch.nn as nn
|
18
|
-
import torch.nn.functional as F
|
19
18
|
|
20
19
|
from ...configuration_utils import ConfigMixin, register_to_config
|
21
20
|
from ...loaders import FromOriginalModelMixin, PeftAdapterMixin, SD3Transformer2DLoadersMixin
|
@@ -28,7 +27,7 @@ from ...models.attention_processor import (
|
|
28
27
|
)
|
29
28
|
from ...models.modeling_utils import ModelMixin
|
30
29
|
from ...models.normalization import AdaLayerNormContinuous, AdaLayerNormZero
|
31
|
-
from ...utils import USE_PEFT_BACKEND,
|
30
|
+
from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
|
32
31
|
from ...utils.torch_utils import maybe_allow_in_graph
|
33
32
|
from ..embeddings import CombinedTimestepTextProjEmbeddings, PatchEmbed
|
34
33
|
from ..modeling_outputs import Transformer2DModelOutput
|
@@ -39,17 +38,6 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
|
39
38
|
|
40
39
|
@maybe_allow_in_graph
|
41
40
|
class SD3SingleTransformerBlock(nn.Module):
|
42
|
-
r"""
|
43
|
-
A Single Transformer block as part of the MMDiT architecture, used in Stable Diffusion 3 ControlNet.
|
44
|
-
|
45
|
-
Reference: https://arxiv.org/abs/2403.03206
|
46
|
-
|
47
|
-
Parameters:
|
48
|
-
dim (`int`): The number of channels in the input and output.
|
49
|
-
num_attention_heads (`int`): The number of heads to use for multi-head attention.
|
50
|
-
attention_head_dim (`int`): The number of channels in each head.
|
51
|
-
"""
|
52
|
-
|
53
41
|
def __init__(
|
54
42
|
self,
|
55
43
|
dim: int,
|
@@ -59,21 +47,13 @@ class SD3SingleTransformerBlock(nn.Module):
|
|
59
47
|
super().__init__()
|
60
48
|
|
61
49
|
self.norm1 = AdaLayerNormZero(dim)
|
62
|
-
|
63
|
-
if hasattr(F, "scaled_dot_product_attention"):
|
64
|
-
processor = JointAttnProcessor2_0()
|
65
|
-
else:
|
66
|
-
raise ValueError(
|
67
|
-
"The current PyTorch version does not support the `scaled_dot_product_attention` function."
|
68
|
-
)
|
69
|
-
|
70
50
|
self.attn = Attention(
|
71
51
|
query_dim=dim,
|
72
52
|
dim_head=attention_head_dim,
|
73
53
|
heads=num_attention_heads,
|
74
54
|
out_dim=dim,
|
75
55
|
bias=True,
|
76
|
-
processor=
|
56
|
+
processor=JointAttnProcessor2_0(),
|
77
57
|
eps=1e-6,
|
78
58
|
)
|
79
59
|
|
@@ -81,23 +61,17 @@ class SD3SingleTransformerBlock(nn.Module):
|
|
81
61
|
self.ff = FeedForward(dim=dim, dim_out=dim, activation_fn="gelu-approximate")
|
82
62
|
|
83
63
|
def forward(self, hidden_states: torch.Tensor, temb: torch.Tensor):
|
64
|
+
# 1. Attention
|
84
65
|
norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(hidden_states, emb=temb)
|
85
|
-
|
86
|
-
attn_output = self.attn(
|
87
|
-
hidden_states=norm_hidden_states,
|
88
|
-
encoder_hidden_states=None,
|
89
|
-
)
|
90
|
-
|
91
|
-
# Process attention outputs for the `hidden_states`.
|
66
|
+
attn_output = self.attn(hidden_states=norm_hidden_states, encoder_hidden_states=None)
|
92
67
|
attn_output = gate_msa.unsqueeze(1) * attn_output
|
93
68
|
hidden_states = hidden_states + attn_output
|
94
69
|
|
70
|
+
# 2. Feed Forward
|
95
71
|
norm_hidden_states = self.norm2(hidden_states)
|
96
|
-
norm_hidden_states = norm_hidden_states * (1 + scale_mlp
|
97
|
-
|
72
|
+
norm_hidden_states = norm_hidden_states * (1 + scale_mlp.unsqueeze(1)) + shift_mlp.unsqueeze(1)
|
98
73
|
ff_output = self.ff(norm_hidden_states)
|
99
74
|
ff_output = gate_mlp.unsqueeze(1) * ff_output
|
100
|
-
|
101
75
|
hidden_states = hidden_states + ff_output
|
102
76
|
|
103
77
|
return hidden_states
|
@@ -107,26 +81,41 @@ class SD3Transformer2DModel(
|
|
107
81
|
ModelMixin, ConfigMixin, PeftAdapterMixin, FromOriginalModelMixin, SD3Transformer2DLoadersMixin
|
108
82
|
):
|
109
83
|
"""
|
110
|
-
The Transformer model introduced in Stable Diffusion 3.
|
111
|
-
|
112
|
-
Reference: https://arxiv.org/abs/2403.03206
|
84
|
+
The Transformer model introduced in [Stable Diffusion 3](https://huggingface.co/papers/2403.03206).
|
113
85
|
|
114
86
|
Parameters:
|
115
|
-
sample_size (`int`):
|
116
|
-
it is used to learn a number of
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
87
|
+
sample_size (`int`, defaults to `128`):
|
88
|
+
The width/height of the latents. This is fixed during training since it is used to learn a number of
|
89
|
+
position embeddings.
|
90
|
+
patch_size (`int`, defaults to `2`):
|
91
|
+
Patch size to turn the input data into small patches.
|
92
|
+
in_channels (`int`, defaults to `16`):
|
93
|
+
The number of latent channels in the input.
|
94
|
+
num_layers (`int`, defaults to `18`):
|
95
|
+
The number of layers of transformer blocks to use.
|
96
|
+
attention_head_dim (`int`, defaults to `64`):
|
97
|
+
The number of channels in each head.
|
98
|
+
num_attention_heads (`int`, defaults to `18`):
|
99
|
+
The number of heads to use for multi-head attention.
|
100
|
+
joint_attention_dim (`int`, defaults to `4096`):
|
101
|
+
The embedding dimension to use for joint text-image attention.
|
102
|
+
caption_projection_dim (`int`, defaults to `1152`):
|
103
|
+
The embedding dimension of caption embeddings.
|
104
|
+
pooled_projection_dim (`int`, defaults to `2048`):
|
105
|
+
The embedding dimension of pooled text projections.
|
106
|
+
out_channels (`int`, defaults to `16`):
|
107
|
+
The number of latent channels in the output.
|
108
|
+
pos_embed_max_size (`int`, defaults to `96`):
|
109
|
+
The maximum latent height/width of positional embeddings.
|
110
|
+
dual_attention_layers (`Tuple[int, ...]`, defaults to `()`):
|
111
|
+
The number of dual-stream transformer blocks to use.
|
112
|
+
qk_norm (`str`, *optional*, defaults to `None`):
|
113
|
+
The normalization to use for query and key in the attention layer. If `None`, no normalization is used.
|
127
114
|
"""
|
128
115
|
|
129
116
|
_supports_gradient_checkpointing = True
|
117
|
+
_no_split_modules = ["JointTransformerBlock"]
|
118
|
+
_skip_layerwise_casting_patterns = ["pos_embed", "norm"]
|
130
119
|
|
131
120
|
@register_to_config
|
132
121
|
def __init__(
|
@@ -148,36 +137,33 @@ class SD3Transformer2DModel(
|
|
148
137
|
qk_norm: Optional[str] = None,
|
149
138
|
):
|
150
139
|
super().__init__()
|
151
|
-
|
152
|
-
self.
|
153
|
-
self.inner_dim = self.config.num_attention_heads * self.config.attention_head_dim
|
140
|
+
self.out_channels = out_channels if out_channels is not None else in_channels
|
141
|
+
self.inner_dim = num_attention_heads * attention_head_dim
|
154
142
|
|
155
143
|
self.pos_embed = PatchEmbed(
|
156
|
-
height=
|
157
|
-
width=
|
158
|
-
patch_size=
|
159
|
-
in_channels=
|
144
|
+
height=sample_size,
|
145
|
+
width=sample_size,
|
146
|
+
patch_size=patch_size,
|
147
|
+
in_channels=in_channels,
|
160
148
|
embed_dim=self.inner_dim,
|
161
149
|
pos_embed_max_size=pos_embed_max_size, # hard-code for now.
|
162
150
|
)
|
163
151
|
self.time_text_embed = CombinedTimestepTextProjEmbeddings(
|
164
|
-
embedding_dim=self.inner_dim, pooled_projection_dim=
|
152
|
+
embedding_dim=self.inner_dim, pooled_projection_dim=pooled_projection_dim
|
165
153
|
)
|
166
|
-
self.context_embedder = nn.Linear(
|
154
|
+
self.context_embedder = nn.Linear(joint_attention_dim, caption_projection_dim)
|
167
155
|
|
168
|
-
# `attention_head_dim` is doubled to account for the mixing.
|
169
|
-
# It needs to crafted when we get the actual checkpoints.
|
170
156
|
self.transformer_blocks = nn.ModuleList(
|
171
157
|
[
|
172
158
|
JointTransformerBlock(
|
173
159
|
dim=self.inner_dim,
|
174
|
-
num_attention_heads=
|
175
|
-
attention_head_dim=
|
160
|
+
num_attention_heads=num_attention_heads,
|
161
|
+
attention_head_dim=attention_head_dim,
|
176
162
|
context_pre_only=i == num_layers - 1,
|
177
163
|
qk_norm=qk_norm,
|
178
164
|
use_dual_attention=True if i in dual_attention_layers else False,
|
179
165
|
)
|
180
|
-
for i in range(
|
166
|
+
for i in range(num_layers)
|
181
167
|
]
|
182
168
|
)
|
183
169
|
|
@@ -328,30 +314,26 @@ class SD3Transformer2DModel(
|
|
328
314
|
if self.original_attn_processors is not None:
|
329
315
|
self.set_attn_processor(self.original_attn_processors)
|
330
316
|
|
331
|
-
def _set_gradient_checkpointing(self, module, value=False):
|
332
|
-
if hasattr(module, "gradient_checkpointing"):
|
333
|
-
module.gradient_checkpointing = value
|
334
|
-
|
335
317
|
def forward(
|
336
318
|
self,
|
337
|
-
hidden_states: torch.
|
338
|
-
encoder_hidden_states: torch.
|
339
|
-
pooled_projections: torch.
|
319
|
+
hidden_states: torch.Tensor,
|
320
|
+
encoder_hidden_states: torch.Tensor = None,
|
321
|
+
pooled_projections: torch.Tensor = None,
|
340
322
|
timestep: torch.LongTensor = None,
|
341
323
|
block_controlnet_hidden_states: List = None,
|
342
324
|
joint_attention_kwargs: Optional[Dict[str, Any]] = None,
|
343
325
|
return_dict: bool = True,
|
344
326
|
skip_layers: Optional[List[int]] = None,
|
345
|
-
) -> Union[torch.
|
327
|
+
) -> Union[torch.Tensor, Transformer2DModelOutput]:
|
346
328
|
"""
|
347
329
|
The [`SD3Transformer2DModel`] forward method.
|
348
330
|
|
349
331
|
Args:
|
350
|
-
hidden_states (`torch.
|
332
|
+
hidden_states (`torch.Tensor` of shape `(batch size, channel, height, width)`):
|
351
333
|
Input `hidden_states`.
|
352
|
-
encoder_hidden_states (`torch.
|
334
|
+
encoder_hidden_states (`torch.Tensor` of shape `(batch size, sequence_len, embed_dims)`):
|
353
335
|
Conditional embeddings (embeddings computed from the input conditions such as prompts) to use.
|
354
|
-
pooled_projections (`torch.
|
336
|
+
pooled_projections (`torch.Tensor` of shape `(batch_size, projection_dim)`):
|
355
337
|
Embeddings projected from the embeddings of input conditions.
|
356
338
|
timestep (`torch.LongTensor`):
|
357
339
|
Used to indicate denoising step.
|
@@ -403,24 +385,12 @@ class SD3Transformer2DModel(
|
|
403
385
|
is_skip = True if skip_layers is not None and index_block in skip_layers else False
|
404
386
|
|
405
387
|
if torch.is_grad_enabled() and self.gradient_checkpointing and not is_skip:
|
406
|
-
|
407
|
-
|
408
|
-
def custom_forward(*inputs):
|
409
|
-
if return_dict is not None:
|
410
|
-
return module(*inputs, return_dict=return_dict)
|
411
|
-
else:
|
412
|
-
return module(*inputs)
|
413
|
-
|
414
|
-
return custom_forward
|
415
|
-
|
416
|
-
ckpt_kwargs: Dict[str, Any] = {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
|
417
|
-
encoder_hidden_states, hidden_states = torch.utils.checkpoint.checkpoint(
|
418
|
-
create_custom_forward(block),
|
388
|
+
encoder_hidden_states, hidden_states = self._gradient_checkpointing_func(
|
389
|
+
block,
|
419
390
|
hidden_states,
|
420
391
|
encoder_hidden_states,
|
421
392
|
temb,
|
422
393
|
joint_attention_kwargs,
|
423
|
-
**ckpt_kwargs,
|
424
394
|
)
|
425
395
|
elif not is_skip:
|
426
396
|
encoder_hidden_states, hidden_states = block(
|
@@ -67,6 +67,8 @@ class TransformerTemporalModel(ModelMixin, ConfigMixin):
|
|
67
67
|
The maximum length of the sequence over which to apply positional embeddings.
|
68
68
|
"""
|
69
69
|
|
70
|
+
_skip_layerwise_casting_patterns = ["norm"]
|
71
|
+
|
70
72
|
@register_to_config
|
71
73
|
def __init__(
|
72
74
|
self,
|
@@ -341,19 +343,11 @@ class TransformerSpatioTemporalModel(nn.Module):
|
|
341
343
|
# 2. Blocks
|
342
344
|
for block, temporal_block in zip(self.transformer_blocks, self.temporal_transformer_blocks):
|
343
345
|
if torch.is_grad_enabled() and self.gradient_checkpointing:
|
344
|
-
hidden_states =
|
345
|
-
block,
|
346
|
-
hidden_states,
|
347
|
-
None,
|
348
|
-
encoder_hidden_states,
|
349
|
-
None,
|
350
|
-
use_reentrant=False,
|
346
|
+
hidden_states = self._gradient_checkpointing_func(
|
347
|
+
block, hidden_states, None, encoder_hidden_states, None
|
351
348
|
)
|
352
349
|
else:
|
353
|
-
hidden_states = block(
|
354
|
-
hidden_states,
|
355
|
-
encoder_hidden_states=encoder_hidden_states,
|
356
|
-
)
|
350
|
+
hidden_states = block(hidden_states, encoder_hidden_states=encoder_hidden_states)
|
357
351
|
|
358
352
|
hidden_states_mix = hidden_states
|
359
353
|
hidden_states_mix = hidden_states_mix + emb
|