diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +595 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
- diffusers-0.33.1.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
diffusers/loaders/textual_inversion.py CHANGED
```diff
@@ -40,7 +40,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
     force_download = kwargs.pop("force_download", False)
     proxies = kwargs.pop("proxies", None)
     local_files_only = kwargs.pop("local_files_only", None)
-    token = kwargs.pop("token", None)
+    hf_token = kwargs.pop("hf_token", None)
     revision = kwargs.pop("revision", None)
     subfolder = kwargs.pop("subfolder", None)
     weight_name = kwargs.pop("weight_name", None)
@@ -73,7 +73,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                         force_download=force_download,
                         proxies=proxies,
                         local_files_only=local_files_only,
-                        token=token,
+                        token=hf_token,
                         revision=revision,
                         subfolder=subfolder,
                         user_agent=user_agent,
@@ -93,7 +93,7 @@ def load_textual_inversion_state_dicts(pretrained_model_name_or_paths, **kwargs)
                     force_download=force_download,
                     proxies=proxies,
                     local_files_only=local_files_only,
-                    token=token,
+                    token=hf_token,
                     revision=revision,
                     subfolder=subfolder,
                     user_agent=user_agent,
@@ -312,7 +312,7 @@ class TextualInversionLoaderMixin:
            local_files_only (`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
-           token (`str` or *bool*, *optional*):
+           hf_token (`str` or *bool*, *optional*):
                The token to use as HTTP bearer authorization for remote files. If `True`, the token generated from
                `diffusers-cli login` (stored in `~/.huggingface`) is used.
            revision (`str`, *optional*, defaults to `"main"`):
@@ -333,7 +333,7 @@ class TextualInversionLoaderMixin:
        from diffusers import StableDiffusionPipeline
        import torch

-       model_id = "runwayml/stable-diffusion-v1-5"
+       model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

        pipe.load_textual_inversion("sd-concepts-library/cat-toy")
@@ -352,7 +352,7 @@ class TextualInversionLoaderMixin:
        from diffusers import StableDiffusionPipeline
        import torch

-       model_id = "runwayml/stable-diffusion-v1-5"
+       model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

        pipe.load_textual_inversion("./charturnerv2.pt", token="charturnerv2")
@@ -449,9 +449,9 @@ class TextualInversionLoaderMixin:

        # 7.5 Offload the model again
        if is_model_cpu_offload:
-           self.enable_model_cpu_offload()
+           self.enable_model_cpu_offload(device=device)
        elif is_sequential_cpu_offload:
-           self.enable_sequential_cpu_offload()
+           self.enable_sequential_cpu_offload(device=device)

        # / Unsafe Code >

@@ -469,7 +469,7 @@ class TextualInversionLoaderMixin:
        from diffusers import AutoPipelineForText2Image
        import torch

-       pipeline = AutoPipelineForText2Image.from_pretrained("runwayml/stable-diffusion-v1-5")
+       pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")

        # Example 1
        pipeline.load_textual_inversion("sd-concepts-library/gta5-artwork")
```
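The motivation for the rename is visible within this same file: `load_textual_inversion` already uses `token` for the placeholder string of the learned embedding (the `charturnerv2` example above), so the Hub credential now travels as `hf_token`. A minimal sketch of a call exercising both, assuming the usual pipeline setup:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# `token` is the placeholder the embedding is registered under; `hf_token` is
# the Hub credential (True reuses the one stored by `huggingface-cli login`,
# per the updated docstring).
pipe.load_textual_inversion(
    "sd-concepts-library/cat-toy", token="<cat-toy>", hf_token=True
)
image = pipe("a <cat-toy> on a beach").images[0]
```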
diffusers/loaders/transformer_flux.py CHANGED
```diff
@@ -17,7 +17,7 @@ from ..models.embeddings import (
     ImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
 from ..utils import (
     is_accelerate_available,
     is_torch_version,
@@ -36,7 +36,7 @@ class FluxTransformer2DLoadersMixin:
     Load layers into a [`FluxTransformer2DModel`].
     """

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -82,11 +82,12 @@ class FluxTransformer2DLoadersMixin:
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             FluxIPAdapterJointAttnProcessor2_0,
         )
@@ -151,15 +152,15 @@ class FluxTransformer2DLoadersMixin:
             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(value_dict)
             else:
-                device = self.device
+                device_map = {"": self.device}
                 dtype = self.dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 1

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]

@@ -177,5 +178,3 @@ class FluxTransformer2DLoadersMixin:

         self.encoder_hid_proj = MultiIPAdapterImageProjection(image_projection_layers)
         self.config.encoder_hid_dim_type = "ip_image_proj"
-
-        self.to(dtype=self.dtype, device=self.device)
```
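All of these converters now route meta-device loading through an explicit `device_map`. The underlying pattern is plain `accelerate`; a self-contained sketch (the module and shapes are illustrative, not the diffusers internals):

```python
import torch
import torch.nn as nn
from accelerate import init_empty_weights
from accelerate.utils import set_module_tensor_to_device

with init_empty_weights():
    # Parameters are created on the `meta` device: no memory is allocated and
    # no random initialization runs, which is what `low_cpu_mem_usage` buys.
    proj = nn.Linear(4096, 3072)

state_dict = {"weight": torch.zeros(3072, 4096), "bias": torch.zeros(3072)}
target = "cpu"  # stands in for the `{"": self.device}` device_map above
for name, tensor in state_dict.items():
    # Materialize each meta tensor with real data directly on the target,
    # analogous to `load_model_dict_into_meta(..., device_map={"": device})`.
    set_module_tensor_to_device(proj, name, target, value=tensor)
```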
diffusers/loaders/transformer_sd3.py CHANGED
```diff
@@ -11,79 +11,160 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from contextlib import nullcontext
 from typing import Dict

 from ..models.attention_processor import SD3IPAdapterJointAttnProcessor2_0
 from ..models.embeddings import IPAdapterTimeImageProjection
 from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta
+from ..utils import is_accelerate_available, is_torch_version, logging
+
+
+logger = logging.get_logger(__name__)


 class SD3Transformer2DLoadersMixin:
     """Load IP-Adapters and LoRA layers into a `[SD3Transformer2DModel]`."""

-    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
-        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+    def _convert_ip_adapter_attn_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> Dict:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )

-        Args:
-            state_dict (`Dict`):
-                State dict with keys "ip_adapter", which contains parameters for attention processors, and
-                "image_proj", which contains parameters for image projection net.
-            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
-                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
-                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
-                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
-                argument to `True` will raise an error.
-        """
         # IP-Adapter cross attention parameters
         hidden_size = self.config.attention_head_dim * self.config.num_attention_heads
         ip_hidden_states_dim = self.config.attention_head_dim * self.config.num_attention_heads
-        timesteps_emb_dim = state_dict["ip_adapter"]["0.norm_ip.linear.weight"].shape[1]
+        timesteps_emb_dim = state_dict["0.norm_ip.linear.weight"].shape[1]

         # Dict where key is transformer layer index, value is attention processor's state dict
         # ip_adapter state dict keys example: "0.norm_ip.linear.weight"
         layer_state_dict = {idx: {} for idx in range(len(self.attn_processors))}
-        for key, weights in state_dict["ip_adapter"].items():
+        for key, weights in state_dict.items():
             idx, name = key.split(".", maxsplit=1)
             layer_state_dict[int(idx)][name] = weights

-        # Create IP-Adapter attention processor
+        # Create IP-Adapter attention processor & load state_dict
         attn_procs = {}
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
         for idx, name in enumerate(self.attn_processors.keys()):
-            attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
-                hidden_size=hidden_size,
-                ip_hidden_states_dim=ip_hidden_states_dim,
-                head_dim=self.config.attention_head_dim,
-                timesteps_emb_dim=timesteps_emb_dim,
-            ).to(self.device, dtype=self.dtype)
+            with init_context():
+                attn_procs[name] = SD3IPAdapterJointAttnProcessor2_0(
+                    hidden_size=hidden_size,
+                    ip_hidden_states_dim=ip_hidden_states_dim,
+                    head_dim=self.config.attention_head_dim,
+                    timesteps_emb_dim=timesteps_emb_dim,
+                )

             if not low_cpu_mem_usage:
                 attn_procs[name].load_state_dict(layer_state_dict[idx], strict=True)
             else:
+                device_map = {"": self.device}
                 load_model_dict_into_meta(
-                    attn_procs[name], layer_state_dict[idx], device=self.device, dtype=self.dtype
+                    attn_procs[name], layer_state_dict[idx], device_map=device_map, dtype=self.dtype
                 )

-        self.set_attn_processor(attn_procs)
+        return attn_procs
+
+    def _convert_ip_adapter_image_proj_to_diffusers(
+        self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT
+    ) -> IPAdapterTimeImageProjection:
+        if low_cpu_mem_usage:
+            if is_accelerate_available():
+                from accelerate import init_empty_weights
+
+            else:
+                low_cpu_mem_usage = False
+                logger.warning(
+                    "Cannot initialize model with low cpu memory usage because `accelerate` was not found in the"
+                    " environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install"
+                    " `accelerate` for faster and less memory-intense model loading. You can do so with: \n```\npip"
+                    " install accelerate\n```\n."
+                )
+
+        if low_cpu_mem_usage is True and not is_torch_version(">=", "1.9.0"):
+            raise NotImplementedError(
+                "Low memory initialization requires torch >= 1.9.0. Please either update your PyTorch version or set"
+                " `low_cpu_mem_usage=False`."
+            )
+
+        init_context = init_empty_weights if low_cpu_mem_usage else nullcontext
+
+        # Convert to diffusers
+        updated_state_dict = {}
+        for key, value in state_dict.items():
+            # InstantX/SD3.5-Large-IP-Adapter
+            if key.startswith("layers."):
+                idx = key.split(".")[1]
+                key = key.replace(f"layers.{idx}.0.norm1", f"layers.{idx}.ln0")
+                key = key.replace(f"layers.{idx}.0.norm2", f"layers.{idx}.ln1")
+                key = key.replace(f"layers.{idx}.0.to_q", f"layers.{idx}.attn.to_q")
+                key = key.replace(f"layers.{idx}.0.to_kv", f"layers.{idx}.attn.to_kv")
+                key = key.replace(f"layers.{idx}.0.to_out", f"layers.{idx}.attn.to_out.0")
+                key = key.replace(f"layers.{idx}.1.0", f"layers.{idx}.adaln_norm")
+                key = key.replace(f"layers.{idx}.1.1", f"layers.{idx}.ff.net.0.proj")
+                key = key.replace(f"layers.{idx}.1.3", f"layers.{idx}.ff.net.2")
+                key = key.replace(f"layers.{idx}.2.1", f"layers.{idx}.adaln_proj")
+            updated_state_dict[key] = value

         # Image projetion parameters
-        embed_dim = state_dict["image_proj"]["proj_in.weight"].shape[1]
-        output_dim = state_dict["image_proj"]["proj_out.weight"].shape[0]
-        hidden_dim = state_dict["image_proj"]["proj_in.weight"].shape[0]
-        heads = state_dict["image_proj"]["layers.0.attn.to_q.weight"].shape[0] // 64
-        num_queries = state_dict["image_proj"]["latents"].shape[1]
-        timestep_in_dim = state_dict["image_proj"]["time_embedding.linear_1.weight"].shape[1]
+        embed_dim = updated_state_dict["proj_in.weight"].shape[1]
+        output_dim = updated_state_dict["proj_out.weight"].shape[0]
+        hidden_dim = updated_state_dict["proj_in.weight"].shape[0]
+        heads = updated_state_dict["layers.0.attn.to_q.weight"].shape[0] // 64
+        num_queries = updated_state_dict["latents"].shape[1]
+        timestep_in_dim = updated_state_dict["time_embedding.linear_1.weight"].shape[1]

         # Image projection
-        self.image_proj = IPAdapterTimeImageProjection(
-            embed_dim=embed_dim,
-            output_dim=output_dim,
-            hidden_dim=hidden_dim,
-            heads=heads,
-            num_queries=num_queries,
-            timestep_in_dim=timestep_in_dim,
-        ).to(device=self.device, dtype=self.dtype)
+        with init_context():
+            image_proj = IPAdapterTimeImageProjection(
+                embed_dim=embed_dim,
+                output_dim=output_dim,
+                hidden_dim=hidden_dim,
+                heads=heads,
+                num_queries=num_queries,
+                timestep_in_dim=timestep_in_dim,
+            )

         if not low_cpu_mem_usage:
-            self.image_proj.load_state_dict(state_dict["image_proj"], strict=True)
+            image_proj.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(self.image_proj, state_dict["image_proj"], device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_proj, updated_state_dict, device_map=device_map, dtype=self.dtype)
+
+        return image_proj
+
+    def _load_ip_adapter_weights(self, state_dict: Dict, low_cpu_mem_usage: bool = _LOW_CPU_MEM_USAGE_DEFAULT) -> None:
+        """Sets IP-Adapter attention processors, image projection, and loads state_dict.
+
+        Args:
+            state_dict (`Dict`):
+                State dict with keys "ip_adapter", which contains parameters for attention processors, and
+                "image_proj", which contains parameters for image projection net.
+            low_cpu_mem_usage (`bool`, *optional*, defaults to `True` if torch version >= 1.9.0 else `False`):
+                Speed up model loading only loading the pretrained weights and not initializing the weights. This also
+                tries to not use more than 1x model size in CPU memory (including peak memory) while loading the model.
+                Only supported for PyTorch >= 1.9.0. If you are using an older version of PyTorch, setting this
+                argument to `True` will raise an error.
+        """
+
+        attn_procs = self._convert_ip_adapter_attn_to_diffusers(state_dict["ip_adapter"], low_cpu_mem_usage)
+        self.set_attn_processor(attn_procs)
+
+        self.image_proj = self._convert_ip_adapter_image_proj_to_diffusers(state_dict["image_proj"], low_cpu_mem_usage)
```
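Note how the rewritten projection converter infers every constructor argument from weight shapes instead of a config file; the same arithmetic in isolation (shapes are illustrative, and the `// 64` assumes the 64-wide attention heads the hunk hard-codes):

```python
import torch

# Toy image-projection state dict with the same key layout as the hunk above.
sd = {
    "proj_in.weight": torch.zeros(1280, 1152),        # (hidden_dim, embed_dim)
    "proj_out.weight": torch.zeros(2432, 1280),       # (output_dim, hidden_dim)
    "latents": torch.zeros(1, 64, 1280),              # (1, num_queries, hidden_dim)
    "layers.0.attn.to_q.weight": torch.zeros(1280, 1280),
    "time_embedding.linear_1.weight": torch.zeros(1280, 320),
}

embed_dim = sd["proj_in.weight"].shape[1]                        # 1152
output_dim = sd["proj_out.weight"].shape[0]                      # 2432
hidden_dim = sd["proj_in.weight"].shape[0]                       # 1280
heads = sd["layers.0.attn.to_q.weight"].shape[0] // 64           # 20
num_queries = sd["latents"].shape[1]                             # 64
timestep_in_dim = sd["time_embedding.linear_1.weight"].shape[1]  # 320
```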
diffusers/loaders/unet.py CHANGED
```diff
@@ -30,7 +30,7 @@ from ..models.embeddings import (
     IPAdapterPlusImageProjection,
     MultiIPAdapterImageProjection,
 )
-from ..models.modeling_utils import load_model_dict_into_meta, load_state_dict
+from ..models.modeling_utils import _LOW_CPU_MEM_USAGE_DEFAULT, load_model_dict_into_meta, load_state_dict
 from ..utils import (
     USE_PEFT_BACKEND,
     _get_model_file,
@@ -143,7 +143,7 @@ class UNet2DConditionLoadersMixin:
         adapter_name = kwargs.pop("adapter_name", None)
         _pipeline = kwargs.pop("_pipeline", None)
         network_alphas = kwargs.pop("network_alphas", None)
-        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", False)
+        low_cpu_mem_usage = kwargs.pop("low_cpu_mem_usage", _LOW_CPU_MEM_USAGE_DEFAULT)
         allow_pickle = False

         if low_cpu_mem_usage and is_peft_version("<=", "0.13.0"):
@@ -340,6 +340,17 @@ class UNet2DConditionLoadersMixin:
             else:
                 if is_peft_version("<", "0.9.0"):
                     lora_config_kwargs.pop("use_dora")
+
+            if "lora_bias" in lora_config_kwargs:
+                if lora_config_kwargs["lora_bias"]:
+                    if is_peft_version("<=", "0.13.2"):
+                        raise ValueError(
+                            "You need `peft` 0.14.0 at least to use `bias` in LoRAs. Please upgrade your installation of `peft`."
+                        )
+                else:
+                    if is_peft_version("<=", "0.13.2"):
+                        lora_config_kwargs.pop("lora_bias")
+
         lora_config = LoraConfig(**lora_config_kwargs)

         # adapter_name
@@ -529,7 +540,7 @@ class UNet2DConditionLoadersMixin:

         return state_dict

-    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_image_proj_to_diffusers(self, state_dict, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if low_cpu_mem_usage:
             if is_accelerate_available():
                 from accelerate import init_empty_weights
@@ -742,11 +753,12 @@ class UNet2DConditionLoadersMixin:
         if not low_cpu_mem_usage:
             image_projection.load_state_dict(updated_state_dict, strict=True)
         else:
-            load_model_dict_into_meta(image_projection, updated_state_dict, device=self.device, dtype=self.dtype)
+            device_map = {"": self.device}
+            load_model_dict_into_meta(image_projection, updated_state_dict, device_map=device_map, dtype=self.dtype)

         return image_projection

-    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=False):
+    def _convert_ip_adapter_attn_to_diffusers(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         from ..models.attention_processor import (
             IPAdapterAttnProcessor,
             IPAdapterAttnProcessor2_0,
@@ -835,13 +847,14 @@ class UNet2DConditionLoadersMixin:
             else:
                 device = next(iter(value_dict.values())).device
                 dtype = next(iter(value_dict.values())).dtype
-                load_model_dict_into_meta(attn_procs[name], value_dict, device=device, dtype=dtype)
+                device_map = {"": device}
+                load_model_dict_into_meta(attn_procs[name], value_dict, device_map=device_map, dtype=dtype)

             key_id += 2

         return attn_procs

-    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=False):
+    def _load_ip_adapter_weights(self, state_dicts, low_cpu_mem_usage=_LOW_CPU_MEM_USAGE_DEFAULT):
         if not isinstance(state_dicts, list):
             state_dicts = [state_dicts]
```
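The new `lora_bias` block is a compatibility gate: checkpoints that actually use LoRA bias hard-fail on peft <= 0.13.2, while checkpoints merely carrying the default `lora_bias=False` keep loading by silently dropping the kwarg that older peft doesn't know. A standalone sketch of the same gate, with a generic version check standing in for diffusers' `is_peft_version` helper:

```python
from packaging import version
import peft

def peft_at_most(v: str) -> bool:
    # Stand-in for diffusers' is_peft_version("<=", v)
    return version.parse(peft.__version__) <= version.parse(v)

lora_config_kwargs = {"r": 16, "lora_alpha": 16, "lora_bias": False}
if "lora_bias" in lora_config_kwargs:
    if lora_config_kwargs["lora_bias"]:
        if peft_at_most("0.13.2"):
            raise ValueError("You need `peft` 0.14.0 at least to use `bias` in LoRAs.")
    else:
        # Older peft rejects unknown kwargs; drop the no-op flag instead of failing.
        if peft_at_most("0.13.2"):
            lora_config_kwargs.pop("lora_bias")
```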
diffusers/models/__init__.py CHANGED
```diff
@@ -26,6 +26,7 @@ _import_structure = {}

 if is_torch_available():
     _import_structure["adapter"] = ["MultiAdapter", "T2IAdapter"]
+    _import_structure["auto_model"] = ["AutoModel"]
     _import_structure["autoencoders.autoencoder_asym_kl"] = ["AsymmetricAutoencoderKL"]
     _import_structure["autoencoders.autoencoder_dc"] = ["AutoencoderDC"]
     _import_structure["autoencoders.autoencoder_kl"] = ["AutoencoderKL"]
@@ -33,12 +34,15 @@ if is_torch_available():
     _import_structure["autoencoders.autoencoder_kl_cogvideox"] = ["AutoencoderKLCogVideoX"]
     _import_structure["autoencoders.autoencoder_kl_hunyuan_video"] = ["AutoencoderKLHunyuanVideo"]
     _import_structure["autoencoders.autoencoder_kl_ltx"] = ["AutoencoderKLLTXVideo"]
+    _import_structure["autoencoders.autoencoder_kl_magvit"] = ["AutoencoderKLMagvit"]
     _import_structure["autoencoders.autoencoder_kl_mochi"] = ["AutoencoderKLMochi"]
     _import_structure["autoencoders.autoencoder_kl_temporal_decoder"] = ["AutoencoderKLTemporalDecoder"]
+    _import_structure["autoencoders.autoencoder_kl_wan"] = ["AutoencoderKLWan"]
     _import_structure["autoencoders.autoencoder_oobleck"] = ["AutoencoderOobleck"]
     _import_structure["autoencoders.autoencoder_tiny"] = ["AutoencoderTiny"]
     _import_structure["autoencoders.consistency_decoder_vae"] = ["ConsistencyDecoderVAE"]
     _import_structure["autoencoders.vq_model"] = ["VQModel"]
+    _import_structure["cache_utils"] = ["CacheMixin"]
     _import_structure["controlnets.controlnet"] = ["ControlNetModel"]
     _import_structure["controlnets.controlnet_flux"] = ["FluxControlNetModel", "FluxMultiControlNetModel"]
     _import_structure["controlnets.controlnet_hunyuan"] = [
@@ -50,10 +54,12 @@ if is_torch_available():
     _import_structure["controlnets.controlnet_union"] = ["ControlNetUnionModel"]
     _import_structure["controlnets.controlnet_xs"] = ["ControlNetXSAdapter", "UNetControlNetXSModel"]
     _import_structure["controlnets.multicontrolnet"] = ["MultiControlNetModel"]
+    _import_structure["controlnets.multicontrolnet_union"] = ["MultiControlNetUnionModel"]
     _import_structure["embeddings"] = ["ImageProjection"]
     _import_structure["modeling_utils"] = ["ModelMixin"]
     _import_structure["transformers.auraflow_transformer_2d"] = ["AuraFlowTransformer2DModel"]
     _import_structure["transformers.cogvideox_transformer_3d"] = ["CogVideoXTransformer3DModel"]
+    _import_structure["transformers.consisid_transformer_3d"] = ["ConsisIDTransformer3DModel"]
     _import_structure["transformers.dit_transformer_2d"] = ["DiTTransformer2DModel"]
     _import_structure["transformers.dual_transformer_2d"] = ["DualTransformer2DModel"]
     _import_structure["transformers.hunyuan_transformer_2d"] = ["HunyuanDiT2DModel"]
@@ -67,12 +73,17 @@ if is_torch_available():
     _import_structure["transformers.transformer_2d"] = ["Transformer2DModel"]
     _import_structure["transformers.transformer_allegro"] = ["AllegroTransformer3DModel"]
     _import_structure["transformers.transformer_cogview3plus"] = ["CogView3PlusTransformer2DModel"]
+    _import_structure["transformers.transformer_cogview4"] = ["CogView4Transformer2DModel"]
+    _import_structure["transformers.transformer_easyanimate"] = ["EasyAnimateTransformer3DModel"]
     _import_structure["transformers.transformer_flux"] = ["FluxTransformer2DModel"]
     _import_structure["transformers.transformer_hunyuan_video"] = ["HunyuanVideoTransformer3DModel"]
     _import_structure["transformers.transformer_ltx"] = ["LTXVideoTransformer3DModel"]
+    _import_structure["transformers.transformer_lumina2"] = ["Lumina2Transformer2DModel"]
     _import_structure["transformers.transformer_mochi"] = ["MochiTransformer3DModel"]
+    _import_structure["transformers.transformer_omnigen"] = ["OmniGenTransformer2DModel"]
     _import_structure["transformers.transformer_sd3"] = ["SD3Transformer2DModel"]
     _import_structure["transformers.transformer_temporal"] = ["TransformerTemporalModel"]
+    _import_structure["transformers.transformer_wan"] = ["WanTransformer3DModel"]
     _import_structure["unets.unet_1d"] = ["UNet1DModel"]
     _import_structure["unets.unet_2d"] = ["UNet2DModel"]
     _import_structure["unets.unet_2d_condition"] = ["UNet2DConditionModel"]
@@ -93,6 +104,7 @@ if is_flax_available():
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     if is_torch_available():
         from .adapter import MultiAdapter, T2IAdapter
+        from .auto_model import AutoModel
         from .autoencoders import (
             AsymmetricAutoencoderKL,
             AutoencoderDC,
@@ -101,13 +113,16 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AutoencoderKLCogVideoX,
             AutoencoderKLHunyuanVideo,
             AutoencoderKLLTXVideo,
+            AutoencoderKLMagvit,
             AutoencoderKLMochi,
             AutoencoderKLTemporalDecoder,
+            AutoencoderKLWan,
             AutoencoderOobleck,
             AutoencoderTiny,
             ConsistencyDecoderVAE,
             VQModel,
         )
+        from .cache_utils import CacheMixin
         from .controlnets import (
             ControlNetModel,
             ControlNetUnionModel,
@@ -117,6 +132,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             HunyuanDiT2DControlNetModel,
             HunyuanDiT2DMultiControlNetModel,
             MultiControlNetModel,
+            MultiControlNetUnionModel,
             SD3ControlNetModel,
             SD3MultiControlNetModel,
             SparseControlNetModel,
@@ -129,15 +145,20 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             AuraFlowTransformer2DModel,
             CogVideoXTransformer3DModel,
             CogView3PlusTransformer2DModel,
+            CogView4Transformer2DModel,
+            ConsisIDTransformer3DModel,
             DiTTransformer2DModel,
             DualTransformer2DModel,
+            EasyAnimateTransformer3DModel,
             FluxTransformer2DModel,
             HunyuanDiT2DModel,
             HunyuanVideoTransformer3DModel,
             LatteTransformer3DModel,
             LTXVideoTransformer3DModel,
+            Lumina2Transformer2DModel,
             LuminaNextDiT2DModel,
             MochiTransformer3DModel,
+            OmniGenTransformer2DModel,
             PixArtTransformer2DModel,
             PriorTransformer,
             SanaTransformer2DModel,
@@ -146,6 +167,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             T5FilmDecoder,
             Transformer2DModel,
             TransformerTemporalModel,
+            WanTransformer3DModel,
         )
         from .unets import (
             I2VGenXLUNet,
```
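The `auto_model` entry registers the new `AutoModel` class alongside the lazily imported model classes. A minimal usage sketch, assuming the `from_pretrained(..., subfolder=...)` calling convention shared by diffusers models (the repo ID is illustrative):

```python
from diffusers import AutoModel

# AutoModel resolves the concrete class (here a UNet2DConditionModel) from the
# checkpoint's config, so the caller doesn't have to name the class themselves.
unet = AutoModel.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", subfolder="unet"
)
```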
diffusers/models/activations.py CHANGED
```diff
@@ -24,12 +24,12 @@ from ..utils.import_utils import is_torch_npu_available, is_torch_version
 if is_torch_npu_available():
     import torch_npu

-ACTIVATION_FUNCTIONS = {
-    "swish": nn.SiLU(),
-    "silu": nn.SiLU(),
-    "mish": nn.Mish(),
-    "gelu": nn.GELU(),
-    "relu": nn.ReLU(),
+ACT2CLS = {
+    "swish": nn.SiLU,
+    "silu": nn.SiLU,
+    "mish": nn.Mish,
+    "gelu": nn.GELU,
+    "relu": nn.ReLU,
 }


@@ -44,10 +44,10 @@ def get_activation(act_fn: str) -> nn.Module:
     """

     act_fn = act_fn.lower()
-    if act_fn in ACTIVATION_FUNCTIONS:
-        return ACTIVATION_FUNCTIONS[act_fn]
+    if act_fn in ACT2CLS:
+        return ACT2CLS[act_fn]()
     else:
-        raise ValueError(f"Unsupported activation function: {act_fn}")
+        raise ValueError(f"activation function {act_fn} not found in ACT2FN mapping {list(ACT2CLS.keys())}")


 class FP32SiLU(nn.Module):
```
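The mapping now stores activation classes rather than shared instances, so `get_activation` constructs a fresh module per call. The behavioral difference in miniature (a standalone reimplementation of the lines above):

```python
import torch.nn as nn

ACT2CLS = {"swish": nn.SiLU, "silu": nn.SiLU, "mish": nn.Mish, "gelu": nn.GELU, "relu": nn.ReLU}

def get_activation(act_fn: str) -> nn.Module:
    act_fn = act_fn.lower()
    if act_fn in ACT2CLS:
        return ACT2CLS[act_fn]()  # instantiate on every call
    raise ValueError(f"activation function {act_fn} not found in ACT2FN mapping {list(ACT2CLS.keys())}")

a, b = get_activation("silu"), get_activation("silu")
assert a is not b  # previously every caller shared one pre-built nn.SiLU() instance
```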
diffusers/models/attention.py CHANGED
```diff
@@ -612,7 +612,6 @@ class LuminaFeedForward(nn.Module):
         ffn_dim_multiplier: Optional[float] = None,
     ):
         super().__init__()
-        inner_dim = int(2 * inner_dim / 3)
         # custom hidden_size factor multiplier
         if ffn_dim_multiplier is not None:
             inner_dim = int(ffn_dim_multiplier * inner_dim)
```