diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +198 -28
- diffusers/loaders/lora_conversion_utils.py +679 -44
- diffusers/loaders/lora_pipeline.py +1963 -801
- diffusers/loaders/peft.py +169 -84
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +653 -75
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +22 -32
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +10 -2
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +14 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.1.dist-info/RECORD +0 -550
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py

@@ -43,6 +43,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -53,8 +54,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion_xl.pipeline_output import StableDiffusionXLPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -248,7 +257,8 @@ class StableDiffusionXLAdapterPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please, refer to the [model card](https://huggingface.co/
+            Please, refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            details.
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
@@ -292,9 +302,13 @@ class StableDiffusionXLAdapterPipeline(
             image_encoder=image_encoder,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
-        self.default_sample_size =
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )

     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
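The constructor hunks above replace hard attribute accesses with guarded fallbacks, so a pipeline can be instantiated with `vae=None` or `unet=None` without crashing in `__init__`. A minimal sketch of the fallback logic, with a hypothetical `Stub` standing in for a partially loaded pipeline:

```python
# Minimal sketch of the new fallback logic, detached from any pipeline class.
# `Stub` is a hypothetical stand-in for a pipeline whose vae/unet were not loaded.
class Stub:
    vae = None
    unet = None

stub = Stub()

# Without a VAE, fall back to the SD-family default scale factor of 8:
vae_scale_factor = (
    2 ** (len(stub.vae.config.block_out_channels) - 1) if getattr(stub, "vae", None) else 8
)

# Without a UNet (or one lacking `sample_size`), fall back to 128 (SDXL's 1024 / 8):
default_sample_size = (
    stub.unet.config.sample_size
    if getattr(stub, "unet", None) is not None and hasattr(stub.unet.config, "sample_size")
    else 128
)

assert vae_scale_factor == 8 and default_sample_size == 128
```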
@@ -422,7 +436,9 @@ class StableDiffusionXLAdapterPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -481,8 +497,10 @@ class StableDiffusionXLAdapterPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
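The new `ndim == 2` guards key off the fact that a `CLIPTextModelWithProjection` returns its pooled `text_embeds` as output index 0 (a 2-D tensor), while per-token hidden states are 3-D; they also avoid clobbering a caller-supplied `pooled_prompt_embeds`. A quick shape check, assuming the stock OpenAI CLIP checkpoint:

```python
# Shape check motivating the `prompt_embeds[0].ndim == 2` guard above.
# The model id is the standard OpenAI checkpoint, used here as an assumption.
import torch
from transformers import CLIPTextModelWithProjection, CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
text_encoder = CLIPTextModelWithProjection.from_pretrained("openai/clip-vit-large-patch14")

ids = tokenizer("a photo of a cat", return_tensors="pt").input_ids
with torch.no_grad():
    out = text_encoder(ids, output_hidden_states=True)

print(out[0].shape)                 # torch.Size([1, 768]) -> 2-D pooled embedding
print(out.hidden_states[-2].shape)  # torch.Size([1, 7, 768]) -> 3-D per-token states
```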
@@ -1261,6 +1279,9 @@ class StableDiffusionXLAdapterPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
             needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
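The same two-part pattern recurs in every pipeline touched below (text-to-video, UnCLIP, UniDiffuser, and others): a guarded `torch_xla` import at module scope, plus an `xm.mark_step()` at the end of each denoising step. A sketch of the pattern in isolation; `denoising_loop` and `step_fn` are hypothetical stand-ins:

```python
# Sketch of the torch_xla pattern this release threads through the pipelines.
# mark_step() cuts the lazily traced XLA graph so each denoising iteration is
# compiled and executed instead of accumulating into one giant graph.
from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False


def denoising_loop(timesteps, step_fn):
    # step_fn stands in for one scheduler/unet step (hypothetical helper).
    for t in timesteps:
        step_fn(t)
        if XLA_AVAILABLE:
            xm.mark_step()  # flush the pending XLA graph once per step
```

On non-XLA backends `XLA_AVAILABLE` is `False` and the guard is a no-op, so CUDA/CPU behavior is unchanged.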
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py

@@ -25,6 +25,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -36,8 +37,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -105,7 +114,7 @@ class TextToVideoSDPipeline(
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -627,6 +636,9 @@ class TextToVideoSDPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 8. Post processing
         if output_type == "latent":
             video = latents
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py

@@ -26,6 +26,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -37,8 +38,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import TextToVideoSDPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -140,7 +149,7 @@ class VideoToVideoSDPipeline(
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -679,6 +688,9 @@ class VideoToVideoSDPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # manually for max memory savings
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
             self.unet.to("cpu")
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py

@@ -11,16 +11,30 @@ from torch.nn.functional import grid_sample
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

 from ...image_processor import VaeImageProcessor
-from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import
+from ...utils import (
+    USE_PEFT_BACKEND,
+    BaseOutput,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from ..stable_diffusion import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -282,7 +296,11 @@ def create_motion_field_and_warp_latents(motion_field_strength_x, motion_field_s


 class TextToVideoZeroPipeline(
-    DiffusionPipeline,
+    DiffusionPipeline,
+    StableDiffusionMixin,
+    TextualInversionLoaderMixin,
+    StableDiffusionLoraLoaderMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for zero-shot text-to-video generation using Stable Diffusion.
|
|
304
322
|
[`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
|
305
323
|
safety_checker ([`StableDiffusionSafetyChecker`]):
|
306
324
|
Classification module that estimates whether generated images could be considered offensive or harmful.
|
307
|
-
Please refer to the [model card](https://huggingface.co/
|
308
|
-
about a model's potential harms.
|
325
|
+
Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
|
326
|
+
more details about a model's potential harms.
|
309
327
|
feature_extractor ([`CLIPImageProcessor`]):
|
310
328
|
A [`CLIPImageProcessor`] to extract features from generated images; used as inputs to the `safety_checker`.
|
311
329
|
"""
|
@@ -340,7 +358,7 @@ class TextToVideoZeroPipeline(
|
|
340
358
|
" it only for use-cases that involve analyzing network behavior or auditing its results. For more"
|
341
359
|
" information, please have a look at https://github.com/huggingface/diffusers/pull/254 ."
|
342
360
|
)
|
343
|
-
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
361
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
344
362
|
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
|
345
363
|
|
346
364
|
def forward_loop(self, x_t0, t0, t1, generator):
|
@@ -440,6 +458,10 @@ class TextToVideoZeroPipeline(
|
|
440
458
|
if callback is not None and i % callback_steps == 0:
|
441
459
|
step_idx = i // getattr(self.scheduler, "order", 1)
|
442
460
|
callback(step_idx, t, latents)
|
461
|
+
|
462
|
+
if XLA_AVAILABLE:
|
463
|
+
xm.mark_step()
|
464
|
+
|
443
465
|
return latents.clone().detach()
|
444
466
|
|
445
467
|
# Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs
|
diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py

@@ -42,6 +42,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker


+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -409,10 +419,14 @@ class TextToVideoZeroSDXLPipeline(
             feature_extractor=feature_extractor,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

-        self.default_sample_size =
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )

         add_watermarker = add_watermarker if add_watermarker is not None else is_invisible_watermark_available()

@@ -705,7 +719,9 @@ class TextToVideoZeroSDXLPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -764,8 +780,10 @@ class TextToVideoZeroSDXLPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -922,6 +940,10 @@ class TextToVideoZeroSDXLPipeline(
                 progress_bar.update()
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)
+
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         return latents.clone().detach()

     @torch.no_grad()
diffusers/pipelines/transformers_loading_utils.py (new file)

@@ -0,0 +1,121 @@
+# coding=utf-8
+# Copyright 2025 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import contextlib
+import os
+import tempfile
+from typing import TYPE_CHECKING, Dict
+
+from huggingface_hub import DDUFEntry
+from tqdm import tqdm
+
+from ..utils import is_safetensors_available, is_transformers_available, is_transformers_version
+
+
+if TYPE_CHECKING:
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+if is_transformers_available():
+    from transformers import PreTrainedModel, PreTrainedTokenizer
+
+if is_safetensors_available():
+    import safetensors.torch
+
+
+def _load_tokenizer_from_dduf(
+    cls: "PreTrainedTokenizer", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
+) -> "PreTrainedTokenizer":
+    """
+    Load a tokenizer from a DDUF archive.
+
+    In practice, `transformers` do not provide a way to load a tokenizer from a DDUF archive. This function is a
+    workaround by extracting the tokenizer files from the DDUF archive and loading the tokenizer from the extracted
+    files. There is an extra cost of extracting the files, but of limited impact as the tokenizer files are usually
+    small-ish.
+    """
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        for entry_name, entry in dduf_entries.items():
+            if entry_name.startswith(name + "/"):
+                tmp_entry_path = os.path.join(tmp_dir, *entry_name.split("/"))
+                # need to create intermediary directory if they don't exist
+                os.makedirs(os.path.dirname(tmp_entry_path), exist_ok=True)
+                with open(tmp_entry_path, "wb") as f:
+                    with entry.as_mmap() as mm:
+                        f.write(mm)
+        return cls.from_pretrained(os.path.dirname(tmp_entry_path), **kwargs)
+
+
+def _load_transformers_model_from_dduf(
+    cls: "PreTrainedModel", name: str, dduf_entries: Dict[str, DDUFEntry], **kwargs
+) -> "PreTrainedModel":
+    """
+    Load a transformers model from a DDUF archive.
+
+    In practice, `transformers` do not provide a way to load a model from a DDUF archive. This function is a workaround
+    by instantiating a model from the config file and loading the weights from the DDUF archive directly.
+    """
+    config_file = dduf_entries.get(f"{name}/config.json")
+    if config_file is None:
+        raise EnvironmentError(
+            f"Could not find a config.json file for component {name} in DDUF file (contains {dduf_entries.keys()})."
+        )
+    generation_config = dduf_entries.get(f"{name}/generation_config.json", None)
+
+    weight_files = [
+        entry
+        for entry_name, entry in dduf_entries.items()
+        if entry_name.startswith(f"{name}/") and entry_name.endswith(".safetensors")
+    ]
+    if not weight_files:
+        raise EnvironmentError(
+            f"Could not find any weight file for component {name} in DDUF file (contains {dduf_entries.keys()})."
+        )
+    if not is_safetensors_available():
+        raise EnvironmentError(
+            "Safetensors is not available, cannot load model from DDUF. Please `pip install safetensors`."
+        )
+    if is_transformers_version("<", "4.47.0"):
+        raise ImportError(
+            "You need to install `transformers>4.47.0` in order to load a transformers model from a DDUF file. "
+            "You can install it with: `pip install --upgrade transformers`"
+        )
+
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        from transformers import AutoConfig, GenerationConfig
+
+        tmp_config_file = os.path.join(tmp_dir, "config.json")
+        with open(tmp_config_file, "w") as f:
+            f.write(config_file.read_text())
+        config = AutoConfig.from_pretrained(tmp_config_file)
+        if generation_config is not None:
+            tmp_generation_config_file = os.path.join(tmp_dir, "generation_config.json")
+            with open(tmp_generation_config_file, "w") as f:
+                f.write(generation_config.read_text())
+            generation_config = GenerationConfig.from_pretrained(tmp_generation_config_file)
+        state_dict = {}
+        with contextlib.ExitStack() as stack:
+            for entry in tqdm(weight_files, desc="Loading state_dict"):  # Loop over safetensors files
+                # Memory-map the safetensors file
+                mmap = stack.enter_context(entry.as_mmap())
+                # Load tensors from the memory-mapped file
+                tensors = safetensors.torch.load(mmap)
+                # Update the state dictionary with tensors
+                state_dict.update(tensors)
+        return cls.from_pretrained(
+            pretrained_model_name_or_path=None,
+            config=config,
+            generation_config=generation_config,
+            state_dict=state_dict,
+            **kwargs,
+        )
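These helpers are internal plumbing for DDUF archives (single-file pipeline bundles). At the user level, DDUF loading surfaces through `DiffusionPipeline.from_pretrained`; a hedged sketch, where the `dduf_file` argument and the repo/file names follow the documented DDUF workflow and are assumptions rather than content of this diff:

```python
# Hedged sketch: loading a whole pipeline from one DDUF archive. The repo id and
# dduf_file name below are taken from the documented example and are assumptions.
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "DDUF/FLUX.1-dev-DDUF", dduf_file="FLUX.1-dev.dduf", torch_dtype=torch.bfloat16
)
```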
diffusers/pipelines/unclip/pipeline_unclip.py

@@ -22,12 +22,19 @@ from transformers.models.clip.modeling_clip import CLIPTextModelOutput

 from ...models import PriorTransformer, UNet2DConditionModel, UNet2DModel
 from ...schedulers import UnCLIPScheduler
-from ...utils import logging
+from ...utils import is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -474,6 +481,9 @@ class UnCLIPPipeline(DiffusionPipeline):
                 noise_pred, t, super_res_latents, prev_timestep=prev_timestep, generator=generator
             ).prev_sample

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = super_res_latents
         # done super res

diffusers/pipelines/unclip/pipeline_unclip_image_variation.py

@@ -27,12 +27,19 @@ from transformers import (

 from ...models import UNet2DConditionModel, UNet2DModel
 from ...schedulers import UnCLIPScheduler
-from ...utils import logging
+from ...utils import is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .text_proj import UnCLIPTextProjModel


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -400,6 +407,9 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
                 noise_pred, t, super_res_latents, prev_timestep=prev_timestep, generator=generator
             ).prev_sample

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = super_res_latents

         # done super res
diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py

@@ -18,7 +18,14 @@ from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMix
 from ...models import AutoencoderKL
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.outputs import BaseOutput
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
@@ -26,6 +33,13 @@ from .modeling_text_decoder import UniDiffuserTextDecoder
 from .modeling_uvit import UniDiffuserModel


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -117,7 +131,7 @@ class UniDiffuserPipeline(DiffusionPipeline):
             scheduler=scheduler,
         )

-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

         self.num_channels_latents = vae.config.latent_channels
@@ -1378,6 +1392,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # 9. Post-processing
         image = None
         text = None
diffusers/pipelines/wan/__init__.py (new file)

@@ -0,0 +1,51 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_import_structure = {}
+
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_wan"] = ["WanPipeline"]
+    _import_structure["pipeline_wan_i2v"] = ["WanImageToVideoPipeline"]
+    _import_structure["pipeline_wan_video2video"] = ["WanVideoToVideoPipeline"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *
+    else:
+        from .pipeline_wan import WanPipeline
+        from .pipeline_wan_i2v import WanImageToVideoPipeline
+        from .pipeline_wan_video2video import WanVideoToVideoPipeline
+
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
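This is the standard diffusers lazy-import boilerplate: the subpackage module is swapped for a `_LazyModule`, so the heavy torch/transformers imports only run when a symbol is actually accessed, and missing optional dependencies surface as dummy objects with informative errors. A small sketch of the effect, assuming torch and transformers are installed:

```python
# Hedged sketch of what the boilerplate above buys: importing the subpackage is
# cheap, and the real module file is executed only on first attribute access.
import importlib

mod = importlib.import_module("diffusers.pipelines.wan")
print(type(mod))            # a _LazyModule instance, not a plain module
pipe_cls = mod.WanPipeline  # this attribute access triggers the real import
```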
diffusers/pipelines/wan/pipeline_output.py (new file)

@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+
+import torch
+
+from diffusers.utils import BaseOutput
+
+
+@dataclass
+class WanPipelineOutput(BaseOutput):
+    r"""
+    Output class for Wan pipelines.
+
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
+            denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`.
+    """
+
+    frames: torch.Tensor