diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +595 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
- diffusers-0.33.1.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
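
The headline additions in 0.33.1 are the new pipeline families visible in the list above (CogView4, ConsisID, EasyAnimate, Lumina2, OmniGen, Sana Sprint, Wan, HunyuanVideo image-to-video) together with the new diffusers/hooks package (group offloading, layerwise casting, FasterCache, pyramid attention broadcast). For orientation, here is a minimal sketch of driving one of the newly exported pipelines; the checkpoint id, device, and generation settings are assumptions, not anything this diff prescribes.

```python
# Minimal sketch: text-to-video with WanPipeline, newly exported in 0.33.
# The checkpoint id and the CUDA device are assumptions, not part of this diff.
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video

pipe = WanPipeline.from_pretrained(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",  # assumed Diffusers-format Wan 2.1 repo
    torch_dtype=torch.bfloat16,
).to("cuda")

frames = pipe(
    prompt="A cat walking through tall grass at sunset",
    num_frames=33,
    num_inference_steps=30,
).frames[0]
export_to_video(frames, "wan_t2v.mp4", fps=16)
```
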
diffusers/pipelines/__init__.py
CHANGED
@@ -10,6 +10,7 @@ from ..utils import (
     is_librosa_available,
     is_note_seq_available,
     is_onnx_available,
+    is_opencv_available,
     is_sentencepiece_available,
     is_torch_available,
     is_torch_npu_available,
@@ -154,6 +155,7 @@ else:
         "CogVideoXFunControlPipeline",
     ]
     _import_structure["cogview3"] = ["CogView3PlusPipeline"]
+    _import_structure["cogview4"] = ["CogView4Pipeline", "CogView4ControlPipeline"]
     _import_structure["controlnet"].extend(
         [
             "BlipDiffusionControlNetPipeline",
@@ -214,8 +216,17 @@ else:
         "IFPipeline",
         "IFSuperResolutionPipeline",
     ]
+    _import_structure["easyanimate"] = [
+        "EasyAnimatePipeline",
+        "EasyAnimateInpaintPipeline",
+        "EasyAnimateControlPipeline",
+    ]
     _import_structure["hunyuandit"] = ["HunyuanDiTPipeline"]
-    _import_structure["hunyuan_video"] = ["HunyuanVideoPipeline"]
+    _import_structure["hunyuan_video"] = [
+        "HunyuanVideoPipeline",
+        "HunyuanSkyreelsImageToVideoPipeline",
+        "HunyuanVideoImageToVideoPipeline",
+    ]
     _import_structure["kandinsky"] = [
         "KandinskyCombinedPipeline",
         "KandinskyImg2ImgCombinedPipeline",
@@ -253,20 +264,23 @@ else:
         ]
     )
     _import_structure["latte"] = ["LattePipeline"]
-    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline"]
-    _import_structure["lumina"] = ["LuminaText2ImgPipeline"]
+    _import_structure["ltx"] = ["LTXPipeline", "LTXImageToVideoPipeline", "LTXConditionPipeline"]
+    _import_structure["lumina"] = ["LuminaPipeline", "LuminaText2ImgPipeline"]
+    _import_structure["lumina2"] = ["Lumina2Pipeline", "Lumina2Text2ImgPipeline"]
     _import_structure["marigold"].extend(
         [
             "MarigoldDepthPipeline",
+            "MarigoldIntrinsicsPipeline",
             "MarigoldNormalsPipeline",
         ]
     )
     _import_structure["mochi"] = ["MochiPipeline"]
     _import_structure["musicldm"] = ["MusicLDMPipeline"]
+    _import_structure["omnigen"] = ["OmniGenPipeline"]
     _import_structure["paint_by_example"] = ["PaintByExamplePipeline"]
     _import_structure["pia"] = ["PIAPipeline"]
     _import_structure["pixart_alpha"] = ["PixArtAlphaPipeline", "PixArtSigmaPipeline"]
-    _import_structure["sana"] = ["SanaPipeline"]
+    _import_structure["sana"] = ["SanaPipeline", "SanaSprintPipeline"]
     _import_structure["semantic_stable_diffusion"] = ["SemanticStableDiffusionPipeline"]
     _import_structure["shap_e"] = ["ShapEImg2ImgPipeline", "ShapEPipeline"]
     _import_structure["stable_audio"] = [
@@ -342,6 +356,7 @@ else:
         "WuerstchenDecoderPipeline",
         "WuerstchenPriorPipeline",
     ]
+    _import_structure["wan"] = ["WanPipeline", "WanImageToVideoPipeline", "WanVideoToVideoPipeline"]
 try:
     if not is_onnx_available():
         raise OptionalDependencyNotAvailable()
@@ -399,6 +414,18 @@ else:
         "KolorsImg2ImgPipeline",
     ]

+try:
+    if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ..utils import (
+        dummy_torch_and_transformers_and_opencv_objects,
+    )
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_and_opencv_objects))
+else:
+    _import_structure["consisid"] = ["ConsisIDPipeline"]
+
 try:
     if not is_flax_available():
         raise OptionalDependencyNotAvailable()
@@ -496,6 +523,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             CogVideoXVideoToVideoPipeline,
         )
         from .cogview3 import CogView3PlusPipeline
+        from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
         from .controlnet import (
             BlipDiffusionControlNetPipeline,
             StableDiffusionControlNetImg2ImgPipeline,
@@ -538,6 +566,11 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             VersatileDiffusionTextToImagePipeline,
             VQDiffusionPipeline,
         )
+        from .easyanimate import (
+            EasyAnimateControlPipeline,
+            EasyAnimateInpaintPipeline,
+            EasyAnimatePipeline,
+        )
         from .flux import (
             FluxControlImg2ImgPipeline,
             FluxControlInpaintPipeline,
@@ -552,7 +585,11 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             FluxPriorReduxPipeline,
             ReduxImageEncoder,
         )
-        from .hunyuan_video import HunyuanVideoPipeline
+        from .hunyuan_video import (
+            HunyuanSkyreelsImageToVideoPipeline,
+            HunyuanVideoImageToVideoPipeline,
+            HunyuanVideoPipeline,
+        )
         from .hunyuandit import HunyuanDiTPipeline
         from .i2vgen_xl import I2VGenXLPipeline
         from .kandinsky import (
@@ -592,14 +629,17 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             LEditsPPPipelineStableDiffusion,
             LEditsPPPipelineStableDiffusionXL,
         )
-        from .ltx import LTXImageToVideoPipeline, LTXPipeline
-        from .lumina import LuminaText2ImgPipeline
+        from .ltx import LTXConditionPipeline, LTXImageToVideoPipeline, LTXPipeline
+        from .lumina import LuminaPipeline, LuminaText2ImgPipeline
+        from .lumina2 import Lumina2Pipeline, Lumina2Text2ImgPipeline
         from .marigold import (
             MarigoldDepthPipeline,
+            MarigoldIntrinsicsPipeline,
             MarigoldNormalsPipeline,
         )
         from .mochi import MochiPipeline
         from .musicldm import MusicLDMPipeline
+        from .omnigen import OmniGenPipeline
         from .pag import (
             AnimateDiffPAGPipeline,
             HunyuanDiTPAGPipeline,
@@ -622,7 +662,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
         from .paint_by_example import PaintByExamplePipeline
         from .pia import PIAPipeline
         from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
-        from .sana import SanaPipeline
+        from .sana import SanaPipeline, SanaSprintPipeline
         from .semantic_stable_diffusion import SemanticStableDiffusionPipeline
         from .shap_e import ShapEImg2ImgPipeline, ShapEPipeline
         from .stable_audio import StableAudioPipeline, StableAudioProjectionModel
@@ -680,6 +720,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             UniDiffuserPipeline,
             UniDiffuserTextDecoder,
         )
+        from .wan import WanImageToVideoPipeline, WanPipeline, WanVideoToVideoPipeline
         from .wuerstchen import (
             WuerstchenCombinedPipeline,
             WuerstchenDecoderPipeline,
@@ -731,6 +772,14 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
             KolorsPipeline,
         )

+    try:
+        if not (is_torch_available() and is_transformers_available() and is_opencv_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ..utils.dummy_torch_and_transformers_and_opencv_objects import *
+    else:
+        from .consisid import ConsisIDPipeline
+
     try:
         if not is_flax_available():
             raise OptionalDependencyNotAvailable()
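
Every pipeline added in the hunks above is registered twice: once in the module-level `_import_structure` dict that diffusers' lazy loader consumes at runtime, and once in the `if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:` branch so type checkers and slow-import mode see ordinary imports. A toy version of the mechanism is sketched below; the real implementation is `diffusers.utils._LazyModule`, and this merely stands in for a package `__init__.py`.

```python
# Toy sketch of the lazy-export pattern the hunks above extend.
# Drop into a package __init__.py; PEP 562 module __getattr__ does the work.
import importlib
from typing import TYPE_CHECKING

_import_structure = {"wan": ["WanPipeline"], "lumina2": ["Lumina2Pipeline"]}

if TYPE_CHECKING:
    pass  # diffusers places the real imports here for static analysis
else:
    def __getattr__(name: str):
        # Import the owning submodule only when one of its names is first accessed.
        for submodule, exported in _import_structure.items():
            if name in exported:
                module = importlib.import_module(f".{submodule}", __name__)
                return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```
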
diffusers/pipelines/allegro/pipeline_allegro.py
CHANGED
@@ -33,6 +33,7 @@ from ...utils import (
     deprecate,
     is_bs4_available,
     is_ftfy_available,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -41,6 +42,14 @@ from ...video_processor import VideoProcessor
 from .pipeline_output import AllegroPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 logger = logging.get_logger(__name__)

 if is_bs4_available():
@@ -194,10 +203,10 @@ class AllegroPipeline(DiffusionPipeline):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if hasattr(self, "vae") and self.vae is not None else 4
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )

         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
@@ -674,6 +683,10 @@ class AllegroPipeline(DiffusionPipeline):
     def num_timesteps(self):
         return self._num_timesteps

+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -806,6 +819,7 @@ class AllegroPipeline(DiffusionPipeline):
             negative_prompt_attention_mask,
         )
         self._guidance_scale = guidance_scale
+        self._current_timestep = None
         self._interrupt = False

         # 2. Default height and width to transformer
@@ -883,6 +897,7 @@ class AllegroPipeline(DiffusionPipeline):
                 if self.interrupt:
                     continue

+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

@@ -921,6 +936,11 @@ class AllegroPipeline(DiffusionPipeline):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             latents = latents.to(self.vae.dtype)
             video = self.decode_latents(latents)
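
The `is_torch_xla_available()` guard and the per-step `xm.mark_step()` call introduced here reappear in every pipeline below. On TPU, torch_xla records operations into a lazy graph; marking a step at the end of each denoising iteration cuts that graph at the loop boundary, so one step compiles once and is replayed rather than the whole loop being traced as a single giant graph. A self-contained sketch of the pattern, with a trivial stand-in for the scheduler/transformer step:

```python
# Sketch of the XLA stepping pattern these hunks add. torch_xla is optional;
# on CPU/GPU the guard stays False and the loop runs unchanged.
import torch

try:
    import torch_xla.core.xla_model as xm
    XLA_AVAILABLE = True
except ImportError:
    XLA_AVAILABLE = False

def denoise(latents: torch.Tensor, num_steps: int) -> torch.Tensor:
    for _ in range(num_steps):
        latents = latents * 0.99  # stand-in for the real UNet/transformer + scheduler step
        if XLA_AVAILABLE:
            xm.mark_step()  # flush the lazily traced graph once per denoising step
    return latents
```
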
diffusers/pipelines/amused/pipeline_amused.py
CHANGED
@@ -20,10 +20,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -66,7 +74,9 @@ class AmusedPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)

     @torch.no_grad()
@@ -297,6 +307,9 @@ class AmusedPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
diffusers/pipelines/amused/pipeline_amused_img2img.py
CHANGED
@@ -20,10 +20,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -81,7 +89,9 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)

     @torch.no_grad()
@@ -323,6 +333,9 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
diffusers/pipelines/amused/pipeline_amused_inpaint.py
CHANGED
@@ -21,10 +21,18 @@ from transformers import CLIPTextModelWithProjection, CLIPTokenizer
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...models import UVit2DModel, VQModel
 from ...schedulers import AmusedScheduler
-from ...utils import replace_example_docstring
+from ...utils import is_torch_xla_available, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -89,7 +97,9 @@ class AmusedInpaintPipeline(DiffusionPipeline):
             transformer=transformer,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vqvae.config.block_out_channels) - 1)
+        self.vae_scale_factor = (
+            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
+        )
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_normalize=False)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor,
@@ -354,6 +364,9 @@ class AmusedInpaintPipeline(DiffusionPipeline):
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, timestep, latents)

+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if output_type == "latent":
             output = latents
         else:
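
The recurring `vae_scale_factor` rewrite in the aMUSEd hunks above (and the `getattr(self, "vae", None)` variants in the AnimateDiff files below) is a defensive default: since 0.33 a pipeline can be constructed with a component registered as `None` and falls back to the conventional scale factor of 8 instead of raising `AttributeError`. A minimal sketch of the idea, using toy classes in place of diffusers' real VQModel:

```python
# Sketch of the defensive component default these hunks introduce.
# ToyConfig/ToyVQModel are illustrative stand-ins for diffusers' VQModel.
from dataclasses import dataclass, field

@dataclass
class ToyConfig:
    block_out_channels: list = field(default_factory=lambda: [64, 128])

@dataclass
class ToyVQModel:
    config: ToyConfig = field(default_factory=ToyConfig)

class ToyPipeline:
    def __init__(self, vqvae=None):
        self.vqvae = vqvae
        # Fall back to 8 when the component is missing instead of crashing.
        self.vae_scale_factor = (
            2 ** (len(self.vqvae.config.block_out_channels) - 1) if getattr(self, "vqvae", None) else 8
        )

assert ToyPipeline(ToyVQModel()).vae_scale_factor == 2
assert ToyPipeline(None).vae_scale_factor == 8
```
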
diffusers/pipelines/animatediff/pipeline_animatediff.py
CHANGED
@@ -19,7 +19,7 @@ import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

 from ...image_processor import PipelineImageInput
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...models.unets.unet_motion_model import MotionAdapter
@@ -34,6 +34,7 @@ from ...schedulers import (
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -47,8 +48,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -74,6 +83,7 @@ class AnimateDiffPipeline(
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
     AnimateDiffFreeNoiseMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-video generation.
@@ -139,7 +149,7 @@ class AnimateDiffPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)

         # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt with num_images_per_prompt -> num_videos_per_prompt
@@ -844,6 +854,9 @@ class AnimateDiffPipeline(
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 9. Post processing
         if output_type == "latent":
             video = latents
diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
CHANGED
@@ -20,7 +20,7 @@ import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

 from ...image_processor import PipelineImageInput
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import (
     AutoencoderKL,
     ControlNetModel,
@@ -32,7 +32,7 @@ from ...models import (
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...models.unets.unet_motion_model import MotionAdapter
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import USE_PEFT_BACKEND, logging, scale_lora_layers, unscale_lora_layers
+from ...utils import USE_PEFT_BACKEND, is_torch_xla_available, logging, scale_lora_layers, unscale_lora_layers
 from ...utils.torch_utils import is_compiled_module, randn_tensor
 from ...video_processor import VideoProcessor
 from ..free_init_utils import FreeInitMixin
@@ -41,8 +41,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -117,6 +125,7 @@ class AnimateDiffControlNetPipeline(
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
     AnimateDiffFreeNoiseMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for text-to-video generation with ControlNet guidance.
@@ -180,7 +189,7 @@ class AnimateDiffControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)
         self.control_video_processor = VideoProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1090,6 +1099,9 @@ class AnimateDiffControlNetPipeline(
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 9. Post processing
         if output_type == "latent":
             video = latents
diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py
CHANGED
@@ -48,6 +48,7 @@ from ...schedulers import (
 )
 from ...utils import (
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -60,8 +61,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -307,10 +316,14 @@ class AnimateDiffSDXLPipeline(
             feature_extractor=feature_extractor,
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor)

-        self.default_sample_size = self.unet.config.sample_size
+        self.default_sample_size = (
+            self.unet.config.sample_size
+            if hasattr(self, "unet") and self.unet is not None and hasattr(self.unet.config, "sample_size")
+            else 128
+        )

     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt with num_images_per_prompt->num_videos_per_prompt
     def encode_prompt(
@@ -438,7 +451,9 @@ class AnimateDiffSDXLPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)

                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds = prompt_embeds[0]
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -497,8 +512,10 @@ class AnimateDiffSDXLPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds = negative_prompt_embeds[0]
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]

                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1261,6 +1278,9 @@ class AnimateDiffSDXLPipeline(

                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # make sure the VAE is in float32 mode, as it overflows in float16
         needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast

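
The pooled-embedding change in the SDXL hunks above deserves a note: rather than unconditionally taking `prompt_embeds[0]` as the pooled vector, 0.33 only does so when nothing was passed in and the first encoder output is 2-D, i.e. an actual (batch, dim) pooled projection rather than a (batch, seq_len, dim) hidden state. A small sketch of the guard in isolation (the function name is illustrative):

```python
# Sketch of the pooled-embedding guard added in the SDXL encode_prompt hunks.
# A pooled projection is 2-D (batch, dim); hidden states are 3-D (batch, seq, dim).
import torch

def select_pooled(encoder_outputs, pooled=None):
    if pooled is None and encoder_outputs[0].ndim == 2:
        pooled = encoder_outputs[0]
    return pooled

assert select_pooled((torch.zeros(1, 1280),)) is not None   # pooled output present
assert select_pooled((torch.zeros(1, 77, 768),)) is None    # plain hidden states only
```
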
diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py
CHANGED
@@ -22,7 +22,7 @@ import torch.nn.functional as F
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection

 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel, UNetMotionModel
 from ...models.controlnets.controlnet_sparsectrl import SparseControlNetModel
 from ...models.lora import adjust_lora_scale_text_encoder
@@ -30,6 +30,7 @@ from ...models.unets.unet_motion_model import MotionAdapter
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -42,8 +43,16 @@ from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from .pipeline_output import AnimateDiffPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```python
@@ -127,6 +136,7 @@ class AnimateDiffSparseControlNetPipeline(
     IPAdapterMixin,
     StableDiffusionLoraLoaderMixin,
     FreeInitMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for controlled text-to-video generation using the method described in [SparseCtrl: Adding Sparse Controls
@@ -188,7 +198,7 @@ class AnimateDiffSparseControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.video_processor = VideoProcessor(do_resize=False, vae_scale_factor=self.vae_scale_factor)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -994,6 +1004,9 @@ class AnimateDiffSparseControlNetPipeline(
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         # 11. Post processing
         if output_type == "latent":
             video = latents