diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
diffusers/pipelines/auto_pipeline.py

@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright
+# Copyright 2025 The HuggingFace Inc. team.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,6 +22,7 @@ from ..models.controlnets import ControlNetUnionModel
 from ..utils import is_sentencepiece_available
 from .aura_flow import AuraFlowPipeline
 from .cogview3 import CogView3PlusPipeline
+from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
 from .controlnet import (
     StableDiffusionControlNetImg2ImgPipeline,
     StableDiffusionControlNetInpaintPipeline,
@@ -33,6 +34,10 @@ from .controlnet import (
     StableDiffusionXLControlNetUnionInpaintPipeline,
     StableDiffusionXLControlNetUnionPipeline,
 )
+from .controlnet_sd3 import (
+    StableDiffusion3ControlNetInpaintingPipeline,
+    StableDiffusion3ControlNetPipeline,
+)
 from .deepfloyd_if import IFImg2ImgPipeline, IFInpaintingPipeline, IFPipeline
 from .flux import (
     FluxControlImg2ImgPipeline,
@@ -64,10 +69,12 @@ from .kandinsky2_2 import (
 )
 from .kandinsky3 import Kandinsky3Img2ImgPipeline, Kandinsky3Pipeline
 from .latent_consistency_models import LatentConsistencyModelImg2ImgPipeline, LatentConsistencyModelPipeline
-from .lumina import
+from .lumina import LuminaPipeline
+from .lumina2 import Lumina2Pipeline
 from .pag import (
     HunyuanDiTPAGPipeline,
     PixArtSigmaPAGPipeline,
+    SanaPAGPipeline,
     StableDiffusion3PAGImg2ImgPipeline,
     StableDiffusion3PAGPipeline,
     StableDiffusionControlNetPAGInpaintPipeline,
@@ -82,6 +89,7 @@ from .pag import (
     StableDiffusionXLPAGPipeline,
 )
 from .pixart_alpha import PixArtAlphaPipeline, PixArtSigmaPipeline
+from .sana import SanaPipeline
 from .stable_cascade import StableCascadeCombinedPipeline, StableCascadeDecoderPipeline
 from .stable_diffusion import (
     StableDiffusionImg2ImgPipeline,
@@ -116,11 +124,14 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("stable-diffusion-controlnet", StableDiffusionControlNetPipeline),
         ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetPipeline),
         ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionPipeline),
+        ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetPipeline),
         ("wuerstchen", WuerstchenCombinedPipeline),
         ("cascade", StableCascadeCombinedPipeline),
         ("lcm", LatentConsistencyModelPipeline),
         ("pixart-alpha", PixArtAlphaPipeline),
         ("pixart-sigma", PixArtSigmaPipeline),
+        ("sana", SanaPipeline),
+        ("sana-pag", SanaPAGPipeline),
         ("stable-diffusion-pag", StableDiffusionPAGPipeline),
         ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGPipeline),
         ("stable-diffusion-xl-pag", StableDiffusionXLPAGPipeline),
@@ -130,8 +141,11 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
-        ("lumina",
+        ("lumina", LuminaPipeline),
+        ("lumina2", Lumina2Pipeline),
         ("cogview3", CogView3PlusPipeline),
+        ("cogview4", CogView4Pipeline),
+        ("cogview4-control", CogView4ControlPipeline),
     ]
 )
 
@@ -170,6 +184,7 @@ AUTO_INPAINT_PIPELINES_MAPPING = OrderedDict(
         ("stable-diffusion-controlnet-pag", StableDiffusionControlNetPAGInpaintPipeline),
         ("stable-diffusion-xl-controlnet", StableDiffusionXLControlNetInpaintPipeline),
         ("stable-diffusion-xl-controlnet-union", StableDiffusionXLControlNetUnionInpaintPipeline),
+        ("stable-diffusion-3-controlnet", StableDiffusion3ControlNetInpaintingPipeline),
         ("stable-diffusion-xl-pag", StableDiffusionXLPAGInpaintPipeline),
         ("flux", FluxInpaintPipeline),
         ("flux-controlnet", FluxControlNetInpaintPipeline),
@@ -293,7 +308,7 @@ class AutoPipelineForText2Image(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:
 
     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
     - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -385,7 +400,7 @@ class AutoPipelineForText2Image(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForText2Image
 
-        >>> pipeline = AutoPipelineForText2Image.from_pretrained("
+        >>> pipeline = AutoPipelineForText2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         >>> image = pipeline(prompt).images[0]
         ```
         """
@@ -448,7 +463,7 @@ class AutoPipelineForText2Image(ConfigMixin):
         >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
         >>> pipe_i2i = AutoPipelineForImage2Image.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False
         ... )
 
         >>> pipe_t2i = AutoPipelineForText2Image.from_pipe(pipe_i2i)
@@ -528,7 +543,9 @@ class AutoPipelineForText2Image(ConfigMixin):
             if k not in text_2_image_kwargs
         }
 
-        missing_modules =
+        missing_modules = (
+            set(expected_modules) - set(text_2_image_cls._optional_components) - set(text_2_image_kwargs.keys())
+        )
 
         if len(missing_modules) > 0:
             raise ValueError(
@@ -587,7 +604,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:
 
     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
    - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -679,7 +696,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForImage2Image
 
-        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("
+        >>> pipeline = AutoPipelineForImage2Image.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         >>> image = pipeline(prompt, image).images[0]
         ```
         """
@@ -754,7 +771,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
         >>> from diffusers import AutoPipelineForText2Image, AutoPipelineForImage2Image
 
         >>> pipe_t2i = AutoPipelineForText2Image.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", requires_safety_checker=False
         ... )
 
        >>> pipe_i2i = AutoPipelineForImage2Image.from_pipe(pipe_t2i)
@@ -838,7 +855,9 @@ class AutoPipelineForImage2Image(ConfigMixin):
             if k not in image_2_image_kwargs
         }
 
-        missing_modules =
+        missing_modules = (
+            set(expected_modules) - set(image_2_image_cls._optional_components) - set(image_2_image_kwargs.keys())
+        )
 
         if len(missing_modules) > 0:
             raise ValueError(
@@ -896,7 +915,7 @@ class AutoPipelineForInpainting(ConfigMixin):
     If you get the error message below, you need to finetune the weights for your downstream task:
 
     ```
-    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at
+    Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
     - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
     You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
     ```
@@ -988,7 +1007,7 @@ class AutoPipelineForInpainting(ConfigMixin):
         ```py
         >>> from diffusers import AutoPipelineForInpainting
 
-        >>> pipeline = AutoPipelineForInpainting.from_pretrained("
+        >>> pipeline = AutoPipelineForInpainting.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
         >>> image = pipeline(prompt, image=init_image, mask_image=mask_image).images[0]
         ```
         """
@@ -1141,7 +1160,9 @@ class AutoPipelineForInpainting(ConfigMixin):
             if k not in inpainting_kwargs
         }
 
-        missing_modules =
+        missing_modules = (
+            set(expected_modules) - set(inpainting_cls._optional_components) - set(inpainting_kwargs.keys())
+        )
 
         if len(missing_modules) > 0:
             raise ValueError(
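The mapping changes above are what let the auto classes resolve the newly added model families (Sana, Lumina/Lumina 2, CogView4, SD3 ControlNet) from a checkpoint's pipeline class. A minimal usage sketch; the CogView4 hub id below is an assumption for illustration, not taken from this diff:

```py
# Sketch: loading a newly registered text-to-image pipeline via the auto class.
import torch
from diffusers import AutoPipelineForText2Image

pipe = AutoPipelineForText2Image.from_pretrained(
    "THUDM/CogView4-6B",  # assumed checkpoint id
    torch_dtype=torch.bfloat16,
)
image = pipe("a watercolor painting of a lighthouse").images[0]
```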
diffusers/pipelines/blip_diffusion/blip_image_processing.py

@@ -1,5 +1,5 @@
 # coding=utf-8
-# Copyright
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

diffusers/pipelines/blip_diffusion/modeling_blip2.py

@@ -174,19 +174,16 @@ class Blip2QFormerEncoder(nn.Module):
                     )
                     use_cache = False
 
-                def create_custom_forward(module):
-                    def custom_forward(*inputs):
-                        return module(*inputs, past_key_value, output_attentions, query_length)
-
-                    return custom_forward
-
-                layer_outputs = torch.utils.checkpoint.checkpoint(
-                    create_custom_forward(layer_module),
+                layer_outputs = self._gradient_checkpointing_func(
+                    layer_module,
                     hidden_states,
                     attention_mask,
                     layer_head_mask,
                     encoder_hidden_states,
                     encoder_attention_mask,
+                    past_key_value,
+                    output_attentions,
+                    query_length,
                 )
             else:
                 layer_outputs = layer_module(
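The modeling_blip2.py hunk swaps the closure-based call to `torch.utils.checkpoint.checkpoint` for the shared `self._gradient_checkpointing_func` helper, which accepts the extra arguments (`past_key_value`, `output_attentions`, `query_length`) positionally. For context, a runnable sketch of the closure pattern being removed, on a generic module rather than the Q-Former layer:

```py
# Sketch of the removed pattern: checkpointing re-runs the wrapped forward
# during backward, so extra call arguments used to be captured in a closure.
import torch
import torch.nn as nn

layer = nn.Linear(8, 8)
x = torch.randn(2, 8, requires_grad=True)


def create_custom_forward(module):
    def custom_forward(*inputs):
        return module(*inputs)

    return custom_forward


out = torch.utils.checkpoint.checkpoint(create_custom_forward(layer), x, use_reentrant=False)
out.sum().backward()  # activations are recomputed here instead of stored
```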
diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py

@@ -20,6 +20,7 @@ from transformers import CLIPTokenizer
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import PNDMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -30,8 +31,16 @@ from .modeling_blip2 import Blip2QFormerModel
 from .modeling_ctx_clip import ContextCLIPTextModel
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -336,6 +345,9 @@ class BlipDiffusionPipeline(DiffusionPipeline):
                 latents,
             )["prev_sample"]
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         image = self.image_processor.postprocess(image, output_type=output_type)
 
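The same `is_torch_xla_available()` guard recurs in every pipeline below. On XLA devices such as TPUs, `xm.mark_step()` at the end of each denoising step flushes the lazily traced graph so compilation happens per step rather than for the whole loop at once; off XLA the branch is skipped entirely. The pattern in isolation, as a sketch with a stand-in loop:

```py
# Sketch of the XLA guard threaded through the pipelines in this diff.
from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

for t in [999, 500, 1]:  # stand-in for a denoising loop
    # model and scheduler calls would run here
    if XLA_AVAILABLE:
        xm.mark_step()  # cut the traced graph at the step boundary
```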
diffusers/pipelines/cogvideo/pipeline_cogvideox.py

@@ -26,12 +26,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -183,14 +190,12 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7
 
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
 
@@ -489,6 +494,10 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
     def attention_kwargs(self):
         return self._attention_kwargs
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -622,6 +631,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False
 
         # 2. Default call parameters
@@ -700,6 +710,7 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
@@ -755,6 +766,11 @@ class CogVideoXPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             # Discard any padding frames that were added for CogVideoX 1.5
             latents = latents[:, additional_frames:]
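The constructor hunks replace `hasattr(self, "vae") and self.vae is not None` with the terser `getattr(self, "vae", None)`; both guards fall back to the hard-coded defaults (spatial factor 8, temporal factor 4, scaling 0.7) when the pipeline is assembled without a VAE. A self-contained sketch of the equivalence:

```py
# Sketch: getattr with a None default covers both "attribute missing" and
# "attribute is None", matching the longer hasattr guard it replaces.
class _VaeConfig:
    block_out_channels = [128, 256, 512, 512, 512]


class _Vae:
    config = _VaeConfig()


class _Demo:
    def __init__(self, vae=None):
        if vae is not None:
            self.vae = vae
        self.vae_scale_factor_spatial = (
            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        )


print(_Demo(_Vae()).vae_scale_factor_spatial)  # 16, derived from the config
print(_Demo().vae_scale_factor_spatial)  # 8, the fallback without a VAE
```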
diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py

@@ -27,12 +27,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -190,14 +197,12 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7
 
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
 
@@ -535,6 +540,10 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
     def attention_kwargs(self):
         return self._attention_kwargs
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -675,6 +684,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False
 
         # 2. Default call parameters
@@ -761,6 +771,7 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
@@ -810,6 +821,11 @@ class CogVideoXFunControlPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             video = self.decode_latents(latents)
             video = self.video_processor.postprocess_video(video=video, output_type=output_type)
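The `_current_timestep` bookkeeping added in each CogVideoX variant follows one contract: set at the top of every loop iteration, exposed read-only through the `current_timestep` property, and reset to `None` once sampling finishes. A minimal stand-in class (hypothetical, not the diffusers API) showing that contract:

```py
# Sketch of the bookkeeping contract: set per step, reset after the loop.
class _PipelineLike:
    def __init__(self):
        self._current_timestep = None

    @property
    def current_timestep(self):
        return self._current_timestep

    def run(self, timesteps):
        for t in timesteps:
            self._current_timestep = t
            # denoising work for timestep t would happen here
        self._current_timestep = None  # mirrors the reset after the loop


p = _PipelineLike()
p.run([999, 500, 1])
print(p.current_timestep)  # None: sampling finished
```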
diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py

@@ -29,6 +29,7 @@ from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -37,6 +38,13 @@ from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -203,14 +211,12 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
             scheduler=scheduler,
         )
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7
 
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
 
@@ -585,6 +591,10 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
     def attention_kwargs(self):
         return self._attention_kwargs
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -722,6 +732,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
             negative_prompt_embeds=negative_prompt_embeds,
         )
         self._guidance_scale = guidance_scale
+        self._current_timestep = None
         self._attention_kwargs = attention_kwargs
         self._interrupt = False
 
@@ -809,6 +820,7 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
@@ -868,6 +880,11 @@ class CogVideoXImageToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             # Discard any padding frames that were added for CogVideoX 1.5
             latents = latents[:, additional_frames:]
diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py

@@ -27,12 +27,19 @@ from ...models import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel
 from ...models.embeddings import get_3d_rotary_pos_embed
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ...video_processor import VideoProcessor
 from .pipeline_output import CogVideoXPipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -206,14 +213,12 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
         )
 
         self.vae_scale_factor_spatial = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if
+            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         )
         self.vae_scale_factor_temporal = (
-            self.vae.config.temporal_compression_ratio if
-        )
-        self.vae_scaling_factor_image = (
-            self.vae.config.scaling_factor if hasattr(self, "vae") and self.vae is not None else 0.7
+            self.vae.config.temporal_compression_ratio if getattr(self, "vae", None) else 4
         )
+        self.vae_scaling_factor_image = self.vae.config.scaling_factor if getattr(self, "vae", None) else 0.7
 
         self.video_processor = VideoProcessor(vae_scale_factor=self.vae_scale_factor_spatial)
 
@@ -559,6 +564,10 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
     def attention_kwargs(self):
         return self._attention_kwargs
 
+    @property
+    def current_timestep(self):
+        return self._current_timestep
+
     @property
     def interrupt(self):
         return self._interrupt
@@ -695,6 +704,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
         )
         self._guidance_scale = guidance_scale
         self._attention_kwargs = attention_kwargs
+        self._current_timestep = None
         self._interrupt = False
 
         # 2. Default call parameters
@@ -781,6 +791,7 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if self.interrupt:
                     continue
 
+                self._current_timestep = t
                 latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
@@ -836,6 +847,11 @@ class CogVideoXVideoToVideoPipeline(DiffusionPipeline, CogVideoXLoraLoaderMixin)
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
+        self._current_timestep = None
+
         if not output_type == "latent":
             video = self.decode_latents(latents)
             video = self.video_processor.postprocess_video(video=video, output_type=output_type)
diffusers/pipelines/cogview3/pipeline_cogview3plus.py

@@ -24,11 +24,18 @@ from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, CogView3PlusTransformer2DModel
 from ...pipelines.pipeline_utils import DiffusionPipeline
 from ...schedulers import CogVideoXDDIMScheduler, CogVideoXDPMScheduler
-from ...utils import logging, replace_example_docstring
+from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from .pipeline_output import CogView3PipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -153,9 +160,7 @@ class CogView3PlusPipeline(DiffusionPipeline):
         self.register_modules(
             tokenizer=tokenizer, text_encoder=text_encoder, vae=vae, transformer=transformer, scheduler=scheduler
         )
-        self.vae_scale_factor = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
-        )
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
 
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
 
@@ -656,6 +661,9 @@ class CogView3PlusPipeline(DiffusionPipeline):
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
 
+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
                 0
diffusers/pipelines/cogview4/__init__.py

@@ -0,0 +1,49 @@
+from typing import TYPE_CHECKING
+
+from ...utils import (
+    DIFFUSERS_SLOW_IMPORT,
+    OptionalDependencyNotAvailable,
+    _LazyModule,
+    get_objects_from_module,
+    is_torch_available,
+    is_transformers_available,
+)
+
+
+_dummy_objects = {}
+_additional_imports = {}
+_import_structure = {"pipeline_output": ["CogView4PlusPipelineOutput"]}
+
+try:
+    if not (is_transformers_available() and is_torch_available()):
+        raise OptionalDependencyNotAvailable()
+except OptionalDependencyNotAvailable:
+    from ...utils import dummy_torch_and_transformers_objects  # noqa F403
+
+    _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
+else:
+    _import_structure["pipeline_cogview4"] = ["CogView4Pipeline"]
+    _import_structure["pipeline_cogview4_control"] = ["CogView4ControlPipeline"]
+if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
+    try:
+        if not (is_transformers_available() and is_torch_available()):
+            raise OptionalDependencyNotAvailable()
+    except OptionalDependencyNotAvailable:
+        from ...utils.dummy_torch_and_transformers_objects import *  # noqa F403
+    else:
+        from .pipeline_cogview4 import CogView4Pipeline
+        from .pipeline_cogview4_control import CogView4ControlPipeline
+else:
+    import sys
+
+    sys.modules[__name__] = _LazyModule(
+        __name__,
+        globals()["__file__"],
+        _import_structure,
+        module_spec=__spec__,
+    )
+
+    for name, value in _dummy_objects.items():
+        setattr(sys.modules[__name__], name, value)
+    for name, value in _additional_imports.items():
+        setattr(sys.modules[__name__], name, value)