diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl
This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +595 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
- diffusers-0.33.1.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/consisid/pipeline_output.py
@@ -0,0 +1,20 @@
+from dataclasses import dataclass
+
+import torch
+
+from diffusers.utils import BaseOutput
+
+
+@dataclass
+class ConsisIDPipelineOutput(BaseOutput):
+    r"""
+    Output class for ConsisID pipelines.
+
+    Args:
+        frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
+            List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
+            denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
+            `(batch_size, num_frames, channels, height, width)`.
+    """
+
+    frames: torch.Tensor
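Note: `BaseOutput` subclasses in diffusers act as both dataclasses and ordered dicts, so the `ConsisIDPipelineOutput` added above can be read by attribute, by key, or as a tuple. A minimal usage sketch (the tensor shape is illustrative, not prescribed by the class):

import torch
from diffusers.pipelines.consisid.pipeline_output import ConsisIDPipelineOutput

# Hypothetical video batch: 1 video, 8 frames, 3 channels, 64x64 pixels.
frames = torch.randn(1, 8, 3, 64, 64)
out = ConsisIDPipelineOutput(frames=frames)

print(out.frames.shape)             # attribute access
print(out["frames"] is out.frames)  # BaseOutput also supports dict-style access
print(out.to_tuple()[0].shape)      # or conversion to a plain tuple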
diffusers/pipelines/consistency_models/pipeline_consistency_models.py
@@ -19,6 +19,7 @@ import torch
 from ...models import UNet2DModel
 from ...schedulers import CMStochasticIterativeScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -26,6 +27,13 @@ from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -263,6 +271,9 @@ class ConsistencyModelPipeline(DiffusionPipeline):
             if callback is not None and i % callback_steps == 0:
                 callback(i, t, sample)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # 6. Post-process image sample
         image = self.postprocess_image(sample, output_type=output_type)
 
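Note: the recurring change in this release is the torch_xla hook shown above. PyTorch/XLA traces operations lazily on TPU; calling `xm.mark_step()` at the end of each denoising iteration cuts the traced graph so every step is compiled and executed, instead of the whole loop accumulating into one huge graph. A standalone sketch of the same guard (approximated with try/except instead of diffusers' `is_torch_xla_available`; the loop body is a stand-in for a real scheduler step, not diffusers code):

import torch

try:
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
except ImportError:
    XLA_AVAILABLE = False

def denoise_loop(sample: torch.Tensor, num_steps: int = 4) -> torch.Tensor:
    for _ in range(num_steps):
        sample = sample - 0.1 * sample  # placeholder for a scheduler step
        if XLA_AVAILABLE:
            # Flush the pending XLA graph so each step runs on the TPU now;
            # on non-XLA backends this branch is skipped entirely.
            xm.mark_step()
    return sample

print(denoise_loop(torch.randn(1, 4, 8, 8)).shape)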
diffusers/pipelines/controlnet/pipeline_controlnet.py
@@ -80,7 +80,7 @@ EXAMPLE_DOC_STRING = """
         >>> # load control net and stable diffusion v1-5
         >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
         >>> pipe = StableDiffusionControlNetPipeline.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )
 
         >>> # speed up diffusion process with faster scheduler and memory optimization
@@ -198,8 +198,8 @@ class StableDiffusionControlNetPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -207,7 +207,7 @@ class StableDiffusionControlNetPipeline(
     model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "image"]
 
     def __init__(
         self,
@@ -254,7 +254,7 @@ class StableDiffusionControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1323,6 +1323,7 @@ class StableDiffusionControlNetPipeline(
                 latents = callback_outputs.pop("latents", latents)
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                image = callback_outputs.pop("image", image)
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
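Note: registering "image" in `_callback_tensor_inputs` (and "control_image" in the img2img/inpaint variants below) lets `callback_on_step_end` read, and even replace, the conditioning tensor between steps. A hedged usage sketch against the public pipeline API (the blank control image and the inspection callback are illustrative only; real use would pass a Canny edge map):

import torch
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionControlNetPipeline

controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
).to("cuda")

control = Image.new("RGB", (512, 512))  # placeholder conditioning image

def inspect_control(pipe, step, timestep, callback_kwargs):
    # "image" is only available here because it is now listed in
    # _callback_tensor_inputs; returning the dict passes it back unchanged.
    print(step, callback_kwargs["image"].shape)
    return callback_kwargs

result = pipe(
    "a painting of a house",
    image=control,
    num_inference_steps=10,
    callback_on_step_end=inspect_control,
    callback_on_step_end_tensor_inputs=["image"],
).images[0]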
diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py
@@ -21,6 +21,7 @@ from transformers import CLIPTokenizer
 from ...models import AutoencoderKL, ControlNetModel, UNet2DConditionModel
 from ...schedulers import PNDMScheduler
 from ...utils import (
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
 )
@@ -31,8 +32,16 @@ from ..blip_diffusion.modeling_ctx_clip import ContextCLIPTextModel
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -401,6 +410,10 @@ class BlipDiffusionControlNetPipeline(DiffusionPipeline):
                 t,
                 latents,
             )["prev_sample"]
+
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         image = self.image_processor.postprocess(image, output_type=output_type)
 
diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py
@@ -30,6 +30,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -41,6 +42,13 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -71,7 +79,7 @@ EXAMPLE_DOC_STRING = """
         >>> # load control net and stable diffusion v1-5
         >>> controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny", torch_dtype=torch.float16)
         >>> pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )
 
         >>> # speed up diffusion process with faster scheduler and memory optimization
@@ -168,8 +176,8 @@ class StableDiffusionControlNetImg2ImgPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -177,7 +185,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
     model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds"]
+    _callback_tensor_inputs = ["latents", "prompt_embeds", "negative_prompt_embeds", "control_image"]
 
     def __init__(
         self,
@@ -224,7 +232,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -1286,6 +1294,7 @@ class StableDiffusionControlNetImg2ImgPipeline(
                 latents = callback_outputs.pop("latents", latents)
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                control_image = callback_outputs.pop("control_image", control_image)
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1294,6 +1303,9 @@ class StableDiffusionControlNetImg2ImgPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # If we do sequential model offloading, let's offload unet and controlnet
         # manually for max memory savings
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
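Note: the `vae_scale_factor` change repeated in every constructor above guards pipelines built without a VAE (components can be passed as None or attached later): `getattr(self, "vae", None)` falls back to the Stable Diffusion default of 8 instead of raising `AttributeError`. A reduced sketch of the idiom (the class and config objects are stand-ins, not diffusers types):

from types import SimpleNamespace

class PipelineLike:
    def __init__(self, vae=None):
        if vae is not None:
            self.vae = vae
        # The old expression unconditionally read self.vae.config and crashed
        # when the VAE was absent; the new one short-circuits to 8 instead.
        self.vae_scale_factor = (
            2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
        )

vae = SimpleNamespace(config=SimpleNamespace(block_out_channels=[128, 256, 512, 512]))
print(PipelineLike(vae).vae_scale_factor)   # 2 ** 3 == 8, from the config
print(PipelineLike(None).vae_scale_factor)  # 8, from the fallback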
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py
@@ -32,6 +32,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -43,6 +44,13 @@ from ..stable_diffusion import StableDiffusionPipelineOutput
 from ..stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -83,7 +91,7 @@ EXAMPLE_DOC_STRING = """
         ...     "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
         ... )
         >>> pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
         ... )
 
         >>> pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
@@ -141,11 +149,11 @@ class StableDiffusionControlNetInpaintPipeline(
     <Tip>
 
     This pipeline can be used with checkpoints that have been specifically fine-tuned for inpainting
-    ([
-    default text-to-image Stable Diffusion checkpoints
-    ([
-    Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on
-    [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint).
+    ([stable-diffusion-v1-5/stable-diffusion-inpainting](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-inpainting))
+    as well as default text-to-image Stable Diffusion checkpoints
+    ([stable-diffusion-v1-5/stable-diffusion-v1-5](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5)).
+    Default text-to-image Stable Diffusion checkpoints might be preferable for ControlNets that have been fine-tuned on
+    those, such as [lllyasviel/control_v11p_sd15_inpaint](https://huggingface.co/lllyasviel/control_v11p_sd15_inpaint).
 
     </Tip>
 
@@ -167,8 +175,8 @@ class StableDiffusionControlNetInpaintPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -176,7 +184,14 @@ class StableDiffusionControlNetInpaintPipeline(
     model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "image_encoder"]
     _exclude_from_cpu_offload = ["safety_checker"]
-    _callback_tensor_inputs = [
+    _callback_tensor_inputs = [
+        "latents",
+        "prompt_embeds",
+        "negative_prompt_embeds",
+        "control_image",
+        "mask",
+        "masked_image_latents",
+    ]
 
     def __init__(
         self,
@@ -223,7 +238,7 @@ class StableDiffusionControlNetInpaintPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -642,7 +657,7 @@ class StableDiffusionControlNetInpaintPipeline(
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(
@@ -650,7 +665,7 @@ class StableDiffusionControlNetInpaintPipeline(
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")
 
         # `prompt` needs more sophisticated handling when there are multiple
         # conditionings.
@@ -1468,6 +1483,7 @@ class StableDiffusionControlNetInpaintPipeline(
                 latents = callback_outputs.pop("latents", latents)
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                control_image = callback_outputs.pop("control_image", control_image)
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1476,6 +1492,9 @@ class StableDiffusionControlNetInpaintPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # If we do sequential model offloading, let's offload unet and controlnet
         # manually for max memory savings
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py
@@ -60,6 +60,16 @@ if is_invisible_watermark_available():
     from diffusers.pipelines.stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
 
 
+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -227,6 +237,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
         "add_neg_time_ids",
         "mask",
         "masked_image_latents",
+        "control_image",
     ]
 
     def __init__(
@@ -264,7 +275,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
         )
         self.register_to_config(force_zeros_for_empty_prompt=force_zeros_for_empty_prompt)
         self.register_to_config(requires_aesthetics_score=requires_aesthetics_score)
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -406,7 +417,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -465,8 +478,10 @@ class StableDiffusionXLControlNetInpaintPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -729,7 +744,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(
@@ -737,7 +752,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")
 
         if prompt_embeds is not None and pooled_prompt_embeds is None:
             raise ValueError(
@@ -1622,7 +1637,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
 
         # 8. Check that sizes of mask, masked image and latents match
         if num_channels_unet == 9:
-            # default case for
+            # default case for stable-diffusion-v1-5/stable-diffusion-inpainting
             num_channels_mask = mask.shape[1]
             num_channels_masked_image = masked_image_latents.shape[1]
             if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels:
@@ -1630,7 +1645,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                 f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                 f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                 " `pipeline.unet` or your `mask_image` or `image` input."
             )
         elif num_channels_unet != 4:
@@ -1821,6 +1836,7 @@ class StableDiffusionXLControlNetInpaintPipeline(
                 latents = callback_outputs.pop("latents", latents)
                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
                 negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds)
+                control_image = callback_outputs.pop("control_image", control_image)
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1829,6 +1845,9 @@ class StableDiffusionXLControlNetInpaintPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # make sure the VAE is in float32 mode, as it overflows in float16
         if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
             self.upcast_vae()
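Note: in the `pooled_prompt_embeds` hunks above (the removed lines are truncated in this extract), the pooled output is now captured only when it is not already set and element 0 of the encoder output is 2-D, so per-token hidden states are never mistaken for the pooled vector. A reduced sketch of the shape check (tensors and dimensions are illustrative):

import torch

# Stand-in for a CLIPTextModelWithProjection-style output: index 0 is the
# pooled projection of shape (batch, dim); per-token states are 3-D.
prompt_embeds = (torch.randn(2, 1280), torch.randn(2, 77, 1280))
pooled_prompt_embeds = None

# Capture the pooled vector only when element 0 really is 2-D.
if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
    pooled_prompt_embeds = prompt_embeds[0]

print(pooled_prompt_embeds.shape)  # torch.Size([2, 1280])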
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py
@@ -62,6 +62,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
 
 
+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -275,7 +285,7 @@ class StableDiffusionXLControlNetPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -415,7 +425,9 @@ class StableDiffusionXLControlNetPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -474,8 +486,10 @@ class StableDiffusionXLControlNetPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1548,6 +1562,9 @@ class StableDiffusionXLControlNetPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
             needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py
@@ -62,6 +62,16 @@ if is_invisible_watermark_available():
     from ..stable_diffusion_xl.watermark import StableDiffusionXLWatermarker
 
 
+from ...utils import is_torch_xla_available
+
+
+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
@@ -232,6 +242,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
         "add_time_ids",
         "negative_pooled_prompt_embeds",
         "add_neg_time_ids",
+        "control_image",
     ]
 
     def __init__(
@@ -267,7 +278,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True)
         self.control_image_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_convert_rgb=True, do_normalize=False
@@ -408,7 +419,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                 prompt_embeds = text_encoder(text_input_ids.to(device), output_hidden_states=True)
 
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                pooled_prompt_embeds
+                if pooled_prompt_embeds is None and prompt_embeds[0].ndim == 2:
+                    pooled_prompt_embeds = prompt_embeds[0]
+
                 if clip_skip is None:
                     prompt_embeds = prompt_embeds.hidden_states[-2]
                 else:
@@ -467,8 +480,10 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                     uncond_input.input_ids.to(device),
                     output_hidden_states=True,
                 )
+
                 # We are only ALWAYS interested in the pooled output of the final text encoder
-                negative_pooled_prompt_embeds
+                if negative_pooled_prompt_embeds is None and negative_prompt_embeds[0].ndim == 2:
+                    negative_pooled_prompt_embeds = negative_prompt_embeds[0]
                 negative_prompt_embeds = negative_prompt_embeds.hidden_states[-2]
 
                 negative_prompt_embeds_list.append(negative_prompt_embeds)
@@ -1600,6 +1615,7 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                 )
                 add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids)
                 add_neg_time_ids = callback_outputs.pop("add_neg_time_ids", add_neg_time_ids)
+                control_image = callback_outputs.pop("control_image", control_image)
 
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
@@ -1608,6 +1624,9 @@ class StableDiffusionXLControlNetImg2ImgPipeline(
                     step_idx = i // getattr(self.scheduler, "order", 1)
                     callback(step_idx, t, latents)
 
+            if XLA_AVAILABLE:
+                xm.mark_step()
+
         # If we do sequential model offloading, let's offload unet and controlnet
         # manually for max memory savings
         if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None: