diffusers 0.32.2__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
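The single most repeated change in the expanded per-file diffs below is a guarded torch_xla import plus an `xm.mark_step()` call at the end of each denoising loop, which flushes the lazily built XLA graph once per step on TPU-style backends. A minimal sketch of the pattern, with a hypothetical 50-step loop standing in for a real pipeline's denoising loop:

```python
from diffusers.utils import is_torch_xla_available

if is_torch_xla_available():
    import torch_xla.core.xla_model as xm

    XLA_AVAILABLE = True
else:
    XLA_AVAILABLE = False

# Hypothetical stand-in for a pipeline's denoising loop.
for i, t in enumerate(range(50)):
    # ... one scheduler step would run here ...
    if XLA_AVAILABLE:
        xm.mark_step()  # flush the lazily built XLA graph once per step
```

The guard keeps torch_xla strictly optional: on machines without it, `XLA_AVAILABLE` is False and the call is skipped.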
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py

@@ -28,11 +28,26 @@ from ...loaders import StableDiffusionLoraLoaderMixin, TextualInversionLoaderMix
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import
+from ...utils import (
+    PIL_INTERPOLATION,
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -115,17 +130,21 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
     ):
         super().__init__()

-        is_unet_version_less_0_9_0 =
-
-
-
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n-
-                " \n-
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -145,7 +164,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
             depth_estimator=depth_estimator,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
@@ -861,6 +880,9 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else:
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py

@@ -24,13 +24,20 @@ from ...configuration_utils import FrozenDict
 from ...image_processor import VaeImageProcessor
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -57,8 +64,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -97,17 +104,21 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 =
-
-
-
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n-
-                " \n-
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -126,7 +137,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
             safety_checker=safety_checker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -401,6 +412,9 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline, StableDiffusionMi
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         self.maybe_free_model_hooks()

         if not output_type == "latent":
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py

@@ -32,6 +32,7 @@ from ...utils import (
     PIL_INTERPOLATION,
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -43,8 +44,16 @@ from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -56,7 +65,7 @@ EXAMPLE_DOC_STRING = """
         >>> from diffusers import StableDiffusionImg2ImgPipeline

         >>> device = "cuda"
-        >>> model_id_or_path = "
+        >>> model_id_or_path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         >>> pipe = StableDiffusionImg2ImgPipeline.from_pretrained(model_id_or_path, torch_dtype=torch.float16)
         >>> pipe = pipe.to(device)

@@ -205,8 +214,8 @@ class StableDiffusionImg2ImgPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -230,7 +239,7 @@ class StableDiffusionImg2ImgPipeline(
     ):
         super().__init__()

-        if
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -244,7 +253,7 @@ class StableDiffusionImg2ImgPipeline(
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)

-        if
+        if scheduler is not None and getattr(scheduler.config, "clip_sample", False) is True:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration `clip_sample`."
                 " `clip_sample` should be set to False in the configuration file. Please make sure to update the"
@@ -273,17 +282,21 @@ class StableDiffusionImg2ImgPipeline(
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 =
-
-
-
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely. If your checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n-
-                " \n-
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -304,7 +317,7 @@ class StableDiffusionImg2ImgPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -1120,6 +1133,9 @@ class StableDiffusionImg2ImgPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False, generator=generator)[
                 0
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py

@@ -27,13 +27,27 @@ from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraL
 from ...models import AsymmetricAutoencoderKL, AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -146,8 +160,8 @@ class StableDiffusionInpaintPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -171,7 +185,7 @@ class StableDiffusionInpaintPipeline(
     ):
         super().__init__()

-        if
+        if scheduler is not None and getattr(scheduler.config, "steps_offset", 1) != 1:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} is outdated. `steps_offset`"
                 f" should be set to 1 instead of {scheduler.config.steps_offset}. Please make sure "
@@ -185,7 +199,7 @@ class StableDiffusionInpaintPipeline(
             new_config["steps_offset"] = 1
             scheduler._internal_dict = FrozenDict(new_config)

-        if
+        if scheduler is not None and getattr(scheduler.config, "skip_prk_steps", True) is False:
             deprecation_message = (
                 f"The configuration file of this scheduler: {scheduler} has not set the configuration"
                 " `skip_prk_steps`. `skip_prk_steps` should be set to True in the configuration file. Please make"
@@ -215,17 +229,21 @@ class StableDiffusionInpaintPipeline(
                 " checker. If you do not want to use the safety checker, you can pass `'safety_checker=None'` instead."
             )

-        is_unet_version_less_0_9_0 =
-
-
-
+        is_unet_version_less_0_9_0 = (
+            unet is not None
+            and hasattr(unet.config, "_diffusers_version")
+            and version.parse(version.parse(unet.config._diffusers_version).base_version) < version.parse("0.9.0.dev0")
+        )
+        is_unet_sample_size_less_64 = (
+            unet is not None and hasattr(unet.config, "sample_size") and unet.config.sample_size < 64
+        )
         if is_unet_version_less_0_9_0 and is_unet_sample_size_less_64:
             deprecation_message = (
                 "The configuration file of the unet has set the default `sample_size` to smaller than"
                 " 64 which seems highly unlikely .If you're checkpoint is a fine-tuned version of any of the"
                 " following: \n- CompVis/stable-diffusion-v1-4 \n- CompVis/stable-diffusion-v1-3 \n-"
-                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n-
-                " \n-
+                " CompVis/stable-diffusion-v1-2 \n- CompVis/stable-diffusion-v1-1 \n- stable-diffusion-v1-5/stable-diffusion-v1-5"
+                " \n- stable-diffusion-v1-5/stable-diffusion-inpainting \n you should change 'sample_size' to 64 in the"
                 " configuration file. Please make sure to update the config accordingly as leaving `sample_size=32`"
                 " in the config might lead to incorrect results in future versions. If you have downloaded this"
                 " checkpoint from the Hugging Face Hub, it would be very nice if you could open a Pull request for"
@@ -237,7 +255,7 @@ class StableDiffusionInpaintPipeline(
             unet._internal_dict = FrozenDict(new_config)

         # Check shapes, assume num_channels_latents == 4, num_channels_mask == 1, num_channels_masked == 4
-        if unet.config.in_channels != 9:
+        if unet is not None and unet.config.in_channels != 9:
             logger.info(f"You have loaded a UNet with {unet.config.in_channels} input channels which.")

         self.register_modules(
@@ -250,7 +268,7 @@ class StableDiffusionInpaintPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor, do_normalize=False, do_binarize=True, do_convert_grayscale=True
@@ -642,7 +660,7 @@ class StableDiffusionInpaintPipeline(
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(
@@ -650,7 +668,7 @@ class StableDiffusionInpaintPipeline(
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")

         if ip_adapter_image is not None and ip_adapter_image_embeds is not None:
             raise ValueError(
@@ -1014,7 +1032,7 @@ class StableDiffusionInpaintPipeline(
         >>> mask_image = download_image(mask_url).resize((512, 512))

         >>> pipe = StableDiffusionInpaintPipeline.from_pretrained(
-        ...     "
+        ...     "stable-diffusion-v1-5/stable-diffusion-inpainting", torch_dtype=torch.float16
         ... )
         >>> pipe = pipe.to("cuda")

@@ -1200,7 +1218,7 @@ class StableDiffusionInpaintPipeline(

         # 8. Check that sizes of mask, masked image and latents match
         if num_channels_unet == 9:
-            # default case for
+            # default case for stable-diffusion-v1-5/stable-diffusion-inpainting
             num_channels_mask = mask.shape[1]
             num_channels_masked_image = masked_image_latents.shape[1]
             if num_channels_latents + num_channels_mask + num_channels_masked_image != self.unet.config.in_channels:
@@ -1208,7 +1226,7 @@ class StableDiffusionInpaintPipeline(
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                     f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                     f" `num_channels_mask`: {num_channels_mask} + `num_channels_masked_image`: {num_channels_masked_image}"
-                    f" = {num_channels_latents+num_channels_masked_image+num_channels_mask}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_masked_image + num_channels_mask}. Please verify the config of"
                     " `pipeline.unet` or your `mask_image` or `image` input."
                 )
             elif num_channels_unet != 4:
@@ -1303,6 +1321,9 @@ class StableDiffusionInpaintPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             condition_kwargs = {}
             if isinstance(self.vae, AsymmetricAutoencoderKL):
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py

@@ -22,16 +22,23 @@ from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPV

 from ...callbacks import MultiPipelineCallbacks, PipelineCallback
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FromSingleFileMixin, IPAdapterMixin, StableDiffusionLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, logging
+from ...utils import PIL_INTERPOLATION, deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -79,6 +86,7 @@ class StableDiffusionInstructPix2PixPipeline(
     TextualInversionLoaderMixin,
     StableDiffusionLoraLoaderMixin,
     IPAdapterMixin,
+    FromSingleFileMixin,
 ):
     r"""
     Pipeline for pixel-level image editing by following text instructions (based on Stable Diffusion).
@@ -106,8 +114,8 @@ class StableDiffusionInstructPix2PixPipeline(
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
         safety_checker ([`StableDiffusionSafetyChecker`]):
             Classification module that estimates whether generated images could be considered offensive or harmful.
-            Please refer to the [model card](https://huggingface.co/
-            about a model's potential harms.
+            Please refer to the [model card](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) for
+            more details about a model's potential harms.
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
@@ -157,7 +165,7 @@ class StableDiffusionInstructPix2PixPipeline(
             feature_extractor=feature_extractor,
             image_encoder=image_encoder,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

@@ -393,7 +401,7 @@ class StableDiffusionInstructPix2PixPipeline(
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                     f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                     f" `num_channels_image`: {num_channels_image} "
-                    f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                     " `pipeline.unet` or your `image` input."
                 )

@@ -457,6 +465,9 @@ class StableDiffusionInstructPix2PixPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py

@@ -25,11 +25,18 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import FromSingleFileMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import EulerDiscreteScheduler
-from ...utils import deprecate, logging
+from ...utils import deprecate, is_torch_xla_available, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffusionMixin


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -116,7 +123,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
             unet=unet,
             scheduler=scheduler,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")

     def _encode_prompt(
@@ -593,7 +600,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                     f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                     f" `num_channels_image`: {num_channels_image} "
-                    f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                     " `pipeline.unet` or your `image` input."
                 )

@@ -640,6 +647,9 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline, StableDiffusionMix
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else:
diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py

@@ -30,12 +30,26 @@ from ...models.attention_processor import (
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
-from ...utils import
+from ...utils import (
+    USE_PEFT_BACKEND,
+    deprecate,
+    is_torch_xla_available,
+    logging,
+    scale_lora_layers,
+    unscale_lora_layers,
+)
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, StableDiffusionMixin
 from . import StableDiffusionPipelineOutput


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


@@ -149,7 +163,7 @@ class StableDiffusionUpscalePipeline(
             watermarker=watermarker,
             feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
         self.register_to_config(max_noise_level=max_noise_level)

@@ -726,7 +740,7 @@ class StableDiffusionUpscalePipeline(
                     f"Incorrect configuration settings! The config of `pipeline.unet`: {self.unet.config} expects"
                     f" {self.unet.config.in_channels} but received `num_channels_latents`: {num_channels_latents} +"
                     f" `num_channels_image`: {num_channels_image} "
-                    f" = {num_channels_latents+num_channels_image}. Please verify the config of"
+                    f" = {num_channels_latents + num_channels_image}. Please verify the config of"
                     " `pipeline.unet` or your `image` input."
                 )

@@ -769,6 +783,9 @@ class StableDiffusionUpscalePipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             # make sure the VAE is in float32 mode, as it overflows in float16
             needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py

@@ -28,6 +28,7 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     USE_PEFT_BACKEND,
     deprecate,
+    is_torch_xla_available,
     logging,
     replace_example_docstring,
     scale_lora_layers,
@@ -38,8 +39,16 @@ from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput, StableDiffu
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer


+if is_torch_xla_available():
+    import torch_xla.core.xla_model as xm
+
+    XLA_AVAILABLE = True
+else:
+    XLA_AVAILABLE = False
+
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

+
 EXAMPLE_DOC_STRING = """
     Examples:
         ```py
@@ -132,7 +141,7 @@ class StableUnCLIPPipeline(
         image_noising_scheduler: KarrasDiffusionSchedulers,
         # regular denoising components
         tokenizer: CLIPTokenizer,
-        text_encoder:
+        text_encoder: CLIPTextModel,
         unet: UNet2DConditionModel,
         scheduler: KarrasDiffusionSchedulers,
         # vae
@@ -154,7 +163,7 @@ class StableUnCLIPPipeline(
             vae=vae,
         )

-        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)

     # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
@@ -924,6 +933,9 @@ class StableUnCLIPPipeline(
                         step_idx = i // getattr(self.scheduler, "order", 1)
                         callback(step_idx, t, latents)

+                if XLA_AVAILABLE:
+                    xm.mark_step()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         else: