diffusers 0.32.1__py3-none-any.whl → 0.33.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +198 -28
- diffusers/loaders/lora_conversion_utils.py +679 -44
- diffusers/loaders/lora_pipeline.py +1963 -801
- diffusers/loaders/peft.py +169 -84
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +653 -75
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +22 -32
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +409 -49
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +10 -2
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +14 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.1.dist-info/RECORD +0 -550
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.1.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -23,6 +23,7 @@ except OptionalDependencyNotAvailable:
|
|
23
23
|
else:
|
24
24
|
_import_structure["marigold_image_processing"] = ["MarigoldImageProcessor"]
|
25
25
|
_import_structure["pipeline_marigold_depth"] = ["MarigoldDepthOutput", "MarigoldDepthPipeline"]
|
26
|
+
_import_structure["pipeline_marigold_intrinsics"] = ["MarigoldIntrinsicsOutput", "MarigoldIntrinsicsPipeline"]
|
26
27
|
_import_structure["pipeline_marigold_normals"] = ["MarigoldNormalsOutput", "MarigoldNormalsPipeline"]
|
27
28
|
|
28
29
|
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
@@ -35,6 +36,7 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
35
36
|
else:
|
36
37
|
from .marigold_image_processing import MarigoldImageProcessor
|
37
38
|
from .pipeline_marigold_depth import MarigoldDepthOutput, MarigoldDepthPipeline
|
39
|
+
from .pipeline_marigold_intrinsics import MarigoldIntrinsicsOutput, MarigoldIntrinsicsPipeline
|
38
40
|
from .pipeline_marigold_normals import MarigoldNormalsOutput, MarigoldNormalsPipeline
|
39
41
|
|
40
42
|
else:
|
@@ -1,4 +1,22 @@
|
|
1
|
-
|
1
|
+
# Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved.
|
2
|
+
# Copyright 2024-2025 The HuggingFace Team. All rights reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
15
|
+
# --------------------------------------------------------------------------
|
16
|
+
# More information and citation instructions are available on the
|
17
|
+
# Marigold project website: https://marigoldcomputervision.github.io
|
18
|
+
# --------------------------------------------------------------------------
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
2
20
|
|
3
21
|
import numpy as np
|
4
22
|
import PIL
|
@@ -379,7 +397,7 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
379
397
|
val_min: float = 0.0,
|
380
398
|
val_max: float = 1.0,
|
381
399
|
color_map: str = "Spectral",
|
382
|
-
) ->
|
400
|
+
) -> List[PIL.Image.Image]:
|
383
401
|
"""
|
384
402
|
Visualizes depth maps, such as predictions of the `MarigoldDepthPipeline`.
|
385
403
|
|
@@ -391,7 +409,7 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
391
409
|
color_map (`str`, *optional*, defaults to `"Spectral"`): Color map used to convert a single-channel
|
392
410
|
depth prediction into colored representation.
|
393
411
|
|
394
|
-
Returns: `
|
412
|
+
Returns: `List[PIL.Image.Image]` with depth maps visualization.
|
395
413
|
"""
|
396
414
|
if val_max <= val_min:
|
397
415
|
raise ValueError(f"Invalid values range: [{val_min}, {val_max}].")
|
@@ -436,7 +454,7 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
436
454
|
depth: Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]],
|
437
455
|
val_min: float = 0.0,
|
438
456
|
val_max: float = 1.0,
|
439
|
-
) ->
|
457
|
+
) -> List[PIL.Image.Image]:
|
440
458
|
def export_depth_to_16bit_png_one(img, idx=None):
|
441
459
|
prefix = "Depth" + (f"[{idx}]" if idx else "")
|
442
460
|
if not isinstance(img, np.ndarray) and not torch.is_tensor(img):
|
@@ -478,7 +496,7 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
478
496
|
flip_x: bool = False,
|
479
497
|
flip_y: bool = False,
|
480
498
|
flip_z: bool = False,
|
481
|
-
) ->
|
499
|
+
) -> List[PIL.Image.Image]:
|
482
500
|
"""
|
483
501
|
Visualizes surface normals, such as predictions of the `MarigoldNormalsPipeline`.
|
484
502
|
|
@@ -492,7 +510,7 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
492
510
|
flip_z (`bool`, *optional*, defaults to `False`): Flips the Z axis of the normals frame of reference.
|
493
511
|
Default direction is facing the observer.
|
494
512
|
|
495
|
-
Returns: `
|
513
|
+
Returns: `List[PIL.Image.Image]` with surface normals visualization.
|
496
514
|
"""
|
497
515
|
flip_vec = None
|
498
516
|
if any((flip_x, flip_y, flip_z)):
|
@@ -528,6 +546,99 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
528
546
|
else:
|
529
547
|
raise ValueError(f"Unexpected input type: {type(normals)}")
|
530
548
|
|
549
|
+
@staticmethod
def visualize_intrinsics(
    prediction: Union[
        np.ndarray,
        torch.Tensor,
        List[np.ndarray],
        List[torch.Tensor],
    ],
    target_properties: Dict[str, Any],
    color_map: Union[str, Dict[str, str]] = "binary",
) -> List[Dict[str, PIL.Image.Image]]:
    """
    Visualizes intrinsic image decomposition, such as predictions of the `MarigoldIntrinsicsPipeline`.

    Args:
        prediction (`Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]]`):
            Intrinsic image decomposition. A single array or tensor must, after conversion to a tensor, have the
            shape `[N*T,3,H,W]`, where `T` is the number of entries in `target_names`; it is split into `N` groups
            of `T` targets. When a list is passed, every entry holds the `T` targets of one image as a
            `[T,3,H,W]` tensor; `np.ndarray` entries are converted with `MarigoldImageProcessor.numpy_to_pt`
            (presumably from a channels-last `[T,H,W,3]` layout -- confirm against that helper).
        target_properties (`Dict[str, Any]`):
            Decomposition properties. Must contain `target_names: List[str]` (non-empty) and, for each target and
            sub-target name, a dictionary with the optional keys `prediction_space: str` (`"srgb"` when absent;
            `"linear"` and `"stack"` are handled specially) and `up_to_scale: bool` (`False` when absent). A
            target whose prediction space is `"stack"` must additionally provide
            `sub_target_names: List[Optional[str]]` with exactly 3 entries, one per channel, using `None` for
            missing modalities.
        color_map (`Union[str, Dict[str, str]]`, *optional*, defaults to `"binary"`):
            Color map used to convert single-channel predictions into colored representations. When a dictionary
            is passed, each modality can be colored with its own color map; modalities missing from the
            dictionary use `"binary"`.

    Returns:
        `List[Dict[str, PIL.Image.Image]]` with intrinsic image decomposition visualization: one dictionary per
        input image, keyed by target name (and by sub-target name for `"stack"` targets).

    Raises:
        `ValueError`: If `target_names` is missing or empty, `color_map` is neither a string nor a dictionary of
        strings, `prediction` is `None` (or contains `None`), has an unexpected type or shape, or the sub-target
        names of a `"stack"` target are malformed.
    """
    if "target_names" not in target_properties:
        raise ValueError("Missing `target_names` in target_properties")
    if not isinstance(color_map, str) and not (
        isinstance(color_map, dict)
        and all(isinstance(k, str) and isinstance(v, str) for k, v in color_map.items())
    ):
        raise ValueError("`color_map` must be a string or a dictionary of strings")
    target_names = target_properties["target_names"]
    n_targets = len(target_names)
    if n_targets == 0:
        # Without this guard the divisibility check below fails with an opaque ZeroDivisionError.
        raise ValueError("Empty `target_names` in target_properties")

    def linear_to_display(values, properties):
        # Optionally normalize by the maximum (floored at 1e-6 to avoid division by zero), then apply a
        # 1/2.2 gamma to bring linear-space values into a display-friendly range.
        if properties.get("up_to_scale", False):
            values = values / max(values.max().item(), 1e-6)
        return values ** (1 / 2.2)

    def visualize_targets_one(images):
        # images: [T, 3, H, W]
        out = {}
        for target_name, img in zip(target_names, images):
            img = img.permute(1, 2, 0)  # [H, W, 3]
            properties = target_properties[target_name]
            prediction_space = properties.get("prediction_space", "srgb")
            if prediction_space == "stack":
                # Each of the 3 channels carries an independent single-channel modality.
                sub_target_names = properties["sub_target_names"]
                if len(sub_target_names) != 3 or any(
                    not (isinstance(s, str) or s is None) for s in sub_target_names
                ):
                    raise ValueError(f"Unexpected target sub-names {sub_target_names} in {target_name}")
                for i, sub_target_name in enumerate(sub_target_names):
                    if sub_target_name is None:
                        continue
                    sub_img = img[:, :, i]
                    sub_properties = target_properties[sub_target_name]
                    if sub_properties.get("prediction_space", "srgb") == "linear":
                        sub_img = linear_to_display(sub_img, sub_properties)
                    cmap_name = (
                        color_map if isinstance(color_map, str) else color_map.get(sub_target_name, "binary")
                    )
                    sub_img = MarigoldImageProcessor.colormap(sub_img, cmap=cmap_name, bytes=True)
                    out[sub_target_name] = PIL.Image.fromarray(sub_img.cpu().numpy())
            elif prediction_space == "linear":
                img = linear_to_display(img, properties)
            # "srgb" (and any other value) is visualized as is. The full 3-channel image is emitted for every
            # target, including "stack" targets. Clamp before quantization: converting out-of-range floats to
            # uint8 would otherwise wrap around or be undefined.
            img = (img.clamp(0, 1) * 255).to(dtype=torch.uint8, device="cpu").numpy()
            out[target_name] = PIL.Image.fromarray(img)
        return out

    if prediction is None or (isinstance(prediction, list) and any(o is None for o in prediction)):
        raise ValueError("Input prediction is `None`")
    if isinstance(prediction, (np.ndarray, torch.Tensor)):
        prediction = MarigoldImageProcessor.expand_tensor_or_array(prediction)
        if isinstance(prediction, np.ndarray):
            prediction = MarigoldImageProcessor.numpy_to_pt(prediction)  # [N*T,3,H,W]
        if not (prediction.ndim == 4 and prediction.shape[1] == 3 and prediction.shape[0] % n_targets == 0):
            raise ValueError(f"Unexpected input shape={prediction.shape}, expecting [N*T,3,H,W].")
        N_T, _, H, W = prediction.shape
        N = N_T // n_targets
        prediction = prediction.reshape(N, n_targets, 3, H, W)
        return [visualize_targets_one(images) for images in prediction]
    elif isinstance(prediction, list):
        # Bring NumPy entries to the tensor layout the helper expects; tensors are passed through untouched.
        return [
            visualize_targets_one(
                MarigoldImageProcessor.numpy_to_pt(images) if isinstance(images, np.ndarray) else images
            )
            for images in prediction
        ]
    else:
        raise ValueError(f"Unexpected input type: {type(prediction)}")
|
641
|
+
|
531
642
|
@staticmethod
|
532
643
|
def visualize_uncertainty(
|
533
644
|
uncertainty: Union[
|
@@ -537,9 +648,10 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
537
648
|
List[torch.Tensor],
|
538
649
|
],
|
539
650
|
saturation_percentile=95,
|
540
|
-
) ->
|
651
|
+
) -> List[PIL.Image.Image]:
|
541
652
|
"""
|
542
|
-
Visualizes dense uncertainties, such as produced by `MarigoldDepthPipeline` or
|
653
|
+
Visualizes dense uncertainties, such as produced by `MarigoldDepthPipeline`, `MarigoldNormalsPipeline`, or
|
654
|
+
`MarigoldIntrinsicsPipeline`.
|
543
655
|
|
544
656
|
Args:
|
545
657
|
uncertainty (`Union[np.ndarray, torch.Tensor, List[np.ndarray], List[torch.Tensor]]`):
|
@@ -547,14 +659,15 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
547
659
|
saturation_percentile (`int`, *optional*, defaults to `95`):
|
548
660
|
Specifies the percentile uncertainty value visualized with maximum intensity.
|
549
661
|
|
550
|
-
Returns: `
|
662
|
+
Returns: `List[PIL.Image.Image]` with uncertainty visualization.
|
551
663
|
"""
|
552
664
|
|
553
665
|
def visualize_uncertainty_one(img, idx=None):
|
554
666
|
prefix = "Uncertainty" + (f"[{idx}]" if idx else "")
|
555
667
|
if img.min() < 0:
|
556
|
-
raise ValueError(f"{prefix}:
|
557
|
-
img = img.
|
668
|
+
raise ValueError(f"{prefix}: unexpected data range, min={img.min()}.")
|
669
|
+
img = img.permute(1, 2, 0) # [H,W,C]
|
670
|
+
img = img.squeeze(2).cpu().numpy() # [H,W] or [H,W,3]
|
558
671
|
saturation_value = np.percentile(img, saturation_percentile)
|
559
672
|
img = np.clip(img * 255 / saturation_value, 0, 255)
|
560
673
|
img = img.astype(np.uint8)
|
@@ -566,9 +679,9 @@ class MarigoldImageProcessor(ConfigMixin):
|
|
566
679
|
if isinstance(uncertainty, (np.ndarray, torch.Tensor)):
|
567
680
|
uncertainty = MarigoldImageProcessor.expand_tensor_or_array(uncertainty)
|
568
681
|
if isinstance(uncertainty, np.ndarray):
|
569
|
-
uncertainty = MarigoldImageProcessor.numpy_to_pt(uncertainty) # [N,
|
570
|
-
if not (uncertainty.ndim == 4 and uncertainty.shape[1]
|
571
|
-
raise ValueError(f"Unexpected input shape={uncertainty.shape}, expecting [N,
|
682
|
+
uncertainty = MarigoldImageProcessor.numpy_to_pt(uncertainty) # [N,C,H,W]
|
683
|
+
if not (uncertainty.ndim == 4 and uncertainty.shape[1] in (1, 3)):
|
684
|
+
raise ValueError(f"Unexpected input shape={uncertainty.shape}, expecting [N,C,H,W] with C in (1,3).")
|
572
685
|
return [visualize_uncertainty_one(img, idx) for idx, img in enumerate(uncertainty)]
|
573
686
|
elif isinstance(uncertainty, list):
|
574
687
|
return [visualize_uncertainty_one(img, idx) for idx, img in enumerate(uncertainty)]
|
@@ -1,5 +1,5 @@
|
|
1
|
-
# Copyright
|
2
|
-
# Copyright 2024 The HuggingFace Team. All rights reserved.
|
1
|
+
# Copyright 2023-2025 Marigold Team, ETH Zürich. All rights reserved.
|
2
|
+
# Copyright 2024-2025 The HuggingFace Team. All rights reserved.
|
3
3
|
#
|
4
4
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
5
|
# you may not use this file except in compliance with the License.
|
@@ -14,7 +14,7 @@
|
|
14
14
|
# limitations under the License.
|
15
15
|
# --------------------------------------------------------------------------
|
16
16
|
# More information and citation instructions are available on the
|
17
|
-
# Marigold project website: https://
|
17
|
+
# Marigold project website: https://marigoldcomputervision.github.io
|
18
18
|
# --------------------------------------------------------------------------
|
19
19
|
from dataclasses import dataclass
|
20
20
|
from functools import partial
|
@@ -37,6 +37,7 @@ from ...schedulers import (
|
|
37
37
|
)
|
38
38
|
from ...utils import (
|
39
39
|
BaseOutput,
|
40
|
+
is_torch_xla_available,
|
40
41
|
logging,
|
41
42
|
replace_example_docstring,
|
42
43
|
)
|
@@ -46,6 +47,13 @@ from ..pipeline_utils import DiffusionPipeline
|
|
46
47
|
from .marigold_image_processing import MarigoldImageProcessor
|
47
48
|
|
48
49
|
|
50
|
+
if is_torch_xla_available():
|
51
|
+
import torch_xla.core.xla_model as xm
|
52
|
+
|
53
|
+
XLA_AVAILABLE = True
|
54
|
+
else:
|
55
|
+
XLA_AVAILABLE = False
|
56
|
+
|
49
57
|
logger = logging.get_logger(__name__) # pylint: disable=invalid-name
|
50
58
|
|
51
59
|
|
@@ -56,7 +64,7 @@ Examples:
|
|
56
64
|
>>> import torch
|
57
65
|
|
58
66
|
>>> pipe = diffusers.MarigoldDepthPipeline.from_pretrained(
|
59
|
-
... "prs-eth/marigold-depth-
|
67
|
+
... "prs-eth/marigold-depth-v1-1", variant="fp16", torch_dtype=torch.float16
|
60
68
|
... ).to("cuda")
|
61
69
|
|
62
70
|
>>> image = diffusers.utils.load_image("https://marigoldmonodepth.github.io/images/einstein.jpg")
|
@@ -78,11 +86,12 @@ class MarigoldDepthOutput(BaseOutput):
|
|
78
86
|
|
79
87
|
Args:
|
80
88
|
prediction (`np.ndarray`, `torch.Tensor`):
|
81
|
-
Predicted depth maps with values in the range [0, 1]. The shape is
|
82
|
-
|
89
|
+
Predicted depth maps with values in the range [0, 1]. The shape is $numimages \times 1 \times height \times
|
90
|
+
width$ for `torch.Tensor` or $numimages \times height \times width \times 1$ for `np.ndarray`.
|
83
91
|
uncertainty (`None`, `np.ndarray`, `torch.Tensor`):
|
84
92
|
Uncertainty maps computed from the ensemble, with values in the range [0, 1]. The shape is $numimages
|
85
|
-
\times 1 \times height \times width
|
93
|
+
\times 1 \times height \times width$ for `torch.Tensor` or $numimages \times height \times width \times 1$
|
94
|
+
for `np.ndarray`.
|
86
95
|
latent (`None`, `torch.Tensor`):
|
87
96
|
Latent features corresponding to the predictions, compatible with the `latents` argument of the pipeline.
|
88
97
|
The shape is $numimages * numensemble \times 4 \times latentheight \times latentwidth$.
|
@@ -174,7 +183,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
174
183
|
default_processing_resolution=default_processing_resolution,
|
175
184
|
)
|
176
185
|
|
177
|
-
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
186
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
178
187
|
|
179
188
|
self.scale_invariant = scale_invariant
|
180
189
|
self.shift_invariant = shift_invariant
|
@@ -200,6 +209,11 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
200
209
|
output_type: str,
|
201
210
|
output_uncertainty: bool,
|
202
211
|
) -> int:
|
212
|
+
actual_vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
|
213
|
+
if actual_vae_scale_factor != self.vae_scale_factor:
|
214
|
+
raise ValueError(
|
215
|
+
f"`vae_scale_factor` computed at initialization ({self.vae_scale_factor}) differs from the actual one ({actual_vae_scale_factor})."
|
216
|
+
)
|
203
217
|
if num_inference_steps is None:
|
204
218
|
raise ValueError("`num_inference_steps` is not specified and could not be resolved from the model config.")
|
205
219
|
if num_inference_steps < 1:
|
@@ -312,6 +326,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
312
326
|
|
313
327
|
return num_images
|
314
328
|
|
329
|
+
@torch.compiler.disable
|
315
330
|
def progress_bar(self, iterable=None, total=None, desc=None, leave=True):
|
316
331
|
if not hasattr(self, "_progress_bar_config"):
|
317
332
|
self._progress_bar_config = {}
|
@@ -362,11 +377,9 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
362
377
|
same width and height.
|
363
378
|
num_inference_steps (`int`, *optional*, defaults to `None`):
|
364
379
|
Number of denoising diffusion steps during inference. The default value `None` results in automatic
|
365
|
-
selection.
|
366
|
-
for Marigold-LCM models.
|
380
|
+
selection.
|
367
381
|
ensemble_size (`int`, defaults to `1`):
|
368
|
-
Number of ensemble predictions.
|
369
|
-
faster inference.
|
382
|
+
Number of ensemble predictions. Higher values result in measurable improvements and visual degradation.
|
370
383
|
processing_resolution (`int`, *optional*, defaults to `None`):
|
371
384
|
Effective processing resolution. When set to `0`, matches the larger input image dimension. This
|
372
385
|
produces crisper predictions, but may also lead to the overall loss of global context. The default
|
@@ -478,9 +491,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
478
491
|
# `pred_latent` variable. The variable `image_latent` is of the same shape: it contains each input image encoded
|
479
492
|
# into latent space and replicated `E` times. The latents can be either generated (see `generator` to ensure
|
480
493
|
# reproducibility), or passed explicitly via the `latents` argument. The latter can be set outside the pipeline
|
481
|
-
# code.
|
482
|
-
# as a convex combination of the latents output of the pipeline for the previous frame and a newly-sampled
|
483
|
-
# noise. This behavior can be achieved by setting the `output_latent` argument to `True`. The latent space
|
494
|
+
# code. This behavior can be achieved by setting the `output_latent` argument to `True`. The latent space
|
484
495
|
# dimensions are `(h, w)`. Encoding into latent space happens in batches of size `batch_size`.
|
485
496
|
# Model invocation: self.vae.encoder.
|
486
497
|
image_latent, pred_latent = self.prepare_latents(
|
@@ -517,6 +528,9 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
517
528
|
noise, t, batch_pred_latent, generator=generator
|
518
529
|
).prev_sample # [B,4,h,w]
|
519
530
|
|
531
|
+
if XLA_AVAILABLE:
|
532
|
+
xm.mark_step()
|
533
|
+
|
520
534
|
pred_latents.append(batch_pred_latent)
|
521
535
|
|
522
536
|
pred_latent = torch.cat(pred_latents, dim=0) # [N*E,4,h,w]
|
@@ -722,6 +736,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
722
736
|
param = init_s.cpu().numpy()
|
723
737
|
else:
|
724
738
|
raise ValueError("Unrecognized alignment.")
|
739
|
+
param = param.astype(np.float64)
|
725
740
|
|
726
741
|
return param
|
727
742
|
|
@@ -764,7 +779,7 @@ class MarigoldDepthPipeline(DiffusionPipeline):
|
|
764
779
|
|
765
780
|
if regularizer_strength > 0:
|
766
781
|
prediction, _ = ensemble(depth_aligned, return_uncertainty=False)
|
767
|
-
err_near =
|
782
|
+
err_near = prediction.min().abs().item()
|
768
783
|
err_far = (1.0 - prediction.max()).abs().item()
|
769
784
|
cost += (err_near + err_far) * regularizer_strength
|
770
785
|
|