diffusers 0.32.2__py3-none-any.whl → 0.33.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +595 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +724 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +727 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/METADATA +21 -4
- diffusers-0.33.1.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.1.dist-info}/top_level.txt +0 -0
diffusers/pipelines/flux/pipeline_flux_inpaint.py

```diff
@@ -18,10 +18,17 @@ from typing import Any, Callable, Dict, List, Optional, Union
 import numpy as np
 import PIL.Image
 import torch
-from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+from transformers import (
+    CLIPImageProcessor,
+    CLIPTextModel,
+    CLIPTokenizer,
+    CLIPVisionModelWithProjection,
+    T5EncoderModel,
+    T5TokenizerFast,
+)
 
 from ...image_processor import PipelineImageInput, VaeImageProcessor
-from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
+from ...loaders import FluxIPAdapterMixin, FluxLoraLoaderMixin, TextualInversionLoaderMixin
 from ...models.autoencoders import AutoencoderKL
 from ...models.transformers import FluxTransformer2DModel
 from ...schedulers import FlowMatchEulerDiscreteScheduler
@@ -74,7 +81,7 @@ def calculate_shift(
     base_seq_len: int = 256,
     max_seq_len: int = 4096,
     base_shift: float = 0.5,
-    max_shift: float = 1.16,
+    max_shift: float = 1.15,
 ):
     m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
     b = base_shift - m * base_seq_len
```
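For orientation, `calculate_shift` maps the packed image-token count linearly onto a timestep shift between `base_shift` and `max_shift`; 0.33.1 lowers the default ceiling to 1.15. A minimal worked sketch (the final `return image_seq_len * m + b` is inferred from the surrounding definition, not shown in this hunk):

```python
# Minimal sketch of the shift computation above; the return line is inferred from context.
def calculate_shift(image_seq_len, base_seq_len=256, max_seq_len=4096, base_shift=0.5, max_shift=1.15):
    m = (max_shift - base_shift) / (max_seq_len - base_seq_len)  # slope of the linear map
    b = base_shift - m * base_seq_len                            # intercept: f(base_seq_len) == base_shift
    return image_seq_len * m + b

# A 1024x1024 image with vae_scale_factor=8 packs into (1024 // 8 // 2) ** 2 = 4096 tokens:
print(calculate_shift(256))   # ~0.5  (base)
print(calculate_shift(4096))  # ~1.15 (the corrected maximum)
```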
```diff
@@ -156,7 +163,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 
 
-class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
+class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FluxIPAdapterMixin):
     r"""
     The Flux pipeline for image inpainting.
 
```
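With `FluxIPAdapterMixin` in the bases, the inpaint pipeline picks up the same `load_ip_adapter` / `set_ip_adapter_scale` entry points as `FluxPipeline`. A hedged sketch of the wiring; the checkpoint, weight file, and image-encoder ids below are illustrative assumptions, not taken from this diff:

```python
import torch
from diffusers import FluxInpaintPipeline

pipe = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")
# All three ids below are assumptions for illustration.
pipe.load_ip_adapter(
    "XLabs-AI/flux-ip-adapter",
    weight_name="ip_adapter.safetensors",
    image_encoder_pretrained_model_name_or_path="openai/clip-vit-large-patch14",
)
pipe.set_ip_adapter_scale(1.0)
```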
```diff
@@ -183,8 +190,8 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         [T5TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/t5#transformers.T5TokenizerFast).
     """
 
-    model_cpu_offload_seq = "text_encoder->text_encoder_2->transformer->vae"
-    _optional_components = []
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->image_encoder->transformer->vae"
+    _optional_components = ["image_encoder", "feature_extractor"]
     _callback_tensor_inputs = ["latents", "prompt_embeds"]
 
     def __init__(
@@ -196,6 +203,8 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         text_encoder_2: T5EncoderModel,
         tokenizer_2: T5TokenizerFast,
         transformer: FluxTransformer2DModel,
+        image_encoder: CLIPVisionModelWithProjection = None,
+        feature_extractor: CLIPImageProcessor = None,
     ):
         super().__init__()
 
@@ -207,16 +216,19 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
             tokenizer_2=tokenizer_2,
             transformer=transformer,
             scheduler=scheduler,
+            image_encoder=image_encoder,
+            feature_extractor=feature_extractor,
         )
-        self.vae_scale_factor = (
-            2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
-        )
+        self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
         # Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
         # by the patch size. So the vae scale factor is multiplied by the patch size to account for this
-        self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
+        self.latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
+        self.image_processor = VaeImageProcessor(
+            vae_scale_factor=self.vae_scale_factor * 2, vae_latent_channels=self.latent_channels
+        )
         self.mask_processor = VaeImageProcessor(
             vae_scale_factor=self.vae_scale_factor * 2,
-            vae_latent_channels=self.vae.config.latent_channels,
+            vae_latent_channels=self.latent_channels,
             do_normalize=False,
             do_binarize=True,
             do_convert_grayscale=True,
```
```diff
@@ -401,6 +413,55 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
 
         return prompt_embeds, pooled_prompt_embeds, text_ids
 
+    # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.encode_image
+    def encode_image(self, image, device, num_images_per_prompt):
+        dtype = next(self.image_encoder.parameters()).dtype
+
+        if not isinstance(image, torch.Tensor):
+            image = self.feature_extractor(image, return_tensors="pt").pixel_values
+
+        image = image.to(device=device, dtype=dtype)
+        image_embeds = self.image_encoder(image).image_embeds
+        image_embeds = image_embeds.repeat_interleave(num_images_per_prompt, dim=0)
+        return image_embeds
+
+    # Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.prepare_ip_adapter_image_embeds
+    def prepare_ip_adapter_image_embeds(
+        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt
+    ):
+        image_embeds = []
+        if ip_adapter_image_embeds is None:
+            if not isinstance(ip_adapter_image, list):
+                ip_adapter_image = [ip_adapter_image]
+
+            if len(ip_adapter_image) != self.transformer.encoder_hid_proj.num_ip_adapters:
+                raise ValueError(
+                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
+                )
+
+            for single_ip_adapter_image in ip_adapter_image:
+                single_image_embeds = self.encode_image(single_ip_adapter_image, device, 1)
+                image_embeds.append(single_image_embeds[None, :])
+        else:
+            if not isinstance(ip_adapter_image_embeds, list):
+                ip_adapter_image_embeds = [ip_adapter_image_embeds]
+
+            if len(ip_adapter_image_embeds) != self.transformer.encoder_hid_proj.num_ip_adapters:
+                raise ValueError(
+                    f"`ip_adapter_image_embeds` must have same length as the number of IP Adapters. Got {len(ip_adapter_image_embeds)} image embeds and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
+                )
+
+            for single_image_embeds in ip_adapter_image_embeds:
+                image_embeds.append(single_image_embeds)
+
+        ip_adapter_image_embeds = []
+        for single_image_embeds in image_embeds:
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_inpaint.StableDiffusion3InpaintPipeline._encode_vae_image
     def _encode_vae_image(self, image: torch.Tensor, generator: torch.Generator):
         if isinstance(generator, list):
```
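The shape bookkeeping in `prepare_ip_adapter_image_embeds` stacks one embedding per adapter image and then tiles along the batch axis. A toy illustration of just that handling, with an assumed CLIP embedding width of 768:

```python
import torch

single = torch.randn(1, 768)           # encode_image output for one adapter image
stacked = single[None, :]              # (1, 1, 768): leading per-image axis added
batched = torch.cat([stacked] * 2, 0)  # (2, 1, 768): tiled for num_images_per_prompt=2
print(batched.shape)                   # torch.Size([2, 1, 768])
```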
```diff
@@ -438,8 +499,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         height,
         width,
         output_type,
+        negative_prompt=None,
+        negative_prompt_2=None,
         prompt_embeds=None,
+        negative_prompt_embeds=None,
         pooled_prompt_embeds=None,
+        negative_pooled_prompt_embeds=None,
         callback_on_step_end_tensor_inputs=None,
         padding_mask_crop=None,
         max_sequence_length=None,
@@ -478,15 +543,38 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         elif prompt_2 is not None and (not isinstance(prompt_2, str) and not isinstance(prompt_2, list)):
             raise ValueError(f"`prompt_2` has to be of type `str` or `list` but is {type(prompt_2)}")
 
+        if negative_prompt is not None and negative_prompt_embeds is not None:
+            raise ValueError(
+                f"Cannot forward both `negative_prompt`: {negative_prompt} and `negative_prompt_embeds`:"
+                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+            )
+        elif negative_prompt_2 is not None and negative_prompt_embeds is not None:
+            raise ValueError(
+                f"Cannot forward both `negative_prompt_2`: {negative_prompt_2} and `negative_prompt_embeds`:"
+                f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
+            )
+
+        if prompt_embeds is not None and negative_prompt_embeds is not None:
+            if prompt_embeds.shape != negative_prompt_embeds.shape:
+                raise ValueError(
+                    "`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
+                    f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
+                    f" {negative_prompt_embeds.shape}."
+                )
+
         if prompt_embeds is not None and pooled_prompt_embeds is None:
             raise ValueError(
                 "If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
             )
+        if negative_prompt_embeds is not None and negative_pooled_prompt_embeds is None:
+            raise ValueError(
+                "If `negative_prompt_embeds` are provided, `negative_pooled_prompt_embeds` also have to be passed. Make sure to generate `negative_pooled_prompt_embeds` from the same text encoder that was used to generate `negative_prompt_embeds`."
+            )
 
         if padding_mask_crop is not None:
             if not isinstance(image, PIL.Image.Image):
                 raise ValueError(
-                    f"The image should be a PIL image when inpainting mask crop, but is of type" f" {type(image)}."
+                    f"The image should be a PIL image when inpainting mask crop, but is of type {type(image)}."
                 )
             if not isinstance(mask_image, PIL.Image.Image):
                 raise ValueError(
```
```diff
@@ -494,7 +582,7 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
                     f" {type(mask_image)}."
                 )
             if output_type != "pil":
-                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is" f" {output_type}.")
+                raise ValueError(f"The output type should be PIL when inpainting mask crop, but is {output_type}.")
 
         if max_sequence_length is not None and max_sequence_length > 512:
             raise ValueError(f"`max_sequence_length` cannot be greater than 512 but is {max_sequence_length}")
@@ -567,7 +655,10 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         latent_image_ids = self._prepare_latent_image_ids(batch_size, height // 2, width // 2, device, dtype)
 
         image = image.to(device=device, dtype=dtype)
-        image_latents = self._encode_vae_image(image=image, generator=generator)
+        if image.shape[1] != self.latent_channels:
+            image_latents = self._encode_vae_image(image=image, generator=generator)
+        else:
+            image_latents = image
 
         if batch_size > image_latents.shape[0] and batch_size % image_latents.shape[0] == 0:
             # expand init_latents for batch_size
```
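The new branch dispatches on the channel axis: a 3-channel image tensor is still VAE-encoded, while a tensor that already carries `self.latent_channels` channels (16 for Flux, per the fallback in `__init__`) is used as latents directly. A self-contained illustration of just that dispatch:

```python
import torch

latent_channels = 16  # Flux VAE latent width, per the fallback above
pixels = torch.randn(1, 3, 1024, 1024)   # RGB input -> would be VAE-encoded
latents = torch.randn(1, 16, 128, 128)   # latent input -> passed through as-is
for x in (pixels, latents):
    print("encode" if x.shape[1] != latent_channels else "pass through")
```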
```diff
@@ -624,7 +715,9 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         else:
             masked_image_latents = retrieve_latents(self.vae.encode(masked_image), generator=generator)
 
-        masked_image_latents = (masked_image_latents - self.vae.config.shift_factor) * self.vae.config.scaling_factor
+        masked_image_latents = (
+            masked_image_latents - self.vae.config.shift_factor
+        ) * self.vae.config.scaling_factor
 
         # duplicate mask and masked_image_latents for each generation per prompt, using mps friendly method
         if mask.shape[0] < batch_size:
@@ -685,6 +778,9 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         self,
         prompt: Union[str, List[str]] = None,
         prompt_2: Optional[Union[str, List[str]]] = None,
+        negative_prompt: Union[str, List[str]] = None,
+        negative_prompt_2: Optional[Union[str, List[str]]] = None,
+        true_cfg_scale: float = 1.0,
         image: PipelineImageInput = None,
         mask_image: PipelineImageInput = None,
         masked_image_latents: PipelineImageInput = None,
@@ -700,6 +796,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         latents: Optional[torch.FloatTensor] = None,
         prompt_embeds: Optional[torch.FloatTensor] = None,
         pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        ip_adapter_image: Optional[PipelineImageInput] = None,
+        ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
+        negative_ip_adapter_image: Optional[PipelineImageInput] = None,
+        negative_ip_adapter_image_embeds: Optional[List[torch.Tensor]] = None,
+        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -778,6 +880,17 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
             pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
                 Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
                 If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
+            ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
+            negative_ip_adapter_image:
+                (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
+            negative_ip_adapter_image_embeds (`List[torch.Tensor]`, *optional*):
+                Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
+                IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
+                provided, embeddings are computed from the `ip_adapter_image` input argument.
             output_type (`str`, *optional*, defaults to `"pil"`):
                 The output format of the generate image. Choose between
                 [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
```
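Putting the new arguments together, a hedged end-to-end sketch of a 0.33.1-style call; the model id, URLs, and parameter values are illustrative assumptions, not taken from this diff:

```python
import torch
from diffusers import FluxInpaintPipeline
from diffusers.utils import load_image

pipe = FluxInpaintPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
).to("cuda")
image = load_image("https://example.com/source.png")     # placeholder URL
mask_image = load_image("https://example.com/mask.png")  # placeholder URL
result = pipe(
    prompt="a white cat sitting on a park bench",
    negative_prompt="blurry, low quality",  # true CFG kicks in when true_cfg_scale > 1
    true_cfg_scale=2.0,
    image=image,
    mask_image=mask_image,
    strength=0.85,
).images[0]
```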
```diff
@@ -819,8 +932,12 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
             height,
             width,
             output_type=output_type,
+            negative_prompt=negative_prompt,
+            negative_prompt_2=negative_prompt_2,
             prompt_embeds=prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
             pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
             callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
             padding_mask_crop=padding_mask_crop,
             max_sequence_length=max_sequence_length,
@@ -857,6 +974,7 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         lora_scale = (
             self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
         )
+        do_true_cfg = true_cfg_scale > 1 and negative_prompt is not None
         (
             prompt_embeds,
             pooled_prompt_embeds,
@@ -871,16 +989,31 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
             max_sequence_length=max_sequence_length,
             lora_scale=lora_scale,
         )
+        if do_true_cfg:
+            (
+                negative_prompt_embeds,
+                negative_pooled_prompt_embeds,
+                _,
+            ) = self.encode_prompt(
+                prompt=negative_prompt,
+                prompt_2=negative_prompt_2,
+                prompt_embeds=negative_prompt_embeds,
+                pooled_prompt_embeds=negative_pooled_prompt_embeds,
+                device=device,
+                num_images_per_prompt=num_images_per_prompt,
+                max_sequence_length=max_sequence_length,
+                lora_scale=lora_scale,
+            )
 
         # 4.Prepare timesteps
         sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
         image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
         mu = calculate_shift(
             image_seq_len,
-            self.scheduler.config.base_image_seq_len,
-            self.scheduler.config.max_image_seq_len,
-            self.scheduler.config.base_shift,
-            self.scheduler.config.max_shift,
+            self.scheduler.config.get("base_image_seq_len", 256),
+            self.scheduler.config.get("max_image_seq_len", 4096),
+            self.scheduler.config.get("base_shift", 0.5),
+            self.scheduler.config.get("max_shift", 1.15),
         )
         timesteps, num_inference_steps = retrieve_timesteps(
             self.scheduler,
@@ -947,12 +1080,43 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
         else:
             guidance = None
 
+        if (ip_adapter_image is not None or ip_adapter_image_embeds is not None) and (
+            negative_ip_adapter_image is None and negative_ip_adapter_image_embeds is None
+        ):
+            negative_ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
+        elif (ip_adapter_image is None and ip_adapter_image_embeds is None) and (
+            negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None
+        ):
+            ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
+
+        if self.joint_attention_kwargs is None:
+            self._joint_attention_kwargs = {}
+
+        image_embeds = None
+        negative_image_embeds = None
+        if ip_adapter_image is not None or ip_adapter_image_embeds is not None:
+            image_embeds = self.prepare_ip_adapter_image_embeds(
+                ip_adapter_image,
+                ip_adapter_image_embeds,
+                device,
+                batch_size * num_images_per_prompt,
+            )
+        if negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None:
+            negative_image_embeds = self.prepare_ip_adapter_image_embeds(
+                negative_ip_adapter_image,
+                negative_ip_adapter_image_embeds,
+                device,
+                batch_size * num_images_per_prompt,
+            )
+
         # 6. Denoising loop
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 if self.interrupt:
                     continue
 
+                if image_embeds is not None:
+                    self._joint_attention_kwargs["ip_adapter_image_embeds"] = image_embeds
                 # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
                 timestep = t.expand(latents.shape[0]).to(latents.dtype)
                 noise_pred = self.transformer(
@@ -967,6 +1131,22 @@ class FluxInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin):
                     return_dict=False,
                 )[0]
 
+                if do_true_cfg:
+                    if negative_image_embeds is not None:
+                        self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
+                    neg_noise_pred = self.transformer(
+                        hidden_states=latents,
+                        timestep=timestep / 1000,
+                        guidance=guidance,
+                        pooled_projections=negative_pooled_prompt_embeds,
+                        encoder_hidden_states=negative_prompt_embeds,
+                        txt_ids=text_ids,
+                        img_ids=latent_image_ids,
+                        joint_attention_kwargs=self.joint_attention_kwargs,
+                        return_dict=False,
+                    )[0]
+                    noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
+
                 # compute the previous noisy sample x_t -> x_t-1
                 latents_dtype = latents.dtype
                 latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
```
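The `true_cfg_scale` update above is ordinary classifier-free guidance computed from a real negative pass (rather than Flux's distilled guidance embedding): the result extrapolates away from the negative prediction along the positive direction. In isolation:

```python
import torch

def true_cfg(noise_pred: torch.Tensor, neg_noise_pred: torch.Tensor, scale: float) -> torch.Tensor:
    # Same combination as the loop above: move from neg toward pos by `scale`.
    return neg_noise_pred + scale * (noise_pred - neg_noise_pred)

pos, neg = torch.randn(1, 16), torch.randn(1, 16)
assert torch.allclose(true_cfg(pos, neg, 1.0), pos)  # scale 1.0 reduces to the positive branch
```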
diffusers/pipelines/free_noise_utils.py

```diff
@@ -341,9 +341,9 @@ class AnimateDiffFreeNoiseMixin:
                 start_tensor = negative_prompt_embeds[i].unsqueeze(0)
                 end_tensor = negative_prompt_embeds[i + 1].unsqueeze(0)
 
-                negative_prompt_interpolation_embeds[
-                    start_frame : end_frame + 1
-                ] = self._free_noise_prompt_interpolation_callback(start_frame, end_frame, start_tensor, end_tensor)
+                negative_prompt_interpolation_embeds[start_frame : end_frame + 1] = (
+                    self._free_noise_prompt_interpolation_callback(start_frame, end_frame, start_tensor, end_tensor)
+                )
 
         prompt_embeds = prompt_interpolation_embeds
         negative_prompt_embeds = negative_prompt_interpolation_embeds
```
diffusers/pipelines/hunyuan_video/__init__.py

```diff
@@ -22,7 +22,9 @@ except OptionalDependencyNotAvailable:
 
     _dummy_objects.update(get_objects_from_module(dummy_torch_and_transformers_objects))
 else:
+    _import_structure["pipeline_hunyuan_skyreels_image2video"] = ["HunyuanSkyreelsImageToVideoPipeline"]
     _import_structure["pipeline_hunyuan_video"] = ["HunyuanVideoPipeline"]
+    _import_structure["pipeline_hunyuan_video_image2video"] = ["HunyuanVideoImageToVideoPipeline"]
 
 if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     try:
@@ -32,7 +34,9 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
     except OptionalDependencyNotAvailable:
         from ...utils.dummy_torch_and_transformers_objects import *
     else:
+        from .pipeline_hunyuan_skyreels_image2video import HunyuanSkyreelsImageToVideoPipeline
         from .pipeline_hunyuan_video import HunyuanVideoPipeline
+        from .pipeline_hunyuan_video_image2video import HunyuanVideoImageToVideoPipeline
 
 else:
     import sys
```
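Both registrations follow diffusers' lazy-import pattern: names go into `_import_structure` for deferred loading and are imported eagerly only under `TYPE_CHECKING` / `DIFFUSERS_SLOW_IMPORT`. Assuming the library's usual top-level re-export of subpackage pipelines, the new classes should then resolve directly:

```python
# Assumes diffusers' usual top-level re-export of subpackage pipelines.
from diffusers import HunyuanSkyreelsImageToVideoPipeline, HunyuanVideoImageToVideoPipeline
```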