diffusers-0.32.2-py3-none-any.whl → diffusers-0.33.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +186 -3
- diffusers/configuration_utils.py +40 -12
- diffusers/dependency_versions_table.py +9 -2
- diffusers/hooks/__init__.py +9 -0
- diffusers/hooks/faster_cache.py +653 -0
- diffusers/hooks/group_offloading.py +793 -0
- diffusers/hooks/hooks.py +236 -0
- diffusers/hooks/layerwise_casting.py +245 -0
- diffusers/hooks/pyramid_attention_broadcast.py +311 -0
- diffusers/loaders/__init__.py +6 -0
- diffusers/loaders/ip_adapter.py +38 -30
- diffusers/loaders/lora_base.py +121 -86
- diffusers/loaders/lora_conversion_utils.py +504 -44
- diffusers/loaders/lora_pipeline.py +1769 -181
- diffusers/loaders/peft.py +167 -57
- diffusers/loaders/single_file.py +17 -2
- diffusers/loaders/single_file_model.py +53 -5
- diffusers/loaders/single_file_utils.py +646 -72
- diffusers/loaders/textual_inversion.py +9 -9
- diffusers/loaders/transformer_flux.py +8 -9
- diffusers/loaders/transformer_sd3.py +120 -39
- diffusers/loaders/unet.py +20 -7
- diffusers/models/__init__.py +22 -0
- diffusers/models/activations.py +9 -9
- diffusers/models/attention.py +0 -1
- diffusers/models/attention_processor.py +163 -25
- diffusers/models/auto_model.py +169 -0
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +2 -0
- diffusers/models/autoencoders/autoencoder_dc.py +106 -4
- diffusers/models/autoencoders/autoencoder_kl.py +0 -4
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +5 -23
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +17 -55
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +17 -97
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +326 -107
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +1094 -0
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +21 -56
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +11 -42
- diffusers/models/autoencoders/autoencoder_kl_wan.py +855 -0
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -0
- diffusers/models/autoencoders/autoencoder_tiny.py +0 -4
- diffusers/models/autoencoders/consistency_decoder_vae.py +3 -1
- diffusers/models/autoencoders/vae.py +31 -141
- diffusers/models/autoencoders/vq_model.py +3 -0
- diffusers/models/cache_utils.py +108 -0
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -8
- diffusers/models/controlnets/controlnet_flux.py +14 -42
- diffusers/models/controlnets/controlnet_sd3.py +58 -34
- diffusers/models/controlnets/controlnet_sparsectrl.py +4 -7
- diffusers/models/controlnets/controlnet_union.py +27 -18
- diffusers/models/controlnets/controlnet_xs.py +7 -46
- diffusers/models/controlnets/multicontrolnet_union.py +196 -0
- diffusers/models/embeddings.py +18 -7
- diffusers/models/model_loading_utils.py +122 -80
- diffusers/models/modeling_flax_pytorch_utils.py +1 -1
- diffusers/models/modeling_flax_utils.py +1 -1
- diffusers/models/modeling_pytorch_flax_utils.py +1 -1
- diffusers/models/modeling_utils.py +617 -272
- diffusers/models/normalization.py +67 -14
- diffusers/models/resnet.py +1 -1
- diffusers/models/transformers/__init__.py +6 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +9 -35
- diffusers/models/transformers/cogvideox_transformer_3d.py +13 -24
- diffusers/models/transformers/consisid_transformer_3d.py +789 -0
- diffusers/models/transformers/dit_transformer_2d.py +5 -19
- diffusers/models/transformers/hunyuan_transformer_2d.py +4 -3
- diffusers/models/transformers/latte_transformer_3d.py +20 -15
- diffusers/models/transformers/lumina_nextdit2d.py +3 -1
- diffusers/models/transformers/pixart_transformer_2d.py +4 -19
- diffusers/models/transformers/prior_transformer.py +5 -1
- diffusers/models/transformers/sana_transformer.py +144 -40
- diffusers/models/transformers/stable_audio_transformer.py +5 -20
- diffusers/models/transformers/transformer_2d.py +7 -22
- diffusers/models/transformers/transformer_allegro.py +9 -17
- diffusers/models/transformers/transformer_cogview3plus.py +6 -17
- diffusers/models/transformers/transformer_cogview4.py +462 -0
- diffusers/models/transformers/transformer_easyanimate.py +527 -0
- diffusers/models/transformers/transformer_flux.py +68 -110
- diffusers/models/transformers/transformer_hunyuan_video.py +404 -46
- diffusers/models/transformers/transformer_ltx.py +53 -35
- diffusers/models/transformers/transformer_lumina2.py +548 -0
- diffusers/models/transformers/transformer_mochi.py +6 -17
- diffusers/models/transformers/transformer_omnigen.py +469 -0
- diffusers/models/transformers/transformer_sd3.py +56 -86
- diffusers/models/transformers/transformer_temporal.py +5 -11
- diffusers/models/transformers/transformer_wan.py +469 -0
- diffusers/models/unets/unet_1d.py +3 -1
- diffusers/models/unets/unet_2d.py +21 -20
- diffusers/models/unets/unet_2d_blocks.py +19 -243
- diffusers/models/unets/unet_2d_condition.py +4 -6
- diffusers/models/unets/unet_3d_blocks.py +14 -127
- diffusers/models/unets/unet_3d_condition.py +8 -12
- diffusers/models/unets/unet_i2vgen_xl.py +5 -13
- diffusers/models/unets/unet_kandinsky3.py +0 -4
- diffusers/models/unets/unet_motion_model.py +20 -114
- diffusers/models/unets/unet_spatio_temporal_condition.py +7 -8
- diffusers/models/unets/unet_stable_cascade.py +8 -35
- diffusers/models/unets/uvit_2d.py +1 -4
- diffusers/optimization.py +2 -2
- diffusers/pipelines/__init__.py +57 -8
- diffusers/pipelines/allegro/pipeline_allegro.py +22 -2
- diffusers/pipelines/amused/pipeline_amused.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_img2img.py +15 -2
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +15 -3
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +24 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +15 -2
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +16 -4
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +16 -4
- diffusers/pipelines/audioldm/pipeline_audioldm.py +13 -2
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +13 -68
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +39 -9
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +63 -7
- diffusers/pipelines/auto_pipeline.py +35 -14
- diffusers/pipelines/blip_diffusion/blip_image_processing.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +5 -8
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +12 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +22 -6
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +22 -5
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +22 -6
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +12 -4
- diffusers/pipelines/cogview4/__init__.py +49 -0
- diffusers/pipelines/cogview4/pipeline_cogview4.py +684 -0
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +732 -0
- diffusers/pipelines/cogview4/pipeline_output.py +21 -0
- diffusers/pipelines/consisid/__init__.py +49 -0
- diffusers/pipelines/consisid/consisid_utils.py +357 -0
- diffusers/pipelines/consisid/pipeline_consisid.py +974 -0
- diffusers/pipelines/consisid/pipeline_output.py +20 -0
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +11 -0
- diffusers/pipelines/controlnet/pipeline_controlnet.py +6 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +13 -0
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +17 -5
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +31 -12
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +26 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +22 -3
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +26 -25
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +224 -109
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +25 -29
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +7 -4
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +3 -5
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +121 -10
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +122 -11
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -1
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +20 -3
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +14 -2
- diffusers/pipelines/ddim/pipeline_ddim.py +14 -1
- diffusers/pipelines/ddpm/pipeline_ddpm.py +15 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +12 -0
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +14 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +14 -1
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +11 -7
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +11 -7
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +10 -6
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +2 -2
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +11 -7
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +10 -105
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +15 -2
- diffusers/pipelines/easyanimate/__init__.py +52 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +770 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +994 -0
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +1234 -0
- diffusers/pipelines/easyanimate/pipeline_output.py +20 -0
- diffusers/pipelines/flux/pipeline_flux.py +53 -21
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -12
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +6 -10
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +185 -13
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +8 -10
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +16 -16
- diffusers/pipelines/flux/pipeline_flux_fill.py +107 -39
- diffusers/pipelines/flux/pipeline_flux_img2img.py +193 -15
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +199 -19
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hunyuan_video/__init__.py +4 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +804 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +90 -23
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +924 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +3 -5
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +1 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +12 -0
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +13 -1
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +13 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +12 -1
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +12 -0
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +12 -0
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +12 -0
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -8
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +6 -4
- diffusers/pipelines/kolors/text_encoder.py +7 -34
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +12 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +13 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +14 -13
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +12 -1
- diffusers/pipelines/latte/pipeline_latte.py +36 -7
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +67 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +60 -15
- diffusers/pipelines/ltx/__init__.py +2 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +25 -13
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +1194 -0
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +31 -17
- diffusers/pipelines/lumina/__init__.py +2 -2
- diffusers/pipelines/lumina/pipeline_lumina.py +83 -20
- diffusers/pipelines/lumina2/__init__.py +48 -0
- diffusers/pipelines/lumina2/pipeline_lumina2.py +790 -0
- diffusers/pipelines/marigold/__init__.py +2 -0
- diffusers/pipelines/marigold/marigold_image_processing.py +127 -14
- diffusers/pipelines/marigold/pipeline_marigold_depth.py +31 -16
- diffusers/pipelines/marigold/pipeline_marigold_intrinsics.py +721 -0
- diffusers/pipelines/marigold/pipeline_marigold_normals.py +31 -16
- diffusers/pipelines/mochi/pipeline_mochi.py +14 -18
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -1
- diffusers/pipelines/omnigen/__init__.py +50 -0
- diffusers/pipelines/omnigen/pipeline_omnigen.py +512 -0
- diffusers/pipelines/omnigen/processor_omnigen.py +327 -0
- diffusers/pipelines/onnx_utils.py +5 -3
- diffusers/pipelines/pag/pag_utils.py +1 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -1
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +15 -4
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +20 -3
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +1 -3
- diffusers/pipelines/pag/pipeline_pag_kolors.py +6 -4
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +16 -3
- diffusers/pipelines/pag/pipeline_pag_sana.py +65 -8
- diffusers/pipelines/pag/pipeline_pag_sd.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +3 -5
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +13 -1
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +23 -7
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +26 -10
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +12 -4
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +7 -3
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +10 -6
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +13 -3
- diffusers/pipelines/pia/pipeline_pia.py +13 -1
- diffusers/pipelines/pipeline_flax_utils.py +7 -7
- diffusers/pipelines/pipeline_loading_utils.py +193 -83
- diffusers/pipelines/pipeline_utils.py +221 -106
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +17 -5
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +17 -4
- diffusers/pipelines/sana/__init__.py +2 -0
- diffusers/pipelines/sana/pipeline_sana.py +183 -58
- diffusers/pipelines/sana/pipeline_sana_sprint.py +889 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +12 -2
- diffusers/pipelines/shap_e/pipeline_shap_e.py +12 -0
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +12 -0
- diffusers/pipelines/shap_e/renderer.py +6 -6
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +1 -1
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +15 -4
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +12 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +12 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +3 -2
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +3 -3
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +14 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +4 -3
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +5 -4
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +2 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +18 -13
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +30 -8
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +24 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +28 -12
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +39 -18
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +17 -6
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +13 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +20 -3
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +14 -2
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +13 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +16 -17
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +136 -18
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +150 -21
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +15 -3
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +26 -11
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +15 -3
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +22 -4
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +30 -13
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +12 -4
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +15 -3
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -3
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +26 -12
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +16 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +12 -4
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +7 -3
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +10 -6
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +11 -4
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +13 -2
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +18 -4
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +26 -5
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +13 -1
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +28 -6
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +26 -4
- diffusers/pipelines/transformers_loading_utils.py +121 -0
- diffusers/pipelines/unclip/pipeline_unclip.py +11 -1
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +11 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +19 -2
- diffusers/pipelines/wan/__init__.py +51 -0
- diffusers/pipelines/wan/pipeline_output.py +20 -0
- diffusers/pipelines/wan/pipeline_wan.py +593 -0
- diffusers/pipelines/wan/pipeline_wan_i2v.py +722 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +725 -0
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +7 -31
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +12 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +12 -1
- diffusers/quantizers/auto.py +5 -1
- diffusers/quantizers/base.py +5 -9
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +41 -29
- diffusers/quantizers/bitsandbytes/utils.py +30 -20
- diffusers/quantizers/gguf/gguf_quantizer.py +1 -0
- diffusers/quantizers/gguf/utils.py +4 -2
- diffusers/quantizers/quantization_config.py +59 -4
- diffusers/quantizers/quanto/__init__.py +1 -0
- diffusers/quantizers/quanto/quanto_quantizer.py +177 -0
- diffusers/quantizers/quanto/utils.py +60 -0
- diffusers/quantizers/torchao/__init__.py +1 -1
- diffusers/quantizers/torchao/torchao_quantizer.py +47 -2
- diffusers/schedulers/__init__.py +2 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -2
- diffusers/schedulers/scheduling_ddim_inverse.py +1 -1
- diffusers/schedulers/scheduling_ddpm.py +2 -3
- diffusers/schedulers/scheduling_ddpm_parallel.py +1 -2
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +12 -4
- diffusers/schedulers/scheduling_edm_euler.py +45 -10
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +116 -28
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +7 -6
- diffusers/schedulers/scheduling_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_lcm.py +1 -2
- diffusers/schedulers/scheduling_lms_discrete.py +1 -1
- diffusers/schedulers/scheduling_repaint.py +5 -1
- diffusers/schedulers/scheduling_scm.py +265 -0
- diffusers/schedulers/scheduling_tcd.py +1 -2
- diffusers/schedulers/scheduling_utils.py +2 -1
- diffusers/training_utils.py +14 -7
- diffusers/utils/__init__.py +9 -1
- diffusers/utils/constants.py +13 -1
- diffusers/utils/deprecation_utils.py +1 -1
- diffusers/utils/dummy_bitsandbytes_objects.py +17 -0
- diffusers/utils/dummy_gguf_objects.py +17 -0
- diffusers/utils/dummy_optimum_quanto_objects.py +17 -0
- diffusers/utils/dummy_pt_objects.py +233 -0
- diffusers/utils/dummy_torch_and_transformers_and_opencv_objects.py +17 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +270 -0
- diffusers/utils/dummy_torchao_objects.py +17 -0
- diffusers/utils/dynamic_modules_utils.py +1 -1
- diffusers/utils/export_utils.py +28 -3
- diffusers/utils/hub_utils.py +52 -102
- diffusers/utils/import_utils.py +121 -221
- diffusers/utils/loading_utils.py +2 -1
- diffusers/utils/logging.py +1 -2
- diffusers/utils/peft_utils.py +6 -14
- diffusers/utils/remote_utils.py +425 -0
- diffusers/utils/source_code_parsing_utils.py +52 -0
- diffusers/utils/state_dict_utils.py +15 -1
- diffusers/utils/testing_utils.py +243 -13
- diffusers/utils/torch_utils.py +10 -0
- diffusers/utils/typing_utils.py +91 -0
- diffusers/video_processor.py +1 -1
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/METADATA +76 -44
- diffusers-0.33.0.dist-info/RECORD +608 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/WHEEL +1 -1
- diffusers-0.32.2.dist-info/RECORD +0 -550
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/LICENSE +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.32.2.dist-info → diffusers-0.33.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,20 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
|
3
|
+
import torch
|
4
|
+
|
5
|
+
from diffusers.utils import BaseOutput
|
6
|
+
|
7
|
+
|
8
|
+
@dataclass
|
9
|
+
class EasyAnimatePipelineOutput(BaseOutput):
|
10
|
+
r"""
|
11
|
+
Output class for EasyAnimate pipelines.
|
12
|
+
|
13
|
+
Args:
|
14
|
+
frames (`torch.Tensor`, `np.ndarray`, or List[List[PIL.Image.Image]]):
|
15
|
+
List of video outputs - It can be a nested list of length `batch_size,` with each sub-list containing
|
16
|
+
denoised PIL image sequences of length `num_frames.` It can also be a NumPy array or Torch tensor of shape
|
17
|
+
`(batch_size, num_frames, channels, height, width)`.
|
18
|
+
"""
|
19
|
+
|
20
|
+
frames: torch.Tensor
|
@@ -28,8 +28,7 @@ from transformers import (
|
|
28
28
|
|
29
29
|
from ...image_processor import PipelineImageInput, VaeImageProcessor
|
30
30
|
from ...loaders import FluxIPAdapterMixin, FluxLoraLoaderMixin, FromSingleFileMixin, TextualInversionLoaderMixin
|
31
|
-
from ...models.autoencoders import AutoencoderKL
|
32
|
-
from ...models.transformers import FluxTransformer2DModel
|
31
|
+
from ...models import AutoencoderKL, FluxTransformer2DModel
|
33
32
|
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
34
33
|
from ...utils import (
|
35
34
|
USE_PEFT_BACKEND,
|
@@ -76,7 +75,7 @@ def calculate_shift(
|
|
76
75
|
base_seq_len: int = 256,
|
77
76
|
max_seq_len: int = 4096,
|
78
77
|
base_shift: float = 0.5,
|
79
|
-
max_shift: float = 1.16,
|
78
|
+
max_shift: float = 1.15,
|
80
79
|
):
|
81
80
|
m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
|
82
81
|
b = base_shift - m * base_seq_len
|
@@ -206,9 +205,7 @@ class FluxPipeline(
|
|
206
205
|
image_encoder=image_encoder,
|
207
206
|
feature_extractor=feature_extractor,
|
208
207
|
)
|
209
|
-
self.vae_scale_factor = (
|
210
|
-
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
|
211
|
-
)
|
208
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
212
209
|
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
|
213
210
|
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this
|
214
211
|
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
|
@@ -408,23 +405,28 @@ class FluxPipeline(
|
|
408
405
|
if not isinstance(ip_adapter_image, list):
|
409
406
|
ip_adapter_image = [ip_adapter_image]
|
410
407
|
|
411
|
-
if len(ip_adapter_image) != len(self.transformer.encoder_hid_proj.image_projection_layers):
|
408
|
+
if len(ip_adapter_image) != self.transformer.encoder_hid_proj.num_ip_adapters:
|
412
409
|
raise ValueError(
|
413
|
-
f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {
|
410
|
+
f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
|
414
411
|
)
|
415
412
|
|
416
|
-
for single_ip_adapter_image, image_proj_layer in zip(
|
417
|
-
ip_adapter_image, self.transformer.encoder_hid_proj.image_projection_layers
|
418
|
-
):
|
413
|
+
for single_ip_adapter_image in ip_adapter_image:
|
419
414
|
single_image_embeds = self.encode_image(single_ip_adapter_image, device, 1)
|
420
|
-
|
421
415
|
image_embeds.append(single_image_embeds[None, :])
|
422
416
|
else:
|
417
|
+
if not isinstance(ip_adapter_image_embeds, list):
|
418
|
+
ip_adapter_image_embeds = [ip_adapter_image_embeds]
|
419
|
+
|
420
|
+
if len(ip_adapter_image_embeds) != self.transformer.encoder_hid_proj.num_ip_adapters:
|
421
|
+
raise ValueError(
|
422
|
+
f"`ip_adapter_image_embeds` must have same length as the number of IP Adapters. Got {len(ip_adapter_image_embeds)} image embeds and {self.transformer.encoder_hid_proj.num_ip_adapters} IP Adapters."
|
423
|
+
)
|
424
|
+
|
423
425
|
for single_image_embeds in ip_adapter_image_embeds:
|
424
426
|
image_embeds.append(single_image_embeds)
|
425
427
|
|
426
428
|
ip_adapter_image_embeds = []
|
427
|
-
for
|
429
|
+
for single_image_embeds in image_embeds:
|
428
430
|
single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
|
429
431
|
single_image_embeds = single_image_embeds.to(device=device)
|
430
432
|
ip_adapter_image_embeds.append(single_image_embeds)
|
@@ -622,6 +624,10 @@ class FluxPipeline(
|
|
622
624
|
def num_timesteps(self):
|
623
625
|
return self._num_timesteps
|
624
626
|
|
627
|
+
@property
|
628
|
+
def current_timestep(self):
|
629
|
+
return self._current_timestep
|
630
|
+
|
625
631
|
@property
|
626
632
|
def interrupt(self):
|
627
633
|
return self._interrupt
|
@@ -667,7 +673,16 @@ class FluxPipeline(
|
|
667
673
|
instead.
|
668
674
|
prompt_2 (`str` or `List[str]`, *optional*):
|
669
675
|
The prompt or prompts to be sent to `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
|
670
|
-
will be used instead
|
676
|
+
will be used instead.
|
677
|
+
negative_prompt (`str` or `List[str]`, *optional*):
|
678
|
+
The prompt or prompts not to guide the image generation. If not defined, one has to pass
|
679
|
+
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `true_cfg_scale` is
|
680
|
+
not greater than `1`).
|
681
|
+
negative_prompt_2 (`str` or `List[str]`, *optional*):
|
682
|
+
The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
|
683
|
+
`text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
|
684
|
+
true_cfg_scale (`float`, *optional*, defaults to 1.0):
|
685
|
+
When > 1.0 and a provided `negative_prompt`, enables true classifier-free guidance.
|
671
686
|
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
|
672
687
|
The height in pixels of the generated image. This is set to 1024 by default for the best results.
|
673
688
|
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
|
@@ -679,7 +694,7 @@ class FluxPipeline(
|
|
679
694
|
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
680
695
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
681
696
|
will be used.
|
682
|
-
guidance_scale (`float`, *optional*, defaults to
|
697
|
+
guidance_scale (`float`, *optional*, defaults to 3.5):
|
683
698
|
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
684
699
|
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
685
700
|
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
@@ -711,6 +726,14 @@ class FluxPipeline(
|
|
711
726
|
Pre-generated image embeddings for IP-Adapter. It should be a list of length same as number of
|
712
727
|
IP-adapters. Each element should be a tensor of shape `(batch_size, num_images, emb_dim)`. If not
|
713
728
|
provided, embeddings are computed from the `ip_adapter_image` input argument.
|
729
|
+
negative_prompt_embeds (`torch.FloatTensor`, *optional*):
|
730
|
+
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
731
|
+
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
|
732
|
+
argument.
|
733
|
+
negative_pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
|
734
|
+
Pre-generated negative pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
|
735
|
+
weighting. If not provided, pooled negative_prompt_embeds will be generated from `negative_prompt`
|
736
|
+
input argument.
|
714
737
|
output_type (`str`, *optional*, defaults to `"pil"`):
|
715
738
|
The output format of the generate image. Choose between
|
716
739
|
[PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
|
@@ -760,6 +783,7 @@ class FluxPipeline(
|
|
760
783
|
|
761
784
|
self._guidance_scale = guidance_scale
|
762
785
|
self._joint_attention_kwargs = joint_attention_kwargs
|
786
|
+
self._current_timestep = None
|
763
787
|
self._interrupt = False
|
764
788
|
|
765
789
|
# 2. Define call parameters
|
@@ -775,7 +799,10 @@ class FluxPipeline(
|
|
775
799
|
lora_scale = (
|
776
800
|
self.joint_attention_kwargs.get("scale", None) if self.joint_attention_kwargs is not None else None
|
777
801
|
)
|
778
|
-
|
802
|
+
has_neg_prompt = negative_prompt is not None or (
|
803
|
+
negative_prompt_embeds is not None and negative_pooled_prompt_embeds is not None
|
804
|
+
)
|
805
|
+
do_true_cfg = true_cfg_scale > 1 and has_neg_prompt
|
779
806
|
(
|
780
807
|
prompt_embeds,
|
781
808
|
pooled_prompt_embeds,
|
@@ -824,10 +851,10 @@ class FluxPipeline(
|
|
824
851
|
image_seq_len = latents.shape[1]
|
825
852
|
mu = calculate_shift(
|
826
853
|
image_seq_len,
|
827
|
-
self.scheduler.config.base_image_seq_len,
|
828
|
-
self.scheduler.config.max_image_seq_len,
|
829
|
-
self.scheduler.config.base_shift,
|
830
|
-
self.scheduler.config.max_shift,
|
854
|
+
self.scheduler.config.get("base_image_seq_len", 256),
|
855
|
+
self.scheduler.config.get("max_image_seq_len", 4096),
|
856
|
+
self.scheduler.config.get("base_shift", 0.5),
|
857
|
+
self.scheduler.config.get("max_shift", 1.15),
|
831
858
|
)
|
832
859
|
timesteps, num_inference_steps = retrieve_timesteps(
|
833
860
|
self.scheduler,
|
@@ -850,10 +877,13 @@ class FluxPipeline(
|
|
850
877
|
negative_ip_adapter_image is None and negative_ip_adapter_image_embeds is None
|
851
878
|
):
|
852
879
|
negative_ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
|
880
|
+
negative_ip_adapter_image = [negative_ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
|
881
|
+
|
853
882
|
elif (ip_adapter_image is None and ip_adapter_image_embeds is None) and (
|
854
883
|
negative_ip_adapter_image is not None or negative_ip_adapter_image_embeds is not None
|
855
884
|
):
|
856
885
|
ip_adapter_image = np.zeros((width, height, 3), dtype=np.uint8)
|
886
|
+
ip_adapter_image = [ip_adapter_image] * self.transformer.encoder_hid_proj.num_ip_adapters
|
857
887
|
|
858
888
|
if self.joint_attention_kwargs is None:
|
859
889
|
self._joint_attention_kwargs = {}
|
@@ -881,6 +911,7 @@ class FluxPipeline(
|
|
881
911
|
if self.interrupt:
|
882
912
|
continue
|
883
913
|
|
914
|
+
self._current_timestep = t
|
884
915
|
if image_embeds is not None:
|
885
916
|
self._joint_attention_kwargs["ip_adapter_image_embeds"] = image_embeds
|
886
917
|
# broadcast to batch dimension in a way that's compatible with ONNX/Core ML
|
@@ -939,9 +970,10 @@ class FluxPipeline(
|
|
939
970
|
if XLA_AVAILABLE:
|
940
971
|
xm.mark_step()
|
941
972
|
|
973
|
+
self._current_timestep = None
|
974
|
+
|
942
975
|
if output_type == "latent":
|
943
976
|
image = latents
|
944
|
-
|
945
977
|
else:
|
946
978
|
latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
|
947
979
|
latents = (latents / self.vae.config.scaling_factor) + self.vae.config.shift_factor
|
@@ -82,12 +82,13 @@ EXAMPLE_DOC_STRING = """
|
|
82
82
|
"""
|
83
83
|
|
84
84
|
|
85
|
+
# Copied from diffusers.pipelines.flux.pipeline_flux.calculate_shift
|
85
86
|
def calculate_shift(
|
86
87
|
image_seq_len,
|
87
88
|
base_seq_len: int = 256,
|
88
89
|
max_seq_len: int = 4096,
|
89
90
|
base_shift: float = 0.5,
|
90
|
-
max_shift: float = 1.16,
|
91
|
+
max_shift: float = 1.15,
|
91
92
|
):
|
92
93
|
m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
|
93
94
|
b = base_shift - m * base_seq_len
|
@@ -212,12 +213,8 @@ class FluxControlPipeline(
|
|
212
213
|
transformer=transformer,
|
213
214
|
scheduler=scheduler,
|
214
215
|
)
|
215
|
-
self.vae_scale_factor = (
|
216
|
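-
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8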
|
217
|
-
)
|
218
|
-
self.vae_latent_channels = (
|
219
|
-
self.vae.config.latent_channels if hasattr(self, "vae") and self.vae is not None else 16
|
220
|
-
)
|
216
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
217
|
+
self.vae_latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
|
221
218
|
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
|
222
219
|
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this
|
223
220
|
self.image_processor = VaeImageProcessor(
|
@@ -663,7 +660,7 @@ class FluxControlPipeline(
|
|
663
660
|
Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
|
664
661
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
665
662
|
will be used.
|
666
|
-
guidance_scale (`float`, *optional*, defaults to 7.0):
|
663
|
+
guidance_scale (`float`, *optional*, defaults to 3.5):
|
667
664
|
Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598).
|
668
665
|
`guidance_scale` is defined as `w` of equation 2. of [Imagen
|
669
666
|
Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale >
|
@@ -802,10 +799,10 @@ class FluxControlPipeline(
|
|
802
799
|
image_seq_len = latents.shape[1]
|
803
800
|
mu = calculate_shift(
|
804
801
|
image_seq_len,
|
805
|
-
self.scheduler.config.base_image_seq_len,
|
806
|
-
self.scheduler.config.max_image_seq_len,
|
807
|
-
self.scheduler.config.base_shift,
|
808
|
-
self.scheduler.config.max_shift,
|
802
|
+
self.scheduler.config.get("base_image_seq_len", 256),
|
803
|
+
self.scheduler.config.get("max_image_seq_len", 4096),
|
804
|
+
self.scheduler.config.get("base_shift", 0.5),
|
805
|
+
self.scheduler.config.get("max_shift", 1.15),
|
809
806
|
)
|
810
807
|
timesteps, num_inference_steps = retrieve_timesteps(
|
811
808
|
self.scheduler,
|
@@ -93,7 +93,7 @@ def calculate_shift(
|
|
93
93
|
base_seq_len: int = 256,
|
94
94
|
max_seq_len: int = 4096,
|
95
95
|
base_shift: float = 0.5,
|
96
|
-
max_shift: float = 1.16,
|
96
|
+
max_shift: float = 1.15,
|
97
97
|
):
|
98
98
|
m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
|
99
99
|
b = base_shift - m * base_seq_len
|
@@ -227,9 +227,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
227
227
|
transformer=transformer,
|
228
228
|
scheduler=scheduler,
|
229
229
|
)
|
230
|
-
self.vae_scale_factor = (
|
231
|
-
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
|
232
|
-
)
|
230
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
233
231
|
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
|
234
232
|
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this
|
235
233
|
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
|
@@ -440,7 +438,6 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
440
438
|
|
441
439
|
return timesteps, num_inference_steps - t_start
|
442
440
|
|
443
|
-
# Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.check_inputs
|
444
441
|
def check_inputs(
|
445
442
|
self,
|
446
443
|
prompt,
|
@@ -536,7 +533,6 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
536
533
|
|
537
534
|
return latents
|
538
535
|
|
539
|
-
# Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.prepare_latents
|
540
536
|
def prepare_latents(
|
541
537
|
self,
|
542
538
|
image,
|
@@ -809,10 +805,10 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
809
805
|
image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
|
810
806
|
mu = calculate_shift(
|
811
807
|
image_seq_len,
|
812
|
-
self.scheduler.config.base_image_seq_len,
|
813
|
-
self.scheduler.config.max_image_seq_len,
|
814
|
-
self.scheduler.config.base_shift,
|
815
|
-
self.scheduler.config.max_shift,
|
808
|
+
self.scheduler.config.get("base_image_seq_len", 256),
|
809
|
+
self.scheduler.config.get("max_image_seq_len", 4096),
|
810
|
+
self.scheduler.config.get("base_shift", 0.5),
|
811
|
+
self.scheduler.config.get("max_shift", 1.15),
|
816
812
|
)
|
817
813
|
timesteps, num_inference_steps = retrieve_timesteps(
|
818
814
|
self.scheduler,
|
@@ -119,7 +119,7 @@ def calculate_shift(
|
|
119
119
|
base_seq_len: int = 256,
|
120
120
|
max_seq_len: int = 4096,
|
121
121
|
base_shift: float = 0.5,
|
122
|
-
max_shift: float = 1.16,
|
122
|
+
max_shift: float = 1.15,
|
123
123
|
):
|
124
124
|
m = (max_shift - base_shift) / (max_seq_len - base_seq_len)
|
125
125
|
b = base_shift - m * base_seq_len
|
@@ -258,15 +258,14 @@ class FluxControlInpaintPipeline(
|
|
258
258
|
transformer=transformer,
|
259
259
|
scheduler=scheduler,
|
260
260
|
)
|
261
|
-
self.vae_scale_factor = (
|
262
|
-
2 ** (len(self.vae.config.block_out_channels) - 1) if hasattr(self, "vae") and self.vae is not None else 8
|
263
|
-
)
|
261
|
+
self.vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1) if getattr(self, "vae", None) else 8
|
264
262
|
# Flux latents are turned into 2x2 patches and packed. This means the latent width and height has to be divisible
|
265
263
|
# by the patch size. So the vae scale factor is multiplied by the patch size to account for this
|
266
264
|
self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor * 2)
|
265
|
+
latent_channels = self.vae.config.latent_channels if getattr(self, "vae", None) else 16
|
267
266
|
self.mask_processor = VaeImageProcessor(
|
268
267
|
vae_scale_factor=self.vae_scale_factor * 2,
|
269
|
-
vae_latent_channels=self.vae.config.latent_channels,
|
268
|
+
vae_latent_channels=latent_channels,
|
270
269
|
do_normalize=False,
|
271
270
|
do_binarize=True,
|
272
271
|
do_convert_grayscale=True,
|
@@ -478,7 +477,6 @@ class FluxControlInpaintPipeline(
|
|
478
477
|
|
479
478
|
return timesteps, num_inference_steps - t_start
|
480
479
|
|
481
|
-
# Copied from diffusers.pipelines.flux.pipeline_flux_img2img.FluxImg2ImgPipeline.check_inputs
|
482
480
|
def check_inputs(
|
483
481
|
self,
|
484
482
|
prompt,
|
@@ -985,10 +983,10 @@ class FluxControlInpaintPipeline(
|
|
985
983
|
image_seq_len = (int(height) // self.vae_scale_factor // 2) * (int(width) // self.vae_scale_factor // 2)
|
986
984
|
mu = calculate_shift(
|
987
985
|
image_seq_len,
|
988
|
-
self.scheduler.config.base_image_seq_len,
|
989
|
-
self.scheduler.config.max_image_seq_len,
|
990
|
-
self.scheduler.config.base_shift,
|
991
|
-
self.scheduler.config.max_shift,
|
986
|
+
self.scheduler.config.get("base_image_seq_len", 256),
|
987
|
+
self.scheduler.config.get("max_image_seq_len", 4096),
|
988
|
+
self.scheduler.config.get("base_shift", 0.5),
|
989
|
+
self.scheduler.config.get("max_shift", 1.15),
|
992
990
|
)
|
993
991
|
timesteps, num_inference_steps = retrieve_timesteps(
|
994
992
|
self.scheduler,
|