diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
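
Among the headline additions in this manifest are the new `diffusers/guiders` and `diffusers/modular_pipelines` packages and a pipeline-level quantization config (`diffusers/quantizers/pipe_quant_config.py`). As a hedged illustration of the latter, the class name `PipelineQuantizationConfig` and its keyword arguments below are assumptions inferred from the module path and the diffusers docs, not confirmed by this diff:

```python
# Sketch only: `PipelineQuantizationConfig`, `quant_backend`, `quant_kwargs`, and
# `components_to_quantize` are assumed names; verify against the 0.35.0 docs.
import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",       # backend identifier (assumed)
    quant_kwargs={"load_in_4bit": True},     # forwarded to the backend's config
    components_to_quantize=["transformer"],  # quantize only the transformer
)

pipe = DiffusionPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-dev",          # illustrative model id
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)
```

The hunks below reproduce three of the new `modular_pipelines/flux` files in full.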
diffusers/modular_pipelines/flux/encoders.py
@@ -0,0 +1,412 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import html
+from typing import List, Optional, Union
+
+import regex as re
+import torch
+from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
+
+from ...configuration_utils import FrozenDict
+from ...image_processor import VaeImageProcessor
+from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
+from ...models import AutoencoderKL
+from ...utils import USE_PEFT_BACKEND, is_ftfy_available, logging, scale_lora_layers, unscale_lora_layers
+from ..modular_pipeline import ModularPipelineBlocks, PipelineState
+from ..modular_pipeline_utils import ComponentSpec, ConfigSpec, InputParam, OutputParam
+from .modular_pipeline import FluxModularPipeline
+
+
+if is_ftfy_available():
+    import ftfy
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+def basic_clean(text):
+    text = ftfy.fix_text(text)
+    text = html.unescape(html.unescape(text))
+    return text.strip()
+
+
+def whitespace_clean(text):
+    text = re.sub(r"\s+", " ", text)
+    text = text.strip()
+    return text
+
+
+def prompt_clean(text):
+    text = whitespace_clean(basic_clean(text))
+    return text
+
+
+# Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img.retrieve_latents
+def retrieve_latents(
+    encoder_output: torch.Tensor, generator: Optional[torch.Generator] = None, sample_mode: str = "sample"
+):
+    if hasattr(encoder_output, "latent_dist") and sample_mode == "sample":
+        return encoder_output.latent_dist.sample(generator)
+    elif hasattr(encoder_output, "latent_dist") and sample_mode == "argmax":
+        return encoder_output.latent_dist.mode()
+    elif hasattr(encoder_output, "latents"):
+        return encoder_output.latents
+    else:
+        raise AttributeError("Could not access latents of provided encoder_output")
+
+
+class FluxVaeEncoderStep(ModularPipelineBlocks):
+    model_name = "flux"
+
+    @property
+    def description(self) -> str:
+        return "Vae Encoder step that encode the input image into a latent representation"
+
+    @property
+    def expected_components(self) -> List[ComponentSpec]:
+        return [
+            ComponentSpec("vae", AutoencoderKL),
+            ComponentSpec(
+                "image_processor",
+                VaeImageProcessor,
+                config=FrozenDict({"vae_scale_factor": 16, "vae_latent_channels": 16}),
+                default_creation_method="from_config",
+            ),
+        ]
+
+    @property
+    def inputs(self) -> List[InputParam]:
+        return [
+            InputParam("image", required=True),
+            InputParam("height"),
+            InputParam("width"),
+            InputParam("generator"),
+            InputParam("dtype", type_hint=torch.dtype, description="Data type of model tensor inputs"),
+            InputParam(
+                "preprocess_kwargs",
+                type_hint=Optional[dict],
+                description="A kwargs dictionary that if specified is passed along to the `ImageProcessor` as defined under `self.image_processor` in [diffusers.image_processor.VaeImageProcessor]",
+            ),
+        ]
+
+    @property
+    def intermediate_outputs(self) -> List[OutputParam]:
+        return [
+            OutputParam(
+                "image_latents",
+                type_hint=torch.Tensor,
+                description="The latents representing the reference image for image-to-image/inpainting generation",
+            )
+        ]
+
+    @staticmethod
+    # Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3_inpaint.StableDiffusion3InpaintPipeline._encode_vae_image with self.vae->vae
+    def _encode_vae_image(vae, image: torch.Tensor, generator: torch.Generator):
+        if isinstance(generator, list):
+            image_latents = [
+                retrieve_latents(vae.encode(image[i : i + 1]), generator=generator[i]) for i in range(image.shape[0])
+            ]
+            image_latents = torch.cat(image_latents, dim=0)
+        else:
+            image_latents = retrieve_latents(vae.encode(image), generator=generator)
+
+        image_latents = (image_latents - vae.config.shift_factor) * vae.config.scaling_factor
+
+        return image_latents
+
+    @torch.no_grad()
+    def __call__(self, components: FluxModularPipeline, state: PipelineState) -> PipelineState:
+        block_state = self.get_block_state(state)
+        block_state.preprocess_kwargs = block_state.preprocess_kwargs or {}
+        block_state.device = components._execution_device
+        block_state.dtype = block_state.dtype if block_state.dtype is not None else components.vae.dtype
+
+        block_state.image = components.image_processor.preprocess(
+            block_state.image, height=block_state.height, width=block_state.width, **block_state.preprocess_kwargs
+        )
+        block_state.image = block_state.image.to(device=block_state.device, dtype=block_state.dtype)
+
+        block_state.batch_size = block_state.image.shape[0]
+
+        # if generator is a list, make sure the length of it matches the length of images (both should be batch_size)
+        if isinstance(block_state.generator, list) and len(block_state.generator) != block_state.batch_size:
+            raise ValueError(
+                f"You have passed a list of generators of length {len(block_state.generator)}, but requested an effective batch"
+                f" size of {block_state.batch_size}. Make sure the batch size matches the length of the generators."
+            )
+
+        block_state.image_latents = self._encode_vae_image(
+            components.vae, image=block_state.image, generator=block_state.generator
+        )
+
+        self.set_block_state(state, block_state)
+
+        return components, state
+
+
+class FluxTextEncoderStep(ModularPipelineBlocks):
+    model_name = "flux"
+
+    @property
+    def description(self) -> str:
+        return "Text Encoder step that generate text_embeddings to guide the video generation"
+
+    @property
+    def expected_components(self) -> List[ComponentSpec]:
+        return [
+            ComponentSpec("text_encoder", CLIPTextModel),
+            ComponentSpec("tokenizer", CLIPTokenizer),
+            ComponentSpec("text_encoder_2", T5EncoderModel),
+            ComponentSpec("tokenizer_2", T5TokenizerFast),
+        ]
+
+    @property
+    def expected_configs(self) -> List[ConfigSpec]:
+        return []
+
+    @property
+    def inputs(self) -> List[InputParam]:
+        return [
+            InputParam("prompt"),
+            InputParam("prompt_2"),
+            InputParam("joint_attention_kwargs"),
+        ]
+
+    @property
+    def intermediate_outputs(self) -> List[OutputParam]:
+        return [
+            OutputParam(
+                "prompt_embeds",
+                type_hint=torch.Tensor,
+                description="text embeddings used to guide the image generation",
+            ),
+            OutputParam(
+                "pooled_prompt_embeds",
+                type_hint=torch.Tensor,
+                description="pooled text embeddings used to guide the image generation",
+            ),
+            OutputParam(
+                "text_ids",
+                type_hint=torch.Tensor,
+                description="ids from the text sequence for RoPE",
+            ),
+        ]
+
+    @staticmethod
+    def check_inputs(block_state):
+        for prompt in [block_state.prompt, block_state.prompt_2]:
+            if prompt is not None and (not isinstance(prompt, str) and not isinstance(prompt, list)):
+                raise ValueError(f"`prompt` or `prompt_2` has to be of type `str` or `list` but is {type(prompt)}")
+
+    @staticmethod
+    def _get_t5_prompt_embeds(
+        components,
+        prompt: Union[str, List[str]],
+        num_images_per_prompt: int,
+        max_sequence_length: int,
+        device: torch.device,
+    ):
+        dtype = components.text_encoder_2.dtype
+
+        prompt = [prompt] if isinstance(prompt, str) else prompt
+        batch_size = len(prompt)
+
+        if isinstance(components, TextualInversionLoaderMixin):
+            prompt = components.maybe_convert_prompt(prompt, components.tokenizer_2)
+
+        text_inputs = components.tokenizer_2(
+            prompt,
+            padding="max_length",
+            max_length=max_sequence_length,
+            truncation=True,
+            return_length=False,
+            return_overflowing_tokens=False,
+            return_tensors="pt",
+        )
+        text_input_ids = text_inputs.input_ids
+
+        untruncated_ids = components.tokenizer_2(prompt, padding="longest", return_tensors="pt").input_ids
+        if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
+            removed_text = components.tokenizer_2.batch_decode(untruncated_ids[:, max_sequence_length - 1 : -1])
+            logger.warning(
+                "The following part of your input was truncated because `max_sequence_length` is set to "
+                f" {max_sequence_length} tokens: {removed_text}"
+            )
+
+        prompt_embeds = components.text_encoder_2(text_input_ids.to(device), output_hidden_states=False)[0]
+        prompt_embeds = prompt_embeds.to(dtype=dtype, device=device)
+        _, seq_len, _ = prompt_embeds.shape
+
+        # duplicate text embeddings and attention mask for each generation per prompt, using mps friendly method
+        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt, 1)
+        prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, seq_len, -1)
+
+        return prompt_embeds
+
+    @staticmethod
+    def _get_clip_prompt_embeds(
+        components,
+        prompt: Union[str, List[str]],
+        num_images_per_prompt: int,
+        device: torch.device,
+    ):
+        prompt = [prompt] if isinstance(prompt, str) else prompt
+        batch_size = len(prompt)
+
+        if isinstance(components, TextualInversionLoaderMixin):
+            prompt = components.maybe_convert_prompt(prompt, components.tokenizer)
+
+        text_inputs = components.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=components.tokenizer.model_max_length,
+            truncation=True,
+            return_overflowing_tokens=False,
+            return_length=False,
+            return_tensors="pt",
+        )
+
+        text_input_ids = text_inputs.input_ids
+        tokenizer_max_length = components.tokenizer.model_max_length
+        untruncated_ids = components.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
+        if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
+            removed_text = components.tokenizer.batch_decode(untruncated_ids[:, tokenizer_max_length - 1 : -1])
+            logger.warning(
+                "The following part of your input was truncated because CLIP can only handle sequences up to"
+                f" {tokenizer_max_length} tokens: {removed_text}"
+            )
+        prompt_embeds = components.text_encoder(text_input_ids.to(device), output_hidden_states=False)
+
+        # Use pooled output of CLIPTextModel
+        prompt_embeds = prompt_embeds.pooler_output
+        prompt_embeds = prompt_embeds.to(dtype=components.text_encoder.dtype, device=device)
+
+        # duplicate text embeddings for each generation per prompt, using mps friendly method
+        prompt_embeds = prompt_embeds.repeat(1, num_images_per_prompt)
+        prompt_embeds = prompt_embeds.view(batch_size * num_images_per_prompt, -1)
+
+        return prompt_embeds
+
+    @staticmethod
+    def encode_prompt(
+        components,
+        prompt: Union[str, List[str]],
+        prompt_2: Union[str, List[str]],
+        device: Optional[torch.device] = None,
+        num_images_per_prompt: int = 1,
+        prompt_embeds: Optional[torch.FloatTensor] = None,
+        pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+        max_sequence_length: int = 512,
+        lora_scale: Optional[float] = None,
+    ):
+        r"""
+        Encodes the prompt into text encoder hidden states.
+
+        Args:
+            prompt (`str` or `List[str]`, *optional*):
+                prompt to be encoded
+            prompt_2 (`str` or `List[str]`, *optional*):
+                The prompt or prompts to be sent to the `tokenizer_2` and `text_encoder_2`. If not defined, `prompt` is
+                used in all text-encoders
+            device: (`torch.device`):
+                torch device
+            num_images_per_prompt (`int`):
+                number of images that should be generated per prompt
+            prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
+                provided, text embeddings will be generated from `prompt` input argument.
+            pooled_prompt_embeds (`torch.FloatTensor`, *optional*):
+                Pre-generated pooled text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting.
+                If not provided, pooled text embeddings will be generated from `prompt` input argument.
+            lora_scale (`float`, *optional*):
+                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
+        """
+        device = device or components._execution_device
+
+        # set lora scale so that monkey patched LoRA
+        # function of text encoder can correctly access it
+        if lora_scale is not None and isinstance(components, FluxLoraLoaderMixin):
+            components._lora_scale = lora_scale
+
+            # dynamically adjust the LoRA scale
+            if components.text_encoder is not None and USE_PEFT_BACKEND:
+                scale_lora_layers(components.text_encoder, lora_scale)
+            if components.text_encoder_2 is not None and USE_PEFT_BACKEND:
+                scale_lora_layers(components.text_encoder_2, lora_scale)
+
+        prompt = [prompt] if isinstance(prompt, str) else prompt
+
+        if prompt_embeds is None:
+            prompt_2 = prompt_2 or prompt
+            prompt_2 = [prompt_2] if isinstance(prompt_2, str) else prompt_2
+
+            # We only use the pooled prompt output from the CLIPTextModel
+            pooled_prompt_embeds = FluxTextEncoderStep._get_clip_prompt_embeds(
+                components,
+                prompt=prompt,
+                device=device,
+                num_images_per_prompt=num_images_per_prompt,
+            )
+            prompt_embeds = FluxTextEncoderStep._get_t5_prompt_embeds(
+                components,
+                prompt=prompt_2,
+                num_images_per_prompt=num_images_per_prompt,
+                max_sequence_length=max_sequence_length,
+                device=device,
+            )
+
+        if components.text_encoder is not None:
+            if isinstance(components, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(components.text_encoder, lora_scale)
+
+        if components.text_encoder_2 is not None:
+            if isinstance(components, FluxLoraLoaderMixin) and USE_PEFT_BACKEND:
+                # Retrieve the original scale by scaling back the LoRA layers
+                unscale_lora_layers(components.text_encoder_2, lora_scale)
+
+        dtype = components.text_encoder.dtype if components.text_encoder is not None else torch.bfloat16
+        text_ids = torch.zeros(prompt_embeds.shape[1], 3).to(device=device, dtype=dtype)
+
+        return prompt_embeds, pooled_prompt_embeds, text_ids
+
+    @torch.no_grad()
+    def __call__(self, components: FluxModularPipeline, state: PipelineState) -> PipelineState:
+        # Get inputs and intermediates
+        block_state = self.get_block_state(state)
+        self.check_inputs(block_state)
+
+        block_state.device = components._execution_device
+
+        # Encode input prompt
+        block_state.text_encoder_lora_scale = (
+            block_state.joint_attention_kwargs.get("scale", None)
+            if block_state.joint_attention_kwargs is not None
+            else None
+        )
+        (block_state.prompt_embeds, block_state.pooled_prompt_embeds, block_state.text_ids) = self.encode_prompt(
+            components,
+            prompt=block_state.prompt,
+            prompt_2=None,
+            prompt_embeds=None,
+            pooled_prompt_embeds=None,
+            device=block_state.device,
+            num_images_per_prompt=1,  # TODO: hardcoded for now.
+            lora_scale=block_state.text_encoder_lora_scale,
+        )
+
+        # Add outputs
+        self.set_block_state(state, block_state)
+        return components, state
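
The `retrieve_latents` helper in the hunk above is the same one the standard img2img pipelines copy around, and `_encode_vae_image` just wraps it with Flux's shift/scale. A minimal standalone sketch of that contract, using only the public `AutoencoderKL` API (the model id and tensor shapes are illustrative only):

```python
# Sketch of what retrieve_latents/_encode_vae_image above do: AutoencoderKL.encode
# returns an output carrying a `latent_dist`, which is sampled (or reduced to its
# mode for sample_mode="argmax"), then shifted and scaled by the VAE config.
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("black-forest-labs/FLUX.1-dev", subfolder="vae")  # illustrative model id
image = torch.randn(1, 3, 512, 512)  # stand-in for a preprocessed image in [-1, 1]

with torch.no_grad():
    latents = vae.encode(image).latent_dist.sample(torch.Generator().manual_seed(0))

# Mirrors the last line of `_encode_vae_image` above:
latents = (latents - vae.config.shift_factor) * vae.config.scaling_factor
```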
diffusers/modular_pipelines/flux/modular_blocks.py
@@ -0,0 +1,181 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ...utils import logging
+from ..modular_pipeline import AutoPipelineBlocks, SequentialPipelineBlocks
+from ..modular_pipeline_utils import InsertableDict
+from .before_denoise import (
+    FluxImg2ImgPrepareLatentsStep,
+    FluxImg2ImgSetTimestepsStep,
+    FluxInputStep,
+    FluxPrepareLatentsStep,
+    FluxSetTimestepsStep,
+)
+from .decoders import FluxDecodeStep
+from .denoise import FluxDenoiseStep
+from .encoders import FluxTextEncoderStep, FluxVaeEncoderStep
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+# vae encoder (run before before_denoise)
+class FluxAutoVaeEncoderStep(AutoPipelineBlocks):
+    block_classes = [FluxVaeEncoderStep]
+    block_names = ["img2img"]
+    block_trigger_inputs = ["image"]
+
+    @property
+    def description(self):
+        return (
+            "Vae encoder step that encode the image inputs into their latent representations.\n"
+            + "This is an auto pipeline block that works for img2img tasks.\n"
+            + " - `FluxVaeEncoderStep` (img2img) is used when only `image` is provided."
+            + " - if `image` is provided, step will be skipped."
+        )
+
+
+# before_denoise: text2img, img2img
+class FluxBeforeDenoiseStep(SequentialPipelineBlocks):
+    block_classes = [
+        FluxInputStep,
+        FluxPrepareLatentsStep,
+        FluxSetTimestepsStep,
+    ]
+    block_names = ["input", "prepare_latents", "set_timesteps"]
+
+    @property
+    def description(self):
+        return (
+            "Before denoise step that prepare the inputs for the denoise step.\n"
+            + "This is a sequential pipeline blocks:\n"
+            + " - `FluxInputStep` is used to adjust the batch size of the model inputs\n"
+            + " - `FluxPrepareLatentsStep` is used to prepare the latents\n"
+            + " - `FluxSetTimestepsStep` is used to set the timesteps\n"
+        )
+
+
+# before_denoise: img2img
+class FluxImg2ImgBeforeDenoiseStep(SequentialPipelineBlocks):
+    block_classes = [FluxInputStep, FluxImg2ImgSetTimestepsStep, FluxImg2ImgPrepareLatentsStep]
+    block_names = ["input", "set_timesteps", "prepare_latents"]
+
+    @property
+    def description(self):
+        return (
+            "Before denoise step that prepare the inputs for the denoise step for img2img task.\n"
+            + "This is a sequential pipeline blocks:\n"
+            + " - `FluxInputStep` is used to adjust the batch size of the model inputs\n"
+            + " - `FluxImg2ImgSetTimestepsStep` is used to set the timesteps\n"
+            + " - `FluxImg2ImgPrepareLatentsStep` is used to prepare the latents\n"
+        )
+
+
+# before_denoise: all task (text2img, img2img)
+class FluxAutoBeforeDenoiseStep(AutoPipelineBlocks):
+    block_classes = [FluxBeforeDenoiseStep, FluxImg2ImgBeforeDenoiseStep]
+    block_names = ["text2image", "img2img"]
+    block_trigger_inputs = [None, "image_latents"]
+
+    @property
+    def description(self):
+        return (
+            "Before denoise step that prepare the inputs for the denoise step.\n"
+            + "This is an auto pipeline block that works for text2image.\n"
+            + " - `FluxBeforeDenoiseStep` (text2image) is used.\n"
+            + " - `FluxImg2ImgBeforeDenoiseStep` (img2img) is used when only `image_latents` is provided.\n"
+        )
+
+
+# denoise: text2image
+class FluxAutoDenoiseStep(AutoPipelineBlocks):
+    block_classes = [FluxDenoiseStep]
+    block_names = ["denoise"]
+    block_trigger_inputs = [None]
+
+    @property
+    def description(self) -> str:
+        return (
+            "Denoise step that iteratively denoise the latents. "
+            "This is a auto pipeline block that works for text2image and img2img tasks."
+            " - `FluxDenoiseStep` (denoise) for text2image and img2img tasks."
+        )
+
+
+# decode: all task (text2img, img2img, inpainting)
+class FluxAutoDecodeStep(AutoPipelineBlocks):
+    block_classes = [FluxDecodeStep]
+    block_names = ["non-inpaint"]
+    block_trigger_inputs = [None]
+
+    @property
+    def description(self):
+        return "Decode step that decode the denoised latents into image outputs.\n - `FluxDecodeStep`"
+
+
+# text2image
+class FluxAutoBlocks(SequentialPipelineBlocks):
+    block_classes = [
+        FluxTextEncoderStep,
+        FluxAutoVaeEncoderStep,
+        FluxAutoBeforeDenoiseStep,
+        FluxAutoDenoiseStep,
+        FluxAutoDecodeStep,
+    ]
+    block_names = ["text_encoder", "image_encoder", "before_denoise", "denoise", "decoder"]
+
+    @property
+    def description(self):
+        return (
+            "Auto Modular pipeline for text-to-image and image-to-image using Flux.\n"
+            + "- for text-to-image generation, all you need to provide is `prompt`\n"
+            + "- for image-to-image generation, you need to provide either `image` or `image_latents`"
+        )
+
+
+TEXT2IMAGE_BLOCKS = InsertableDict(
+    [
+        ("text_encoder", FluxTextEncoderStep),
+        ("input", FluxInputStep),
+        ("set_timesteps", FluxSetTimestepsStep),
+        ("prepare_latents", FluxPrepareLatentsStep),
+        ("denoise", FluxDenoiseStep),
+        ("decode", FluxDecodeStep),
+    ]
+)
+
+IMAGE2IMAGE_BLOCKS = InsertableDict(
+    [
+        ("text_encoder", FluxTextEncoderStep),
+        ("image_encoder", FluxVaeEncoderStep),
+        ("input", FluxInputStep),
+        ("set_timesteps", FluxImg2ImgSetTimestepsStep),
+        ("prepare_latents", FluxImg2ImgPrepareLatentsStep),
+        ("denoise", FluxDenoiseStep),
+        ("decode", FluxDecodeStep),
+    ]
+)
+
+AUTO_BLOCKS = InsertableDict(
+    [
+        ("text_encoder", FluxTextEncoderStep),
+        ("image_encoder", FluxAutoVaeEncoderStep),
+        ("before_denoise", FluxAutoBeforeDenoiseStep),
+        ("denoise", FluxAutoDenoiseStep),
+        ("decode", FluxAutoDecodeStep),
+    ]
+)
+
+
+ALL_BLOCKS = {"text2image": TEXT2IMAGE_BLOCKS, "img2img": IMAGE2IMAGE_BLOCKS, "auto": AUTO_BLOCKS}
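
The preset dicts at the end of this file are what a runnable modular pipeline is assembled from. A minimal sketch, assuming the assembly entry points (`from_blocks_dict`, `init_pipeline`, `load_default_components`, and the `output="images"` call convention) behave as their names suggest; none of them are shown in this diff, and the repo id is illustrative:

```python
# Sketch only: assembly-API names below are assumptions, not confirmed by this diff.
import torch
from diffusers.modular_pipelines import SequentialPipelineBlocks
from diffusers.modular_pipelines.flux.modular_blocks import TEXT2IMAGE_BLOCKS

# Compose the preset text2image blocks into one sequential pipeline definition,
# then bind actual model components from a repo.
blocks = SequentialPipelineBlocks.from_blocks_dict(TEXT2IMAGE_BLOCKS)
pipe = blocks.init_pipeline("black-forest-labs/FLUX.1-dev")  # illustrative repo id
pipe.load_default_components(torch_dtype=torch.bfloat16)
pipe.to("cuda")

image = pipe(prompt="a photo of a cat", output="images")[0]
```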
diffusers/modular_pipelines/flux/modular_pipeline.py
@@ -0,0 +1,59 @@
+# Copyright 2025 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from ...loaders import FluxLoraLoaderMixin, TextualInversionLoaderMixin
+from ...utils import logging
+from ..modular_pipeline import ModularPipeline
+
+
+logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+
+
+class FluxModularPipeline(ModularPipeline, FluxLoraLoaderMixin, TextualInversionLoaderMixin):
+    """
+    A ModularPipeline for Flux.
+
+    <Tip warning={true}>
+
+    This is an experimental feature and is likely to change in the future.
+
+    </Tip>
+    """
+
+    @property
+    def default_height(self):
+        return self.default_sample_size * self.vae_scale_factor
+
+    @property
+    def default_width(self):
+        return self.default_sample_size * self.vae_scale_factor
+
+    @property
+    def default_sample_size(self):
+        return 128
+
+    @property
+    def vae_scale_factor(self):
+        vae_scale_factor = 8
+        if getattr(self, "vae", None) is not None:
+            vae_scale_factor = 2 ** (len(self.vae.config.block_out_channels) - 1)
+        return vae_scale_factor
+
+    @property
+    def num_channels_latents(self):
+        num_channels_latents = 16
+        if getattr(self, "transformer", None):
+            num_channels_latents = self.transformer.config.in_channels // 4
+        return num_channels_latents
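
The properties in this last file pin down Flux's defaults: with the standard Flux VAE (whose `block_out_channels` has length 4) the scale factor is 2**(4-1) = 8, so the default resolution is 128 * 8 = 1024x1024, and a transformer with `in_channels = 64` yields 16 latent channels. A quick check of that arithmetic (the config values are typical for the released Flux checkpoints; other checkpoints may differ):

```python
# Worked example of the fallback logic in FluxModularPipeline above.
block_out_channels = [128, 256, 512, 512]      # typical Flux AutoencoderKL config (assumed)
vae_scale_factor = 2 ** (len(block_out_channels) - 1)
assert vae_scale_factor == 8

default_sample_size = 128
print(default_sample_size * vae_scale_factor)  # 1024 -> default 1024x1024 images

transformer_in_channels = 64                   # typical Flux transformer config (assumed)
print(transformer_in_channels // 4)            # 16 latent channels
```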