diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
@@ -177,7 +177,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
177
177
|
r"""
|
178
178
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
179
179
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
180
|
-
Flawed](https://
|
180
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
181
181
|
|
182
182
|
Args:
|
183
183
|
noise_cfg (`torch.Tensor`):
|
@@ -515,7 +515,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
|
|
515
515
|
def prepare_extra_step_kwargs(self, generator, eta):
|
516
516
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
517
517
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
518
|
-
# eta corresponds to η in DDIM paper: https://
|
518
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
519
519
|
# and should be between [0, 1]
|
520
520
|
|
521
521
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -653,7 +653,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
|
|
653
653
|
return self._guidance_rescale
|
654
654
|
|
655
655
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
656
|
-
# of the Imagen paper: https://
|
656
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
657
657
|
# corresponds to doing no classifier free guidance.
|
658
658
|
@property
|
659
659
|
def do_classifier_free_guidance(self):
|
@@ -956,7 +956,7 @@ class EasyAnimateControlPipeline(DiffusionPipeline):
|
|
956
956
|
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
957
957
|
|
958
958
|
if self.do_classifier_free_guidance and guidance_rescale > 0.0:
|
959
|
-
# Based on 3.4. in https://
|
959
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
960
960
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
961
961
|
|
962
962
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -199,7 +199,7 @@ def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
|
|
199
199
|
r"""
|
200
200
|
Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
|
201
201
|
Section 3.4 from [Common Diffusion Noise Schedules and Sample Steps are
|
202
|
-
Flawed](https://
|
202
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
203
203
|
|
204
204
|
Args:
|
205
205
|
noise_cfg (`torch.Tensor`):
|
@@ -557,7 +557,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
|
|
557
557
|
def prepare_extra_step_kwargs(self, generator, eta):
|
558
558
|
# prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
|
559
559
|
# eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
|
560
|
-
# eta corresponds to η in DDIM paper: https://
|
560
|
+
# eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
|
561
561
|
# and should be between [0, 1]
|
562
562
|
|
563
563
|
accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
|
@@ -771,7 +771,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
|
|
771
771
|
return self._guidance_rescale
|
772
772
|
|
773
773
|
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
|
774
|
-
# of the Imagen paper: https://
|
774
|
+
# of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
|
775
775
|
# corresponds to doing no classifier free guidance.
|
776
776
|
@property
|
777
777
|
def do_classifier_free_guidance(self):
|
@@ -849,7 +849,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
|
|
849
849
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
850
850
|
The number of images to generate per prompt.
|
851
851
|
eta (`float`, *optional*, defaults to 0.0):
|
852
|
-
A parameter defined in the [DDIM](https://
|
852
|
+
A parameter defined in the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only applies to the
|
853
853
|
[`~schedulers.DDIMScheduler`] and is ignored in other schedulers. It adjusts noise level during the
|
854
854
|
inference process.
|
855
855
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -883,7 +883,8 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
|
|
883
883
|
inputs will be passed, facilitating enhanced logging or monitoring of the generation process.
|
884
884
|
guidance_rescale (`float`, *optional*, defaults to 0.0):
|
885
885
|
Rescale parameter for adjusting noise configuration based on guidance rescale. Based on findings from
|
886
|
-
[Common Diffusion Noise Schedules and Sample Steps are
|
886
|
+
[Common Diffusion Noise Schedules and Sample Steps are
|
887
|
+
Flawed](https://huggingface.co/papers/2305.08891).
|
887
888
|
strength (`float`, *optional*, defaults to 1.0):
|
888
889
|
Affects the overall styling or quality of the generated output. Values closer to 1 usually provide
|
889
890
|
direct adherence to prompts.
|
@@ -1180,7 +1181,7 @@ class EasyAnimateInpaintPipeline(DiffusionPipeline):
|
|
1180
1181
|
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
1181
1182
|
|
1182
1183
|
if self.do_classifier_free_guidance and guidance_rescale > 0.0:
|
1183
|
-
# Based on 3.4. in https://
|
1184
|
+
# Based on 3.4. in https://huggingface.co/papers/2305.08891
|
1184
1185
|
noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=guidance_rescale)
|
1185
1186
|
|
1186
1187
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -33,6 +33,8 @@ else:
|
|
33
33
|
_import_structure["pipeline_flux_fill"] = ["FluxFillPipeline"]
|
34
34
|
_import_structure["pipeline_flux_img2img"] = ["FluxImg2ImgPipeline"]
|
35
35
|
_import_structure["pipeline_flux_inpaint"] = ["FluxInpaintPipeline"]
|
36
|
+
_import_structure["pipeline_flux_kontext"] = ["FluxKontextPipeline"]
|
37
|
+
_import_structure["pipeline_flux_kontext_inpaint"] = ["FluxKontextInpaintPipeline"]
|
36
38
|
_import_structure["pipeline_flux_prior_redux"] = ["FluxPriorReduxPipeline"]
|
37
39
|
if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
38
40
|
try:
|
@@ -52,6 +54,8 @@ if TYPE_CHECKING or DIFFUSERS_SLOW_IMPORT:
|
|
52
54
|
from .pipeline_flux_fill import FluxFillPipeline
|
53
55
|
from .pipeline_flux_img2img import FluxImg2ImgPipeline
|
54
56
|
from .pipeline_flux_inpaint import FluxInpaintPipeline
|
57
|
+
from .pipeline_flux_kontext import FluxKontextPipeline
|
58
|
+
from .pipeline_flux_kontext_inpaint import FluxKontextInpaintPipeline
|
55
59
|
from .pipeline_flux_prior_redux import FluxPriorReduxPipeline
|
56
60
|
else:
|
57
61
|
import sys
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -310,7 +310,7 @@ class FluxPipeline(
|
|
310
310
|
def encode_prompt(
|
311
311
|
self,
|
312
312
|
prompt: Union[str, List[str]],
|
313
|
-
prompt_2: Union[str, List[str]],
|
313
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
314
314
|
device: Optional[torch.device] = None,
|
315
315
|
num_images_per_prompt: int = 1,
|
316
316
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -490,14 +490,6 @@ class FluxPipeline(
|
|
490
490
|
f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
|
491
491
|
)
|
492
492
|
|
493
|
-
if prompt_embeds is not None and negative_prompt_embeds is not None:
|
494
|
-
if prompt_embeds.shape != negative_prompt_embeds.shape:
|
495
|
-
raise ValueError(
|
496
|
-
"`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but"
|
497
|
-
f" got: `prompt_embeds` {prompt_embeds.shape} != `negative_prompt_embeds`"
|
498
|
-
f" {negative_prompt_embeds.shape}."
|
499
|
-
)
|
500
|
-
|
501
493
|
if prompt_embeds is not None and pooled_prompt_embeds is None:
|
502
494
|
raise ValueError(
|
503
495
|
"If `prompt_embeds` are provided, `pooled_prompt_embeds` also have to be passed. Make sure to generate `pooled_prompt_embeds` from the same text encoder that was used to generate `prompt_embeds`."
|
@@ -682,7 +674,8 @@ class FluxPipeline(
|
|
682
674
|
The prompt or prompts not to guide the image generation to be sent to `tokenizer_2` and
|
683
675
|
`text_encoder_2`. If not defined, `negative_prompt` is used in all the text-encoders.
|
684
676
|
true_cfg_scale (`float`, *optional*, defaults to 1.0):
|
685
|
-
|
677
|
+
True classifier-free guidance (guidance scale) is enabled when `true_cfg_scale` > 1 and
|
678
|
+
`negative_prompt` is provided.
|
686
679
|
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
|
687
680
|
The height in pixels of the generated image. This is set to 1024 by default for the best results.
|
688
681
|
width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
|
@@ -695,11 +688,11 @@ class FluxPipeline(
|
|
695
688
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
696
689
|
will be used.
|
697
690
|
guidance_scale (`float`, *optional*, defaults to 3.5):
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
691
|
+
Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
|
692
|
+
a model to generate images more aligned with `prompt` at the expense of lower image quality.
|
693
|
+
|
694
|
+
Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
|
695
|
+
the [paper](https://huggingface.co/papers/2210.03142) to learn more.
|
703
696
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
704
697
|
The number of images to generate per prompt.
|
705
698
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -708,7 +701,7 @@ class FluxPipeline(
|
|
708
701
|
latents (`torch.FloatTensor`, *optional*):
|
709
702
|
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
|
710
703
|
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
|
711
|
-
tensor will
|
704
|
+
tensor will be generated by sampling using the supplied random `generator`.
|
712
705
|
prompt_embeds (`torch.FloatTensor`, *optional*):
|
713
706
|
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
|
714
707
|
provided, text embeddings will be generated from `prompt` input argument.
|
@@ -821,7 +814,7 @@ class FluxPipeline(
|
|
821
814
|
(
|
822
815
|
negative_prompt_embeds,
|
823
816
|
negative_pooled_prompt_embeds,
|
824
|
-
|
817
|
+
negative_text_ids,
|
825
818
|
) = self.encode_prompt(
|
826
819
|
prompt=negative_prompt,
|
827
820
|
prompt_2=negative_prompt_2,
|
@@ -848,6 +841,8 @@ class FluxPipeline(
|
|
848
841
|
|
849
842
|
# 5. Prepare timesteps
|
850
843
|
sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps) if sigmas is None else sigmas
|
844
|
+
if hasattr(self.scheduler.config, "use_flow_sigmas") and self.scheduler.config.use_flow_sigmas:
|
845
|
+
sigmas = None
|
851
846
|
image_seq_len = latents.shape[1]
|
852
847
|
mu = calculate_shift(
|
853
848
|
image_seq_len,
|
@@ -906,6 +901,9 @@ class FluxPipeline(
|
|
906
901
|
)
|
907
902
|
|
908
903
|
# 6. Denoising loop
|
904
|
+
# We set the index here to remove DtoH sync, helpful especially during compilation.
|
905
|
+
# Check out more details here: https://github.com/huggingface/diffusers/pull/11696
|
906
|
+
self.scheduler.set_begin_index(0)
|
909
907
|
with self.progress_bar(total=num_inference_steps) as progress_bar:
|
910
908
|
for i, t in enumerate(timesteps):
|
911
909
|
if self.interrupt:
|
@@ -917,32 +915,35 @@ class FluxPipeline(
|
|
917
915
|
# broadcast to batch dimension in a way that's compatible with ONNX/Core ML
|
918
916
|
timestep = t.expand(latents.shape[0]).to(latents.dtype)
|
919
917
|
|
920
|
-
|
921
|
-
|
922
|
-
timestep=timestep / 1000,
|
923
|
-
guidance=guidance,
|
924
|
-
pooled_projections=pooled_prompt_embeds,
|
925
|
-
encoder_hidden_states=prompt_embeds,
|
926
|
-
txt_ids=text_ids,
|
927
|
-
img_ids=latent_image_ids,
|
928
|
-
joint_attention_kwargs=self.joint_attention_kwargs,
|
929
|
-
return_dict=False,
|
930
|
-
)[0]
|
931
|
-
|
932
|
-
if do_true_cfg:
|
933
|
-
if negative_image_embeds is not None:
|
934
|
-
self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
|
935
|
-
neg_noise_pred = self.transformer(
|
918
|
+
with self.transformer.cache_context("cond"):
|
919
|
+
noise_pred = self.transformer(
|
936
920
|
hidden_states=latents,
|
937
921
|
timestep=timestep / 1000,
|
938
922
|
guidance=guidance,
|
939
|
-
pooled_projections=
|
940
|
-
encoder_hidden_states=
|
923
|
+
pooled_projections=pooled_prompt_embeds,
|
924
|
+
encoder_hidden_states=prompt_embeds,
|
941
925
|
txt_ids=text_ids,
|
942
926
|
img_ids=latent_image_ids,
|
943
927
|
joint_attention_kwargs=self.joint_attention_kwargs,
|
944
928
|
return_dict=False,
|
945
929
|
)[0]
|
930
|
+
|
931
|
+
if do_true_cfg:
|
932
|
+
if negative_image_embeds is not None:
|
933
|
+
self._joint_attention_kwargs["ip_adapter_image_embeds"] = negative_image_embeds
|
934
|
+
|
935
|
+
with self.transformer.cache_context("uncond"):
|
936
|
+
neg_noise_pred = self.transformer(
|
937
|
+
hidden_states=latents,
|
938
|
+
timestep=timestep / 1000,
|
939
|
+
guidance=guidance,
|
940
|
+
pooled_projections=negative_pooled_prompt_embeds,
|
941
|
+
encoder_hidden_states=negative_prompt_embeds,
|
942
|
+
txt_ids=negative_text_ids,
|
943
|
+
img_ids=latent_image_ids,
|
944
|
+
joint_attention_kwargs=self.joint_attention_kwargs,
|
945
|
+
return_dict=False,
|
946
|
+
)[0]
|
946
947
|
noise_pred = neg_noise_pred + true_cfg_scale * (noise_pred - neg_noise_pred)
|
947
948
|
|
948
949
|
# compute the previous noisy sample x_t -> x_t-1
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -163,9 +163,9 @@ class FluxControlPipeline(
|
|
163
163
|
TextualInversionLoaderMixin,
|
164
164
|
):
|
165
165
|
r"""
|
166
|
-
The Flux pipeline for controllable text-to-image generation.
|
166
|
+
The Flux pipeline for controllable text-to-image generation with image conditions.
|
167
167
|
|
168
|
-
Reference: https://
|
168
|
+
Reference: https://bfl.ai/flux-1-tools
|
169
169
|
|
170
170
|
Args:
|
171
171
|
transformer ([`FluxTransformer2DModel`]):
|
@@ -324,7 +324,7 @@ class FluxControlPipeline(
|
|
324
324
|
def encode_prompt(
|
325
325
|
self,
|
326
326
|
prompt: Union[str, List[str]],
|
327
|
-
prompt_2: Union[str, List[str]],
|
327
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
328
328
|
device: Optional[torch.device] = None,
|
329
329
|
num_images_per_prompt: int = 1,
|
330
330
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -661,11 +661,11 @@ class FluxControlPipeline(
|
|
661
661
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
662
662
|
will be used.
|
663
663
|
guidance_scale (`float`, *optional*, defaults to 3.5):
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
664
|
+
Embedded guidance scale is enabled by setting `guidance_scale` > 1. Higher `guidance_scale` encourages
|
665
|
+
a model to generate images more aligned with prompt at the expense of lower image quality.
|
666
|
+
|
667
|
+
Guidance-distilled models approximate true classifier-free guidance for `guidance_scale` > 1. Refer to
|
668
|
+
the [paper](https://huggingface.co/papers/2210.03142) to learn more.
|
669
669
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
670
670
|
The number of images to generate per prompt.
|
671
671
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -335,7 +335,7 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
335
335
|
def encode_prompt(
|
336
336
|
self,
|
337
337
|
prompt: Union[str, List[str]],
|
338
|
-
prompt_2: Union[str, List[str]],
|
338
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
339
339
|
device: Optional[torch.device] = None,
|
340
340
|
num_images_per_prompt: int = 1,
|
341
341
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -699,11 +699,11 @@ class FluxControlImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSin
|
|
699
699
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
700
700
|
will be used.
|
701
701
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
702
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
703
|
-
`guidance_scale` is defined as `w` of equation 2.
|
704
|
-
Paper](https://
|
705
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
706
|
-
usually at the expense of lower image quality.
|
702
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
703
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
704
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
705
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
706
|
+
the text `prompt`, usually at the expense of lower image quality.
|
707
707
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
708
708
|
The number of images to generate per prompt.
|
709
709
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -374,7 +374,7 @@ class FluxControlInpaintPipeline(
|
|
374
374
|
def encode_prompt(
|
375
375
|
self,
|
376
376
|
prompt: Union[str, List[str]],
|
377
|
-
prompt_2: Union[str, List[str]],
|
377
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
378
378
|
device: Optional[torch.device] = None,
|
379
379
|
num_images_per_prompt: int = 1,
|
380
380
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -857,11 +857,11 @@ class FluxControlInpaintPipeline(
|
|
857
857
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
858
858
|
will be used.
|
859
859
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
860
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
861
|
-
`guidance_scale` is defined as `w` of equation 2.
|
862
|
-
Paper](https://
|
863
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
864
|
-
usually at the expense of lower image quality.
|
860
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
861
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
862
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
863
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
864
|
+
the text `prompt`, usually at the expense of lower image quality.
|
865
865
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
866
866
|
The number of images to generate per prompt.
|
867
867
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs, The HuggingFace Team and The InstantX Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -341,7 +341,7 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
|
|
341
341
|
def encode_prompt(
|
342
342
|
self,
|
343
343
|
prompt: Union[str, List[str]],
|
344
|
-
prompt_2: Union[str, List[str]],
|
344
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
345
345
|
device: Optional[torch.device] = None,
|
346
346
|
num_images_per_prompt: int = 1,
|
347
347
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -733,11 +733,11 @@ class FluxControlNetPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleF
|
|
733
733
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
734
734
|
will be used.
|
735
735
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
736
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
737
|
-
`guidance_scale` is defined as `w` of equation 2.
|
738
|
-
Paper](https://
|
739
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
740
|
-
usually at the expense of lower image quality.
|
736
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
737
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
738
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
739
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
740
|
+
the text `prompt`, usually at the expense of lower image quality.
|
741
741
|
control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
|
742
742
|
The percentage of total steps at which the ControlNet starts applying.
|
743
743
|
control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
|
@@ -335,7 +335,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
335
335
|
def encode_prompt(
|
336
336
|
self,
|
337
337
|
prompt: Union[str, List[str]],
|
338
|
-
prompt_2: Union[str, List[str]],
|
338
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
339
339
|
device: Optional[torch.device] = None,
|
340
340
|
num_images_per_prompt: int = 1,
|
341
341
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -687,7 +687,8 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
687
687
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
688
688
|
will be used.
|
689
689
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
690
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
690
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
691
|
+
Guidance](https://huggingface.co/papers/2207.12598).
|
691
692
|
control_mode (`int` or `List[int]`, *optional*):
|
692
693
|
The mode for the ControlNet. If multiple ControlNets are used, this should be a list.
|
693
694
|
controlnet_conditioning_scale (`float` or `List[float]`, *optional*, defaults to 1.0):
|
@@ -800,17 +801,20 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
800
801
|
)
|
801
802
|
height, width = control_image.shape[-2:]
|
802
803
|
|
803
|
-
|
804
|
-
|
804
|
+
# xlab controlnet has an input_hint_block and instantx controlnet does not
|
805
|
+
controlnet_blocks_repeat = False if self.controlnet.input_hint_block is None else True
|
806
|
+
if self.controlnet.input_hint_block is None:
|
807
|
+
control_image = retrieve_latents(self.vae.encode(control_image), generator=generator)
|
808
|
+
control_image = (control_image - self.vae.config.shift_factor) * self.vae.config.scaling_factor
|
805
809
|
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
810
|
+
height_control_image, width_control_image = control_image.shape[2:]
|
811
|
+
control_image = self._pack_latents(
|
812
|
+
control_image,
|
813
|
+
batch_size * num_images_per_prompt,
|
814
|
+
num_channels_latents,
|
815
|
+
height_control_image,
|
816
|
+
width_control_image,
|
817
|
+
)
|
814
818
|
|
815
819
|
if control_mode is not None:
|
816
820
|
control_mode = torch.tensor(control_mode).to(device, dtype=torch.long)
|
@@ -819,7 +823,9 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
819
823
|
elif isinstance(self.controlnet, FluxMultiControlNetModel):
|
820
824
|
control_images = []
|
821
825
|
|
822
|
-
|
826
|
+
# xlab controlnet has an input_hint_block and instantx controlnet does not
|
827
|
+
controlnet_blocks_repeat = False if self.controlnet.nets[0].input_hint_block is None else True
|
828
|
+
for i, control_image_ in enumerate(control_image):
|
823
829
|
control_image_ = self.prepare_image(
|
824
830
|
image=control_image_,
|
825
831
|
width=width,
|
@@ -831,17 +837,18 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
831
837
|
)
|
832
838
|
height, width = control_image_.shape[-2:]
|
833
839
|
|
834
|
-
|
835
|
-
|
840
|
+
if self.controlnet.nets[0].input_hint_block is None:
|
841
|
+
control_image_ = retrieve_latents(self.vae.encode(control_image_), generator=generator)
|
842
|
+
control_image_ = (control_image_ - self.vae.config.shift_factor) * self.vae.config.scaling_factor
|
836
843
|
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
844
|
+
height_control_image, width_control_image = control_image_.shape[2:]
|
845
|
+
control_image_ = self._pack_latents(
|
846
|
+
control_image_,
|
847
|
+
batch_size * num_images_per_prompt,
|
848
|
+
num_channels_latents,
|
849
|
+
height_control_image,
|
850
|
+
width_control_image,
|
851
|
+
)
|
845
852
|
|
846
853
|
control_images.append(control_image_)
|
847
854
|
|
@@ -955,6 +962,7 @@ class FluxControlNetImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
955
962
|
img_ids=latent_image_ids,
|
956
963
|
joint_attention_kwargs=self.joint_attention_kwargs,
|
957
964
|
return_dict=False,
|
965
|
+
controlnet_blocks_repeat=controlnet_blocks_repeat,
|
958
966
|
)[0]
|
959
967
|
|
960
968
|
latents_dtype = latents.dtype
|
@@ -346,7 +346,7 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
346
346
|
def encode_prompt(
|
347
347
|
self,
|
348
348
|
prompt: Union[str, List[str]],
|
349
|
-
prompt_2: Union[str, List[str]],
|
349
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
350
350
|
device: Optional[torch.device] = None,
|
351
351
|
num_images_per_prompt: int = 1,
|
352
352
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -801,7 +801,8 @@ class FluxControlNetInpaintPipeline(DiffusionPipeline, FluxLoraLoaderMixin, From
|
|
801
801
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
802
802
|
will be used.
|
803
803
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
804
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
804
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
805
|
+
Guidance](https://huggingface.co/papers/2207.12598).
|
805
806
|
control_guidance_start (`float` or `List[float]`, *optional*, defaults to 0.0):
|
806
807
|
The percentage of total steps at which the ControlNet starts applying.
|
807
808
|
control_guidance_end (`float` or `List[float]`, *optional*, defaults to 1.0):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -419,7 +419,7 @@ class FluxFillPipeline(
|
|
419
419
|
def encode_prompt(
|
420
420
|
self,
|
421
421
|
prompt: Union[str, List[str]],
|
422
|
-
prompt_2: Union[str, List[str]],
|
422
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
423
423
|
device: Optional[torch.device] = None,
|
424
424
|
num_images_per_prompt: int = 1,
|
425
425
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -794,11 +794,11 @@ class FluxFillPipeline(
|
|
794
794
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
795
795
|
will be used.
|
796
796
|
guidance_scale (`float`, *optional*, defaults to 30.0):
|
797
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
798
|
-
`guidance_scale` is defined as `w` of equation 2.
|
799
|
-
Paper](https://
|
800
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
801
|
-
usually at the expense of lower image quality.
|
797
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
798
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2
|
799
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
800
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
801
|
+
the text `prompt`, usually at the expense of lower image quality.
|
802
802
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
803
803
|
The number of images to generate per prompt.
|
804
804
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 Black Forest Labs and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -333,7 +333,7 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
|
333
333
|
def encode_prompt(
|
334
334
|
self,
|
335
335
|
prompt: Union[str, List[str]],
|
336
|
-
prompt_2: Union[str, List[str]],
|
336
|
+
prompt_2: Optional[Union[str, List[str]]] = None,
|
337
337
|
device: Optional[torch.device] = None,
|
338
338
|
num_images_per_prompt: int = 1,
|
339
339
|
prompt_embeds: Optional[torch.FloatTensor] = None,
|
@@ -607,6 +607,39 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
|
607
607
|
|
608
608
|
return latents
|
609
609
|
|
610
|
+
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_slicing
|
611
|
+
def enable_vae_slicing(self):
|
612
|
+
r"""
|
613
|
+
Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
|
614
|
+
compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
|
615
|
+
"""
|
616
|
+
self.vae.enable_slicing()
|
617
|
+
|
618
|
+
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_slicing
|
619
|
+
def disable_vae_slicing(self):
|
620
|
+
r"""
|
621
|
+
Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
|
622
|
+
computing decoding in one step.
|
623
|
+
"""
|
624
|
+
self.vae.disable_slicing()
|
625
|
+
|
626
|
+
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.enable_vae_tiling
|
627
|
+
def enable_vae_tiling(self):
|
628
|
+
r"""
|
629
|
+
Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
|
630
|
+
compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
|
631
|
+
processing larger images.
|
632
|
+
"""
|
633
|
+
self.vae.enable_tiling()
|
634
|
+
|
635
|
+
# Copied from diffusers.pipelines.flux.pipeline_flux.FluxPipeline.disable_vae_tiling
|
636
|
+
def disable_vae_tiling(self):
|
637
|
+
r"""
|
638
|
+
Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
|
639
|
+
computing decoding in one step.
|
640
|
+
"""
|
641
|
+
self.vae.disable_tiling()
|
642
|
+
|
610
643
|
def prepare_latents(
|
611
644
|
self,
|
612
645
|
image,
|
@@ -741,11 +774,11 @@ class FluxImg2ImgPipeline(DiffusionPipeline, FluxLoraLoaderMixin, FromSingleFile
|
|
741
774
|
their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
|
742
775
|
will be used.
|
743
776
|
guidance_scale (`float`, *optional*, defaults to 7.0):
|
744
|
-
Guidance scale as defined in [Classifier-Free Diffusion
|
745
|
-
`guidance_scale` is defined as `w` of equation 2.
|
746
|
-
Paper](https://
|
747
|
-
1`. Higher guidance scale encourages to generate images that are closely linked to
|
748
|
-
usually at the expense of lower image quality.
|
777
|
+
Guidance scale as defined in [Classifier-Free Diffusion
|
778
|
+
Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
|
779
|
+
of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
|
780
|
+
`guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
|
781
|
+
the text `prompt`, usually at the expense of lower image quality.
|
749
782
|
num_images_per_prompt (`int`, *optional*, defaults to 1):
|
750
783
|
The number of images to generate per prompt.
|
751
784
|
generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
|