diffusers 0.33.1__py3-none-any.whl → 0.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffusers/__init__.py +145 -1
- diffusers/callbacks.py +35 -0
- diffusers/commands/__init__.py +1 -1
- diffusers/commands/custom_blocks.py +134 -0
- diffusers/commands/diffusers_cli.py +3 -1
- diffusers/commands/env.py +1 -1
- diffusers/commands/fp16_safetensors.py +2 -2
- diffusers/configuration_utils.py +11 -2
- diffusers/dependency_versions_check.py +1 -1
- diffusers/dependency_versions_table.py +3 -3
- diffusers/experimental/rl/value_guided_sampling.py +1 -1
- diffusers/guiders/__init__.py +41 -0
- diffusers/guiders/adaptive_projected_guidance.py +188 -0
- diffusers/guiders/auto_guidance.py +190 -0
- diffusers/guiders/classifier_free_guidance.py +141 -0
- diffusers/guiders/classifier_free_zero_star_guidance.py +152 -0
- diffusers/guiders/frequency_decoupled_guidance.py +327 -0
- diffusers/guiders/guider_utils.py +309 -0
- diffusers/guiders/perturbed_attention_guidance.py +271 -0
- diffusers/guiders/skip_layer_guidance.py +262 -0
- diffusers/guiders/smoothed_energy_guidance.py +251 -0
- diffusers/guiders/tangential_classifier_free_guidance.py +143 -0
- diffusers/hooks/__init__.py +17 -0
- diffusers/hooks/_common.py +56 -0
- diffusers/hooks/_helpers.py +293 -0
- diffusers/hooks/faster_cache.py +9 -8
- diffusers/hooks/first_block_cache.py +259 -0
- diffusers/hooks/group_offloading.py +332 -227
- diffusers/hooks/hooks.py +58 -3
- diffusers/hooks/layer_skip.py +263 -0
- diffusers/hooks/layerwise_casting.py +5 -10
- diffusers/hooks/pyramid_attention_broadcast.py +15 -12
- diffusers/hooks/smoothed_energy_guidance_utils.py +167 -0
- diffusers/hooks/utils.py +43 -0
- diffusers/image_processor.py +7 -2
- diffusers/loaders/__init__.py +10 -0
- diffusers/loaders/ip_adapter.py +260 -18
- diffusers/loaders/lora_base.py +261 -127
- diffusers/loaders/lora_conversion_utils.py +657 -35
- diffusers/loaders/lora_pipeline.py +2778 -1246
- diffusers/loaders/peft.py +78 -112
- diffusers/loaders/single_file.py +2 -2
- diffusers/loaders/single_file_model.py +64 -15
- diffusers/loaders/single_file_utils.py +395 -7
- diffusers/loaders/textual_inversion.py +3 -2
- diffusers/loaders/transformer_flux.py +10 -11
- diffusers/loaders/transformer_sd3.py +8 -3
- diffusers/loaders/unet.py +24 -21
- diffusers/loaders/unet_loader_utils.py +6 -3
- diffusers/loaders/utils.py +1 -1
- diffusers/models/__init__.py +23 -1
- diffusers/models/activations.py +5 -5
- diffusers/models/adapter.py +2 -3
- diffusers/models/attention.py +488 -7
- diffusers/models/attention_dispatch.py +1218 -0
- diffusers/models/attention_flax.py +10 -10
- diffusers/models/attention_processor.py +113 -667
- diffusers/models/auto_model.py +49 -12
- diffusers/models/autoencoders/__init__.py +2 -0
- diffusers/models/autoencoders/autoencoder_asym_kl.py +4 -4
- diffusers/models/autoencoders/autoencoder_dc.py +17 -4
- diffusers/models/autoencoders/autoencoder_kl.py +5 -5
- diffusers/models/autoencoders/autoencoder_kl_allegro.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_cogvideox.py +6 -6
- diffusers/models/autoencoders/autoencoder_kl_cosmos.py +1110 -0
- diffusers/models/autoencoders/autoencoder_kl_hunyuan_video.py +2 -2
- diffusers/models/autoencoders/autoencoder_kl_ltx.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_magvit.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_mochi.py +3 -3
- diffusers/models/autoencoders/autoencoder_kl_qwenimage.py +1070 -0
- diffusers/models/autoencoders/autoencoder_kl_temporal_decoder.py +4 -4
- diffusers/models/autoencoders/autoencoder_kl_wan.py +626 -62
- diffusers/models/autoencoders/autoencoder_oobleck.py +1 -1
- diffusers/models/autoencoders/autoencoder_tiny.py +3 -3
- diffusers/models/autoencoders/consistency_decoder_vae.py +1 -1
- diffusers/models/autoencoders/vae.py +13 -2
- diffusers/models/autoencoders/vq_model.py +2 -2
- diffusers/models/cache_utils.py +32 -10
- diffusers/models/controlnet.py +1 -1
- diffusers/models/controlnet_flux.py +1 -1
- diffusers/models/controlnet_sd3.py +1 -1
- diffusers/models/controlnet_sparsectrl.py +1 -1
- diffusers/models/controlnets/__init__.py +1 -0
- diffusers/models/controlnets/controlnet.py +3 -3
- diffusers/models/controlnets/controlnet_flax.py +1 -1
- diffusers/models/controlnets/controlnet_flux.py +21 -20
- diffusers/models/controlnets/controlnet_hunyuan.py +2 -2
- diffusers/models/controlnets/controlnet_sana.py +290 -0
- diffusers/models/controlnets/controlnet_sd3.py +1 -1
- diffusers/models/controlnets/controlnet_sparsectrl.py +2 -2
- diffusers/models/controlnets/controlnet_union.py +5 -5
- diffusers/models/controlnets/controlnet_xs.py +7 -7
- diffusers/models/controlnets/multicontrolnet.py +4 -5
- diffusers/models/controlnets/multicontrolnet_union.py +5 -6
- diffusers/models/downsampling.py +2 -2
- diffusers/models/embeddings.py +36 -46
- diffusers/models/embeddings_flax.py +2 -2
- diffusers/models/lora.py +3 -3
- diffusers/models/model_loading_utils.py +233 -1
- diffusers/models/modeling_flax_utils.py +1 -2
- diffusers/models/modeling_utils.py +203 -108
- diffusers/models/normalization.py +4 -4
- diffusers/models/resnet.py +2 -2
- diffusers/models/resnet_flax.py +1 -1
- diffusers/models/transformers/__init__.py +7 -0
- diffusers/models/transformers/auraflow_transformer_2d.py +70 -24
- diffusers/models/transformers/cogvideox_transformer_3d.py +1 -1
- diffusers/models/transformers/consisid_transformer_3d.py +1 -1
- diffusers/models/transformers/dit_transformer_2d.py +2 -2
- diffusers/models/transformers/dual_transformer_2d.py +1 -1
- diffusers/models/transformers/hunyuan_transformer_2d.py +2 -2
- diffusers/models/transformers/latte_transformer_3d.py +4 -5
- diffusers/models/transformers/lumina_nextdit2d.py +2 -2
- diffusers/models/transformers/pixart_transformer_2d.py +3 -3
- diffusers/models/transformers/prior_transformer.py +1 -1
- diffusers/models/transformers/sana_transformer.py +8 -3
- diffusers/models/transformers/stable_audio_transformer.py +5 -9
- diffusers/models/transformers/t5_film_transformer.py +3 -3
- diffusers/models/transformers/transformer_2d.py +1 -1
- diffusers/models/transformers/transformer_allegro.py +1 -1
- diffusers/models/transformers/transformer_chroma.py +641 -0
- diffusers/models/transformers/transformer_cogview3plus.py +5 -10
- diffusers/models/transformers/transformer_cogview4.py +353 -27
- diffusers/models/transformers/transformer_cosmos.py +586 -0
- diffusers/models/transformers/transformer_flux.py +376 -138
- diffusers/models/transformers/transformer_hidream_image.py +942 -0
- diffusers/models/transformers/transformer_hunyuan_video.py +12 -8
- diffusers/models/transformers/transformer_hunyuan_video_framepack.py +416 -0
- diffusers/models/transformers/transformer_ltx.py +105 -24
- diffusers/models/transformers/transformer_lumina2.py +1 -1
- diffusers/models/transformers/transformer_mochi.py +1 -1
- diffusers/models/transformers/transformer_omnigen.py +2 -2
- diffusers/models/transformers/transformer_qwenimage.py +645 -0
- diffusers/models/transformers/transformer_sd3.py +7 -7
- diffusers/models/transformers/transformer_skyreels_v2.py +607 -0
- diffusers/models/transformers/transformer_temporal.py +1 -1
- diffusers/models/transformers/transformer_wan.py +316 -87
- diffusers/models/transformers/transformer_wan_vace.py +387 -0
- diffusers/models/unets/unet_1d.py +1 -1
- diffusers/models/unets/unet_1d_blocks.py +1 -1
- diffusers/models/unets/unet_2d.py +1 -1
- diffusers/models/unets/unet_2d_blocks.py +1 -1
- diffusers/models/unets/unet_2d_blocks_flax.py +8 -7
- diffusers/models/unets/unet_2d_condition.py +4 -3
- diffusers/models/unets/unet_2d_condition_flax.py +2 -2
- diffusers/models/unets/unet_3d_blocks.py +1 -1
- diffusers/models/unets/unet_3d_condition.py +3 -3
- diffusers/models/unets/unet_i2vgen_xl.py +3 -3
- diffusers/models/unets/unet_kandinsky3.py +1 -1
- diffusers/models/unets/unet_motion_model.py +2 -2
- diffusers/models/unets/unet_stable_cascade.py +1 -1
- diffusers/models/upsampling.py +2 -2
- diffusers/models/vae_flax.py +2 -2
- diffusers/models/vq_model.py +1 -1
- diffusers/modular_pipelines/__init__.py +83 -0
- diffusers/modular_pipelines/components_manager.py +1068 -0
- diffusers/modular_pipelines/flux/__init__.py +66 -0
- diffusers/modular_pipelines/flux/before_denoise.py +689 -0
- diffusers/modular_pipelines/flux/decoders.py +109 -0
- diffusers/modular_pipelines/flux/denoise.py +227 -0
- diffusers/modular_pipelines/flux/encoders.py +412 -0
- diffusers/modular_pipelines/flux/modular_blocks.py +181 -0
- diffusers/modular_pipelines/flux/modular_pipeline.py +59 -0
- diffusers/modular_pipelines/modular_pipeline.py +2446 -0
- diffusers/modular_pipelines/modular_pipeline_utils.py +672 -0
- diffusers/modular_pipelines/node_utils.py +665 -0
- diffusers/modular_pipelines/stable_diffusion_xl/__init__.py +77 -0
- diffusers/modular_pipelines/stable_diffusion_xl/before_denoise.py +1874 -0
- diffusers/modular_pipelines/stable_diffusion_xl/decoders.py +208 -0
- diffusers/modular_pipelines/stable_diffusion_xl/denoise.py +771 -0
- diffusers/modular_pipelines/stable_diffusion_xl/encoders.py +887 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_blocks.py +380 -0
- diffusers/modular_pipelines/stable_diffusion_xl/modular_pipeline.py +365 -0
- diffusers/modular_pipelines/wan/__init__.py +66 -0
- diffusers/modular_pipelines/wan/before_denoise.py +365 -0
- diffusers/modular_pipelines/wan/decoders.py +105 -0
- diffusers/modular_pipelines/wan/denoise.py +261 -0
- diffusers/modular_pipelines/wan/encoders.py +242 -0
- diffusers/modular_pipelines/wan/modular_blocks.py +144 -0
- diffusers/modular_pipelines/wan/modular_pipeline.py +90 -0
- diffusers/pipelines/__init__.py +68 -6
- diffusers/pipelines/allegro/pipeline_allegro.py +11 -11
- diffusers/pipelines/amused/pipeline_amused.py +7 -6
- diffusers/pipelines/amused/pipeline_amused_img2img.py +6 -5
- diffusers/pipelines/amused/pipeline_amused_inpaint.py +6 -5
- diffusers/pipelines/animatediff/pipeline_animatediff.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_sdxl.py +16 -15
- diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py +6 -6
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py +5 -5
- diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py +5 -5
- diffusers/pipelines/audioldm/pipeline_audioldm.py +8 -7
- diffusers/pipelines/audioldm2/modeling_audioldm2.py +1 -1
- diffusers/pipelines/audioldm2/pipeline_audioldm2.py +22 -13
- diffusers/pipelines/aura_flow/pipeline_aura_flow.py +48 -11
- diffusers/pipelines/auto_pipeline.py +23 -20
- diffusers/pipelines/blip_diffusion/modeling_blip2.py +1 -1
- diffusers/pipelines/blip_diffusion/modeling_ctx_clip.py +2 -2
- diffusers/pipelines/blip_diffusion/pipeline_blip_diffusion.py +11 -10
- diffusers/pipelines/chroma/__init__.py +49 -0
- diffusers/pipelines/chroma/pipeline_chroma.py +949 -0
- diffusers/pipelines/chroma/pipeline_chroma_img2img.py +1034 -0
- diffusers/pipelines/chroma/pipeline_output.py +21 -0
- diffusers/pipelines/cogvideo/pipeline_cogvideox.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_fun_control.py +17 -16
- diffusers/pipelines/cogvideo/pipeline_cogvideox_image2video.py +18 -17
- diffusers/pipelines/cogvideo/pipeline_cogvideox_video2video.py +17 -16
- diffusers/pipelines/cogview3/pipeline_cogview3plus.py +9 -9
- diffusers/pipelines/cogview4/pipeline_cogview4.py +23 -22
- diffusers/pipelines/cogview4/pipeline_cogview4_control.py +7 -7
- diffusers/pipelines/consisid/consisid_utils.py +2 -2
- diffusers/pipelines/consisid/pipeline_consisid.py +8 -8
- diffusers/pipelines/consistency_models/pipeline_consistency_models.py +1 -1
- diffusers/pipelines/controlnet/pipeline_controlnet.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_blip_diffusion.py +11 -10
- diffusers/pipelines/controlnet/pipeline_controlnet_img2img.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint.py +7 -7
- diffusers/pipelines/controlnet/pipeline_controlnet_inpaint_sd_xl.py +14 -14
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/controlnet/pipeline_controlnet_sd_xl_img2img.py +13 -13
- diffusers/pipelines/controlnet/pipeline_controlnet_union_inpaint_sd_xl.py +226 -107
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl.py +12 -8
- diffusers/pipelines/controlnet/pipeline_controlnet_union_sd_xl_img2img.py +207 -105
- diffusers/pipelines/controlnet/pipeline_flax_controlnet.py +1 -1
- diffusers/pipelines/controlnet_hunyuandit/pipeline_hunyuandit_controlnet.py +8 -8
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet.py +7 -7
- diffusers/pipelines/controlnet_sd3/pipeline_stable_diffusion_3_controlnet_inpainting.py +7 -7
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs.py +12 -10
- diffusers/pipelines/controlnet_xs/pipeline_controlnet_xs_sd_xl.py +9 -7
- diffusers/pipelines/cosmos/__init__.py +54 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_text2image.py +673 -0
- diffusers/pipelines/cosmos/pipeline_cosmos2_video2world.py +792 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_text2world.py +664 -0
- diffusers/pipelines/cosmos/pipeline_cosmos_video2world.py +826 -0
- diffusers/pipelines/cosmos/pipeline_output.py +40 -0
- diffusers/pipelines/dance_diffusion/pipeline_dance_diffusion.py +5 -4
- diffusers/pipelines/ddim/pipeline_ddim.py +4 -4
- diffusers/pipelines/ddpm/pipeline_ddpm.py +1 -1
- diffusers/pipelines/deepfloyd_if/pipeline_if.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_img2img_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_inpainting_superresolution.py +10 -10
- diffusers/pipelines/deepfloyd_if/pipeline_if_superresolution.py +10 -10
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion.py +8 -8
- diffusers/pipelines/deprecated/alt_diffusion/pipeline_alt_diffusion_img2img.py +5 -5
- diffusers/pipelines/deprecated/audio_diffusion/mel.py +1 -1
- diffusers/pipelines/deprecated/audio_diffusion/pipeline_audio_diffusion.py +3 -3
- diffusers/pipelines/deprecated/latent_diffusion_uncond/pipeline_latent_diffusion_uncond.py +1 -1
- diffusers/pipelines/deprecated/pndm/pipeline_pndm.py +2 -2
- diffusers/pipelines/deprecated/repaint/pipeline_repaint.py +4 -3
- diffusers/pipelines/deprecated/score_sde_ve/pipeline_score_sde_ve.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/continuous_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/midi_utils.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/notes_encoder.py +1 -1
- diffusers/pipelines/deprecated/spectrogram_diffusion/pipeline_spectrogram_diffusion.py +1 -1
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_cycle_diffusion.py +8 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_onnx_stable_diffusion_inpaint_legacy.py +9 -9
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_inpaint_legacy.py +10 -10
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_model_editing.py +10 -8
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_paradigms.py +5 -5
- diffusers/pipelines/deprecated/stable_diffusion_variants/pipeline_stable_diffusion_pix2pix_zero.py +18 -18
- diffusers/pipelines/deprecated/stochastic_karras_ve/pipeline_stochastic_karras_ve.py +1 -1
- diffusers/pipelines/deprecated/versatile_diffusion/modeling_text_unet.py +2 -2
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion.py +6 -6
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_dual_guided.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_image_variation.py +5 -5
- diffusers/pipelines/deprecated/versatile_diffusion/pipeline_versatile_diffusion_text_to_image.py +5 -5
- diffusers/pipelines/deprecated/vq_diffusion/pipeline_vq_diffusion.py +1 -1
- diffusers/pipelines/dit/pipeline_dit.py +4 -2
- diffusers/pipelines/easyanimate/pipeline_easyanimate.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_control.py +4 -4
- diffusers/pipelines/easyanimate/pipeline_easyanimate_inpaint.py +7 -6
- diffusers/pipelines/flux/__init__.py +4 -0
- diffusers/pipelines/flux/modeling_flux.py +1 -1
- diffusers/pipelines/flux/pipeline_flux.py +37 -36
- diffusers/pipelines/flux/pipeline_flux_control.py +9 -9
- diffusers/pipelines/flux/pipeline_flux_control_img2img.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_control_inpaint.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_controlnet_image_to_image.py +31 -23
- diffusers/pipelines/flux/pipeline_flux_controlnet_inpainting.py +3 -2
- diffusers/pipelines/flux/pipeline_flux_fill.py +7 -7
- diffusers/pipelines/flux/pipeline_flux_img2img.py +40 -7
- diffusers/pipelines/flux/pipeline_flux_inpaint.py +12 -7
- diffusers/pipelines/flux/pipeline_flux_kontext.py +1134 -0
- diffusers/pipelines/flux/pipeline_flux_kontext_inpaint.py +1460 -0
- diffusers/pipelines/flux/pipeline_flux_prior_redux.py +2 -2
- diffusers/pipelines/flux/pipeline_output.py +6 -4
- diffusers/pipelines/free_init_utils.py +2 -2
- diffusers/pipelines/free_noise_utils.py +3 -3
- diffusers/pipelines/hidream_image/__init__.py +47 -0
- diffusers/pipelines/hidream_image/pipeline_hidream_image.py +1026 -0
- diffusers/pipelines/hidream_image/pipeline_output.py +35 -0
- diffusers/pipelines/hunyuan_video/__init__.py +2 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_skyreels_image2video.py +8 -8
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video.py +26 -25
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_framepack.py +1114 -0
- diffusers/pipelines/hunyuan_video/pipeline_hunyuan_video_image2video.py +71 -15
- diffusers/pipelines/hunyuan_video/pipeline_output.py +19 -0
- diffusers/pipelines/hunyuandit/pipeline_hunyuandit.py +8 -8
- diffusers/pipelines/i2vgen_xl/pipeline_i2vgen_xl.py +10 -8
- diffusers/pipelines/kandinsky/pipeline_kandinsky.py +6 -6
- diffusers/pipelines/kandinsky/pipeline_kandinsky_combined.py +34 -34
- diffusers/pipelines/kandinsky/pipeline_kandinsky_img2img.py +19 -26
- diffusers/pipelines/kandinsky/pipeline_kandinsky_inpaint.py +7 -7
- diffusers/pipelines/kandinsky/pipeline_kandinsky_prior.py +11 -11
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_combined.py +35 -35
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet.py +6 -6
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_controlnet_img2img.py +17 -39
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_img2img.py +17 -45
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_inpainting.py +7 -7
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior.py +10 -10
- diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2_prior_emb2emb.py +10 -10
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3.py +7 -7
- diffusers/pipelines/kandinsky3/pipeline_kandinsky3_img2img.py +17 -38
- diffusers/pipelines/kolors/pipeline_kolors.py +10 -10
- diffusers/pipelines/kolors/pipeline_kolors_img2img.py +12 -12
- diffusers/pipelines/kolors/text_encoder.py +3 -3
- diffusers/pipelines/kolors/tokenizer.py +1 -1
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_img2img.py +2 -2
- diffusers/pipelines/latent_consistency_models/pipeline_latent_consistency_text2img.py +2 -2
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py +1 -1
- diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion_superresolution.py +3 -3
- diffusers/pipelines/latte/pipeline_latte.py +12 -12
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion.py +13 -13
- diffusers/pipelines/ledits_pp/pipeline_leditspp_stable_diffusion_xl.py +17 -16
- diffusers/pipelines/ltx/__init__.py +4 -0
- diffusers/pipelines/ltx/modeling_latent_upsampler.py +188 -0
- diffusers/pipelines/ltx/pipeline_ltx.py +64 -18
- diffusers/pipelines/ltx/pipeline_ltx_condition.py +117 -38
- diffusers/pipelines/ltx/pipeline_ltx_image2video.py +63 -18
- diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.py +277 -0
- diffusers/pipelines/lumina/pipeline_lumina.py +13 -13
- diffusers/pipelines/lumina2/pipeline_lumina2.py +10 -10
- diffusers/pipelines/marigold/marigold_image_processing.py +2 -2
- diffusers/pipelines/mochi/pipeline_mochi.py +15 -14
- diffusers/pipelines/musicldm/pipeline_musicldm.py +16 -13
- diffusers/pipelines/omnigen/pipeline_omnigen.py +13 -11
- diffusers/pipelines/omnigen/processor_omnigen.py +8 -3
- diffusers/pipelines/onnx_utils.py +15 -2
- diffusers/pipelines/pag/pag_utils.py +2 -2
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd.py +12 -8
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_inpaint.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl.py +10 -6
- diffusers/pipelines/pag/pipeline_pag_controlnet_sd_xl_img2img.py +14 -14
- diffusers/pipelines/pag/pipeline_pag_hunyuandit.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_kolors.py +10 -10
- diffusers/pipelines/pag/pipeline_pag_pixart_sigma.py +11 -11
- diffusers/pipelines/pag/pipeline_pag_sana.py +18 -12
- diffusers/pipelines/pag/pipeline_pag_sd.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_3.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_3_img2img.py +7 -7
- diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py +6 -6
- diffusers/pipelines/pag/pipeline_pag_sd_img2img.py +5 -5
- diffusers/pipelines/pag/pipeline_pag_sd_inpaint.py +8 -8
- diffusers/pipelines/pag/pipeline_pag_sd_xl.py +16 -15
- diffusers/pipelines/pag/pipeline_pag_sd_xl_img2img.py +18 -17
- diffusers/pipelines/pag/pipeline_pag_sd_xl_inpaint.py +12 -12
- diffusers/pipelines/paint_by_example/image_encoder.py +1 -1
- diffusers/pipelines/paint_by_example/pipeline_paint_by_example.py +8 -7
- diffusers/pipelines/pia/pipeline_pia.py +8 -6
- diffusers/pipelines/pipeline_flax_utils.py +5 -6
- diffusers/pipelines/pipeline_loading_utils.py +113 -15
- diffusers/pipelines/pipeline_utils.py +127 -48
- diffusers/pipelines/pixart_alpha/pipeline_pixart_alpha.py +14 -12
- diffusers/pipelines/pixart_alpha/pipeline_pixart_sigma.py +31 -11
- diffusers/pipelines/qwenimage/__init__.py +55 -0
- diffusers/pipelines/qwenimage/pipeline_output.py +21 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage.py +726 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_edit.py +882 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_img2img.py +829 -0
- diffusers/pipelines/qwenimage/pipeline_qwenimage_inpaint.py +1015 -0
- diffusers/pipelines/sana/__init__.py +4 -0
- diffusers/pipelines/sana/pipeline_sana.py +23 -21
- diffusers/pipelines/sana/pipeline_sana_controlnet.py +1106 -0
- diffusers/pipelines/sana/pipeline_sana_sprint.py +23 -19
- diffusers/pipelines/sana/pipeline_sana_sprint_img2img.py +981 -0
- diffusers/pipelines/semantic_stable_diffusion/pipeline_semantic_stable_diffusion.py +7 -6
- diffusers/pipelines/shap_e/camera.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e.py +1 -1
- diffusers/pipelines/shap_e/pipeline_shap_e_img2img.py +1 -1
- diffusers/pipelines/shap_e/renderer.py +3 -3
- diffusers/pipelines/skyreels_v2/__init__.py +59 -0
- diffusers/pipelines/skyreels_v2/pipeline_output.py +20 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2.py +610 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing.py +978 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_i2v.py +1059 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_diffusion_forcing_v2v.py +1063 -0
- diffusers/pipelines/skyreels_v2/pipeline_skyreels_v2_i2v.py +745 -0
- diffusers/pipelines/stable_audio/modeling_stable_audio.py +1 -1
- diffusers/pipelines/stable_audio/pipeline_stable_audio.py +5 -5
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade.py +8 -8
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_combined.py +13 -13
- diffusers/pipelines/stable_cascade/pipeline_stable_cascade_prior.py +9 -9
- diffusers/pipelines/stable_diffusion/__init__.py +0 -7
- diffusers/pipelines/stable_diffusion/clip_image_project_model.py +1 -1
- diffusers/pipelines/stable_diffusion/convert_from_ckpt.py +11 -4
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_img2img.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_flax_stable_diffusion_inpaint.py +1 -1
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion.py +12 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_img2img.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_inpaint.py +11 -11
- diffusers/pipelines/stable_diffusion/pipeline_onnx_stable_diffusion_upscale.py +10 -10
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion.py +10 -9
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_depth2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_image_variation.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_inpaint.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_instruct_pix2pix.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_latent_upscale.py +4 -4
- diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py +5 -5
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip.py +7 -7
- diffusers/pipelines/stable_diffusion/pipeline_stable_unclip_img2img.py +5 -5
- diffusers/pipelines/stable_diffusion/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion/safety_checker_flax.py +1 -1
- diffusers/pipelines/stable_diffusion/stable_unclip_image_normalizer.py +1 -1
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3.py +13 -12
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_img2img.py +7 -7
- diffusers/pipelines/stable_diffusion_3/pipeline_stable_diffusion_3_inpaint.py +7 -7
- diffusers/pipelines/stable_diffusion_attend_and_excite/pipeline_stable_diffusion_attend_and_excite.py +12 -8
- diffusers/pipelines/stable_diffusion_diffedit/pipeline_stable_diffusion_diffedit.py +15 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen.py +11 -9
- diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +11 -9
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_k_diffusion.py +18 -12
- diffusers/pipelines/stable_diffusion_k_diffusion/pipeline_stable_diffusion_xl_k_diffusion.py +11 -8
- diffusers/pipelines/stable_diffusion_ldm3d/pipeline_stable_diffusion_ldm3d.py +11 -8
- diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py +15 -12
- diffusers/pipelines/stable_diffusion_safe/pipeline_stable_diffusion_safe.py +8 -6
- diffusers/pipelines/stable_diffusion_safe/safety_checker.py +1 -1
- diffusers/pipelines/stable_diffusion_sag/pipeline_stable_diffusion_sag.py +15 -11
- diffusers/pipelines/stable_diffusion_xl/pipeline_flax_stable_diffusion_xl.py +1 -1
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl.py +16 -15
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py +18 -17
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_inpaint.py +12 -12
- diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_instruct_pix2pix.py +16 -15
- diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py +3 -3
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_adapter.py +12 -12
- diffusers/pipelines/t2i_adapter/pipeline_stable_diffusion_xl_adapter.py +18 -17
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_synth_img2img.py +12 -7
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero.py +15 -13
- diffusers/pipelines/text_to_video_synthesis/pipeline_text_to_video_zero_sdxl.py +24 -21
- diffusers/pipelines/unclip/pipeline_unclip.py +4 -3
- diffusers/pipelines/unclip/pipeline_unclip_image_variation.py +4 -3
- diffusers/pipelines/unclip/text_proj.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_text_decoder.py +2 -2
- diffusers/pipelines/unidiffuser/modeling_uvit.py +1 -1
- diffusers/pipelines/unidiffuser/pipeline_unidiffuser.py +8 -7
- diffusers/pipelines/visualcloze/__init__.py +52 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_combined.py +444 -0
- diffusers/pipelines/visualcloze/pipeline_visualcloze_generation.py +952 -0
- diffusers/pipelines/visualcloze/visualcloze_utils.py +251 -0
- diffusers/pipelines/wan/__init__.py +2 -0
- diffusers/pipelines/wan/pipeline_wan.py +91 -30
- diffusers/pipelines/wan/pipeline_wan_i2v.py +145 -45
- diffusers/pipelines/wan/pipeline_wan_vace.py +975 -0
- diffusers/pipelines/wan/pipeline_wan_video2video.py +14 -16
- diffusers/pipelines/wuerstchen/modeling_paella_vq_model.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_diffnext.py +1 -1
- diffusers/pipelines/wuerstchen/modeling_wuerstchen_prior.py +1 -1
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen.py +8 -8
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_combined.py +16 -15
- diffusers/pipelines/wuerstchen/pipeline_wuerstchen_prior.py +6 -6
- diffusers/quantizers/__init__.py +3 -1
- diffusers/quantizers/base.py +17 -1
- diffusers/quantizers/bitsandbytes/bnb_quantizer.py +4 -0
- diffusers/quantizers/bitsandbytes/utils.py +10 -7
- diffusers/quantizers/gguf/gguf_quantizer.py +13 -4
- diffusers/quantizers/gguf/utils.py +108 -16
- diffusers/quantizers/pipe_quant_config.py +202 -0
- diffusers/quantizers/quantization_config.py +18 -16
- diffusers/quantizers/quanto/quanto_quantizer.py +4 -0
- diffusers/quantizers/torchao/torchao_quantizer.py +31 -1
- diffusers/schedulers/__init__.py +3 -1
- diffusers/schedulers/deprecated/scheduling_karras_ve.py +4 -3
- diffusers/schedulers/deprecated/scheduling_sde_vp.py +1 -1
- diffusers/schedulers/scheduling_consistency_models.py +1 -1
- diffusers/schedulers/scheduling_cosine_dpmsolver_multistep.py +10 -5
- diffusers/schedulers/scheduling_ddim.py +8 -8
- diffusers/schedulers/scheduling_ddim_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_ddim_flax.py +6 -6
- diffusers/schedulers/scheduling_ddim_inverse.py +6 -6
- diffusers/schedulers/scheduling_ddim_parallel.py +22 -22
- diffusers/schedulers/scheduling_ddpm.py +9 -9
- diffusers/schedulers/scheduling_ddpm_flax.py +7 -7
- diffusers/schedulers/scheduling_ddpm_parallel.py +18 -18
- diffusers/schedulers/scheduling_ddpm_wuerstchen.py +2 -2
- diffusers/schedulers/scheduling_deis_multistep.py +16 -9
- diffusers/schedulers/scheduling_dpm_cogvideox.py +5 -5
- diffusers/schedulers/scheduling_dpmsolver_multistep.py +18 -12
- diffusers/schedulers/scheduling_dpmsolver_multistep_flax.py +22 -20
- diffusers/schedulers/scheduling_dpmsolver_multistep_inverse.py +11 -11
- diffusers/schedulers/scheduling_dpmsolver_sde.py +2 -2
- diffusers/schedulers/scheduling_dpmsolver_singlestep.py +19 -13
- diffusers/schedulers/scheduling_edm_dpmsolver_multistep.py +13 -8
- diffusers/schedulers/scheduling_edm_euler.py +20 -11
- diffusers/schedulers/scheduling_euler_ancestral_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete.py +3 -3
- diffusers/schedulers/scheduling_euler_discrete_flax.py +3 -3
- diffusers/schedulers/scheduling_flow_match_euler_discrete.py +20 -5
- diffusers/schedulers/scheduling_flow_match_heun_discrete.py +1 -1
- diffusers/schedulers/scheduling_flow_match_lcm.py +561 -0
- diffusers/schedulers/scheduling_heun_discrete.py +2 -2
- diffusers/schedulers/scheduling_ipndm.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_ancestral_discrete.py +2 -2
- diffusers/schedulers/scheduling_k_dpm_2_discrete.py +2 -2
- diffusers/schedulers/scheduling_karras_ve_flax.py +5 -5
- diffusers/schedulers/scheduling_lcm.py +3 -3
- diffusers/schedulers/scheduling_lms_discrete.py +2 -2
- diffusers/schedulers/scheduling_lms_discrete_flax.py +1 -1
- diffusers/schedulers/scheduling_pndm.py +4 -4
- diffusers/schedulers/scheduling_pndm_flax.py +4 -4
- diffusers/schedulers/scheduling_repaint.py +9 -9
- diffusers/schedulers/scheduling_sasolver.py +15 -15
- diffusers/schedulers/scheduling_scm.py +1 -2
- diffusers/schedulers/scheduling_sde_ve.py +1 -1
- diffusers/schedulers/scheduling_sde_ve_flax.py +2 -2
- diffusers/schedulers/scheduling_tcd.py +3 -3
- diffusers/schedulers/scheduling_unclip.py +5 -5
- diffusers/schedulers/scheduling_unipc_multistep.py +21 -12
- diffusers/schedulers/scheduling_utils.py +3 -3
- diffusers/schedulers/scheduling_utils_flax.py +2 -2
- diffusers/schedulers/scheduling_vq_diffusion.py +1 -1
- diffusers/training_utils.py +91 -5
- diffusers/utils/__init__.py +15 -0
- diffusers/utils/accelerate_utils.py +1 -1
- diffusers/utils/constants.py +4 -0
- diffusers/utils/doc_utils.py +1 -1
- diffusers/utils/dummy_pt_objects.py +432 -0
- diffusers/utils/dummy_torch_and_transformers_objects.py +480 -0
- diffusers/utils/dynamic_modules_utils.py +85 -8
- diffusers/utils/export_utils.py +1 -1
- diffusers/utils/hub_utils.py +33 -17
- diffusers/utils/import_utils.py +151 -18
- diffusers/utils/logging.py +1 -1
- diffusers/utils/outputs.py +2 -1
- diffusers/utils/peft_utils.py +96 -10
- diffusers/utils/state_dict_utils.py +20 -3
- diffusers/utils/testing_utils.py +195 -17
- diffusers/utils/torch_utils.py +43 -5
- diffusers/video_processor.py +2 -2
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/METADATA +72 -57
- diffusers-0.35.0.dist-info/RECORD +703 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/WHEEL +1 -1
- diffusers-0.33.1.dist-info/RECORD +0 -608
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/LICENSE +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/entry_points.txt +0 -0
- {diffusers-0.33.1.dist-info → diffusers-0.35.0.dist-info}/top_level.txt +0 -0
--- a/diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py
+++ b/diffusers/pipelines/animatediff/pipeline_animatediff_sparsectrl.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -140,7 +140,7 @@ class AnimateDiffSparseControlNetPipeline(
 ):
     r"""
     Pipeline for controlled text-to-video generation using the method described in [SparseCtrl: Adding Sparse Controls
-    to Text-to-Video Diffusion Models](https://
+    to Text-to-Video Diffusion Models](https://huggingface.co/papers/2311.16933).
 
     This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods
     implemented for all pipelines (downloading, saving, running on a particular device, etc.).
@@ -475,7 +475,7 @@ class AnimateDiffSparseControlNetPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -695,7 +695,7 @@ class AnimateDiffSparseControlNetPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -762,8 +762,8 @@ class AnimateDiffSparseControlNetPipeline(
                 The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                 pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
--- a/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
+++ b/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -539,7 +539,7 @@ class AnimateDiffVideoToVideoPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -725,7 +725,7 @@ class AnimateDiffVideoToVideoPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -805,8 +805,8 @@ class AnimateDiffVideoToVideoPipeline(
                 The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                 pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
--- a/diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py
+++ b/diffusers/pipelines/animatediff/pipeline_animatediff_video2video_controlnet.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -571,7 +571,7 @@ class AnimateDiffVideoToVideoControlNetPipeline(
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -890,7 +890,7 @@ class AnimateDiffVideoToVideoControlNetPipeline(
         return self._clip_skip
 
     # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-    # of the Imagen paper: https://
+    # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
@@ -975,8 +975,8 @@ class AnimateDiffVideoToVideoControlNetPipeline(
                 The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                 pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
--- a/diffusers/pipelines/audioldm/pipeline_audioldm.py
+++ b/diffusers/pipelines/audioldm/pipeline_audioldm.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@ from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import is_torch_xla_available, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline, StableDiffusionMixin
+from ..pipeline_utils import AudioPipelineOutput, DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin
 
 
 if is_torch_xla_available():
@@ -57,7 +57,7 @@ EXAMPLE_DOC_STRING = """
 """
 
 
-class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
+class AudioLDMPipeline(DeprecatedPipelineMixin, DiffusionPipeline, StableDiffusionMixin):
     r"""
     Pipeline for text-to-audio generation using AudioLDM.
 
@@ -81,6 +81,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
             Vocoder of class `SpeechT5HifiGan`.
     """
 
+    _last_supported_version = "0.33.1"
     model_cpu_offload_seq = "text_encoder->unet->vae"
 
     def __init__(
@@ -261,7 +262,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -397,8 +398,8 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
             num_waveforms_per_prompt (`int`, *optional*, defaults to 1):
                 The number of waveforms to generate per prompt.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -472,7 +473,7 @@ class AudioLDMPipeline(DiffusionPipeline, StableDiffusionMixin):
 
         device = self._execution_device
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
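Note: `DeprecatedPipelineMixin` is new in this release. As the hunks above show, a pipeline opts in by listing the mixin first among its bases and pinning `_last_supported_version`. A minimal sketch of that pattern; only the mixin and attribute come from this diff, while `MyOldPipeline` is a hypothetical placeholder:

# Hypothetical example of the deprecation opt-in pattern shown above.
from diffusers.pipelines.pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline

class MyOldPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
    # Last diffusers release in which this pipeline was actively maintained;
    # the mixin can surface this version to users of later releases.
    _last_supported_version = "0.33.1"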
--- a/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
+++ b/diffusers/pipelines/audioldm2/pipeline_audioldm2.py
@@ -1,4 +1,4 @@
-# Copyright
+# Copyright 2025 CVSSP, ByteDance and The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -40,7 +40,8 @@ from ...utils import (
     logging,
     replace_example_docstring,
 )
-from ...utils.
+from ...utils.import_utils import is_transformers_version
+from ...utils.torch_utils import empty_device_cache, randn_tensor
 from ..pipeline_utils import AudioPipelineOutput, DiffusionPipeline
 from .modeling_audioldm2 import AudioLDM2ProjectionModel, AudioLDM2UNet2DConditionModel
 
@@ -266,9 +267,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
 
         if self.device.type != "cpu":
             self.to("cpu", silence_dtype_warnings=True)
-
-            if hasattr(device_mod, "empty_cache") and device_mod.is_available():
-                device_mod.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+            empty_device_cache(device.type)
 
         model_sequence = [
             self.text_encoder.text_model,
@@ -312,8 +311,18 @@ class AudioLDM2Pipeline(DiffusionPipeline):
             `inputs_embeds (`torch.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
                 The sequence of generated hidden-states.
         """
+        cache_position_kwargs = {}
+        if is_transformers_version("<", "4.52.1"):
+            cache_position_kwargs["input_ids"] = inputs_embeds
+        else:
+            cache_position_kwargs["seq_length"] = inputs_embeds.shape[0]
+            cache_position_kwargs["device"] = (
+                self.language_model.device if getattr(self, "language_model", None) is not None else self.device
+            )
+        cache_position_kwargs["model_kwargs"] = model_kwargs
         max_new_tokens = max_new_tokens if max_new_tokens is not None else self.language_model.config.max_new_tokens
-        model_kwargs = self.language_model._get_initial_cache_position(
+        model_kwargs = self.language_model._get_initial_cache_position(**cache_position_kwargs)
+
         for _ in range(max_new_tokens):
             # prepare model inputs
             model_inputs = prepare_inputs_for_generation(inputs_embeds, **model_kwargs)
@@ -373,7 +382,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                 *e.g.* prompt weighting. If not provided, negative_prompt_embeds will be computed from
                 `negative_prompt` input argument.
             generated_prompt_embeds (`torch.Tensor`, *optional*):
-                Pre-generated text embeddings from the GPT2
+                Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
                 *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
                 argument.
             negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -394,7 +403,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
             attention_mask (`torch.LongTensor`):
                 Attention mask to be applied to the `prompt_embeds`.
             generated_prompt_embeds (`torch.Tensor`):
-                Text embeddings generated from the GPT2
+                Text embeddings generated from the GPT2 language model.
 
         Example:
 
@@ -701,7 +710,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
         # eta (η) is only used with the DDIMScheduler, it will be ignored for other schedulers.
-        # eta corresponds to η in DDIM paper: https://
+        # eta corresponds to η in DDIM paper: https://huggingface.co/papers/2010.02502
         # and should be between [0, 1]
 
         accepts_eta = "eta" in set(inspect.signature(self.scheduler.step).parameters.keys())
@@ -888,8 +897,8 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                 generated waveforms based on their cosine similarity with the text input in the joint text-audio
                 embedding space.
             eta (`float`, *optional*, defaults to 0.0):
-                Corresponds to parameter eta (η) from the [DDIM](https://
-                to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
+                Corresponds to parameter eta (η) from the [DDIM](https://huggingface.co/papers/2010.02502) paper. Only
+                applies to the [`~schedulers.DDIMScheduler`], and is ignored in other schedulers.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                 generation deterministic.
@@ -904,7 +913,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             generated_prompt_embeds (`torch.Tensor`, *optional*):
-                Pre-generated text embeddings from the GPT2
+                Pre-generated text embeddings from the GPT2 language model. Can be used to easily tweak text inputs,
                 *e.g.* prompt weighting. If not provided, text embeddings will be generated from `prompt` input
                 argument.
             negative_generated_prompt_embeds (`torch.Tensor`, *optional*):
@@ -987,7 +996,7 @@ class AudioLDM2Pipeline(DiffusionPipeline):
 
         device = self._execution_device
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0
 
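The AudioLDM2 hunks above illustrate two utility migrations in this release: manual per-device `empty_cache()` calls are replaced by the device-agnostic `empty_device_cache()` helper, and the call into transformers' private `_get_initial_cache_position` is gated on the installed transformers version. A sketch of that gating idiom, using only names that appear in the diff; `build_cache_position_kwargs` is a hypothetical wrapper, not the pipeline's exact code:

from diffusers.utils.import_utils import is_transformers_version

def build_cache_position_kwargs(inputs_embeds, model_kwargs, device):
    # Shape the kwargs to match whichever transformers version is installed.
    kwargs = {}
    if is_transformers_version("<", "4.52.1"):
        # Older releases derived the cache position from the embeddings directly.
        kwargs["input_ids"] = inputs_embeds
    else:
        # Newer releases expect an explicit sequence length and device.
        kwargs["seq_length"] = inputs_embeds.shape[0]
        kwargs["device"] = device
    kwargs["model_kwargs"] = model_kwargs
    return kwargs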
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright
|
1
|
+
# Copyright 2025 AuraFlow Authors and The HuggingFace Team. All rights reserved.
|
2
2
|
#
|
3
3
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
4
|
# you may not use this file except in compliance with the License.
|
@@ -12,17 +12,25 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
import inspect
|
15
|
-
from typing import Callable, Dict, List, Optional, Tuple, Union
|
15
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
16
16
|
|
17
17
|
import torch
|
18
18
|
from transformers import T5Tokenizer, UMT5EncoderModel
|
19
19
|
|
20
20
|
from ...callbacks import MultiPipelineCallbacks, PipelineCallback
|
21
21
|
from ...image_processor import VaeImageProcessor
|
22
|
+
from ...loaders import AuraFlowLoraLoaderMixin
|
22
23
|
from ...models import AuraFlowTransformer2DModel, AutoencoderKL
|
23
24
|
from ...models.attention_processor import AttnProcessor2_0, FusedAttnProcessor2_0, XFormersAttnProcessor
|
24
25
|
from ...schedulers import FlowMatchEulerDiscreteScheduler
|
25
|
-
from ...utils import
|
26
|
+
from ...utils import (
|
27
|
+
USE_PEFT_BACKEND,
|
28
|
+
is_torch_xla_available,
|
29
|
+
logging,
|
30
|
+
replace_example_docstring,
|
31
|
+
scale_lora_layers,
|
32
|
+
unscale_lora_layers,
|
33
|
+
)
|
26
34
|
from ...utils.torch_utils import randn_tensor
|
27
35
|
from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
|
28
36
|
|
@@ -112,7 +120,7 @@ def retrieve_timesteps(
|
|
112
120
|
return timesteps, num_inference_steps
|
113
121
|
|
114
122
|
|
115
|
-
class AuraFlowPipeline(DiffusionPipeline):
|
123
|
+
class AuraFlowPipeline(DiffusionPipeline, AuraFlowLoraLoaderMixin):
|
116
124
|
r"""
|
117
125
|
Args:
|
118
126
|
tokenizer (`T5TokenizerFast`):
|
@@ -233,6 +241,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
233
241
|
prompt_attention_mask: Optional[torch.Tensor] = None,
|
234
242
|
negative_prompt_attention_mask: Optional[torch.Tensor] = None,
|
235
243
|
max_sequence_length: int = 256,
|
244
|
+
lora_scale: Optional[float] = None,
|
236
245
|
):
|
237
246
|
r"""
|
238
247
|
Encodes the prompt into text encoder hidden states.
|
@@ -259,10 +268,20 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
259
268
|
negative_prompt_attention_mask (`torch.Tensor`, *optional*):
|
260
269
|
Pre-generated attention mask for negative text embeddings.
|
261
270
|
max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
|
271
|
+
lora_scale (`float`, *optional*):
|
272
|
+
A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
|
262
273
|
"""
|
274
|
+
# set lora scale so that monkey patched LoRA
|
275
|
+
# function of text encoder can correctly access it
|
276
|
+
if lora_scale is not None and isinstance(self, AuraFlowLoraLoaderMixin):
|
277
|
+
self._lora_scale = lora_scale
|
278
|
+
|
279
|
+
# dynamically adjust the LoRA scale
|
280
|
+
if self.text_encoder is not None and USE_PEFT_BACKEND:
|
281
|
+
scale_lora_layers(self.text_encoder, lora_scale)
|
282
|
+
|
263
283
|
if device is None:
|
264
284
|
device = self._execution_device
|
265
|
-
|
266
285
|
if prompt is not None and isinstance(prompt, str):
|
267
286
|
batch_size = 1
|
268
287
|
elif prompt is not None and isinstance(prompt, list):
|
@@ -346,6 +365,11 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
346
365
|
negative_prompt_embeds = None
|
347
366
|
negative_prompt_attention_mask = None
|
348
367
|
|
368
|
+
if self.text_encoder is not None:
|
369
|
+
if isinstance(self, AuraFlowLoraLoaderMixin) and USE_PEFT_BACKEND:
|
370
|
+
# Retrieve the original scale by scaling back the LoRA layers
|
371
|
+
unscale_lora_layers(self.text_encoder, lora_scale)
|
372
|
+
|
349
373
|
return prompt_embeds, prompt_attention_mask, negative_prompt_embeds, negative_prompt_attention_mask
|
350
374
|
|
351
375
|
# Copied from diffusers.pipelines.stable_diffusion_3.pipeline_stable_diffusion_3.StableDiffusion3Pipeline.prepare_latents
|
@@ -403,6 +427,10 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
403
427
|
def guidance_scale(self):
|
404
428
|
return self._guidance_scale
|
405
429
|
|
430
|
+
@property
|
431
|
+
def attention_kwargs(self):
|
432
|
+
return self._attention_kwargs
|
433
|
+
|
406
434
|
@property
|
407
435
|
def num_timesteps(self):
|
408
436
|
return self._num_timesteps
|
@@ -428,6 +456,7 @@ class AuraFlowPipeline(DiffusionPipeline):
|
|
428
456
|
max_sequence_length: int = 256,
|
429
457
|
output_type: Optional[str] = "pil",
|
430
458
|
return_dict: bool = True,
|
459
|
+
attention_kwargs: Optional[Dict[str, Any]] = None,
|
431
460
|
callback_on_step_end: Optional[
|
432
461
|
Union[Callable[[int, int, Dict], None], PipelineCallback, MultiPipelineCallbacks]
|
433
462
|
] = None,
|
@@ -455,11 +484,11 @@ class AuraFlowPipeline(DiffusionPipeline):
                 Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
                 `num_inference_steps` and `timesteps` must be `None`.
             guidance_scale (`float`, *optional*, defaults to 5.0):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://arxiv.org/abs/2207.12598). `guidance_scale` is defined as `w` of equation 2. of
-                [Imagen Paper](https://arxiv.org/abs/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             num_images_per_prompt (`int`, *optional*, defaults to 1):
                 The number of images to generate per prompt.
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
@@ -486,6 +515,10 @@ class AuraFlowPipeline(DiffusionPipeline):
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
                 of a plain tuple.
+            attention_kwargs (`dict`, *optional*):
+                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
+                `self.processor` in
+                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
             callback_on_step_end (`Callable`, *optional*):
                 A function that calls at the end of each denoising steps during the inference. The function is called
                 with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
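In practice the dictionary's `"scale"` entry doubles as the per-call LoRA scale; as the hunks below show, `__call__` reads it back and forwards it to `encode_prompt`. A sketch of a call that uses it, assuming the `pipe` set up earlier:

```python
image = pipe(
    "a watercolor fox in the snow",
    guidance_scale=5.0,
    attention_kwargs={"scale": 0.7},  # applied to LoRA layers for this call only
).images[0]
```
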
@@ -520,6 +553,7 @@ class AuraFlowPipeline(DiffusionPipeline):
         )

         self._guidance_scale = guidance_scale
+        self._attention_kwargs = attention_kwargs

         # 2. Determine batch size.
         if prompt is not None and isinstance(prompt, str):
@@ -530,9 +564,10 @@ class AuraFlowPipeline(DiffusionPipeline):
             batch_size = prompt_embeds.shape[0]

         device = self._execution_device
+        lora_scale = self.attention_kwargs.get("scale", None) if self.attention_kwargs is not None else None

         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
-        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
+        # of the Imagen paper: https://huggingface.co/papers/2205.11487 . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
         do_classifier_free_guidance = guidance_scale > 1.0

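For reference, the guidance step that this flag gates is the standard classifier-free-guidance blend used throughout diffusers. A sketch, not this file's literal code (`apply_cfg` and the argument names are illustrative):

```python
import torch

def apply_cfg(noise_uncond: torch.Tensor, noise_text: torch.Tensor, w: float) -> torch.Tensor:
    # equation (2) of the Imagen paper: w == 1 means no guidance; larger w
    # pulls the prediction toward the text-conditioned branch
    return noise_uncond + w * (noise_text - noise_uncond)
```
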
@@ -553,6 +588,7 @@ class AuraFlowPipeline(DiffusionPipeline):
             prompt_attention_mask=prompt_attention_mask,
             negative_prompt_attention_mask=negative_prompt_attention_mask,
             max_sequence_length=max_sequence_length,
+            lora_scale=lora_scale,
         )
         if do_classifier_free_guidance:
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)
@@ -594,6 +630,7 @@ class AuraFlowPipeline(DiffusionPipeline):
                     encoder_hidden_states=prompt_embeds,
                     timestep=timestep,
                     return_dict=False,
+                    attention_kwargs=self.attention_kwargs,
                 )[0]

                 # perform guidance
@@ -21,6 +21,7 @@ from ..configuration_utils import ConfigMixin
 from ..models.controlnets import ControlNetUnionModel
 from ..utils import is_sentencepiece_available
 from .aura_flow import AuraFlowPipeline
+from .chroma import ChromaPipeline
 from .cogview3 import CogView3PlusPipeline
 from .cogview4 import CogView4ControlPipeline, CogView4Pipeline
 from .controlnet import (
@@ -48,6 +49,7 @@ from .flux import (
     FluxControlPipeline,
     FluxImg2ImgPipeline,
     FluxInpaintPipeline,
+    FluxKontextPipeline,
     FluxPipeline,
 )
 from .hunyuandit import HunyuanDiTPipeline
@@ -141,8 +143,10 @@ AUTO_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxPipeline),
         ("flux-control", FluxControlPipeline),
         ("flux-controlnet", FluxControlNetPipeline),
+        ("flux-kontext", FluxKontextPipeline),
         ("lumina", LuminaPipeline),
         ("lumina2", Lumina2Pipeline),
+        ("chroma", ChromaPipeline),
         ("cogview3", CogView3PlusPipeline),
         ("cogview4", CogView4Pipeline),
         ("cogview4-control", CogView4ControlPipeline),
@@ -169,6 +173,7 @@ AUTO_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict(
         ("flux", FluxImg2ImgPipeline),
         ("flux-controlnet", FluxControlNetImg2ImgPipeline),
         ("flux-control", FluxControlImg2ImgPipeline),
+        ("flux-kontext", FluxKontextPipeline),
     ]
 )

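With these registry entries, the auto classes can now resolve Chroma and Flux Kontext checkpoints by their pipeline class. A sketch of what that enables; the repository id is a placeholder, not a real checkpoint:

```python
import torch
from diffusers import AutoPipelineForText2Image

# "your-org/chroma-checkpoint" is illustrative; any checkpoint whose pipeline
# class is ChromaPipeline (or FluxKontextPipeline) now resolves automatically
pipe = AutoPipelineForText2Image.from_pretrained(
    "your-org/chroma-checkpoint", torch_dtype=torch.bfloat16
)
image = pipe("a lighthouse at dusk").images[0]
```
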
@@ -246,14 +251,15 @@ def _get_connected_pipeline(pipeline_cls):
     return _get_task_class(AUTO_INPAINT_PIPELINES_MAPPING, pipeline_cls.__name__, throw_error_if_not_exist=False)


-def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
-    def get_model(pipeline_class_name):
-        for task_mapping in SUPPORTED_TASKS_MAPPINGS:
-            for model_name, pipeline in task_mapping.items():
-                if pipeline.__name__ == pipeline_class_name:
-                    return model_name
+def _get_model(pipeline_class_name):
+    for task_mapping in SUPPORTED_TASKS_MAPPINGS:
+        for model_name, pipeline in task_mapping.items():
+            if pipeline.__name__ == pipeline_class_name:
+                return model_name
+

-    model_name = get_model(pipeline_class_name)
+def _get_task_class(mapping, pipeline_class_name, throw_error_if_not_exist: bool = True):
+    model_name = _get_model(pipeline_class_name)

     if model_name is not None:
         task_class = mapping.get(model_name, None)
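The refactor only hoists the nested `get_model` helper to module level as `_get_model`; the lookup behavior is unchanged. A quick illustration (both helpers are private and may change between releases):

```python
from diffusers.pipelines.auto_pipeline import (
    AUTO_TEXT2IMAGE_PIPELINES_MAPPING,
    _get_task_class,
)

# "FluxImg2ImgPipeline" belongs to the "flux" model family, so its
# text-to-image counterpart resolves to FluxPipeline
task_class = _get_task_class(AUTO_TEXT2IMAGE_PIPELINES_MAPPING, "FluxImg2ImgPipeline")
print(task_class.__name__)  # FluxPipeline
```
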
@@ -322,9 +328,8 @@ class AutoPipelineForText2Image(ConfigMixin):
                 - A path to a *directory* (for example `./my_pipeline_directory/`) containing pipeline weights
                   saved using
                   [`~DiffusionPipeline.save_pretrained`].
-            torch_dtype (`str` or `torch.dtype`, *optional*):
-                Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
-                dtype is automatically derived from the model's weights.
+            torch_dtype (`torch.dtype`, *optional*):
+                Override the default `torch.dtype` and load the model with another dtype.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
@@ -390,8 +395,8 @@ class AutoPipelineForText2Image(ConfigMixin):

        <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

        </Tip>

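The tips now reference the `hf` CLI that ships with newer huggingface_hub releases, replacing the older `huggingface-cli` entry point. The same login can be done from Python:

```python
from huggingface_hub import login

login()  # prompts for a token; or pass token="hf_..." directly
```
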
@@ -619,8 +624,7 @@ class AutoPipelineForImage2Image(ConfigMixin):
                   saved using
                   [`~DiffusionPipeline.save_pretrained`].
             torch_dtype (`str` or `torch.dtype`, *optional*):
-                Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
-                dtype is automatically derived from the model's weights.
+                Override the default `torch.dtype` and load the model with another dtype.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
@@ -686,8 +690,8 @@ class AutoPipelineForImage2Image(ConfigMixin):

        <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

        </Tip>

@@ -930,8 +934,7 @@ class AutoPipelineForInpainting(ConfigMixin):
                   saved using
                   [`~DiffusionPipeline.save_pretrained`].
             torch_dtype (`str` or `torch.dtype`, *optional*):
-                Override the default `torch.dtype` and load the model with another dtype. If "auto" is passed, the
-                dtype is automatically derived from the model's weights.
+                Override the default `torch.dtype` and load the model with another dtype.
             force_download (`bool`, *optional*, defaults to `False`):
                 Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                 cached versions if they exist.
@@ -997,8 +1000,8 @@ class AutoPipelineForInpainting(ConfigMixin):

        <Tip>

-        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with
-        `huggingface-cli login`.
+        To use private or [gated](https://huggingface.co/docs/hub/models-gated#gated-models) models, log-in with `hf
+        auth login`.

        </Tip>

@@ -1,5 +1,5 @@
-# Copyright 2024 Salesforce.com, inc.
-# Copyright 2024 The HuggingFace Team. All rights reserved.
+# Copyright 2025 Salesforce.com, inc.
+# Copyright 2025 The HuggingFace Team. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -1,5 +1,5 @@
-# Copyright 2024 Salesforce.com, inc.
-# Copyright 2024 The HuggingFace Team. All rights reserved.#
+# Copyright 2025 Salesforce.com, inc.
+# Copyright 2025 The HuggingFace Team. All rights reserved.#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -25,7 +25,7 @@ from ...utils import (
     replace_example_docstring,
 )
 from ...utils.torch_utils import randn_tensor
-from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
+from ..pipeline_utils import DeprecatedPipelineMixin, DiffusionPipeline, ImagePipelineOutput
 from .blip_image_processing import BlipImageProcessor
 from .modeling_blip2 import Blip2QFormerModel
 from .modeling_ctx_clip import ContextCLIPTextModel
@@ -81,7 +81,7 @@ EXAMPLE_DOC_STRING = """
 """


-class BlipDiffusionPipeline(DiffusionPipeline):
+class BlipDiffusionPipeline(DeprecatedPipelineMixin, DiffusionPipeline):
     """
     Pipeline for Zero-Shot Subject Driven Generation using Blip Diffusion.

@@ -107,6 +107,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
             Position of the context token in the text encoder.
     """

+    _last_supported_version = "0.33.1"
     model_cpu_offload_seq = "qformer->text_encoder->unet->vae"

     def __init__(
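Judging by the attribute, `DeprecatedPipelineMixin` marks this pipeline as frozen at a last-supported release; the mixin's exact behavior is defined elsewhere in this diff, so the note below is an assumption:

```python
# Assumption: DeprecatedPipelineMixin uses _last_supported_version to warn that
# BlipDiffusionPipeline is frozen after diffusers 0.33.1. To keep the old
# behavior (and silence the warning), pin the release, e.g.:
#     pip install "diffusers==0.33.1"
```
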
@@ -138,7 +139,7 @@ class BlipDiffusionPipeline(DiffusionPipeline):
     def get_query_embeddings(self, input_image, src_subject):
         return self.qformer(image_input=input_image, text_input=src_subject, return_dict=False)

-    # from the original Blip Diffusion code, speciefies the target subject and augments the prompt by repeating it
+    # from the original Blip Diffusion code, specifies the target subject and augments the prompt by repeating it
     def _build_prompt(self, prompts, tgt_subjects, prompt_strength=1.0, prompt_reps=20):
         rv = []
         for prompt, tgt_subject in zip(prompts, tgt_subjects):
@@ -229,11 +230,11 @@ class BlipDiffusionPipeline(DiffusionPipeline):
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by random sampling.
             guidance_scale (`float`, *optional*, defaults to 7.5):
-                Guidance scale as defined in [Classifier-Free Diffusion
-                Guidance](https://arxiv.org/abs/2207.12598). `guidance_scale` is defined as `w` of equation 2. of
-                [Imagen Paper](https://arxiv.org/abs/2205.11487). Guidance scale is enabled by setting
-                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
-                the text `prompt`, usually at the expense of lower image quality.
+                Guidance scale as defined in [Classifier-Free Diffusion
+                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
+                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
+                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
+                the text `prompt`, usually at the expense of lower image quality.
             height (`int`, *optional*, defaults to 512):
                 The height of the generated image.
             width (`int`, *optional*, defaults to 512):
|